VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 5047

Last change on this file since 5047 was 4473, checked in by vboxsync, 17 years ago

movxz -> movzbl in the GNU parts. (hopefully this fixes some mac os x as issue.)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 99.7 KB
Line 
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 */
16
17#ifndef ___iprt_asm_h
18#define ___iprt_asm_h
19
20#include <iprt/cdefs.h>
21#include <iprt/types.h>
22/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when compiling with a Microsoft compiler reporting _MSC_VER 1400
 * or later; <intrin.h> is then included and the intrinsics used by this
 * header are enabled.  Set to 0 in every other case (unless it was already
 * defined before this point).
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
  /* Barriers, CPU identification, interrupt flag, TSC and MSR access. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
  /* Port I/O. */
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
  /* TLB invalidation and string stores. */
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
  /* Control registers. */
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
  /* Bit scanning / testing and byte swapping. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
  /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
   /* Intrinsics only enabled for AMD64 builds. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
86
87
88
89/** @defgroup grp_asm ASM - Assembly Routines
90 * @ingroup grp_rt
91 * @{
92 */
93
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly.
 * The ASM* functions will then be implemented in an external .asm file.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
106
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly,
 * which this header assumes for every compiler that does not define _MSC_VER.
 * Defined as 0 for Microsoft compilers.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
115
116
117/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - memory image used by ASMGetIDTR / ASMSetIDTR.
 *
 * Declared under pack(1) so the address member directly follows the 16-bit
 * size member without padding.
 */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
/** IDTR structure type. */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR structure. */
typedef struct RTIDTR *PRTIDTR;
#pragma pack()
128
#pragma pack(1)
/**
 * GDTR - memory image used by ASMGetGDTR.
 *
 * Declared under pack(1) so the address member directly follows the 16-bit
 * size member without padding.
 */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
/** GDTR structure type. */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR structure. */
typedef struct RTGDTR *PRTGDTR;
#pragma pack()
139
140
/** @def ASMReturnAddress
 * Evaluates to the return address of the function it is used in, i.e. the
 * address execution continues at in the caller once that function returns.
 *
 * Maps to __builtin_return_address(0) for GNU compatible compilers (and
 * Doxygen) and to the _ReturnAddress() intrinsic for Microsoft compilers.
 * Any other compiler is rejected at compile time.
 */
#if !defined(_MSC_VER) && (defined(__GNUC__) || defined(__DOXYGEN__))
# define ASMReturnAddress() __builtin_return_address(0)
#elif defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#else
# error "Unsupported compiler."
#endif
156
157
158/**
159 * Gets the content of the IDTR CPU register.
160 * @param pIdtr Where to store the IDTR contents.
161 */
162#if RT_INLINE_ASM_EXTERNAL
163DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
164#else
165DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
166{
167# if RT_INLINE_ASM_GNU_STYLE
168 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
169# else
170 __asm
171 {
172# ifdef RT_ARCH_AMD64
173 mov rax, [pIdtr]
174 sidt [rax]
175# else
176 mov eax, [pIdtr]
177 sidt [eax]
178# endif
179 }
180# endif
181}
182#endif
183
184
185/**
186 * Sets the content of the IDTR CPU register.
187 * @param pIdtr Where to load the IDTR contents from
188 */
189#if RT_INLINE_ASM_EXTERNAL
190DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
191#else
192DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
193{
194# if RT_INLINE_ASM_GNU_STYLE
195 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
196# else
197 __asm
198 {
199# ifdef RT_ARCH_AMD64
200 mov rax, [pIdtr]
201 lidt [rax]
202# else
203 mov eax, [pIdtr]
204 lidt [eax]
205# endif
206 }
207# endif
208}
209#endif
210
211
212/**
213 * Gets the content of the GDTR CPU register.
214 * @param pGdtr Where to store the GDTR contents.
215 */
216#if RT_INLINE_ASM_EXTERNAL
217DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
218#else
219DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
220{
221# if RT_INLINE_ASM_GNU_STYLE
222 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
223# else
224 __asm
225 {
226# ifdef RT_ARCH_AMD64
227 mov rax, [pGdtr]
228 sgdt [rax]
229# else
230 mov eax, [pGdtr]
231 sgdt [eax]
232# endif
233 }
234# endif
235}
236#endif
237
238/**
239 * Get the cs register.
240 * @returns cs.
241 */
242#if RT_INLINE_ASM_EXTERNAL
243DECLASM(RTSEL) ASMGetCS(void);
244#else
245DECLINLINE(RTSEL) ASMGetCS(void)
246{
247 RTSEL SelCS;
248# if RT_INLINE_ASM_GNU_STYLE
249 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
250# else
251 __asm
252 {
253 mov ax, cs
254 mov [SelCS], ax
255 }
256# endif
257 return SelCS;
258}
259#endif
260
261
262/**
263 * Get the DS register.
264 * @returns DS.
265 */
266#if RT_INLINE_ASM_EXTERNAL
267DECLASM(RTSEL) ASMGetDS(void);
268#else
269DECLINLINE(RTSEL) ASMGetDS(void)
270{
271 RTSEL SelDS;
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
274# else
275 __asm
276 {
277 mov ax, ds
278 mov [SelDS], ax
279 }
280# endif
281 return SelDS;
282}
283#endif
284
285
286/**
287 * Get the ES register.
288 * @returns ES.
289 */
290#if RT_INLINE_ASM_EXTERNAL
291DECLASM(RTSEL) ASMGetES(void);
292#else
293DECLINLINE(RTSEL) ASMGetES(void)
294{
295 RTSEL SelES;
296# if RT_INLINE_ASM_GNU_STYLE
297 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
298# else
299 __asm
300 {
301 mov ax, es
302 mov [SelES], ax
303 }
304# endif
305 return SelES;
306}
307#endif
308
309
310/**
311 * Get the FS register.
312 * @returns FS.
313 */
314#if RT_INLINE_ASM_EXTERNAL
315DECLASM(RTSEL) ASMGetFS(void);
316#else
317DECLINLINE(RTSEL) ASMGetFS(void)
318{
319 RTSEL SelFS;
320# if RT_INLINE_ASM_GNU_STYLE
321 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
322# else
323 __asm
324 {
325 mov ax, fs
326 mov [SelFS], ax
327 }
328# endif
329 return SelFS;
330}
331# endif
332
333
334/**
335 * Get the GS register.
336 * @returns GS.
337 */
338#if RT_INLINE_ASM_EXTERNAL
339DECLASM(RTSEL) ASMGetGS(void);
340#else
341DECLINLINE(RTSEL) ASMGetGS(void)
342{
343 RTSEL SelGS;
344# if RT_INLINE_ASM_GNU_STYLE
345 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
346# else
347 __asm
348 {
349 mov ax, gs
350 mov [SelGS], ax
351 }
352# endif
353 return SelGS;
354}
355#endif
356
357
358/**
359 * Get the SS register.
360 * @returns SS.
361 */
362#if RT_INLINE_ASM_EXTERNAL
363DECLASM(RTSEL) ASMGetSS(void);
364#else
365DECLINLINE(RTSEL) ASMGetSS(void)
366{
367 RTSEL SelSS;
368# if RT_INLINE_ASM_GNU_STYLE
369 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
370# else
371 __asm
372 {
373 mov ax, ss
374 mov [SelSS], ax
375 }
376# endif
377 return SelSS;
378}
379#endif
380
381
382/**
383 * Get the TR register.
384 * @returns TR.
385 */
386#if RT_INLINE_ASM_EXTERNAL
387DECLASM(RTSEL) ASMGetTR(void);
388#else
389DECLINLINE(RTSEL) ASMGetTR(void)
390{
391 RTSEL SelTR;
392# if RT_INLINE_ASM_GNU_STYLE
393 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
394# else
395 __asm
396 {
397 str ax
398 mov [SelTR], ax
399 }
400# endif
401 return SelTR;
402}
403#endif
404
405
406/**
407 * Get the [RE]FLAGS register.
408 * @returns [RE]FLAGS.
409 */
410#if RT_INLINE_ASM_EXTERNAL
411DECLASM(RTCCUINTREG) ASMGetFlags(void);
412#else
413DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
414{
415 RTCCUINTREG uFlags;
416# if RT_INLINE_ASM_GNU_STYLE
417# ifdef RT_ARCH_AMD64
418 __asm__ __volatile__("pushfq\n\t"
419 "popq %0\n\t"
420 : "=m" (uFlags));
421# else
422 __asm__ __volatile__("pushfl\n\t"
423 "popl %0\n\t"
424 : "=m" (uFlags));
425# endif
426# else
427 __asm
428 {
429# ifdef RT_ARCH_AMD64
430 pushfq
431 pop [uFlags]
432# else
433 pushfd
434 pop [uFlags]
435# endif
436 }
437# endif
438 return uFlags;
439}
440#endif
441
442
443/**
444 * Set the [RE]FLAGS register.
445 * @param uFlags The new [RE]FLAGS value.
446 */
447#if RT_INLINE_ASM_EXTERNAL
448DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
449#else
450DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
451{
452# if RT_INLINE_ASM_GNU_STYLE
453# ifdef RT_ARCH_AMD64
454 __asm__ __volatile__("pushq %0\n\t"
455 "popfq\n\t"
456 : : "m" (uFlags));
457# else
458 __asm__ __volatile__("pushl %0\n\t"
459 "popfl\n\t"
460 : : "m" (uFlags));
461# endif
462# else
463 __asm
464 {
465# ifdef RT_ARCH_AMD64
466 push [uFlags]
467 popfq
468# else
469 push [uFlags]
470 popfd
471# endif
472 }
473# endif
474}
475#endif
476
477
478/**
479 * Gets the content of the CPU timestamp counter register.
480 *
481 * @returns TSC.
482 */
483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
484DECLASM(uint64_t) ASMReadTSC(void);
485#else
486DECLINLINE(uint64_t) ASMReadTSC(void)
487{
488 RTUINT64U u;
489# if RT_INLINE_ASM_GNU_STYLE
490 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
491# else
492# if RT_INLINE_ASM_USES_INTRIN
493 u.u = __rdtsc();
494# else
495 __asm
496 {
497 rdtsc
498 mov [u.s.Lo], eax
499 mov [u.s.Hi], edx
500 }
501# endif
502# endif
503 return u.u;
504}
505#endif
506
507
508/**
509 * Performs the cpuid instruction returning all registers.
510 *
511 * @param uOperator CPUID operation (eax).
512 * @param pvEAX Where to store eax.
513 * @param pvEBX Where to store ebx.
514 * @param pvECX Where to store ecx.
515 * @param pvEDX Where to store edx.
516 * @remark We're using void pointers to ease the use of special bitfield structures and such.
517 */
518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
519DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
520#else
521DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
522{
523# if RT_INLINE_ASM_GNU_STYLE
524# ifdef RT_ARCH_AMD64
525 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
526 __asm__ ("cpuid\n\t"
527 : "=a" (uRAX),
528 "=b" (uRBX),
529 "=c" (uRCX),
530 "=d" (uRDX)
531 : "0" (uOperator));
532 *(uint32_t *)pvEAX = (uint32_t)uRAX;
533 *(uint32_t *)pvEBX = (uint32_t)uRBX;
534 *(uint32_t *)pvECX = (uint32_t)uRCX;
535 *(uint32_t *)pvEDX = (uint32_t)uRDX;
536# else
537 __asm__ ("xchgl %%ebx, %1\n\t"
538 "cpuid\n\t"
539 "xchgl %%ebx, %1\n\t"
540 : "=a" (*(uint32_t *)pvEAX),
541 "=r" (*(uint32_t *)pvEBX),
542 "=c" (*(uint32_t *)pvECX),
543 "=d" (*(uint32_t *)pvEDX)
544 : "0" (uOperator));
545# endif
546
547# elif RT_INLINE_ASM_USES_INTRIN
548 int aInfo[4];
549 __cpuid(aInfo, uOperator);
550 *(uint32_t *)pvEAX = aInfo[0];
551 *(uint32_t *)pvEBX = aInfo[1];
552 *(uint32_t *)pvECX = aInfo[2];
553 *(uint32_t *)pvEDX = aInfo[3];
554
555# else
556 uint32_t uEAX;
557 uint32_t uEBX;
558 uint32_t uECX;
559 uint32_t uEDX;
560 __asm
561 {
562 push ebx
563 mov eax, [uOperator]
564 cpuid
565 mov [uEAX], eax
566 mov [uEBX], ebx
567 mov [uECX], ecx
568 mov [uEDX], edx
569 pop ebx
570 }
571 *(uint32_t *)pvEAX = uEAX;
572 *(uint32_t *)pvEBX = uEBX;
573 *(uint32_t *)pvECX = uECX;
574 *(uint32_t *)pvEDX = uEDX;
575# endif
576}
577#endif
578
579
580/**
581 * Performs the cpuid instruction returning ecx and edx.
582 *
583 * @param uOperator CPUID operation (eax).
584 * @param pvECX Where to store ecx.
585 * @param pvEDX Where to store edx.
586 * @remark We're using void pointers to ease the use of special bitfield structures and such.
587 */
588#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
589DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
590#else
591DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
592{
593 uint32_t uEBX;
594 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
595}
596#endif
597
598
599/**
600 * Performs the cpuid instruction returning edx.
601 *
602 * @param uOperator CPUID operation (eax).
603 * @returns EDX after cpuid operation.
604 */
605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
606DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
607#else
608DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
609{
610 RTCCUINTREG xDX;
611# if RT_INLINE_ASM_GNU_STYLE
612# ifdef RT_ARCH_AMD64
613 RTCCUINTREG uSpill;
614 __asm__ ("cpuid"
615 : "=a" (uSpill),
616 "=d" (xDX)
617 : "0" (uOperator)
618 : "rbx", "rcx");
619# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
620 __asm__ ("push %%ebx\n\t"
621 "cpuid\n\t"
622 "pop %%ebx\n\t"
623 : "=a" (uOperator),
624 "=d" (xDX)
625 : "0" (uOperator)
626 : "ecx");
627# else
628 __asm__ ("cpuid"
629 : "=a" (uOperator),
630 "=d" (xDX)
631 : "0" (uOperator)
632 : "ebx", "ecx");
633# endif
634
635# elif RT_INLINE_ASM_USES_INTRIN
636 int aInfo[4];
637 __cpuid(aInfo, uOperator);
638 xDX = aInfo[3];
639
640# else
641 __asm
642 {
643 push ebx
644 mov eax, [uOperator]
645 cpuid
646 mov [xDX], edx
647 pop ebx
648 }
649# endif
650 return (uint32_t)xDX;
651}
652#endif
653
654
655/**
656 * Performs the cpuid instruction returning ecx.
657 *
658 * @param uOperator CPUID operation (eax).
659 * @returns ECX after cpuid operation.
660 */
661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
662DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
663#else
664DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
665{
666 RTCCUINTREG xCX;
667# if RT_INLINE_ASM_GNU_STYLE
668# ifdef RT_ARCH_AMD64
669 RTCCUINTREG uSpill;
670 __asm__ ("cpuid"
671 : "=a" (uSpill),
672 "=c" (xCX)
673 : "0" (uOperator)
674 : "rbx", "rdx");
675# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
676 __asm__ ("push %%ebx\n\t"
677 "cpuid\n\t"
678 "pop %%ebx\n\t"
679 : "=a" (uOperator),
680 "=c" (xCX)
681 : "0" (uOperator)
682 : "edx");
683# else
684 __asm__ ("cpuid"
685 : "=a" (uOperator),
686 "=c" (xCX)
687 : "0" (uOperator)
688 : "ebx", "edx");
689
690# endif
691
692# elif RT_INLINE_ASM_USES_INTRIN
693 int aInfo[4];
694 __cpuid(aInfo, uOperator);
695 xCX = aInfo[2];
696
697# else
698 __asm
699 {
700 push ebx
701 mov eax, [uOperator]
702 cpuid
703 mov [xCX], ecx
704 pop ebx
705 }
706# endif
707 return (uint32_t)xCX;
708}
709#endif
710
711
712/**
713 * Checks if the current CPU supports CPUID.
714 *
715 * @returns true if CPUID is supported.
716 */
717DECLINLINE(bool) ASMHasCpuId(void)
718{
719#ifdef RT_ARCH_AMD64
720 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
721#else /* !RT_ARCH_AMD64 */
722 bool fRet = false;
723# if RT_INLINE_ASM_GNU_STYLE
724 uint32_t u1;
725 uint32_t u2;
726 __asm__ ("pushf\n\t"
727 "pop %1\n\t"
728 "mov %1, %2\n\t"
729 "xorl $0x200000, %1\n\t"
730 "push %1\n\t"
731 "popf\n\t"
732 "pushf\n\t"
733 "pop %1\n\t"
734 "cmpl %1, %2\n\t"
735 "setne %0\n\t"
736 "push %2\n\t"
737 "popf\n\t"
738 : "=m" (fRet), "=r" (u1), "=r" (u2));
739# else
740 __asm
741 {
742 pushfd
743 pop eax
744 mov ebx, eax
745 xor eax, 0200000h
746 push eax
747 popfd
748 pushfd
749 pop eax
750 cmp eax, ebx
751 setne fRet
752 push ebx
753 popfd
754 }
755# endif
756 return fRet;
757#endif /* !RT_ARCH_AMD64 */
758}
759
760
761/**
762 * Gets the APIC ID of the current CPU.
763 *
764 * @returns the APIC ID.
765 */
766#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
767DECLASM(uint8_t) ASMGetApicId(void);
768#else
769DECLINLINE(uint8_t) ASMGetApicId(void)
770{
771 RTCCUINTREG xBX;
772# if RT_INLINE_ASM_GNU_STYLE
773# ifdef RT_ARCH_AMD64
774 RTCCUINTREG uSpill;
775 __asm__ ("cpuid"
776 : "=a" (uSpill),
777 "=b" (xBX)
778 : "0" (1)
779 : "rcx", "rdx");
780# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
781 RTCCUINTREG uSpill;
782 __asm__ ("mov %%ebx,%1\n\t"
783 "cpuid\n\t"
784 "xchgl %%ebx,%1\n\t"
785 : "=a" (uSpill),
786 "=r" (xBX)
787 : "0" (1)
788 : "ecx", "edx");
789# else
790 RTCCUINTREG uSpill;
791 __asm__ ("cpuid"
792 : "=a" (uSpill),
793 "=b" (xBX)
794 : "0" (1)
795 : "ecx", "edx");
796# endif
797
798# elif RT_INLINE_ASM_USES_INTRIN
799 int aInfo[4];
800 __cpuid(aInfo, 1);
801 xBX = aInfo[1];
802
803# else
804 __asm
805 {
806 push ebx
807 mov eax, 1
808 cpuid
809 mov [xBX], ebx
810 pop ebx
811 }
812# endif
813 return (uint8_t)(xBX >> 24);
814}
815#endif
816
817/**
818 * Get cr0.
819 * @returns cr0.
820 */
821#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
822DECLASM(RTCCUINTREG) ASMGetCR0(void);
823#else
824DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
825{
826 RTCCUINTREG uCR0;
827# if RT_INLINE_ASM_USES_INTRIN
828 uCR0 = __readcr0();
829
830# elif RT_INLINE_ASM_GNU_STYLE
831# ifdef RT_ARCH_AMD64
832 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
833# else
834 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
835# endif
836# else
837 __asm
838 {
839# ifdef RT_ARCH_AMD64
840 mov rax, cr0
841 mov [uCR0], rax
842# else
843 mov eax, cr0
844 mov [uCR0], eax
845# endif
846 }
847# endif
848 return uCR0;
849}
850#endif
851
852
853/**
854 * Sets the CR0 register.
855 * @param uCR0 The new CR0 value.
856 */
857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
858DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
859#else
860DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
861{
862# if RT_INLINE_ASM_USES_INTRIN
863 __writecr0(uCR0);
864
865# elif RT_INLINE_ASM_GNU_STYLE
866# ifdef RT_ARCH_AMD64
867 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
868# else
869 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
870# endif
871# else
872 __asm
873 {
874# ifdef RT_ARCH_AMD64
875 mov rax, [uCR0]
876 mov cr0, rax
877# else
878 mov eax, [uCR0]
879 mov cr0, eax
880# endif
881 }
882# endif
883}
884#endif
885
886
887/**
888 * Get cr2.
889 * @returns cr2.
890 */
891#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
892DECLASM(RTCCUINTREG) ASMGetCR2(void);
893#else
894DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
895{
896 RTCCUINTREG uCR2;
897# if RT_INLINE_ASM_USES_INTRIN
898 uCR2 = __readcr2();
899
900# elif RT_INLINE_ASM_GNU_STYLE
901# ifdef RT_ARCH_AMD64
902 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
903# else
904 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
905# endif
906# else
907 __asm
908 {
909# ifdef RT_ARCH_AMD64
910 mov rax, cr2
911 mov [uCR2], rax
912# else
913 mov eax, cr2
914 mov [uCR2], eax
915# endif
916 }
917# endif
918 return uCR2;
919}
920#endif
921
922
923/**
924 * Sets the CR2 register.
925 * @param uCR2 The new CR0 value.
926 */
927#if RT_INLINE_ASM_EXTERNAL
928DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
929#else
930DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
931{
932# if RT_INLINE_ASM_GNU_STYLE
933# ifdef RT_ARCH_AMD64
934 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
935# else
936 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
937# endif
938# else
939 __asm
940 {
941# ifdef RT_ARCH_AMD64
942 mov rax, [uCR2]
943 mov cr2, rax
944# else
945 mov eax, [uCR2]
946 mov cr2, eax
947# endif
948 }
949# endif
950}
951#endif
952
953
954/**
955 * Get cr3.
956 * @returns cr3.
957 */
958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
959DECLASM(RTCCUINTREG) ASMGetCR3(void);
960#else
961DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
962{
963 RTCCUINTREG uCR3;
964# if RT_INLINE_ASM_USES_INTRIN
965 uCR3 = __readcr3();
966
967# elif RT_INLINE_ASM_GNU_STYLE
968# ifdef RT_ARCH_AMD64
969 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
970# else
971 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
972# endif
973# else
974 __asm
975 {
976# ifdef RT_ARCH_AMD64
977 mov rax, cr3
978 mov [uCR3], rax
979# else
980 mov eax, cr3
981 mov [uCR3], eax
982# endif
983 }
984# endif
985 return uCR3;
986}
987#endif
988
989
990/**
991 * Sets the CR3 register.
992 *
993 * @param uCR3 New CR3 value.
994 */
995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
996DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
997#else
998DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
999{
1000# if RT_INLINE_ASM_USES_INTRIN
1001 __writecr3(uCR3);
1002
1003# elif RT_INLINE_ASM_GNU_STYLE
1004# ifdef RT_ARCH_AMD64
1005 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1006# else
1007 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1008# endif
1009# else
1010 __asm
1011 {
1012# ifdef RT_ARCH_AMD64
1013 mov rax, [uCR3]
1014 mov cr3, rax
1015# else
1016 mov eax, [uCR3]
1017 mov cr3, eax
1018# endif
1019 }
1020# endif
1021}
1022#endif
1023
1024
1025/**
1026 * Reloads the CR3 register.
1027 */
1028#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1029DECLASM(void) ASMReloadCR3(void);
1030#else
1031DECLINLINE(void) ASMReloadCR3(void)
1032{
1033# if RT_INLINE_ASM_USES_INTRIN
1034 __writecr3(__readcr3());
1035
1036# elif RT_INLINE_ASM_GNU_STYLE
1037 RTCCUINTREG u;
1038# ifdef RT_ARCH_AMD64
1039 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1040 "movq %0, %%cr3\n\t"
1041 : "=r" (u));
1042# else
1043 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1044 "movl %0, %%cr3\n\t"
1045 : "=r" (u));
1046# endif
1047# else
1048 __asm
1049 {
1050# ifdef RT_ARCH_AMD64
1051 mov rax, cr3
1052 mov cr3, rax
1053# else
1054 mov eax, cr3
1055 mov cr3, eax
1056# endif
1057 }
1058# endif
1059}
1060#endif
1061
1062
1063/**
1064 * Get cr4.
1065 * @returns cr4.
1066 */
1067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1068DECLASM(RTCCUINTREG) ASMGetCR4(void);
1069#else
1070DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1071{
1072 RTCCUINTREG uCR4;
1073# if RT_INLINE_ASM_USES_INTRIN
1074 uCR4 = __readcr4();
1075
1076# elif RT_INLINE_ASM_GNU_STYLE
1077# ifdef RT_ARCH_AMD64
1078 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1079# else
1080 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1081# endif
1082# else
1083 __asm
1084 {
1085# ifdef RT_ARCH_AMD64
1086 mov rax, cr4
1087 mov [uCR4], rax
1088# else
1089 push eax /* just in case */
1090 /*mov eax, cr4*/
1091 _emit 0x0f
1092 _emit 0x20
1093 _emit 0xe0
1094 mov [uCR4], eax
1095 pop eax
1096# endif
1097 }
1098# endif
1099 return uCR4;
1100}
1101#endif
1102
1103
1104/**
1105 * Sets the CR4 register.
1106 *
1107 * @param uCR4 New CR4 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1111#else
1112DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr4(uCR4);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1120# else
1121 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR4]
1128 mov cr4, rax
1129# else
1130 mov eax, [uCR4]
1131 _emit 0x0F
1132 _emit 0x22
1133 _emit 0xE0 /* mov cr4, eax */
1134# endif
1135 }
1136# endif
1137}
1138#endif
1139
1140
1141/**
1142 * Get cr8.
1143 * @returns cr8.
1144 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1145 */
1146#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1147DECLASM(RTCCUINTREG) ASMGetCR8(void);
1148#else
1149DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1150{
1151# ifdef RT_ARCH_AMD64
1152 RTCCUINTREG uCR8;
1153# if RT_INLINE_ASM_USES_INTRIN
1154 uCR8 = __readcr8();
1155
1156# elif RT_INLINE_ASM_GNU_STYLE
1157 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1158# else
1159 __asm
1160 {
1161 mov rax, cr8
1162 mov [uCR8], rax
1163 }
1164# endif
1165 return uCR8;
1166# else /* !RT_ARCH_AMD64 */
1167 return 0;
1168# endif /* !RT_ARCH_AMD64 */
1169}
1170#endif
1171
1172
1173/**
1174 * Enables interrupts (EFLAGS.IF).
1175 */
1176#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1177DECLASM(void) ASMIntEnable(void);
1178#else
1179DECLINLINE(void) ASMIntEnable(void)
1180{
1181# if RT_INLINE_ASM_GNU_STYLE
1182 __asm("sti\n");
1183# elif RT_INLINE_ASM_USES_INTRIN
1184 _enable();
1185# else
1186 __asm sti
1187# endif
1188}
1189#endif
1190
1191
1192/**
1193 * Disables interrupts (!EFLAGS.IF).
1194 */
1195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1196DECLASM(void) ASMIntDisable(void);
1197#else
1198DECLINLINE(void) ASMIntDisable(void)
1199{
1200# if RT_INLINE_ASM_GNU_STYLE
1201 __asm("cli\n");
1202# elif RT_INLINE_ASM_USES_INTRIN
1203 _disable();
1204# else
1205 __asm cli
1206# endif
1207}
1208#endif
1209
1210
1211/**
1212 * Disables interrupts and returns previous xFLAGS.
1213 */
1214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1215DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1216#else
1217DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1218{
1219 RTCCUINTREG xFlags;
1220# if RT_INLINE_ASM_GNU_STYLE
1221# ifdef RT_ARCH_AMD64
1222 __asm__ __volatile__("pushfq\n\t"
1223 "cli\n\t"
1224 "popq %0\n\t"
1225 : "=m" (xFlags));
1226# else
1227 __asm__ __volatile__("pushfl\n\t"
1228 "cli\n\t"
1229 "popl %0\n\t"
1230 : "=m" (xFlags));
1231# endif
1232# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1233 xFlags = ASMGetFlags();
1234 _disable();
1235# else
1236 __asm {
1237 pushfd
1238 cli
1239 pop [xFlags]
1240 }
1241# endif
1242 return xFlags;
1243}
1244#endif
1245
1246
1247/**
1248 * Reads a machine specific register.
1249 *
1250 * @returns Register content.
1251 * @param uRegister Register to read.
1252 */
1253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1254DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1255#else
1256DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1257{
1258 RTUINT64U u;
1259# if RT_INLINE_ASM_GNU_STYLE
1260 __asm__ ("rdmsr\n\t"
1261 : "=a" (u.s.Lo),
1262 "=d" (u.s.Hi)
1263 : "c" (uRegister));
1264
1265# elif RT_INLINE_ASM_USES_INTRIN
1266 u.u = __readmsr(uRegister);
1267
1268# else
1269 __asm
1270 {
1271 mov ecx, [uRegister]
1272 rdmsr
1273 mov [u.s.Lo], eax
1274 mov [u.s.Hi], edx
1275 }
1276# endif
1277
1278 return u.u;
1279}
1280#endif
1281
1282
1283/**
1284 * Writes a machine specific register.
1285 *
1286 * @returns Register content.
1287 * @param uRegister Register to write to.
1288 * @param u64Val Value to write.
1289 */
1290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1291DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1292#else
1293DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1294{
1295 RTUINT64U u;
1296
1297 u.u = u64Val;
1298# if RT_INLINE_ASM_GNU_STYLE
1299 __asm__ __volatile__("wrmsr\n\t"
1300 ::"a" (u.s.Lo),
1301 "d" (u.s.Hi),
1302 "c" (uRegister));
1303
1304# elif RT_INLINE_ASM_USES_INTRIN
1305 __writemsr(uRegister, u.u);
1306
1307# else
1308 __asm
1309 {
1310 mov ecx, [uRegister]
1311 mov edx, [u.s.Hi]
1312 mov eax, [u.s.Lo]
1313 wrmsr
1314 }
1315# endif
1316}
1317#endif
1318
1319
1320/**
1321 * Reads low part of a machine specific register.
1322 *
1323 * @returns Register content.
1324 * @param uRegister Register to read.
1325 */
1326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1327DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1328#else
1329DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1330{
1331 uint32_t u32;
1332# if RT_INLINE_ASM_GNU_STYLE
1333 __asm__ ("rdmsr\n\t"
1334 : "=a" (u32)
1335 : "c" (uRegister)
1336 : "edx");
1337
1338# elif RT_INLINE_ASM_USES_INTRIN
1339 u32 = (uint32_t)__readmsr(uRegister);
1340
1341#else
1342 __asm
1343 {
1344 mov ecx, [uRegister]
1345 rdmsr
1346 mov [u32], eax
1347 }
1348# endif
1349
1350 return u32;
1351}
1352#endif
1353
1354
1355/**
1356 * Reads high part of a machine specific register.
1357 *
1358 * @returns Register content.
1359 * @param uRegister Register to read.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1363#else
1364DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1365{
1366 uint32_t u32;
1367# if RT_INLINE_ASM_GNU_STYLE
1368 __asm__ ("rdmsr\n\t"
1369 : "=d" (u32)
1370 : "c" (uRegister)
1371 : "eax");
1372
1373# elif RT_INLINE_ASM_USES_INTRIN
1374 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1375
1376# else
1377 __asm
1378 {
1379 mov ecx, [uRegister]
1380 rdmsr
1381 mov [u32], edx
1382 }
1383# endif
1384
1385 return u32;
1386}
1387#endif
1388
1389
1390/**
1391 * Gets dr7.
1392 *
1393 * @returns dr7.
1394 */
1395#if RT_INLINE_ASM_EXTERNAL
1396DECLASM(RTCCUINTREG) ASMGetDR7(void);
1397#else
1398DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1399{
1400 RTCCUINTREG uDR7;
1401# if RT_INLINE_ASM_GNU_STYLE
1402# ifdef RT_ARCH_AMD64
1403 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1404# else
1405 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1406# endif
1407# else
1408 __asm
1409 {
1410# ifdef RT_ARCH_AMD64
1411 mov rax, dr7
1412 mov [uDR7], rax
1413# else
1414 mov eax, dr7
1415 mov [uDR7], eax
1416# endif
1417 }
1418# endif
1419 return uDR7;
1420}
1421#endif
1422
1423
1424/**
1425 * Gets dr6.
1426 *
1427 * @returns dr6.
1428 */
1429#if RT_INLINE_ASM_EXTERNAL
1430DECLASM(RTCCUINTREG) ASMGetDR6(void);
1431#else
1432DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1433{
1434 RTCCUINTREG uDR6;
1435# if RT_INLINE_ASM_GNU_STYLE
1436# ifdef RT_ARCH_AMD64
1437 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1438# else
1439 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1440# endif
1441# else
1442 __asm
1443 {
1444# ifdef RT_ARCH_AMD64
1445 mov rax, dr6
1446 mov [uDR6], rax
1447# else
1448 mov eax, dr6
1449 mov [uDR6], eax
1450# endif
1451 }
1452# endif
1453 return uDR6;
1454}
1455#endif
1456
1457
1458/**
1459 * Reads and clears DR6.
1460 *
1461 * @returns DR6.
1462 */
1463#if RT_INLINE_ASM_EXTERNAL
1464DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1465#else
1466DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1467{
1468 RTCCUINTREG uDR6;
1469# if RT_INLINE_ASM_GNU_STYLE
1470 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1471# ifdef RT_ARCH_AMD64
1472 __asm__ ("movq %%dr6, %0\n\t"
1473 "movq %1, %%dr6\n\t"
1474 : "=r" (uDR6)
1475 : "r" (uNewValue));
1476# else
1477 __asm__ ("movl %%dr6, %0\n\t"
1478 "movl %1, %%dr6\n\t"
1479 : "=r" (uDR6)
1480 : "r" (uNewValue));
1481# endif
1482# else
1483 __asm
1484 {
1485# ifdef RT_ARCH_AMD64
1486 mov rax, dr6
1487 mov [uDR6], rax
1488 mov rcx, rax
1489 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1490 mov dr6, rcx
1491# else
1492 mov eax, dr6
1493 mov [uDR6], eax
1494 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1495 mov dr6, ecx
1496# endif
1497 }
1498# endif
1499 return uDR6;
1500}
1501#endif
1502
1503
1504/**
1505 * Compiler memory barrier.
1506 *
1507 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1508 * values or any outstanding writes when returning from this function.
1509 *
1510 * This function must be used if non-volatile data is modified by a
1511 * device or the VMM. Typical cases are port access, MMIO access,
1512 * trapping instruction, etc.
1513 */
1514#if RT_INLINE_ASM_GNU_STYLE
1515# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1516#elif RT_INLINE_ASM_USES_INTRIN
1517# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1518#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1519DECLINLINE(void) ASMCompilerBarrier(void)
1520{
1521 __asm
1522 {
1523 }
1524}
1525#endif
1526
1527
1528/**
1529 * Writes an 8-bit unsigned integer to an I/O port.
 *
 * Uses the OUT instruction (GNU-style or plain inline assembly) or the
 * __outbyte intrinsic, depending on the compiler configuration.
1530 *
1531 * @param Port I/O port to write to.
1532 * @param u8 8-bit integer to write.
1533 */
1534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1535DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1536#else
1537DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1538{
1539# if RT_INLINE_ASM_GNU_STYLE
 /* "a" places the value in the accumulator; "Nd" lets the port be an
    8-bit immediate or the DX register. */
1540 __asm__ __volatile__("outb %b1, %w0\n\t"
1541 :: "Nd" (Port),
1542 "a" (u8));
1543
1544# elif RT_INLINE_ASM_USES_INTRIN
1545 __outbyte(Port, u8);
1546
1547# else
1548 __asm
1549 {
1550 mov dx, [Port]
1551 mov al, [u8]
1552 out dx, al
1553 }
1554# endif
1555}
1556#endif
1557
1558
1559/**
1560 * Gets an 8-bit unsigned integer from an I/O port.
 *
 * Uses the IN instruction (GNU-style or plain inline assembly) or the
 * __inbyte intrinsic, depending on the compiler configuration.
1561 *
1562 * @returns 8-bit integer read from the port.
1563 * @param Port I/O port to read from.
1564 */
1565#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1566DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1567#else
1568DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1569{
1570 uint8_t u8;
1571# if RT_INLINE_ASM_GNU_STYLE
 /* "=a" receives the result from the accumulator; "Nd" lets the port be
    an 8-bit immediate or the DX register. */
1572 __asm__ __volatile__("inb %w1, %b0\n\t"
1573 : "=a" (u8)
1574 : "Nd" (Port));
1575
1576# elif RT_INLINE_ASM_USES_INTRIN
1577 u8 = __inbyte(Port);
1578
1579# else
1580 __asm
1581 {
1582 mov dx, [Port]
1583 in al, dx
1584 mov [u8], al
1585 }
1586# endif
1587 return u8;
1588}
1589#endif
1590
1591
1592/**
1593 * Writes a 16-bit unsigned integer to an I/O port.
 *
 * Uses the OUT instruction (GNU-style or plain inline assembly) or the
 * __outword intrinsic, depending on the compiler configuration.
1594 *
1595 * @param Port I/O port to write to.
1596 * @param u16 16-bit integer to write.
1597 */
1598#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1599DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1600#else
1601DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1602{
1603# if RT_INLINE_ASM_GNU_STYLE
 /* "a" places the value in the accumulator; "Nd" lets the port be an
    8-bit immediate or the DX register. */
1604 __asm__ __volatile__("outw %w1, %w0\n\t"
1605 :: "Nd" (Port),
1606 "a" (u16));
1607
1608# elif RT_INLINE_ASM_USES_INTRIN
1609 __outword(Port, u16);
1610
1611# else
1612 __asm
1613 {
1614 mov dx, [Port]
1615 mov ax, [u16]
1616 out dx, ax
1617 }
1618# endif
1619}
1620#endif
1621
1622
1623/**
1624 * Gets a 16-bit unsigned integer from an I/O port.
 *
 * Uses the IN instruction (GNU-style or plain inline assembly) or the
 * __inword intrinsic, depending on the compiler configuration.
1625 *
1626 * @returns 16-bit integer read from the port.
1627 * @param Port I/O port to read from.
1628 */
1629#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1630DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1631#else
1632DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1633{
1634 uint16_t u16;
1635# if RT_INLINE_ASM_GNU_STYLE
 /* "=a" receives the result from the accumulator; "Nd" lets the port be
    an 8-bit immediate or the DX register. */
1636 __asm__ __volatile__("inw %w1, %w0\n\t"
1637 : "=a" (u16)
1638 : "Nd" (Port));
1639
1640# elif RT_INLINE_ASM_USES_INTRIN
1641 u16 = __inword(Port);
1642
1643# else
1644 __asm
1645 {
1646 mov dx, [Port]
1647 in ax, dx
1648 mov [u16], ax
1649 }
1650# endif
1651 return u16;
1652}
1653#endif
1654
1655
1656/**
1657 * Writes a 32-bit unsigned integer to an I/O port.
 *
 * Uses the OUT instruction (GNU-style or plain inline assembly) or the
 * __outdword intrinsic, depending on the compiler configuration.
1658 *
1659 * @param Port I/O port to write to.
1660 * @param u32 32-bit integer to write.
1661 */
1662#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1663DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1664#else
1665DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1666{
1667# if RT_INLINE_ASM_GNU_STYLE
 /* "a" places the value in the accumulator; "Nd" lets the port be an
    8-bit immediate or the DX register. */
1668 __asm__ __volatile__("outl %1, %w0\n\t"
1669 :: "Nd" (Port),
1670 "a" (u32));
1671
1672# elif RT_INLINE_ASM_USES_INTRIN
1673 __outdword(Port, u32);
1674
1675# else
1676 __asm
1677 {
1678 mov dx, [Port]
1679 mov eax, [u32]
1680 out dx, eax
1681 }
1682# endif
1683}
1684#endif
1685
1686
1687/**
1688 * Gets a 32-bit unsigned integer from an I/O port.
 *
 * Uses the IN instruction (GNU-style or plain inline assembly) or the
 * __indword intrinsic, depending on the compiler configuration.
1689 *
1690 * @returns 32-bit integer read from the port.
1691 * @param Port I/O port to read from.
1692 */
1693#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1694DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1695#else
1696DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1697{
1698 uint32_t u32;
1699# if RT_INLINE_ASM_GNU_STYLE
 /* "=a" receives the result from the accumulator; "Nd" lets the port be
    an 8-bit immediate or the DX register. */
1700 __asm__ __volatile__("inl %w1, %0\n\t"
1701 : "=a" (u32)
1702 : "Nd" (Port));
1703
1704# elif RT_INLINE_ASM_USES_INTRIN
1705 u32 = __indword(Port);
1706
1707# else
1708 __asm
1709 {
1710 mov dx, [Port]
1711 in eax, dx
1712 mov [u32], eax
1713 }
1714# endif
1715 return u32;
1716}
1717#endif
1718
1719
1720/**
1721 * Atomically Exchange an unsigned 8-bit value.
1722 *
1723 * @returns Current *pu8 value
1724 * @param pu8 Pointer to the 8-bit variable to update.
1725 * @param u8 The 8-bit value to assign to *pu8.
1726 */
1727#if RT_INLINE_ASM_EXTERNAL
1728DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1729#else
1730DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1731{
1732# if RT_INLINE_ASM_GNU_STYLE
1733 __asm__ __volatile__("xchgb %0, %1\n\t"
1734 : "=m" (*pu8),
1735 "=r" (u8)
1736 : "1" (u8));
1737# else
1738 __asm
1739 {
1740# ifdef RT_ARCH_AMD64
1741 mov rdx, [pu8]
1742 mov al, [u8]
1743 xchg [rdx], al
1744 mov [u8], al
1745# else
1746 mov edx, [pu8]
1747 mov al, [u8]
1748 xchg [edx], al
1749 mov [u8], al
1750# endif
1751 }
1752# endif
1753 return u8;
1754}
1755#endif
1756
1757
1758/**
1759 * Atomically Exchange a signed 8-bit value.
1760 *
1761 * @returns Current *pu8 value
1762 * @param pi8 Pointer to the 8-bit variable to update.
1763 * @param i8 The 8-bit value to assign to *pi8.
1764 */
1765DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1766{
1767 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1768}
1769
1770
1771/**
1772 * Atomically Exchange a bool value.
1773 *
1774 * @returns Current *pf value
1775 * @param pf Pointer to the 8-bit variable to update.
1776 * @param f The 8-bit value to assign to *pi8.
1777 */
1778DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1779{
1780#ifdef _MSC_VER
1781 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1782#else
1783 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1784#endif
1785}
1786
1787
1788/**
1789 * Atomically Exchange an unsigned 16-bit value.
1790 *
1791 * @returns Current *pu16 value
1792 * @param pu16 Pointer to the 16-bit variable to update.
1793 * @param u16 The 16-bit value to assign to *pu16.
1794 */
1795#if RT_INLINE_ASM_EXTERNAL
1796DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1797#else
1798DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1799{
1800# if RT_INLINE_ASM_GNU_STYLE
1801 __asm__ __volatile__("xchgw %0, %1\n\t"
1802 : "=m" (*pu16),
1803 "=r" (u16)
1804 : "1" (u16));
1805# else
1806 __asm
1807 {
1808# ifdef RT_ARCH_AMD64
1809 mov rdx, [pu16]
1810 mov ax, [u16]
1811 xchg [rdx], ax
1812 mov [u16], ax
1813# else
1814 mov edx, [pu16]
1815 mov ax, [u16]
1816 xchg [edx], ax
1817 mov [u16], ax
1818# endif
1819 }
1820# endif
1821 return u16;
1822}
1823#endif
1824
1825
1826/**
1827 * Atomically Exchange a signed 16-bit value.
1828 *
1829 * @returns Current *pu16 value
1830 * @param pi16 Pointer to the 16-bit variable to update.
1831 * @param i16 The 16-bit value to assign to *pi16.
1832 */
1833DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1834{
1835 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1836}
1837
1838
1839/**
1840 * Atomically Exchange an unsigned 32-bit value.
1841 *
1842 * @returns Current *pu32 value
1843 * @param pu32 Pointer to the 32-bit variable to update.
1844 * @param u32 The 32-bit value to assign to *pu32.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1848#else
1849DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1850{
1851# if RT_INLINE_ASM_GNU_STYLE
1852 __asm__ __volatile__("xchgl %0, %1\n\t"
1853 : "=m" (*pu32),
1854 "=r" (u32)
1855 : "1" (u32));
1856
1857# elif RT_INLINE_ASM_USES_INTRIN
1858 u32 = _InterlockedExchange((long *)pu32, u32);
1859
1860# else
1861 __asm
1862 {
1863# ifdef RT_ARCH_AMD64
1864 mov rdx, [pu32]
1865 mov eax, u32
1866 xchg [rdx], eax
1867 mov [u32], eax
1868# else
1869 mov edx, [pu32]
1870 mov eax, u32
1871 xchg [edx], eax
1872 mov [u32], eax
1873# endif
1874 }
1875# endif
1876 return u32;
1877}
1878#endif
1879
1880
1881/**
1882 * Atomically Exchange a signed 32-bit value.
1883 *
1884 * @returns Current *pu32 value
1885 * @param pi32 Pointer to the 32-bit variable to update.
1886 * @param i32 The 32-bit value to assign to *pi32.
1887 */
1888DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1889{
1890 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1891}
1892
1893
1894/**
1895 * Atomically Exchange an unsigned 64-bit value.
1896 *
1897 * @returns Current *pu64 value
1898 * @param pu64 Pointer to the 64-bit variable to update.
1899 * @param u64 The 64-bit value to assign to *pu64.
1900 */
1901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1902DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1903#else
1904DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1905{
1906# if defined(RT_ARCH_AMD64)
1907# if RT_INLINE_ASM_USES_INTRIN
1908 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1909
1910# elif RT_INLINE_ASM_GNU_STYLE
1911 __asm__ __volatile__("xchgq %0, %1\n\t"
1912 : "=m" (*pu64),
1913 "=r" (u64)
1914 : "1" (u64));
1915# else
1916 __asm
1917 {
1918 mov rdx, [pu64]
1919 mov rax, [u64]
1920 xchg [rdx], rax
1921 mov [u64], rax
1922 }
1923# endif
1924# else /* !RT_ARCH_AMD64 */
1925# if RT_INLINE_ASM_GNU_STYLE
1926# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
1927 uint32_t u32 = (uint32_t)u64;
1928 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1929 "xchgl %%ebx, %3\n\t"
1930 "1:\n\t"
1931 "lock; cmpxchg8b (%5)\n\t"
1932 "jnz 1b\n\t"
1933 "xchgl %%ebx, %3\n\t"
1934 /*"xchgl %%esi, %5\n\t"*/
1935 : "=A" (u64),
1936 "=m" (*pu64)
1937 : "0" (*pu64),
1938 "m" ( u32 ),
1939 "c" ( (uint32_t)(u64 >> 32) ),
1940 "S" (pu64) );
1941# else /* !PIC */
1942 __asm__ __volatile__("1:\n\t"
1943 "lock; cmpxchg8b %1\n\t"
1944 "jnz 1b\n\t"
1945 : "=A" (u64),
1946 "=m" (*pu64)
1947 : "0" (*pu64),
1948 "b" ( (uint32_t)u64 ),
1949 "c" ( (uint32_t)(u64 >> 32) ));
1950# endif
1951# else
1952 __asm
1953 {
1954 mov ebx, dword ptr [u64]
1955 mov ecx, dword ptr [u64 + 4]
1956 mov edi, pu64
1957 mov eax, dword ptr [edi]
1958 mov edx, dword ptr [edi + 4]
1959 retry:
1960 lock cmpxchg8b [edi]
1961 jnz retry
1962 mov dword ptr [u64], eax
1963 mov dword ptr [u64 + 4], edx
1964 }
1965# endif
1966# endif /* !RT_ARCH_AMD64 */
1967 return u64;
1968}
1969#endif
1970
1971
1972/**
1973 * Atomically Exchange an signed 64-bit value.
1974 *
1975 * @returns Current *pi64 value
1976 * @param pi64 Pointer to the 64-bit variable to update.
1977 * @param i64 The 64-bit value to assign to *pi64.
1978 */
1979DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1980{
1981 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1982}
1983
1984
1985#ifdef RT_ARCH_AMD64
1986/**
1987 * Exchanges an unsigned 128-bit value (NOT atomic as a whole, see remark).
1988 *
1989 * @returns The previous content of *pu128, assembled from the two halves.
1990 * @param pu128 Pointer to the 128-bit variable to update.
1991 * @param u128 The 128-bit value to assign to *pu128.
1992 *
1993 * @remark We cannot really assume that any hardware supports this. Nor do I have
1994 * GAS support for it. So, for the time being we'll BREAK the atomic
1995 * bit of this function and use two 64-bit exchanges instead.
 *
 * @remark The low and the high 64 bits are exchanged by two separate
 * ASMAtomicXchgU64 calls, low half first.
 *
 * @remark NOTE(review): the disabled "later?" branch looks unfinished (the
 * GNU variant uses cmpxchg8b and the MSC variant uses dword ptr
 * operands and +4 offsets with 64-bit registers) - rework it before
 * enabling.
1996 */
1997# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1998DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1999# else
2000DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2001{
2002 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
2003 {
2004 /** @todo this is clumsy code */
 /* The union gives access to the Lo/Hi 64-bit halves of the value; the
    cast via uintptr_t drops the volatile qualifier from pu128. */
2005 RTUINT128U u128Ret;
2006 u128Ret.u = u128;
2007 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2008 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2009 return u128Ret.u;
2010 }
2011#if 0 /* later? */
2012 else
2013 {
2014# if RT_INLINE_ASM_GNU_STYLE
2015 __asm__ __volatile__("1:\n\t"
2016 "lock; cmpxchg8b %1\n\t"
2017 "jnz 1b\n\t"
2018 : "=A" (u128),
2019 "=m" (*pu128)
2020 : "0" (*pu128),
2021 "b" ( (uint64_t)u128 ),
2022 "c" ( (uint64_t)(u128 >> 64) ));
2023# else
2024 __asm
2025 {
2026 mov rbx, dword ptr [u128]
2027 mov rcx, dword ptr [u128 + 4]
2028 mov rdi, pu128
2029 mov rax, dword ptr [rdi]
2030 mov rdx, dword ptr [rdi + 4]
2031 retry:
2032 lock cmpxchg16b [rdi]
2033 jnz retry
2034 mov dword ptr [u128], rax
2035 mov dword ptr [u128 + 4], rdx
2036 }
2037# endif
2038 }
2039 return u128;
2040#endif
2041}
2042# endif
2043#endif /* RT_ARCH_AMD64 */
2044
2045
2046/**
2047 * Atomically Reads a unsigned 64-bit value.
2048 *
2049 * @returns Current *pu64 value
2050 * @param pu64 Pointer to the 64-bit variable to read.
2051 * The memory pointed to must be writable.
2052 * @remark This will fault if the memory is read-only!
2053 */
2054#if RT_INLINE_ASM_EXTERNAL
2055DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2056#else
2057DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2058{
2059 uint64_t u64;
2060# ifdef RT_ARCH_AMD64
2061# if RT_INLINE_ASM_GNU_STYLE
2062 __asm__ __volatile__("movq %1, %0\n\t"
2063 : "=r" (u64)
2064 : "m" (*pu64));
2065# else
2066 __asm
2067 {
2068 mov rdx, [pu64]
2069 mov rax, [rdx]
2070 mov [u64], rax
2071 }
2072# endif
2073# else /* !RT_ARCH_AMD64 */
2074# if RT_INLINE_ASM_GNU_STYLE
2075# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2076 uint32_t u32EBX = 0;
2077 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2078 "lock; cmpxchg8b (%5)\n\t"
2079 "xchgl %%ebx, %3\n\t"
2080 : "=A" (u64),
2081 "=m" (*pu64)
2082 : "0" (0),
2083 "m" (u32EBX),
2084 "c" (0),
2085 "S" (pu64));
2086# else /* !PIC */
2087 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2088 : "=A" (u64),
2089 "=m" (*pu64)
2090 : "0" (0),
2091 "b" (0),
2092 "c" (0));
2093# endif
2094# else
2095 __asm
2096 {
2097 xor eax, eax
2098 xor edx, edx
2099 mov edi, pu64
2100 xor ecx, ecx
2101 xor ebx, ebx
2102 lock cmpxchg8b [edi]
2103 mov dword ptr [u64], eax
2104 mov dword ptr [u64 + 4], edx
2105 }
2106# endif
2107# endif /* !RT_ARCH_AMD64 */
2108 return u64;
2109}
2110#endif
2111
2112
2113/**
2114 * Atomically Reads a signed 64-bit value.
2115 *
2116 * @returns Current *pi64 value
2117 * @param pi64 Pointer to the 64-bit variable to read.
2118 * The memory pointed to must be writable.
2119 * @remark This will fault if the memory is read-only!
2120 */
2121DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2122{
2123 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2124}
2125
2126
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * The previous value is discarded.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2144
2145
2146/**
2147 * Atomically Exchange a pointer value.
2148 *
2149 * @returns Current *ppv value
2150 * @param ppv Pointer to the pointer variable to update.
2151 * @param pv The pointer value to assign to *ppv.
2152 */
2153DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2154{
2155#if ARCH_BITS == 32
2156 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2157#elif ARCH_BITS == 64
2158 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2159#else
2160# error "ARCH_BITS is bogus"
2161#endif
2162}
2163
2164
2165/**
2166 * Atomically Compare and Exchange an unsigned 32-bit value.
2167 *
2168 * @returns true if xchg was done.
2169 * @returns false if xchg wasn't done.
2170 *
2171 * @param pu32 Pointer to the value to update.
2172 * @param u32New The new value to assigned to *pu32.
2173 * @param u32Old The old value to *pu32 compare with.
2174 */
2175#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2176DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2177#else
2178DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2179{
2180# if RT_INLINE_ASM_GNU_STYLE
2181 uint32_t u32Ret;
2182 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2183 "setz %%al\n\t"
2184 "movzbl %%al, %%eax\n\t"
2185 : "=m" (*pu32),
2186 "=a" (u32Ret)
2187 : "r" (u32New),
2188 "1" (u32Old));
2189 return (bool)u32Ret;
2190
2191# elif RT_INLINE_ASM_USES_INTRIN
2192 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2193
2194# else
2195 uint32_t u32Ret;
2196 __asm
2197 {
2198# ifdef RT_ARCH_AMD64
2199 mov rdx, [pu32]
2200# else
2201 mov edx, [pu32]
2202# endif
2203 mov eax, [u32Old]
2204 mov ecx, [u32New]
2205# ifdef RT_ARCH_AMD64
2206 lock cmpxchg [rdx], ecx
2207# else
2208 lock cmpxchg [edx], ecx
2209# endif
2210 setz al
2211 movzx eax, al
2212 mov [u32Ret], eax
2213 }
2214 return !!u32Ret;
2215# endif
2216}
2217#endif
2218
2219
2220/**
2221 * Atomically Compare and Exchange a signed 32-bit value.
2222 *
2223 * @returns true if xchg was done.
2224 * @returns false if xchg wasn't done.
2225 *
2226 * @param pi32 Pointer to the value to update.
2227 * @param i32New The new value to assigned to *pi32.
2228 * @param i32Old The old value to *pi32 compare with.
2229 */
2230DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2231{
2232 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2233}
2234
2235
2236/**
2237 * Atomically Compare and exchange an unsigned 64-bit value.
2238 *
2239 * @returns true if xchg was done.
2240 * @returns false if xchg wasn't done.
2241 *
2242 * @param pu64 Pointer to the 64-bit variable to update.
2243 * @param u64New The 64-bit value to assign to *pu64.
2244 * @param u64Old The value to compare with.
2245 */
2246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2247DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2248#else
2249DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2250{
2251# if RT_INLINE_ASM_USES_INTRIN
2252 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2253
2254# elif defined(RT_ARCH_AMD64)
2255# if RT_INLINE_ASM_GNU_STYLE
2256 uint64_t u64Ret;
2257 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2258 "setz %%al\n\t"
2259 "movzbl %%al, %%eax\n\t"
2260 : "=m" (*pu64),
2261 "=a" (u64Ret)
2262 : "r" (u64New),
2263 "1" (u64Old));
2264 return (bool)u64Ret;
2265# else
2266 bool fRet;
2267 __asm
2268 {
2269 mov rdx, [pu32]
2270 mov rax, [u64Old]
2271 mov rcx, [u64New]
2272 lock cmpxchg [rdx], rcx
2273 setz al
2274 mov [fRet], al
2275 }
2276 return fRet;
2277# endif
2278# else /* !RT_ARCH_AMD64 */
2279 uint32_t u32Ret;
2280# if RT_INLINE_ASM_GNU_STYLE
2281# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2282 uint32_t u32 = (uint32_t)u64New;
2283 uint32_t u32Spill;
2284 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2285 "lock; cmpxchg8b (%6)\n\t"
2286 "setz %%al\n\t"
2287 "xchgl %%ebx, %4\n\t"
2288 "movzbl %%al, %%eax\n\t"
2289 : "=a" (u32Ret),
2290 "=d" (u32Spill),
2291 "=m" (*pu64)
2292 : "A" (u64Old),
2293 "m" ( u32 ),
2294 "c" ( (uint32_t)(u64New >> 32) ),
2295 "S" (pu64) );
2296# else /* !PIC */
2297 uint32_t u32Spill;
2298 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2299 "setz %%al\n\t"
2300 "movzbl %%al, %%eax\n\t"
2301 : "=a" (u32Ret),
2302 "=d" (u32Spill),
2303 "=m" (*pu64)
2304 : "A" (u64Old),
2305 "b" ( (uint32_t)u64New ),
2306 "c" ( (uint32_t)(u64New >> 32) ));
2307# endif
2308 return (bool)u32Ret;
2309# else
2310 __asm
2311 {
2312 mov ebx, dword ptr [u64New]
2313 mov ecx, dword ptr [u64New + 4]
2314 mov edi, [pu64]
2315 mov eax, dword ptr [u64Old]
2316 mov edx, dword ptr [u64Old + 4]
2317 lock cmpxchg8b [edi]
2318 setz al
2319 movzx eax, al
2320 mov dword ptr [u32Ret], eax
2321 }
2322 return !!u32Ret;
2323# endif
2324# endif /* !RT_ARCH_AMD64 */
2325}
2326#endif
2327
2328
2329/**
2330 * Atomically Compare and exchange a signed 64-bit value.
2331 *
2332 * @returns true if xchg was done.
2333 * @returns false if xchg wasn't done.
2334 *
2335 * @param pi64 Pointer to the 64-bit variable to update.
2336 * @param i64 The 64-bit value to assign to *pu64.
2337 * @param i64Old The value to compare with.
2338 */
2339DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2340{
2341 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2342}
2343
2344
2345
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit compare-and-exchange
 * function.  Any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2367
2368
2369/**
2370 * Atomically Compare and Exchange a pointer value.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param ppv Pointer to the value to update.
2376 * @param pvNew The new value to assigned to *ppv.
2377 * @param pvOld The old value to *ppv compare with.
2378 */
2379DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2380{
2381#if ARCH_BITS == 32
2382 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2383#elif ARCH_BITS == 64
2384 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2385#else
2386# error "ARCH_BITS is bogus"
2387#endif
2388}
2389
2390
2391/**
2392 * Atomically increment an unsigned 32-bit value.
 *
 * The assembly variants use "lock xadd" with an addend of 1, which leaves
 * the previous value in the register; that register is then incremented so
 * the function returns the new value.
2393 *
2394 * @returns The new value.
2395 * @param pu32 Pointer to the value to increment.
2396 */
2397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2398DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2399#else
2400DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2401{
2402 uint32_t u32;
2403# if RT_INLINE_ASM_USES_INTRIN
2404 u32 = _InterlockedIncrement((long *)pu32);
2405
2406# elif RT_INLINE_ASM_GNU_STYLE
 /* "0" (1) preloads the output register with the addend; the "memory"
    clobber keeps the compiler from caching memory across the statement. */
2407 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2408 "incl %0\n\t"
2409 : "=r" (u32),
2410 "=m" (*pu32)
2411 : "0" (1)
2412 : "memory");
2413# else
2414 __asm
2415 {
2416 mov eax, 1
2417# ifdef RT_ARCH_AMD64
2418 mov rdx, [pu32]
2419 lock xadd [rdx], eax
2420# else
2421 mov edx, [pu32]
2422 lock xadd [edx], eax
2423# endif
2424 inc eax
2425 mov u32, eax
2426 }
2427# endif
2428 return u32;
2429}
2430#endif
2431
2432
2433/**
2434 * Atomically increment a signed 32-bit value.
2435 *
2436 * @returns The new value.
2437 * @param pi32 Pointer to the value to increment.
2438 */
2439DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2440{
2441 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2442}
2443
2444
2445/**
2446 * Atomically decrement an unsigned 32-bit value.
 *
 * The assembly variants use "lock xadd" with an addend of -1, which leaves
 * the previous value in the register; that register is then decremented so
 * the function returns the new value.
2447 *
2448 * @returns The new value.
2449 * @param pu32 Pointer to the value to decrement.
2450 */
2451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2452DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2453#else
2454DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2455{
2456 uint32_t u32;
2457# if RT_INLINE_ASM_USES_INTRIN
2458 u32 = _InterlockedDecrement((long *)pu32);
2459
2460# elif RT_INLINE_ASM_GNU_STYLE
 /* "0" (-1) preloads the output register with the addend; the "memory"
    clobber keeps the compiler from caching memory across the statement. */
2461 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2462 "decl %0\n\t"
2463 : "=r" (u32),
2464 "=m" (*pu32)
2465 : "0" (-1)
2466 : "memory");
2467# else
2468 __asm
2469 {
2470 mov eax, -1
2471# ifdef RT_ARCH_AMD64
2472 mov rdx, [pu32]
2473 lock xadd [rdx], eax
2474# else
2475 mov edx, [pu32]
2476 lock xadd [edx], eax
2477# endif
2478 dec eax
2479 mov u32, eax
2480 }
2481# endif
2482 return u32;
2483}
2484#endif
2485
2486
2487/**
2488 * Atomically decrement a signed 32-bit value.
2489 *
2490 * @returns The new value.
2491 * @param pi32 Pointer to the value to decrement.
2492 */
2493DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2494{
2495 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2496}
2497
2498
2499/**
2500 * Atomically Or an unsigned 32-bit value.
2501 *
2502 * @param pu32 Pointer to the pointer variable to OR u32 with.
2503 * @param u32 The value to OR *pu32 with.
2504 */
2505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2506DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2507#else
2508DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2509{
2510# if RT_INLINE_ASM_USES_INTRIN
2511 _InterlockedOr((long volatile *)pu32, (long)u32);
2512
2513# elif RT_INLINE_ASM_GNU_STYLE
2514 __asm__ __volatile__("lock; orl %1, %0\n\t"
2515 : "=m" (*pu32)
2516 : "r" (u32));
2517# else
2518 __asm
2519 {
2520 mov eax, [u32]
2521# ifdef RT_ARCH_AMD64
2522 mov rdx, [pu32]
2523 lock or [rdx], eax
2524# else
2525 mov edx, [pu32]
2526 lock or [edx], eax
2527# endif
2528 }
2529# endif
2530}
2531#endif
2532
2533
2534/**
2535 * Atomically Or a signed 32-bit value.
2536 *
2537 * @param pi32 Pointer to the pointer variable to OR u32 with.
2538 * @param i32 The value to OR *pu32 with.
2539 */
2540DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2541{
2542 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2543}
2544
2545
2546/**
2547 * Atomically And an unsigned 32-bit value.
2548 *
2549 * @param pu32 Pointer to the pointer variable to AND u32 with.
2550 * @param u32 The value to AND *pu32 with.
2551 */
2552#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2553DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2554#else
2555DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2556{
2557# if RT_INLINE_ASM_USES_INTRIN
2558 _InterlockedAnd((long volatile *)pu32, u32);
2559
2560# elif RT_INLINE_ASM_GNU_STYLE
2561 __asm__ __volatile__("lock; andl %1, %0\n\t"
2562 : "=m" (*pu32)
2563 : "r" (u32));
2564# else
2565 __asm
2566 {
2567 mov eax, [u32]
2568# ifdef RT_ARCH_AMD64
2569 mov rdx, [pu32]
2570 lock and [rdx], eax
2571# else
2572 mov edx, [pu32]
2573 lock and [edx], eax
2574# endif
2575 }
2576# endif
2577}
2578#endif
2579
2580
2581/**
2582 * Atomically And a signed 32-bit value.
2583 *
2584 * @param pi32 Pointer to the pointer variable to AND i32 with.
2585 * @param i32 The value to AND *pi32 with.
2586 */
2587DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2588{
2589 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2590}
2591
2592
2593/**
2594 * Invalidate page.
 *
 * Executes the INVLPG instruction (or the __invlpg intrinsic) on the given
 * address.
2595 *
2596 * @param pv Address of the page to invalidate.
2597 */
2598#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2599DECLASM(void) ASMInvalidatePage(void *pv);
2600#else
2601DECLINLINE(void) ASMInvalidatePage(void *pv)
2602{
2603# if RT_INLINE_ASM_USES_INTRIN
2604 __invlpg(pv);
2605
2606# elif RT_INLINE_ASM_GNU_STYLE
 /* The address is passed as a memory operand; the byte cast only gives
    the operand a type. */
2607 __asm__ __volatile__("invlpg %0\n\t"
2608 : : "m" (*(uint8_t *)pv));
2609# else
2610 __asm
2611 {
2612# ifdef RT_ARCH_AMD64
2613 mov rax, [pv]
2614 invlpg [rax]
2615# else
2616 mov eax, [pv]
2617 invlpg [eax]
2618# endif
2619 }
2620# endif
2621}
2622#endif
2623
2624
/* The page routine below hard-codes a page size of 0x1000 bytes; refuse to
   build if PAGE_SIZE is defined to anything else (check skipped when
   NT_INCLUDED is defined). */
2625#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2626# if PAGE_SIZE != 0x1000
2627# error "PAGE_SIZE is not 0x1000!"
2628# endif
2629#endif
2630
2631/**
2632 * Zeros a 4K memory page.
 *
 * Stores 0x1000 bytes of zeros using a string store: 512 quadwords on
 * AMD64, 1024 doublewords otherwise.
2633 *
2634 * @param pv Pointer to the memory block. This must be page aligned.
2635 */
2636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2637DECLASM(void) ASMMemZeroPage(volatile void *pv);
2638# else
2639DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2640{
2641# if RT_INLINE_ASM_USES_INTRIN
2642# ifdef RT_ARCH_AMD64
2643 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2644# else
2645 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2646# endif
2647
2648# elif RT_INLINE_ASM_GNU_STYLE
 /* rep stos modifies both the destination and the count register, so
    both are listed as outputs; uDummy only receives the final count. */
2649 RTUINTREG uDummy;
2650# ifdef RT_ARCH_AMD64
2651 __asm__ __volatile__ ("rep stosq"
2652 : "=D" (pv),
2653 "=c" (uDummy)
2654 : "0" (pv),
2655 "c" (0x1000 >> 3),
2656 "a" (0)
2657 : "memory");
2658# else
2659 __asm__ __volatile__ ("rep stosl"
2660 : "=D" (pv),
2661 "=c" (uDummy)
2662 : "0" (pv),
2663 "c" (0x1000 >> 2),
2664 "a" (0)
2665 : "memory");
2666# endif
2667# else
2668 __asm
2669 {
2670# ifdef RT_ARCH_AMD64
2671 xor rax, rax
2672 mov ecx, 0200h
2673 mov rdi, [pv]
2674 rep stosq
2675# else
2676 xor eax, eax
2677 mov ecx, 0400h
2678 mov edi, [pv]
2679 rep stosd
2680# endif
2681 }
2682# endif
2683}
2684# endif
2685
2686
2687/**
2688 * Zeros a memory block with a 32-bit aligned size.
 *
 * Stores cb / 4 zero doublewords; any remainder of cb modulo 4 is not
 * written, hence the alignment requirement on cb.
2689 *
2690 * @param pv Pointer to the memory block.
2691 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2692 */
2693#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2694DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2695#else
2696DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2697{
2698# if RT_INLINE_ASM_USES_INTRIN
2699 __stosd((unsigned long *)pv, 0, cb >> 2);
2700
2701# elif RT_INLINE_ASM_GNU_STYLE
 /* rep stos modifies both the destination and the count register, so pv
    and cb are listed as outputs and tied to the matching inputs. */
2702 __asm__ __volatile__ ("rep stosl"
2703 : "=D" (pv),
2704 "=c" (cb)
2705 : "0" (pv),
2706 "1" (cb >> 2),
2707 "a" (0)
2708 : "memory");
2709# else
2710 __asm
2711 {
2712 xor eax, eax
2713# ifdef RT_ARCH_AMD64
2714 mov rcx, [cb]
2715 shr rcx, 2
2716 mov rdi, [pv]
2717# else
2718 mov ecx, [cb]
2719 shr ecx, 2
2720 mov edi, [pv]
2721# endif
2722 rep stosd
2723 }
2724# endif
2725}
2726#endif
2727
2728
2729/**
2730 * Fills a memory block with a 32-bit aligned size.
2731 *
2732 * @param pv Pointer to the memory block.
2733 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2734 * @param u32 The value to fill with.
2735 */
2736#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2737DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2738#else
2739DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2740{
2741# if RT_INLINE_ASM_USES_INTRIN
2742 __stosd((unsigned long *)pv, 0, cb >> 2);
2743
2744# elif RT_INLINE_ASM_GNU_STYLE
2745 __asm__ __volatile__ ("rep stosl"
2746 : "=D" (pv),
2747 "=c" (cb)
2748 : "0" (pv),
2749 "1" (cb >> 2),
2750 "a" (u32)
2751 : "memory");
2752# else
2753 __asm
2754 {
2755# ifdef RT_ARCH_AMD64
2756 mov rcx, [cb]
2757 shr rcx, 2
2758 mov rdi, [pv]
2759# else
2760 mov ecx, [cb]
2761 shr ecx, 2
2762 mov edi, [pv]
2763# endif
2764 mov eax, [u32]
2765 rep stosd
2766 }
2767# endif
2768}
2769#endif
2770
2771
2772
/**
 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
 *
 * On AMD64 this is plain C arithmetic; otherwise a single widening multiply
 * instruction is used.
 *
 * @returns u32F1 * u32F2.
 * @param   u32F1   The first factor.
 * @param   u32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
#else
DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
{
# ifdef RT_ARCH_AMD64
    /* Widen one operand first so the multiplication is carried out in 64 bits. */
    return (uint64_t)u32F1 * u32F2;
# else /* !RT_ARCH_AMD64 */
    uint64_t u64;
# if RT_INLINE_ASM_GNU_STYLE
    /* eax = u32F2, edx = u32F1; the "A" constraint picks up the edx:eax product. */
    __asm__ __volatile__("mull %%edx"
                         : "=A" (u64)
                         : "a" (u32F2), "d" (u32F1));
# else
    /* The product is stored back as two dwords: low half from eax, high half from edx. */
    __asm
    {
        mov edx, [u32F1]
        mov eax, [u32F2]
        mul edx
        mov dword ptr [u64], eax
        mov dword ptr [u64 + 4], edx
    }
# endif
    return u64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
2805
2806
/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * On AMD64 this is plain C arithmetic; otherwise a single widening signed
 * multiply instruction is used.
 *
 * @returns i32F1 * i32F2.
 * @param   i32F1   The first factor.
 * @param   i32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
#else
DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
{
# ifdef RT_ARCH_AMD64
    /* Widen one operand first so the multiplication is carried out in 64 bits. */
    return (int64_t)i32F1 * i32F2;
# else /* !RT_ARCH_AMD64 */
    int64_t i64;
# if RT_INLINE_ASM_GNU_STYLE
    /* eax = i32F2, edx = i32F1; the "A" constraint picks up the edx:eax product. */
    __asm__ __volatile__("imull %%edx"
                         : "=A" (i64)
                         : "a" (i32F2), "d" (i32F1));
# else
    /* The product is stored back as two dwords: low half from eax, high half from edx. */
    __asm
    {
        mov edx, [i32F1]
        mov eax, [i32F2]
        imul edx
        mov dword ptr [i64], eax
        mov dword ptr [i64 + 4], edx
    }
# endif
    return i64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
2839
2840
/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * NOTE(review): the AMD64 path truncates the 64-bit quotient to 32 bits,
 * while the 32-bit paths use a DIV instruction, which presumably raises a
 * divide error when the quotient does not fit - confirm callers keep the
 * quotient within 32 bits.
 *
 * @returns u64 / u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef RT_ARCH_AMD64
    return (uint32_t)(u64 / u32);
# else /* !RT_ARCH_AMD64 */
# if RT_INLINE_ASM_GNU_STYLE
    /* The dividend is passed in edx:eax ("A"); the quotient comes back in eax
       and overwrites u32, while the edx output is discarded via uDummy. */
    RTUINTREG uDummy;
    __asm__ __volatile__("divl %3"
                         : "=a" (u32), "=d"(uDummy)
                         : "A" (u64), "r" (u32));
# else
    /* Load the dividend into edx:eax, divide by ecx, and reuse u32 for the quotient. */
    __asm
    {
        mov eax, dword ptr [u64]
        mov edx, dword ptr [u64 + 4]
        mov ecx, [u32]
        div ecx
        mov [u32], eax
    }
# endif
    return u32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
2873
2874
/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * NOTE(review): the AMD64 path truncates the 64-bit quotient to 32 bits,
 * while the 32-bit paths use an IDIV instruction, which presumably raises a
 * divide error when the quotient does not fit - confirm callers keep the
 * quotient within 32 bits.
 *
 * @returns i64 / i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef RT_ARCH_AMD64
    return (int32_t)(i64 / i32);
# else /* !RT_ARCH_AMD64 */
# if RT_INLINE_ASM_GNU_STYLE
    /* The dividend is passed in edx:eax ("A"); the quotient comes back in eax
       and overwrites i32, while the edx output is discarded via iDummy. */
    RTUINTREG iDummy;
    __asm__ __volatile__("idivl %3"
                         : "=a" (i32), "=d"(iDummy)
                         : "A" (i64), "r" (i32));
# else
    /* Load the dividend into edx:eax, divide by ecx, and reuse i32 for the quotient. */
    __asm
    {
        mov eax, dword ptr [i64]
        mov edx, dword ptr [i64 + 4]
        mov ecx, [i32]
        idiv ecx
        mov [i32], eax
    }
# endif
    return i32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
2907
2908
/**
 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
 * using a 96 bit intermediate result.
 * @note    Don't use 64-bit C arithmetic here since some gcc compilers generate references to
 *          __udivdi3 and __umoddi3 even if this inline function is not used.
 *
 * @returns (u64A * u32B) / u32C.
 * @param   u64A    The 64-bit value.
 * @param   u32B    The 32-bit value to multiply A by.
 * @param   u32C    The 32-bit value to divide A*B by.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
#else
DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
{
# if RT_INLINE_ASM_GNU_STYLE
# ifdef RT_ARCH_AMD64
    /* rax = u64A and rdx = 0 on entry; the multiply leaves the wide product
       in rdx:rax and the divide leaves the quotient in rax. The rdx output
       is only captured so the compiler knows the register is overwritten. */
    uint64_t u64Result, u64Spill;
    __asm__ __volatile__("mulq %2\n\t"
                         "divq %3\n\t"
                         : "=a" (u64Result),
                           "=d" (u64Spill)
                         : "r" ((uint64_t)u32B),
                           "r" ((uint64_t)u32C),
                           "0" (u64A),
                           "1" (0));
    return u64Result;
# else
    /* Register assignment on entry: eax = u64A.lo, esi = u64A.hi,
       ecx = u32B, edi = u32C. The ecx/esi/edi outputs all go to u32Dummy
       because only the edx:eax result is of interest. */
    uint32_t u32Dummy;
    uint64_t u64Result;
    __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
                                              edx = u64Lo.hi = (u64A.lo * u32B).hi */
                         "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
                                                    eax = u64A.hi */
                         "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
                                                    edx = u32C */
                         "xchg %%edx,%%ecx \n\t" /* ecx = u32C
                                                    edx = u32B */
                         "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
                                              edx = u64Hi.hi = (u64A.hi * u32B).hi */
                         "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
                         "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
                         "divl %%ecx \n\t" /* eax = u64Hi / u32C
                                              edx = u64Hi % u32C */
                         "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
                         "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
                         "divl %%ecx \n\t" /* u64Result.lo */
                         "movl %%edi,%%edx \n\t" /* u64Result.hi */
                         : "=A"(u64Result), "=c"(u32Dummy),
                           "=S"(u32Dummy), "=D"(u32Dummy)
                         : "a"((uint32_t)u64A),
                           "S"((uint32_t)(u64A >> 32)),
                           "c"(u32B),
                           "D"(u32C));
    return u64Result;
# endif
# else
    /* Portable long-division fallback: multiply the two halves of u64A
       separately, fold the carry of the low product into the high product,
       then divide high part first and feed its remainder into the low divide. */
    RTUINT64U u;
    uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
    uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
    u64Hi += (u64Lo >> 32);
    u.s.Hi = (uint32_t)(u64Hi / u32C);
    u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
    return u.u;
# endif
}
#endif
2977
2978
/**
 * Probes a byte pointer for read access.
 *
 * While the function will not fault if the byte is not read accessible,
 * the idea is to do this in a safe place like before acquiring locks
 * and such like.
 *
 * NOTE(review): the implementation is a plain byte load and the sibling
 * ASMProbeReadBuffer says the probe *will* fault on inaccessible memory;
 * the "will not fault" wording above looks like a typo - confirm.
 *
 * Also, this functions guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte read from pvByte.
 * @param   pvByte      Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* Volatile asm so the load is emitted even when the caller ignores the result. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    /* Load the pointer, read the byte through it, and store it in u8. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvByte]
        mov al, [rax]
# else
        mov eax, [pvByte]
        mov al, [eax]
# endif
        mov [u8], al
    }
# endif
    return u8;
}
#endif
3018
3019/**
3020 * Probes a buffer for read access page by page.
3021 *
3022 * While the function will fault if the buffer is not fully read
3023 * accessible, the idea is to do this in a safe place like before
3024 * acquiring locks and such like.
3025 *
3026 * Also, this functions guarantees that an eager compiler is not going
3027 * to optimize the probing away.
3028 *
3029 * @param pvBuf Pointer to the buffer.
3030 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3031 */
3032DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3033{
3034 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3035 /* the first byte */
3036 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3037 ASMProbeReadByte(pu8);
3038
3039 /* the pages in between pages. */
3040 while (cbBuf > /*PAGE_SIZE*/0x1000)
3041 {
3042 ASMProbeReadByte(pu8);
3043 cbBuf -= /*PAGE_SIZE*/0x1000;
3044 pu8 += /*PAGE_SIZE*/0x1000;
3045 }
3046
3047 /* the last byte */
3048 ASMProbeReadByte(pu8 + cbBuf - 1);
3049}
3050
3051
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to an int3 instruction with GNU-style inline assembly and to
 * __debugbreak() otherwise.
 *
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# ifndef __L4ENV__
/* Regular GNU build: int3 followed by a nop. */
# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
# else
/* L4 build: int3 followed by a jump to the very next instruction. */
# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
# endif
#else
# define ASMBreakpoint() __debugbreak()
#endif
3068
3069
3070
3071/** @defgroup grp_inline_bits Bit Operations
3072 * @{
3073 */
3074
3075
/**
 * Sets a bit in a bitmap.
 *
 * Non-atomic variant: the assembly paths issue BTS without a lock prefix
 * and the intrinsic path uses the non-interlocked _bittestandset.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value (previous bit state) is not needed here. */
    _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then set the bit in memory. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        bts [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        bts [eax], edx
# endif
    }
# endif
}
#endif
3111
3112
/**
 * Atomically sets a bit in a bitmap.
 *
 * The assembly paths issue BTS with a lock prefix and the intrinsic path
 * uses _interlockedbittestandset.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value (previous bit state) is not needed here. */
    _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("lock; btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then set the bit with a locked BTS. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        lock bts [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        lock bts [eax], edx
# endif
    }
# endif
}
#endif
3147
3148
/**
 * Clears a bit in a bitmap.
 *
 * Non-atomic variant: the assembly paths issue BTR without a lock prefix
 * and the intrinsic path uses the non-interlocked _bittestandreset.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value (previous bit state) is not needed here. */
    _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then clear the bit in memory. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        btr [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        btr [eax], edx
# endif
    }
# endif
}
#endif
3184
3185
/**
 * Atomically clears a bit in a bitmap.
 *
 * There is no compiler-intrinsic path for this function: when
 * RT_INLINE_ASM_EXTERNAL is set the external assembly implementation is
 * used, otherwise one of the two inline assembly variants below.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 * @remark  No memory barrier, take care on smp.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("lock; btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then clear the bit with a locked BTR. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        lock btr [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        lock btr [eax], edx
# endif
    }
# endif
}
#endif
3219
3220
/**
 * Toggles a bit in a bitmap.
 *
 * Non-atomic variant: the assembly paths issue BTC without a lock prefix
 * and the intrinsic path uses the non-interlocked _bittestandcomplement.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value (previous bit state) is not needed here. */
    _bittestandcomplement((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then complement the bit in memory. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        btc [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        btc [eax], edx
# endif
    }
# endif
}
#endif
3255
3256
/**
 * Atomically toggles a bit in a bitmap.
 *
 * There is no compiler-intrinsic path for this function: when
 * RT_INLINE_ASM_EXTERNAL is set the external assembly implementation is
 * used, otherwise one of the two inline assembly variants below.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* "Ir" lets the compiler pass iBit as either an immediate or a register. */
    __asm__ __volatile__ ("lock; btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Load the bitmap address and bit index, then complement the bit with a locked BTC. */
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        mov edx, [iBit]
        lock btc [rax], edx
# else
        mov eax, [pvBitmap]
        mov edx, [iBit]
        lock btc [eax], edx
# endif
    }
# endif
}
#endif
3289
3290
/**
 * Tests and sets a bit in a bitmap.
 *
 * Non-atomic variant (no lock prefix / non-interlocked intrinsic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets each path store its result in the width it produces
       (byte or dword) while the function hands it back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* setc copies the carry flag left by BTS into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: BTS, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        bts [rax], edx
# else
        mov eax, [pvBitmap]
        bts [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3335
3336
/**
 * Atomically tests and sets a bit in a bitmap.
 *
 * The assembly paths use a lock-prefixed BTS and the intrinsic path uses
 * _interlockedbittestandset.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets each path store its result in the width it produces
       (byte or dword) while the function hands it back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* setc copies the carry flag left by BTS into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: locked BTS, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        lock bts [rax], edx
# else
        mov eax, [pvBitmap]
        lock bts [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3380
3381
/**
 * Tests and clears a bit in a bitmap.
 *
 * Non-atomic variant (no lock prefix / non-interlocked intrinsic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets each path store its result in the width it produces
       (byte or dword) while the function hands it back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* setc copies the carry flag left by BTR into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: BTR, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        btr [rax], edx
# else
        mov eax, [pvBitmap]
        btr [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3426
3427
/**
 * Atomically tests and clears a bit in a bitmap.
 *
 * The assembly paths use a lock-prefixed BTR and the intrinsic path uses
 * _interlockedbittestandreset.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 * @remark  No memory barrier, take care on smp.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets each path store its result in the width it produces
       (byte or dword) while the function hands it back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* setc copies the carry flag left by BTR into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: locked BTR, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        lock btr [rax], edx
# else
        mov eax, [pvBitmap]
        lock btr [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3473
3474
3475/**
3476 * Tests and toggles a bit in a bitmap.
3477 *
3478 * @returns true if the bit was set.
3479 * @returns false if the bit was clear.
3480 * @param pvBitmap Pointer to the bitmap.
3481 * @param iBit The bit to test and toggle.
3482 */
3483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3484DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3485#else
3486DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3487{
3488 union { bool f; uint32_t u32; uint8_t u8; } rc;
3489# if RT_INLINE_ASM_USES_INTRIN
3490 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3491
3492# elif RT_INLINE_ASM_GNU_STYLE
3493 __asm__ __volatile__ ("btcl %2, %1\n\t"
3494 "setc %b0\n\t"
3495 "andl $1, %0\n\t"
3496 : "=q" (rc.u32),
3497 "=m" (*(volatile long *)pvBitmap)
3498 : "Ir" (iBit)
3499 : "memory");
3500# else
3501 __asm
3502 {
3503 mov edx, [iBit]
3504# ifdef RT_ARCH_AMD64
3505 mov rax, [pvBitmap]
3506 btc [rax], edx
3507# else
3508 mov eax, [pvBitmap]
3509 btc [eax], edx
3510# endif
3511 setc al
3512 and eax, 1
3513 mov [rc.u32], eax
3514 }
3515# endif
3516 return rc.f;
3517}
3518#endif
3519
3520
/**
 * Atomically tests and toggles a bit in a bitmap.
 *
 * There is no compiler-intrinsic path for this function: when
 * RT_INLINE_ASM_EXTERNAL is set the external assembly implementation is
 * used, otherwise one of the two inline assembly variants below.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets the assembly store a full dword while the function
       hands the result back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_GNU_STYLE
    /* setc copies the carry flag left by BTC into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: locked BTC, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        lock btc [rax], edx
# else
        mov eax, [pvBitmap]
        lock btc [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3562
3563
/**
 * Tests if a bit in a bitmap is set.
 *
 * The bitmap itself is only inspected (BT / _bittest); nothing is
 * written back by the instruction used.
 *
 * @returns true if the bit is set.
 * @returns false if the bit is clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
{
    /* The union lets each path store a full dword while the function
       hands the result back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u32 = _bittest((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE

    /* setc copies the carry flag left by BT into the low byte of the
       result register; the and then reduces the whole register to 0 or 1.
       "=q" restricts the result to a register with a byte-sized part. */
    __asm__ __volatile__ ("btl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    /* Same sequence in MSC syntax: BT, carry into al, mask eax to 0/1, store. */
    __asm
    {
        mov edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov rax, [pvBitmap]
        bt [rax], edx
# else
        mov eax, [pvBitmap]
        bt [eax], edx
# endif
        setc al
        and eax, 1
        mov [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3608
3609
3610/**
3611 * Clears a bit range within a bitmap.
3612 *
3613 * @param pvBitmap Pointer to the bitmap.
3614 * @param iBitStart The First bit to clear.
3615 * @param iBitEnd The first bit not to clear.
3616 */
3617DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3618{
3619 if (iBitStart < iBitEnd)
3620 {
3621 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3622 int iStart = iBitStart & ~31;
3623 int iEnd = iBitEnd & ~31;
3624 if (iStart == iEnd)
3625 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3626 else
3627 {
3628 /* bits in first dword. */
3629 if (iBitStart & 31)
3630 {
3631 *pu32 &= (1 << (iBitStart & 31)) - 1;
3632 pu32++;
3633 iBitStart = iStart + 32;
3634 }
3635
3636 /* whole dword. */
3637 if (iBitStart != iEnd)
3638 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3639
3640 /* bits in last dword. */
3641 if (iBitEnd & 31)
3642 {
3643 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3644 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3645 }
3646 }
3647 }
3648}
3649
3650
/**
 * Finds the first clear bit in a bitmap.
 *
 * The bitmap is scanned a dword at a time for the first dword that is not
 * all ones; cBits is rounded up to a multiple of 32 before the scan.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (also when cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /* Register setup: edx = 0xffffffff (the "not found" result, i.e. -1),
           ecx = dword count, (e/r)di = bitmap, eax = 0xffffffff (the pattern
           to skip). If the scan stops on a differing dword, step back to it,
           xor it with the pattern so clear bits become set, turn the byte
           offset from the bitmap start (operand %5) into a bit offset and add
           the index of the lowest set bit. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
# ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "xorl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
# else
                             "lea -4(%%edi), %%edi\n\t"
                             "xorl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
# endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0xffffffff));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        /* Same algorithm in MSC syntax; (e/r)bx keeps the bitmap start for
           the offset calculation and edx starts out as the -1 result. */
        __asm
        {
# ifdef RT_ARCH_AMD64
            mov rdi, [pvBitmap]
            mov rbx, rdi
# else
            mov edi, [pvBitmap]
            mov ebx, edi
# endif
            mov edx, 0ffffffffh
            mov eax, edx
            mov ecx, [cBits]
            shr ecx, 5
            repe scasd
            je done

# ifdef RT_ARCH_AMD64
            lea rdi, [rdi - 4]
            xor eax, [rdi]
            sub rdi, rbx
# else
            lea edi, [edi - 4]
            xor eax, [edi]
            sub edi, ebx
# endif
            shl edi, 3
            bsf edx, eax
            add edx, edi
        done:
            mov [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
3733
3734
/**
 * Finds the next clear bit in a bitmap.
 *
 * The remainder of the dword containing iBitPrev + 1 is inspected first;
 * after that the rest of the bitmap is handed to ASMBitFirstClear.
 *
 * @returns Index of the next clear bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* iBitPrev now names the first candidate bit; iBit is its position within its dword. */
    int iBit = ++iBitPrev & 31;
    /* Advance the bitmap pointer to that dword and drop the skipped bits from the count. */
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. */
        /* Inverted so clear bits read as set, then shifted so the first candidate is bit 0. */
        uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 when u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        /* iBit = index of the lowest set bit in u32, or -1 when u32 is zero. */
        __asm
        {
            mov edx, [u32]
            bsf eax, edx
            jnz done
            mov eax, 0ffffffffh
        done:
            mov [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            /* Convert the index relative to the following dword back to a bitmap index. */
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }
    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstClear(pvBitmap, cBits);
        /* Convert the index relative to the current dword back to a bitmap index. */
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    return iBit;
}
#endif
3802
3803
/**
 * Finds the first set bit in a bitmap.
 *
 * The bitmap is scanned a dword at a time for the first non-zero dword;
 * cBits is rounded up to a multiple of 32 before the scan.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (also when cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /* Register setup: edx = 0xffffffff (the "not found" result, i.e. -1),
           ecx = dword count, (e/r)di = bitmap, eax = 0 (the pattern to skip).
           If the scan stops on a differing dword, step back to it, load it,
           turn the byte offset from the bitmap start (operand %5) into a bit
           offset and add the index of the lowest set bit. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
# ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "movl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
# else
                             "lea -4(%%edi), %%edi\n\t"
                             "movl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
# endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        /* Same algorithm in MSC syntax; (e/r)bx keeps the bitmap start for
           the offset calculation and edx starts out as the -1 result. */
        __asm
        {
# ifdef RT_ARCH_AMD64
            mov rdi, [pvBitmap]
            mov rbx, rdi
# else
            mov edi, [pvBitmap]
            mov ebx, edi
# endif
            mov edx, 0ffffffffh
            xor eax, eax
            mov ecx, [cBits]
            shr ecx, 5
            repe scasd
            je done
# ifdef RT_ARCH_AMD64
            lea rdi, [rdi - 4]
            mov eax, [rdi]
            sub rdi, rbx
# else
            lea edi, [edi - 4]
            mov eax, [edi]
            sub edi, ebx
# endif
            shl edi, 3
            bsf edx, eax
            add edx, edi
        done:
            mov [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
3885
3886
/**
 * Finds the next set bit in a bitmap.
 *
 * The remainder of the dword containing iBitPrev + 1 is inspected first;
 * after that the rest of the bitmap is handed to ASMBitFirstSet.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* iBitPrev now names the first candidate bit; iBit is its position within its dword. */
    int iBit = ++iBitPrev & 31;
    /* Advance the bitmap pointer to that dword and drop the skipped bits from the count. */
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. */
        /* Shifted so the first candidate bit ends up as bit 0. */
        uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 when u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        /* iBit = index of the lowest set bit in u32, or -1 when u32 is zero. */
        __asm
        {
            mov edx, u32
            bsf eax, edx
            jnz done
            mov eax, 0ffffffffh
        done:
            mov [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            /* Convert the index relative to the following dword back to a bitmap index. */
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }

    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstSet(pvBitmap, cBits);
        /* Convert the index relative to the current dword back to a bitmap index. */
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    return iBit;
}
#endif
3955
3956
/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * Every path does a forward bit scan and then adds one to the zero-based
 * index, or yields 0 when the scan reports that no bit is set.
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to ffs() in BSD.
 */
DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    if (_BitScanForward(&iBit, u32))
        iBit++;
    else
        iBit = 0;
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* Label 1 is the "found" path (increment), the fall-through zeroes the result. */
    __asm__ __volatile__("bsf %1, %0\n\t"
                         "jnz 1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp 2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    /* Same logic in MSC syntax. */
    _asm
    {
        bsf eax, [u32]
        jnz found
        xor eax, eax
        jmp done
    found:
        inc eax
    done:
        mov [iBit], eax
    }
# endif
    return iBit;
}
4001
4002
4003/**
4004 * Finds the first bit which is set in the given 32-bit integer.
4005 * Bits are numbered from 1 (least significant) to 32.
4006 *
4007 * @returns index [1..32] of the first set bit.
4008 * @returns 0 if all bits are cleared.
4009 * @param i32 Integer to search for set bits.
4010 * @remark Similar to ffs() in BSD.
4011 */
4012DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4013{
4014 return ASMBitFirstSetU32((uint32_t)i32);
4015}
4016
4017
4018/**
4019 * Finds the last bit which is set in the given 32-bit integer.
4020 * Bits are numbered from 1 (least significant) to 32.
4021 *
4022 * @returns index [1..32] of the last set bit.
4023 * @returns 0 if all bits are cleared.
4024 * @param u32 Integer to search for set bits.
4025 * @remark Similar to fls() in BSD.
4026 */
4027DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4028{
4029# if RT_INLINE_ASM_USES_INTRIN
4030 unsigned long iBit;
4031 if (_BitScanReverse(&iBit, u32))
4032 iBit++;
4033 else
4034 iBit = 0;
4035# elif RT_INLINE_ASM_GNU_STYLE
4036 uint32_t iBit;
4037 __asm__ __volatile__("bsrl %1, %0\n\t"
4038 "jnz 1f\n\t"
4039 "xorl %0, %0\n\t"
4040 "jmp 2f\n"
4041 "1:\n\t"
4042 "incl %0\n"
4043 "2:\n\t"
4044 : "=r" (iBit)
4045 : "rm" (u32));
4046# else
4047 uint32_t iBit;
4048 _asm
4049 {
4050 bsr eax, [u32]
4051 jnz found
4052 xor eax, eax
4053 jmp done
4054 found:
4055 inc eax
4056 done:
4057 mov [iBit], eax
4058 }
4059# endif
4060 return iBit;
4061}
4062
4063
4064/**
4065 * Finds the last bit which is set in the given 32-bit integer.
4066 * Bits are numbered from 1 (least significant) to 32.
4067 *
4068 * @returns index [1..32] of the last set bit.
4069 * @returns 0 if all bits are cleared.
4070 * @param i32 Integer to search for set bits.
4071 * @remark Similar to fls() in BSD.
4072 */
4073DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4074{
4075 return ASMBitLastSetS32((uint32_t)i32);
4076}
4077
4078
4079/**
4080 * Reverse the byte order of the given 32-bit integer.
4081 * @param u32 Integer
4082 */
4083DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4084{
4085#if RT_INLINE_ASM_USES_INTRIN
4086 u32 = _byteswap_ulong(u32);
4087#elif RT_INLINE_ASM_GNU_STYLE
4088 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4089#else
4090 _asm
4091 {
4092 mov eax, [u32]
4093 bswap eax
4094 mov [u32], eax
4095 }
4096#endif
4097 return u32;
4098}
4099
4100/** @} */
4101
4102
4103/** @} */
4104#endif
4105
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette