VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 29063

Last change on this file since 29063 was 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 174.4 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using _MSC_VER >= 1400 and thus the Microsoft compiler intrinsics.
35 * Otherwise defined as 0.
36 */
37
38/* Solaris 10 header ugliness */
39#ifdef u
40#undef u
41#endif
42
43#ifdef _MSC_VER
44# if _MSC_VER >= 1400
45# define RT_INLINE_ASM_USES_INTRIN 1
46# include <intrin.h>
47 /* Emit the intrinsics at all optimization levels. */
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(_enable)
51# pragma intrinsic(_disable)
52# pragma intrinsic(__rdtsc)
53# pragma intrinsic(__readmsr)
54# pragma intrinsic(__writemsr)
55# pragma intrinsic(__outbyte)
56# pragma intrinsic(__outbytestring)
57# pragma intrinsic(__outword)
58# pragma intrinsic(__outwordstring)
59# pragma intrinsic(__outdword)
60# pragma intrinsic(__outdwordstring)
61# pragma intrinsic(__inbyte)
62# pragma intrinsic(__inbytestring)
63# pragma intrinsic(__inword)
64# pragma intrinsic(__inwordstring)
65# pragma intrinsic(__indword)
66# pragma intrinsic(__indwordstring)
67# pragma intrinsic(__invlpg)
68# pragma intrinsic(__wbinvd)
69# pragma intrinsic(__stosd)
70# pragma intrinsic(__stosw)
71# pragma intrinsic(__stosb)
72# pragma intrinsic(__readcr0)
73# pragma intrinsic(__readcr2)
74# pragma intrinsic(__readcr3)
75# pragma intrinsic(__readcr4)
76# pragma intrinsic(__writecr0)
77# pragma intrinsic(__writecr3)
78# pragma intrinsic(__writecr4)
79# pragma intrinsic(__readdr)
80# pragma intrinsic(__writedr)
81# pragma intrinsic(_BitScanForward)
82# pragma intrinsic(_BitScanReverse)
83# pragma intrinsic(_bittest)
84# pragma intrinsic(_bittestandset)
85# pragma intrinsic(_bittestandreset)
86# pragma intrinsic(_bittestandcomplement)
87# pragma intrinsic(_byteswap_ushort)
88# pragma intrinsic(_byteswap_ulong)
89# pragma intrinsic(_interlockedbittestandset)
90# pragma intrinsic(_interlockedbittestandreset)
91# pragma intrinsic(_InterlockedAnd)
92# pragma intrinsic(_InterlockedOr)
93# pragma intrinsic(_InterlockedIncrement)
94# pragma intrinsic(_InterlockedDecrement)
95# pragma intrinsic(_InterlockedExchange)
96# pragma intrinsic(_InterlockedExchangeAdd)
97# pragma intrinsic(_InterlockedCompareExchange)
98# pragma intrinsic(_InterlockedCompareExchange64)
99# ifdef RT_ARCH_AMD64
100# pragma intrinsic(_mm_mfence)
101# pragma intrinsic(_mm_sfence)
102# pragma intrinsic(_mm_lfence)
103# pragma intrinsic(__stosq)
104# pragma intrinsic(__readcr8)
105# pragma intrinsic(__writecr8)
106# pragma intrinsic(_byteswap_uint64)
107# pragma intrinsic(_InterlockedExchange64)
108# endif
109# endif
110#endif
111#ifndef RT_INLINE_ASM_USES_INTRIN
112# define RT_INLINE_ASM_USES_INTRIN 0
113#endif
114
115
116/** @defgroup grp_asm ASM - Assembly Routines
117 * @ingroup grp_rt
118 *
119 * @remarks The difference between ordered and unordered atomic operations is that
120 * the former will complete outstanding reads and writes before continuing
121 * while the latter doesn't make any promises about the order. Ordered
122 * operations don't, it seems, make any 100% promise wrt whether
123 * the operation will complete before any subsequent memory access.
124 * (Please correct if wrong.)
125 *
126 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
127 * are unordered (note the Uo).
128 *
129 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
130 * or even optimize assembler instructions away. For instance, in the following code
131 * the second rdmsr instruction is optimized away because gcc treats that instruction
132 * as deterministic:
133 *
134 * @code
135 * static inline uint64_t rdmsr_low(int idx)
136 * {
137 * uint32_t low;
138 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
139 * }
140 * ...
141 * uint32_t msr1 = rdmsr_low(1);
142 * foo(msr1);
143 * msr1 = rdmsr_low(1);
144 * bar(msr1);
145 * @endcode
146 *
147 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
148 * use the result of the first call as the input to bar() as well. For rdmsr this
149 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
150 * machine status information in general.
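 *
 * Marking the statement __volatile__ tells gcc that the asm has side effects it
 * cannot see, so it must be executed every time. A minimal corrected sketch
 * (rdmsr_low_volatile is just an illustrative name):
 *
 * @code
 * static inline uint32_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
 * @endcode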
151 *
152 * @{
153 */
154
155/** @def RT_INLINE_ASM_GCC_4_3_X_X86
156 * Used to work around some 4.3.x register allocation issues in this version of
157 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
158#ifdef __GNUC__
159# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
160#endif
161#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
162# define RT_INLINE_ASM_GCC_4_3_X_X86 0
163#endif
164
165/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
166 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
167 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds when
168 * in PIC mode on x86.
169 *
170 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
171 * when in PIC mode on x86.
172 */
173#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
174# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
175 ( (defined(PIC) || defined(__PIC__)) \
176 && defined(RT_ARCH_X86) \
177 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
178 || defined(RT_OS_DARWIN)) )
179#endif
180
181/** @def RT_INLINE_ASM_EXTERNAL
182 * Defined as 1 if the compiler does not support inline assembly.
183 * The ASM* functions will then be implemented in an external .asm file.
184 *
185 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
186 * inline assembly in their AMD64 compiler.
187 */
188#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
189# define RT_INLINE_ASM_EXTERNAL 1
190#else
191# define RT_INLINE_ASM_EXTERNAL 0
192#endif
193
194/** @def RT_INLINE_ASM_GNU_STYLE
195 * Defined as 1 if the compiler understands GNU style inline assembly.
196 */
197#if defined(_MSC_VER)
198# define RT_INLINE_ASM_GNU_STYLE 0
199#else
200# define RT_INLINE_ASM_GNU_STYLE 1
201#endif
202
203
204/** @todo find a more proper place for this structure? */
205#pragma pack(1)
206/** IDTR */
207typedef struct RTIDTR
208{
209 /** Size of the IDT. */
210 uint16_t cbIdt;
211 /** Address of the IDT. */
212 uintptr_t pIdt;
213} RTIDTR, *PRTIDTR;
214#pragma pack()
215
216#pragma pack(1)
217/** GDTR */
218typedef struct RTGDTR
219{
220 /** Size of the GDT. */
221 uint16_t cbGdt;
222 /** Address of the GDT. */
223 uintptr_t pGdt;
224} RTGDTR, *PRTGDTR;
225#pragma pack()
226
227
228/** @def ASMReturnAddress
229 * Gets the return address of the current (or calling if you like) function or method.
230 */
231#ifdef _MSC_VER
232# ifdef __cplusplus
233extern "C"
234# endif
235void * _ReturnAddress(void);
236# pragma intrinsic(_ReturnAddress)
237# define ASMReturnAddress() _ReturnAddress()
238#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
239# define ASMReturnAddress() __builtin_return_address(0)
240#else
241# error "Unsupported compiler."
242#endif
243
244
245/**
246 * Gets the content of the IDTR CPU register.
247 * @param pIdtr Where to store the IDTR contents.
248 */
249#if RT_INLINE_ASM_EXTERNAL
250DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
251#else
252DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
253{
254# if RT_INLINE_ASM_GNU_STYLE
255 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
256# else
257 __asm
258 {
259# ifdef RT_ARCH_AMD64
260 mov rax, [pIdtr]
261 sidt [rax]
262# else
263 mov eax, [pIdtr]
264 sidt [eax]
265# endif
266 }
267# endif
268}
269#endif
270
271
272/**
273 * Sets the content of the IDTR CPU register.
274 * @param pIdtr Where to load the IDTR contents from
275 */
276#if RT_INLINE_ASM_EXTERNAL
277DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
278#else
279DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
280{
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
283# else
284 __asm
285 {
286# ifdef RT_ARCH_AMD64
287 mov rax, [pIdtr]
288 lidt [rax]
289# else
290 mov eax, [pIdtr]
291 lidt [eax]
292# endif
293 }
294# endif
295}
296#endif
297
298
299/**
300 * Gets the content of the GDTR CPU register.
301 * @param pGdtr Where to store the GDTR contents.
302 */
303#if RT_INLINE_ASM_EXTERNAL
304DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
305#else
306DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
307{
308# if RT_INLINE_ASM_GNU_STYLE
309 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
310# else
311 __asm
312 {
313# ifdef RT_ARCH_AMD64
314 mov rax, [pGdtr]
315 sgdt [rax]
316# else
317 mov eax, [pGdtr]
318 sgdt [eax]
319# endif
320 }
321# endif
322}
323#endif
324
325/**
326 * Get the CS register.
327 * @returns CS.
328 */
329#if RT_INLINE_ASM_EXTERNAL
330DECLASM(RTSEL) ASMGetCS(void);
331#else
332DECLINLINE(RTSEL) ASMGetCS(void)
333{
334 RTSEL SelCS;
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
337# else
338 __asm
339 {
340 mov ax, cs
341 mov [SelCS], ax
342 }
343# endif
344 return SelCS;
345}
346#endif
347
348
349/**
350 * Get the DS register.
351 * @returns DS.
352 */
353#if RT_INLINE_ASM_EXTERNAL
354DECLASM(RTSEL) ASMGetDS(void);
355#else
356DECLINLINE(RTSEL) ASMGetDS(void)
357{
358 RTSEL SelDS;
359# if RT_INLINE_ASM_GNU_STYLE
360 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
361# else
362 __asm
363 {
364 mov ax, ds
365 mov [SelDS], ax
366 }
367# endif
368 return SelDS;
369}
370#endif
371
372
373/**
374 * Get the ES register.
375 * @returns ES.
376 */
377#if RT_INLINE_ASM_EXTERNAL
378DECLASM(RTSEL) ASMGetES(void);
379#else
380DECLINLINE(RTSEL) ASMGetES(void)
381{
382 RTSEL SelES;
383# if RT_INLINE_ASM_GNU_STYLE
384 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
385# else
386 __asm
387 {
388 mov ax, es
389 mov [SelES], ax
390 }
391# endif
392 return SelES;
393}
394#endif
395
396
397/**
398 * Get the FS register.
399 * @returns FS.
400 */
401#if RT_INLINE_ASM_EXTERNAL
402DECLASM(RTSEL) ASMGetFS(void);
403#else
404DECLINLINE(RTSEL) ASMGetFS(void)
405{
406 RTSEL SelFS;
407# if RT_INLINE_ASM_GNU_STYLE
408 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
409# else
410 __asm
411 {
412 mov ax, fs
413 mov [SelFS], ax
414 }
415# endif
416 return SelFS;
417}
418#endif
419
420
421/**
422 * Get the GS register.
423 * @returns GS.
424 */
425#if RT_INLINE_ASM_EXTERNAL
426DECLASM(RTSEL) ASMGetGS(void);
427#else
428DECLINLINE(RTSEL) ASMGetGS(void)
429{
430 RTSEL SelGS;
431# if RT_INLINE_ASM_GNU_STYLE
432 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
433# else
434 __asm
435 {
436 mov ax, gs
437 mov [SelGS], ax
438 }
439# endif
440 return SelGS;
441}
442#endif
443
444
445/**
446 * Get the SS register.
447 * @returns SS.
448 */
449#if RT_INLINE_ASM_EXTERNAL
450DECLASM(RTSEL) ASMGetSS(void);
451#else
452DECLINLINE(RTSEL) ASMGetSS(void)
453{
454 RTSEL SelSS;
455# if RT_INLINE_ASM_GNU_STYLE
456 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
457# else
458 __asm
459 {
460 mov ax, ss
461 mov [SelSS], ax
462 }
463# endif
464 return SelSS;
465}
466#endif
467
468
469/**
470 * Get the TR register.
471 * @returns TR.
472 */
473#if RT_INLINE_ASM_EXTERNAL
474DECLASM(RTSEL) ASMGetTR(void);
475#else
476DECLINLINE(RTSEL) ASMGetTR(void)
477{
478 RTSEL SelTR;
479# if RT_INLINE_ASM_GNU_STYLE
480 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
481# else
482 __asm
483 {
484 str ax
485 mov [SelTR], ax
486 }
487# endif
488 return SelTR;
489}
490#endif
491
492
493/**
494 * Get the [RE]FLAGS register.
495 * @returns [RE]FLAGS.
496 */
497#if RT_INLINE_ASM_EXTERNAL
498DECLASM(RTCCUINTREG) ASMGetFlags(void);
499#else
500DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
501{
502 RTCCUINTREG uFlags;
503# if RT_INLINE_ASM_GNU_STYLE
504# ifdef RT_ARCH_AMD64
505 __asm__ __volatile__("pushfq\n\t"
506 "popq %0\n\t"
507 : "=r" (uFlags));
508# else
509 __asm__ __volatile__("pushfl\n\t"
510 "popl %0\n\t"
511 : "=r" (uFlags));
512# endif
513# else
514 __asm
515 {
516# ifdef RT_ARCH_AMD64
517 pushfq
518 pop [uFlags]
519# else
520 pushfd
521 pop [uFlags]
522# endif
523 }
524# endif
525 return uFlags;
526}
527#endif
528
529
530/**
531 * Set the [RE]FLAGS register.
532 * @param uFlags The new [RE]FLAGS value.
533 */
534#if RT_INLINE_ASM_EXTERNAL
535DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
536#else
537DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
538{
539# if RT_INLINE_ASM_GNU_STYLE
540# ifdef RT_ARCH_AMD64
541 __asm__ __volatile__("pushq %0\n\t"
542 "popfq\n\t"
543 : : "g" (uFlags));
544# else
545 __asm__ __volatile__("pushl %0\n\t"
546 "popfl\n\t"
547 : : "g" (uFlags));
548# endif
549# else
550 __asm
551 {
552# ifdef RT_ARCH_AMD64
553 push [uFlags]
554 popfq
555# else
556 push [uFlags]
557 popfd
558# endif
559 }
560# endif
561}
562#endif
563
564
565/**
566 * Gets the content of the CPU timestamp counter register.
567 *
568 * @returns TSC.
569 */
570#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
571DECLASM(uint64_t) ASMReadTSC(void);
572#else
573DECLINLINE(uint64_t) ASMReadTSC(void)
574{
575 RTUINT64U u;
576# if RT_INLINE_ASM_GNU_STYLE
577 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
578# else
579# if RT_INLINE_ASM_USES_INTRIN
580 u.u = __rdtsc();
581# else
582 __asm
583 {
584 rdtsc
585 mov [u.s.Lo], eax
586 mov [u.s.Hi], edx
587 }
588# endif
589# endif
590 return u.u;
591}
592#endif
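
/* A rough cycle-count sketch using ASMReadTSC; note that rdtsc is not a
 * serializing instruction, so this is only good for coarse measurements:
 * @code
 * uint64_t const uTscStart = ASMReadTSC();
 * // ... work to be measured ...
 * uint64_t const cTicks    = ASMReadTSC() - uTscStart;
 * @endcode
 */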
593
594
595/**
596 * Performs the cpuid instruction returning all registers.
597 *
598 * @param uOperator CPUID operation (eax).
599 * @param pvEAX Where to store eax.
600 * @param pvEBX Where to store ebx.
601 * @param pvECX Where to store ecx.
602 * @param pvEDX Where to store edx.
603 * @remark We're using void pointers to ease the use of special bitfield structures and such.
604 */
605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
606DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
607#else
608DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
609{
610# if RT_INLINE_ASM_GNU_STYLE
611# ifdef RT_ARCH_AMD64
612 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
613 __asm__ ("cpuid\n\t"
614 : "=a" (uRAX),
615 "=b" (uRBX),
616 "=c" (uRCX),
617 "=d" (uRDX)
618 : "0" (uOperator));
619 *(uint32_t *)pvEAX = (uint32_t)uRAX;
620 *(uint32_t *)pvEBX = (uint32_t)uRBX;
621 *(uint32_t *)pvECX = (uint32_t)uRCX;
622 *(uint32_t *)pvEDX = (uint32_t)uRDX;
623# else
624 __asm__ ("xchgl %%ebx, %1\n\t"
625 "cpuid\n\t"
626 "xchgl %%ebx, %1\n\t"
627 : "=a" (*(uint32_t *)pvEAX),
628 "=r" (*(uint32_t *)pvEBX),
629 "=c" (*(uint32_t *)pvECX),
630 "=d" (*(uint32_t *)pvEDX)
631 : "0" (uOperator));
632# endif
633
634# elif RT_INLINE_ASM_USES_INTRIN
635 int aInfo[4];
636 __cpuid(aInfo, uOperator);
637 *(uint32_t *)pvEAX = aInfo[0];
638 *(uint32_t *)pvEBX = aInfo[1];
639 *(uint32_t *)pvECX = aInfo[2];
640 *(uint32_t *)pvEDX = aInfo[3];
641
642# else
643 uint32_t uEAX;
644 uint32_t uEBX;
645 uint32_t uECX;
646 uint32_t uEDX;
647 __asm
648 {
649 push ebx
650 mov eax, [uOperator]
651 cpuid
652 mov [uEAX], eax
653 mov [uEBX], ebx
654 mov [uECX], ecx
655 mov [uEDX], edx
656 pop ebx
657 }
658 *(uint32_t *)pvEAX = uEAX;
659 *(uint32_t *)pvEBX = uEBX;
660 *(uint32_t *)pvECX = uECX;
661 *(uint32_t *)pvEDX = uEDX;
662# endif
663}
664#endif
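
/* A minimal usage sketch: CPUID leaf 0 returns the vendor string in EBX, EDX,
 * ECX order, which is one reason the output parameters are void pointers:
 * @code
 * char     szVendor[13];
 * uint32_t uEAX;
 * ASMCpuId(0, &uEAX, &szVendor[0], &szVendor[8], &szVendor[4]);
 * szVendor[12] = '\0';   // e.g. "GenuineIntel" or "AuthenticAMD"
 * @endcode
 */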
665
666
667/**
668 * Performs the cpuid instruction returning all registers.
669 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
670 *
671 * @param uOperator CPUID operation (eax).
672 * @param uIdxECX ecx index
673 * @param pvEAX Where to store eax.
674 * @param pvEBX Where to store ebx.
675 * @param pvECX Where to store ecx.
676 * @param pvEDX Where to store edx.
677 * @remark We're using void pointers to ease the use of special bitfield structures and such.
678 */
679#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
680DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
681#else
682DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
683{
684# if RT_INLINE_ASM_GNU_STYLE
685# ifdef RT_ARCH_AMD64
686 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
687 __asm__ ("cpuid\n\t"
688 : "=a" (uRAX),
689 "=b" (uRBX),
690 "=c" (uRCX),
691 "=d" (uRDX)
692 : "0" (uOperator),
693 "2" (uIdxECX));
694 *(uint32_t *)pvEAX = (uint32_t)uRAX;
695 *(uint32_t *)pvEBX = (uint32_t)uRBX;
696 *(uint32_t *)pvECX = (uint32_t)uRCX;
697 *(uint32_t *)pvEDX = (uint32_t)uRDX;
698# else
699 __asm__ ("xchgl %%ebx, %1\n\t"
700 "cpuid\n\t"
701 "xchgl %%ebx, %1\n\t"
702 : "=a" (*(uint32_t *)pvEAX),
703 "=r" (*(uint32_t *)pvEBX),
704 "=c" (*(uint32_t *)pvECX),
705 "=d" (*(uint32_t *)pvEDX)
706 : "0" (uOperator),
707 "2" (uIdxECX));
708# endif
709
710# elif RT_INLINE_ASM_USES_INTRIN
711 int aInfo[4];
712 /** @todo The __cpuid intrinsic does not take the ECX sub-leaf, so uIdxECX is ignored here; another intrinsic is needed. */
713 __cpuid(aInfo, uOperator);
714 *(uint32_t *)pvEAX = aInfo[0];
715 *(uint32_t *)pvEBX = aInfo[1];
716 *(uint32_t *)pvECX = aInfo[2];
717 *(uint32_t *)pvEDX = aInfo[3];
718
719# else
720 uint32_t uEAX;
721 uint32_t uEBX;
722 uint32_t uECX;
723 uint32_t uEDX;
724 __asm
725 {
726 push ebx
727 mov eax, [uOperator]
728 mov ecx, [uIdxECX]
729 cpuid
730 mov [uEAX], eax
731 mov [uEBX], ebx
732 mov [uECX], ecx
733 mov [uEDX], edx
734 pop ebx
735 }
736 *(uint32_t *)pvEAX = uEAX;
737 *(uint32_t *)pvEBX = uEBX;
738 *(uint32_t *)pvECX = uECX;
739 *(uint32_t *)pvEDX = uEDX;
740# endif
741}
742#endif
743
744
745/**
746 * Performs the cpuid instruction returning ecx and edx.
747 *
748 * @param uOperator CPUID operation (eax).
749 * @param pvECX Where to store ecx.
750 * @param pvEDX Where to store edx.
751 * @remark We're using void pointers to ease the use of special bitfield structures and such.
752 */
753#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
754DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
755#else
756DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
757{
758 uint32_t uEBX;
759 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
760}
761#endif
762
763
764/**
765 * Performs the cpuid instruction returning edx.
766 *
767 * @param uOperator CPUID operation (eax).
768 * @returns EDX after cpuid operation.
769 */
770#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
771DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
772#else
773DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
774{
775 RTCCUINTREG xDX;
776# if RT_INLINE_ASM_GNU_STYLE
777# ifdef RT_ARCH_AMD64
778 RTCCUINTREG uSpill;
779 __asm__ ("cpuid"
780 : "=a" (uSpill),
781 "=d" (xDX)
782 : "0" (uOperator)
783 : "rbx", "rcx");
784# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
785 __asm__ ("push %%ebx\n\t"
786 "cpuid\n\t"
787 "pop %%ebx\n\t"
788 : "=a" (uOperator),
789 "=d" (xDX)
790 : "0" (uOperator)
791 : "ecx");
792# else
793 __asm__ ("cpuid"
794 : "=a" (uOperator),
795 "=d" (xDX)
796 : "0" (uOperator)
797 : "ebx", "ecx");
798# endif
799
800# elif RT_INLINE_ASM_USES_INTRIN
801 int aInfo[4];
802 __cpuid(aInfo, uOperator);
803 xDX = aInfo[3];
804
805# else
806 __asm
807 {
808 push ebx
809 mov eax, [uOperator]
810 cpuid
811 mov [xDX], edx
812 pop ebx
813 }
814# endif
815 return (uint32_t)xDX;
816}
817#endif
818
819
820/**
821 * Performs the cpuid instruction returning ecx.
822 *
823 * @param uOperator CPUID operation (eax).
824 * @returns ECX after cpuid operation.
825 */
826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
827DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
828#else
829DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
830{
831 RTCCUINTREG xCX;
832# if RT_INLINE_ASM_GNU_STYLE
833# ifdef RT_ARCH_AMD64
834 RTCCUINTREG uSpill;
835 __asm__ ("cpuid"
836 : "=a" (uSpill),
837 "=c" (xCX)
838 : "0" (uOperator)
839 : "rbx", "rdx");
840# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
841 __asm__ ("push %%ebx\n\t"
842 "cpuid\n\t"
843 "pop %%ebx\n\t"
844 : "=a" (uOperator),
845 "=c" (xCX)
846 : "0" (uOperator)
847 : "edx");
848# else
849 __asm__ ("cpuid"
850 : "=a" (uOperator),
851 "=c" (xCX)
852 : "0" (uOperator)
853 : "ebx", "edx");
854
855# endif
856
857# elif RT_INLINE_ASM_USES_INTRIN
858 int aInfo[4];
859 __cpuid(aInfo, uOperator);
860 xCX = aInfo[2];
861
862# else
863 __asm
864 {
865 push ebx
866 mov eax, [uOperator]
867 cpuid
868 mov [xCX], ecx
869 pop ebx
870 }
871# endif
872 return (uint32_t)xCX;
873}
874#endif
875
876
877/**
878 * Checks if the current CPU supports CPUID.
879 *
880 * @returns true if CPUID is supported.
881 */
882DECLINLINE(bool) ASMHasCpuId(void)
883{
884#ifdef RT_ARCH_AMD64
885 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
886#else /* !RT_ARCH_AMD64 */
887 bool fRet = false;
888# if RT_INLINE_ASM_GNU_STYLE
889 uint32_t u1;
890 uint32_t u2;
891 __asm__ ("pushf\n\t"
892 "pop %1\n\t"
893 "mov %1, %2\n\t"
894 "xorl $0x200000, %1\n\t"
895 "push %1\n\t"
896 "popf\n\t"
897 "pushf\n\t"
898 "pop %1\n\t"
899 "cmpl %1, %2\n\t"
900 "setne %0\n\t"
901 "push %2\n\t"
902 "popf\n\t"
903 : "=m" (fRet), "=r" (u1), "=r" (u2));
904# else
905 __asm
906 {
907 pushfd
908 pop eax
909 mov ebx, eax
910 xor eax, 0200000h
911 push eax
912 popfd
913 pushfd
914 pop eax
915 cmp eax, ebx
916 setne fRet
917 push ebx
918 popfd
919 }
920# endif
921 return fRet;
922#endif /* !RT_ARCH_AMD64 */
923}
924
925
926/**
927 * Gets the APIC ID of the current CPU.
928 *
929 * @returns the APIC ID.
930 */
931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
932DECLASM(uint8_t) ASMGetApicId(void);
933#else
934DECLINLINE(uint8_t) ASMGetApicId(void)
935{
936 RTCCUINTREG xBX;
937# if RT_INLINE_ASM_GNU_STYLE
938# ifdef RT_ARCH_AMD64
939 RTCCUINTREG uSpill;
940 __asm__ ("cpuid"
941 : "=a" (uSpill),
942 "=b" (xBX)
943 : "0" (1)
944 : "rcx", "rdx");
945# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
946 RTCCUINTREG uSpill;
947 __asm__ ("mov %%ebx,%1\n\t"
948 "cpuid\n\t"
949 "xchgl %%ebx,%1\n\t"
950 : "=a" (uSpill),
951 "=r" (xBX)
952 : "0" (1)
953 : "ecx", "edx");
954# else
955 RTCCUINTREG uSpill;
956 __asm__ ("cpuid"
957 : "=a" (uSpill),
958 "=b" (xBX)
959 : "0" (1)
960 : "ecx", "edx");
961# endif
962
963# elif RT_INLINE_ASM_USES_INTRIN
964 int aInfo[4];
965 __cpuid(aInfo, 1);
966 xBX = aInfo[1];
967
968# else
969 __asm
970 {
971 push ebx
972 mov eax, 1
973 cpuid
974 mov [xBX], ebx
975 pop ebx
976 }
977# endif
978 return (uint8_t)(xBX >> 24);
979}
980#endif
981
982
983/**
984 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
985 *
986 * @returns true/false.
987 * @param uEBX EBX return from ASMCpuId(0)
988 * @param uECX ECX return from ASMCpuId(0)
989 * @param uEDX EDX return from ASMCpuId(0)
990 */
991DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
992{
993 return uEBX == UINT32_C(0x756e6547)
994 && uECX == UINT32_C(0x6c65746e)
995 && uEDX == UINT32_C(0x49656e69);
996}
997
998
999/**
1000 * Tests if this is a genuine Intel CPU.
1001 *
1002 * @returns true/false.
1003 * @remarks ASSUMES that cpuid is supported by the CPU.
1004 */
1005DECLINLINE(bool) ASMIsIntelCpu(void)
1006{
1007 uint32_t uEAX, uEBX, uECX, uEDX;
1008 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1009 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
1010}
1011
1012
1013/**
1014 * Tests if it is an authentic AMD CPU based on the ASMCpuId(0) output.
1015 *
1016 * @returns true/false.
1017 * @param uEBX EBX return from ASMCpuId(0)
1018 * @param uECX ECX return from ASMCpuId(0)
1019 * @param uEDX EDX return from ASMCpuId(0)
1020 */
1021DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
1022{
1023 return uEBX == UINT32_C(0x68747541)
1024 && uECX == UINT32_C(0x444d4163)
1025 && uEDX == UINT32_C(0x69746e65);
1026}
1027
1028
1029/**
1030 * Tests if this is an authentic AMD CPU.
1031 *
1032 * @returns true/false.
1033 * @remarks ASSUMES that cpuid is supported by the CPU.
1034 */
1035DECLINLINE(bool) ASMIsAmdCpu(void)
1036{
1037 uint32_t uEAX, uEBX, uECX, uEDX;
1038 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1039 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
1040}
1041
1042
1043/**
1044 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1045 *
1046 * @returns Family.
1047 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1048 */
1049DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1050{
1051 return ((uEAX >> 8) & 0xf) == 0xf
1052 ? ((uEAX >> 20) & 0x7f) + 0xf
1053 : ((uEAX >> 8) & 0xf);
1054}
1055
1056
1057/**
1058 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1059 *
1060 * @returns Model.
1061 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1062 */
1063DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1064{
1065 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1066 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1067 : ((uEAX >> 4) & 0xf);
1068}
1069
1070
1071/**
1072 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1073 *
1074 * @returns Model.
1075 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1076 */
1077DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1078{
1079 return ((uEAX >> 8) & 0xf) == 0xf
1080 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1081 : ((uEAX >> 4) & 0xf);
1082}
1083
1084
1085/**
1086 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1087 *
1088 * @returns Model.
1089 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1090 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1091 */
1092DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1093{
1094 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1095 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1096 : ((uEAX >> 4) & 0xf);
1097}
1098
1099
1100/**
1101 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1102 *
1103 * @returns Stepping.
1104 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1105 */
1106DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1107{
1108 return uEAX & 0xf;
1109}
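
/* A sketch of how the decoders above are typically combined (assumes cpuid is
 * available, see ASMHasCpuId):
 * @code
 * uint32_t uEAX, uEBX, uECX, uEDX;
 * ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 * bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 * ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 * uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 * uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 * uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */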
1110
1111
1112/**
1113 * Get cr0.
1114 * @returns cr0.
1115 */
1116#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1117DECLASM(RTCCUINTREG) ASMGetCR0(void);
1118#else
1119DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1120{
1121 RTCCUINTREG uCR0;
1122# if RT_INLINE_ASM_USES_INTRIN
1123 uCR0 = __readcr0();
1124
1125# elif RT_INLINE_ASM_GNU_STYLE
1126# ifdef RT_ARCH_AMD64
1127 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1128# else
1129 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1130# endif
1131# else
1132 __asm
1133 {
1134# ifdef RT_ARCH_AMD64
1135 mov rax, cr0
1136 mov [uCR0], rax
1137# else
1138 mov eax, cr0
1139 mov [uCR0], eax
1140# endif
1141 }
1142# endif
1143 return uCR0;
1144}
1145#endif
1146
1147
1148/**
1149 * Sets the CR0 register.
1150 * @param uCR0 The new CR0 value.
1151 */
1152#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1153DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1154#else
1155DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1156{
1157# if RT_INLINE_ASM_USES_INTRIN
1158 __writecr0(uCR0);
1159
1160# elif RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR0]
1171 mov cr0, rax
1172# else
1173 mov eax, [uCR0]
1174 mov cr0, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr2.
1184 * @returns cr2.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR2(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1190{
1191 RTCCUINTREG uCR2;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR2 = __readcr2();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1198# else
1199 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr2
1206 mov [uCR2], rax
1207# else
1208 mov eax, cr2
1209 mov [uCR2], eax
1210# endif
1211 }
1212# endif
1213 return uCR2;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR2 register.
1220 * @param uCR2 The new CR2 value.
1221 */
1222#if RT_INLINE_ASM_EXTERNAL
1223DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1224#else
1225DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1226{
1227# if RT_INLINE_ASM_GNU_STYLE
1228# ifdef RT_ARCH_AMD64
1229 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1230# else
1231 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1232# endif
1233# else
1234 __asm
1235 {
1236# ifdef RT_ARCH_AMD64
1237 mov rax, [uCR2]
1238 mov cr2, rax
1239# else
1240 mov eax, [uCR2]
1241 mov cr2, eax
1242# endif
1243 }
1244# endif
1245}
1246#endif
1247
1248
1249/**
1250 * Get cr3.
1251 * @returns cr3.
1252 */
1253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1254DECLASM(RTCCUINTREG) ASMGetCR3(void);
1255#else
1256DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1257{
1258 RTCCUINTREG uCR3;
1259# if RT_INLINE_ASM_USES_INTRIN
1260 uCR3 = __readcr3();
1261
1262# elif RT_INLINE_ASM_GNU_STYLE
1263# ifdef RT_ARCH_AMD64
1264 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1265# else
1266 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1267# endif
1268# else
1269 __asm
1270 {
1271# ifdef RT_ARCH_AMD64
1272 mov rax, cr3
1273 mov [uCR3], rax
1274# else
1275 mov eax, cr3
1276 mov [uCR3], eax
1277# endif
1278 }
1279# endif
1280 return uCR3;
1281}
1282#endif
1283
1284
1285/**
1286 * Sets the CR3 register.
1287 *
1288 * @param uCR3 New CR3 value.
1289 */
1290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1291DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1292#else
1293DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1294{
1295# if RT_INLINE_ASM_USES_INTRIN
1296 __writecr3(uCR3);
1297
1298# elif RT_INLINE_ASM_GNU_STYLE
1299# ifdef RT_ARCH_AMD64
1300 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1301# else
1302 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1303# endif
1304# else
1305 __asm
1306 {
1307# ifdef RT_ARCH_AMD64
1308 mov rax, [uCR3]
1309 mov cr3, rax
1310# else
1311 mov eax, [uCR3]
1312 mov cr3, eax
1313# endif
1314 }
1315# endif
1316}
1317#endif
1318
1319
1320/**
1321 * Reloads the CR3 register.
1322 */
1323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1324DECLASM(void) ASMReloadCR3(void);
1325#else
1326DECLINLINE(void) ASMReloadCR3(void)
1327{
1328# if RT_INLINE_ASM_USES_INTRIN
1329 __writecr3(__readcr3());
1330
1331# elif RT_INLINE_ASM_GNU_STYLE
1332 RTCCUINTREG u;
1333# ifdef RT_ARCH_AMD64
1334 __asm__ __volatile__("movq %%cr3, %0\n\t"
1335 "movq %0, %%cr3\n\t"
1336 : "=r" (u));
1337# else
1338 __asm__ __volatile__("movl %%cr3, %0\n\t"
1339 "movl %0, %%cr3\n\t"
1340 : "=r" (u));
1341# endif
1342# else
1343 __asm
1344 {
1345# ifdef RT_ARCH_AMD64
1346 mov rax, cr3
1347 mov cr3, rax
1348# else
1349 mov eax, cr3
1350 mov cr3, eax
1351# endif
1352 }
1353# endif
1354}
1355#endif
1356
1357
1358/**
1359 * Get cr4.
1360 * @returns cr4.
1361 */
1362#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1363DECLASM(RTCCUINTREG) ASMGetCR4(void);
1364#else
1365DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1366{
1367 RTCCUINTREG uCR4;
1368# if RT_INLINE_ASM_USES_INTRIN
1369 uCR4 = __readcr4();
1370
1371# elif RT_INLINE_ASM_GNU_STYLE
1372# ifdef RT_ARCH_AMD64
1373 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1374# else
1375 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1376# endif
1377# else
1378 __asm
1379 {
1380# ifdef RT_ARCH_AMD64
1381 mov rax, cr4
1382 mov [uCR4], rax
1383# else
1384 push eax /* just in case */
1385 /*mov eax, cr4*/
1386 _emit 0x0f
1387 _emit 0x20
1388 _emit 0xe0
1389 mov [uCR4], eax
1390 pop eax
1391# endif
1392 }
1393# endif
1394 return uCR4;
1395}
1396#endif
1397
1398
1399/**
1400 * Sets the CR4 register.
1401 *
1402 * @param uCR4 New CR4 value.
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1406#else
1407DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1408{
1409# if RT_INLINE_ASM_USES_INTRIN
1410 __writecr4(uCR4);
1411
1412# elif RT_INLINE_ASM_GNU_STYLE
1413# ifdef RT_ARCH_AMD64
1414 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1415# else
1416 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1417# endif
1418# else
1419 __asm
1420 {
1421# ifdef RT_ARCH_AMD64
1422 mov rax, [uCR4]
1423 mov cr4, rax
1424# else
1425 mov eax, [uCR4]
1426 _emit 0x0F
1427 _emit 0x22
1428 _emit 0xE0 /* mov cr4, eax */
1429# endif
1430 }
1431# endif
1432}
1433#endif
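
/* Read-modify-write sketch (ring-0 only); bit 7 is CR4.PGE, spelled out here
 * purely for illustration:
 * @code
 * RTCCUINTREG uCR4 = ASMGetCR4();
 * ASMSetCR4(uCR4 | RT_BIT(7));   // enable global pages
 * @endcode
 */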
1434
1435
1436/**
1437 * Get cr8.
1438 * @returns cr8.
1439 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1440 */
1441#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1442DECLASM(RTCCUINTREG) ASMGetCR8(void);
1443#else
1444DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1445{
1446# ifdef RT_ARCH_AMD64
1447 RTCCUINTREG uCR8;
1448# if RT_INLINE_ASM_USES_INTRIN
1449 uCR8 = __readcr8();
1450
1451# elif RT_INLINE_ASM_GNU_STYLE
1452 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1453# else
1454 __asm
1455 {
1456 mov rax, cr8
1457 mov [uCR8], rax
1458 }
1459# endif
1460 return uCR8;
1461# else /* !RT_ARCH_AMD64 */
1462 return 0;
1463# endif /* !RT_ARCH_AMD64 */
1464}
1465#endif
1466
1467
1468/**
1469 * Enables interrupts (EFLAGS.IF).
1470 */
1471#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1472DECLASM(void) ASMIntEnable(void);
1473#else
1474DECLINLINE(void) ASMIntEnable(void)
1475{
1476# if RT_INLINE_ASM_GNU_STYLE
1477 __asm("sti\n");
1478# elif RT_INLINE_ASM_USES_INTRIN
1479 _enable();
1480# else
1481 __asm sti
1482# endif
1483}
1484#endif
1485
1486
1487/**
1488 * Disables interrupts (!EFLAGS.IF).
1489 */
1490#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1491DECLASM(void) ASMIntDisable(void);
1492#else
1493DECLINLINE(void) ASMIntDisable(void)
1494{
1495# if RT_INLINE_ASM_GNU_STYLE
1496 __asm("cli\n");
1497# elif RT_INLINE_ASM_USES_INTRIN
1498 _disable();
1499# else
1500 __asm cli
1501# endif
1502}
1503#endif
1504
1505
1506/**
1507 * Disables interrupts and returns previous xFLAGS.
1508 */
1509#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1510DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1511#else
1512DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1513{
1514 RTCCUINTREG xFlags;
1515# if RT_INLINE_ASM_GNU_STYLE
1516# ifdef RT_ARCH_AMD64
1517 __asm__ __volatile__("pushfq\n\t"
1518 "cli\n\t"
1519 "popq %0\n\t"
1520 : "=r" (xFlags));
1521# else
1522 __asm__ __volatile__("pushfl\n\t"
1523 "cli\n\t"
1524 "popl %0\n\t"
1525 : "=r" (xFlags));
1526# endif
1527# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1528 xFlags = ASMGetFlags();
1529 _disable();
1530# else
1531 __asm {
1532 pushfd
1533 cli
1534 pop [xFlags]
1535 }
1536# endif
1537 return xFlags;
1538}
1539#endif
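
/* The usual pattern for a short interrupt-free section (kernel context assumed);
 * ASMSetFlags() restores the previous IF state afterwards:
 * @code
 * RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 * // ... touch per-CPU state ...
 * ASMSetFlags(fSavedFlags);
 * @endcode
 */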
1540
1541
1542/**
1543 * Are interrupts enabled?
1544 *
1545 * @returns true / false.
1546 */
1547DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1548{
1549 RTCCUINTREG uFlags = ASMGetFlags();
1550 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1551}
1552
1553
1554/**
1555 * Halts the CPU until interrupted.
1556 */
1557#if RT_INLINE_ASM_EXTERNAL
1558DECLASM(void) ASMHalt(void);
1559#else
1560DECLINLINE(void) ASMHalt(void)
1561{
1562# if RT_INLINE_ASM_GNU_STYLE
1563 __asm__ __volatile__("hlt\n\t");
1564# else
1565 __asm {
1566 hlt
1567 }
1568# endif
1569}
1570#endif
1571
1572
1573/**
1574 * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
1575 */
1576#if RT_INLINE_ASM_EXTERNAL
1577DECLASM(void) ASMNopPause(void);
1578#else
1579DECLINLINE(void) ASMNopPause(void)
1580{
1581# if RT_INLINE_ASM_GNU_STYLE
1582 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1583# else
1584 __asm {
1585 _emit 0f3h
1586 _emit 090h
1587 }
1588# endif
1589}
1590#endif
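
/* Typical spin-wait use (s_fReady is a hypothetical flag set by another CPU):
 * @code
 * static volatile uint32_t s_fReady;
 * ...
 * while (!ASMAtomicReadU32(&s_fReady))
 *     ASMNopPause();
 * @endcode
 */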
1591
1592
1593/**
1594 * Reads a machine specific register.
1595 *
1596 * @returns Register content.
1597 * @param uRegister Register to read.
1598 */
1599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1600DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1601#else
1602DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1603{
1604 RTUINT64U u;
1605# if RT_INLINE_ASM_GNU_STYLE
1606 __asm__ __volatile__("rdmsr\n\t"
1607 : "=a" (u.s.Lo),
1608 "=d" (u.s.Hi)
1609 : "c" (uRegister));
1610
1611# elif RT_INLINE_ASM_USES_INTRIN
1612 u.u = __readmsr(uRegister);
1613
1614# else
1615 __asm
1616 {
1617 mov ecx, [uRegister]
1618 rdmsr
1619 mov [u.s.Lo], eax
1620 mov [u.s.Hi], edx
1621 }
1622# endif
1623
1624 return u.u;
1625}
1626#endif
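
/* A minimal sketch (ring-0 only); 0x1b is IA32_APIC_BASE, given here merely as
 * an illustration of an MSR index:
 * @code
 * uint64_t const uApicBase = ASMRdMsr(0x1b);
 * bool     const fBsp      = (uApicBase >> 8) & 1;   // BSP flag
 * @endcode
 */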
1627
1628
1629/**
1630 * Writes a machine specific register.
1631 *
1633 * @param uRegister Register to write to.
1634 * @param u64Val Value to write.
1635 */
1636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1637DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1638#else
1639DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1640{
1641 RTUINT64U u;
1642
1643 u.u = u64Val;
1644# if RT_INLINE_ASM_GNU_STYLE
1645 __asm__ __volatile__("wrmsr\n\t"
1646 ::"a" (u.s.Lo),
1647 "d" (u.s.Hi),
1648 "c" (uRegister));
1649
1650# elif RT_INLINE_ASM_USES_INTRIN
1651 __writemsr(uRegister, u.u);
1652
1653# else
1654 __asm
1655 {
1656 mov ecx, [uRegister]
1657 mov edx, [u.s.Hi]
1658 mov eax, [u.s.Lo]
1659 wrmsr
1660 }
1661# endif
1662}
1663#endif
1664
1665
1666/**
1667 * Reads low part of a machine specific register.
1668 *
1669 * Reads the low part of a machine specific register.
1670 * @param uRegister Register to read.
1671 */
1672#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1673DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1674#else
1675DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1676{
1677 uint32_t u32;
1678# if RT_INLINE_ASM_GNU_STYLE
1679 __asm__ __volatile__("rdmsr\n\t"
1680 : "=a" (u32)
1681 : "c" (uRegister)
1682 : "edx");
1683
1684# elif RT_INLINE_ASM_USES_INTRIN
1685 u32 = (uint32_t)__readmsr(uRegister);
1686
1687# else
1688 __asm
1689 {
1690 mov ecx, [uRegister]
1691 rdmsr
1692 mov [u32], eax
1693 }
1694# endif
1695
1696 return u32;
1697}
1698#endif
1699
1700
1701/**
1702 * Reads the high part of a machine specific register.
1703 *
1704 * @returns Register content.
1705 * @param uRegister Register to read.
1706 */
1707#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1708DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1709#else
1710DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1711{
1712 uint32_t u32;
1713# if RT_INLINE_ASM_GNU_STYLE
1714 __asm__ __volatile__("rdmsr\n\t"
1715 : "=d" (u32)
1716 : "c" (uRegister)
1717 : "eax");
1718
1719# elif RT_INLINE_ASM_USES_INTRIN
1720 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1721
1722# else
1723 __asm
1724 {
1725 mov ecx, [uRegister]
1726 rdmsr
1727 mov [u32], edx
1728 }
1729# endif
1730
1731 return u32;
1732}
1733#endif
1734
1735
1736/**
1737 * Gets dr0.
1738 *
1739 * @returns dr0.
1740 */
1741#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1742DECLASM(RTCCUINTREG) ASMGetDR0(void);
1743#else
1744DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1745{
1746 RTCCUINTREG uDR0;
1747# if RT_INLINE_ASM_USES_INTRIN
1748 uDR0 = __readdr(0);
1749# elif RT_INLINE_ASM_GNU_STYLE
1750# ifdef RT_ARCH_AMD64
1751 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1752# else
1753 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1754# endif
1755# else
1756 __asm
1757 {
1758# ifdef RT_ARCH_AMD64
1759 mov rax, dr0
1760 mov [uDR0], rax
1761# else
1762 mov eax, dr0
1763 mov [uDR0], eax
1764# endif
1765 }
1766# endif
1767 return uDR0;
1768}
1769#endif
1770
1771
1772/**
1773 * Gets dr1.
1774 *
1775 * @returns dr1.
1776 */
1777#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1778DECLASM(RTCCUINTREG) ASMGetDR1(void);
1779#else
1780DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1781{
1782 RTCCUINTREG uDR1;
1783# if RT_INLINE_ASM_USES_INTRIN
1784 uDR1 = __readdr(1);
1785# elif RT_INLINE_ASM_GNU_STYLE
1786# ifdef RT_ARCH_AMD64
1787 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1788# else
1789 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1790# endif
1791# else
1792 __asm
1793 {
1794# ifdef RT_ARCH_AMD64
1795 mov rax, dr1
1796 mov [uDR1], rax
1797# else
1798 mov eax, dr1
1799 mov [uDR1], eax
1800# endif
1801 }
1802# endif
1803 return uDR1;
1804}
1805#endif
1806
1807
1808/**
1809 * Gets dr2.
1810 *
1811 * @returns dr2.
1812 */
1813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1814DECLASM(RTCCUINTREG) ASMGetDR2(void);
1815#else
1816DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1817{
1818 RTCCUINTREG uDR2;
1819# if RT_INLINE_ASM_USES_INTRIN
1820 uDR2 = __readdr(2);
1821# elif RT_INLINE_ASM_GNU_STYLE
1822# ifdef RT_ARCH_AMD64
1823 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1824# else
1825 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1826# endif
1827# else
1828 __asm
1829 {
1830# ifdef RT_ARCH_AMD64
1831 mov rax, dr2
1832 mov [uDR2], rax
1833# else
1834 mov eax, dr2
1835 mov [uDR2], eax
1836# endif
1837 }
1838# endif
1839 return uDR2;
1840}
1841#endif
1842
1843
1844/**
1845 * Gets dr3.
1846 *
1847 * @returns dr3.
1848 */
1849#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1850DECLASM(RTCCUINTREG) ASMGetDR3(void);
1851#else
1852DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1853{
1854 RTCCUINTREG uDR3;
1855# if RT_INLINE_ASM_USES_INTRIN
1856 uDR3 = __readdr(3);
1857# elif RT_INLINE_ASM_GNU_STYLE
1858# ifdef RT_ARCH_AMD64
1859 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1860# else
1861 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1862# endif
1863# else
1864 __asm
1865 {
1866# ifdef RT_ARCH_AMD64
1867 mov rax, dr3
1868 mov [uDR3], rax
1869# else
1870 mov eax, dr3
1871 mov [uDR3], eax
1872# endif
1873 }
1874# endif
1875 return uDR3;
1876}
1877#endif
1878
1879
1880/**
1881 * Gets dr6.
1882 *
1883 * @returns dr6.
1884 */
1885#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1886DECLASM(RTCCUINTREG) ASMGetDR6(void);
1887#else
1888DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1889{
1890 RTCCUINTREG uDR6;
1891# if RT_INLINE_ASM_USES_INTRIN
1892 uDR6 = __readdr(6);
1893# elif RT_INLINE_ASM_GNU_STYLE
1894# ifdef RT_ARCH_AMD64
1895 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1896# else
1897 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1898# endif
1899# else
1900 __asm
1901 {
1902# ifdef RT_ARCH_AMD64
1903 mov rax, dr6
1904 mov [uDR6], rax
1905# else
1906 mov eax, dr6
1907 mov [uDR6], eax
1908# endif
1909 }
1910# endif
1911 return uDR6;
1912}
1913#endif
1914
1915
1916/**
1917 * Reads and clears DR6.
1918 *
1919 * @returns DR6.
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1923#else
1924DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1925{
1926 RTCCUINTREG uDR6;
1927# if RT_INLINE_ASM_USES_INTRIN
1928 uDR6 = __readdr(6);
1929 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1930# elif RT_INLINE_ASM_GNU_STYLE
1931 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1932# ifdef RT_ARCH_AMD64
1933 __asm__ __volatile__("movq %%dr6, %0\n\t"
1934 "movq %1, %%dr6\n\t"
1935 : "=r" (uDR6)
1936 : "r" (uNewValue));
1937# else
1938 __asm__ __volatile__("movl %%dr6, %0\n\t"
1939 "movl %1, %%dr6\n\t"
1940 : "=r" (uDR6)
1941 : "r" (uNewValue));
1942# endif
1943# else
1944 __asm
1945 {
1946# ifdef RT_ARCH_AMD64
1947 mov rax, dr6
1948 mov [uDR6], rax
1949 mov rcx, rax
1950 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1951 mov dr6, rcx
1952# else
1953 mov eax, dr6
1954 mov [uDR6], eax
1955 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1956 mov dr6, ecx
1957# endif
1958 }
1959# endif
1960 return uDR6;
1961}
1962#endif
1963
1964
1965/**
1966 * Gets dr7.
1967 *
1968 * @returns dr7.
1969 */
1970#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1971DECLASM(RTCCUINTREG) ASMGetDR7(void);
1972#else
1973DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1974{
1975 RTCCUINTREG uDR7;
1976# if RT_INLINE_ASM_USES_INTRIN
1977 uDR7 = __readdr(7);
1978# elif RT_INLINE_ASM_GNU_STYLE
1979# ifdef RT_ARCH_AMD64
1980 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1981# else
1982 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1983# endif
1984# else
1985 __asm
1986 {
1987# ifdef RT_ARCH_AMD64
1988 mov rax, dr7
1989 mov [uDR7], rax
1990# else
1991 mov eax, dr7
1992 mov [uDR7], eax
1993# endif
1994 }
1995# endif
1996 return uDR7;
1997}
1998#endif
1999
2000
2001/**
2002 * Sets dr0.
2003 *
2004 * @param uDRVal Debug register value to write
2005 */
2006#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2007DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
2008#else
2009DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
2010{
2011# if RT_INLINE_ASM_USES_INTRIN
2012 __writedr(0, uDRVal);
2013# elif RT_INLINE_ASM_GNU_STYLE
2014# ifdef RT_ARCH_AMD64
2015 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
2016# else
2017 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
2018# endif
2019# else
2020 __asm
2021 {
2022# ifdef RT_ARCH_AMD64
2023 mov rax, [uDRVal]
2024 mov dr0, rax
2025# else
2026 mov eax, [uDRVal]
2027 mov dr0, eax
2028# endif
2029 }
2030# endif
2031}
2032#endif
2033
2034
2035/**
2036 * Sets dr1.
2037 *
2038 * @param uDRVal Debug register value to write
2039 */
2040#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2041DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2042#else
2043DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2044{
2045# if RT_INLINE_ASM_USES_INTRIN
2046 __writedr(1, uDRVal);
2047# elif RT_INLINE_ASM_GNU_STYLE
2048# ifdef RT_ARCH_AMD64
2049 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2050# else
2051 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2052# endif
2053# else
2054 __asm
2055 {
2056# ifdef RT_ARCH_AMD64
2057 mov rax, [uDRVal]
2058 mov dr1, rax
2059# else
2060 mov eax, [uDRVal]
2061 mov dr1, eax
2062# endif
2063 }
2064# endif
2065}
2066#endif
2067
2068
2069/**
2070 * Sets dr2.
2071 *
2072 * @param uDRVal Debug register value to write
2073 */
2074#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2075DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2076#else
2077DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2078{
2079# if RT_INLINE_ASM_USES_INTRIN
2080 __writedr(2, uDRVal);
2081# elif RT_INLINE_ASM_GNU_STYLE
2082# ifdef RT_ARCH_AMD64
2083 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2084# else
2085 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2086# endif
2087# else
2088 __asm
2089 {
2090# ifdef RT_ARCH_AMD64
2091 mov rax, [uDRVal]
2092 mov dr2, rax
2093# else
2094 mov eax, [uDRVal]
2095 mov dr2, eax
2096# endif
2097 }
2098# endif
2099}
2100#endif
2101
2102
2103/**
2104 * Sets dr3.
2105 *
2106 * @param uDRVal Debug register value to write
2107 */
2108#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2109DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2110#else
2111DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2112{
2113# if RT_INLINE_ASM_USES_INTRIN
2114 __writedr(3, uDRVal);
2115# elif RT_INLINE_ASM_GNU_STYLE
2116# ifdef RT_ARCH_AMD64
2117 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2118# else
2119 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2120# endif
2121# else
2122 __asm
2123 {
2124# ifdef RT_ARCH_AMD64
2125 mov rax, [uDRVal]
2126 mov dr3, rax
2127# else
2128 mov eax, [uDRVal]
2129 mov dr3, eax
2130# endif
2131 }
2132# endif
2133}
2134#endif
2135
2136
2137/**
2138 * Sets dr6.
2139 *
2140 * @param uDRVal Debug register value to write
2141 */
2142#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2143DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2144#else
2145DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2146{
2147# if RT_INLINE_ASM_USES_INTRIN
2148 __writedr(6, uDRVal);
2149# elif RT_INLINE_ASM_GNU_STYLE
2150# ifdef RT_ARCH_AMD64
2151 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2152# else
2153 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2154# endif
2155# else
2156 __asm
2157 {
2158# ifdef RT_ARCH_AMD64
2159 mov rax, [uDRVal]
2160 mov dr6, rax
2161# else
2162 mov eax, [uDRVal]
2163 mov dr6, eax
2164# endif
2165 }
2166# endif
2167}
2168#endif
2169
2170
2171/**
2172 * Sets dr7.
2173 *
2174 * @param uDRVal Debug register value to write
2175 */
2176#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2177DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2178#else
2179DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2180{
2181# if RT_INLINE_ASM_USES_INTRIN
2182 __writedr(7, uDRVal);
2183# elif RT_INLINE_ASM_GNU_STYLE
2184# ifdef RT_ARCH_AMD64
2185 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2186# else
2187 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2188# endif
2189# else
2190 __asm
2191 {
2192# ifdef RT_ARCH_AMD64
2193 mov rax, [uDRVal]
2194 mov dr7, rax
2195# else
2196 mov eax, [uDRVal]
2197 mov dr7, eax
2198# endif
2199 }
2200# endif
2201}
2202#endif
2203
2204
2205/**
2206 * Compiler memory barrier.
2207 *
2208 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2209 * values and does not keep any outstanding writes when returning from this function.
2210 *
2211 * This function must be used if non-volatile data is modified by a
2212 * device or the VMM. Typical cases are port access, MMIO access,
2213 * trapping instruction, etc.
2214 */
2215#if RT_INLINE_ASM_GNU_STYLE
2216# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2217#elif RT_INLINE_ASM_USES_INTRIN
2218# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2219#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2220DECLINLINE(void) ASMCompilerBarrier(void)
2221{
2222 __asm
2223 {
2224 }
2225}
2226#endif
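
/* Typical use when a device or another context modifies non-volatile data behind
 * the compiler's back (sketch; uPort and g_bDeviceStatus are placeholders):
 * @code
 * extern uint8_t g_bDeviceStatus;   // updated by an interrupt handler
 * ASMOutU8(uPort, 1);               // kick the hypothetical device
 * while (!g_bDeviceStatus)
 * {
 *     ASMNopPause();
 *     ASMCompilerBarrier();         // make the compiler re-read g_bDeviceStatus
 * }
 * @endcode
 */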
2227
2228
2229/**
2230 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2231 *
2232 * @param Port I/O port to write to.
2233 * @param u8 8-bit integer to write.
2234 */
2235#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2236DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2237#else
2238DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2239{
2240# if RT_INLINE_ASM_GNU_STYLE
2241 __asm__ __volatile__("outb %b1, %w0\n\t"
2242 :: "Nd" (Port),
2243 "a" (u8));
2244
2245# elif RT_INLINE_ASM_USES_INTRIN
2246 __outbyte(Port, u8);
2247
2248# else
2249 __asm
2250 {
2251 mov dx, [Port]
2252 mov al, [u8]
2253 out dx, al
2254 }
2255# endif
2256}
2257#endif
2258
2259
2260/**
2261 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2262 *
2263 * @returns 8-bit integer.
2264 * @param Port I/O port to read from.
2265 */
2266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2267DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2268#else
2269DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2270{
2271 uint8_t u8;
2272# if RT_INLINE_ASM_GNU_STYLE
2273 __asm__ __volatile__("inb %w1, %b0\n\t"
2274 : "=a" (u8)
2275 : "Nd" (Port));
2276
2277# elif RT_INLINE_ASM_USES_INTRIN
2278 u8 = __inbyte(Port);
2279
2280# else
2281 __asm
2282 {
2283 mov dx, [Port]
2284 in al, dx
2285 mov [u8], al
2286 }
2287# endif
2288 return u8;
2289}
2290#endif
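
/* Example (kernel/raw-mode context); the port numbers below are conventional PC
 * values, used only for illustration:
 * @code
 * ASMOutU8(0x80, 0x42);                    // harmless write to the POST/debug port
 * uint8_t const bKbdSts = ASMInU8(0x64);   // 8042 keyboard controller status
 * @endcode
 */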
2291
2292
2293/**
2294 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2295 *
2296 * @param Port I/O port to write to.
2297 * @param u16 16-bit integer to write.
2298 */
2299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2300DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2301#else
2302DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2303{
2304# if RT_INLINE_ASM_GNU_STYLE
2305 __asm__ __volatile__("outw %w1, %w0\n\t"
2306 :: "Nd" (Port),
2307 "a" (u16));
2308
2309# elif RT_INLINE_ASM_USES_INTRIN
2310 __outword(Port, u16);
2311
2312# else
2313 __asm
2314 {
2315 mov dx, [Port]
2316 mov ax, [u16]
2317 out dx, ax
2318 }
2319# endif
2320}
2321#endif
2322
2323
2324/**
2325 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2326 *
2327 * @returns 16-bit integer.
2328 * @param Port I/O port to read from.
2329 */
2330#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2331DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2332#else
2333DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2334{
2335 uint16_t u16;
2336# if RT_INLINE_ASM_GNU_STYLE
2337 __asm__ __volatile__("inw %w1, %w0\n\t"
2338 : "=a" (u16)
2339 : "Nd" (Port));
2340
2341# elif RT_INLINE_ASM_USES_INTRIN
2342 u16 = __inword(Port);
2343
2344# else
2345 __asm
2346 {
2347 mov dx, [Port]
2348 in ax, dx
2349 mov [u16], ax
2350 }
2351# endif
2352 return u16;
2353}
2354#endif
2355
2356
2357/**
2358 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2359 *
2360 * @param Port I/O port to write to.
2361 * @param u32 32-bit integer to write.
2362 */
2363#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2364DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2365#else
2366DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2367{
2368# if RT_INLINE_ASM_GNU_STYLE
2369 __asm__ __volatile__("outl %1, %w0\n\t"
2370 :: "Nd" (Port),
2371 "a" (u32));
2372
2373# elif RT_INLINE_ASM_USES_INTRIN
2374 __outdword(Port, u32);
2375
2376# else
2377 __asm
2378 {
2379 mov dx, [Port]
2380 mov eax, [u32]
2381 out dx, eax
2382 }
2383# endif
2384}
2385#endif
2386
2387
2388/**
2389 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2390 *
2391 * @returns 32-bit integer.
2392 * @param Port I/O port to read from.
2393 */
2394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2395DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2396#else
2397DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2398{
2399 uint32_t u32;
2400# if RT_INLINE_ASM_GNU_STYLE
2401 __asm__ __volatile__("inl %w1, %0\n\t"
2402 : "=a" (u32)
2403 : "Nd" (Port));
2404
2405# elif RT_INLINE_ASM_USES_INTRIN
2406 u32 = __indword(Port);
2407
2408# else
2409 __asm
2410 {
2411 mov dx, [Port]
2412 in eax, dx
2413 mov [u32], eax
2414 }
2415# endif
2416 return u32;
2417}
2418#endif
2419
2420
2421/**
2422 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2423 *
2424 * @param Port I/O port to write to.
2425 * @param pau8 Pointer to the string buffer.
2426 * @param c The number of items to write.
2427 */
2428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2429DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2430#else
2431DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2432{
2433# if RT_INLINE_ASM_GNU_STYLE
2434 __asm__ __volatile__("rep; outsb\n\t"
2435 : "+S" (pau8),
2436 "+c" (c)
2437 : "d" (Port));
2438
2439# elif RT_INLINE_ASM_USES_INTRIN
2440 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2441
2442# else
2443 __asm
2444 {
2445 mov dx, [Port]
2446 mov ecx, [c]
2447 mov eax, [pau8]
2448 xchg esi, eax
2449 rep outsb
2450 xchg esi, eax
2451 }
2452# endif
2453}
2454#endif
2455
2456
2457/**
2458 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2459 *
2460 * @param Port I/O port to read from.
2461 * @param pau8 Pointer to the string buffer (output).
2462 * @param c The number of items to read.
2463 */
2464#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2465DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2466#else
2467DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2468{
2469# if RT_INLINE_ASM_GNU_STYLE
2470 __asm__ __volatile__("rep; insb\n\t"
2471 : "+D" (pau8),
2472 "+c" (c)
2473 : "d" (Port));
2474
2475# elif RT_INLINE_ASM_USES_INTRIN
2476 __inbytestring(Port, pau8, (unsigned long)c);
2477
2478# else
2479 __asm
2480 {
2481 mov dx, [Port]
2482 mov ecx, [c]
2483 mov eax, [pau8]
2484 xchg edi, eax
2485 rep insb
2486 xchg edi, eax
2487 }
2488# endif
2489}
2490#endif
2491
2492
2493/**
2494 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2495 *
2496 * @param Port I/O port to write to.
2497 * @param pau16 Pointer to the string buffer.
2498 * @param c The number of items to write.
2499 */
2500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2501DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2502#else
2503DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2504{
2505# if RT_INLINE_ASM_GNU_STYLE
2506 __asm__ __volatile__("rep; outsw\n\t"
2507 : "+S" (pau16),
2508 "+c" (c)
2509 : "d" (Port));
2510
2511# elif RT_INLINE_ASM_USES_INTRIN
2512 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2513
2514# else
2515 __asm
2516 {
2517 mov dx, [Port]
2518 mov ecx, [c]
2519 mov eax, [pau16]
2520 xchg esi, eax
2521 rep outsw
2522 xchg esi, eax
2523 }
2524# endif
2525}
2526#endif
2527
2528
2529/**
2530 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2531 *
2532 * @param Port I/O port to read from.
2533 * @param pau16 Pointer to the string buffer (output).
2534 * @param c The number of items to read.
2535 */
2536#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2537DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2538#else
2539DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2540{
2541# if RT_INLINE_ASM_GNU_STYLE
2542 __asm__ __volatile__("rep; insw\n\t"
2543 : "+D" (pau16),
2544 "+c" (c)
2545 : "d" (Port));
2546
2547# elif RT_INLINE_ASM_USES_INTRIN
2548 __inwordstring(Port, pau16, (unsigned long)c);
2549
2550# else
2551 __asm
2552 {
2553 mov dx, [Port]
2554 mov ecx, [c]
2555 mov eax, [pau16]
2556 xchg edi, eax
2557 rep insw
2558 xchg edi, eax
2559 }
2560# endif
2561}
2562#endif
2563
2564
2565/**
2566 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2567 *
2568 * @param Port I/O port to write to.
2569 * @param pau32 Pointer to the string buffer.
2570 * @param c The number of items to write.
2571 */
2572#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2573DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2574#else
2575DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2576{
2577# if RT_INLINE_ASM_GNU_STYLE
2578 __asm__ __volatile__("rep; outsl\n\t"
2579 : "+S" (pau32),
2580 "+c" (c)
2581 : "d" (Port));
2582
2583# elif RT_INLINE_ASM_USES_INTRIN
2584 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2585
2586# else
2587 __asm
2588 {
2589 mov dx, [Port]
2590 mov ecx, [c]
2591 mov eax, [pau32]
2592 xchg esi, eax
2593 rep outsd
2594 xchg esi, eax
2595 }
2596# endif
2597}
2598#endif
2599
2600
2601/**
2602 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2603 *
2604 * @param Port I/O port to read from.
2605 * @param pau32 Pointer to the string buffer (output).
2606 * @param c The number of items to read.
2607 */
2608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2609DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2610#else
2611DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2612{
2613# if RT_INLINE_ASM_GNU_STYLE
2614 __asm__ __volatile__("rep; insl\n\t"
2615 : "+D" (pau32),
2616 "+c" (c)
2617 : "d" (Port));
2618
2619# elif RT_INLINE_ASM_USES_INTRIN
2620 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2621
2622# else
2623 __asm
2624 {
2625 mov dx, [Port]
2626 mov ecx, [c]
2627 mov eax, [pau32]
2628 xchg edi, eax
2629 rep insd
2630 xchg edi, eax
2631 }
2632# endif
2633}
2634#endif
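
/*
 * A minimal usage sketch for the string I/O helpers above: reading one
 * sector's worth of data with programmed I/O.  The port number (0x1f0, the
 * legacy ATA data port) and the 512-byte sector size are illustrative
 * assumptions, not anything defined by this header; writing the buffer back
 * out would be the symmetric ASMOutStrU16 call.
 *
 *      uint16_t au16Sector[256];
 *      ASMInStrU16((RTIOPORT)0x1f0, &au16Sector[0], RT_ELEMENTS(au16Sector));
 */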
2635
2636
2637/**
2638 * Atomically Exchange an unsigned 8-bit value, ordered.
2639 *
2640 * @returns Current *pu8 value
2641 * @param pu8 Pointer to the 8-bit variable to update.
2642 * @param u8 The 8-bit value to assign to *pu8.
2643 */
2644#if RT_INLINE_ASM_EXTERNAL
2645DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2646#else
2647DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2648{
2649# if RT_INLINE_ASM_GNU_STYLE
2650 __asm__ __volatile__("xchgb %0, %1\n\t"
2651 : "=m" (*pu8),
2652 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2653 : "1" (u8),
2654 "m" (*pu8));
2655# else
2656 __asm
2657 {
2658# ifdef RT_ARCH_AMD64
2659 mov rdx, [pu8]
2660 mov al, [u8]
2661 xchg [rdx], al
2662 mov [u8], al
2663# else
2664 mov edx, [pu8]
2665 mov al, [u8]
2666 xchg [edx], al
2667 mov [u8], al
2668# endif
2669 }
2670# endif
2671 return u8;
2672}
2673#endif
2674
2675
2676/**
2677 * Atomically Exchange a signed 8-bit value, ordered.
2678 *
2679 * @returns Current *pi8 value
2680 * @param pi8 Pointer to the 8-bit variable to update.
2681 * @param i8 The 8-bit value to assign to *pi8.
2682 */
2683DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2684{
2685 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2686}
2687
2688
2689/**
2690 * Atomically Exchange a bool value, ordered.
2691 *
2692 * @returns Current *pf value
2693 * @param pf Pointer to the boolean variable to update.
2694 * @param f The boolean value to assign to *pf.
2695 */
2696DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2697{
2698#ifdef _MSC_VER
2699 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2700#else
2701 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2702#endif
2703}
2704
2705
2706/**
2707 * Atomically Exchange an unsigned 16-bit value, ordered.
2708 *
2709 * @returns Current *pu16 value
2710 * @param pu16 Pointer to the 16-bit variable to update.
2711 * @param u16 The 16-bit value to assign to *pu16.
2712 */
2713#if RT_INLINE_ASM_EXTERNAL
2714DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2715#else
2716DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2717{
2718# if RT_INLINE_ASM_GNU_STYLE
2719 __asm__ __volatile__("xchgw %0, %1\n\t"
2720 : "=m" (*pu16),
2721 "=r" (u16)
2722 : "1" (u16),
2723 "m" (*pu16));
2724# else
2725 __asm
2726 {
2727# ifdef RT_ARCH_AMD64
2728 mov rdx, [pu16]
2729 mov ax, [u16]
2730 xchg [rdx], ax
2731 mov [u16], ax
2732# else
2733 mov edx, [pu16]
2734 mov ax, [u16]
2735 xchg [edx], ax
2736 mov [u16], ax
2737# endif
2738 }
2739# endif
2740 return u16;
2741}
2742#endif
2743
2744
2745/**
2746 * Atomically Exchange a signed 16-bit value, ordered.
2747 *
2748 * @returns Current *pi16 value
2749 * @param pi16 Pointer to the 16-bit variable to update.
2750 * @param i16 The 16-bit value to assign to *pi16.
2751 */
2752DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2753{
2754 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2755}
2756
2757
2758/**
2759 * Atomically Exchange an unsigned 32-bit value, ordered.
2760 *
2761 * @returns Current *pu32 value
2762 * @param pu32 Pointer to the 32-bit variable to update.
2763 * @param u32 The 32-bit value to assign to *pu32.
2764 */
2765#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2766DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2767#else
2768DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2769{
2770# if RT_INLINE_ASM_GNU_STYLE
2771 __asm__ __volatile__("xchgl %0, %1\n\t"
2772 : "=m" (*pu32),
2773 "=r" (u32)
2774 : "1" (u32),
2775 "m" (*pu32));
2776
2777# elif RT_INLINE_ASM_USES_INTRIN
2778 u32 = _InterlockedExchange((long *)pu32, u32);
2779
2780# else
2781 __asm
2782 {
2783# ifdef RT_ARCH_AMD64
2784 mov rdx, [pu32]
2785 mov eax, u32
2786 xchg [rdx], eax
2787 mov [u32], eax
2788# else
2789 mov edx, [pu32]
2790 mov eax, u32
2791 xchg [edx], eax
2792 mov [u32], eax
2793# endif
2794 }
2795# endif
2796 return u32;
2797}
2798#endif
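
/*
 * A minimal usage sketch: a test-and-set spin flag built on ASMAtomicXchgU32,
 * which returns the previous value.  The names are hypothetical and a real
 * lock would also want pause/back-off handling; the empty loop body simply
 * spins until the current owner writes 0 back.
 *
 *      static uint32_t volatile s_fExampleLocked = 0;
 *
 *      static void exampleAcquire(void)
 *      {
 *          while (ASMAtomicXchgU32(&s_fExampleLocked, 1) != 0)
 *              ;
 *      }
 *
 *      static void exampleRelease(void)
 *      {
 *          ASMAtomicXchgU32(&s_fExampleLocked, 0);
 *      }
 */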
2799
2800
2801/**
2802 * Atomically Exchange a signed 32-bit value, ordered.
2803 *
2804 * @returns Current *pi32 value
2805 * @param pi32 Pointer to the 32-bit variable to update.
2806 * @param i32 The 32-bit value to assign to *pi32.
2807 */
2808DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2809{
2810 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2811}
2812
2813
2814/**
2815 * Atomically Exchange an unsigned 64-bit value, ordered.
2816 *
2817 * @returns Current *pu64 value
2818 * @param pu64 Pointer to the 64-bit variable to update.
2819 * @param u64 The 64-bit value to assign to *pu64.
2820 */
2821#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
2822 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2823DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2824#else
2825DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2826{
2827# if defined(RT_ARCH_AMD64)
2828# if RT_INLINE_ASM_USES_INTRIN
2829 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2830
2831# elif RT_INLINE_ASM_GNU_STYLE
2832 __asm__ __volatile__("xchgq %0, %1\n\t"
2833 : "=m" (*pu64),
2834 "=r" (u64)
2835 : "1" (u64),
2836 "m" (*pu64));
2837# else
2838 __asm
2839 {
2840 mov rdx, [pu64]
2841 mov rax, [u64]
2842 xchg [rdx], rax
2843 mov [u64], rax
2844 }
2845# endif
2846# else /* !RT_ARCH_AMD64 */
2847# if RT_INLINE_ASM_GNU_STYLE
2848# if defined(PIC) || defined(__PIC__)
2849 uint32_t u32EBX = (uint32_t)u64;
2850 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2851 "xchgl %%ebx, %3\n\t"
2852 "1:\n\t"
2853 "lock; cmpxchg8b (%5)\n\t"
2854 "jnz 1b\n\t"
2855 "movl %3, %%ebx\n\t"
2856 /*"xchgl %%esi, %5\n\t"*/
2857 : "=A" (u64),
2858 "=m" (*pu64)
2859 : "0" (*pu64),
2860 "m" ( u32EBX ),
2861 "c" ( (uint32_t)(u64 >> 32) ),
2862 "S" (pu64));
2863# else /* !PIC */
2864 __asm__ __volatile__("1:\n\t"
2865 "lock; cmpxchg8b %1\n\t"
2866 "jnz 1b\n\t"
2867 : "=A" (u64),
2868 "=m" (*pu64)
2869 : "0" (*pu64),
2870 "b" ( (uint32_t)u64 ),
2871 "c" ( (uint32_t)(u64 >> 32) ));
2872# endif
2873# else
2874 __asm
2875 {
2876 mov ebx, dword ptr [u64]
2877 mov ecx, dword ptr [u64 + 4]
2878 mov edi, pu64
2879 mov eax, dword ptr [edi]
2880 mov edx, dword ptr [edi + 4]
2881 retry:
2882 lock cmpxchg8b [edi]
2883 jnz retry
2884 mov dword ptr [u64], eax
2885 mov dword ptr [u64 + 4], edx
2886 }
2887# endif
2888# endif /* !RT_ARCH_AMD64 */
2889 return u64;
2890}
2891#endif
2892
2893
2894/**
2895 * Atomically Exchange a signed 64-bit value, ordered.
2896 *
2897 * @returns Current *pi64 value
2898 * @param pi64 Pointer to the 64-bit variable to update.
2899 * @param i64 The 64-bit value to assign to *pi64.
2900 */
2901DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2902{
2903 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2904}
2905
2906
2907/**
2908 * Atomically Exchange a pointer value, ordered.
2909 *
2910 * @returns Current *ppv value
2911 * @param ppv Pointer to the pointer variable to update.
2912 * @param pv The pointer value to assign to *ppv.
2913 */
2914DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2915{
2916#if ARCH_BITS == 32
2917 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2918#elif ARCH_BITS == 64
2919 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2920#else
2921# error "ARCH_BITS is bogus"
2922#endif
2923}
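
/*
 * A minimal usage sketch: atomically installing a new buffer pointer and
 * getting back whatever was published before, so exactly one thread ends up
 * owning (and may free) the old buffer.  The variable and function names are
 * hypothetical.
 *
 *      void *pvOld = ASMAtomicXchgPtr(&g_pvExampleBuf, pvNewBuf);
 *      if (pvOld)
 *          exampleFreeBuffer(pvOld);
 */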
2924
2925
2926/**
2927 * Atomically Exchange a raw-mode context pointer value, ordered.
2928 *
2929 * @returns Current *ppvRC value
2930 * @param ppvRC Pointer to the pointer variable to update.
2931 * @param pvRC The pointer value to assign to *ppvRC.
2932 */
2933DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2934{
2935 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2936}
2937
2938
2939/**
2940 * Atomically Exchange a ring-0 pointer value, ordered.
2941 *
2942 * @returns Current *ppvR0 value
2943 * @param ppvR0 Pointer to the pointer variable to update.
2944 * @param pvR0 The pointer value to assign to *ppvR0.
2945 */
2946DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2947{
2948#if R0_ARCH_BITS == 32
2949 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2950#elif R0_ARCH_BITS == 64
2951 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2952#else
2953# error "R0_ARCH_BITS is bogus"
2954#endif
2955}
2956
2957
2958/**
2959 * Atomically Exchange a ring-3 pointer value, ordered.
2960 *
2961 * @returns Current *ppvR3 value
2962 * @param ppvR3 Pointer to the pointer variable to update.
2963 * @param pvR3 The pointer value to assign to *ppvR3.
2964 */
2965DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2966{
2967#if R3_ARCH_BITS == 32
2968 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2969#elif R3_ARCH_BITS == 64
2970 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2971#else
2972# error "R3_ARCH_BITS is bogus"
2973#endif
2974}
2975
2976
2977/** @def ASMAtomicXchgHandle
2978 * Atomically Exchange a typical IPRT handle value, ordered.
2979 *
2980 * @param ph Pointer to the value to update.
2981 * @param hNew The new value to assign to *ph.
2982 * @param phRes Where to store the current *ph value.
2983 *
2984 * @remarks This doesn't currently work for all handles (like RTFILE).
2985 */
2986#if HC_ARCH_BITS == 32
2987# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2988 do { \
2989 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2990 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2991 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2992 } while (0)
2993#elif HC_ARCH_BITS == 64
2994# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2995 do { \
2996 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2997 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2998 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2999 } while (0)
3000#else
3001# error HC_ARCH_BITS
3002#endif
3003
3004
3005/**
3006 * Atomically Exchange a value whose size might differ
3007 * between platforms or compilers, ordered.
3008 *
3009 * @param pu Pointer to the variable to update.
3010 * @param uNew The value to assign to *pu.
3011 * @todo This is busted as it's missing the result argument.
3012 */
3013#define ASMAtomicXchgSize(pu, uNew) \
3014 do { \
3015 switch (sizeof(*(pu))) { \
3016 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3017 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3018 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3019 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3020 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3021 } \
3022 } while (0)
3023
3024/**
3025 * Atomically Exchange a value whose size might differ
3026 * between platforms or compilers, ordered.
3027 *
3028 * @param pu Pointer to the variable to update.
3029 * @param uNew The value to assign to *pu.
3030 * @param puRes Where to store the current *pu value.
3031 */
3032#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3033 do { \
3034 switch (sizeof(*(pu))) { \
3035 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3036 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3037 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3038 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3039 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3040 } \
3041 } while (0)
3042
3043
3044
3045/**
3046 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
3047 *
3048 * @returns true if xchg was done.
3049 * @returns false if xchg wasn't done.
3050 *
3051 * @param pu8 Pointer to the value to update.
3052 * @param u8New The new value to assign to *pu8.
3053 * @param u8Old The old value to compare *pu8 with.
3054 */
3055#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
3056DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
3057#else
3058DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
3059{
3060 uint8_t u8Ret;
3061 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
3062 "setz %1\n\t"
3063 : "=m" (*pu8),
3064 "=qm" (u8Ret),
3065 "=a" (u8Old)
3066 : "q" (u8New),
3067 "2" (u8Old),
3068 "m" (*pu8));
3069 return (bool)u8Ret;
3070}
3071#endif
3072
3073
3074/**
3075 * Atomically Compare and Exchange a signed 8-bit value, ordered.
3076 *
3077 * @returns true if xchg was done.
3078 * @returns false if xchg wasn't done.
3079 *
3080 * @param pi8 Pointer to the value to update.
3081 * @param i8New The new value to assign to *pi8.
3082 * @param i8Old The old value to compare *pi8 with.
3083 */
3084DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
3085{
3086 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
3087}
3088
3089
3090/**
3091 * Atomically Compare and Exchange a bool value, ordered.
3092 *
3093 * @returns true if xchg was done.
3094 * @returns false if xchg wasn't done.
3095 *
3096 * @param pf Pointer to the value to update.
3097 * @param fNew The new value to assign to *pf.
3098 * @param fOld The old value to compare *pf with.
3099 */
3100DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
3101{
3102 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
3103}
3104
3105
3106/**
3107 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3108 *
3109 * @returns true if xchg was done.
3110 * @returns false if xchg wasn't done.
3111 *
3112 * @param pu32 Pointer to the value to update.
3113 * @param u32New The new value to assign to *pu32.
3114 * @param u32Old The old value to compare *pu32 with.
3115 */
3116#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3117DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3118#else
3119DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3120{
3121# if RT_INLINE_ASM_GNU_STYLE
3122 uint8_t u8Ret;
3123 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3124 "setz %1\n\t"
3125 : "=m" (*pu32),
3126 "=qm" (u8Ret),
3127 "=a" (u32Old)
3128 : "r" (u32New),
3129 "2" (u32Old),
3130 "m" (*pu32));
3131 return (bool)u8Ret;
3132
3133# elif RT_INLINE_ASM_USES_INTRIN
3134 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3135
3136# else
3137 uint32_t u32Ret;
3138 __asm
3139 {
3140# ifdef RT_ARCH_AMD64
3141 mov rdx, [pu32]
3142# else
3143 mov edx, [pu32]
3144# endif
3145 mov eax, [u32Old]
3146 mov ecx, [u32New]
3147# ifdef RT_ARCH_AMD64
3148 lock cmpxchg [rdx], ecx
3149# else
3150 lock cmpxchg [edx], ecx
3151# endif
3152 setz al
3153 movzx eax, al
3154 mov [u32Ret], eax
3155 }
3156 return !!u32Ret;
3157# endif
3158}
3159#endif
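
/*
 * A minimal usage sketch of the usual compare-and-exchange retry loop, here
 * keeping a running maximum.  The function name is hypothetical; the plain
 * read only seeds the loop, the cmpxchg validates it.
 *
 *      static void exampleUpdateMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur;
 *          do
 *          {
 *              u32Cur = *pu32Max;
 *              if (u32New <= u32Cur)
 *                  return;
 *          } while (!ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
 *      }
 */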
3160
3161
3162/**
3163 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3164 *
3165 * @returns true if xchg was done.
3166 * @returns false if xchg wasn't done.
3167 *
3168 * @param pi32 Pointer to the value to update.
3169 * @param i32New The new value to assign to *pi32.
3170 * @param i32Old The old value to compare *pi32 with.
3171 */
3172DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3173{
3174 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3175}
3176
3177
3178/**
3179 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3180 *
3181 * @returns true if xchg was done.
3182 * @returns false if xchg wasn't done.
3183 *
3184 * @param pu64 Pointer to the 64-bit variable to update.
3185 * @param u64New The 64-bit value to assign to *pu64.
3186 * @param u64Old The value to compare with.
3187 */
3188#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3189 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3190DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3191#else
3192DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
3193{
3194# if RT_INLINE_ASM_USES_INTRIN
3195 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3196
3197# elif defined(RT_ARCH_AMD64)
3198# if RT_INLINE_ASM_GNU_STYLE
3199 uint8_t u8Ret;
3200 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3201 "setz %1\n\t"
3202 : "=m" (*pu64),
3203 "=qm" (u8Ret),
3204 "=a" (u64Old)
3205 : "r" (u64New),
3206 "2" (u64Old),
3207 "m" (*pu64));
3208 return (bool)u8Ret;
3209# else
3210 bool fRet;
3211 __asm
3212 {
3213 mov rdx, [pu64]
3214 mov rax, [u64Old]
3215 mov rcx, [u64New]
3216 lock cmpxchg [rdx], rcx
3217 setz al
3218 mov [fRet], al
3219 }
3220 return fRet;
3221# endif
3222# else /* !RT_ARCH_AMD64 */
3223 uint32_t u32Ret;
3224# if RT_INLINE_ASM_GNU_STYLE
3225# if defined(PIC) || defined(__PIC__)
3226 uint32_t u32EBX = (uint32_t)u64New;
3227 uint32_t u32Spill;
3228 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3229 "lock; cmpxchg8b (%6)\n\t"
3230 "setz %%al\n\t"
3231 "movl %4, %%ebx\n\t"
3232 "movzbl %%al, %%eax\n\t"
3233 : "=a" (u32Ret),
3234 "=d" (u32Spill),
3235# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3236 "+m" (*pu64)
3237# else
3238 "=m" (*pu64)
3239# endif
3240 : "A" (u64Old),
3241 "m" ( u32EBX ),
3242 "c" ( (uint32_t)(u64New >> 32) ),
3243 "S" (pu64));
3244# else /* !PIC */
3245 uint32_t u32Spill;
3246 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3247 "setz %%al\n\t"
3248 "movzbl %%al, %%eax\n\t"
3249 : "=a" (u32Ret),
3250 "=d" (u32Spill),
3251 "+m" (*pu64)
3252 : "A" (u64Old),
3253 "b" ( (uint32_t)u64New ),
3254 "c" ( (uint32_t)(u64New >> 32) ));
3255# endif
3256 return (bool)u32Ret;
3257# else
3258 __asm
3259 {
3260 mov ebx, dword ptr [u64New]
3261 mov ecx, dword ptr [u64New + 4]
3262 mov edi, [pu64]
3263 mov eax, dword ptr [u64Old]
3264 mov edx, dword ptr [u64Old + 4]
3265 lock cmpxchg8b [edi]
3266 setz al
3267 movzx eax, al
3268 mov dword ptr [u32Ret], eax
3269 }
3270 return !!u32Ret;
3271# endif
3272# endif /* !RT_ARCH_AMD64 */
3273}
3274#endif
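
/*
 * A minimal usage sketch: the same retry-loop shape for a 64-bit variable.
 * On 32-bit hosts the seed read below may be torn, but that is harmless
 * because the cmpxchg8b based compare-and-exchange only succeeds when the
 * seed matched the real memory contents.  The function name is hypothetical.
 *
 *      static void exampleAddU64(uint64_t volatile *pu64, uint64_t u64Add)
 *      {
 *          uint64_t u64Cur;
 *          do
 *              u64Cur = *pu64;
 *          while (!ASMAtomicCmpXchgU64(pu64, u64Cur + u64Add, u64Cur));
 *      }
 */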
3275
3276
3277/**
3278 * Atomically Compare and exchange a signed 64-bit value, ordered.
3279 *
3280 * @returns true if xchg was done.
3281 * @returns false if xchg wasn't done.
3282 *
3283 * @param pi64 Pointer to the 64-bit variable to update.
3284 * @param i64 The 64-bit value to assign to *pi64.
3285 * @param i64Old The value to compare with.
3286 */
3287DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3288{
3289 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3290}
3291
3292
3293/**
3294 * Atomically Compare and Exchange a pointer value, ordered.
3295 *
3296 * @returns true if xchg was done.
3297 * @returns false if xchg wasn't done.
3298 *
3299 * @param ppv Pointer to the value to update.
3300 * @param pvNew The new value to assign to *ppv.
3301 * @param pvOld The old value to compare *ppv with.
3302 */
3303DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3304{
3305#if ARCH_BITS == 32
3306 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3307#elif ARCH_BITS == 64
3308 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3309#else
3310# error "ARCH_BITS is bogus"
3311#endif
3312}
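
/*
 * A minimal usage sketch: pushing a node onto a lock-free singly linked list
 * with ASMAtomicCmpXchgPtr.  The node type and function name are
 * hypothetical, and popping safely needs additional machinery (ABA
 * protection) that is out of scope here.
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *      static void examplePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOldHead;
 *          do
 *          {
 *              pOldHead = *ppHead;
 *              pNode->pNext = pOldHead;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOldHead));
 *      }
 */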
3313
3314
3315/** @def ASMAtomicCmpXchgHandle
3316 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3317 *
3318 * @param ph Pointer to the value to update.
3319 * @param hNew The new value to assign to *ph.
3320 * @param hOld The old value to compare *ph with.
3321 * @param fRc Where to store the result.
3322 *
3323 * @remarks This doesn't currently work for all handles (like RTFILE).
3324 */
3325#if HC_ARCH_BITS == 32
3326# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3327 do { \
3328 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3329 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3330 } while (0)
3331#elif HC_ARCH_BITS == 64
3332# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3333 do { \
3334 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3335 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3336 } while (0)
3337#else
3338# error HC_ARCH_BITS
3339#endif
3340
3341
3342/** @def ASMAtomicCmpXchgSize
3343 * Atomically Compare and Exchange a value whose size might differ
3344 * between platforms or compilers, ordered.
3345 *
3346 * @param pu Pointer to the value to update.
3347 * @param uNew The new value to assign to *pu.
3348 * @param uOld The old value to compare *pu with.
3349 * @param fRc Where to store the result.
3350 */
3351#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3352 do { \
3353 switch (sizeof(*(pu))) { \
3354 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3355 break; \
3356 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3357 break; \
3358 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3359 (fRc) = false; \
3360 break; \
3361 } \
3362 } while (0)
3363
3364
3365/**
3366 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3367 * passes back old value, ordered.
3368 *
3369 * @returns true if xchg was done.
3370 * @returns false if xchg wasn't done.
3371 *
3372 * @param pu32 Pointer to the value to update.
3373 * @param u32New The new value to assign to *pu32.
3374 * @param u32Old The old value to compare *pu32 with.
3375 * @param pu32Old Pointer to store the old value at.
3376 */
3377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3378DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3379#else
3380DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3381{
3382# if RT_INLINE_ASM_GNU_STYLE
3383 uint8_t u8Ret;
3384 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3385 "setz %1\n\t"
3386 : "=m" (*pu32),
3387 "=qm" (u8Ret),
3388 "=a" (*pu32Old)
3389 : "r" (u32New),
3390 "a" (u32Old),
3391 "m" (*pu32));
3392 return (bool)u8Ret;
3393
3394# elif RT_INLINE_ASM_USES_INTRIN
3395 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3396
3397# else
3398 uint32_t u32Ret;
3399 __asm
3400 {
3401# ifdef RT_ARCH_AMD64
3402 mov rdx, [pu32]
3403# else
3404 mov edx, [pu32]
3405# endif
3406 mov eax, [u32Old]
3407 mov ecx, [u32New]
3408# ifdef RT_ARCH_AMD64
3409 lock cmpxchg [rdx], ecx
3410 mov rdx, [pu32Old]
3411 mov [rdx], eax
3412# else
3413 lock cmpxchg [edx], ecx
3414 mov edx, [pu32Old]
3415 mov [edx], eax
3416# endif
3417 setz al
3418 movzx eax, al
3419 mov [u32Ret], eax
3420 }
3421 return !!u32Ret;
3422# endif
3423}
3424#endif
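
/*
 * A minimal usage sketch: the Ex variant hands back the value actually found,
 * so a failed attempt can retry without re-reading the variable.  The sketch
 * returns the flags as they were just before the bits were set; the names
 * are hypothetical.
 *
 *      static uint32_t exampleSetBitsU32(uint32_t volatile *pu32, uint32_t fBits)
 *      {
 *          uint32_t u32Old = *pu32;
 *          while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBits, u32Old, &u32Old))
 *              ;
 *          return u32Old;
 *      }
 */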
3425
3426
3427/**
3428 * Atomically Compare and Exchange a signed 32-bit value, additionally
3429 * passes back old value, ordered.
3430 *
3431 * @returns true if xchg was done.
3432 * @returns false if xchg wasn't done.
3433 *
3434 * @param pi32 Pointer to the value to update.
3435 * @param i32New The new value to assign to *pi32.
3436 * @param i32Old The old value to compare *pi32 with.
3437 * @param pi32Old Pointer to store the old value at.
3438 */
3439DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3440{
3441 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3442}
3443
3444
3445/**
3446 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3447 * passing back old value, ordered.
3448 *
3449 * @returns true if xchg was done.
3450 * @returns false if xchg wasn't done.
3451 *
3452 * @param pu64 Pointer to the 64-bit variable to update.
3453 * @param u64New The 64-bit value to assign to *pu64.
3454 * @param u64Old The value to compare with.
3455 * @param pu64Old Pointer to store the old value at.
3456 */
3457#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3458 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3459DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3460#else
3461DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3462{
3463# if RT_INLINE_ASM_USES_INTRIN
3464 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3465
3466# elif defined(RT_ARCH_AMD64)
3467# if RT_INLINE_ASM_GNU_STYLE
3468 uint8_t u8Ret;
3469 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3470 "setz %1\n\t"
3471 : "=m" (*pu64),
3472 "=qm" (u8Ret),
3473 "=a" (*pu64Old)
3474 : "r" (u64New),
3475 "a" (u64Old),
3476 "m" (*pu64));
3477 return (bool)u8Ret;
3478# else
3479 bool fRet;
3480 __asm
3481 {
3482 mov rdx, [pu64]
3483 mov rax, [u64Old]
3484 mov rcx, [u64New]
3485 lock cmpxchg [rdx], rcx
3486 mov rdx, [pu64Old]
3487 mov [rdx], rax
3488 setz al
3489 mov [fRet], al
3490 }
3491 return fRet;
3492# endif
3493# else /* !RT_ARCH_AMD64 */
3494# if RT_INLINE_ASM_GNU_STYLE
3495 uint64_t u64Ret;
3496# if defined(PIC) || defined(__PIC__)
3497 /* NB: this code uses a memory clobber description, because the clean
3498 * solution with an output value for *pu64 makes gcc run out of registers.
3499 * This will cause suboptimal code, and anyone with a better solution is
3500 * welcome to improve this. */
3501 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3502 "lock; cmpxchg8b %3\n\t"
3503 "xchgl %%ebx, %1\n\t"
3504 : "=A" (u64Ret)
3505 : "DS" ((uint32_t)u64New),
3506 "c" ((uint32_t)(u64New >> 32)),
3507 "m" (*pu64),
3508 "0" (u64Old)
3509 : "memory" );
3510# else /* !PIC */
3511 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3512 : "=A" (u64Ret),
3513 "=m" (*pu64)
3514 : "b" ((uint32_t)u64New),
3515 "c" ((uint32_t)(u64New >> 32)),
3516 "m" (*pu64),
3517 "0" (u64Old));
3518# endif
3519 *pu64Old = u64Ret;
3520 return u64Ret == u64Old;
3521# else
3522 uint32_t u32Ret;
3523 __asm
3524 {
3525 mov ebx, dword ptr [u64New]
3526 mov ecx, dword ptr [u64New + 4]
3527 mov edi, [pu64]
3528 mov eax, dword ptr [u64Old]
3529 mov edx, dword ptr [u64Old + 4]
3530 lock cmpxchg8b [edi]
3531 mov ebx, [pu64Old]
3532 mov [ebx], eax
3533 setz al
3534 movzx eax, al
3535 add ebx, 4
3536 mov [ebx], edx
3537 mov dword ptr [u32Ret], eax
3538 }
3539 return !!u32Ret;
3540# endif
3541# endif /* !RT_ARCH_AMD64 */
3542}
3543#endif
3544
3545
3546/**
3547 * Atomically Compare and exchange a signed 64-bit value, additionally
3548 * passing back old value, ordered.
3549 *
3550 * @returns true if xchg was done.
3551 * @returns false if xchg wasn't done.
3552 *
3553 * @param pi64 Pointer to the 64-bit variable to update.
3554 * @param i64 The 64-bit value to assign to *pi64.
3555 * @param i64Old The value to compare with.
3556 * @param pi64Old Pointer to store the old value at.
3557 */
3558DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3559{
3560 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3561}
3562
3563/** @def ASMAtomicCmpXchgExHandle
3564 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3565 *
3566 * @param ph Pointer to the value to update.
3567 * @param hNew The new value to assign to *ph.
3568 * @param hOld The old value to compare *ph with.
3569 * @param fRc Where to store the result.
3570 * @param phOldVal Pointer to where to store the old value.
3571 *
3572 * @remarks This doesn't currently work for all handles (like RTFILE).
3573 */
3574#if HC_ARCH_BITS == 32
3575# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3576 do { \
3577 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3578 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
3579 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3580 } while (0)
3581#elif HC_ARCH_BITS == 64
3582# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3583 do { \
3584 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3585 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3586 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3587 } while (0)
3588#else
3589# error HC_ARCH_BITS
3590#endif
3591
3592
3593/** @def ASMAtomicCmpXchgExSize
3594 * Atomically Compare and Exchange a value whose size might differ
3595 * between platforms or compilers. Additionally passes back old value.
3596 *
3597 * @param pu Pointer to the value to update.
3598 * @param uNew The new value to assign to *pu.
3599 * @param uOld The old value to compare *pu with.
3600 * @param fRc Where to store the result.
3601 * @param puOldVal Pointer to where to store the old value.
3602 */
3603#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3604 do { \
3605 switch (sizeof(*(pu))) { \
3606 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3607 break; \
3608 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3609 break; \
3610 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3611 (fRc) = false; \
3612 *(puOldVal) = 0; \
3613 break; \
3614 } \
3615 } while (0)
3616
3617
3618/**
3619 * Atomically Compare and Exchange a pointer value, additionally
3620 * passing back old value, ordered.
3621 *
3622 * @returns true if xchg was done.
3623 * @returns false if xchg wasn't done.
3624 *
3625 * @param ppv Pointer to the value to update.
3626 * @param pvNew The new value to assign to *ppv.
3627 * @param pvOld The old value to compare *ppv with.
3628 * @param ppvOld Pointer to store the old value at.
3629 */
3630DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3631{
3632#if ARCH_BITS == 32
3633 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3634#elif ARCH_BITS == 64
3635 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3636#else
3637# error "ARCH_BITS is bogus"
3638#endif
3639}
3640
3641
3642/**
3643 * Atomically exchanges and adds to a 32-bit value, ordered.
3644 *
3645 * @returns The old value.
3646 * @param pu32 Pointer to the value.
3647 * @param u32 Number to add.
3648 */
3649#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3650DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3651#else
3652DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3653{
3654# if RT_INLINE_ASM_USES_INTRIN
3655 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3656 return u32;
3657
3658# elif RT_INLINE_ASM_GNU_STYLE
3659 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3660 : "=r" (u32),
3661 "=m" (*pu32)
3662 : "0" (u32),
3663 "m" (*pu32)
3664 : "memory");
3665 return u32;
3666# else
3667 __asm
3668 {
3669 mov eax, [u32]
3670# ifdef RT_ARCH_AMD64
3671 mov rdx, [pu32]
3672 lock xadd [rdx], eax
3673# else
3674 mov edx, [pu32]
3675 lock xadd [edx], eax
3676# endif
3677 mov [u32], eax
3678 }
3679 return u32;
3680# endif
3681}
3682#endif
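
/*
 * A minimal usage sketch: maintaining a byte counter with ASMAtomicAddU32.
 * The return value is the pre-add value, so the post-add total is obtained
 * by adding the chunk size once more.  The counter and function names are
 * hypothetical.
 *
 *      static uint32_t volatile g_cbExampleTransferred = 0;
 *
 *      static uint32_t exampleAccountChunk(uint32_t cbChunk)
 *      {
 *          uint32_t cbOld = ASMAtomicAddU32(&g_cbExampleTransferred, cbChunk);
 *          return cbOld + cbChunk;
 *      }
 */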
3683
3684
3685/**
3686 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3687 *
3688 * @returns The old value.
3689 * @param pi32 Pointer to the value.
3690 * @param i32 Number to add.
3691 */
3692DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3693{
3694 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3695}
3696
3697
3698/**
3699 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3700 *
3701 * @returns The old value.
3702 * @param pu32 Pointer to the value.
3703 * @param u32 Number to subtract.
3704 */
3705DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
3706{
3707 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3708}
3709
3710
3711/**
3712 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3713 *
3714 * @returns The old value.
3715 * @param pi32 Pointer to the value.
3716 * @param i32 Number to subtract.
3717 */
3718DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3719{
3720 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3721}
3722
3723
3724/**
3725 * Atomically increment a 32-bit value, ordered.
3726 *
3727 * @returns The new value.
3728 * @param pu32 Pointer to the value to increment.
3729 */
3730#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3731DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3732#else
3733DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3734{
3735 uint32_t u32;
3736# if RT_INLINE_ASM_USES_INTRIN
3737 u32 = _InterlockedIncrement((long *)pu32);
3738 return u32;
3739
3740# elif RT_INLINE_ASM_GNU_STYLE
3741 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3742 : "=r" (u32),
3743 "=m" (*pu32)
3744 : "0" (1),
3745 "m" (*pu32)
3746 : "memory");
3747 return u32+1;
3748# else
3749 __asm
3750 {
3751 mov eax, 1
3752# ifdef RT_ARCH_AMD64
3753 mov rdx, [pu32]
3754 lock xadd [rdx], eax
3755# else
3756 mov edx, [pu32]
3757 lock xadd [edx], eax
3758# endif
3759 mov u32, eax
3760 }
3761 return u32+1;
3762# endif
3763}
3764#endif
3765
3766
3767/**
3768 * Atomically increment a signed 32-bit value, ordered.
3769 *
3770 * @returns The new value.
3771 * @param pi32 Pointer to the value to increment.
3772 */
3773DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3774{
3775 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3776}
3777
3778
3779/**
3780 * Atomically decrement an unsigned 32-bit value, ordered.
3781 *
3782 * @returns The new value.
3783 * @param pu32 Pointer to the value to decrement.
3784 */
3785#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3786DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3787#else
3788DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3789{
3790 uint32_t u32;
3791# if RT_INLINE_ASM_USES_INTRIN
3792 u32 = _InterlockedDecrement((long *)pu32);
3793 return u32;
3794
3795# elif RT_INLINE_ASM_GNU_STYLE
3796 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3797 : "=r" (u32),
3798 "=m" (*pu32)
3799 : "0" (-1),
3800 "m" (*pu32)
3801 : "memory");
3802 return u32-1;
3803# else
3804 __asm
3805 {
3806 mov eax, -1
3807# ifdef RT_ARCH_AMD64
3808 mov rdx, [pu32]
3809 lock xadd [rdx], eax
3810# else
3811 mov edx, [pu32]
3812 lock xadd [edx], eax
3813# endif
3814 mov u32, eax
3815 }
3816 return u32-1;
3817# endif
3818}
3819#endif
3820
3821
3822/**
3823 * Atomically decrement a signed 32-bit value, ordered.
3824 *
3825 * @returns The new value.
3826 * @param pi32 Pointer to the value to decrement.
3827 */
3828DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3829{
3830 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3831}
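
/*
 * A minimal usage sketch: reference counting with the increment/decrement
 * helpers, which return the new count.  The structure, field and destructor
 * names are hypothetical.
 *
 *      typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;
 *
 *      static void exampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      static void exampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              exampleDestroy(pObj);
 *      }
 */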
3832
3833
3834/**
3835 * Atomically Or an unsigned 32-bit value, ordered.
3836 *
3837 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3838 * @param u32 The value to OR *pu32 with.
3839 */
3840#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3841DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3842#else
3843DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3844{
3845# if RT_INLINE_ASM_USES_INTRIN
3846 _InterlockedOr((long volatile *)pu32, (long)u32);
3847
3848# elif RT_INLINE_ASM_GNU_STYLE
3849 __asm__ __volatile__("lock; orl %1, %0\n\t"
3850 : "=m" (*pu32)
3851 : "ir" (u32),
3852 "m" (*pu32));
3853# else
3854 __asm
3855 {
3856 mov eax, [u32]
3857# ifdef RT_ARCH_AMD64
3858 mov rdx, [pu32]
3859 lock or [rdx], eax
3860# else
3861 mov edx, [pu32]
3862 lock or [edx], eax
3863# endif
3864 }
3865# endif
3866}
3867#endif
3868
3869
3870/**
3871 * Atomically Or a signed 32-bit value, ordered.
3872 *
3873 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3874 * @param i32 The value to OR *pi32 with.
3875 */
3876DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3877{
3878 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3879}
3880
3881
3882/**
3883 * Atomically And an unsigned 32-bit value, ordered.
3884 *
3885 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3886 * @param u32 The value to AND *pu32 with.
3887 */
3888#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3889DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3890#else
3891DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3892{
3893# if RT_INLINE_ASM_USES_INTRIN
3894 _InterlockedAnd((long volatile *)pu32, u32);
3895
3896# elif RT_INLINE_ASM_GNU_STYLE
3897 __asm__ __volatile__("lock; andl %1, %0\n\t"
3898 : "=m" (*pu32)
3899 : "ir" (u32),
3900 "m" (*pu32));
3901# else
3902 __asm
3903 {
3904 mov eax, [u32]
3905# ifdef RT_ARCH_AMD64
3906 mov rdx, [pu32]
3907 lock and [rdx], eax
3908# else
3909 mov edx, [pu32]
3910 lock and [edx], eax
3911# endif
3912 }
3913# endif
3914}
3915#endif
3916
3917
3918/**
3919 * Atomically And a signed 32-bit value, ordered.
3920 *
3921 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3922 * @param i32 The value to AND *pi32 with.
3923 */
3924DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3925{
3926 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3927}
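
/*
 * A minimal usage sketch: setting and then clearing bit 0 in a shared 32-bit
 * flag word with the OR/AND helpers.  The structure and field are
 * hypothetical; fFlags is assumed to be a uint32_t volatile member.
 *
 *      ASMAtomicOrU32(&pThis->fFlags, UINT32_C(0x00000001));
 *      ASMAtomicAndU32(&pThis->fFlags, ~UINT32_C(0x00000001));
 */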
3928
3929
3930/**
3931 * Serialize Instruction.
3932 */
3933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3934DECLASM(void) ASMSerializeInstruction(void);
3935#else
3936DECLINLINE(void) ASMSerializeInstruction(void)
3937{
3938# if RT_INLINE_ASM_GNU_STYLE
3939 RTCCUINTREG xAX = 0;
3940# ifdef RT_ARCH_AMD64
3941 __asm__ ("cpuid"
3942 : "=a" (xAX)
3943 : "0" (xAX)
3944 : "rbx", "rcx", "rdx");
3945# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3946 __asm__ ("push %%ebx\n\t"
3947 "cpuid\n\t"
3948 "pop %%ebx\n\t"
3949 : "=a" (xAX)
3950 : "0" (xAX)
3951 : "ecx", "edx");
3952# else
3953 __asm__ ("cpuid"
3954 : "=a" (xAX)
3955 : "0" (xAX)
3956 : "ebx", "ecx", "edx");
3957# endif
3958
3959# elif RT_INLINE_ASM_USES_INTRIN
3960 int aInfo[4];
3961 __cpuid(aInfo, 0);
3962
3963# else
3964 __asm
3965 {
3966 push ebx
3967 xor eax, eax
3968 cpuid
3969 pop ebx
3970 }
3971# endif
3972}
3973#endif
3974
3975
3976/**
3977 * Memory load/store fence, waits for any pending writes and reads to complete.
3978 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3979 */
3980DECLINLINE(void) ASMMemoryFenceSSE2(void)
3981{
3982#if RT_INLINE_ASM_GNU_STYLE
3983 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3984#elif RT_INLINE_ASM_USES_INTRIN
3985 _mm_mfence();
3986#else
3987 __asm
3988 {
3989 _emit 0x0f
3990 _emit 0xae
3991 _emit 0xf0
3992 }
3993#endif
3994}
3995
3996
3997/**
3998 * Memory store fence, waits for any writes to complete.
3999 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
4000 */
4001DECLINLINE(void) ASMWriteFenceSSE(void)
4002{
4003#if RT_INLINE_ASM_GNU_STYLE
4004 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
4005#elif RT_INLINE_ASM_USES_INTRIN
4006 _mm_sfence();
4007#else
4008 __asm
4009 {
4010 _emit 0x0f
4011 _emit 0xae
4012 _emit 0xf8
4013 }
4014#endif
4015}
4016
4017
4018/**
4019 * Memory load fence, waits for any pending reads to complete.
4020 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
4021 */
4022DECLINLINE(void) ASMReadFenceSSE2(void)
4023{
4024#if RT_INLINE_ASM_GNU_STYLE
4025 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
4026#elif RT_INLINE_ASM_USES_INTRIN
4027 _mm_lfence();
4028#else
4029 __asm
4030 {
4031 _emit 0x0f
4032 _emit 0xae
4033 _emit 0xe8
4034 }
4035#endif
4036}
4037
4038
4039/**
4040 * Memory fence, waits for any pending writes and reads to complete.
4041 */
4042DECLINLINE(void) ASMMemoryFence(void)
4043{
4044 /** @todo use mfence? check if all cpus we care for support it. */
4045 uint32_t volatile u32;
4046 ASMAtomicXchgU32(&u32, 0);
4047}
4048
4049
4050/**
4051 * Write fence, waits for any pending writes to complete.
4052 */
4053DECLINLINE(void) ASMWriteFence(void)
4054{
4055 /** @todo use sfence? check if all cpus we care for support it. */
4056 ASMMemoryFence();
4057}
4058
4059
4060/**
4061 * Read fence, waits for any pending reads to complete.
4062 */
4063DECLINLINE(void) ASMReadFence(void)
4064{
4065 /** @todo use lfence? check if all cpus we care for support it. */
4066 ASMMemoryFence();
4067}
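
/*
 * A minimal usage sketch of the fences: a producer publishes a payload and
 * only then raises a ready flag, while the consumer checks the flag before
 * touching the payload.  The shared structure is hypothetical and both
 * fields are assumed to be volatile.
 *
 *      Producer:
 *          pShared->u32Payload = u32Value;
 *          ASMWriteFence();
 *          pShared->fReady = true;
 *
 *      Consumer:
 *          if (pShared->fReady)
 *          {
 *              ASMReadFence();
 *              u32Value = pShared->u32Payload;
 *          }
 */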
4068
4069
4070/**
4071 * Atomically reads an unsigned 8-bit value, ordered.
4072 *
4073 * @returns Current *pu8 value
4074 * @param pu8 Pointer to the 8-bit variable to read.
4075 */
4076DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
4077{
4078 ASMMemoryFence();
4079 return *pu8; /* byte reads are atomic on x86 */
4080}
4081
4082
4083/**
4084 * Atomically reads an unsigned 8-bit value, unordered.
4085 *
4086 * @returns Current *pu8 value
4087 * @param pu8 Pointer to the 8-bit variable to read.
4088 */
4089DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
4090{
4091 return *pu8; /* byte reads are atomic on x86 */
4092}
4093
4094
4095/**
4096 * Atomically reads a signed 8-bit value, ordered.
4097 *
4098 * @returns Current *pi8 value
4099 * @param pi8 Pointer to the 8-bit variable to read.
4100 */
4101DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
4102{
4103 ASMMemoryFence();
4104 return *pi8; /* byte reads are atomic on x86 */
4105}
4106
4107
4108/**
4109 * Atomically reads a signed 8-bit value, unordered.
4110 *
4111 * @returns Current *pi8 value
4112 * @param pi8 Pointer to the 8-bit variable to read.
4113 */
4114DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4115{
4116 return *pi8; /* byte reads are atomic on x86 */
4117}
4118
4119
4120/**
4121 * Atomically reads an unsigned 16-bit value, ordered.
4122 *
4123 * @returns Current *pu16 value
4124 * @param pu16 Pointer to the 16-bit variable to read.
4125 */
4126DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4127{
4128 ASMMemoryFence();
4129 Assert(!((uintptr_t)pu16 & 1));
4130 return *pu16;
4131}
4132
4133
4134/**
4135 * Atomically reads an unsigned 16-bit value, unordered.
4136 *
4137 * @returns Current *pu16 value
4138 * @param pu16 Pointer to the 16-bit variable to read.
4139 */
4140DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4141{
4142 Assert(!((uintptr_t)pu16 & 1));
4143 return *pu16;
4144}
4145
4146
4147/**
4148 * Atomically reads a signed 16-bit value, ordered.
4149 *
4150 * @returns Current *pi16 value
4151 * @param pi16 Pointer to the 16-bit variable to read.
4152 */
4153DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4154{
4155 ASMMemoryFence();
4156 Assert(!((uintptr_t)pi16 & 1));
4157 return *pi16;
4158}
4159
4160
4161/**
4162 * Atomically reads a signed 16-bit value, unordered.
4163 *
4164 * @returns Current *pi16 value
4165 * @param pi16 Pointer to the 16-bit variable to read.
4166 */
4167DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4168{
4169 Assert(!((uintptr_t)pi16 & 1));
4170 return *pi16;
4171}
4172
4173
4174/**
4175 * Atomically reads an unsigned 32-bit value, ordered.
4176 *
4177 * @returns Current *pu32 value
4178 * @param pu32 Pointer to the 32-bit variable to read.
4179 */
4180DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4181{
4182 ASMMemoryFence();
4183 Assert(!((uintptr_t)pu32 & 3));
4184 return *pu32;
4185}
4186
4187
4188/**
4189 * Atomically reads an unsigned 32-bit value, unordered.
4190 *
4191 * @returns Current *pu32 value
4192 * @param pu32 Pointer to the 32-bit variable to read.
4193 */
4194DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4195{
4196 Assert(!((uintptr_t)pu32 & 3));
4197 return *pu32;
4198}
4199
4200
4201/**
4202 * Atomically reads a signed 32-bit value, ordered.
4203 *
4204 * @returns Current *pi32 value
4205 * @param pi32 Pointer to the 32-bit variable to read.
4206 */
4207DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4208{
4209 ASMMemoryFence();
4210 Assert(!((uintptr_t)pi32 & 3));
4211 return *pi32;
4212}
4213
4214
4215/**
4216 * Atomically reads a signed 32-bit value, unordered.
4217 *
4218 * @returns Current *pi32 value
4219 * @param pi32 Pointer to the 32-bit variable to read.
4220 */
4221DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4222{
4223 Assert(!((uintptr_t)pi32 & 3));
4224 return *pi32;
4225}
4226
4227
4228/**
4229 * Atomically reads an unsigned 64-bit value, ordered.
4230 *
4231 * @returns Current *pu64 value
4232 * @param pu64 Pointer to the 64-bit variable to read.
4233 * The memory pointed to must be writable.
4234 * @remark This will fault if the memory is read-only!
4235 */
4236#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4237 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
4238DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4239#else
4240DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4241{
4242 uint64_t u64;
4243# ifdef RT_ARCH_AMD64
4244 Assert(!((uintptr_t)pu64 & 7));
4245/*# if RT_INLINE_ASM_GNU_STYLE
4246 __asm__ __volatile__( "mfence\n\t"
4247 "movq %1, %0\n\t"
4248 : "=r" (u64)
4249 : "m" (*pu64));
4250# else
4251 __asm
4252 {
4253 mfence
4254 mov rdx, [pu64]
4255 mov rax, [rdx]
4256 mov [u64], rax
4257 }
4258# endif*/
4259 ASMMemoryFence();
4260 u64 = *pu64;
4261# else /* !RT_ARCH_AMD64 */
4262# if RT_INLINE_ASM_GNU_STYLE
4263# if defined(PIC) || defined(__PIC__)
4264 uint32_t u32EBX = 0;
4265 Assert(!((uintptr_t)pu64 & 7));
4266 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4267 "lock; cmpxchg8b (%5)\n\t"
4268 "movl %3, %%ebx\n\t"
4269 : "=A" (u64),
4270# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4271 "+m" (*pu64)
4272# else
4273 "=m" (*pu64)
4274# endif
4275 : "0" (0),
4276 "m" (u32EBX),
4277 "c" (0),
4278 "S" (pu64));
4279# else /* !PIC */
4280 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4281 : "=A" (u64),
4282 "+m" (*pu64)
4283 : "0" (0),
4284 "b" (0),
4285 "c" (0));
4286# endif
4287# else
4288 Assert(!((uintptr_t)pu64 & 7));
4289 __asm
4290 {
4291 xor eax, eax
4292 xor edx, edx
4293 mov edi, pu64
4294 xor ecx, ecx
4295 xor ebx, ebx
4296 lock cmpxchg8b [edi]
4297 mov dword ptr [u64], eax
4298 mov dword ptr [u64 + 4], edx
4299 }
4300# endif
4301# endif /* !RT_ARCH_AMD64 */
4302 return u64;
4303}
4304#endif
4305
4306
4307/**
4308 * Atomically reads an unsigned 64-bit value, unordered.
4309 *
4310 * @returns Current *pu64 value
4311 * @param pu64 Pointer to the 64-bit variable to read.
4312 * The memory pointed to must be writable.
4313 * @remark This will fault if the memory is read-only!
4314 */
4315#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
4316 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
4317DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4318#else
4319DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4320{
4321 uint64_t u64;
4322# ifdef RT_ARCH_AMD64
4323 Assert(!((uintptr_t)pu64 & 7));
4324/*# if RT_INLINE_ASM_GNU_STYLE
4325 Assert(!((uintptr_t)pu64 & 7));
4326 __asm__ __volatile__("movq %1, %0\n\t"
4327 : "=r" (u64)
4328 : "m" (*pu64));
4329# else
4330 __asm
4331 {
4332 mov rdx, [pu64]
4333 mov rax, [rdx]
4334 mov [u64], rax
4335 }
4336# endif */
4337 u64 = *pu64;
4338# else /* !RT_ARCH_AMD64 */
4339# if RT_INLINE_ASM_GNU_STYLE
4340# if defined(PIC) || defined(__PIC__)
4341 uint32_t u32EBX = 0;
4342 uint32_t u32Spill;
4343 Assert(!((uintptr_t)pu64 & 7));
4344 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4345 "xor %%ecx,%%ecx\n\t"
4346 "xor %%edx,%%edx\n\t"
4347 "xchgl %%ebx, %3\n\t"
4348 "lock; cmpxchg8b (%4)\n\t"
4349 "movl %3, %%ebx\n\t"
4350 : "=A" (u64),
4351# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4352 "+m" (*pu64),
4353# else
4354 "=m" (*pu64),
4355# endif
4356 "=c" (u32Spill)
4357 : "m" (u32EBX),
4358 "S" (pu64));
4359# else /* !PIC */
4360 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4361 : "=A" (u64),
4362 "+m" (*pu64)
4363 : "0" (0),
4364 "b" (0),
4365 "c" (0));
4366# endif
4367# else
4368 Assert(!((uintptr_t)pu64 & 7));
4369 __asm
4370 {
4371 xor eax, eax
4372 xor edx, edx
4373 mov edi, pu64
4374 xor ecx, ecx
4375 xor ebx, ebx
4376 lock cmpxchg8b [edi]
4377 mov dword ptr [u64], eax
4378 mov dword ptr [u64 + 4], edx
4379 }
4380# endif
4381# endif /* !RT_ARCH_AMD64 */
4382 return u64;
4383}
4384#endif
4385
4386
4387/**
4388 * Atomically reads a signed 64-bit value, ordered.
4389 *
4390 * @returns Current *pi64 value
4391 * @param pi64 Pointer to the 64-bit variable to read.
4392 * The memory pointed to must be writable.
4393 * @remark This will fault if the memory is read-only!
4394 */
4395DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4396{
4397 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4398}
4399
4400
4401/**
4402 * Atomically reads a signed 64-bit value, unordered.
4403 *
4404 * @returns Current *pi64 value
4405 * @param pi64 Pointer to the 64-bit variable to read.
4406 * The memory pointed to must be writable.
4407 * @remark This will fault if the memory is read-only!
4408 */
4409DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4410{
4411 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4412}
4413
4414
4415/**
4416 * Atomically reads a pointer value, ordered.
4417 *
4418 * @returns Current *ppv value
4419 * @param ppv Pointer to the pointer variable to read.
4420 */
4421DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4422{
4423#if ARCH_BITS == 32
4424 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4425#elif ARCH_BITS == 64
4426 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4427#else
4428# error "ARCH_BITS is bogus"
4429#endif
4430}
4431
4432
4433/**
4434 * Atomically reads a pointer value, unordered.
4435 *
4436 * @returns Current *ppv value
4437 * @param ppv Pointer to the pointer variable to read.
4438 */
4439DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4440{
4441#if ARCH_BITS == 32
4442 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4443#elif ARCH_BITS == 64
4444 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4445#else
4446# error "ARCH_BITS is bogus"
4447#endif
4448}
4449
4450
4451/**
4452 * Atomically reads a boolean value, ordered.
4453 *
4454 * @returns Current *pf value
4455 * @param pf Pointer to the boolean variable to read.
4456 */
4457DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4458{
4459 ASMMemoryFence();
4460 return *pf; /* byte reads are atomic on x86 */
4461}
4462
4463
4464/**
4465 * Atomically reads a boolean value, unordered.
4466 *
4467 * @returns Current *pf value
4468 * @param pf Pointer to the boolean variable to read.
4469 */
4470DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4471{
4472 return *pf; /* byte reads are atomic on x86 */
4473}
4474
4475
4476/**
4477 * Atomically read a typical IPRT handle value, ordered.
4478 *
4479 * @param ph Pointer to the handle variable to read.
4480 * @param phRes Where to store the result.
4481 *
4482 * @remarks This doesn't currently work for all handles (like RTFILE).
4483 */
4484#if HC_ARCH_BITS == 32
4485# define ASMAtomicReadHandle(ph, phRes) \
4486 do { \
4487 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4488 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4489 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4490 } while (0)
4491#elif HC_ARCH_BITS == 64
4492# define ASMAtomicReadHandle(ph, phRes) \
4493 do { \
4494 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4495 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4496 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4497 } while (0)
4498#else
4499# error HC_ARCH_BITS
4500#endif
4501
4502
4503/**
4504 * Atomically read a typical IPRT handle value, unordered.
4505 *
4506 * @param ph Pointer to the handle variable to read.
4507 * @param phRes Where to store the result.
4508 *
4509 * @remarks This doesn't currently work for all handles (like RTFILE).
4510 */
4511#if HC_ARCH_BITS == 32
4512# define ASMAtomicUoReadHandle(ph, phRes) \
4513 do { \
4514 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4515 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4516 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4517 } while (0)
4518#elif HC_ARCH_BITS == 64
4519# define ASMAtomicUoReadHandle(ph, phRes) \
4520 do { \
4521 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4522 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4523 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4524 } while (0)
4525#else
4526# error HC_ARCH_BITS
4527#endif
4528
4529
4530/**
4531 * Atomically reads a value whose size might differ
4532 * between platforms or compilers, ordered.
4533 *
4534 * @param pu Pointer to the variable to read.
4535 * @param puRes Where to store the result.
4536 */
4537#define ASMAtomicReadSize(pu, puRes) \
4538 do { \
4539 switch (sizeof(*(pu))) { \
4540 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4541 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4542 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4543 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4544 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4545 } \
4546 } while (0)
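
/* Usage sketch (illustrative only, not part of the IPRT API): reading a field
 * whose width depends on the target architecture. The RTFOOSTATE type and the
 * rtFooGetFlags helper are hypothetical names made up for this example.
 *
 *      typedef struct RTFOOSTATE { volatile size_t uFlags; } RTFOOSTATE;
 *
 *      static size_t rtFooGetFlags(RTFOOSTATE *pState)
 *      {
 *          size_t uFlags;
 *          ASMAtomicReadSize(&pState->uFlags, &uFlags); // picks the U32 or U64 read via sizeof
 *          return uFlags;
 *      }
 */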
4547
4548
4549/**
4550 * Atomically reads a value whose size might differ
4551 * between platforms or compilers, unordered.
4552 *
4553 * @param pu Pointer to the variable to read.
4554 * @param puRes Where to store the result.
4555 */
4556#define ASMAtomicUoReadSize(pu, puRes) \
4557 do { \
4558 switch (sizeof(*(pu))) { \
4559 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4560 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4561 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4562 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4563 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4564 } \
4565 } while (0)
4566
4567
4568/**
4569 * Atomically writes an unsigned 8-bit value, ordered.
4570 *
4571 * @param pu8 Pointer to the 8-bit variable.
4572 * @param u8 The 8-bit value to assign to *pu8.
4573 */
4574DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4575{
4576 ASMAtomicXchgU8(pu8, u8);
4577}
4578
4579
4580/**
4581 * Atomically writes an unsigned 8-bit value, unordered.
4582 *
4583 * @param pu8 Pointer to the 8-bit variable.
4584 * @param u8 The 8-bit value to assign to *pu8.
4585 */
4586DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4587{
4588 *pu8 = u8; /* byte writes are atomic on x86 */
4589}
4590
4591
4592/**
4593 * Atomically writes a signed 8-bit value, ordered.
4594 *
4595 * @param pi8 Pointer to the 8-bit variable.
4596 * @param i8 The 8-bit value to assign to *pi8.
4597 */
4598DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4599{
4600 ASMAtomicXchgS8(pi8, i8);
4601}
4602
4603
4604/**
4605 * Atomically writes a signed 8-bit value, unordered.
4606 *
4607 * @param pi8 Pointer to the 8-bit variable.
4608 * @param i8 The 8-bit value to assign to *pi8.
4609 */
4610DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4611{
4612 *pi8 = i8; /* byte writes are atomic on x86 */
4613}
4614
4615
4616/**
4617 * Atomically writes an unsigned 16-bit value, ordered.
4618 *
4619 * @param pu16 Pointer to the 16-bit variable.
4620 * @param u16 The 16-bit value to assign to *pu16.
4621 */
4622DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4623{
4624 ASMAtomicXchgU16(pu16, u16);
4625}
4626
4627
4628/**
4629 * Atomically writes an unsigned 16-bit value, unordered.
4630 *
4631 * @param pu16 Pointer to the 16-bit variable.
4632 * @param u16 The 16-bit value to assign to *pu16.
4633 */
4634DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4635{
4636 Assert(!((uintptr_t)pu16 & 1));
4637 *pu16 = u16;
4638}
4639
4640
4641/**
4642 * Atomically writes a signed 16-bit value, ordered.
4643 *
4644 * @param pi16 Pointer to the 16-bit variable.
4645 * @param i16 The 16-bit value to assign to *pi16.
4646 */
4647DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4648{
4649 ASMAtomicXchgS16(pi16, i16);
4650}
4651
4652
4653/**
4654 * Atomically writes a signed 16-bit value, unordered.
4655 *
4656 * @param pi16 Pointer to the 16-bit variable.
4657 * @param i16 The 16-bit value to assign to *pi16.
4658 */
4659DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4660{
4661 Assert(!((uintptr_t)pi16 & 1));
4662 *pi16 = i16;
4663}
4664
4665
4666/**
4667 * Atomically writes an unsigned 32-bit value, ordered.
4668 *
4669 * @param pu32 Pointer to the 32-bit variable.
4670 * @param u32 The 32-bit value to assign to *pu32.
4671 */
4672DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4673{
4674 ASMAtomicXchgU32(pu32, u32);
4675}
4676
4677
4678/**
4679 * Atomically writes an unsigned 32-bit value, unordered.
4680 *
4681 * @param pu32 Pointer to the 32-bit variable.
4682 * @param u32 The 32-bit value to assign to *pu32.
4683 */
4684DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4685{
4686 Assert(!((uintptr_t)pu32 & 3));
4687 *pu32 = u32;
4688}
4689
4690
4691/**
4692 * Atomically writes a signed 32-bit value, ordered.
4693 *
4694 * @param pi32 Pointer to the 32-bit variable.
4695 * @param i32 The 32-bit value to assign to *pi32.
4696 */
4697DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4698{
4699 ASMAtomicXchgS32(pi32, i32);
4700}
4701
4702
4703/**
4704 * Atomically writes a signed 32-bit value, unordered.
4705 *
4706 * @param pi32 Pointer to the 32-bit variable.
4707 * @param i32 The 32-bit value to assign to *pi32.
4708 */
4709DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4710{
4711 Assert(!((uintptr_t)pi32 & 3));
4712 *pi32 = i32;
4713}
4714
4715
4716/**
4717 * Atomically writes an unsigned 64-bit value, ordered.
4718 *
4719 * @param pu64 Pointer to the 64-bit variable.
4720 * @param u64 The 64-bit value to assign to *pu64.
4721 */
4722DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4723{
4724 ASMAtomicXchgU64(pu64, u64);
4725}
4726
4727
4728/**
4729 * Atomically writes an unsigned 64-bit value, unordered.
4730 *
4731 * @param pu64 Pointer to the 64-bit variable.
4732 * @param u64 The 64-bit value to assign to *pu64.
4733 */
4734DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4735{
4736 Assert(!((uintptr_t)pu64 & 7));
4737#if ARCH_BITS == 64
4738 *pu64 = u64;
4739#else
4740 ASMAtomicXchgU64(pu64, u64);
4741#endif
4742}
4743
4744
4745/**
4746 * Atomically writes a signed 64-bit value, ordered.
4747 *
4748 * @param pi64 Pointer to the 64-bit variable.
4749 * @param i64 The 64-bit value to assign to *pi64.
4750 */
4751DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4752{
4753 ASMAtomicXchgS64(pi64, i64);
4754}
4755
4756
4757/**
4758 * Atomically writes a signed 64-bit value, unordered.
4759 *
4760 * @param pi64 Pointer to the 64-bit variable.
4761 * @param i64 The 64-bit value to assign to *pi64.
4762 */
4763DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4764{
4765 Assert(!((uintptr_t)pi64 & 7));
4766#if ARCH_BITS == 64
4767 *pi64 = i64;
4768#else
4769 ASMAtomicXchgS64(pi64, i64);
4770#endif
4771}
4772
4773
4774/**
4775 * Atomically writes a boolean value, ordered.
4776 *
4777 * @param pf Pointer to the boolean variable.
4778 * @param f The boolean value to assign to *pf.
4779 */
4780DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4781{
4782 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4783}
4784
4785
4786/**
4787 * Atomically writes a boolean value, unordered.
4788 *
4789 * @param pf Pointer to the boolean variable.
4790 * @param f The boolean value to assign to *pf.
4791 */
4792DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4793{
4794 *pf = f; /* byte writes are atomic on x86 */
4795}
4796
4797
4798/**
4799 * Atomically writes a pointer value, ordered.
4800 *
4802 * @param ppv Pointer to the pointer variable.
4803 * @param pv The pointer value to assign to *ppv.
4804 */
4805DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4806{
4807#if ARCH_BITS == 32
4808 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4809#elif ARCH_BITS == 64
4810 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4811#else
4812# error "ARCH_BITS is bogus"
4813#endif
4814}
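
/* Usage sketch (illustrative only): publishing a fully initialized object
 * through a shared pointer slot. The ordered write ensures the object's
 * fields are globally visible before the pointer is, so readers either see
 * NULL or a complete object. RTFOOCFG, g_pvFooCfg and the helpers are
 * hypothetical names invented for this example.
 *
 *      static void * volatile g_pvFooCfg = NULL;
 *
 *      static void rtFooPublishCfg(struct RTFOOCFG *pCfg)
 *      {
 *          ASMAtomicWritePtr(&g_pvFooCfg, pCfg);
 *      }
 *
 *      static struct RTFOOCFG *rtFooGetCfg(void)
 *      {
 *          return (struct RTFOOCFG *)ASMAtomicReadPtr(&g_pvFooCfg);
 *      }
 */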
4815
4816
4817/**
4818 * Atomically writes a pointer value, unordered.
4819 *
4821 * @param ppv Pointer to the pointer variable.
4822 * @param pv The pointer value to assign to *ppv.
4823 */
4824DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4825{
4826#if ARCH_BITS == 32
4827 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4828#elif ARCH_BITS == 64
4829 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4830#else
4831# error "ARCH_BITS is bogus"
4832#endif
4833}
4834
4835
4836/**
4837 * Atomically write a typical IPRT handle value, ordered.
4838 *
4839 * @param ph Pointer to the variable to update.
4840 * @param hNew The value to assign to *ph.
4841 *
4842 * @remarks This doesn't currently work for all handles (like RTFILE).
4843 */
4844#if HC_ARCH_BITS == 32
4845# define ASMAtomicWriteHandle(ph, hNew) \
4846 do { \
4847 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4848 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4849 } while (0)
4850#elif HC_ARCH_BITS == 64
4851# define ASMAtomicWriteHandle(ph, hNew) \
4852 do { \
4853 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4854 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4855 } while (0)
4856#else
4857# error HC_ARCH_BITS
4858#endif
4859
4860
4861/**
4862 * Atomically write a typical IPRT handle value, unordered.
4863 *
4864 * @param ph Pointer to the variable to update.
4865 * @param hNew The value to assign to *ph.
4866 *
4867 * @remarks This doesn't currently work for all handles (like RTFILE).
4868 */
4869#if HC_ARCH_BITS == 32
4870# define ASMAtomicUoWriteHandle(ph, hNew) \
4871 do { \
4872 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4873 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4874 } while (0)
4875#elif HC_ARCH_BITS == 64
4876# define ASMAtomicUoWriteHandle(ph, hNew) \
4877 do { \
4878 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4879 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4880 } while (0)
4881#else
4882# error HC_ARCH_BITS
4883#endif
4884
4885
4886/**
4887 * Atomically writes a value whose size might differ
4888 * between platforms or compilers, ordered.
4889 *
4890 * @param pu Pointer to the variable to update.
4891 * @param uNew The value to assign to *pu.
4892 */
4893#define ASMAtomicWriteSize(pu, uNew) \
4894 do { \
4895 switch (sizeof(*(pu))) { \
4896 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4897 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4898 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4899 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4900 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4901 } \
4902 } while (0)
4903
4904/**
4905 * Atomically writes a value whose size might differ
4906 * between platforms or compilers, unordered.
4907 *
4908 * @param pu Pointer to the variable to update.
4909 * @param uNew The value to assign to *pu.
4910 */
4911#define ASMAtomicUoWriteSize(pu, uNew) \
4912 do { \
4913 switch (sizeof(*(pu))) { \
4914 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4915 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4916 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4917 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4918 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4919 } \
4920 } while (0)
4921
4922
4923
4924
4925/**
4926 * Invalidate page.
4927 *
4928 * @param pv Address of the page to invalidate.
4929 */
4930#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4931DECLASM(void) ASMInvalidatePage(void *pv);
4932#else
4933DECLINLINE(void) ASMInvalidatePage(void *pv)
4934{
4935# if RT_INLINE_ASM_USES_INTRIN
4936 __invlpg(pv);
4937
4938# elif RT_INLINE_ASM_GNU_STYLE
4939 __asm__ __volatile__("invlpg %0\n\t"
4940 : : "m" (*(uint8_t *)pv));
4941# else
4942 __asm
4943 {
4944# ifdef RT_ARCH_AMD64
4945 mov rax, [pv]
4946 invlpg [rax]
4947# else
4948 mov eax, [pv]
4949 invlpg [eax]
4950# endif
4951 }
4952# endif
4953}
4954#endif
4955
4956
4957/**
4958 * Write back the internal caches and invalidate them.
4959 */
4960#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4961DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4962#else
4963DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4964{
4965# if RT_INLINE_ASM_USES_INTRIN
4966 __wbinvd();
4967
4968# elif RT_INLINE_ASM_GNU_STYLE
4969 __asm__ __volatile__("wbinvd");
4970# else
4971 __asm
4972 {
4973 wbinvd
4974 }
4975# endif
4976}
4977#endif
4978
4979
4980/**
4981 * Invalidate internal and (perhaps) external caches without first
4982 * flushing dirty cache lines. Use with extreme care.
4983 */
4984#if RT_INLINE_ASM_EXTERNAL
4985DECLASM(void) ASMInvalidateInternalCaches(void);
4986#else
4987DECLINLINE(void) ASMInvalidateInternalCaches(void)
4988{
4989# if RT_INLINE_ASM_GNU_STYLE
4990 __asm__ __volatile__("invd");
4991# else
4992 __asm
4993 {
4994 invd
4995 }
4996# endif
4997}
4998#endif
4999
5000
5001#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5002# if PAGE_SIZE != 0x1000
5003# error "PAGE_SIZE is not 0x1000!"
5004# endif
5005#endif
5006
5007/**
5008 * Zeros a 4K memory page.
5009 *
5010 * @param pv Pointer to the memory block. This must be page aligned.
5011 */
5012#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5013DECLASM(void) ASMMemZeroPage(volatile void *pv);
5014# else
5015DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
5016{
5017# if RT_INLINE_ASM_USES_INTRIN
5018# ifdef RT_ARCH_AMD64
5019 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
5020# else
5021 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
5022# endif
5023
5024# elif RT_INLINE_ASM_GNU_STYLE
5025 RTCCUINTREG uDummy;
5026# ifdef RT_ARCH_AMD64
5027 __asm__ __volatile__("rep stosq"
5028 : "=D" (pv),
5029 "=c" (uDummy)
5030 : "0" (pv),
5031 "c" (0x1000 >> 3),
5032 "a" (0)
5033 : "memory");
5034# else
5035 __asm__ __volatile__("rep stosl"
5036 : "=D" (pv),
5037 "=c" (uDummy)
5038 : "0" (pv),
5039 "c" (0x1000 >> 2),
5040 "a" (0)
5041 : "memory");
5042# endif
5043# else
5044 __asm
5045 {
5046# ifdef RT_ARCH_AMD64
5047 xor rax, rax
5048 mov ecx, 0200h
5049 mov rdi, [pv]
5050 rep stosq
5051# else
5052 xor eax, eax
5053 mov ecx, 0400h
5054 mov edi, [pv]
5055 rep stosd
5056# endif
5057 }
5058# endif
5059}
5060# endif
5061
5062
5063/**
5064 * Zeros a memory block with a 32-bit aligned size.
5065 *
5066 * @param pv Pointer to the memory block.
5067 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5068 */
5069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5070DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
5071#else
5072DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
5073{
5074# if RT_INLINE_ASM_USES_INTRIN
5075# ifdef RT_ARCH_AMD64
5076 if (!(cb & 7))
5077 __stosq((unsigned __int64 *)pv, 0, cb / 8);
5078 else
5079# endif
5080 __stosd((unsigned long *)pv, 0, cb / 4);
5081
5082# elif RT_INLINE_ASM_GNU_STYLE
5083 __asm__ __volatile__("rep stosl"
5084 : "=D" (pv),
5085 "=c" (cb)
5086 : "0" (pv),
5087 "1" (cb >> 2),
5088 "a" (0)
5089 : "memory");
5090# else
5091 __asm
5092 {
5093 xor eax, eax
5094# ifdef RT_ARCH_AMD64
5095 mov rcx, [cb]
5096 shr rcx, 2
5097 mov rdi, [pv]
5098# else
5099 mov ecx, [cb]
5100 shr ecx, 2
5101 mov edi, [pv]
5102# endif
5103 rep stosd
5104 }
5105# endif
5106}
5107#endif
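
/* Usage sketch (illustrative only): clearing a statistics structure whose
 * size is a multiple of 4 bytes, which ASMMemZero32 requires. RTFOOSTATS
 * and rtFooResetStats are hypothetical; the AssertCompile documents the
 * size requirement at build time.
 *
 *      typedef struct RTFOOSTATS { uint32_t cHits; uint32_t cMisses; uint64_t cTotalNs; } RTFOOSTATS;
 *      AssertCompile(!(sizeof(RTFOOSTATS) & 3));
 *
 *      static void rtFooResetStats(RTFOOSTATS *pStats)
 *      {
 *          ASMMemZero32(pStats, sizeof(*pStats));
 *      }
 */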
5108
5109
5110/**
5111 * Fills a memory block with a 32-bit aligned size.
5112 *
5113 * @param pv Pointer to the memory block.
5114 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5115 * @param u32 The value to fill with.
5116 */
5117#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5118DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5119#else
5120DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5121{
5122# if RT_INLINE_ASM_USES_INTRIN
5123# ifdef RT_ARCH_AMD64
5124 if (!(cb & 7))
5125 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5126 else
5127# endif
5128 __stosd((unsigned long *)pv, u32, cb / 4);
5129
5130# elif RT_INLINE_ASM_GNU_STYLE
5131 __asm__ __volatile__("rep stosl"
5132 : "=D" (pv),
5133 "=c" (cb)
5134 : "0" (pv),
5135 "1" (cb >> 2),
5136 "a" (u32)
5137 : "memory");
5138# else
5139 __asm
5140 {
5141# ifdef RT_ARCH_AMD64
5142 mov rcx, [cb]
5143 shr rcx, 2
5144 mov rdi, [pv]
5145# else
5146 mov ecx, [cb]
5147 shr ecx, 2
5148 mov edi, [pv]
5149# endif
5150 mov eax, [u32]
5151 rep stosd
5152 }
5153# endif
5154}
5155#endif
5156
5157
5158/**
5159 * Checks if a memory page is all zeros.
5160 *
5161 * @returns true / false.
5162 *
5163 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5164 * boundary.
5165 */
5166DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
5167{
5168# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5169 union { RTCCUINTREG r; bool f; } uAX;
5170 RTCCUINTREG xCX, xDI;
5171 Assert(!((uintptr_t)pvPage & 15));
5172 __asm__ __volatile__("repe; "
5173# ifdef RT_ARCH_AMD64
5174 "scasq\n\t"
5175# else
5176 "scasl\n\t"
5177# endif
5178 "setnc %%al\n\t"
5179 : "=&c" (xCX),
5180 "=&D" (xDI),
5181 "=&a" (uAX.r)
5182 : "mr" (pvPage),
5183# ifdef RT_ARCH_AMD64
5184 "0" (0x1000/8),
5185# else
5186 "0" (0x1000/4),
5187# endif
5188 "1" (pvPage),
5189 "2" (0));
5190 return uAX.f;
5191# else
5192 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
5193 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
5194 Assert(!((uintptr_t)pvPage & 15));
5195 for (;;)
5196 {
5197 if (puPtr[0]) return false;
5198 if (puPtr[4]) return false;
5199
5200 if (puPtr[2]) return false;
5201 if (puPtr[6]) return false;
5202
5203 if (puPtr[1]) return false;
5204 if (puPtr[5]) return false;
5205
5206 if (puPtr[3]) return false;
5207 if (puPtr[7]) return false;
5208
5209 if (!--cLeft)
5210 return true;
5211 puPtr += 8;
5212 }
5213 return true;
5214# endif
5215}
5216
5217
5218/**
5219 * Checks if a memory block is filled with the specified byte.
5220 *
5221 * This is a sort of inverted memchr.
5222 *
5223 * @returns Pointer to the byte which doesn't equal u8.
5224 * @returns NULL if all equal to u8.
5225 *
5226 * @param pv Pointer to the memory block.
5227 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5228 * @param u8 The value it's supposed to be filled with.
5229 *
5230 * @todo Fix name, it is a predicate function but it's not returning boolean!
5231 */
5232#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5233DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5234#else
5235DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5236{
5237/** @todo rewrite this in inline assembly? */
5238 uint8_t const *pb = (uint8_t const *)pv;
5239 for (; cb; cb--, pb++)
5240 if (RT_UNLIKELY(*pb != u8))
5241 return (void *)pb;
5242 return NULL;
5243}
5244#endif
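
/* Usage sketch (illustrative only): checking that an erased flash block reads
 * back as all 0xff. Remember that the function returns NULL on success and a
 * pointer to the first offending byte otherwise, and that cb must be a
 * multiple of 4 as documented above. rtFooIsBlockErased is a hypothetical helper.
 *
 *      static bool rtFooIsBlockErased(const void *pvBlock, size_t cbBlock)
 *      {
 *          Assert(!(cbBlock & 3));
 *          return ASMMemIsAll8(pvBlock, cbBlock, 0xff) == NULL;
 *      }
 */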
5245
5246
5247/**
5248 * Checks if a memory block is filled with the specified 32-bit value.
5249 *
5250 * This is a sort of inverted memchr.
5251 *
5252 * @returns Pointer to the first value which doesn't equal u32.
5253 * @returns NULL if all equal to u32.
5254 *
5255 * @param pv Pointer to the memory block.
5256 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5257 * @param u32 The value it's supposed to be filled with.
5258 *
5259 * @todo Fix name, it is a predicate function but it's not returning boolean!
5260 */
5261#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5262DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5263#else
5264DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5265{
5266/** @todo rewrite this in inline assembly? */
5267 uint32_t const *pu32 = (uint32_t const *)pv;
5268 for (; cb; cb -= 4, pu32++)
5269 if (RT_UNLIKELY(*pu32 != u32))
5270 return (uint32_t *)pu32;
5271 return NULL;
5272}
5273#endif
5274
5275
5276/**
5277 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5278 *
5279 * @returns u32F1 * u32F2.
5280 */
5281#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5282DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5283#else
5284DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5285{
5286# ifdef RT_ARCH_AMD64
5287 return (uint64_t)u32F1 * u32F2;
5288# else /* !RT_ARCH_AMD64 */
5289 uint64_t u64;
5290# if RT_INLINE_ASM_GNU_STYLE
5291 __asm__ __volatile__("mull %%edx"
5292 : "=A" (u64)
5293 : "a" (u32F2), "d" (u32F1));
5294# else
5295 __asm
5296 {
5297 mov edx, [u32F1]
5298 mov eax, [u32F2]
5299 mul edx
5300 mov dword ptr [u64], eax
5301 mov dword ptr [u64 + 4], edx
5302 }
5303# endif
5304 return u64;
5305# endif /* !RT_ARCH_AMD64 */
5306}
5307#endif
5308
5309
5310/**
5311 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5312 *
5313 * @returns u32F1 * u32F2.
5314 */
5315#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5316DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5317#else
5318DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5319{
5320# ifdef RT_ARCH_AMD64
5321 return (int64_t)i32F1 * i32F2;
5322# else /* !RT_ARCH_AMD64 */
5323 int64_t i64;
5324# if RT_INLINE_ASM_GNU_STYLE
5325 __asm__ __volatile__("imull %%edx"
5326 : "=A" (i64)
5327 : "a" (i32F2), "d" (i32F1));
5328# else
5329 __asm
5330 {
5331 mov edx, [i32F1]
5332 mov eax, [i32F2]
5333 imul edx
5334 mov dword ptr [i64], eax
5335 mov dword ptr [i64 + 4], edx
5336 }
5337# endif
5338 return i64;
5339# endif /* !RT_ARCH_AMD64 */
5340}
5341#endif
5342
5343
5344/**
5345 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5346 *
5347 * @returns u64 / u32.
5348 */
5349#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5350DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5351#else
5352DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5353{
5354# ifdef RT_ARCH_AMD64
5355 return (uint32_t)(u64 / u32);
5356# else /* !RT_ARCH_AMD64 */
5357# if RT_INLINE_ASM_GNU_STYLE
5358 RTCCUINTREG uDummy;
5359 __asm__ __volatile__("divl %3"
5360 : "=a" (u32), "=d"(uDummy)
5361 : "A" (u64), "r" (u32));
5362# else
5363 __asm
5364 {
5365 mov eax, dword ptr [u64]
5366 mov edx, dword ptr [u64 + 4]
5367 mov ecx, [u32]
5368 div ecx
5369 mov [u32], eax
5370 }
5371# endif
5372 return u32;
5373# endif /* !RT_ARCH_AMD64 */
5374}
5375#endif
5376
5377
5378/**
5379 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5380 *
5381 * @returns u64 / u32.
5382 */
5383#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5384DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5385#else
5386DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5387{
5388# ifdef RT_ARCH_AMD64
5389 return (int32_t)(i64 / i32);
5390# else /* !RT_ARCH_AMD64 */
5391# if RT_INLINE_ASM_GNU_STYLE
5392 RTCCUINTREG iDummy;
5393 __asm__ __volatile__("idivl %3"
5394 : "=a" (i32), "=d"(iDummy)
5395 : "A" (i64), "r" (i32));
5396# else
5397 __asm
5398 {
5399 mov eax, dword ptr [i64]
5400 mov edx, dword ptr [i64 + 4]
5401 mov ecx, [i32]
5402 idiv ecx
5403 mov [i32], eax
5404 }
5405# endif
5406 return i32;
5407# endif /* !RT_ARCH_AMD64 */
5408}
5409#endif
5410
5411
5412/**
5413 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5414 * returning the rest.
5415 *
5416 * @returns u64 % u32.
5417 *
5418 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or the division will overflow and crash.
5419 */
5420#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5421DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5422#else
5423DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5424{
5425# ifdef RT_ARCH_AMD64
5426 return (uint32_t)(u64 % u32);
5427# else /* !RT_ARCH_AMD64 */
5428# if RT_INLINE_ASM_GNU_STYLE
5429 RTCCUINTREG uDummy;
5430 __asm__ __volatile__("divl %3"
5431 : "=a" (uDummy), "=d"(u32)
5432 : "A" (u64), "r" (u32));
5433# else
5434 __asm
5435 {
5436 mov eax, dword ptr [u64]
5437 mov edx, dword ptr [u64 + 4]
5438 mov ecx, [u32]
5439 div ecx
5440 mov [u32], edx
5441 }
5442# endif
5443 return u32;
5444# endif /* !RT_ARCH_AMD64 */
5445}
5446#endif
5447
5448
5449/**
5450 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5451 * returning the rest.
5452 *
5453 * @returns u64 % u32.
5454 *
5455 * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit integer or the division will overflow and crash.
5456 */
5457#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5458DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5459#else
5460DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5461{
5462# ifdef RT_ARCH_AMD64
5463 return (int32_t)(i64 % i32);
5464# else /* !RT_ARCH_AMD64 */
5465# if RT_INLINE_ASM_GNU_STYLE
5466 RTCCUINTREG iDummy;
5467 __asm__ __volatile__("idivl %3"
5468 : "=a" (iDummy), "=d"(i32)
5469 : "A" (i64), "r" (i32));
5470# else
5471 __asm
5472 {
5473 mov eax, dword ptr [i64]
5474 mov edx, dword ptr [i64 + 4]
5475 mov ecx, [i32]
5476 idiv ecx
5477 mov [i32], edx
5478 }
5479# endif
5480 return i32;
5481# endif /* !RT_ARCH_AMD64 */
5482}
5483#endif
5484
5485
5486/**
5487 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer,
5488 * using a 96-bit intermediate result.
5489 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5490 * __udivdi3 and __umoddi3 even if this inline function is not used.
5491 *
5492 * @returns (u64A * u32B) / u32C.
5493 * @param u64A The 64-bit value.
5494 * @param u32B The 32-bit value to multiply A by.
5495 * @param u32C The 32-bit value to divide A*B by.
5496 */
5497#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5498DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5499#else
5500DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5501{
5502# if RT_INLINE_ASM_GNU_STYLE
5503# ifdef RT_ARCH_AMD64
5504 uint64_t u64Result, u64Spill;
5505 __asm__ __volatile__("mulq %2\n\t"
5506 "divq %3\n\t"
5507 : "=a" (u64Result),
5508 "=d" (u64Spill)
5509 : "r" ((uint64_t)u32B),
5510 "r" ((uint64_t)u32C),
5511 "0" (u64A),
5512 "1" (0));
5513 return u64Result;
5514# else
5515 uint32_t u32Dummy;
5516 uint64_t u64Result;
5517 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5518 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5519 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5520 eax = u64A.hi */
5521 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5522 edx = u32C */
5523 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5524 edx = u32B */
5525 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5526 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5527 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5528 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5529 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5530 edx = u64Hi % u32C */
5531 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5532 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5533 "divl %%ecx \n\t" /* u64Result.lo */
5534 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5535 : "=A"(u64Result), "=c"(u32Dummy),
5536 "=S"(u32Dummy), "=D"(u32Dummy)
5537 : "a"((uint32_t)u64A),
5538 "S"((uint32_t)(u64A >> 32)),
5539 "c"(u32B),
5540 "D"(u32C));
5541 return u64Result;
5542# endif
5543# else
5544 RTUINT64U u;
5545 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5546 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5547 u64Hi += (u64Lo >> 32);
5548 u.s.Hi = (uint32_t)(u64Hi / u32C);
5549 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5550 return u.u;
5551# endif
5552}
5553#endif
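
/* Usage sketch (illustrative only): rescaling a tick count from one frequency
 * to another without overflowing a 64-bit intermediate product, e.g. turning
 * TSC ticks into nanoseconds. rtFooTicksToNano and uTickFreqHz are made-up
 * names; a real caller would get the frequency from the timekeeping code.
 *
 *      static uint64_t rtFooTicksToNano(uint64_t cTicks, uint32_t uTickFreqHz)
 *      {
 *          // (cTicks * 1000000000) / uTickFreqHz with a 96-bit intermediate.
 *          return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uTickFreqHz);
 *      }
 */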
5554
5555
5556/**
5557 * Probes a byte pointer for read access.
5558 *
5559 * While the function will fault if the byte is not read accessible,
5560 * the idea is to do this in a safe place like before acquiring locks
5561 * and such like.
5562 *
5563 * Also, this function guarantees that an eager compiler is not going
5564 * to optimize the probing away.
5565 *
5566 * @param pvByte Pointer to the byte.
5567 */
5568#if RT_INLINE_ASM_EXTERNAL
5569DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5570#else
5571DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5572{
5573 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5574 uint8_t u8;
5575# if RT_INLINE_ASM_GNU_STYLE
5576 __asm__ __volatile__("movb (%1), %0\n\t"
5577 : "=r" (u8)
5578 : "r" (pvByte));
5579# else
5580 __asm
5581 {
5582# ifdef RT_ARCH_AMD64
5583 mov rax, [pvByte]
5584 mov al, [rax]
5585# else
5586 mov eax, [pvByte]
5587 mov al, [eax]
5588# endif
5589 mov [u8], al
5590 }
5591# endif
5592 return u8;
5593}
5594#endif
5595
5596/**
5597 * Probes a buffer for read access page by page.
5598 *
5599 * While the function will fault if the buffer is not fully read
5600 * accessible, the idea is to do this in a safe place like before
5601 * acquiring locks and such like.
5602 *
5603 * Also, this function guarantees that an eager compiler is not going
5604 * to optimize the probing away.
5605 *
5606 * @param pvBuf Pointer to the buffer.
5607 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5608 */
5609DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5610{
5611 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5612 /* the first byte */
5613 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5614 ASMProbeReadByte(pu8);
5615
5616 /* the whole pages in between. */
5617 while (cbBuf > /*PAGE_SIZE*/0x1000)
5618 {
5619 ASMProbeReadByte(pu8);
5620 cbBuf -= /*PAGE_SIZE*/0x1000;
5621 pu8 += /*PAGE_SIZE*/0x1000;
5622 }
5623
5624 /* the last byte */
5625 ASMProbeReadByte(pu8 + cbBuf - 1);
5626}
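
/* Usage sketch (illustrative only): touching every page of a caller supplied
 * buffer up front, so a bad pointer faults here instead of somewhere awkward
 * like inside a locked section. rtFooProbeRequest is a hypothetical helper.
 *
 *      static void rtFooProbeRequest(const void *pvReq, size_t cbReq)
 *      {
 *          Assert(cbReq >= 1); // ASMProbeReadBuffer requires cbBuf >= 1
 *          ASMProbeReadBuffer(pvReq, cbReq);
 *      }
 */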
5627
5628
5629/** @def ASMBreakpoint
5630 * Debugger Breakpoint.
5631 * @remark In the gnu world we add a nop instruction after the int3 to
5632 * force gdb to remain at the int3 source line.
5633 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5634 * @internal
5635 */
5636#if RT_INLINE_ASM_GNU_STYLE
5637# ifndef __L4ENV__
5638# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5639# else
5640# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5641# endif
5642#else
5643# define ASMBreakpoint() __debugbreak()
5644#endif
5645
5646
5647
5648/** @defgroup grp_inline_bits Bit Operations
5649 * @{
5650 */
5651
5652
5653/**
5654 * Sets a bit in a bitmap.
5655 *
5656 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5657 * @param iBit The bit to set.
5658 *
5659 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5660 * However, doing so will yield better performance as well as avoiding
5661 * traps accessing the last bits in the bitmap.
5662 */
5663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5664DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5665#else
5666DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5667{
5668# if RT_INLINE_ASM_USES_INTRIN
5669 _bittestandset((long *)pvBitmap, iBit);
5670
5671# elif RT_INLINE_ASM_GNU_STYLE
5672 __asm__ __volatile__("btsl %1, %0"
5673 : "=m" (*(volatile long *)pvBitmap)
5674 : "Ir" (iBit),
5675 "m" (*(volatile long *)pvBitmap)
5676 : "memory");
5677# else
5678 __asm
5679 {
5680# ifdef RT_ARCH_AMD64
5681 mov rax, [pvBitmap]
5682 mov edx, [iBit]
5683 bts [rax], edx
5684# else
5685 mov eax, [pvBitmap]
5686 mov edx, [iBit]
5687 bts [eax], edx
5688# endif
5689 }
5690# endif
5691}
5692#endif
5693
5694
5695/**
5696 * Atomically sets a bit in a bitmap, ordered.
5697 *
5698 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5699 * the memory access isn't atomic!
5700 * @param iBit The bit to set.
5701 */
5702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5703DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5704#else
5705DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5706{
5707 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5708# if RT_INLINE_ASM_USES_INTRIN
5709 _interlockedbittestandset((long *)pvBitmap, iBit);
5710# elif RT_INLINE_ASM_GNU_STYLE
5711 __asm__ __volatile__("lock; btsl %1, %0"
5712 : "=m" (*(volatile long *)pvBitmap)
5713 : "Ir" (iBit),
5714 "m" (*(volatile long *)pvBitmap)
5715 : "memory");
5716# else
5717 __asm
5718 {
5719# ifdef RT_ARCH_AMD64
5720 mov rax, [pvBitmap]
5721 mov edx, [iBit]
5722 lock bts [rax], edx
5723# else
5724 mov eax, [pvBitmap]
5725 mov edx, [iBit]
5726 lock bts [eax], edx
5727# endif
5728 }
5729# endif
5730}
5731#endif
5732
5733
5734/**
5735 * Clears a bit in a bitmap.
5736 *
5737 * @param pvBitmap Pointer to the bitmap.
5738 * @param iBit The bit to clear.
5739 *
5740 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5741 * However, doing so will yield better performance as well as avoiding
5742 * traps accessing the last bits in the bitmap.
5743 */
5744#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5745DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5746#else
5747DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5748{
5749# if RT_INLINE_ASM_USES_INTRIN
5750 _bittestandreset((long *)pvBitmap, iBit);
5751
5752# elif RT_INLINE_ASM_GNU_STYLE
5753 __asm__ __volatile__("btrl %1, %0"
5754 : "=m" (*(volatile long *)pvBitmap)
5755 : "Ir" (iBit),
5756 "m" (*(volatile long *)pvBitmap)
5757 : "memory");
5758# else
5759 __asm
5760 {
5761# ifdef RT_ARCH_AMD64
5762 mov rax, [pvBitmap]
5763 mov edx, [iBit]
5764 btr [rax], edx
5765# else
5766 mov eax, [pvBitmap]
5767 mov edx, [iBit]
5768 btr [eax], edx
5769# endif
5770 }
5771# endif
5772}
5773#endif
5774
5775
5776/**
5777 * Atomically clears a bit in a bitmap, ordered.
5778 *
5779 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5780 * the memory access isn't atomic!
5781 * @param iBit The bit to clear.
5782 * @remarks No memory barrier, take care on smp.
5783 */
5784#if RT_INLINE_ASM_EXTERNAL
5785DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5786#else
5787DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5788{
5789 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5790# if RT_INLINE_ASM_GNU_STYLE
5791 __asm__ __volatile__("lock; btrl %1, %0"
5792 : "=m" (*(volatile long *)pvBitmap)
5793 : "Ir" (iBit),
5794 "m" (*(volatile long *)pvBitmap)
5795 : "memory");
5796# else
5797 __asm
5798 {
5799# ifdef RT_ARCH_AMD64
5800 mov rax, [pvBitmap]
5801 mov edx, [iBit]
5802 lock btr [rax], edx
5803# else
5804 mov eax, [pvBitmap]
5805 mov edx, [iBit]
5806 lock btr [eax], edx
5807# endif
5808 }
5809# endif
5810}
5811#endif
5812
5813
5814/**
5815 * Toggles a bit in a bitmap.
5816 *
5817 * @param pvBitmap Pointer to the bitmap.
5818 * @param iBit The bit to toggle.
5819 *
5820 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5821 * However, doing so will yield better performance as well as avoiding
5822 * traps accessing the last bits in the bitmap.
5823 */
5824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5825DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5826#else
5827DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5828{
5829# if RT_INLINE_ASM_USES_INTRIN
5830 _bittestandcomplement((long *)pvBitmap, iBit);
5831# elif RT_INLINE_ASM_GNU_STYLE
5832 __asm__ __volatile__("btcl %1, %0"
5833 : "=m" (*(volatile long *)pvBitmap)
5834 : "Ir" (iBit),
5835 "m" (*(volatile long *)pvBitmap)
5836 : "memory");
5837# else
5838 __asm
5839 {
5840# ifdef RT_ARCH_AMD64
5841 mov rax, [pvBitmap]
5842 mov edx, [iBit]
5843 btc [rax], edx
5844# else
5845 mov eax, [pvBitmap]
5846 mov edx, [iBit]
5847 btc [eax], edx
5848# endif
5849 }
5850# endif
5851}
5852#endif
5853
5854
5855/**
5856 * Atomically toggles a bit in a bitmap, ordered.
5857 *
5858 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5859 * the memory access isn't atomic!
5860 * @param iBit The bit to toggle.
5861 */
5862#if RT_INLINE_ASM_EXTERNAL
5863DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5864#else
5865DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5866{
5867 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5868# if RT_INLINE_ASM_GNU_STYLE
5869 __asm__ __volatile__("lock; btcl %1, %0"
5870 : "=m" (*(volatile long *)pvBitmap)
5871 : "Ir" (iBit),
5872 "m" (*(volatile long *)pvBitmap)
5873 : "memory");
5874# else
5875 __asm
5876 {
5877# ifdef RT_ARCH_AMD64
5878 mov rax, [pvBitmap]
5879 mov edx, [iBit]
5880 lock btc [rax], edx
5881# else
5882 mov eax, [pvBitmap]
5883 mov edx, [iBit]
5884 lock btc [eax], edx
5885# endif
5886 }
5887# endif
5888}
5889#endif
5890
5891
5892/**
5893 * Tests and sets a bit in a bitmap.
5894 *
5895 * @returns true if the bit was set.
5896 * @returns false if the bit was clear.
5897 *
5898 * @param pvBitmap Pointer to the bitmap.
5899 * @param iBit The bit to test and set.
5900 *
5901 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5902 * However, doing so will yield better performance as well as avoiding
5903 * traps accessing the last bits in the bitmap.
5904 */
5905#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5906DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5907#else
5908DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5909{
5910 union { bool f; uint32_t u32; uint8_t u8; } rc;
5911# if RT_INLINE_ASM_USES_INTRIN
5912 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5913
5914# elif RT_INLINE_ASM_GNU_STYLE
5915 __asm__ __volatile__("btsl %2, %1\n\t"
5916 "setc %b0\n\t"
5917 "andl $1, %0\n\t"
5918 : "=q" (rc.u32),
5919 "=m" (*(volatile long *)pvBitmap)
5920 : "Ir" (iBit),
5921 "m" (*(volatile long *)pvBitmap)
5922 : "memory");
5923# else
5924 __asm
5925 {
5926 mov edx, [iBit]
5927# ifdef RT_ARCH_AMD64
5928 mov rax, [pvBitmap]
5929 bts [rax], edx
5930# else
5931 mov eax, [pvBitmap]
5932 bts [eax], edx
5933# endif
5934 setc al
5935 and eax, 1
5936 mov [rc.u32], eax
5937 }
5938# endif
5939 return rc.f;
5940}
5941#endif
5942
5943
5944/**
5945 * Atomically tests and sets a bit in a bitmap, ordered.
5946 *
5947 * @returns true if the bit was set.
5948 * @returns false if the bit was clear.
5949 *
5950 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5951 * the memory access isn't atomic!
5952 * @param iBit The bit to set.
5953 */
5954#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5955DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5956#else
5957DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5958{
5959 union { bool f; uint32_t u32; uint8_t u8; } rc;
5960 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5961# if RT_INLINE_ASM_USES_INTRIN
5962 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5963# elif RT_INLINE_ASM_GNU_STYLE
5964 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5965 "setc %b0\n\t"
5966 "andl $1, %0\n\t"
5967 : "=q" (rc.u32),
5968 "=m" (*(volatile long *)pvBitmap)
5969 : "Ir" (iBit),
5970 "m" (*(volatile long *)pvBitmap)
5971 : "memory");
5972# else
5973 __asm
5974 {
5975 mov edx, [iBit]
5976# ifdef RT_ARCH_AMD64
5977 mov rax, [pvBitmap]
5978 lock bts [rax], edx
5979# else
5980 mov eax, [pvBitmap]
5981 lock bts [eax], edx
5982# endif
5983 setc al
5984 and eax, 1
5985 mov [rc.u32], eax
5986 }
5987# endif
5988 return rc.f;
5989}
5990#endif
5991
5992
5993/**
5994 * Tests and clears a bit in a bitmap.
5995 *
5996 * @returns true if the bit was set.
5997 * @returns false if the bit was clear.
5998 *
5999 * @param pvBitmap Pointer to the bitmap.
6000 * @param iBit The bit to test and clear.
6001 *
6002 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6003 * However, doing so will yield better performance as well as avoiding
6004 * traps accessing the last bits in the bitmap.
6005 */
6006#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6007DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6008#else
6009DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6010{
6011 union { bool f; uint32_t u32; uint8_t u8; } rc;
6012# if RT_INLINE_ASM_USES_INTRIN
6013 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
6014
6015# elif RT_INLINE_ASM_GNU_STYLE
6016 __asm__ __volatile__("btrl %2, %1\n\t"
6017 "setc %b0\n\t"
6018 "andl $1, %0\n\t"
6019 : "=q" (rc.u32),
6020 "=m" (*(volatile long *)pvBitmap)
6021 : "Ir" (iBit),
6022 "m" (*(volatile long *)pvBitmap)
6023 : "memory");
6024# else
6025 __asm
6026 {
6027 mov edx, [iBit]
6028# ifdef RT_ARCH_AMD64
6029 mov rax, [pvBitmap]
6030 btr [rax], edx
6031# else
6032 mov eax, [pvBitmap]
6033 btr [eax], edx
6034# endif
6035 setc al
6036 and eax, 1
6037 mov [rc.u32], eax
6038 }
6039# endif
6040 return rc.f;
6041}
6042#endif
6043
6044
6045/**
6046 * Atomically tests and clears a bit in a bitmap, ordered.
6047 *
6048 * @returns true if the bit was set.
6049 * @returns false if the bit was clear.
6050 *
6051 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6052 * the memory access isn't atomic!
6053 * @param iBit The bit to test and clear.
6054 *
6055 * @remarks No memory barrier, take care on smp.
6056 */
6057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6058DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6059#else
6060DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6061{
6062 union { bool f; uint32_t u32; uint8_t u8; } rc;
6063 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6064# if RT_INLINE_ASM_USES_INTRIN
6065 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
6066
6067# elif RT_INLINE_ASM_GNU_STYLE
6068 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6069 "setc %b0\n\t"
6070 "andl $1, %0\n\t"
6071 : "=q" (rc.u32),
6072 "=m" (*(volatile long *)pvBitmap)
6073 : "Ir" (iBit),
6074 "m" (*(volatile long *)pvBitmap)
6075 : "memory");
6076# else
6077 __asm
6078 {
6079 mov edx, [iBit]
6080# ifdef RT_ARCH_AMD64
6081 mov rax, [pvBitmap]
6082 lock btr [rax], edx
6083# else
6084 mov eax, [pvBitmap]
6085 lock btr [eax], edx
6086# endif
6087 setc al
6088 and eax, 1
6089 mov [rc.u32], eax
6090 }
6091# endif
6092 return rc.f;
6093}
6094#endif
6095
6096
6097/**
6098 * Tests and toggles a bit in a bitmap.
6099 *
6100 * @returns true if the bit was set.
6101 * @returns false if the bit was clear.
6102 *
6103 * @param pvBitmap Pointer to the bitmap.
6104 * @param iBit The bit to test and toggle.
6105 *
6106 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6107 * However, doing so will yield better performance as well as avoiding
6108 * traps accessing the last bits in the bitmap.
6109 */
6110#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6111DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6112#else
6113DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6114{
6115 union { bool f; uint32_t u32; uint8_t u8; } rc;
6116# if RT_INLINE_ASM_USES_INTRIN
6117 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6118
6119# elif RT_INLINE_ASM_GNU_STYLE
6120 __asm__ __volatile__("btcl %2, %1\n\t"
6121 "setc %b0\n\t"
6122 "andl $1, %0\n\t"
6123 : "=q" (rc.u32),
6124 "=m" (*(volatile long *)pvBitmap)
6125 : "Ir" (iBit),
6126 "m" (*(volatile long *)pvBitmap)
6127 : "memory");
6128# else
6129 __asm
6130 {
6131 mov edx, [iBit]
6132# ifdef RT_ARCH_AMD64
6133 mov rax, [pvBitmap]
6134 btc [rax], edx
6135# else
6136 mov eax, [pvBitmap]
6137 btc [eax], edx
6138# endif
6139 setc al
6140 and eax, 1
6141 mov [rc.u32], eax
6142 }
6143# endif
6144 return rc.f;
6145}
6146#endif
6147
6148
6149/**
6150 * Atomically tests and toggles a bit in a bitmap, ordered.
6151 *
6152 * @returns true if the bit was set.
6153 * @returns false if the bit was clear.
6154 *
6155 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6156 * the memory access isn't atomic!
6157 * @param iBit The bit to test and toggle.
6158 */
6159#if RT_INLINE_ASM_EXTERNAL
6160DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6161#else
6162DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6163{
6164 union { bool f; uint32_t u32; uint8_t u8; } rc;
6165 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6166# if RT_INLINE_ASM_GNU_STYLE
6167 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6168 "setc %b0\n\t"
6169 "andl $1, %0\n\t"
6170 : "=q" (rc.u32),
6171 "=m" (*(volatile long *)pvBitmap)
6172 : "Ir" (iBit),
6173 "m" (*(volatile long *)pvBitmap)
6174 : "memory");
6175# else
6176 __asm
6177 {
6178 mov edx, [iBit]
6179# ifdef RT_ARCH_AMD64
6180 mov rax, [pvBitmap]
6181 lock btc [rax], edx
6182# else
6183 mov eax, [pvBitmap]
6184 lock btc [eax], edx
6185# endif
6186 setc al
6187 and eax, 1
6188 mov [rc.u32], eax
6189 }
6190# endif
6191 return rc.f;
6192}
6193#endif
6194
6195
6196/**
6197 * Tests if a bit in a bitmap is set.
6198 *
6199 * @returns true if the bit is set.
6200 * @returns false if the bit is clear.
6201 *
6202 * @param pvBitmap Pointer to the bitmap.
6203 * @param iBit The bit to test.
6204 *
6205 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6206 * However, doing so will yield better performance as well as avoiding
6207 * traps accessing the last bits in the bitmap.
6208 */
6209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6210DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6211#else
6212DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6213{
6214 union { bool f; uint32_t u32; uint8_t u8; } rc;
6215# if RT_INLINE_ASM_USES_INTRIN
6216 rc.u32 = _bittest((long *)pvBitmap, iBit);
6217# elif RT_INLINE_ASM_GNU_STYLE
6218
6219 __asm__ __volatile__("btl %2, %1\n\t"
6220 "setc %b0\n\t"
6221 "andl $1, %0\n\t"
6222 : "=q" (rc.u32)
6223 : "m" (*(const volatile long *)pvBitmap),
6224 "Ir" (iBit)
6225 : "memory");
6226# else
6227 __asm
6228 {
6229 mov edx, [iBit]
6230# ifdef RT_ARCH_AMD64
6231 mov rax, [pvBitmap]
6232 bt [rax], edx
6233# else
6234 mov eax, [pvBitmap]
6235 bt [eax], edx
6236# endif
6237 setc al
6238 and eax, 1
6239 mov [rc.u32], eax
6240 }
6241# endif
6242 return rc.f;
6243}
6244#endif
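
/* Usage sketch (illustrative only): a small allocation bitmap tracking 256
 * slots. Declaring it as an array of uint32_t gives the 32-bit alignment the
 * bit functions prefer. g_bmFooSlots and the helper are hypothetical; use the
 * ASMAtomicBit* variants instead if several threads update the bitmap.
 *
 *      static uint32_t g_bmFooSlots[256 / 32];
 *
 *      static void rtFooMarkSlotUsed(int32_t iSlot)
 *      {
 *          Assert(!ASMBitTest(g_bmFooSlots, iSlot));
 *          ASMBitSet(g_bmFooSlots, iSlot);
 *      }
 */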
6245
6246
6247/**
6248 * Clears a bit range within a bitmap.
6249 *
6250 * @param pvBitmap Pointer to the bitmap.
6251 * @param iBitStart The First bit to clear.
6252 * @param iBitStart The first bit to clear.
6253 */
6254DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6255{
6256 if (iBitStart < iBitEnd)
6257 {
6258 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6259 int iStart = iBitStart & ~31;
6260 int iEnd = iBitEnd & ~31;
6261 if (iStart == iEnd)
6262 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6263 else
6264 {
6265 /* bits in first dword. */
6266 if (iBitStart & 31)
6267 {
6268 *pu32 &= (1 << (iBitStart & 31)) - 1;
6269 pu32++;
6270 iBitStart = iStart + 32;
6271 }
6272
6273 /* whole dword. */
6274 if (iBitStart != iEnd)
6275 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6276
6277 /* bits in last dword. */
6278 if (iBitEnd & 31)
6279 {
6280 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6281 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6282 }
6283 }
6284 }
6285}
6286
6287
6288/**
6289 * Sets a bit range within a bitmap.
6290 *
6291 * @param pvBitmap Pointer to the bitmap.
6292 * @param iBitStart The first bit to set.
6293 * @param iBitEnd The first bit not to set.
6294 */
6295DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6296{
6297 if (iBitStart < iBitEnd)
6298 {
6299 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6300 int iStart = iBitStart & ~31;
6301 int iEnd = iBitEnd & ~31;
6302 if (iStart == iEnd)
6303 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
6304 else
6305 {
6306 /* bits in first dword. */
6307 if (iBitStart & 31)
6308 {
6309 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6310 pu32++;
6311 iBitStart = iStart + 32;
6312 }
6313
6314 /* whole dword. */
6315 if (iBitStart != iEnd)
6316 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6317
6318 /* bits in last dword. */
6319 if (iBitEnd & 31)
6320 {
6321 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6322 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6323 }
6324 }
6325 }
6326}
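
/* Usage sketch (illustrative only): reserving and releasing a run of slots in
 * a bitmap. Note that iBitEnd is exclusive, i.e. the affected range is
 * [iBitStart, iBitEnd). The helper names are hypothetical.
 *
 *      static void rtFooReserveSlots(volatile void *pvBitmap, int32_t iFirst, int32_t cSlots)
 *      {
 *          ASMBitSetRange(pvBitmap, iFirst, iFirst + cSlots);
 *      }
 *
 *      static void rtFooReleaseSlots(volatile void *pvBitmap, int32_t iFirst, int32_t cSlots)
 *      {
 *          ASMBitClearRange(pvBitmap, iFirst, iFirst + cSlots);
 *      }
 */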
6327
6328
6329/**
6330 * Finds the first clear bit in a bitmap.
6331 *
6332 * @returns Index of the first zero bit.
6333 * @returns -1 if no clear bit was found.
6334 * @param pvBitmap Pointer to the bitmap.
6335 * @param cBits The number of bits in the bitmap. Multiple of 32.
6336 */
6337#if RT_INLINE_ASM_EXTERNAL
6338DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6339#else
6340DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6341{
6342 if (cBits)
6343 {
6344 int32_t iBit;
6345# if RT_INLINE_ASM_GNU_STYLE
6346 RTCCUINTREG uEAX, uECX, uEDI;
6347 cBits = RT_ALIGN_32(cBits, 32);
6348 __asm__ __volatile__("repe; scasl\n\t"
6349 "je 1f\n\t"
6350# ifdef RT_ARCH_AMD64
6351 "lea -4(%%rdi), %%rdi\n\t"
6352 "xorl (%%rdi), %%eax\n\t"
6353 "subq %5, %%rdi\n\t"
6354# else
6355 "lea -4(%%edi), %%edi\n\t"
6356 "xorl (%%edi), %%eax\n\t"
6357 "subl %5, %%edi\n\t"
6358# endif
6359 "shll $3, %%edi\n\t"
6360 "bsfl %%eax, %%edx\n\t"
6361 "addl %%edi, %%edx\n\t"
6362 "1:\t\n"
6363 : "=d" (iBit),
6364 "=&c" (uECX),
6365 "=&D" (uEDI),
6366 "=&a" (uEAX)
6367 : "0" (0xffffffff),
6368 "mr" (pvBitmap),
6369 "1" (cBits >> 5),
6370 "2" (pvBitmap),
6371 "3" (0xffffffff));
6372# else
6373 cBits = RT_ALIGN_32(cBits, 32);
6374 __asm
6375 {
6376# ifdef RT_ARCH_AMD64
6377 mov rdi, [pvBitmap]
6378 mov rbx, rdi
6379# else
6380 mov edi, [pvBitmap]
6381 mov ebx, edi
6382# endif
6383 mov edx, 0ffffffffh
6384 mov eax, edx
6385 mov ecx, [cBits]
6386 shr ecx, 5
6387 repe scasd
6388 je done
6389
6390# ifdef RT_ARCH_AMD64
6391 lea rdi, [rdi - 4]
6392 xor eax, [rdi]
6393 sub rdi, rbx
6394# else
6395 lea edi, [edi - 4]
6396 xor eax, [edi]
6397 sub edi, ebx
6398# endif
6399 shl edi, 3
6400 bsf edx, eax
6401 add edx, edi
6402 done:
6403 mov [iBit], edx
6404 }
6405# endif
6406 return iBit;
6407 }
6408 return -1;
6409}
6410#endif
6411
6412
6413/**
6414 * Finds the next clear bit in a bitmap.
6415 *
6416 * @returns Index of the first zero bit.
6417 * @returns -1 if no clear bit was found.
6418 * @param pvBitmap Pointer to the bitmap.
6419 * @param cBits The number of bits in the bitmap. Multiple of 32.
6420 * @param iBitPrev The bit returned from the last search.
6421 * The search will start at iBitPrev + 1.
6422 */
6423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6424DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6425#else
6426DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6427{
6428 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6429 int iBit = ++iBitPrev & 31;
6430 if (iBit)
6431 {
6432 /*
6433 * Inspect the 32-bit word containing the unaligned bit.
6434 */
6435 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6436
6437# if RT_INLINE_ASM_USES_INTRIN
6438 unsigned long ulBit = 0;
6439 if (_BitScanForward(&ulBit, u32))
6440 return ulBit + iBitPrev;
6441# else
6442# if RT_INLINE_ASM_GNU_STYLE
6443 __asm__ __volatile__("bsf %1, %0\n\t"
6444 "jnz 1f\n\t"
6445 "movl $-1, %0\n\t"
6446 "1:\n\t"
6447 : "=r" (iBit)
6448 : "r" (u32));
6449# else
6450 __asm
6451 {
6452 mov edx, [u32]
6453 bsf eax, edx
6454 jnz done
6455 mov eax, 0ffffffffh
6456 done:
6457 mov [iBit], eax
6458 }
6459# endif
6460 if (iBit >= 0)
6461 return iBit + iBitPrev;
6462# endif
6463
6464 /*
6465 * Skip ahead and see if there is anything left to search.
6466 */
6467 iBitPrev |= 31;
6468 iBitPrev++;
6469 if (cBits <= (uint32_t)iBitPrev)
6470 return -1;
6471 }
6472
6473 /*
6474 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6475 */
6476 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6477 if (iBit >= 0)
6478 iBit += iBitPrev;
6479 return iBit;
6480}
6481#endif
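
/* Usage sketch (illustrative only): scanning an allocation bitmap for a free
 * slot, skipping candidates a caller supplied predicate rejects. The
 * rtFooSlotIsUsable predicate is hypothetical; cBits must be a multiple of 32
 * as documented.
 *
 *      static int32_t rtFooFindUsableSlot(const volatile void *pvBitmap, uint32_t cBits)
 *      {
 *          int32_t iBit = ASMBitFirstClear(pvBitmap, cBits);
 *          while (iBit >= 0 && !rtFooSlotIsUsable(iBit))
 *              iBit = ASMBitNextClear(pvBitmap, cBits, iBit);
 *          return iBit; // -1 if no suitable slot was found
 *      }
 */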
6482
6483
6484/**
6485 * Finds the first set bit in a bitmap.
6486 *
6487 * @returns Index of the first set bit.
6488 * @returns -1 if no set bit was found.
6489 * @param pvBitmap Pointer to the bitmap.
6490 * @param cBits The number of bits in the bitmap. Multiple of 32.
6491 */
6492#if RT_INLINE_ASM_EXTERNAL
6493DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6494#else
6495DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6496{
6497 if (cBits)
6498 {
6499 int32_t iBit;
6500# if RT_INLINE_ASM_GNU_STYLE
6501 RTCCUINTREG uEAX, uECX, uEDI;
6502 cBits = RT_ALIGN_32(cBits, 32);
6503 __asm__ __volatile__("repe; scasl\n\t"
6504 "je 1f\n\t"
6505# ifdef RT_ARCH_AMD64
6506 "lea -4(%%rdi), %%rdi\n\t"
6507 "movl (%%rdi), %%eax\n\t"
6508 "subq %5, %%rdi\n\t"
6509# else
6510 "lea -4(%%edi), %%edi\n\t"
6511 "movl (%%edi), %%eax\n\t"
6512 "subl %5, %%edi\n\t"
6513# endif
6514 "shll $3, %%edi\n\t"
6515 "bsfl %%eax, %%edx\n\t"
6516 "addl %%edi, %%edx\n\t"
6517 "1:\t\n"
6518 : "=d" (iBit),
6519 "=&c" (uECX),
6520 "=&D" (uEDI),
6521 "=&a" (uEAX)
6522 : "0" (0xffffffff),
6523 "mr" (pvBitmap),
6524 "1" (cBits >> 5),
6525 "2" (pvBitmap),
6526 "3" (0));
6527# else
6528 cBits = RT_ALIGN_32(cBits, 32);
6529 __asm
6530 {
6531# ifdef RT_ARCH_AMD64
6532 mov rdi, [pvBitmap]
6533 mov rbx, rdi
6534# else
6535 mov edi, [pvBitmap]
6536 mov ebx, edi
6537# endif
6538 mov edx, 0ffffffffh
6539 xor eax, eax
6540 mov ecx, [cBits]
6541 shr ecx, 5
6542 repe scasd
6543 je done
6544# ifdef RT_ARCH_AMD64
6545 lea rdi, [rdi - 4]
6546 mov eax, [rdi]
6547 sub rdi, rbx
6548# else
6549 lea edi, [edi - 4]
6550 mov eax, [edi]
6551 sub edi, ebx
6552# endif
6553 shl edi, 3
6554 bsf edx, eax
6555 add edx, edi
6556 done:
6557 mov [iBit], edx
6558 }
6559# endif
6560 return iBit;
6561 }
6562 return -1;
6563}
6564#endif
6565
6566
6567/**
6568 * Finds the next set bit in a bitmap.
6569 *
6570 * @returns Index of the next set bit.
6571 * @returns -1 if no set bit was found.
6572 * @param pvBitmap Pointer to the bitmap.
6573 * @param cBits The number of bits in the bitmap. Multiple of 32.
6574 * @param iBitPrev The bit returned from the last search.
6575 * The search will start at iBitPrev + 1.
6576 */
6577#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6578DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6579#else
6580DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6581{
6582 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6583 int iBit = ++iBitPrev & 31;
6584 if (iBit)
6585 {
6586 /*
6587 * Inspect the 32-bit word containing the unaligned bit.
6588 */
6589 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6590
6591# if RT_INLINE_ASM_USES_INTRIN
6592 unsigned long ulBit = 0;
6593 if (_BitScanForward(&ulBit, u32))
6594 return ulBit + iBitPrev;
6595# else
6596# if RT_INLINE_ASM_GNU_STYLE
6597 __asm__ __volatile__("bsf %1, %0\n\t"
6598 "jnz 1f\n\t"
6599 "movl $-1, %0\n\t"
6600 "1:\n\t"
6601 : "=r" (iBit)
6602 : "r" (u32));
6603# else
6604 __asm
6605 {
6606 mov edx, [u32]
6607 bsf eax, edx
6608 jnz done
6609 mov eax, 0ffffffffh
6610 done:
6611 mov [iBit], eax
6612 }
6613# endif
6614 if (iBit >= 0)
6615 return iBit + iBitPrev;
6616# endif
6617
6618 /*
6619 * Skip ahead and see if there is anything left to search.
6620 */
6621 iBitPrev |= 31;
6622 iBitPrev++;
6623 if (cBits <= (uint32_t)iBitPrev)
6624 return -1;
6625 }
6626
6627 /*
6628 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6629 */
6630 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6631 if (iBit >= 0)
6632 iBit += iBitPrev;
6633 return iBit;
6634}
6635#endif
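
/*
 * Usage sketch (illustrative only; the bitmap buffer is a made-up example):
 * enumerating every set bit with ASMBitFirstSet / ASMBitNextSet.
 *
 *      uint32_t au32Bitmap[2] = { UINT32_C(0x00000011), UINT32_C(0x80000000) };
 *      int      iBit = ASMBitFirstSet(&au32Bitmap[0], 64);
 *      while (iBit >= 0)
 *      {
 *          // visits bits 0, 4 and 63, in that order
 *          iBit = ASMBitNextSet(&au32Bitmap[0], 64, iBit);
 *      }
 */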
6636
6637
6638/**
6639 * Finds the first bit which is set in the given 32-bit integer.
6640 * Bits are numbered from 1 (least significant) to 32.
6641 *
6642 * @returns index [1..32] of the first set bit.
6643 * @returns 0 if all bits are cleared.
6644 * @param u32 Integer to search for set bits.
6645 * @remark Similar to ffs() in BSD.
6646 */
6647DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6648{
6649# if RT_INLINE_ASM_USES_INTRIN
6650 unsigned long iBit;
6651 if (_BitScanForward(&iBit, u32))
6652 iBit++;
6653 else
6654 iBit = 0;
6655# elif RT_INLINE_ASM_GNU_STYLE
6656 uint32_t iBit;
6657 __asm__ __volatile__("bsf %1, %0\n\t"
6658 "jnz 1f\n\t"
6659 "xorl %0, %0\n\t"
6660 "jmp 2f\n"
6661 "1:\n\t"
6662 "incl %0\n"
6663 "2:\n\t"
6664 : "=r" (iBit)
6665 : "rm" (u32));
6666# else
6667 uint32_t iBit;
6668 _asm
6669 {
6670 bsf eax, [u32]
6671 jnz found
6672 xor eax, eax
6673 jmp done
6674 found:
6675 inc eax
6676 done:
6677 mov [iBit], eax
6678 }
6679# endif
6680 return iBit;
6681}
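
/*
 * Usage sketch (illustrative values): note the 1-based return convention,
 * mirroring BSD ffs().
 *
 *      unsigned iBit1 = ASMBitFirstSetU32(UINT32_C(0x00000001));   // -> 1
 *      unsigned iBit2 = ASMBitFirstSetU32(UINT32_C(0x00008000));   // -> 16
 *      unsigned iBit3 = ASMBitFirstSetU32(0);                      // -> 0 (no bits set)
 */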
6682
6683
6684/**
6685 * Finds the first bit which is set in the given 32-bit integer.
6686 * Bits are numbered from 1 (least significant) to 32.
6687 *
6688 * @returns index [1..32] of the first set bit.
6689 * @returns 0 if all bits are cleared.
6690 * @param i32 Integer to search for set bits.
6691 * @remark Similar to ffs() in BSD.
6692 */
6693DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6694{
6695 return ASMBitFirstSetU32((uint32_t)i32);
6696}
6697
6698
6699/**
6700 * Finds the last bit which is set in the given 32-bit integer.
6701 * Bits are numbered from 1 (least significant) to 32.
6702 *
6703 * @returns index [1..32] of the last set bit.
6704 * @returns 0 if all bits are cleared.
6705 * @param u32 Integer to search for set bits.
6706 * @remark Similar to fls() in BSD.
6707 */
6708DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6709{
6710# if RT_INLINE_ASM_USES_INTRIN
6711 unsigned long iBit;
6712 if (_BitScanReverse(&iBit, u32))
6713 iBit++;
6714 else
6715 iBit = 0;
6716# elif RT_INLINE_ASM_GNU_STYLE
6717 uint32_t iBit;
6718 __asm__ __volatile__("bsrl %1, %0\n\t"
6719 "jnz 1f\n\t"
6720 "xorl %0, %0\n\t"
6721 "jmp 2f\n"
6722 "1:\n\t"
6723 "incl %0\n"
6724 "2:\n\t"
6725 : "=r" (iBit)
6726 : "rm" (u32));
6727# else
6728 uint32_t iBit;
6729 _asm
6730 {
6731 bsr eax, [u32]
6732 jnz found
6733 xor eax, eax
6734 jmp done
6735 found:
6736 inc eax
6737 done:
6738 mov [iBit], eax
6739 }
6740# endif
6741 return iBit;
6742}
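
/*
 * Usage sketch (illustrative values): like BSD fls() the return is 1-based,
 * so for a non-zero input the result minus one is the index of the most
 * significant set bit, i.e. floor(log2(u32)).
 *
 *      unsigned iBit1 = ASMBitLastSetU32(UINT32_C(0x80000000));    // -> 32
 *      unsigned iBit2 = ASMBitLastSetU32(UINT32_C(0x00000300));    // -> 10
 *      unsigned iBit3 = ASMBitLastSetU32(0);                       // -> 0 (no bits set)
 */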
6743
6744
6745/**
6746 * Finds the last bit which is set in the given 32-bit integer.
6747 * Bits are numbered from 1 (least significant) to 32.
6748 *
6749 * @returns index [1..32] of the last set bit.
6750 * @returns 0 if all bits are cleared.
6751 * @param i32 Integer to search for set bits.
6752 * @remark Similar to fls() in BSD.
6753 */
6754DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6755{
6756 return ASMBitLastSetU32((uint32_t)i32);
6757}
6758
6759/**
6760 * Reverse the byte order of the given 16-bit integer.
6761 *
6762 * @returns The byte-swapped value.
6763 * @param u16 16-bit integer value.
6764 */
6765DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6766{
6767#if RT_INLINE_ASM_USES_INTRIN
6768 u16 = _byteswap_ushort(u16);
6769#elif RT_INLINE_ASM_GNU_STYLE
6770 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6771#else
6772 _asm
6773 {
6774 mov ax, [u16]
6775 ror ax, 8
6776 mov [u16], ax
6777 }
6778#endif
6779 return u16;
6780}
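
/*
 * Usage sketch (illustrative value): a 16-bit byte swap, e.g. for converting
 * a big-endian on-wire port number on a little-endian host.
 *
 *      uint16_t u16Swapped = ASMByteSwapU16(UINT16_C(0x1234));     // -> 0x3412
 */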
6781
6782/**
6783 * Reverse the byte order of the given 32-bit integer.
6784 *
6785 * @returns The byte-swapped value.
6786 * @param u32 32-bit integer value.
6787 */
6788DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6789{
6790#if RT_INLINE_ASM_USES_INTRIN
6791 u32 = _byteswap_ulong(u32);
6792#elif RT_INLINE_ASM_GNU_STYLE
6793 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6794#else
6795 _asm
6796 {
6797 mov eax, [u32]
6798 bswap eax
6799 mov [u32], eax
6800 }
6801#endif
6802 return u32;
6803}
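
/*
 * Usage sketch (illustrative value):
 *
 *      uint32_t u32Swapped = ASMByteSwapU32(UINT32_C(0x12345678)); // -> 0x78563412
 */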
6804
6805
6806/**
6807 * Reverse the byte order of the given 64-bit integer.
6808 *
6809 * @returns The byte-swapped value.
6810 * @param u64 64-bit integer value.
6811 */
6812DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6813{
6814#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6815 u64 = _byteswap_uint64(u64);
6816#else
6817 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6818 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6819#endif
6820 return u64;
6821}
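
/*
 * Usage sketch (illustrative value): on 32-bit hosts the swap is composed of
 * the two 32-bit swaps shown above.
 *
 *      uint64_t u64Swapped = ASMByteSwapU64(UINT64_C(0x0123456789abcdef));
 *      // -> 0xefcdab8967452301
 */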
6822
6823
6824/** @} */
6825
6826
6827/** @} */
6828#endif
6829