VirtualBox

Changeset 29245 in vbox


Timestamp: May 9, 2010 3:06:56 PM (15 years ago)
Author: vboxsync
svn:sync-xref-src-repo-rev: 61327
Message: include/iprt/asm.h: split out the bits which are obviously x86 and/or AMD64 specific.
Location: trunk/include/iprt
Files: 1 edited, 1 copied

  • trunk/include/iprt/asm-amd64-x86.h

    r29234 r29245  
    11/** @file
    2  * IPRT - Assembly Functions.
     2 * IPRT - AMD64 and x86 Specific Assembly Functions.
    33 */
    44
    55/*
    6  * Copyright (C) 2006-2007 Oracle Corporation
     6 * Copyright (C) 2006-2010 Oracle Corporation
    77 *
    88 * This file is part of VirtualBox Open Source Edition (OSE), as
     
    2424 */
    2525
    26 #ifndef ___iprt_asm_h
    27 #define ___iprt_asm_h
    28 
    29 #include <iprt/cdefs.h>
    30 #include <iprt/types.h>
    31 #include <iprt/assert.h>
    32 /** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */
    33 /** @def RT_INLINE_ASM_USES_INTRIN
    34  * Defined as 1 if we're using a _MSC_VER >= 1400.
    35  * Otherwise defined as 0.
    36  */
    37 
    38 /* Solaris 10 header ugliness */
    39 #ifdef u
    40 #undef u
    41 #endif
     26#ifndef ___iprt_asm_amd64_x86_h
     27#define ___iprt_asm_amd64_x86_h
     28
     29/* We depend on several defines and pragmas that live in iprt/asm.h. */
     30#include <iprt/asm.h>
    4231
    4332#ifdef _MSC_VER
    44 # if _MSC_VER >= 1400
    45 #  define RT_INLINE_ASM_USES_INTRIN 1
    46 #  include <intrin.h>
     33# if _MSC_VER >= 1400 && RT_INLINE_ASM_USES_INTRIN
    4734   /* Emit the intrinsics at all optimization levels. */
    4835#  pragma intrinsic(_ReadWriteBarrier)
     
    6754#  pragma intrinsic(__invlpg)
    6855#  pragma intrinsic(__wbinvd)
    69 #  pragma intrinsic(__stosd)
    70 #  pragma intrinsic(__stosw)
    71 #  pragma intrinsic(__stosb)
    7256#  pragma intrinsic(__readcr0)
    7357#  pragma intrinsic(__readcr2)
     
    7963#  pragma intrinsic(__readdr)
    8064#  pragma intrinsic(__writedr)
    81 #  pragma intrinsic(_BitScanForward)
    82 #  pragma intrinsic(_BitScanReverse)
    83 #  pragma intrinsic(_bittest)
    84 #  pragma intrinsic(_bittestandset)
    85 #  pragma intrinsic(_bittestandreset)
    86 #  pragma intrinsic(_bittestandcomplement)
    87 #  pragma intrinsic(_byteswap_ushort)
    88 #  pragma intrinsic(_byteswap_ulong)
    89 #  pragma intrinsic(_interlockedbittestandset)
    90 #  pragma intrinsic(_interlockedbittestandreset)
    91 #  pragma intrinsic(_InterlockedAnd)
    92 #  pragma intrinsic(_InterlockedOr)
    93 #  pragma intrinsic(_InterlockedIncrement)
    94 #  pragma intrinsic(_InterlockedDecrement)
    95 #  pragma intrinsic(_InterlockedExchange)
    96 #  pragma intrinsic(_InterlockedExchangeAdd)
    97 #  pragma intrinsic(_InterlockedCompareExchange)
    98 #  pragma intrinsic(_InterlockedCompareExchange64)
    99 #  ifdef RT_ARCH_AMD64
    100 #   pragma intrinsic(_mm_mfence)
    101 #   pragma intrinsic(_mm_sfence)
    102 #   pragma intrinsic(_mm_lfence)
    103 #   pragma intrinsic(__stosq)
     65#  ifdef RT_ARCH_AMD64
    10466#   pragma intrinsic(__readcr8)
    10567#   pragma intrinsic(__writecr8)
    106 #   pragma intrinsic(_byteswap_uint64)
    107 #   pragma intrinsic(_InterlockedExchange64)
    108 #  endif
    109 # endif
    110 #endif
    111 #ifndef RT_INLINE_ASM_USES_INTRIN
    112 # define RT_INLINE_ASM_USES_INTRIN 0
    113 #endif
    114 
    115 
    116 /** @defgroup grp_asm       ASM - Assembly Routines
    117  * @ingroup grp_rt
    118  *
    119  * @remarks The difference between ordered and unordered atomic operations is that
    120  *          the former complete all outstanding reads and writes before continuing,
    121  *          while the latter make no promises about the order. Ordered operations
    122  *          don't, it seems, make any 100% promise with regard to whether the
    123  *          operation will complete before any subsequent memory access.
    124  *          (Please correct this if it is wrong.)
    125  *
    126  *          ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
    127  *          are unordered (note the Uo).
    128  *
    129  * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
    130  *          or even optimize assembler instructions away. For instance, in the following code
    131  *          the second rdmsr instruction is optimized away because gcc treats that instruction
    132  *          as deterministic:
    133  *
    134  *            @code
    135  *            static inline uint32_t rdmsr_low(int idx)
    136  *            {
    137  *              uint32_t low;
    138  *              __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
    139  *              return low;
         *            }
    140  *            ...
    141  *            uint32_t msr1 = rdmsr_low(1);
    142  *            foo(msr1);
    143  *            msr1 = rdmsr_low(1);
    144  *            bar(msr1);
    145  *            @endcode
    146  *
    147  *          The input parameter of rdmsr_low is the same for both calls and therefore gcc will
    148  *          use the result of the first call as input parameter for bar() as well. For rdmsr this
    149  *          is not acceptable as this instruction is _not_ deterministic. This applies to reading
    150  *          machine status information in general.
    151  *
     68#  endif
     69# endif
     70#endif
     71
     72
     73
     74/** @defgroup grp_rt_asm_amd64_x86  AMD64 and x86 Specific ASM Routines
     75 * @ingroup grp_rt_asm
    15276 * @{
    15377 */
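
To make the remark above concrete, here is a minimal sketch of the same helper with the __volatile__ keyword added, which is the fix the comment is arguing for; rdmsr_low_volatile is a hypothetical name for illustration, not a routine from this header:

    /* Sketch: with __volatile__ gcc must re-emit the rdmsr for every call,
       so the second read in the msr1/foo()/bar() example above is no longer
       folded into the first one. */
    static inline uint32_t rdmsr_low_volatile(int idx)
    {
        uint32_t low;
        __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(idx) : "edx");
        return low;
    }
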
    154 
    155 /** @def RT_INLINE_ASM_GCC_4_3_X_X86
    156  * Used to work around some 4.3.x register allocation issues in this version of
    157  * the compiler. So far this workaround is still required for 4.4 and 4.5. */
    158 #ifdef __GNUC__
    159 # define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
    160 #endif
    161 #ifndef RT_INLINE_ASM_GCC_4_3_X_X86
    162 # define RT_INLINE_ASM_GCC_4_3_X_X86 0
    163 #endif
    164 
    165 /** @def RT_INLINE_DONT_USE_CMPXCHG8B
    166  * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
    167  * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
    168  * mode, x86.
    169  *
    170  * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
    171  * when in PIC mode on x86.
    172  */
    173 #ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    174 # define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
    175     (   (defined(PIC) || defined(__PIC__)) \
    176      && defined(RT_ARCH_X86) \
    177      && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
    178          || defined(RT_OS_DARWIN)) )
    179 #endif
    180 
    181 /** @def RT_INLINE_ASM_EXTERNAL
    182  * Defined as 1 if the compiler does not support inline assembly.
    183  * The ASM* functions will then be implemented in an external .asm file.
    184  *
    185  * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
    186  *          inline assembly in their AMD64 compiler.
    187  */
    188 #if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
    189 # define RT_INLINE_ASM_EXTERNAL 1
    190 #else
    191 # define RT_INLINE_ASM_EXTERNAL 0
    192 #endif
    193 
    194 /** @def RT_INLINE_ASM_GNU_STYLE
    195  * Defined as 1 if the compiler understands GNU style inline assembly.
    196  */
    197 #if defined(_MSC_VER)
    198 # define RT_INLINE_ASM_GNU_STYLE 0
    199 #else
    200 # define RT_INLINE_ASM_GNU_STYLE 1
    201 #endif
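
Taken together, these feature macros drive the three-way dispatch that every routine below follows. A condensed sketch of that pattern, with ASMSomething as a placeholder name rather than a real IPRT function:

    #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    DECLASM(void) ASMSomething(void);        /* implemented in an external .asm file */
    #else
    DECLINLINE(void) ASMSomething(void)
    {
    # if RT_INLINE_ASM_USES_INTRIN
        /* MSC intrinsic call goes here. */
    # elif RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("nop");         /* GNU-style inline assembly */
    # else
        __asm { nop }                        /* MSC-style inline assembly (32-bit only) */
    # endif
    }
    #endif
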
    202 
    20378
    20479/** @todo find a more proper place for this structure? */
     
    22499} RTGDTR, *PRTGDTR;
    225100#pragma pack()
    226 
    227 
    228 /** @def ASMReturnAddress
    229  * Gets the return address of the current (or calling if you like) function or method.
    230  */
    231 #ifdef _MSC_VER
    232 # ifdef __cplusplus
    233 extern "C"
    234 # endif
    235 void * _ReturnAddress(void);
    236 # pragma intrinsic(_ReturnAddress)
    237 # define ASMReturnAddress() _ReturnAddress()
    238 #elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
    239 # define ASMReturnAddress() __builtin_return_address(0)
    240 #else
    241 # error "Unsupported compiler."
    242 #endif
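
A small usage sketch for ASMReturnAddress, assuming iprt/log.h is included for RTLogPrintf; myTraceCaller is an illustrative helper, not an IPRT API:

    #include <iprt/asm.h>
    #include <iprt/log.h>

    /* Log which code path called us; handy when the same helper is
       reachable from several places. */
    static void myTraceCaller(void)
    {
        RTLogPrintf("myTraceCaller: called from %p\n", ASMReturnAddress());
    }
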
    243101
    244102
     
    22042062
    22052063/**
    2206  * Compiler memory barrier.
    2207  *
    2208  * Ensure that the compiler does not use any cached (register/tmp stack) memory
    2209  * values or any outstanding writes when returning from this function.
    2210  *
    2211  * This function must be used if non-volatile data is modified by a
    2212  * device or the VMM. Typical cases are port access, MMIO access,
    2213  * trapping instruction, etc.
    2214  */
    2215 #if RT_INLINE_ASM_GNU_STYLE
    2216 # define ASMCompilerBarrier()   do { __asm__ __volatile__("" : : : "memory"); } while (0)
    2217 #elif RT_INLINE_ASM_USES_INTRIN
    2218 # define ASMCompilerBarrier()   do { _ReadWriteBarrier(); } while (0)
    2219 #else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
    2220 DECLINLINE(void) ASMCompilerBarrier(void)
    2221 {
    2222     __asm
    2223     {
    2224     }
    2225 }
    2226 #endif
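
A hedged usage sketch for the compiler barrier: polling a plain (non-volatile) status word that a device or the VMM updates behind the compiler's back. The status layout and MY_STATUS_READY flag are assumptions for illustration only:

    #include <iprt/asm.h>
    #include <iprt/types.h>

    #define MY_STATUS_READY  UINT32_C(0x00000001)   /* illustrative flag */

    static void myWaitForDeviceReady(uint32_t *pu32Status)
    {
        /* Without the barrier the compiler could hoist the load out of the
           loop and spin on a stale register copy of *pu32Status. */
        while (!(*pu32Status & MY_STATUS_READY))
            ASMCompilerBarrier();
    }
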
    2227 
    2228 
    2229 /**
    22302064 * Writes an 8-bit unsigned integer to an I/O port, ordered.
    22312065 *
     
    26362470
    26372471/**
    2638  * Atomically Exchange an unsigned 8-bit value, ordered.
    2639  *
    2640  * @returns Current *pu8 value
    2641  * @param   pu8    Pointer to the 8-bit variable to update.
    2642  * @param   u8     The 8-bit value to assign to *pu8.
     2472 * Invalidate page.
     2473 *
     2474 * @param   pv      Address of the page to invalidate.
     2475 */
     2476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
     2477DECLASM(void) ASMInvalidatePage(void *pv);
     2478#else
     2479DECLINLINE(void) ASMInvalidatePage(void *pv)
     2480{
     2481# if RT_INLINE_ASM_USES_INTRIN
     2482    __invlpg(pv);
     2483
     2484# elif RT_INLINE_ASM_GNU_STYLE
     2485    __asm__ __volatile__("invlpg %0\n\t"
     2486                         : : "m" (*(uint8_t *)pv));
     2487# else
     2488    __asm
     2489    {
     2490#  ifdef RT_ARCH_AMD64
     2491        mov     rax, [pv]
     2492        invlpg  [rax]
     2493#  else
     2494        mov     eax, [pv]
     2495        invlpg  [eax]
     2496#  endif
     2497    }
     2498# endif
     2499}
     2500#endif
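
A hedged usage sketch for ASMInvalidatePage: after rewriting the page-table entry that maps a linear address, drop the stale TLB entry so the next access observes the new mapping; myWritePte is a hypothetical helper, not an IPRT or VMM API:

    #include <iprt/asm-amd64-x86.h>

    void myWritePte(void *pvPage, uint64_t u64Pte);   /* hypothetical PTE update */

    static void myRemapPage(void *pvPage, uint64_t u64NewPte)
    {
        myWritePte(pvPage, u64NewPte);   /* hypothetical */
        ASMInvalidatePage(pvPage);       /* flush the old translation for pvPage */
    }
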
     2501
     2502
     2503/**
     2504 * Write back the internal caches and invalidate them.
     2505 */
     2506#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
     2507DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
     2508#else
     2509DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
     2510{
     2511# if RT_INLINE_ASM_USES_INTRIN
     2512    __wbinvd();
     2513
     2514# elif RT_INLINE_ASM_GNU_STYLE
     2515    __asm__ __volatile__("wbinvd");
     2516# else
     2517    __asm
     2518    {
     2519        wbinvd
     2520    }
     2521# endif
     2522}
     2523#endif
     2524
     2525
     2526/**
     2527 * Invalidate internal and (perhaps) external caches without first
     2528 * flushing dirty cache lines. Use with extreme care.
    26432529 */
    26442530#if RT_INLINE_ASM_EXTERNAL
    2645 DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
    2646 #else
    2647 DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
    2648 {
    2649 # if RT_INLINE_ASM_GNU_STYLE
    2650     __asm__ __volatile__("xchgb %0, %1\n\t"
    2651                          : "=m" (*pu8),
    2652                            "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
    2653                          : "1" (u8),
    2654                            "m" (*pu8));
    2655 # else
    2656     __asm
    2657     {
    2658 #  ifdef RT_ARCH_AMD64
    2659         mov     rdx, [pu8]
    2660         mov     al, [u8]
    2661         xchg    [rdx], al
    2662         mov     [u8], al
    2663 #  else
    2664         mov     edx, [pu8]
    2665         mov     al, [u8]
    2666         xchg    [edx], al
    2667         mov     [u8], al
    2668 #  endif
    2669     }
    2670 # endif
    2671     return u8;
    2672 }
    2673 #endif
    2674 
    2675 
    2676 /**
    2677  * Atomically Exchange a signed 8-bit value, ordered.
    2678  *
    2679  * @returns Current *pu8 value
    2680  * @param   pi8     Pointer to the 8-bit variable to update.
    2681  * @param   i8      The 8-bit value to assign to *pi8.
    2682  */
    2683 DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
    2684 {
    2685     return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
    2686 }
    2687 
    2688 
    2689 /**
    2690  * Atomically Exchange a bool value, ordered.
    2691  *
    2692  * @returns Current *pf value
    2693  * @param   pf      Pointer to the 8-bit variable to update.
    2694  * @param   f       The 8-bit value to assign to *pf.
    2695  */
    2696 DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
    2697 {
    2698 #ifdef _MSC_VER
    2699     return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
    2700 #else
    2701     return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
    2702 #endif
    2703 }
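
A usage sketch for ASMAtomicXchgBool: a one-shot guard where only the first caller performs the initialization; g_fInitClaimed and the init body are illustrative:

    static volatile bool g_fInitClaimed = false;   /* illustrative global */

    static void myInitOnce(void)
    {
        /* The returned value is the previous one: false means we won. */
        if (!ASMAtomicXchgBool(&g_fInitClaimed, true))
        {
            /* ... perform the one-time setup here ... */
        }
    }
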
    2704 
    2705 
    2706 /**
    2707  * Atomically Exchange an unsigned 16-bit value, ordered.
    2708  *
    2709  * @returns Current *pu16 value
    2710  * @param   pu16    Pointer to the 16-bit variable to update.
    2711  * @param   u16     The 16-bit value to assign to *pu16.
    2712  */
    2713 #if RT_INLINE_ASM_EXTERNAL
    2714 DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
    2715 #else
    2716 DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
    2717 {
    2718 # if RT_INLINE_ASM_GNU_STYLE
    2719     __asm__ __volatile__("xchgw %0, %1\n\t"
    2720                          : "=m" (*pu16),
    2721                            "=r" (u16)
    2722                          : "1" (u16),
    2723                            "m" (*pu16));
    2724 # else
    2725     __asm
    2726     {
    2727 #  ifdef RT_ARCH_AMD64
    2728         mov     rdx, [pu16]
    2729         mov     ax, [u16]
    2730         xchg    [rdx], ax
    2731         mov     [u16], ax
    2732 #  else
    2733         mov     edx, [pu16]
    2734         mov     ax, [u16]
    2735         xchg    [edx], ax
    2736         mov     [u16], ax
    2737 #  endif
    2738     }
    2739 # endif
    2740     return u16;
    2741 }
    2742 #endif
    2743 
    2744 
    2745 /**
    2746  * Atomically Exchange a signed 16-bit value, ordered.
    2747  *
    2748  * @returns Current *pi16 value
    2749  * @param   pi16    Pointer to the 16-bit variable to update.
    2750  * @param   i16     The 16-bit value to assign to *pi16.
    2751  */
    2752 DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
    2753 {
    2754     return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
    2755 }
    2756 
    2757 
    2758 /**
    2759  * Atomically Exchange an unsigned 32-bit value, ordered.
    2760  *
    2761  * @returns Current *pu32 value
    2762  * @param   pu32    Pointer to the 32-bit variable to update.
    2763  * @param   u32     The 32-bit value to assign to *pu32.
    2764  */
    2765 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2766 DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
    2767 #else
    2768 DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
    2769 {
    2770 # if RT_INLINE_ASM_GNU_STYLE
    2771     __asm__ __volatile__("xchgl %0, %1\n\t"
    2772                          : "=m" (*pu32),
    2773                            "=r" (u32)
    2774                          : "1" (u32),
    2775                            "m" (*pu32));
    2776 
    2777 # elif RT_INLINE_ASM_USES_INTRIN
    2778    u32 = _InterlockedExchange((long *)pu32, u32);
    2779 
    2780 # else
    2781     __asm
    2782     {
    2783 #  ifdef RT_ARCH_AMD64
    2784         mov     rdx, [pu32]
    2785         mov     eax, u32
    2786         xchg    [rdx], eax
    2787         mov     [u32], eax
    2788 #  else
    2789         mov     edx, [pu32]
    2790         mov     eax, u32
    2791         xchg    [edx], eax
    2792         mov     [u32], eax
    2793 #  endif
    2794     }
    2795 # endif
    2796     return u32;
    2797 }
    2798 #endif
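
Since xchg is the classic test-and-set primitive, a toy spinlock can be sketched on top of ASMAtomicXchgU32; real code should prefer RTSpinlock or RTCritSect, this only illustrates the ordered exchange:

    /* 0 = free, 1 = taken; the exchange is ordered, so it doubles as the
       acquire/release fence on x86/AMD64. */
    static void mySpinAcquire(uint32_t volatile *pu32Lock)
    {
        while (ASMAtomicXchgU32(pu32Lock, 1) != 0)
            /* busy wait */;
    }

    static void mySpinRelease(uint32_t volatile *pu32Lock)
    {
        ASMAtomicXchgU32(pu32Lock, 0);
    }
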
    2799 
    2800 
    2801 /**
    2802  * Atomically Exchange a signed 32-bit value, ordered.
    2803  *
    2804  * @returns Current *pi32 value
    2805  * @param   pi32    Pointer to the 32-bit variable to update.
    2806  * @param   i32     The 32-bit value to assign to *pi32.
    2807  */
    2808 DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
    2809 {
    2810     return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
    2811 }
    2812 
    2813 
    2814 /**
    2815  * Atomically Exchange an unsigned 64-bit value, ordered.
    2816  *
    2817  * @returns Current *pu64 value
    2818  * @param   pu64    Pointer to the 64-bit variable to update.
    2819  * @param   u64     The 64-bit value to assign to *pu64.
    2820  */
    2821 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
    2822  || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    2823 DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
    2824 #else
    2825 DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
    2826 {
    2827 # if defined(RT_ARCH_AMD64)
    2828 #  if RT_INLINE_ASM_USES_INTRIN
    2829    u64 = _InterlockedExchange64((__int64 *)pu64, u64);
    2830 
    2831 #  elif RT_INLINE_ASM_GNU_STYLE
    2832     __asm__ __volatile__("xchgq %0, %1\n\t"
    2833                          : "=m" (*pu64),
    2834                            "=r" (u64)
    2835                          : "1" (u64),
    2836                            "m" (*pu64));
    2837 #  else
    2838     __asm
    2839     {
    2840         mov     rdx, [pu64]
    2841         mov     rax, [u64]
    2842         xchg    [rdx], rax
    2843         mov     [u64], rax
    2844     }
    2845 #  endif
    2846 # else /* !RT_ARCH_AMD64 */
    2847 #  if RT_INLINE_ASM_GNU_STYLE
    2848 #   if defined(PIC) || defined(__PIC__)
    2849     uint32_t u32EBX = (uint32_t)u64;
    2850     __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
    2851                          "xchgl %%ebx, %3\n\t"
    2852                          "1:\n\t"
    2853                          "lock; cmpxchg8b (%5)\n\t"
    2854                          "jnz 1b\n\t"
    2855                          "movl %3, %%ebx\n\t"
    2856                          /*"xchgl %%esi, %5\n\t"*/
    2857                          : "=A" (u64),
    2858                            "=m" (*pu64)
    2859                          : "0" (*pu64),
    2860                            "m" ( u32EBX ),
    2861                            "c" ( (uint32_t)(u64 >> 32) ),
    2862                            "S" (pu64));
    2863 #   else /* !PIC */
    2864     __asm__ __volatile__("1:\n\t"
    2865                          "lock; cmpxchg8b %1\n\t"
    2866                          "jnz 1b\n\t"
    2867                          : "=A" (u64),
    2868                            "=m" (*pu64)
    2869                          : "0" (*pu64),
    2870                            "b" ( (uint32_t)u64 ),
    2871                            "c" ( (uint32_t)(u64 >> 32) ));
    2872 #   endif
    2873 #  else
    2874     __asm
    2875     {
    2876         mov     ebx, dword ptr [u64]
    2877         mov     ecx, dword ptr [u64 + 4]
    2878         mov     edi, pu64
    2879         mov     eax, dword ptr [edi]
    2880         mov     edx, dword ptr [edi + 4]
    2881     retry:
    2882         lock cmpxchg8b [edi]
    2883         jnz retry
    2884         mov     dword ptr [u64], eax
    2885         mov     dword ptr [u64 + 4], edx
    2886     }
    2887 #  endif
    2888 # endif /* !RT_ARCH_AMD64 */
    2889     return u64;
    2890 }
    2891 #endif
    2892 
    2893 
    2894 /**
    2895  * Atomically Exchange a signed 64-bit value, ordered.
    2896  *
    2897  * @returns Current *pi64 value
    2898  * @param   pi64    Pointer to the 64-bit variable to update.
    2899  * @param   i64     The 64-bit value to assign to *pi64.
    2900  */
    2901 DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
    2902 {
    2903     return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
    2904 }
    2905 
    2906 
    2907 /**
    2908  * Atomically Exchange a pointer value, ordered.
    2909  *
    2910  * @returns Current *ppv value
    2911  * @param   ppv    Pointer to the pointer variable to update.
    2912  * @param   pv     The pointer value to assign to *ppv.
    2913  */
    2914 DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
    2915 {
    2916 #if ARCH_BITS == 32
    2917     return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
    2918 #elif ARCH_BITS == 64
    2919     return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
    2920 #else
    2921 # error "ARCH_BITS is bogus"
    2922 #endif
    2923 }
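
A usage sketch for ASMAtomicXchgPtr: detach a shared buffer pointer in one ordered step so it can be freed outside any lock; g_pvPending is illustrative and RTMemFree comes from iprt/mem.h:

    #include <iprt/asm.h>
    #include <iprt/mem.h>

    static void * volatile g_pvPending;   /* illustrative shared pointer */

    static void myFreePending(void)
    {
        void *pvOld = ASMAtomicXchgPtr(&g_pvPending, NULL);
        if (pvOld)
            RTMemFree(pvOld);   /* we now own the detached buffer */
    }
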
    2924 
    2925 
    2926 /**
    2927  * Atomically Exchange a raw-mode context pointer value, ordered.
    2928  *
    2929  * @returns Current *ppv value
    2930  * @param   ppvRC   Pointer to the pointer variable to update.
    2931  * @param   pvRC    The pointer value to assign to *ppv.
    2932  */
    2933 DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
    2934 {
    2935     return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
    2936 }
    2937 
    2938 
    2939 /**
    2940  * Atomically Exchange a ring-0 pointer value, ordered.
    2941  *
    2942  * @returns Current *ppv value
    2943  * @param   ppvR0  Pointer to the pointer variable to update.
    2944  * @param   pvR0   The pointer value to assign to *ppv.
    2945  */
    2946 DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
    2947 {
    2948 #if R0_ARCH_BITS == 32
    2949     return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
    2950 #elif R0_ARCH_BITS == 64
    2951     return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
    2952 #else
    2953 # error "R0_ARCH_BITS is bogus"
    2954 #endif
    2955 }
    2956 
    2957 
    2958 /**
    2959  * Atomically Exchange a ring-3 pointer value, ordered.
    2960  *
    2961  * @returns Current *ppv value
    2962  * @param   ppvR3  Pointer to the pointer variable to update.
    2963  * @param   pvR3   The pointer value to assign to *ppv.
    2964  */
    2965 DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
    2966 {
    2967 #if R3_ARCH_BITS == 32
    2968     return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
    2969 #elif R3_ARCH_BITS == 64
    2970     return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
    2971 #else
    2972 # error "R3_ARCH_BITS is bogus"
    2973 #endif
    2974 }
    2975 
    2976 
    2977 /** @def ASMAtomicXchgHandle
    2978  * Atomically Exchange a typical IPRT handle value, ordered.
    2979  *
    2980  * @param   ph          Pointer to the value to update.
    2981  * @param   hNew        The new value to assign to *ph.
    2982  * @param   phRes       Where to store the current *ph value.
    2983  *
    2984  * @remarks This doesn't currently work for all handles (like RTFILE).
    2985  */
    2986 #if HC_ARCH_BITS == 32
    2987 # define ASMAtomicXchgHandle(ph, hNew, phRes) \
    2988    do { \
    2989        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
    2990        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
    2991        *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
    2992    } while (0)
    2993 #elif HC_ARCH_BITS == 64
    2994 # define ASMAtomicXchgHandle(ph, hNew, phRes) \
    2995    do { \
    2996        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
    2997        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
    2998        *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
    2999    } while (0)
    3000 #else
    3001 # error HC_ARCH_BITS
    3002 #endif
    3003 
    3004 
    3005 /**
    3006  * Atomically Exchange a value which size might differ
    3007  * between platforms or compilers, ordered.
    3008  *
    3009  * @param   pu      Pointer to the variable to update.
    3010  * @param   uNew    The value to assign to *pu.
    3011  * @todo This is busted as it's missing the result argument.
    3012  */
    3013 #define ASMAtomicXchgSize(pu, uNew) \
    3014     do { \
    3015         switch (sizeof(*(pu))) { \
    3016             case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
    3017             case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
    3018             case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
    3019             case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
    3020             default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
    3021         } \
    3022     } while (0)
    3023 
    3024 /**
    3025  * Atomically Exchange a value which size might differ
    3026  * between platforms or compilers, ordered.
    3027  *
    3028  * @param   pu      Pointer to the variable to update.
    3029  * @param   uNew    The value to assign to *pu.
    3030  * @param   puRes   Where to store the current *pu value.
    3031  */
    3032 #define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    3033     do { \
    3034         switch (sizeof(*(pu))) { \
    3035             case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
    3036             case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
    3037             case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
    3038             case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
    3039             default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
    3040         } \
    3041     } while (0)
    3042 
    3043 
    3044 
    3045 /**
    3046  * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
    3047  *
    3048  * @returns true if xchg was done.
    3049  * @returns false if xchg wasn't done.
    3050  *
    3051  * @param   pu8         Pointer to the value to update.
    3052  * @param   u8New       The new value to assign to *pu8.
    3053  * @param   u8Old       The old value to compare *pu8 with.
    3054  */
    3055 #if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
    3056 DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
    3057 #else
    3058 DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
    3059 {
    3060     uint8_t u8Ret;
    3061     __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
    3062                          "setz  %1\n\t"
    3063                          : "=m" (*pu8),
    3064                            "=qm" (u8Ret),
    3065                            "=a" (u8Old)
    3066                          : "q" (u8New),
    3067                            "2" (u8Old),
    3068                            "m" (*pu8));
    3069     return (bool)u8Ret;
    3070 }
    3071 #endif
    3072 
    3073 
    3074 /**
    3075  * Atomically Compare and Exchange a signed 8-bit value, ordered.
    3076  *
    3077  * @returns true if xchg was done.
    3078  * @returns false if xchg wasn't done.
    3079  *
    3080  * @param   pi8         Pointer to the value to update.
    3081  * @param   i8New       The new value to assign to *pi8.
    3082  * @param   i8Old       The old value to compare *pi8 with.
    3083  */
    3084 DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
    3085 {
    3086     return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
    3087 }
    3088 
    3089 
    3090 /**
    3091  * Atomically Compare and Exchange a bool value, ordered.
    3092  *
    3093  * @returns true if xchg was done.
    3094  * @returns false if xchg wasn't done.
    3095  *
    3096  * @param   pf          Pointer to the value to update.
    3097  * @param   fNew        The new value to assign to *pf.
    3098  * @param   fOld        The old value to compare *pf with.
    3099  */
    3100 DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
    3101 {
    3102     return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
    3103 }
    3104 
    3105 
    3106 /**
    3107  * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
    3108  *
    3109  * @returns true if xchg was done.
    3110  * @returns false if xchg wasn't done.
    3111  *
    3112  * @param   pu32        Pointer to the value to update.
    3113  * @param   u32New      The new value to assign to *pu32.
    3114  * @param   u32Old      The old value to compare *pu32 with.
    3115  */
    3116 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3117 DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
    3118 #else
    3119 DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
    3120 {
    3121 # if RT_INLINE_ASM_GNU_STYLE
    3122     uint8_t u8Ret;
    3123     __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
    3124                          "setz  %1\n\t"
    3125                          : "=m" (*pu32),
    3126                            "=qm" (u8Ret),
    3127                            "=a" (u32Old)
    3128                          : "r" (u32New),
    3129                            "2" (u32Old),
    3130                            "m" (*pu32));
    3131     return (bool)u8Ret;
    3132 
    3133 # elif RT_INLINE_ASM_USES_INTRIN
    3134     return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
    3135 
    3136 # else
    3137     uint32_t u32Ret;
    3138     __asm
    3139     {
    3140 #  ifdef RT_ARCH_AMD64
    3141         mov     rdx, [pu32]
    3142 #  else
    3143         mov     edx, [pu32]
    3144 #  endif
    3145         mov     eax, [u32Old]
    3146         mov     ecx, [u32New]
    3147 #  ifdef RT_ARCH_AMD64
    3148         lock cmpxchg [rdx], ecx
    3149 #  else
    3150         lock cmpxchg [edx], ecx
    3151 #  endif
    3152         setz    al
    3153         movzx   eax, al
    3154         mov     [u32Ret], eax
    3155     }
    3156     return !!u32Ret;
    3157 # endif
    3158 }
    3159 #endif
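
The usual way to consume the compare-exchange is a retry loop; a sketch of an "atomic maximum" built on ASMAtomicCmpXchgU32 (myAtomicMaxU32 is an illustrative helper, not part of IPRT):

    /* Raise *pu32 to u32New if u32New is larger, atomically. */
    static void myAtomicMaxU32(uint32_t volatile *pu32, uint32_t u32New)
    {
        for (;;)
        {
            uint32_t u32Cur = *pu32;
            if (u32Cur >= u32New)
                return;                                   /* nothing to do */
            if (ASMAtomicCmpXchgU32(pu32, u32New, u32Cur))
                return;                                   /* we won the race */
            /* lost the race: reload and try again */
        }
    }
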
    3160 
    3161 
    3162 /**
    3163  * Atomically Compare and Exchange a signed 32-bit value, ordered.
    3164  *
    3165  * @returns true if xchg was done.
    3166  * @returns false if xchg wasn't done.
    3167  *
    3168  * @param   pi32        Pointer to the value to update.
    3169  * @param   i32New      The new value to assign to *pi32.
    3170  * @param   i32Old      The old value to compare *pi32 with.
    3171  */
    3172 DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
    3173 {
    3174     return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
    3175 }
    3176 
    3177 
    3178 /**
    3179  * Atomically Compare and exchange an unsigned 64-bit value, ordered.
    3180  *
    3181  * @returns true if xchg was done.
    3182  * @returns false if xchg wasn't done.
    3183  *
    3184  * @param   pu64    Pointer to the 64-bit variable to update.
    3185  * @param   u64New  The 64-bit value to assign to *pu64.
    3186  * @param   u64Old  The value to compare with.
    3187  */
    3188 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
    3189  || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    3190 DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
    3191 #else
    3192 DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
    3193 {
    3194 # if RT_INLINE_ASM_USES_INTRIN
    3195    return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
    3196 
    3197 # elif defined(RT_ARCH_AMD64)
    3198 #  if RT_INLINE_ASM_GNU_STYLE
    3199     uint8_t u8Ret;
    3200     __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
    3201                          "setz  %1\n\t"
    3202                          : "=m" (*pu64),
    3203                            "=qm" (u8Ret),
    3204                            "=a" (u64Old)
    3205                          : "r" (u64New),
    3206                            "2" (u64Old),
    3207                            "m" (*pu64));
    3208     return (bool)u8Ret;
    3209 #  else
    3210     bool fRet;
    3211     __asm
    3212     {
    3213         mov     rdx, [pu64]
    3214         mov     rax, [u64Old]
    3215         mov     rcx, [u64New]
    3216         lock cmpxchg [rdx], rcx
    3217         setz    al
    3218         mov     [fRet], al
    3219     }
    3220     return fRet;
    3221 #  endif
    3222 # else /* !RT_ARCH_AMD64 */
    3223     uint32_t u32Ret;
    3224 #  if RT_INLINE_ASM_GNU_STYLE
    3225 #   if defined(PIC) || defined(__PIC__)
    3226     uint32_t u32EBX = (uint32_t)u64New;
    3227     uint32_t u32Spill;
    3228     __asm__ __volatile__("xchgl %%ebx, %4\n\t"
    3229                          "lock; cmpxchg8b (%6)\n\t"
    3230                          "setz  %%al\n\t"
    3231                          "movl  %4, %%ebx\n\t"
    3232                          "movzbl %%al, %%eax\n\t"
    3233                          : "=a" (u32Ret),
    3234                            "=d" (u32Spill),
    3235 #    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
    3236                            "+m" (*pu64)
    3237 #    else
    3238                            "=m" (*pu64)
    3239 #    endif
    3240                          : "A" (u64Old),
    3241                            "m" ( u32EBX ),
    3242                            "c" ( (uint32_t)(u64New >> 32) ),
    3243                            "S" (pu64));
    3244 #   else /* !PIC */
    3245     uint32_t u32Spill;
    3246     __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
    3247                          "setz  %%al\n\t"
    3248                          "movzbl %%al, %%eax\n\t"
    3249                          : "=a" (u32Ret),
    3250                            "=d" (u32Spill),
    3251                            "+m" (*pu64)
    3252                          : "A" (u64Old),
    3253                            "b" ( (uint32_t)u64New ),
    3254                            "c" ( (uint32_t)(u64New >> 32) ));
    3255 #   endif
    3256     return (bool)u32Ret;
    3257 #  else
    3258     __asm
    3259     {
    3260         mov     ebx, dword ptr [u64New]
    3261         mov     ecx, dword ptr [u64New + 4]
    3262         mov     edi, [pu64]
    3263         mov     eax, dword ptr [u64Old]
    3264         mov     edx, dword ptr [u64Old + 4]
    3265         lock cmpxchg8b [edi]
    3266         setz    al
    3267         movzx   eax, al
    3268         mov     dword ptr [u32Ret], eax
    3269     }
    3270     return !!u32Ret;
    3271 #  endif
    3272 # endif /* !RT_ARCH_AMD64 */
    3273 }
    3274 #endif
    3275 
    3276 
    3277 /**
    3278  * Atomically Compare and exchange a signed 64-bit value, ordered.
    3279  *
    3280  * @returns true if xchg was done.
    3281  * @returns false if xchg wasn't done.
    3282  *
    3283  * @param   pi64    Pointer to the 64-bit variable to update.
    3284  * @param   i64     The 64-bit value to assign to *pi64.
    3285  * @param   i64Old  The value to compare with.
    3286  */
    3287 DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
    3288 {
    3289     return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
    3290 }
    3291 
    3292 
    3293 /**
    3294  * Atomically Compare and Exchange a pointer value, ordered.
    3295  *
    3296  * @returns true if xchg was done.
    3297  * @returns false if xchg wasn't done.
    3298  *
    3299  * @param   ppv         Pointer to the value to update.
    3300  * @param   pvNew       The new value to assign to *ppv.
    3301  * @param   pvOld       The old value to compare *ppv with.
    3302  */
    3303 DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
    3304 {
    3305 #if ARCH_BITS == 32
    3306     return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
    3307 #elif ARCH_BITS == 64
    3308     return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
    3309 #else
    3310 # error "ARCH_BITS is bogus"
    3311 #endif
    3312 }
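
A hedged sketch of the classic lock-free push built on ASMAtomicCmpXchgPtr; MYNODE and g_pHead are illustrative, and note that a matching pop would also have to deal with the ABA problem, which is out of scope here:

    typedef struct MYNODE
    {
        struct MYNODE *pNext;
        /* payload ... */
    } MYNODE;

    static MYNODE * volatile g_pHead;   /* illustrative stack head */

    static void myStackPush(MYNODE *pNode)
    {
        MYNODE *pOldHead;
        do
        {
            pOldHead = g_pHead;
            pNode->pNext = pOldHead;
        } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pHead, pNode, pOldHead));
    }
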
    3313 
    3314 
    3315 /** @def ASMAtomicCmpXchgHandle
    3316  * Atomically Compare and Exchange a typical IPRT handle value, ordered.
    3317  *
    3318  * @param   ph          Pointer to the value to update.
    3319  * @param   hNew        The new value to assign to *ph.
    3320  * @param   hOld        The old value to compare *ph with.
    3321  * @param   fRc         Where to store the result.
    3322  *
    3323  * @remarks This doesn't currently work for all handles (like RTFILE).
    3324  */
    3325 #if HC_ARCH_BITS == 32
    3326 # define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    3327    do { \
    3328        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
    3329        (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
    3330    } while (0)
    3331 #elif HC_ARCH_BITS == 64
    3332 # define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    3333    do { \
    3334        AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
    3335        (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
    3336    } while (0)
    3337 #else
    3338 # error HC_ARCH_BITS
    3339 #endif
    3340 
    3341 
    3342 /** @def ASMAtomicCmpXchgSize
    3343  * Atomically Compare and Exchange a value which size might differ
    3344  * between platforms or compilers, ordered.
    3345  *
    3346  * @param   pu          Pointer to the value to update.
    3347  * @param   uNew        The new value to assign to *pu.
    3348  * @param   uOld        The old value to compare *pu with.
    3349  * @param   fRc         Where to store the result.
    3350  */
    3351 #define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    3352     do { \
    3353         switch (sizeof(*(pu))) { \
    3354             case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
    3355                 break; \
    3356             case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
    3357                 break; \
    3358             default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
    3359                 (fRc) = false; \
    3360                 break; \
    3361         } \
    3362     } while (0)
    3363 
    3364 
    3365 /**
    3366  * Atomically Compare and Exchange an unsigned 32-bit value, additionally
    3367  * passes back old value, ordered.
    3368  *
    3369  * @returns true if xchg was done.
    3370  * @returns false if xchg wasn't done.
    3371  *
    3372  * @param   pu32        Pointer to the value to update.
    3373  * @param   u32New      The new value to assign to *pu32.
    3374  * @param   u32Old      The old value to compare *pu32 with.
    3375  * @param   pu32Old     Pointer to store the old value at.
    3376  */
    3377 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3378 DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
    3379 #else
    3380 DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
    3381 {
    3382 # if RT_INLINE_ASM_GNU_STYLE
    3383     uint8_t u8Ret;
    3384     __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
    3385                          "setz  %1\n\t"
    3386                          : "=m" (*pu32),
    3387                            "=qm" (u8Ret),
    3388                            "=a" (*pu32Old)
    3389                          : "r" (u32New),
    3390                            "a" (u32Old),
    3391                            "m" (*pu32));
    3392     return (bool)u8Ret;
    3393 
    3394 # elif RT_INLINE_ASM_USES_INTRIN
    3395     return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
    3396 
    3397 # else
    3398     uint32_t u32Ret;
    3399     __asm
    3400     {
    3401 #  ifdef RT_ARCH_AMD64
    3402         mov     rdx, [pu32]
    3403 #  else
    3404         mov     edx, [pu32]
    3405 #  endif
    3406         mov     eax, [u32Old]
    3407         mov     ecx, [u32New]
    3408 #  ifdef RT_ARCH_AMD64
    3409         lock cmpxchg [rdx], ecx
    3410         mov     rdx, [pu32Old]
    3411         mov     [rdx], eax
    3412 #  else
    3413         lock cmpxchg [edx], ecx
    3414         mov     edx, [pu32Old]
    3415         mov     [edx], eax
    3416 #  endif
    3417         setz    al
    3418         movzx   eax, al
    3419         mov     [u32Ret], eax
    3420     }
    3421     return !!u32Ret;
    3422 # endif
    3423 }
    3424 #endif
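
The Ex variant saves the explicit reload in such retry loops, because the current value is passed back through pu32Old on failure; a sketch of an OR-mask update written that way (purely illustrative, since IPRT already provides ASMAtomicOrU32 for this):

    /* Returns the value *pu32 had before the mask was OR'ed in. */
    static uint32_t myAtomicOrExU32(uint32_t volatile *pu32, uint32_t fMask)
    {
        uint32_t u32Old = *pu32;
        while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fMask, u32Old, &u32Old))
        { /* u32Old was refreshed by the failed attempt; just retry. */ }
        return u32Old;
    }
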
    3425 
    3426 
    3427 /**
    3428  * Atomically Compare and Exchange a signed 32-bit value, additionally
    3429  * passes back old value, ordered.
    3430  *
    3431  * @returns true if xchg was done.
    3432  * @returns false if xchg wasn't done.
    3433  *
    3434  * @param   pi32        Pointer to the value to update.
    3435  * @param   i32New      The new value to assign to *pi32.
    3436  * @param   i32Old      The old value to compare *pi32 with.
    3437  * @param   pi32Old     Pointer to store the old value at.
    3438  */
    3439 DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
    3440 {
    3441     return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
    3442 }
    3443 
    3444 
    3445 /**
    3446  * Atomically Compare and exchange an unsigned 64-bit value, additionally
    3447  * passing back old value, ordered.
    3448  *
    3449  * @returns true if xchg was done.
    3450  * @returns false if xchg wasn't done.
    3451  *
    3452  * @param   pu64    Pointer to the 64-bit variable to update.
    3453  * @param   u64New  The 64-bit value to assign to *pu64.
    3454  * @param   u64Old  The value to compare with.
    3455  * @param   pu64Old     Pointer to store the old value at.
    3456  */
    3457 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
    3458  || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    3459 DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
    3460 #else
    3461 DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
    3462 {
    3463 # if RT_INLINE_ASM_USES_INTRIN
    3464    return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
    3465 
    3466 # elif defined(RT_ARCH_AMD64)
    3467 #  if RT_INLINE_ASM_GNU_STYLE
    3468     uint8_t u8Ret;
    3469     __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
    3470                          "setz  %1\n\t"
    3471                          : "=m" (*pu64),
    3472                            "=qm" (u8Ret),
    3473                            "=a" (*pu64Old)
    3474                          : "r" (u64New),
    3475                            "a" (u64Old),
    3476                            "m" (*pu64));
    3477     return (bool)u8Ret;
    3478 #  else
    3479     bool fRet;
    3480     __asm
    3481     {
    3482         mov     rdx, [pu64]
    3483         mov     rax, [u64Old]
    3484         mov     rcx, [u64New]
    3485         lock cmpxchg [rdx], rcx
    3486         mov     rdx, [pu64Old]
    3487         mov     [rdx], rax
    3488         setz    al
    3489         mov     [fRet], al
    3490     }
    3491     return fRet;
    3492 #  endif
    3493 # else /* !RT_ARCH_AMD64 */
    3494 #  if RT_INLINE_ASM_GNU_STYLE
    3495     uint64_t u64Ret;
    3496 #   if defined(PIC) || defined(__PIC__)
    3497     /* NB: this code uses a memory clobber description, because the clean
    3498      * solution with an output value for *pu64 makes gcc run out of registers.
    3499      * This will cause suboptimal code, and anyone with a better solution is
    3500      * welcome to improve this. */
    3501     __asm__ __volatile__("xchgl %%ebx, %1\n\t"
    3502                          "lock; cmpxchg8b %3\n\t"
    3503                          "xchgl %%ebx, %1\n\t"
    3504                          : "=A" (u64Ret)
    3505                          : "DS" ((uint32_t)u64New),
    3506                            "c" ((uint32_t)(u64New >> 32)),
    3507                            "m" (*pu64),
    3508                            "0" (u64Old)
    3509                          : "memory" );
    3510 #   else /* !PIC */
    3511     __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
    3512                          : "=A" (u64Ret),
    3513                            "=m" (*pu64)
    3514                          : "b" ((uint32_t)u64New),
    3515                            "c" ((uint32_t)(u64New >> 32)),
    3516                            "m" (*pu64),
    3517                            "0" (u64Old));
    3518 #   endif
    3519     *pu64Old = u64Ret;
    3520     return u64Ret == u64Old;
    3521 #  else
    3522     uint32_t u32Ret;
    3523     __asm
    3524     {
    3525         mov     ebx, dword ptr [u64New]
    3526         mov     ecx, dword ptr [u64New + 4]
    3527         mov     edi, [pu64]
    3528         mov     eax, dword ptr [u64Old]
    3529         mov     edx, dword ptr [u64Old + 4]
    3530         lock cmpxchg8b [edi]
    3531         mov     ebx, [pu64Old]
    3532         mov     [ebx], eax
    3533         setz    al
    3534         movzx   eax, al
    3535         add     ebx, 4
    3536         mov     [ebx], edx
    3537         mov     dword ptr [u32Ret], eax
    3538     }
    3539     return !!u32Ret;
    3540 #  endif
    3541 # endif /* !RT_ARCH_AMD64 */
    3542 }
    3543 #endif
    3544 
    3545 
    3546 /**
    3547  * Atomically Compare and exchange a signed 64-bit value, additionally
    3548  * passing back old value, ordered.
    3549  *
    3550  * @returns true if xchg was done.
    3551  * @returns false if xchg wasn't done.
    3552  *
    3553  * @param   pi64    Pointer to the 64-bit variable to update.
    3554  * @param   i64     The 64-bit value to assign to *pi64.
    3555  * @param   i64Old  The value to compare with.
    3556  * @param   pi64Old Pointer to store the old value at.
    3557  */
    3558 DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
    3559 {
    3560     return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
    3561 }
    3562 
    3563 /** @def ASMAtomicCmpXchgExHandle
    3564  * Atomically Compare and Exchange a typical IPRT handle value, ordered.
    3565  *
    3566  * @param   ph          Pointer to the value to update.
    3567  * @param   hNew        The new value to assign to *ph.
    3568  * @param   hOld        The old value to compare *ph with.
    3569  * @param   fRc         Where to store the result.
    3570  * @param   phOldVal    Pointer to where to store the old value.
    3571  *
    3572  * @remarks This doesn't currently work for all handles (like RTFILE).
    3573  */
    3574 #if HC_ARCH_BITS == 32
    3575 # define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
    3576     do { \
    3577         AssertCompile(sizeof(*ph)       == sizeof(uint32_t)); \
    3578         AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
    3579         (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
    3580     } while (0)
    3581 #elif HC_ARCH_BITS == 64
    3582 # define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
    3583     do { \
    3584         AssertCompile(sizeof(*(ph))       == sizeof(uint64_t)); \
    3585         AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
    3586         (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
    3587     } while (0)
    3588 #else
    3589 # error HC_ARCH_BITS
    3590 #endif
    3591 
    3592 
    3593 /** @def ASMAtomicCmpXchgExSize
    3594  * Atomically Compare and Exchange a value which size might differ
    3595  * between platforms or compilers. Additionally passes back old value.
    3596  *
    3597  * @param   pu          Pointer to the value to update.
    3598  * @param   uNew        The new value to assign to *pu.
    3599  * @param   uOld        The old value to compare *pu with.
    3600  * @param   fRc         Where to store the result.
    3601  * @param   puOldVal    Pointer to where to store the old value.
    3602  */
    3603 #define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    3604     do { \
    3605         switch (sizeof(*(pu))) { \
    3606             case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
    3607                 break; \
    3608             case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
    3609                 break; \
    3610             default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
    3611                 (fRc) = false; \
    3612                 *(puOldVal) = 0; \
    3613                 break; \
    3614         } \
    3615     } while (0)
    3616 
    3617 
    3618 /**
    3619  * Atomically Compare and Exchange a pointer value, additionally
    3620  * passing back old value, ordered.
    3621  *
    3622  * @returns true if xchg was done.
    3623  * @returns false if xchg wasn't done.
    3624  *
    3625  * @param   ppv         Pointer to the value to update.
    3626  * @param   pvNew       The new value to assign to *ppv.
    3627  * @param   pvOld       The old value to compare *ppv with.
    3628  * @param   ppvOld      Pointer to store the old value at.
    3629  */
    3630 DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
    3631 {
    3632 #if ARCH_BITS == 32
    3633     return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
    3634 #elif ARCH_BITS == 64
    3635     return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
    3636 #else
    3637 # error "ARCH_BITS is bogus"
    3638 #endif
    3639 }
    3640 
    3641 
    3642 /**
    3643  * Atomically exchanges and adds to a 32-bit value, ordered.
    3644  *
    3645  * @returns The old value.
    3646  * @param   pu32        Pointer to the value.
    3647  * @param   u32         Number to add.
    3648  */
    3649 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3650 DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
    3651 #else
    3652 DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
    3653 {
    3654 # if RT_INLINE_ASM_USES_INTRIN
    3655     u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    3656     return u32;
    3657 
    3658 # elif RT_INLINE_ASM_GNU_STYLE
    3659     __asm__ __volatile__("lock; xaddl %0, %1\n\t"
    3660                          : "=r" (u32),
    3661                            "=m" (*pu32)
    3662                          : "0" (u32),
    3663                            "m" (*pu32)
    3664                          : "memory");
    3665     return u32;
    3666 # else
    3667     __asm
    3668     {
    3669         mov     eax, [u32]
    3670 #  ifdef RT_ARCH_AMD64
    3671         mov     rdx, [pu32]
    3672         lock xadd [rdx], eax
    3673 #  else
    3674         mov     edx, [pu32]
    3675         lock xadd [edx], eax
    3676 #  endif
    3677         mov     [u32], eax
    3678     }
    3679     return u32;
    3680 # endif
    3681 }
    3682 #endif
    3683 
    3684 
    3685 /**
    3686  * Atomically exchanges and adds to a signed 32-bit value, ordered.
    3687  *
    3688  * @returns The old value.
    3689  * @param   pi32        Pointer to the value.
    3690  * @param   i32         Number to add.
    3691  */
    3692 DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
    3693 {
    3694     return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
    3695 }
    3696 
    3697 
    3698 /**
    3699  * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
    3700  *
    3701  * @returns The old value.
    3702  * @param   pu32        Pointer to the value.
    3703  * @param   u32         Number to subtract.
    3704  */
    3705 DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
    3706 {
    3707     return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
    3708 }
    3709 
    3710 
    3711 /**
    3712  * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
    3713  *
    3714  * @returns The old value.
    3715  * @param   pi32        Pointer to the value.
    3716  * @param   i32         Number to subtract.
    3717  */
    3718 DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
    3719 {
    3720     return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
    3721 }
    3722 
    3723 
    3724 /**
    3725  * Atomically increment a 32-bit value, ordered.
    3726  *
    3727  * @returns The new value.
    3728  * @param   pu32        Pointer to the value to increment.
    3729  */
    3730 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3731 DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
    3732 #else
    3733 DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
    3734 {
    3735     uint32_t u32;
    3736 # if RT_INLINE_ASM_USES_INTRIN
    3737     u32 = _InterlockedIncrement((long *)pu32);
    3738     return u32;
    3739 
    3740 # elif RT_INLINE_ASM_GNU_STYLE
    3741     __asm__ __volatile__("lock; xaddl %0, %1\n\t"
    3742                          : "=r" (u32),
    3743                            "=m" (*pu32)
    3744                          : "0" (1),
    3745                            "m" (*pu32)
    3746                          : "memory");
    3747     return u32+1;
    3748 # else
    3749     __asm
    3750     {
    3751         mov     eax, 1
    3752 #  ifdef RT_ARCH_AMD64
    3753         mov     rdx, [pu32]
    3754         lock xadd [rdx], eax
    3755 #  else
    3756         mov     edx, [pu32]
    3757         lock xadd [edx], eax
    3758 #  endif
    3759         mov     u32, eax
    3760     }
    3761     return u32+1;
    3762 # endif
    3763 }
    3764 #endif
    3765 
    3766 
    3767 /**
    3768  * Atomically increment a signed 32-bit value, ordered.
    3769  *
    3770  * @returns The new value.
    3771  * @param   pi32        Pointer to the value to increment.
    3772  */
    3773 DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
    3774 {
    3775     return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
    3776 }
    3777 
    3778 
    3779 /**
    3780  * Atomically decrement an unsigned 32-bit value, ordered.
    3781  *
    3782  * @returns The new value.
    3783  * @param   pu32        Pointer to the value to decrement.
    3784  */
    3785 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3786 DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
    3787 #else
    3788 DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
    3789 {
    3790     uint32_t u32;
    3791 # if RT_INLINE_ASM_USES_INTRIN
    3792     u32 = _InterlockedDecrement((long *)pu32);
    3793     return u32;
    3794 
    3795 # elif RT_INLINE_ASM_GNU_STYLE
    3796     __asm__ __volatile__("lock; xaddl %0, %1\n\t"
    3797                          : "=r" (u32),
    3798                            "=m" (*pu32)
    3799                          : "0" (-1),
    3800                            "m" (*pu32)
    3801                          : "memory");
    3802     return u32-1;
    3803 # else
    3804     __asm
    3805     {
    3806         mov     eax, -1
    3807 #  ifdef RT_ARCH_AMD64
    3808         mov     rdx, [pu32]
    3809         lock xadd [rdx], eax
    3810 #  else
    3811         mov     edx, [pu32]
    3812         lock xadd [edx], eax
    3813 #  endif
    3814         mov     u32, eax
    3815     }
    3816     return u32-1;
    3817 # endif
    3818 }
    3819 #endif
    3820 
    3821 
    3822 /**
    3823  * Atomically decrement a signed 32-bit value, ordered.
    3824  *
    3825  * @returns The new value.
    3826  * @param   pi32        Pointer to the value to decrement.
    3827  */
    3828 DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
    3829 {
    3830     return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
    3831 }
    3832 
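Because the increment/decrement functions return the new value, they map naturally onto reference counting; a sketch assuming a hypothetical MYOBJ type and destructor:

    #include <iprt/asm-amd64-x86.h>

    typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;   /* hypothetical */
    void myObjDestroy(MYOBJ *pObj);                            /* hypothetical, defined elsewhere */

    static void myObjRetain(MYOBJ *pObj)
    {
        ASMAtomicIncU32(&pObj->cRefs);              /* returns the new count, ignored here */
    }

    static void myObjRelease(MYOBJ *pObj)
    {
        if (ASMAtomicDecU32(&pObj->cRefs) == 0)     /* new count; zero means last reference */
            myObjDestroy(pObj);
    }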
    3833 
    3834 /**
    3835  * Atomically Or an unsigned 32-bit value, ordered.
    3836  *
     3837  * @param   pu32   Pointer to the variable to OR u32 with.
    3838  * @param   u32    The value to OR *pu32 with.
    3839  */
    3840 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3841 DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
    3842 #else
    3843 DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
    3844 {
    3845 # if RT_INLINE_ASM_USES_INTRIN
    3846     _InterlockedOr((long volatile *)pu32, (long)u32);
    3847 
    3848 # elif RT_INLINE_ASM_GNU_STYLE
    3849     __asm__ __volatile__("lock; orl %1, %0\n\t"
    3850                          : "=m" (*pu32)
    3851                          : "ir" (u32),
    3852                            "m" (*pu32));
    3853 # else
    3854     __asm
    3855     {
    3856         mov     eax, [u32]
    3857 #  ifdef RT_ARCH_AMD64
    3858         mov     rdx, [pu32]
    3859         lock    or [rdx], eax
    3860 #  else
    3861         mov     edx, [pu32]
    3862         lock    or [edx], eax
    3863 #  endif
    3864     }
    3865 # endif
    3866 }
    3867 #endif
    3868 
    3869 
    3870 /**
    3871  * Atomically Or a signed 32-bit value, ordered.
    3872  *
     3873  * @param   pi32   Pointer to the variable to OR i32 with.
     3874  * @param   i32    The value to OR *pi32 with.
    3875  */
    3876 DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
    3877 {
    3878     ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
    3879 }
    3880 
    3881 
    3882 /**
    3883  * Atomically And an unsigned 32-bit value, ordered.
    3884  *
     3885  * @param   pu32   Pointer to the variable to AND u32 with.
    3886  * @param   u32    The value to AND *pu32 with.
    3887  */
    3888 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3889 DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
    3890 #else
    3891 DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
    3892 {
    3893 # if RT_INLINE_ASM_USES_INTRIN
    3894     _InterlockedAnd((long volatile *)pu32, u32);
    3895 
    3896 # elif RT_INLINE_ASM_GNU_STYLE
    3897     __asm__ __volatile__("lock; andl %1, %0\n\t"
    3898                          : "=m" (*pu32)
    3899                          : "ir" (u32),
    3900                            "m" (*pu32));
    3901 # else
    3902     __asm
    3903     {
    3904         mov     eax, [u32]
    3905 #  ifdef RT_ARCH_AMD64
    3906         mov     rdx, [pu32]
    3907         lock and [rdx], eax
    3908 #  else
    3909         mov     edx, [pu32]
    3910         lock and [edx], eax
    3911 #  endif
    3912     }
    3913 # endif
    3914 }
    3915 #endif
    3916 
    3917 
    3918 /**
    3919  * Atomically And a signed 32-bit value, ordered.
    3920  *
     3921  * @param   pi32   Pointer to the variable to AND i32 with.
    3922  * @param   i32    The value to AND *pi32 with.
    3923  */
    3924 DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
    3925 {
    3926     ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
    3927 }
    3928 
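A sketch of the typical flag-word pattern built on the OR/AND operations above; the flag names and globals are made up for the example:

    #include <iprt/asm-amd64-x86.h>

    #define MYF_READY       UINT32_C(0x00000001)   /* hypothetical flag bits */
    #define MYF_SHUTDOWN    UINT32_C(0x00000002)

    static uint32_t volatile g_fFlags = 0;

    static void mySignalReady(void)
    {
        ASMAtomicOrU32(&g_fFlags, MYF_READY);       /* atomically set a bit */
    }

    static void myClearShutdown(void)
    {
        ASMAtomicAndU32(&g_fFlags, ~MYF_SHUTDOWN);  /* atomically clear a bit */
    }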
    3929 
    3930 /**
    3931  * Serialize Instruction.
    3932  */
    3933 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    3934 DECLASM(void) ASMSerializeInstruction(void);
    3935 #else
    3936 DECLINLINE(void) ASMSerializeInstruction(void)
    3937 {
    3938 # if RT_INLINE_ASM_GNU_STYLE
    3939     RTCCUINTREG xAX = 0;
    3940 #  ifdef RT_ARCH_AMD64
    3941     __asm__ ("cpuid"
    3942              : "=a" (xAX)
    3943              : "0" (xAX)
    3944              : "rbx", "rcx", "rdx");
    3945 #  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    3946     __asm__ ("push  %%ebx\n\t"
    3947              "cpuid\n\t"
    3948              "pop   %%ebx\n\t"
    3949              : "=a" (xAX)
    3950              : "0" (xAX)
    3951              : "ecx", "edx");
    3952 #  else
    3953     __asm__ ("cpuid"
    3954              : "=a" (xAX)
    3955              : "0" (xAX)
    3956              : "ebx", "ecx", "edx");
    3957 #  endif
    3958 
    3959 # elif RT_INLINE_ASM_USES_INTRIN
    3960     int aInfo[4];
    3961     __cpuid(aInfo, 0);
    3962 
    3963 # else
    3964     __asm
    3965     {
    3966         push    ebx
    3967         xor     eax, eax
    3968         cpuid
    3969         pop     ebx
      2531 DECLASM(void) ASMInvalidateInternalCaches(void);
      2532 #else
      2533 DECLINLINE(void) ASMInvalidateInternalCaches(void)
      2534 {
      2535 # if RT_INLINE_ASM_GNU_STYLE
      2536     __asm__ __volatile__("invd");
      2537 # else
      2538     __asm
      2539     {
      2540         invd
     3970 2541     }
     3971 2542 # endif
 
     4037 2608 
     4038 2609 
    4039 /**
    4040  * Memory fence, waits for any pending writes and reads to complete.
    4041  */
    4042 DECLINLINE(void) ASMMemoryFence(void)
    4043 {
    4044     /** @todo use mfence? check if all cpus we care for support it. */
    4045     uint32_t volatile u32;
    4046     ASMAtomicXchgU32(&u32, 0);
    4047 }
    4048 
    4049 
    4050 /**
    4051  * Write fence, waits for any pending writes to complete.
    4052  */
    4053 DECLINLINE(void) ASMWriteFence(void)
    4054 {
    4055     /** @todo use sfence? check if all cpus we care for support it. */
    4056     ASMMemoryFence();
    4057 }
    4058 
    4059 
    4060 /**
    4061  * Read fence, waits for any pending reads to complete.
    4062  */
    4063 DECLINLINE(void) ASMReadFence(void)
    4064 {
    4065     /** @todo use lfence? check if all cpus we care for support it. */
    4066     ASMMemoryFence();
    4067 }
    4068 
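A sketch of the publish/consume pattern these fences are meant for; the payload and flag variables are hypothetical, and the unordered read/write helpers used here are defined further down in this header:

    #include <iprt/asm-amd64-x86.h>

    static uint32_t          g_uPayload;             /* hypothetical shared data */
    static uint32_t volatile g_fDataReady = 0;

    static void myPublish(uint32_t uValue)
    {
        g_uPayload = uValue;
        ASMWriteFence();                             /* payload store becomes visible first */
        ASMAtomicUoWriteU32(&g_fDataReady, 1);
    }

    static uint32_t myConsume(void)
    {
        while (!ASMAtomicUoReadU32(&g_fDataReady))
        { /* spin */ }
        ASMReadFence();                              /* don't read the payload before the flag */
        return g_uPayload;
    }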
    4069 
    4070 /**
    4071  * Atomically reads an unsigned 8-bit value, ordered.
    4072  *
    4073  * @returns Current *pu8 value
    4074  * @param   pu8    Pointer to the 8-bit variable to read.
    4075  */
    4076 DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
    4077 {
    4078     ASMMemoryFence();
    4079     return *pu8;    /* byte reads are atomic on x86 */
    4080 }
    4081 
    4082 
    4083 /**
    4084  * Atomically reads an unsigned 8-bit value, unordered.
    4085  *
    4086  * @returns Current *pu8 value
    4087  * @param   pu8    Pointer to the 8-bit variable to read.
    4088  */
    4089 DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
    4090 {
    4091     return *pu8;    /* byte reads are atomic on x86 */
    4092 }
    4093 
    4094 
    4095 /**
    4096  * Atomically reads a signed 8-bit value, ordered.
    4097  *
    4098  * @returns Current *pi8 value
    4099  * @param   pi8    Pointer to the 8-bit variable to read.
    4100  */
    4101 DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
    4102 {
    4103     ASMMemoryFence();
    4104     return *pi8;    /* byte reads are atomic on x86 */
    4105 }
    4106 
    4107 
    4108 /**
    4109  * Atomically reads a signed 8-bit value, unordered.
    4110  *
    4111  * @returns Current *pi8 value
    4112  * @param   pi8    Pointer to the 8-bit variable to read.
    4113  */
    4114 DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
    4115 {
    4116     return *pi8;    /* byte reads are atomic on x86 */
    4117 }
    4118 
    4119 
    4120 /**
    4121  * Atomically reads an unsigned 16-bit value, ordered.
    4122  *
    4123  * @returns Current *pu16 value
    4124  * @param   pu16    Pointer to the 16-bit variable to read.
    4125  */
    4126 DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
    4127 {
    4128     ASMMemoryFence();
    4129     Assert(!((uintptr_t)pu16 & 1));
    4130     return *pu16;
    4131 }
    4132 
    4133 
    4134 /**
    4135  * Atomically reads an unsigned 16-bit value, unordered.
    4136  *
    4137  * @returns Current *pu16 value
    4138  * @param   pu16    Pointer to the 16-bit variable to read.
    4139  */
    4140 DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
    4141 {
    4142     Assert(!((uintptr_t)pu16 & 1));
    4143     return *pu16;
    4144 }
    4145 
    4146 
    4147 /**
    4148  * Atomically reads a signed 16-bit value, ordered.
    4149  *
    4150  * @returns Current *pi16 value
    4151  * @param   pi16    Pointer to the 16-bit variable to read.
    4152  */
    4153 DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
    4154 {
    4155     ASMMemoryFence();
    4156     Assert(!((uintptr_t)pi16 & 1));
    4157     return *pi16;
    4158 }
    4159 
    4160 
    4161 /**
    4162  * Atomically reads a signed 16-bit value, unordered.
    4163  *
    4164  * @returns Current *pi16 value
    4165  * @param   pi16    Pointer to the 16-bit variable to read.
    4166  */
    4167 DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
    4168 {
    4169     Assert(!((uintptr_t)pi16 & 1));
    4170     return *pi16;
    4171 }
    4172 
    4173 
    4174 /**
    4175  * Atomically reads an unsigned 32-bit value, ordered.
    4176  *
    4177  * @returns Current *pu32 value
    4178  * @param   pu32    Pointer to the 32-bit variable to read.
    4179  */
    4180 DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
    4181 {
    4182     ASMMemoryFence();
    4183     Assert(!((uintptr_t)pu32 & 3));
    4184     return *pu32;
    4185 }
    4186 
    4187 
    4188 /**
    4189  * Atomically reads an unsigned 32-bit value, unordered.
    4190  *
    4191  * @returns Current *pu32 value
    4192  * @param   pu32    Pointer to the 32-bit variable to read.
    4193  */
    4194 DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
    4195 {
    4196     Assert(!((uintptr_t)pu32 & 3));
    4197     return *pu32;
    4198 }
    4199 
    4200 
    4201 /**
    4202  * Atomically reads a signed 32-bit value, ordered.
    4203  *
    4204  * @returns Current *pi32 value
    4205  * @param   pi32    Pointer to the 32-bit variable to read.
    4206  */
    4207 DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
    4208 {
    4209     ASMMemoryFence();
    4210     Assert(!((uintptr_t)pi32 & 3));
    4211     return *pi32;
    4212 }
    4213 
    4214 
    4215 /**
    4216  * Atomically reads a signed 32-bit value, unordered.
    4217  *
    4218  * @returns Current *pi32 value
    4219  * @param   pi32    Pointer to the 32-bit variable to read.
    4220  */
    4221 DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
    4222 {
    4223     Assert(!((uintptr_t)pi32 & 3));
    4224     return *pi32;
    4225 }
    4226 
    4227 
    4228 /**
    4229  * Atomically reads an unsigned 64-bit value, ordered.
    4230  *
    4231  * @returns Current *pu64 value
    4232  * @param   pu64    Pointer to the 64-bit variable to read.
    4233  *                  The memory pointed to must be writable.
    4234  * @remark  This will fault if the memory is read-only!
    4235  */
    4236 #if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
    4237  || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    4238 DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
    4239 #else
    4240 DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
    4241 {
    4242     uint64_t u64;
    4243 # ifdef RT_ARCH_AMD64
    4244     Assert(!((uintptr_t)pu64 & 7));
    4245 /*#  if RT_INLINE_ASM_GNU_STYLE
    4246     __asm__ __volatile__(  "mfence\n\t"
    4247                            "movq %1, %0\n\t"
    4248                          : "=r" (u64)
    4249                          : "m" (*pu64));
    4250 #  else
    4251     __asm
    4252     {
    4253         mfence
    4254         mov     rdx, [pu64]
    4255         mov     rax, [rdx]
    4256         mov     [u64], rax
    4257     }
    4258 #  endif*/
    4259     ASMMemoryFence();
    4260     u64 = *pu64;
    4261 # else /* !RT_ARCH_AMD64 */
    4262 #  if RT_INLINE_ASM_GNU_STYLE
    4263 #   if defined(PIC) || defined(__PIC__)
    4264     uint32_t u32EBX = 0;
    4265     Assert(!((uintptr_t)pu64 & 7));
    4266     __asm__ __volatile__("xchgl %%ebx, %3\n\t"
    4267                          "lock; cmpxchg8b (%5)\n\t"
    4268                          "movl %3, %%ebx\n\t"
    4269                          : "=A" (u64),
    4270 #    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
    4271                            "+m" (*pu64)
    4272 #    else
    4273                            "=m" (*pu64)
    4274 #    endif
    4275                          : "0" (0),
    4276                            "m" (u32EBX),
    4277                            "c" (0),
    4278                            "S" (pu64));
    4279 #   else /* !PIC */
    4280     __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
    4281                          : "=A" (u64),
    4282                            "+m" (*pu64)
    4283                          : "0" (0),
    4284                            "b" (0),
    4285                            "c" (0));
    4286 #   endif
    4287 #  else
    4288     Assert(!((uintptr_t)pu64 & 7));
    4289     __asm
    4290     {
    4291         xor     eax, eax
    4292         xor     edx, edx
    4293         mov     edi, pu64
    4294         xor     ecx, ecx
    4295         xor     ebx, ebx
    4296         lock cmpxchg8b [edi]
    4297         mov     dword ptr [u64], eax
    4298         mov     dword ptr [u64 + 4], edx
    4299     }
    4300 #  endif
    4301 # endif /* !RT_ARCH_AMD64 */
    4302     return u64;
    4303 }
    4304 #endif
    4305 
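A usage sketch, assuming a hypothetical shared 64-bit statistics counter; on 32-bit hosts the read is implemented with lock cmpxchg8b, which is why the variable must live in writable memory:

    #include <iprt/asm-amd64-x86.h>

    static uint64_t volatile g_cbTotalTransferred;   /* hypothetical, updated elsewhere */

    static uint64_t mySampleTotal(void)
    {
        /* Returns a consistent 64-bit snapshot even on 32-bit hosts. */
        return ASMAtomicReadU64(&g_cbTotalTransferred);
    }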
    4306 
    4307 /**
    4308  * Atomically reads an unsigned 64-bit value, unordered.
    4309  *
    4310  * @returns Current *pu64 value
    4311  * @param   pu64    Pointer to the 64-bit variable to read.
    4312  *                  The memory pointed to must be writable.
    4313  * @remark  This will fault if the memory is read-only!
    4314  */
    4315 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
    4316  || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
    4317 DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
    4318 #else
    4319 DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
    4320 {
    4321     uint64_t u64;
    4322 # ifdef RT_ARCH_AMD64
    4323     Assert(!((uintptr_t)pu64 & 7));
    4324 /*#  if RT_INLINE_ASM_GNU_STYLE
    4325     Assert(!((uintptr_t)pu64 & 7));
    4326     __asm__ __volatile__("movq %1, %0\n\t"
    4327                          : "=r" (u64)
    4328                          : "m" (*pu64));
    4329 #  else
    4330     __asm
    4331     {
    4332         mov     rdx, [pu64]
    4333         mov     rax, [rdx]
    4334         mov     [u64], rax
    4335     }
    4336 #  endif */
    4337     u64 = *pu64;
    4338 # else /* !RT_ARCH_AMD64 */
    4339 #  if RT_INLINE_ASM_GNU_STYLE
    4340 #   if defined(PIC) || defined(__PIC__)
    4341     uint32_t u32EBX = 0;
    4342     uint32_t u32Spill;
    4343     Assert(!((uintptr_t)pu64 & 7));
    4344     __asm__ __volatile__("xor   %%eax,%%eax\n\t"
    4345                          "xor   %%ecx,%%ecx\n\t"
    4346                          "xor   %%edx,%%edx\n\t"
    4347                          "xchgl %%ebx, %3\n\t"
    4348                          "lock; cmpxchg8b (%4)\n\t"
    4349                          "movl %3, %%ebx\n\t"
    4350                          : "=A" (u64),
    4351 #    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
    4352                            "+m" (*pu64),
    4353 #    else
    4354                            "=m" (*pu64),
    4355 #    endif
    4356                            "=c" (u32Spill)
    4357                          : "m" (u32EBX),
    4358                            "S" (pu64));
    4359 #   else /* !PIC */
    4360     __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
    4361                          : "=A" (u64),
    4362                            "+m" (*pu64)
    4363                          : "0" (0),
    4364                            "b" (0),
    4365                            "c" (0));
    4366 #   endif
    4367 #  else
    4368     Assert(!((uintptr_t)pu64 & 7));
    4369     __asm
    4370     {
    4371         xor     eax, eax
    4372         xor     edx, edx
    4373         mov     edi, pu64
    4374         xor     ecx, ecx
    4375         xor     ebx, ebx
    4376         lock cmpxchg8b [edi]
    4377         mov     dword ptr [u64], eax
    4378         mov     dword ptr [u64 + 4], edx
    4379     }
    4380 #  endif
    4381 # endif /* !RT_ARCH_AMD64 */
    4382     return u64;
    4383 }
    4384 #endif
    4385 
    4386 
    4387 /**
    4388  * Atomically reads a signed 64-bit value, ordered.
    4389  *
    4390  * @returns Current *pi64 value
    4391  * @param   pi64    Pointer to the 64-bit variable to read.
    4392  *                  The memory pointed to must be writable.
    4393  * @remark  This will fault if the memory is read-only!
    4394  */
    4395 DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
    4396 {
    4397     return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
    4398 }
    4399 
    4400 
    4401 /**
    4402  * Atomically reads a signed 64-bit value, unordered.
    4403  *
    4404  * @returns Current *pi64 value
    4405  * @param   pi64    Pointer to the 64-bit variable to read.
    4406  *                  The memory pointed to must be writable.
    4407  * @remark  This will fault if the memory is read-only!
    4408  */
    4409 DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
    4410 {
    4411     return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
    4412 }
    4413 
    4414 
    4415 /**
    4416  * Atomically reads a pointer value, ordered.
    4417  *
    4418  * @returns Current *pv value
    4419  * @param   ppv     Pointer to the pointer variable to read.
    4420  */
    4421 DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
    4422 {
    4423 #if ARCH_BITS == 32
    4424     return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
    4425 #elif ARCH_BITS == 64
    4426     return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
    4427 #else
    4428 # error "ARCH_BITS is bogus"
    4429 #endif
    4430 }
    4431 
    4432 
    4433 /**
    4434  * Atomically reads a pointer value, unordered.
    4435  *
    4436  * @returns Current *pv value
    4437  * @param   ppv     Pointer to the pointer variable to read.
    4438  */
    4439 DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
    4440 {
    4441 #if ARCH_BITS == 32
    4442     return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
    4443 #elif ARCH_BITS == 64
    4444     return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
    4445 #else
    4446 # error "ARCH_BITS is bogus"
    4447 #endif
    4448 }
    4449 
    4450 
    4451 /**
    4452  * Atomically reads a boolean value, ordered.
    4453  *
    4454  * @returns Current *pf value
    4455  * @param   pf      Pointer to the boolean variable to read.
    4456  */
    4457 DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
    4458 {
    4459     ASMMemoryFence();
    4460     return *pf;     /* byte reads are atomic on x86 */
    4461 }
    4462 
    4463 
    4464 /**
    4465  * Atomically reads a boolean value, unordered.
    4466  *
    4467  * @returns Current *pf value
    4468  * @param   pf      Pointer to the boolean variable to read.
    4469  */
    4470 DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
    4471 {
    4472     return *pf;     /* byte reads are atomic on x86 */
    4473 }
    4474 
    4475 
    4476 /**
    4477  * Atomically read a typical IPRT handle value, ordered.
    4478  *
    4479  * @param   ph      Pointer to the handle variable to read.
    4480  * @param   phRes   Where to store the result.
    4481  *
    4482  * @remarks This doesn't currently work for all handles (like RTFILE).
    4483  */
    4484 #if HC_ARCH_BITS == 32
    4485 # define ASMAtomicReadHandle(ph, phRes) \
    4486     do { \
    4487         AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
    4488         AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
    4489         *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
    4490     } while (0)
    4491 #elif HC_ARCH_BITS == 64
    4492 # define ASMAtomicReadHandle(ph, phRes) \
    4493     do { \
    4494         AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
    4495         AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
    4496         *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
    4497     } while (0)
    4498 #else
    4499 # error HC_ARCH_BITS
    4500 #endif
    4501 
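A sketch of the handle-read macro with a hypothetical pointer-sized handle type; the AssertCompile inside the macro enforces that the handle really is register sized:

    #include <iprt/asm-amd64-x86.h>

    typedef struct MYDEVICE *MYDEVICEHANDLE;         /* hypothetical handle type */
    static MYDEVICEHANDLE volatile g_hActiveDevice;

    static MYDEVICEHANDLE myGetActiveDevice(void)
    {
        MYDEVICEHANDLE hSnapshot;
        ASMAtomicReadHandle(&g_hActiveDevice, &hSnapshot);
        return hSnapshot;
    }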
    4502 
    4503 /**
    4504  * Atomically read a typical IPRT handle value, unordered.
    4505  *
    4506  * @param   ph      Pointer to the handle variable to read.
    4507  * @param   phRes   Where to store the result.
    4508  *
    4509  * @remarks This doesn't currently work for all handles (like RTFILE).
    4510  */
    4511 #if HC_ARCH_BITS == 32
    4512 # define ASMAtomicUoReadHandle(ph, phRes) \
    4513     do { \
    4514         AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
    4515         AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
    4516         *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
    4517     } while (0)
    4518 #elif HC_ARCH_BITS == 64
    4519 # define ASMAtomicUoReadHandle(ph, phRes) \
    4520     do { \
    4521         AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
    4522         AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
    4523         *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
    4524     } while (0)
    4525 #else
    4526 # error HC_ARCH_BITS
    4527 #endif
    4528 
    4529 
    4530 /**
     4531  * Atomically read a value whose size might differ
    4532  * between platforms or compilers, ordered.
    4533  *
     4534  * @param   pu      Pointer to the variable to read.
    4535  * @param   puRes   Where to store the result.
    4536  */
    4537 #define ASMAtomicReadSize(pu, puRes) \
    4538     do { \
    4539         switch (sizeof(*(pu))) { \
    4540             case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
    4541             case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
    4542             case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
    4543             case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
    4544             default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
    4545         } \
    4546     } while (0)
    4547 
    4548 
    4549 /**
     4550  * Atomically read a value whose size might differ
    4551  * between platforms or compilers, unordered.
    4552  *
    4553  * @param   pu      Pointer to the variable to read.
    4554  * @param   puRes   Where to store the result.
    4555  */
    4556 #define ASMAtomicUoReadSize(pu, puRes) \
    4557     do { \
    4558         switch (sizeof(*(pu))) { \
    4559             case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
    4560             case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
    4561             case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
    4562             case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
    4563             default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
    4564         } \
    4565     } while (0)
    4566 
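The size-dispatching macros are handy for fields whose width differs between 32-bit and 64-bit builds; a sketch with a hypothetical size_t counter:

    #include <iprt/asm-amd64-x86.h>

    static size_t volatile g_cbHeapUsed;     /* hypothetical: 4 bytes on 32-bit hosts, 8 on 64-bit */

    static size_t mySnapshotHeapUsage(void)
    {
        size_t cbSnapshot;
        ASMAtomicReadSize(&g_cbHeapUsed, &cbSnapshot);   /* dispatches to the 32- or 64-bit read */
        return cbSnapshot;
    }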
    4567 
    4568 /**
    4569  * Atomically writes an unsigned 8-bit value, ordered.
    4570  *
    4571  * @param   pu8     Pointer to the 8-bit variable.
    4572  * @param   u8      The 8-bit value to assign to *pu8.
    4573  */
    4574 DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
    4575 {
    4576     ASMAtomicXchgU8(pu8, u8);
    4577 }
    4578 
    4579 
    4580 /**
    4581  * Atomically writes an unsigned 8-bit value, unordered.
    4582  *
    4583  * @param   pu8     Pointer to the 8-bit variable.
    4584  * @param   u8      The 8-bit value to assign to *pu8.
    4585  */
    4586 DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
    4587 {
    4588     *pu8 = u8;      /* byte writes are atomic on x86 */
    4589 }
    4590 
    4591 
    4592 /**
    4593  * Atomically writes a signed 8-bit value, ordered.
    4594  *
     4595  * @param   pi8     Pointer to the 8-bit variable.
    4596  * @param   i8      The 8-bit value to assign to *pi8.
    4597  */
    4598 DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
    4599 {
    4600     ASMAtomicXchgS8(pi8, i8);
    4601 }
    4602 
    4603 
    4604 /**
    4605  * Atomically writes a signed 8-bit value, unordered.
    4606  *
     4607  * @param   pi8     Pointer to the 8-bit variable.
    4608  * @param   i8      The 8-bit value to assign to *pi8.
    4609  */
    4610 DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
    4611 {
    4612     *pi8 = i8;      /* byte writes are atomic on x86 */
    4613 }
    4614 
    4615 
    4616 /**
    4617  * Atomically writes an unsigned 16-bit value, ordered.
    4618  *
    4619  * @param   pu16    Pointer to the 16-bit variable.
    4620  * @param   u16     The 16-bit value to assign to *pu16.
    4621  */
    4622 DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
    4623 {
    4624     ASMAtomicXchgU16(pu16, u16);
    4625 }
    4626 
    4627 
    4628 /**
    4629  * Atomically writes an unsigned 16-bit value, unordered.
    4630  *
    4631  * @param   pu16    Pointer to the 16-bit variable.
    4632  * @param   u16     The 16-bit value to assign to *pu16.
    4633  */
    4634 DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
    4635 {
    4636     Assert(!((uintptr_t)pu16 & 1));
    4637     *pu16 = u16;
    4638 }
    4639 
    4640 
    4641 /**
    4642  * Atomically writes a signed 16-bit value, ordered.
    4643  *
     4644  * @param   pi16    Pointer to the 16-bit variable.
    4645  * @param   i16     The 16-bit value to assign to *pi16.
    4646  */
    4647 DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
    4648 {
    4649     ASMAtomicXchgS16(pi16, i16);
    4650 }
    4651 
    4652 
    4653 /**
    4654  * Atomically writes a signed 16-bit value, unordered.
    4655  *
     4656  * @param   pi16    Pointer to the 16-bit variable.
    4657  * @param   i16     The 16-bit value to assign to *pi16.
    4658  */
    4659 DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
    4660 {
    4661     Assert(!((uintptr_t)pi16 & 1));
    4662     *pi16 = i16;
    4663 }
    4664 
    4665 
    4666 /**
    4667  * Atomically writes an unsigned 32-bit value, ordered.
    4668  *
    4669  * @param   pu32    Pointer to the 32-bit variable.
    4670  * @param   u32     The 32-bit value to assign to *pu32.
    4671  */
    4672 DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
    4673 {
    4674     ASMAtomicXchgU32(pu32, u32);
    4675 }
    4676 
    4677 
    4678 /**
    4679  * Atomically writes an unsigned 32-bit value, unordered.
    4680  *
    4681  * @param   pu32    Pointer to the 32-bit variable.
    4682  * @param   u32     The 32-bit value to assign to *pu32.
    4683  */
    4684 DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
    4685 {
    4686     Assert(!((uintptr_t)pu32 & 3));
    4687     *pu32 = u32;
    4688 }
    4689 
    4690 
    4691 /**
    4692  * Atomically writes a signed 32-bit value, ordered.
    4693  *
     4694  * @param   pi32    Pointer to the 32-bit variable.
    4695  * @param   i32     The 32-bit value to assign to *pi32.
    4696  */
    4697 DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
    4698 {
    4699     ASMAtomicXchgS32(pi32, i32);
    4700 }
    4701 
    4702 
    4703 /**
    4704  * Atomically writes a signed 32-bit value, unordered.
    4705  *
     4706  * @param   pi32    Pointer to the 32-bit variable.
    4707  * @param   i32     The 32-bit value to assign to *pi32.
    4708  */
    4709 DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
    4710 {
    4711     Assert(!((uintptr_t)pi32 & 3));
    4712     *pi32 = i32;
    4713 }
    4714 
    4715 
    4716 /**
    4717  * Atomically writes an unsigned 64-bit value, ordered.
    4718  *
    4719  * @param   pu64    Pointer to the 64-bit variable.
    4720  * @param   u64     The 64-bit value to assign to *pu64.
    4721  */
    4722 DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
    4723 {
    4724     ASMAtomicXchgU64(pu64, u64);
    4725 }
    4726 
    4727 
    4728 /**
    4729  * Atomically writes an unsigned 64-bit value, unordered.
    4730  *
    4731  * @param   pu64    Pointer to the 64-bit variable.
    4732  * @param   u64     The 64-bit value to assign to *pu64.
    4733  */
    4734 DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
    4735 {
    4736     Assert(!((uintptr_t)pu64 & 7));
    4737 #if ARCH_BITS == 64
    4738     *pu64 = u64;
    4739 #else
    4740     ASMAtomicXchgU64(pu64, u64);
    4741 #endif
    4742 }
    4743 
    4744 
    4745 /**
    4746  * Atomically writes a signed 64-bit value, ordered.
    4747  *
    4748  * @param   pi64    Pointer to the 64-bit variable.
    4749  * @param   i64     The 64-bit value to assign to *pi64.
    4750  */
    4751 DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
    4752 {
    4753     ASMAtomicXchgS64(pi64, i64);
    4754 }
    4755 
    4756 
    4757 /**
    4758  * Atomically writes a signed 64-bit value, unordered.
    4759  *
    4760  * @param   pi64    Pointer to the 64-bit variable.
    4761  * @param   i64     The 64-bit value to assign to *pi64.
    4762  */
    4763 DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
    4764 {
    4765     Assert(!((uintptr_t)pi64 & 7));
    4766 #if ARCH_BITS == 64
    4767     *pi64 = i64;
    4768 #else
    4769     ASMAtomicXchgS64(pi64, i64);
    4770 #endif
    4771 }
    4772 
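A sketch contrasting the ordered and unordered 64-bit writes; the timestamp variable is hypothetical. Both stores are atomic as a whole, the unordered variant merely makes no ordering promise:

    #include <iprt/asm-amd64-x86.h>

    static uint64_t volatile g_u64LastHeartbeat;     /* hypothetical */

    static void myNoteHeartbeat(uint64_t u64Now)
    {
        ASMAtomicWriteU64(&g_u64LastHeartbeat, u64Now);    /* ordered */
    }

    static void myNoteHeartbeatRelaxed(uint64_t u64Now)
    {
        ASMAtomicUoWriteU64(&g_u64LastHeartbeat, u64Now);  /* atomic store, no ordering guarantee */
    }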
    4773 
    4774 /**
     4775  * Atomically writes a boolean value, ordered.
    4776  *
    4777  * @param   pf      Pointer to the boolean variable.
    4778  * @param   f       The boolean value to assign to *pf.
    4779  */
    4780 DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
    4781 {
    4782     ASMAtomicWriteU8((uint8_t volatile *)pf, f);
    4783 }
    4784 
    4785 
    4786 /**
    4787  * Atomically writes a boolean value, unordered.
    4788  *
    4789  * @param   pf      Pointer to the boolean variable.
    4790  * @param   f       The boolean value to assign to *pf.
    4791  */
    4792 DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
    4793 {
    4794     *pf = f;    /* byte writes are atomic on x86 */
    4795 }
    4796 
    4797 
    4798 /**
    4799  * Atomically writes a pointer value, ordered.
    4800  *
     4802  * @param   ppv     Pointer to the pointer variable.
     4803  * @param   pv      The pointer value to assign to *ppv.
    4804  */
    4805 DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
    4806 {
    4807 #if ARCH_BITS == 32
    4808     ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
    4809 #elif ARCH_BITS == 64
    4810     ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
    4811 #else
    4812 # error "ARCH_BITS is bogus"
    4813 #endif
    4814 }
    4815 
    4816 
    4817 /**
    4818  * Atomically writes a pointer value, unordered.
    4819  *
     4821  * @param   ppv     Pointer to the pointer variable.
     4822  * @param   pv      The pointer value to assign to *ppv.
    4823  */
    4824 DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
    4825 {
    4826 #if ARCH_BITS == 32
    4827     ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
    4828 #elif ARCH_BITS == 64
    4829     ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
    4830 #else
    4831 # error "ARCH_BITS is bogus"
    4832 #endif
    4833 }
    4834 
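A sketch of publishing an initialized structure through the pointer write/read pair above; the MYCONFIG type and helper names are hypothetical:

    #include <iprt/asm-amd64-x86.h>

    typedef struct MYCONFIG MYCONFIG;                /* hypothetical */
    static void * volatile g_pvCurrentConfig = NULL;

    static void myPublishConfig(MYCONFIG *pCfg)
    {
        /* Ordered write: the initialization of *pCfg is visible before the pointer is. */
        ASMAtomicWritePtr(&g_pvCurrentConfig, pCfg);
    }

    static MYCONFIG *myGetConfig(void)
    {
        return (MYCONFIG *)ASMAtomicReadPtr(&g_pvCurrentConfig);
    }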
    4835 
    4836 /**
    4837  * Atomically write a typical IPRT handle value, ordered.
    4838  *
    4839  * @param   ph      Pointer to the variable to update.
    4840  * @param   hNew    The value to assign to *ph.
    4841  *
    4842  * @remarks This doesn't currently work for all handles (like RTFILE).
    4843  */
    4844 #if HC_ARCH_BITS == 32
    4845 # define ASMAtomicWriteHandle(ph, hNew) \
    4846     do { \
    4847         AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
    4848         ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
    4849     } while (0)
    4850 #elif HC_ARCH_BITS == 64
    4851 # define ASMAtomicWriteHandle(ph, hNew) \
    4852     do { \
    4853         AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
    4854         ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
    4855     } while (0)
    4856 #else
    4857 # error HC_ARCH_BITS
    4858 #endif
    4859 
    4860 
    4861 /**
    4862  * Atomically write a typical IPRT handle value, unordered.
    4863  *
    4864  * @param   ph      Pointer to the variable to update.
    4865  * @param   hNew    The value to assign to *ph.
    4866  *
    4867  * @remarks This doesn't currently work for all handles (like RTFILE).
    4868  */
    4869 #if HC_ARCH_BITS == 32
    4870 # define ASMAtomicUoWriteHandle(ph, hNew) \
    4871     do { \
    4872         AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
    4873         ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
    4874     } while (0)
    4875 #elif HC_ARCH_BITS == 64
    4876 # define ASMAtomicUoWriteHandle(ph, hNew) \
    4877     do { \
    4878         AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
    4879         ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
    4880     } while (0)
    4881 #else
    4882 # error HC_ARCH_BITS
    4883 #endif
    4884 
    4885 
    4886 /**
     4887  * Atomically write a value whose size might differ
    4888  * between platforms or compilers, ordered.
    4889  *
    4890  * @param   pu      Pointer to the variable to update.
    4891  * @param   uNew    The value to assign to *pu.
    4892  */
    4893 #define ASMAtomicWriteSize(pu, uNew) \
    4894     do { \
    4895         switch (sizeof(*(pu))) { \
    4896             case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
    4897             case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
    4898             case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
    4899             case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
    4900             default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
    4901         } \
    4902     } while (0)
    4903 
    4904 /**
     4905  * Atomically write a value whose size might differ
    4906  * between platforms or compilers, unordered.
    4907  *
    4908  * @param   pu      Pointer to the variable to update.
    4909  * @param   uNew    The value to assign to *pu.
    4910  */
    4911 #define ASMAtomicUoWriteSize(pu, uNew) \
    4912     do { \
    4913         switch (sizeof(*(pu))) { \
    4914             case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
    4915             case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
    4916             case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
    4917             case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
    4918             default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
    4919         } \
    4920     } while (0)
    4921 
    4922 
    4923 
    4924 
    4925 /**
    4926  * Invalidate page.
    4927  *
    4928  * @param   pv      Address of the page to invalidate.
    4929  */
    4930 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    4931 DECLASM(void) ASMInvalidatePage(void *pv);
    4932 #else
    4933 DECLINLINE(void) ASMInvalidatePage(void *pv)
    4934 {
    4935 # if RT_INLINE_ASM_USES_INTRIN
    4936     __invlpg(pv);
    4937 
    4938 # elif RT_INLINE_ASM_GNU_STYLE
    4939     __asm__ __volatile__("invlpg %0\n\t"
    4940                          : : "m" (*(uint8_t *)pv));
    4941 # else
    4942     __asm
    4943     {
    4944 #  ifdef RT_ARCH_AMD64
    4945         mov     rax, [pv]
    4946         invlpg  [rax]
    4947 #  else
    4948         mov     eax, [pv]
    4949         invlpg  [eax]
    4950 #  endif
    4951     }
    4952 # endif
    4953 }
    4954 #endif
    4955 
    4956 
    4957 /**
    4958  * Write back the internal caches and invalidate them.
    4959  */
    4960 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    4961 DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
    4962 #else
    4963 DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
    4964 {
    4965 # if RT_INLINE_ASM_USES_INTRIN
    4966     __wbinvd();
    4967 
    4968 # elif RT_INLINE_ASM_GNU_STYLE
    4969     __asm__ __volatile__("wbinvd");
    4970 # else
    4971     __asm
    4972     {
    4973         wbinvd
    4974     }
    4975 # endif
    4976 }
    4977 #endif
    4978 
    4979 
    4980 /**
    4981  * Invalidate internal and (perhaps) external caches without first
    4982  * flushing dirty cache lines. Use with extreme care.
    4983  */
    4984 #if RT_INLINE_ASM_EXTERNAL
    4985 DECLASM(void) ASMInvalidateInternalCaches(void);
    4986 #else
    4987 DECLINLINE(void) ASMInvalidateInternalCaches(void)
    4988 {
    4989 # if RT_INLINE_ASM_GNU_STYLE
    4990     __asm__ __volatile__("invd");
    4991 # else
    4992     __asm
    4993     {
    4994         invd
    4995     }
    4996 # endif
    4997 }
    4998 #endif
    4999 
    5000 
    5001 #if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
    5002 # if PAGE_SIZE != 0x1000
    5003 #  error "PAGE_SIZE is not 0x1000!"
    5004 # endif
    5005 #endif
    5006 
    5007 /**
    5008  * Zeros a 4K memory page.
    5009  *
    5010  * @param   pv  Pointer to the memory block. This must be page aligned.
    5011  */
    5012 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5013 DECLASM(void) ASMMemZeroPage(volatile void *pv);
    5014 # else
    5015 DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
    5016 {
    5017 #  if RT_INLINE_ASM_USES_INTRIN
    5018 #   ifdef RT_ARCH_AMD64
    5019     __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
    5020 #   else
    5021     __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
    5022 #   endif
    5023 
    5024 #  elif RT_INLINE_ASM_GNU_STYLE
    5025     RTCCUINTREG uDummy;
    5026 #   ifdef RT_ARCH_AMD64
    5027     __asm__ __volatile__("rep stosq"
    5028                          : "=D" (pv),
    5029                            "=c" (uDummy)
    5030                          : "0" (pv),
    5031                            "c" (0x1000 >> 3),
    5032                            "a" (0)
    5033                          : "memory");
    5034 #   else
    5035     __asm__ __volatile__("rep stosl"
    5036                          : "=D" (pv),
    5037                            "=c" (uDummy)
    5038                          : "0" (pv),
    5039                            "c" (0x1000 >> 2),
    5040                            "a" (0)
    5041                          : "memory");
    5042 #   endif
    5043 #  else
    5044     __asm
    5045     {
    5046 #   ifdef RT_ARCH_AMD64
    5047         xor     rax, rax
    5048         mov     ecx, 0200h
    5049         mov     rdi, [pv]
    5050         rep     stosq
    5051 #   else
    5052         xor     eax, eax
    5053         mov     ecx, 0400h
    5054         mov     edi, [pv]
    5055         rep     stosd
    5056 #   endif
    5057     }
    5058 #  endif
    5059 }
    5060 # endif
    5061 
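A sketch of the page-zeroing helper; the caller (hypothetical here) must supply a writable, 4K aligned page, and ASMMemIsZeroPage defined further down serves as a debug-build sanity check:

    #include <iprt/asm-amd64-x86.h>
    #include <iprt/assert.h>

    /* pvPage must point to a writable, 4K aligned page (caller's responsibility). */
    static void myResetScratchPage(void *pvPage)
    {
        ASMMemZeroPage(pvPage);
        Assert(ASMMemIsZeroPage(pvPage));   /* defined later in this header */
    }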
    5062 
    5063 /**
    5064  * Zeros a memory block with a 32-bit aligned size.
    5065  *
    5066  * @param   pv  Pointer to the memory block.
    5067  * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
    5068  */
    5069 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5070 DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
    5071 #else
    5072 DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
    5073 {
    5074 # if RT_INLINE_ASM_USES_INTRIN
    5075 #  ifdef RT_ARCH_AMD64
    5076     if (!(cb & 7))
    5077         __stosq((unsigned __int64 *)pv, 0, cb / 8);
    5078     else
    5079 #  endif
    5080         __stosd((unsigned long *)pv, 0, cb / 4);
    5081 
    5082 # elif RT_INLINE_ASM_GNU_STYLE
    5083     __asm__ __volatile__("rep stosl"
    5084                          : "=D" (pv),
    5085                            "=c" (cb)
    5086                          : "0" (pv),
    5087                            "1" (cb >> 2),
    5088                            "a" (0)
    5089                          : "memory");
    5090 # else
    5091     __asm
    5092     {
    5093         xor     eax, eax
    5094 #  ifdef RT_ARCH_AMD64
    5095         mov     rcx, [cb]
    5096         shr     rcx, 2
    5097         mov     rdi, [pv]
    5098 #  else
    5099         mov     ecx, [cb]
    5100         shr     ecx, 2
    5101         mov     edi, [pv]
    5102 #  endif
    5103         rep stosd
    5104     }
    5105 # endif
    5106 }
    5107 #endif
    5108 
    5109 
    5110 /**
    5111  * Fills a memory block with a 32-bit aligned size.
    5112  *
    5113  * @param   pv  Pointer to the memory block.
    5114  * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
    5115  * @param   u32 The value to fill with.
    5116  */
    5117 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5118 DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
    5119 #else
    5120 DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
    5121 {
    5122 # if RT_INLINE_ASM_USES_INTRIN
    5123 #  ifdef RT_ARCH_AMD64
    5124     if (!(cb & 7))
    5125         __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
    5126     else
    5127 #  endif
    5128         __stosd((unsigned long *)pv, u32, cb / 4);
    5129 
    5130 # elif RT_INLINE_ASM_GNU_STYLE
    5131     __asm__ __volatile__("rep stosl"
    5132                          : "=D" (pv),
    5133                            "=c" (cb)
    5134                          : "0" (pv),
    5135                            "1" (cb >> 2),
    5136                            "a" (u32)
    5137                          : "memory");
    5138 # else
    5139     __asm
    5140     {
    5141 #  ifdef RT_ARCH_AMD64
    5142         mov     rcx, [cb]
    5143         shr     rcx, 2
    5144         mov     rdi, [pv]
    5145 #  else
    5146         mov     ecx, [cb]
    5147         shr     ecx, 2
    5148         mov     edi, [pv]
    5149 #  endif
    5150         mov     eax, [u32]
    5151         rep stosd
    5152     }
    5153 # endif
    5154 }
    5155 #endif
    5156 
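A sketch using the 32-bit zero/fill helpers on a buffer whose size is a multiple of four bytes, as both functions require; the fill value and names are arbitrary:

    #include <iprt/asm-amd64-x86.h>

    static uint32_t myInitPattern(void)
    {
        uint32_t au32Table[64];                     /* sizeof() is a multiple of 4 */
        ASMMemZero32(au32Table, sizeof(au32Table));
        ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xfeedface));
        return au32Table[17];                       /* arbitrary element, now 0xfeedface */
    }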
    5157 
    5158 /**
    5159  * Checks if a memory page is all zeros.
    5160  *
    5161  * @returns true / false.
    5162  *
    5163  * @param   pvPage      Pointer to the page.  Must be aligned on 16 byte
     5164  *                      boundary.
    5165  */
    5166 DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
    5167 {
    5168 # if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
    5169     union { RTCCUINTREG r; bool f; } uAX;
    5170     RTCCUINTREG xCX, xDI;
    5171    Assert(!((uintptr_t)pvPage & 15));
    5172     __asm__ __volatile__("repe; "
    5173 #  ifdef RT_ARCH_AMD64
    5174                          "scasq\n\t"
    5175 #  else
    5176                          "scasl\n\t"
    5177 #  endif
    5178                          "setnc %%al\n\t"
    5179                          : "=&c" (xCX),
    5180                            "=&D" (xDI),
    5181                            "=&a" (uAX.r)
    5182                          : "mr" (pvPage),
    5183 #  ifdef RT_ARCH_AMD64
    5184                          "0" (0x1000/8),
    5185 #  else
    5186                          "0" (0x1000/4),
    5187 #  endif
    5188                          "1" (pvPage),
    5189                          "2" (0));
    5190     return uAX.f;
    5191 # else
    5192    uintptr_t const *puPtr = (uintptr_t const *)pvPage;
    5193    int              cLeft = 0x1000 / sizeof(uintptr_t) / 8;
    5194    Assert(!((uintptr_t)pvPage & 15));
    5195    for (;;)
    5196    {
    5197        if (puPtr[0])        return false;
    5198        if (puPtr[4])        return false;
    5199 
    5200        if (puPtr[2])        return false;
    5201        if (puPtr[6])        return false;
    5202 
    5203        if (puPtr[1])        return false;
    5204        if (puPtr[5])        return false;
    5205 
    5206        if (puPtr[3])        return false;
    5207        if (puPtr[7])        return false;
    5208 
    5209        if (!--cLeft)
    5210            return true;
    5211        puPtr += 8;
    5212    }
    5213    return true;
    5214 # endif
    5215 }
    5216 
    5217 
    5218 /**
    5219  * Checks if a memory block is filled with the specified byte.
    5220  *
    5221  * This is a sort of inverted memchr.
    5222  *
    5223  * @returns Pointer to the byte which doesn't equal u8.
    5224  * @returns NULL if all equal to u8.
    5225  *
    5226  * @param   pv      Pointer to the memory block.
    5227  * @param   cb      Number of bytes in the block. This MUST be aligned on 32-bit!
    5228  * @param   u8      The value it's supposed to be filled with.
    5229  *
    5230  * @todo Fix name, it is a predicate function but it's not returning boolean!
    5231  */
    5232 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5233 DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
    5234 #else
    5235 DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
    5236 {
    5237 /** @todo rewrite this in inline assembly? */
    5238     uint8_t const *pb = (uint8_t const *)pv;
    5239     for (; cb; cb--, pb++)
    5240         if (RT_UNLIKELY(*pb != u8))
    5241             return (void *)pb;
    5242     return NULL;
    5243 }
    5244 #endif
    5245 
    5246 
    5247 /**
    5248  * Checks if a memory block is filled with the specified 32-bit value.
    5249  *
    5250  * This is a sort of inverted memchr.
    5251  *
    5252  * @returns Pointer to the first value which doesn't equal u32.
    5253  * @returns NULL if all equal to u32.
    5254  *
    5255  * @param   pv      Pointer to the memory block.
    5256  * @param   cb      Number of bytes in the block. This MUST be aligned on 32-bit!
    5257  * @param   u32     The value it's supposed to be filled with.
    5258  *
    5259  * @todo Fix name, it is a predicate function but it's not returning boolean!
    5260  */
    5261 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5262 DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
    5263 #else
    5264 DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
    5265 {
    5266 /** @todo rewrite this in inline assembly? */
    5267     uint32_t const *pu32 = (uint32_t const *)pv;
    5268     for (; cb; cb -= 4, pu32++)
    5269         if (RT_UNLIKELY(*pu32 != u32))
    5270             return (uint32_t *)pu32;
    5271     return NULL;
    5272 }
    5273 #endif
    5274 
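A sketch pairing the fill helper above with this check; it verifies that a table is still in its freshly filled state (fill value and names are hypothetical):

    #include <iprt/asm-amd64-x86.h>

    static bool myTableIsUntouched(uint32_t const *pau32, size_t cb)
    {
        /* ASMMemIsAllU32 returns NULL when every dword equals the given value. */
        return ASMMemIsAllU32(pau32, cb, UINT32_C(0xfeedface)) == NULL;
    }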
      2610 /** @name Integer Math Optimizations
     2611 * @{ */
     5275 2612 
    52762613/**
     
     5553 2890 #endif
     5554 2891 
    5555 
    5556 /**
    5557  * Probes a byte pointer for read access.
    5558  *
     5559  * While the function will fault if the byte is not read accessible,
    5560  * the idea is to do this in a safe place like before acquiring locks
    5561  * and such like.
    5562  *
     5563  * Also, this function guarantees that an eager compiler is not going
    5564  * to optimize the probing away.
    5565  *
    5566  * @param   pvByte      Pointer to the byte.
    5567  */
    5568 #if RT_INLINE_ASM_EXTERNAL
    5569 DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
    5570 #else
    5571 DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
    5572 {
    5573     /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    5574     uint8_t u8;
    5575 # if RT_INLINE_ASM_GNU_STYLE
    5576     __asm__ __volatile__("movb (%1), %0\n\t"
    5577                          : "=r" (u8)
    5578                          : "r" (pvByte));
    5579 # else
    5580     __asm
    5581     {
    5582 #  ifdef RT_ARCH_AMD64
    5583         mov     rax, [pvByte]
    5584         mov     al, [rax]
    5585 #  else
    5586         mov     eax, [pvByte]
    5587         mov     al, [eax]
    5588 #  endif
    5589         mov     [u8], al
    5590     }
    5591 # endif
    5592     return u8;
    5593 }
    5594 #endif
    5595 
    5596 /**
    5597  * Probes a buffer for read access page by page.
    5598  *
    5599  * While the function will fault if the buffer is not fully read
    5600  * accessible, the idea is to do this in a safe place like before
    5601  * acquiring locks and such like.
    5602  *
     5603  * Also, this function guarantees that an eager compiler is not going
    5604  * to optimize the probing away.
    5605  *
    5606  * @param   pvBuf       Pointer to the buffer.
    5607  * @param   cbBuf       The size of the buffer in bytes. Must be >= 1.
    5608  */
    5609 DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
    5610 {
    5611     /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    5612     /* the first byte */
    5613     const uint8_t *pu8 = (const uint8_t *)pvBuf;
    5614     ASMProbeReadByte(pu8);
    5615 
    5616     /* the pages in between pages. */
    5617     while (cbBuf > /*PAGE_SIZE*/0x1000)
    5618     {
    5619         ASMProbeReadByte(pu8);
    5620         cbBuf -= /*PAGE_SIZE*/0x1000;
    5621         pu8   += /*PAGE_SIZE*/0x1000;
    5622     }
    5623 
    5624     /* the last byte */
    5625     ASMProbeReadByte(pu8 + cbBuf - 1);
    5626 }
    5627 
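A sketch of the intended probing pattern: touch the caller's buffer before entering a region where a page fault would be fatal; the state type and lock helpers are hypothetical and defined elsewhere:

    #include <iprt/asm-amd64-x86.h>

    typedef struct MYSTATE MYSTATE;                                    /* hypothetical */
    void myLockAcquire(MYSTATE *pState);                               /* hypothetical lock wrappers */
    void myLockRelease(MYSTATE *pState);
    void myCopyRequest(MYSTATE *pState, const void *pvReq, size_t cbReq);

    static void myQueueRequest(MYSTATE *pState, const void *pvReq, size_t cbReq)
    {
        ASMProbeReadBuffer(pvReq, cbReq);   /* fault here, not while holding the lock */
        myLockAcquire(pState);
        myCopyRequest(pState, pvReq, cbReq);
        myLockRelease(pState);
    }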
    5628 
    5629 /** @def ASMBreakpoint
    5630  * Debugger Breakpoint.
    5631  * @remark  In the gnu world we add a nop instruction after the int3 to
    5632  *          force gdb to remain at the int3 source line.
     5633  * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
    5634  * @internal
    5635  */
    5636 #if RT_INLINE_ASM_GNU_STYLE
    5637 # ifndef __L4ENV__
    5638 #  define ASMBreakpoint()       do { __asm__ __volatile__("int3\n\tnop"); } while (0)
    5639 # else
    5640 #  define ASMBreakpoint()       do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
    5641 # endif
    5642 #else
    5643 # define ASMBreakpoint()        __debugbreak()
    5644 #endif
    5645 
    5646 
    5647 
    5648 /** @defgroup grp_inline_bits   Bit Operations
    5649  * @{
    5650  */
    5651 
    5652 
    5653 /**
    5654  * Sets a bit in a bitmap.
    5655  *
    5656  * @param   pvBitmap    Pointer to the bitmap. This should be 32-bit aligned.
    5657  * @param   iBit        The bit to set.
    5658  *
    5659  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    5660  *          However, doing so will yield better performance as well as avoiding
    5661  *          traps accessing the last bits in the bitmap.
    5662  */
    5663 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5664 DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
    5665 #else
    5666 DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
    5667 {
    5668 # if RT_INLINE_ASM_USES_INTRIN
    5669     _bittestandset((long *)pvBitmap, iBit);
    5670 
    5671 # elif RT_INLINE_ASM_GNU_STYLE
    5672     __asm__ __volatile__("btsl %1, %0"
    5673                          : "=m" (*(volatile long *)pvBitmap)
    5674                          : "Ir" (iBit),
    5675                            "m" (*(volatile long *)pvBitmap)
    5676                          : "memory");
    5677 # else
    5678     __asm
    5679     {
    5680 #  ifdef RT_ARCH_AMD64
    5681         mov     rax, [pvBitmap]
    5682         mov     edx, [iBit]
    5683         bts     [rax], edx
    5684 #  else
    5685         mov     eax, [pvBitmap]
    5686         mov     edx, [iBit]
    5687         bts     [eax], edx
    5688 #  endif
    5689     }
    5690 # endif
    5691 }
    5692 #endif
    5693 
    5694 
    5695 /**
    5696  * Atomically sets a bit in a bitmap, ordered.
    5697  *
    5698  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    5699  *                      the memory access isn't atomic!
    5700  * @param   iBit        The bit to set.
    5701  */
    5702 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5703 DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
    5704 #else
    5705 DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
    5706 {
    5707     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    5708 # if RT_INLINE_ASM_USES_INTRIN
    5709     _interlockedbittestandset((long *)pvBitmap, iBit);
    5710 # elif RT_INLINE_ASM_GNU_STYLE
    5711     __asm__ __volatile__("lock; btsl %1, %0"
    5712                          : "=m" (*(volatile long *)pvBitmap)
    5713                          : "Ir" (iBit),
    5714                            "m" (*(volatile long *)pvBitmap)
    5715                          : "memory");
    5716 # else
    5717     __asm
    5718     {
    5719 #  ifdef RT_ARCH_AMD64
    5720         mov     rax, [pvBitmap]
    5721         mov     edx, [iBit]
    5722         lock bts [rax], edx
    5723 #  else
    5724         mov     eax, [pvBitmap]
    5725         mov     edx, [iBit]
    5726         lock bts [eax], edx
    5727 #  endif
    5728     }
    5729 # endif
    5730 }
    5731 #endif
    5732 
    5733 
    5734 /**
    5735  * Clears a bit in a bitmap.
    5736  *
    5737  * @param   pvBitmap    Pointer to the bitmap.
    5738  * @param   iBit        The bit to clear.
    5739  *
    5740  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    5741  *          However, doing so will yield better performance as well as avoiding
    5742  *          traps accessing the last bits in the bitmap.
    5743  */
    5744 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5745 DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
    5746 #else
    5747 DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
    5748 {
    5749 # if RT_INLINE_ASM_USES_INTRIN
    5750     _bittestandreset((long *)pvBitmap, iBit);
    5751 
    5752 # elif RT_INLINE_ASM_GNU_STYLE
    5753     __asm__ __volatile__("btrl %1, %0"
    5754                          : "=m" (*(volatile long *)pvBitmap)
    5755                          : "Ir" (iBit),
    5756                            "m" (*(volatile long *)pvBitmap)
    5757                          : "memory");
    5758 # else
    5759     __asm
    5760     {
    5761 #  ifdef RT_ARCH_AMD64
    5762         mov     rax, [pvBitmap]
    5763         mov     edx, [iBit]
    5764         btr     [rax], edx
    5765 #  else
    5766         mov     eax, [pvBitmap]
    5767         mov     edx, [iBit]
    5768         btr     [eax], edx
    5769 #  endif
    5770     }
    5771 # endif
    5772 }
    5773 #endif
    5774 
    5775 
    5776 /**
    5777  * Atomically clears a bit in a bitmap, ordered.
    5778  *
    5779  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    5780  *                      the memory access isn't atomic!
    5781  * @param   iBit        The bit to clear.
    5782  * @remarks No memory barrier, take care on SMP.
    5783  */
    5784 #if RT_INLINE_ASM_EXTERNAL
    5785 DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
    5786 #else
    5787 DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
    5788 {
    5789     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    5790 # if RT_INLINE_ASM_GNU_STYLE
    5791     __asm__ __volatile__("lock; btrl %1, %0"
    5792                          : "=m" (*(volatile long *)pvBitmap)
    5793                          : "Ir" (iBit),
    5794                            "m" (*(volatile long *)pvBitmap)
    5795                          : "memory");
    5796 # else
    5797     __asm
    5798     {
    5799 #  ifdef RT_ARCH_AMD64
    5800         mov     rax, [pvBitmap]
    5801         mov     edx, [iBit]
    5802         lock btr [rax], edx
    5803 #  else
    5804         mov     eax, [pvBitmap]
    5805         mov     edx, [iBit]
    5806         lock btr [eax], edx
    5807 #  endif
    5808     }
    5809 # endif
    5810 }
    5811 #endif
    5812 
    5813 
    5814 /**
    5815  * Toggles a bit in a bitmap.
    5816  *
    5817  * @param   pvBitmap    Pointer to the bitmap.
    5818  * @param   iBit        The bit to toggle.
    5819  *
    5820  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    5821  *          However, doing so will yield better performance as well as avoiding
    5822  *          traps accessing the last bits in the bitmap.
    5823  */
    5824 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5825 DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
    5826 #else
    5827 DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
    5828 {
    5829 # if RT_INLINE_ASM_USES_INTRIN
    5830     _bittestandcomplement((long *)pvBitmap, iBit);
    5831 # elif RT_INLINE_ASM_GNU_STYLE
    5832     __asm__ __volatile__("btcl %1, %0"
    5833                          : "=m" (*(volatile long *)pvBitmap)
    5834                          : "Ir" (iBit),
    5835                            "m" (*(volatile long *)pvBitmap)
    5836                          : "memory");
    5837 # else
    5838     __asm
    5839     {
    5840 #  ifdef RT_ARCH_AMD64
    5841         mov     rax, [pvBitmap]
    5842         mov     edx, [iBit]
    5843         btc     [rax], edx
    5844 #  else
    5845         mov     eax, [pvBitmap]
    5846         mov     edx, [iBit]
    5847         btc     [eax], edx
    5848 #  endif
    5849     }
    5850 # endif
    5851 }
    5852 #endif
    5853 
    5854 
    5855 /**
    5856  * Atomically toggles a bit in a bitmap, ordered.
    5857  *
    5858  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    5859  *                      the memory access isn't atomic!
    5860  * @param   iBit        The bit to toggle.
    5861  */
    5862 #if RT_INLINE_ASM_EXTERNAL
    5863 DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
    5864 #else
    5865 DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
    5866 {
    5867     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    5868 # if RT_INLINE_ASM_GNU_STYLE
    5869     __asm__ __volatile__("lock; btcl %1, %0"
    5870                          : "=m" (*(volatile long *)pvBitmap)
    5871                          : "Ir" (iBit),
    5872                            "m" (*(volatile long *)pvBitmap)
    5873                          : "memory");
    5874 # else
    5875     __asm
    5876     {
    5877 #  ifdef RT_ARCH_AMD64
    5878         mov     rax, [pvBitmap]
    5879         mov     edx, [iBit]
    5880         lock btc [rax], edx
    5881 #  else
    5882         mov     eax, [pvBitmap]
    5883         mov     edx, [iBit]
    5884         lock btc [eax], edx
    5885 #  endif
    5886     }
    5887 # endif
    5888 }
    5889 #endif
    5890 
    5891 
    5892 /**
    5893  * Tests and sets a bit in a bitmap.
    5894  *
    5895  * @returns true if the bit was set.
    5896  * @returns false if the bit was clear.
    5897  *
    5898  * @param   pvBitmap    Pointer to the bitmap.
    5899  * @param   iBit        The bit to test and set.
    5900  *
    5901  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    5902  *          However, doing so will yield better performance as well as avoiding
    5903  *          traps accessing the last bits in the bitmap.
    5904  */
    5905 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5906 DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
    5907 #else
    5908 DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
    5909 {
    5910     union { bool f; uint32_t u32; uint8_t u8; } rc;
    5911 # if RT_INLINE_ASM_USES_INTRIN
    5912     rc.u8 = _bittestandset((long *)pvBitmap, iBit);
    5913 
    5914 # elif RT_INLINE_ASM_GNU_STYLE
    5915     __asm__ __volatile__("btsl %2, %1\n\t"
    5916                          "setc %b0\n\t"
    5917                          "andl $1, %0\n\t"
    5918                          : "=q" (rc.u32),
    5919                            "=m" (*(volatile long *)pvBitmap)
    5920                          : "Ir" (iBit),
    5921                            "m" (*(volatile long *)pvBitmap)
    5922                          : "memory");
    5923 # else
    5924     __asm
    5925     {
    5926         mov     edx, [iBit]
    5927 #  ifdef RT_ARCH_AMD64
    5928         mov     rax, [pvBitmap]
    5929         bts     [rax], edx
    5930 #  else
    5931         mov     eax, [pvBitmap]
    5932         bts     [eax], edx
    5933 #  endif
    5934         setc    al
    5935         and     eax, 1
    5936         mov     [rc.u32], eax
    5937     }
    5938 # endif
    5939     return rc.f;
    5940 }
    5941 #endif
    5942 
    5943 
    5944 /**
    5945  * Atomically tests and sets a bit in a bitmap, ordered.
    5946  *
    5947  * @returns true if the bit was set.
    5948  * @returns false if the bit was clear.
    5949  *
    5950  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    5951  *                      the memory access isn't atomic!
    5952  * @param   iBit        The bit to set.
    5953  */
    5954 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    5955 DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
    5956 #else
    5957 DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
    5958 {
    5959     union { bool f; uint32_t u32; uint8_t u8; } rc;
    5960     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    5961 # if RT_INLINE_ASM_USES_INTRIN
    5962     rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
    5963 # elif RT_INLINE_ASM_GNU_STYLE
    5964     __asm__ __volatile__("lock; btsl %2, %1\n\t"
    5965                          "setc %b0\n\t"
    5966                          "andl $1, %0\n\t"
    5967                          : "=q" (rc.u32),
    5968                            "=m" (*(volatile long *)pvBitmap)
    5969                          : "Ir" (iBit),
    5970                            "m" (*(volatile long *)pvBitmap)
    5971                          : "memory");
    5972 # else
    5973     __asm
    5974     {
    5975         mov     edx, [iBit]
    5976 #  ifdef RT_ARCH_AMD64
    5977         mov     rax, [pvBitmap]
    5978         lock bts [rax], edx
    5979 #  else
    5980         mov     eax, [pvBitmap]
    5981         lock bts [eax], edx
    5982 #  endif
    5983         setc    al
    5984         and     eax, 1
    5985         mov     [rc.u32], eax
    5986     }
    5987 # endif
    5988     return rc.f;
    5989 }
    5990 #endif
    5991 
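An illustrative sketch (not from the change set): claiming a free slot in a shared allocation bitmap with the atomic test-and-set above. exampleClaimSlot, pau32Bitmap and cSlots are made-up names.

    #include <iprt/asm.h>

    static int exampleClaimSlot(volatile uint32_t *pau32Bitmap, uint32_t cSlots)
    {
        uint32_t iSlot;
        for (iSlot = 0; iSlot < cSlots; iSlot++)
            if (!ASMAtomicBitTestAndSet(pau32Bitmap, iSlot))
                return (int)iSlot;      /* the bit was clear and is now ours */
        return -1;                      /* all slots taken */
    }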
    5992 
    5993 /**
    5994  * Tests and clears a bit in a bitmap.
    5995  *
    5996  * @returns true if the bit was set.
    5997  * @returns false if the bit was clear.
    5998  *
    5999  * @param   pvBitmap    Pointer to the bitmap.
    6000  * @param   iBit        The bit to test and clear.
    6001  *
    6002  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    6003  *          However, doing so will yield better performance as well as avoiding
    6004  *          traps accessing the last bits in the bitmap.
    6005  */
    6006 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6007 DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
    6008 #else
    6009 DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
    6010 {
    6011     union { bool f; uint32_t u32; uint8_t u8; } rc;
    6012 # if RT_INLINE_ASM_USES_INTRIN
    6013     rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
    6014 
    6015 # elif RT_INLINE_ASM_GNU_STYLE
    6016     __asm__ __volatile__("btrl %2, %1\n\t"
    6017                          "setc %b0\n\t"
    6018                          "andl $1, %0\n\t"
    6019                          : "=q" (rc.u32),
    6020                            "=m" (*(volatile long *)pvBitmap)
    6021                          : "Ir" (iBit),
    6022                            "m" (*(volatile long *)pvBitmap)
    6023                          : "memory");
    6024 # else
    6025     __asm
    6026     {
    6027         mov     edx, [iBit]
    6028 #  ifdef RT_ARCH_AMD64
    6029         mov     rax, [pvBitmap]
    6030         btr     [rax], edx
    6031 #  else
    6032         mov     eax, [pvBitmap]
    6033         btr     [eax], edx
    6034 #  endif
    6035         setc    al
    6036         and     eax, 1
    6037         mov     [rc.u32], eax
    6038     }
    6039 # endif
    6040     return rc.f;
    6041 }
    6042 #endif
    6043 
    6044 
    6045 /**
    6046  * Atomically tests and clears a bit in a bitmap, ordered.
    6047  *
    6048  * @returns true if the bit was set.
    6049  * @returns false if the bit was clear.
    6050  *
    6051  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    6052  *                      the memory access isn't atomic!
    6053  * @param   iBit        The bit to test and clear.
    6054  *
    6055  * @remarks No memory barrier, take care on SMP.
    6056  */
    6057 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6058 DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
    6059 #else
    6060 DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
    6061 {
    6062     union { bool f; uint32_t u32; uint8_t u8; } rc;
    6063     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    6064 # if RT_INLINE_ASM_USES_INTRIN
    6065     rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
    6066 
    6067 # elif RT_INLINE_ASM_GNU_STYLE
    6068     __asm__ __volatile__("lock; btrl %2, %1\n\t"
    6069                          "setc %b0\n\t"
    6070                          "andl $1, %0\n\t"
    6071                          : "=q" (rc.u32),
    6072                            "=m" (*(volatile long *)pvBitmap)
    6073                          : "Ir" (iBit),
    6074                            "m" (*(volatile long *)pvBitmap)
    6075                          : "memory");
    6076 # else
    6077     __asm
    6078     {
    6079         mov     edx, [iBit]
    6080 #  ifdef RT_ARCH_AMD64
    6081         mov     rax, [pvBitmap]
    6082         lock btr [rax], edx
    6083 #  else
    6084         mov     eax, [pvBitmap]
    6085         lock btr [eax], edx
    6086 #  endif
    6087         setc    al
    6088         and     eax, 1
    6089         mov     [rc.u32], eax
    6090     }
    6091 # endif
    6092     return rc.f;
    6093 }
    6094 #endif
    6095 
    6096 
    6097 /**
    6098  * Tests and toggles a bit in a bitmap.
    6099  *
    6100  * @returns true if the bit was set.
    6101  * @returns false if the bit was clear.
    6102  *
    6103  * @param   pvBitmap    Pointer to the bitmap.
    6104  * @param   iBit        The bit to test and toggle.
    6105  *
    6106  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    6107  *          However, doing so will yield better performance as well as avoiding
    6108  *          traps accessing the last bits in the bitmap.
    6109  */
    6110 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6111 DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
    6112 #else
    6113 DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
    6114 {
    6115     union { bool f; uint32_t u32; uint8_t u8; } rc;
    6116 # if RT_INLINE_ASM_USES_INTRIN
    6117     rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
    6118 
    6119 # elif RT_INLINE_ASM_GNU_STYLE
    6120     __asm__ __volatile__("btcl %2, %1\n\t"
    6121                          "setc %b0\n\t"
    6122                          "andl $1, %0\n\t"
    6123                          : "=q" (rc.u32),
    6124                            "=m" (*(volatile long *)pvBitmap)
    6125                          : "Ir" (iBit),
    6126                            "m" (*(volatile long *)pvBitmap)
    6127                          : "memory");
    6128 # else
    6129     __asm
    6130     {
    6131         mov   edx, [iBit]
    6132 #  ifdef RT_ARCH_AMD64
    6133         mov   rax, [pvBitmap]
    6134         btc   [rax], edx
    6135 #  else
    6136         mov   eax, [pvBitmap]
    6137         btc   [eax], edx
    6138 #  endif
    6139         setc  al
    6140         and   eax, 1
    6141         mov   [rc.u32], eax
    6142     }
    6143 # endif
    6144     return rc.f;
    6145 }
    6146 #endif
    6147 
    6148 
    6149 /**
    6150  * Atomically tests and toggles a bit in a bitmap, ordered.
    6151  *
    6152  * @returns true if the bit was set.
    6153  * @returns false if the bit was clear.
    6154  *
    6155  * @param   pvBitmap    Pointer to the bitmap. Must be 32-bit aligned, otherwise
    6156  *                      the memory access isn't atomic!
    6157  * @param   iBit        The bit to test and toggle.
    6158  */
    6159 #if RT_INLINE_ASM_EXTERNAL
    6160 DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
    6161 #else
    6162 DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
    6163 {
    6164     union { bool f; uint32_t u32; uint8_t u8; } rc;
    6165     AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
    6166 # if RT_INLINE_ASM_GNU_STYLE
    6167     __asm__ __volatile__("lock; btcl %2, %1\n\t"
    6168                          "setc %b0\n\t"
    6169                          "andl $1, %0\n\t"
    6170                          : "=q" (rc.u32),
    6171                            "=m" (*(volatile long *)pvBitmap)
    6172                          : "Ir" (iBit),
    6173                            "m" (*(volatile long *)pvBitmap)
    6174                          : "memory");
    6175 # else
    6176     __asm
    6177     {
    6178         mov     edx, [iBit]
    6179 #  ifdef RT_ARCH_AMD64
    6180         mov     rax, [pvBitmap]
    6181         lock btc [rax], edx
    6182 #  else
    6183         mov     eax, [pvBitmap]
    6184         lock btc [eax], edx
    6185 #  endif
    6186         setc    al
    6187         and     eax, 1
    6188         mov     [rc.u32], eax
    6189     }
    6190 # endif
    6191     return rc.f;
    6192 }
    6193 #endif
    6194 
    6195 
    6196 /**
    6197  * Tests if a bit in a bitmap is set.
    6198  *
    6199  * @returns true if the bit is set.
    6200  * @returns false if the bit is clear.
    6201  *
    6202  * @param   pvBitmap    Pointer to the bitmap.
    6203  * @param   iBit        The bit to test.
    6204  *
    6205  * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
    6206  *          However, doing so will yield better performance as well as avoiding
    6207  *          traps accessing the last bits in the bitmap.
    6208  */
    6209 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6210 DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
    6211 #else
    6212 DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
    6213 {
    6214     union { bool f; uint32_t u32; uint8_t u8; } rc;
    6215 # if RT_INLINE_ASM_USES_INTRIN
    6216     rc.u32 = _bittest((long *)pvBitmap, iBit);
    6217 # elif RT_INLINE_ASM_GNU_STYLE
    6218 
    6219     __asm__ __volatile__("btl %2, %1\n\t"
    6220                          "setc %b0\n\t"
    6221                          "andl $1, %0\n\t"
    6222                          : "=q" (rc.u32)
    6223                          : "m" (*(const volatile long *)pvBitmap),
    6224                            "Ir" (iBit)
    6225                          : "memory");
    6226 # else
    6227     __asm
    6228     {
    6229         mov   edx, [iBit]
    6230 #  ifdef RT_ARCH_AMD64
    6231         mov   rax, [pvBitmap]
    6232         bt    [rax], edx
    6233 #  else
    6234         mov   eax, [pvBitmap]
    6235         bt    [eax], edx
    6236 #  endif
    6237         setc  al
    6238         and   eax, 1
    6239         mov   [rc.u32], eax
    6240     }
    6241 # endif
    6242     return rc.f;
    6243 }
    6244 #endif
    6245 
    6246 
    6247 /**
    6248  * Clears a bit range within a bitmap.
    6249  *
    6250  * @param   pvBitmap    Pointer to the bitmap.
    6251  * @param   iBitStart   The first bit to clear.
    6252  * @param   iBitEnd     The first bit not to clear.
    6253  */
    6254 DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
    6255 {
    6256     if (iBitStart < iBitEnd)
    6257     {
    6258         volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
    6259         int iStart = iBitStart & ~31;
    6260         int iEnd   = iBitEnd & ~31;
    6261         if (iStart == iEnd)
    6262             *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
    6263         else
    6264         {
    6265             /* bits in first dword. */
    6266             if (iBitStart & 31)
    6267             {
    6268                 *pu32 &= (1 << (iBitStart & 31)) - 1;
    6269                 pu32++;
    6270                 iBitStart = iStart + 32;
    6271             }
    6272 
    6273             /* whole dword. */
    6274             if (iBitStart != iEnd)
    6275                 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
    6276 
    6277             /* bits in last dword. */
    6278             if (iBitEnd & 31)
    6279             {
    6280                 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
    6281                 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
    6282             }
    6283         }
    6284     }
    6285 }
    6286 
    6287 
    6288 /**
    6289  * Sets a bit range within a bitmap.
    6290  *
    6291  * @param   pvBitmap    Pointer to the bitmap.
    6292  * @param   iBitStart   The first bit to set.
    6293  * @param   iBitEnd     The first bit not to set.
    6294  */
    6295 DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
    6296 {
    6297     if (iBitStart < iBitEnd)
    6298     {
    6299         volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
    6300         int iStart = iBitStart & ~31;
    6301         int iEnd   = iBitEnd & ~31;
    6302         if (iStart == iEnd)
    6303             *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
    6304         else
    6305         {
    6306             /* bits in first dword. */
    6307             if (iBitStart & 31)
    6308             {
    6309                 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
    6310                 pu32++;
    6311                 iBitStart = iStart + 32;
    6312             }
    6313 
    6314             /* whole dword. */
    6315             if (iBitStart != iEnd)
    6316                 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
    6317 
    6318             /* bits in last dword. */
    6319             if (iBitEnd & 31)
    6320             {
    6321                 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
    6322                 *pu32 |= (1 << (iBitEnd & 31)) - 1;
    6323             }
    6324         }
    6325     }
    6326 }
    6327 
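Usage sketch for the two range helpers above (illustrative only): both take a half-open interval [iBitStart, iBitEnd). The bitmap name is made up.

    #include <iprt/asm.h>

    static uint32_t s_au32Alloc[1024 / 32];

    static void exampleRanges(void)
    {
        ASMBitSetRange(s_au32Alloc, 8, 24);     /* sets bits 8..23   */
        /* ... use the region ... */
        ASMBitClearRange(s_au32Alloc, 8, 24);   /* clears bits 8..23 */
    }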
    6328 
    6329 /**
    6330  * Finds the first clear bit in a bitmap.
    6331  *
    6332  * @returns Index of the first zero bit.
    6333  * @returns -1 if no clear bit was found.
    6334  * @param   pvBitmap    Pointer to the bitmap.
    6335  * @param   cBits       The number of bits in the bitmap. Multiple of 32.
    6336  */
    6337 #if RT_INLINE_ASM_EXTERNAL
    6338 DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
    6339 #else
    6340 DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
    6341 {
    6342     if (cBits)
    6343     {
    6344         int32_t iBit;
    6345 # if RT_INLINE_ASM_GNU_STYLE
    6346         RTCCUINTREG uEAX, uECX, uEDI;
    6347         cBits = RT_ALIGN_32(cBits, 32);
    6348         __asm__ __volatile__("repe; scasl\n\t"
    6349                              "je    1f\n\t"
    6350 #  ifdef RT_ARCH_AMD64
    6351                              "lea   -4(%%rdi), %%rdi\n\t"
    6352                              "xorl  (%%rdi), %%eax\n\t"
    6353                              "subq  %5, %%rdi\n\t"
    6354 #  else
    6355                              "lea   -4(%%edi), %%edi\n\t"
    6356                              "xorl  (%%edi), %%eax\n\t"
    6357                              "subl  %5, %%edi\n\t"
    6358 #  endif
    6359                              "shll  $3, %%edi\n\t"
    6360                              "bsfl  %%eax, %%edx\n\t"
    6361                              "addl  %%edi, %%edx\n\t"
    6362                              "1:\t\n"
    6363                              : "=d" (iBit),
    6364                                "=&c" (uECX),
    6365                                "=&D" (uEDI),
    6366                                "=&a" (uEAX)
    6367                              : "0" (0xffffffff),
    6368                                "mr" (pvBitmap),
    6369                                "1" (cBits >> 5),
    6370                                "2" (pvBitmap),
    6371                                "3" (0xffffffff));
    6372 # else
    6373         cBits = RT_ALIGN_32(cBits, 32);
    6374         __asm
    6375         {
    6376 #  ifdef RT_ARCH_AMD64
    6377             mov     rdi, [pvBitmap]
    6378             mov     rbx, rdi
    6379 #  else
    6380             mov     edi, [pvBitmap]
    6381             mov     ebx, edi
    6382 #  endif
    6383             mov     edx, 0ffffffffh
    6384             mov     eax, edx
    6385             mov     ecx, [cBits]
    6386             shr     ecx, 5
    6387             repe    scasd
    6388             je      done
    6389 
    6390 #  ifdef RT_ARCH_AMD64
    6391             lea     rdi, [rdi - 4]
    6392             xor     eax, [rdi]
    6393             sub     rdi, rbx
    6394 #  else
    6395             lea     edi, [edi - 4]
    6396             xor     eax, [edi]
    6397             sub     edi, ebx
    6398 #  endif
    6399             shl     edi, 3
    6400             bsf     edx, eax
    6401             add     edx, edi
    6402         done:
    6403             mov     [iBit], edx
    6404         }
    6405 # endif
    6406         return iBit;
    6407     }
    6408     return -1;
    6409 }
    6410 #endif
    6411 
    6412 
    6413 /**
    6414  * Finds the next clear bit in a bitmap.
    6415  *
    6416  * @returns Index of the next clear bit.
    6417  * @returns -1 if no clear bit was found.
    6418  * @param   pvBitmap    Pointer to the bitmap.
    6419  * @param   cBits       The number of bits in the bitmap. Multiple of 32.
    6420  * @param   iBitPrev    The bit returned from the last search.
    6421  *                      The search will start at iBitPrev + 1.
    6422  */
    6423 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6424 DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
    6425 #else
    6426 DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
    6427 {
    6428     const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
    6429     int                      iBit = ++iBitPrev & 31;
    6430     if (iBit)
    6431     {
    6432         /*
    6433          * Inspect the 32-bit word containing the unaligned bit.
    6434          */
    6435         uint32_t  u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
    6436 
    6437 # if RT_INLINE_ASM_USES_INTRIN
    6438         unsigned long ulBit = 0;
    6439         if (_BitScanForward(&ulBit, u32))
    6440             return ulBit + iBitPrev;
    6441 # else
    6442 #  if RT_INLINE_ASM_GNU_STYLE
    6443         __asm__ __volatile__("bsf %1, %0\n\t"
    6444                              "jnz 1f\n\t"
    6445                              "movl $-1, %0\n\t"
    6446                              "1:\n\t"
    6447                              : "=r" (iBit)
    6448                              : "r" (u32));
    6449 #  else
    6450         __asm
    6451         {
    6452             mov     edx, [u32]
    6453             bsf     eax, edx
    6454             jnz     done
    6455             mov     eax, 0ffffffffh
    6456         done:
    6457             mov     [iBit], eax
    6458         }
    6459 #  endif
    6460         if (iBit >= 0)
    6461             return iBit + iBitPrev;
    6462 # endif
    6463 
    6464         /*
    6465          * Skip ahead and see if there is anything left to search.
    6466          */
    6467         iBitPrev |= 31;
    6468         iBitPrev++;
    6469         if (cBits <= (uint32_t)iBitPrev)
    6470             return -1;
    6471     }
    6472 
    6473     /*
    6474      * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
    6475      */
    6476     iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
    6477     if (iBit >= 0)
    6478         iBit += iBitPrev;
    6479     return iBit;
    6480 }
    6481 #endif
    6482 
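An illustrative sketch of scanning for free entries with ASMBitFirstClear/ASMBitNextClear; the 512-bit size and names are made up.

    #include <iprt/asm.h>

    static void exampleScanFree(const volatile void *pvBitmap)
    {
        int iBit = ASMBitFirstClear(pvBitmap, 512);
        while (iBit >= 0)
        {
            /* ... iBit is the index of a free entry ... */
            iBit = ASMBitNextClear(pvBitmap, 512, (uint32_t)iBit);
        }
    }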
    6483 
    6484 /**
    6485  * Finds the first set bit in a bitmap.
    6486  *
    6487  * @returns Index of the first set bit.
    6488  * @returns -1 if no set bit was found.
    6489  * @param   pvBitmap    Pointer to the bitmap.
    6490  * @param   cBits       The number of bits in the bitmap. Multiple of 32.
    6491  */
    6492 #if RT_INLINE_ASM_EXTERNAL
    6493 DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
    6494 #else
    6495 DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
    6496 {
    6497     if (cBits)
    6498     {
    6499         int32_t iBit;
    6500 # if RT_INLINE_ASM_GNU_STYLE
    6501         RTCCUINTREG uEAX, uECX, uEDI;
    6502         cBits = RT_ALIGN_32(cBits, 32);
    6503         __asm__ __volatile__("repe; scasl\n\t"
    6504                              "je    1f\n\t"
    6505 #  ifdef RT_ARCH_AMD64
    6506                              "lea   -4(%%rdi), %%rdi\n\t"
    6507                              "movl  (%%rdi), %%eax\n\t"
    6508                              "subq  %5, %%rdi\n\t"
    6509 #  else
    6510                              "lea   -4(%%edi), %%edi\n\t"
    6511                              "movl  (%%edi), %%eax\n\t"
    6512                              "subl  %5, %%edi\n\t"
    6513 #  endif
    6514                              "shll  $3, %%edi\n\t"
    6515                              "bsfl  %%eax, %%edx\n\t"
    6516                              "addl  %%edi, %%edx\n\t"
    6517                              "1:\t\n"
    6518                              : "=d" (iBit),
    6519                                "=&c" (uECX),
    6520                                "=&D" (uEDI),
    6521                                "=&a" (uEAX)
    6522                              : "0" (0xffffffff),
    6523                                "mr" (pvBitmap),
    6524                                "1" (cBits >> 5),
    6525                                "2" (pvBitmap),
    6526                                "3" (0));
    6527 # else
    6528         cBits = RT_ALIGN_32(cBits, 32);
    6529         __asm
    6530         {
    6531 #  ifdef RT_ARCH_AMD64
    6532             mov     rdi, [pvBitmap]
    6533             mov     rbx, rdi
    6534 #  else
    6535             mov     edi, [pvBitmap]
    6536             mov     ebx, edi
    6537 #  endif
    6538             mov     edx, 0ffffffffh
    6539             xor     eax, eax
    6540             mov     ecx, [cBits]
    6541             shr     ecx, 5
    6542             repe    scasd
    6543             je      done
    6544 #  ifdef RT_ARCH_AMD64
    6545             lea     rdi, [rdi - 4]
    6546             mov     eax, [rdi]
    6547             sub     rdi, rbx
    6548 #  else
    6549             lea     edi, [edi - 4]
    6550             mov     eax, [edi]
    6551             sub     edi, ebx
    6552 #  endif
    6553             shl     edi, 3
    6554             bsf     edx, eax
    6555             add     edx, edi
    6556         done:
    6557             mov   [iBit], edx
    6558         }
    6559 # endif
    6560         return iBit;
    6561     }
    6562     return -1;
    6563 }
    6564 #endif
    6565 
    6566 
    6567 /**
    6568  * Finds the next set bit in a bitmap.
    6569  *
    6570  * @returns Index of the next set bit.
    6571  * @returns -1 if no set bit was found.
    6572  * @param   pvBitmap    Pointer to the bitmap.
    6573  * @param   cBits       The number of bits in the bitmap. Multiple of 32.
    6574  * @param   iBitPrev    The bit returned from the last search.
    6575  *                      The search will start at iBitPrev + 1.
    6576  */
    6577 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    6578 DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
    6579 #else
    6580 DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
    6581 {
    6582     const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
    6583     int                      iBit = ++iBitPrev & 31;
    6584     if (iBit)
    6585     {
    6586         /*
    6587          * Inspect the 32-bit word containing the unaligned bit.
    6588          */
    6589         uint32_t  u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
    6590 
    6591 # if RT_INLINE_ASM_USES_INTRIN
    6592         unsigned long ulBit = 0;
    6593         if (_BitScanForward(&ulBit, u32))
    6594             return ulBit + iBitPrev;
    6595 # else
    6596 #  if RT_INLINE_ASM_GNU_STYLE
    6597         __asm__ __volatile__("bsf %1, %0\n\t"
    6598                              "jnz 1f\n\t"
    6599                              "movl $-1, %0\n\t"
    6600                              "1:\n\t"
    6601                              : "=r" (iBit)
    6602                              : "r" (u32));
    6603 #  else
    6604         __asm
    6605         {
    6606             mov     edx, [u32]
    6607             bsf     eax, edx
    6608             jnz     done
    6609             mov     eax, 0ffffffffh
    6610         done:
    6611             mov     [iBit], eax
    6612         }
    6613 #  endif
    6614         if (iBit >= 0)
    6615             return iBit + iBitPrev;
    6616 # endif
    6617 
    6618         /*
    6619          * Skip ahead and see if there is anything left to search.
    6620          */
    6621         iBitPrev |= 31;
    6622         iBitPrev++;
    6623         if (cBits <= (uint32_t)iBitPrev)
    6624             return -1;
    6625     }
    6626 
    6627     /*
    6628      * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
    6629      */
    6630     iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
    6631     if (iBit >= 0)
    6632         iBit += iBitPrev;
    6633     return iBit;
    6634 }
    6635 #endif
    6636 
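An illustrative sketch of draining a pending-work bitmap with ASMBitFirstSet/ASMBitNextSet, clearing each entry atomically as it is handled; the names and the 128-bit size are made up.

    #include <iprt/asm.h>

    static void exampleDrainPending(volatile uint32_t *pau32Pending)
    {
        int iBit = ASMBitFirstSet(pau32Pending, 128);
        while (iBit >= 0)
        {
            ASMAtomicBitClear(pau32Pending, iBit);  /* consume the request */
            /* ... handle entry iBit ... */
            iBit = ASMBitNextSet(pau32Pending, 128, (uint32_t)iBit);
        }
    }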
    6637 
    6638 /**
    6639  * Finds the first bit which is set in the given 32-bit integer.
    6640  * Bits are numbered from 1 (least significant) to 32.
    6641  *
    6642  * @returns index [1..32] of the first set bit.
    6643  * @returns 0 if all bits are cleared.
    6644  * @param   u32     Integer to search for set bits.
    6645  * @remark  Similar to ffs() in BSD.
    6646  */
    6647 DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
    6648 {
    6649 # if RT_INLINE_ASM_USES_INTRIN
    6650     unsigned long iBit;
    6651     if (_BitScanForward(&iBit, u32))
    6652         iBit++;
    6653     else
    6654         iBit = 0;
    6655 # elif RT_INLINE_ASM_GNU_STYLE
    6656     uint32_t iBit;
    6657     __asm__ __volatile__("bsf  %1, %0\n\t"
    6658                          "jnz  1f\n\t"
    6659                          "xorl %0, %0\n\t"
    6660                          "jmp  2f\n"
    6661                          "1:\n\t"
    6662                          "incl %0\n"
    6663                          "2:\n\t"
    6664                          : "=r" (iBit)
    6665                          : "rm" (u32));
    6666 # else
    6667     uint32_t iBit;
    6668     _asm
    6669     {
    6670         bsf     eax, [u32]
    6671         jnz     found
    6672         xor     eax, eax
    6673         jmp     done
    6674     found:
    6675         inc     eax
    6676     done:
    6677         mov     [iBit], eax
    6678     }
    6679 # endif
    6680     return iBit;
    6681 }
    6682 
    6683 
    6684 /**
    6685  * Finds the first bit which is set in the given 32-bit integer.
    6686  * Bits are numbered from 1 (least significant) to 32.
    6687  *
    6688  * @returns index [1..32] of the first set bit.
    6689  * @returns 0 if all bits are cleared.
    6690  * @param   i32     Integer to search for set bits.
    6691  * @remark  Similar to ffs() in BSD.
    6692  */
    6693 DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
    6694 {
    6695     return ASMBitFirstSetU32((uint32_t)i32);
    6696 }
    6697 
    6698 
    6699 /**
    6700  * Finds the last bit which is set in the given 32-bit integer.
    6701  * Bits are numbered from 1 (least significant) to 32.
    6702  *
    6703  * @returns index [1..32] of the last set bit.
    6704  * @returns 0 if all bits are cleared.
    6705  * @param   u32     Integer to search for set bits.
    6706  * @remark  Similar to fls() in BSD.
    6707  */
    6708 DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
    6709 {
    6710 # if RT_INLINE_ASM_USES_INTRIN
    6711     unsigned long iBit;
    6712     if (_BitScanReverse(&iBit, u32))
    6713         iBit++;
    6714     else
    6715         iBit = 0;
    6716 # elif RT_INLINE_ASM_GNU_STYLE
    6717     uint32_t iBit;
    6718     __asm__ __volatile__("bsrl %1, %0\n\t"
    6719                          "jnz   1f\n\t"
    6720                          "xorl %0, %0\n\t"
    6721                          "jmp  2f\n"
    6722                          "1:\n\t"
    6723                          "incl %0\n"
    6724                          "2:\n\t"
    6725                          : "=r" (iBit)
    6726                          : "rm" (u32));
    6727 # else
    6728     uint32_t iBit;
    6729     _asm
    6730     {
    6731         bsr     eax, [u32]
    6732         jnz     found
    6733         xor     eax, eax
    6734         jmp     done
    6735     found:
    6736         inc     eax
    6737     done:
    6738         mov     [iBit], eax
    6739     }
    6740 # endif
    6741     return iBit;
    6742 }
    6743 
    6744 
    6745 /**
    6746  * Finds the last bit which is set in the given 32-bit integer.
    6747  * Bits are numbered from 1 (least significant) to 32.
    6748  *
    6749  * @returns index [1..32] of the last set bit.
    6750  * @returns 0 if all bits are cleared.
    6751  * @param   i32     Integer to search for set bits.
    6752  * @remark  Similar to fls() in BSD.
    6753  */
    6754 DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
    6755 {
    6756     return ASMBitLastSetU32((uint32_t)i32);
    6757 }
    6758 
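Concrete values for the ffs/fls style helpers above (illustrative): results are 1-based and 0 means no bit is set.

    #include <iprt/asm.h>

    static void exampleFfsFls(void)
    {
        unsigned i;
        i = ASMBitFirstSetU32(0);                   /* 0  - no bit set            */
        i = ASMBitFirstSetU32(UINT32_C(0x10));      /* 5  - bit 4, counted 1..32  */
        i = ASMBitLastSetU32(UINT32_C(0x80000001)); /* 32 - most significant bit  */
        (void)i;
    }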
    6759 /**
    6760  * Reverse the byte order of the given 16-bit integer.
    6761  *
    6762  * @returns The byte-swapped value.
    6763  * @param   u16     16-bit integer value.
    6764  */
    6765 DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
    6766 {
    6767 #if RT_INLINE_ASM_USES_INTRIN
    6768     u16 = _byteswap_ushort(u16);
    6769 #elif RT_INLINE_ASM_GNU_STYLE
    6770     __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
    6771 #else
    6772     _asm
    6773     {
    6774         mov     ax, [u16]
    6775         ror     ax, 8
    6776         mov     [u16], ax
    6777     }
    6778 #endif
    6779     return u16;
    6780 }
    6781 
    6782 /**
    6783  * Reverse the byte order of the given 32-bit integer.
    6784  *
    6785  * @returns The byte-swapped value.
    6786  * @param   u32     32-bit integer value.
    6787  */
    6788 DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
    6789 {
    6790 #if RT_INLINE_ASM_USES_INTRIN
    6791     u32 = _byteswap_ulong(u32);
    6792 #elif RT_INLINE_ASM_GNU_STYLE
    6793     __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
    6794 #else
    6795     _asm
    6796     {
    6797         mov     eax, [u32]
    6798         bswap   eax
    6799         mov     [u32], eax
    6800     }
    6801 #endif
    6802     return u32;
    6803 }
    6804 
    6805 
    6806 /**
    6807  * Reverse the byte order of the given 64-bit integer.
    6808  *
    6809  * @returns The byte-swapped value.
    6810  * @param   u64     64-bit integer value.
    6811  */
    6812 DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
    6813 {
    6814 #if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
    6815     u64 = _byteswap_uint64(u64);
    6816 #else
    6817     u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
    6818         | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
    6819 #endif
    6820     return u64;
    6821 }
    6822 
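An illustrative sketch of the byte swappers above, e.g. for converting between host and wire byte order; the values are arbitrary.

    #include <iprt/asm.h>

    static void exampleByteSwap(void)
    {
        uint16_t u16 = ASMByteSwapU16(UINT16_C(0xabcd));             /* 0xcdab             */
        uint32_t u32 = ASMByteSwapU32(UINT32_C(0x12345678));         /* 0x78563412         */
        uint64_t u64 = ASMByteSwapU64(UINT64_C(0x1122334455667788)); /* 0x8877665544332211 */
        (void)u16; (void)u32; (void)u64;
    }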
    6823 
    68242892/** @} */
    68252893
    6826 
    68272894/** @} */
    68282895#endif
  • trunk/include/iprt/asm.h

    r28800 r29245  
    44
    55/*
    6  * Copyright (C) 2006-2007 Oracle Corporation
     6 * Copyright (C) 2006-2010 Oracle Corporation
    77 *
    88 * This file is part of VirtualBox Open Source Edition (OSE), as
     
    4848#  pragma intrinsic(_ReadWriteBarrier)
    4949#  pragma intrinsic(__cpuid)
    50 #  pragma intrinsic(_enable)
    51 #  pragma intrinsic(_disable)
    52 #  pragma intrinsic(__rdtsc)
    53 #  pragma intrinsic(__readmsr)
    54 #  pragma intrinsic(__writemsr)
    55 #  pragma intrinsic(__outbyte)
    56 #  pragma intrinsic(__outbytestring)
    57 #  pragma intrinsic(__outword)
    58 #  pragma intrinsic(__outwordstring)
    59 #  pragma intrinsic(__outdword)
    60 #  pragma intrinsic(__outdwordstring)
    61 #  pragma intrinsic(__inbyte)
    62 #  pragma intrinsic(__inbytestring)
    63 #  pragma intrinsic(__inword)
    64 #  pragma intrinsic(__inwordstring)
    65 #  pragma intrinsic(__indword)
    66 #  pragma intrinsic(__indwordstring)
    67 #  pragma intrinsic(__invlpg)
    68 #  pragma intrinsic(__wbinvd)
    6950#  pragma intrinsic(__stosd)
    7051#  pragma intrinsic(__stosw)
    7152#  pragma intrinsic(__stosb)
    72 #  pragma intrinsic(__readcr0)
    73 #  pragma intrinsic(__readcr2)
    74 #  pragma intrinsic(__readcr3)
    75 #  pragma intrinsic(__readcr4)
    76 #  pragma intrinsic(__writecr0)
    77 #  pragma intrinsic(__writecr3)
    78 #  pragma intrinsic(__writecr4)
    79 #  pragma intrinsic(__readdr)
    80 #  pragma intrinsic(__writedr)
    8153#  pragma intrinsic(_BitScanForward)
    8254#  pragma intrinsic(_BitScanReverse)
     
    9870#  pragma intrinsic(_InterlockedCompareExchange64)
    9971#  ifdef RT_ARCH_AMD64
    100 #   pragma intrinsic(_mm_mfence)
    101 #   pragma intrinsic(_mm_sfence)
    102 #   pragma intrinsic(_mm_lfence)
    10372#   pragma intrinsic(__stosq)
    104 #   pragma intrinsic(__readcr8)
    105 #   pragma intrinsic(__writecr8)
    10673#   pragma intrinsic(_byteswap_uint64)
    10774#   pragma intrinsic(_InterlockedExchange64)
     
    11481
    11582
    116 /** @defgroup grp_asm       ASM - Assembly Routines
     83/** @defgroup grp_rt_asm    ASM - Assembly Routines
    11784 * @ingroup grp_rt
    11885 *
     
    202169
    203170
    204 /** @todo find a more proper place for this structure? */
    205 #pragma pack(1)
    206 /** IDTR */
    207 typedef struct RTIDTR
    208 {
    209     /** Size of the IDT. */
    210     uint16_t    cbIdt;
    211     /** Address of the IDT. */
    212     uintptr_t   pIdt;
    213 } RTIDTR, *PRTIDTR;
    214 #pragma pack()
    215 
    216 #pragma pack(1)
    217 /** GDTR */
    218 typedef struct RTGDTR
    219 {
    220     /** Size of the GDT. */
    221     uint16_t    cbGdt;
    222     /** Address of the GDT. */
    223     uintptr_t   pGdt;
    224 } RTGDTR, *PRTGDTR;
    225 #pragma pack()
    226 
    227 
    228171/** @def ASMReturnAddress
    229172 * Gets the return address of the current (or calling if you like) function or method.
     
    244187
    245188/**
    246  * Gets the content of the IDTR CPU register.
    247  * @param   pIdtr   Where to store the IDTR contents.
    248  */
    249 #if RT_INLINE_ASM_EXTERNAL
    250 DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
    251 #else
    252 DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
    253 {
    254 # if RT_INLINE_ASM_GNU_STYLE
    255     __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
    256 # else
    257     __asm
    258     {
    259 #  ifdef RT_ARCH_AMD64
    260         mov     rax, [pIdtr]
    261         sidt    [rax]
    262 #  else
    263         mov     eax, [pIdtr]
    264         sidt    [eax]
    265 #  endif
    266     }
    267 # endif
    268 }
    269 #endif
    270 
    271 
    272 /**
    273  * Sets the content of the IDTR CPU register.
    274  * @param   pIdtr   Where to load the IDTR contents from
    275  */
    276 #if RT_INLINE_ASM_EXTERNAL
    277 DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
    278 #else
    279 DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
    280 {
    281 # if RT_INLINE_ASM_GNU_STYLE
    282     __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
    283 # else
    284     __asm
    285     {
    286 #  ifdef RT_ARCH_AMD64
    287         mov     rax, [pIdtr]
    288         lidt    [rax]
    289 #  else
    290         mov     eax, [pIdtr]
    291         lidt    [eax]
    292 #  endif
    293     }
    294 # endif
    295 }
    296 #endif
    297 
    298 
    299 /**
    300  * Gets the content of the GDTR CPU register.
    301  * @param   pGdtr   Where to store the GDTR contents.
    302  */
    303 #if RT_INLINE_ASM_EXTERNAL
    304 DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
    305 #else
    306 DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
    307 {
    308 # if RT_INLINE_ASM_GNU_STYLE
    309     __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
    310 # else
    311     __asm
    312     {
    313 #  ifdef RT_ARCH_AMD64
    314         mov     rax, [pGdtr]
    315         sgdt    [rax]
    316 #  else
    317         mov     eax, [pGdtr]
    318         sgdt    [eax]
    319 #  endif
    320     }
    321 # endif
    322 }
    323 #endif
    324 
    325 /**
     326  * Get the CS register.
     327  * @returns CS.
    328  */
    329 #if RT_INLINE_ASM_EXTERNAL
    330 DECLASM(RTSEL) ASMGetCS(void);
    331 #else
    332 DECLINLINE(RTSEL) ASMGetCS(void)
    333 {
    334     RTSEL SelCS;
    335 # if RT_INLINE_ASM_GNU_STYLE
    336     __asm__ __volatile__("movw  %%cs, %0\n\t" : "=r" (SelCS));
    337 # else
    338     __asm
    339     {
    340         mov     ax, cs
    341         mov     [SelCS], ax
    342     }
    343 # endif
    344     return SelCS;
    345 }
    346 #endif
    347 
    348 
    349 /**
    350  * Get the DS register.
    351  * @returns DS.
    352  */
    353 #if RT_INLINE_ASM_EXTERNAL
    354 DECLASM(RTSEL) ASMGetDS(void);
    355 #else
    356 DECLINLINE(RTSEL) ASMGetDS(void)
    357 {
    358     RTSEL SelDS;
    359 # if RT_INLINE_ASM_GNU_STYLE
    360     __asm__ __volatile__("movw  %%ds, %0\n\t" : "=r" (SelDS));
    361 # else
    362     __asm
    363     {
    364         mov     ax, ds
    365         mov     [SelDS], ax
    366     }
    367 # endif
    368     return SelDS;
    369 }
    370 #endif
    371 
    372 
    373 /**
    374  * Get the ES register.
    375  * @returns ES.
    376  */
    377 #if RT_INLINE_ASM_EXTERNAL
    378 DECLASM(RTSEL) ASMGetES(void);
    379 #else
    380 DECLINLINE(RTSEL) ASMGetES(void)
    381 {
    382     RTSEL SelES;
    383 # if RT_INLINE_ASM_GNU_STYLE
    384     __asm__ __volatile__("movw  %%es, %0\n\t" : "=r" (SelES));
    385 # else
    386     __asm
    387     {
    388         mov     ax, es
    389         mov     [SelES], ax
    390     }
    391 # endif
    392     return SelES;
    393 }
    394 #endif
    395 
    396 
    397 /**
    398  * Get the FS register.
    399  * @returns FS.
    400  */
    401 #if RT_INLINE_ASM_EXTERNAL
    402 DECLASM(RTSEL) ASMGetFS(void);
    403 #else
    404 DECLINLINE(RTSEL) ASMGetFS(void)
    405 {
    406     RTSEL SelFS;
    407 # if RT_INLINE_ASM_GNU_STYLE
    408     __asm__ __volatile__("movw  %%fs, %0\n\t" : "=r" (SelFS));
    409 # else
    410     __asm
    411     {
    412         mov     ax, fs
    413         mov     [SelFS], ax
    414     }
    415 # endif
    416     return SelFS;
    417 }
     418 #endif
    419 
    420 
    421 /**
    422  * Get the GS register.
    423  * @returns GS.
    424  */
    425 #if RT_INLINE_ASM_EXTERNAL
    426 DECLASM(RTSEL) ASMGetGS(void);
    427 #else
    428 DECLINLINE(RTSEL) ASMGetGS(void)
    429 {
    430     RTSEL SelGS;
    431 # if RT_INLINE_ASM_GNU_STYLE
    432     __asm__ __volatile__("movw  %%gs, %0\n\t" : "=r" (SelGS));
    433 # else
    434     __asm
    435     {
    436         mov     ax, gs
    437         mov     [SelGS], ax
    438     }
    439 # endif
    440     return SelGS;
    441 }
    442 #endif
    443 
    444 
    445 /**
    446  * Get the SS register.
    447  * @returns SS.
    448  */
    449 #if RT_INLINE_ASM_EXTERNAL
    450 DECLASM(RTSEL) ASMGetSS(void);
    451 #else
    452 DECLINLINE(RTSEL) ASMGetSS(void)
    453 {
    454     RTSEL SelSS;
    455 # if RT_INLINE_ASM_GNU_STYLE
    456     __asm__ __volatile__("movw  %%ss, %0\n\t" : "=r" (SelSS));
    457 # else
    458     __asm
    459     {
    460         mov     ax, ss
    461         mov     [SelSS], ax
    462     }
    463 # endif
    464     return SelSS;
    465 }
    466 #endif
    467 
    468 
    469 /**
    470  * Get the TR register.
    471  * @returns TR.
    472  */
    473 #if RT_INLINE_ASM_EXTERNAL
    474 DECLASM(RTSEL) ASMGetTR(void);
    475 #else
    476 DECLINLINE(RTSEL) ASMGetTR(void)
    477 {
    478     RTSEL SelTR;
    479 # if RT_INLINE_ASM_GNU_STYLE
    480     __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
    481 # else
    482     __asm
    483     {
    484         str     ax
    485         mov     [SelTR], ax
    486     }
    487 # endif
    488     return SelTR;
    489 }
    490 #endif
    491 
    492 
    493 /**
    494  * Get the [RE]FLAGS register.
    495  * @returns [RE]FLAGS.
    496  */
    497 #if RT_INLINE_ASM_EXTERNAL
    498 DECLASM(RTCCUINTREG) ASMGetFlags(void);
    499 #else
    500 DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
    501 {
    502     RTCCUINTREG uFlags;
    503 # if RT_INLINE_ASM_GNU_STYLE
    504 #  ifdef RT_ARCH_AMD64
    505     __asm__ __volatile__("pushfq\n\t"
    506                          "popq  %0\n\t"
    507                          : "=r" (uFlags));
    508 #  else
    509     __asm__ __volatile__("pushfl\n\t"
    510                          "popl  %0\n\t"
    511                          : "=r" (uFlags));
    512 #  endif
    513 # else
    514     __asm
    515     {
    516 #  ifdef RT_ARCH_AMD64
    517         pushfq
    518         pop  [uFlags]
    519 #  else
    520         pushfd
    521         pop  [uFlags]
    522 #  endif
    523     }
    524 # endif
    525     return uFlags;
    526 }
    527 #endif
    528 
    529 
    530 /**
    531  * Set the [RE]FLAGS register.
    532  * @param   uFlags      The new [RE]FLAGS value.
    533  */
    534 #if RT_INLINE_ASM_EXTERNAL
    535 DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
    536 #else
    537 DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
    538 {
    539 # if RT_INLINE_ASM_GNU_STYLE
    540 #  ifdef RT_ARCH_AMD64
    541     __asm__ __volatile__("pushq %0\n\t"
    542                          "popfq\n\t"
    543                          : : "g" (uFlags));
    544 #  else
    545     __asm__ __volatile__("pushl %0\n\t"
    546                          "popfl\n\t"
    547                          : : "g" (uFlags));
    548 #  endif
    549 # else
    550     __asm
    551     {
    552 #  ifdef RT_ARCH_AMD64
    553         push    [uFlags]
    554         popfq
    555 #  else
    556         push    [uFlags]
    557         popfd
    558 #  endif
    559     }
    560 # endif
    561 }
    562 #endif
    563 
    564 
    565 /**
    566  * Gets the content of the CPU timestamp counter register.
    567  *
    568  * @returns TSC.
    569  */
    570 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    571 DECLASM(uint64_t) ASMReadTSC(void);
    572 #else
    573 DECLINLINE(uint64_t) ASMReadTSC(void)
    574 {
    575     RTUINT64U u;
    576 # if RT_INLINE_ASM_GNU_STYLE
    577     __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
    578 # else
    579 #  if RT_INLINE_ASM_USES_INTRIN
    580     u.u = __rdtsc();
    581 #  else
    582     __asm
    583     {
    584         rdtsc
    585         mov     [u.s.Lo], eax
    586         mov     [u.s.Hi], edx
    587     }
    588 #  endif
    589 # endif
    590     return u.u;
    591 }
    592 #endif
    593 
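An illustrative sketch (after this change the declaration lives in iprt/asm-amd64-x86.h): a rough cycle count for a piece of work. The raw TSC is not serializing and can differ between cores, so this is only a sketch; pfnWork is a made-up name.

    #include <iprt/asm-amd64-x86.h>

    static uint64_t exampleCycles(void (*pfnWork)(void))
    {
        uint64_t const uStart = ASMReadTSC();
        pfnWork();
        return ASMReadTSC() - uStart;
    }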
    594 
    595 /**
    596  * Performs the cpuid instruction returning all registers.
    597  *
    598  * @param   uOperator   CPUID operation (eax).
    599  * @param   pvEAX       Where to store eax.
    600  * @param   pvEBX       Where to store ebx.
    601  * @param   pvECX       Where to store ecx.
    602  * @param   pvEDX       Where to store edx.
    603  * @remark  We're using void pointers to ease the use of special bitfield structures and such.
    604  */
    605 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    606 DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
    607 #else
    608 DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
    609 {
    610 # if RT_INLINE_ASM_GNU_STYLE
    611 #  ifdef RT_ARCH_AMD64
    612     RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
    613     __asm__ ("cpuid\n\t"
    614              : "=a" (uRAX),
    615                "=b" (uRBX),
    616                "=c" (uRCX),
    617                "=d" (uRDX)
    618              : "0" (uOperator));
    619     *(uint32_t *)pvEAX = (uint32_t)uRAX;
    620     *(uint32_t *)pvEBX = (uint32_t)uRBX;
    621     *(uint32_t *)pvECX = (uint32_t)uRCX;
    622     *(uint32_t *)pvEDX = (uint32_t)uRDX;
    623 #  else
    624     __asm__ ("xchgl %%ebx, %1\n\t"
    625              "cpuid\n\t"
    626              "xchgl %%ebx, %1\n\t"
    627              : "=a" (*(uint32_t *)pvEAX),
    628                "=r" (*(uint32_t *)pvEBX),
    629                "=c" (*(uint32_t *)pvECX),
    630                "=d" (*(uint32_t *)pvEDX)
    631              : "0" (uOperator));
    632 #  endif
    633 
    634 # elif RT_INLINE_ASM_USES_INTRIN
    635     int aInfo[4];
    636     __cpuid(aInfo, uOperator);
    637     *(uint32_t *)pvEAX = aInfo[0];
    638     *(uint32_t *)pvEBX = aInfo[1];
    639     *(uint32_t *)pvECX = aInfo[2];
    640     *(uint32_t *)pvEDX = aInfo[3];
    641 
    642 # else
    643     uint32_t    uEAX;
    644     uint32_t    uEBX;
    645     uint32_t    uECX;
    646     uint32_t    uEDX;
    647     __asm
    648     {
    649         push    ebx
    650         mov     eax, [uOperator]
    651         cpuid
    652         mov     [uEAX], eax
    653         mov     [uEBX], ebx
    654         mov     [uECX], ecx
    655         mov     [uEDX], edx
    656         pop     ebx
    657     }
    658     *(uint32_t *)pvEAX = uEAX;
    659     *(uint32_t *)pvEBX = uEBX;
    660     *(uint32_t *)pvECX = uECX;
    661     *(uint32_t *)pvEDX = uEDX;
    662 # endif
    663 }
    664 #endif
    665 
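An illustrative sketch of ASMCpuId (now declared in iprt/asm-amd64-x86.h): reading the 12 character vendor string from CPUID leaf 0. exampleCpuVendor is a made-up name; pszVendor must have room for 13 bytes.

    #include <iprt/asm-amd64-x86.h>
    #include <string.h>

    static void exampleCpuVendor(char *pszVendor)
    {
        uint32_t uEAX, uEBX, uECX, uEDX;
        ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
        memcpy(pszVendor + 0, &uEBX, 4);    /* e.g. "Genu" */
        memcpy(pszVendor + 4, &uEDX, 4);    /*      "ineI" */
        memcpy(pszVendor + 8, &uECX, 4);    /*      "ntel" */
        pszVendor[12] = '\0';
    }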
    666 
    667 /**
    668  * Performs the cpuid instruction returning all registers.
     669  * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
    670  *
    671  * @param   uOperator   CPUID operation (eax).
    672  * @param   uIdxECX     The ECX index (cpuid sub-leaf).
    673  * @param   pvEAX       Where to store eax.
    674  * @param   pvEBX       Where to store ebx.
    675  * @param   pvECX       Where to store ecx.
    676  * @param   pvEDX       Where to store edx.
    677  * @remark  We're using void pointers to ease the use of special bitfield structures and such.
    678  */
    679 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    680 DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
    681 #else
    682 DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
    683 {
    684 # if RT_INLINE_ASM_GNU_STYLE
    685 #  ifdef RT_ARCH_AMD64
    686     RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
    687     __asm__ ("cpuid\n\t"
    688              : "=a" (uRAX),
    689                "=b" (uRBX),
    690                "=c" (uRCX),
    691                "=d" (uRDX)
    692              : "0" (uOperator),
    693                "2" (uIdxECX));
    694     *(uint32_t *)pvEAX = (uint32_t)uRAX;
    695     *(uint32_t *)pvEBX = (uint32_t)uRBX;
    696     *(uint32_t *)pvECX = (uint32_t)uRCX;
    697     *(uint32_t *)pvEDX = (uint32_t)uRDX;
    698 #  else
    699     __asm__ ("xchgl %%ebx, %1\n\t"
    700              "cpuid\n\t"
    701              "xchgl %%ebx, %1\n\t"
    702              : "=a" (*(uint32_t *)pvEAX),
    703                "=r" (*(uint32_t *)pvEBX),
    704                "=c" (*(uint32_t *)pvECX),
    705                "=d" (*(uint32_t *)pvEDX)
    706              : "0" (uOperator),
    707                "2" (uIdxECX));
    708 #  endif
    709 
    710 # elif RT_INLINE_ASM_USES_INTRIN
    711     int aInfo[4];
    712     /** @todo Use the __cpuidex intrinsic when available; this path currently ignores uIdxECX. */
    713     __cpuid(aInfo, uOperator);
    714     *(uint32_t *)pvEAX = aInfo[0];
    715     *(uint32_t *)pvEBX = aInfo[1];
    716     *(uint32_t *)pvECX = aInfo[2];
    717     *(uint32_t *)pvEDX = aInfo[3];
    718 
    719 # else
    720     uint32_t    uEAX;
    721     uint32_t    uEBX;
    722     uint32_t    uECX;
    723     uint32_t    uEDX;
    724     __asm
    725     {
    726         push    ebx
    727         mov     eax, [uOperator]
    728         mov     ecx, [uIdxECX]
    729         cpuid
    730         mov     [uEAX], eax
    731         mov     [uEBX], ebx
    732         mov     [uECX], ecx
    733         mov     [uEDX], edx
    734         pop     ebx
    735     }
    736     *(uint32_t *)pvEAX = uEAX;
    737     *(uint32_t *)pvEBX = uEBX;
    738     *(uint32_t *)pvECX = uECX;
    739     *(uint32_t *)pvEDX = uEDX;
    740 # endif
    741 }
    742 #endif
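
/*
 * Usage sketch (illustrative): walking the Intel deterministic cache
 * parameters leaf (EAX=4), the sub-leaf case mentioned above. The loop stops
 * when the cache type field (EAX bits 4:0) reads zero; decoding of the other
 * fields is left out.
 */
static uint32_t ExampleCountCacheLevels(void)
{
    uint32_t cCaches = 0;
    uint32_t iSubLeaf;
    for (iSubLeaf = 0; ; iSubLeaf++)
    {
        uint32_t uEAX, uEBX, uECX, uEDX;
        ASMCpuId_Idx_ECX(4, iSubLeaf, &uEAX, &uEBX, &uECX, &uEDX);
        if ((uEAX & 0x1f) == 0)         /* cache type 0: no more caches */
            break;
        cCaches++;
    }
    return cCaches;
}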
    743 
    744 
    745 /**
    746  * Performs the cpuid instruction returning ecx and edx.
    747  *
    748  * @param   uOperator   CPUID operation (eax).
    749  * @param   pvECX       Where to store ecx.
    750  * @param   pvEDX       Where to store edx.
    751  * @remark  We're using void pointers to ease the use of special bitfield structures and such.
    752  */
    753 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    754 DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
    755 #else
    756 DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
    757 {
    758     uint32_t uEBX;
    759     ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
    760 }
    761 #endif
    762 
    763 
    764 /**
    765  * Performs the cpuid instruction returning edx.
    766  *
    767  * @param   uOperator   CPUID operation (eax).
    768  * @returns EDX after cpuid operation.
    769  */
    770 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    771 DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
    772 #else
    773 DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
    774 {
    775     RTCCUINTREG xDX;
    776 # if RT_INLINE_ASM_GNU_STYLE
    777 #  ifdef RT_ARCH_AMD64
    778     RTCCUINTREG uSpill;
    779     __asm__ ("cpuid"
    780              : "=a" (uSpill),
    781                "=d" (xDX)
    782              : "0" (uOperator)
    783              : "rbx", "rcx");
    784 #  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    785     __asm__ ("push  %%ebx\n\t"
    786              "cpuid\n\t"
    787              "pop   %%ebx\n\t"
    788              : "=a" (uOperator),
    789                "=d" (xDX)
    790              : "0" (uOperator)
    791              : "ecx");
    792 #  else
    793     __asm__ ("cpuid"
    794              : "=a" (uOperator),
    795                "=d" (xDX)
    796              : "0" (uOperator)
    797              : "ebx", "ecx");
    798 #  endif
    799 
    800 # elif RT_INLINE_ASM_USES_INTRIN
    801     int aInfo[4];
    802     __cpuid(aInfo, uOperator);
    803     xDX = aInfo[3];
    804 
    805 # else
    806     __asm
    807     {
    808         push    ebx
    809         mov     eax, [uOperator]
    810         cpuid
    811         mov     [xDX], edx
    812         pop     ebx
    813     }
    814 # endif
    815     return (uint32_t)xDX;
    816 }
    817 #endif
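
/*
 * Usage sketch (illustrative): testing a leaf 1 feature flag. Bit 26 of EDX
 * is the architectural SSE2 bit (X86_CPUID_FEATURE_EDX_SSE2 in iprt/x86.h);
 * the raw mask is spelled out here to keep the example self-contained.
 */
static bool ExampleHasSse2(void)
{
    return (ASMCpuId_EDX(1) & UINT32_C(0x04000000)) != 0;  /* EDX bit 26 = SSE2 */
}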
    818 
    819 
    820 /**
    821  * Performs the cpuid instruction returning ecx.
    822  *
    823  * @param   uOperator   CPUID operation (eax).
    824  * @returns ECX after cpuid operation.
    825  */
    826 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    827 DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
    828 #else
    829 DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
    830 {
    831     RTCCUINTREG xCX;
    832 # if RT_INLINE_ASM_GNU_STYLE
    833 #  ifdef RT_ARCH_AMD64
    834     RTCCUINTREG uSpill;
    835     __asm__ ("cpuid"
    836              : "=a" (uSpill),
    837                "=c" (xCX)
    838              : "0" (uOperator)
    839              : "rbx", "rdx");
    840 #  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    841     __asm__ ("push  %%ebx\n\t"
    842              "cpuid\n\t"
    843              "pop   %%ebx\n\t"
    844              : "=a" (uOperator),
    845                "=c" (xCX)
    846              : "0" (uOperator)
    847              : "edx");
    848 #  else
    849     __asm__ ("cpuid"
    850              : "=a" (uOperator),
    851                "=c" (xCX)
    852              : "0" (uOperator)
    853              : "ebx", "edx");
    854 
    855 #  endif
    856 
    857 # elif RT_INLINE_ASM_USES_INTRIN
    858     int aInfo[4];
    859     __cpuid(aInfo, uOperator);
    860     xCX = aInfo[2];
    861 
    862 # else
    863     __asm
    864     {
    865         push    ebx
    866         mov     eax, [uOperator]
    867         cpuid
    868         mov     [xCX], ecx
    869         pop     ebx
    870     }
    871 # endif
    872     return (uint32_t)xCX;
    873 }
    874 #endif
    875 
    876 
    877 /**
    878  * Checks if the current CPU supports CPUID.
    879  *
    880  * @returns true if CPUID is supported.
    881  */
    882 DECLINLINE(bool) ASMHasCpuId(void)
    883 {
    884 #ifdef RT_ARCH_AMD64
    885     return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
    886 #else /* !RT_ARCH_AMD64 */
    887     bool        fRet = false;
    888 # if RT_INLINE_ASM_GNU_STYLE
    889     uint32_t    u1;
    890     uint32_t    u2;
    891     __asm__ ("pushf\n\t"
    892              "pop   %1\n\t"
    893              "mov   %1, %2\n\t"
    894              "xorl  $0x200000, %1\n\t"
    895              "push  %1\n\t"
    896              "popf\n\t"
    897              "pushf\n\t"
    898              "pop   %1\n\t"
    899              "cmpl  %1, %2\n\t"
    900              "setne %0\n\t"
    901              "push  %2\n\t"
    902              "popf\n\t"
    903              : "=m" (fRet), "=r" (u1), "=r" (u2));
    904 # else
    905     __asm
    906     {
    907         pushfd
    908         pop     eax
    909         mov     ebx, eax
    910         xor     eax, 0200000h
    911         push    eax
    912         popfd
    913         pushfd
    914         pop     eax
    915         cmp     eax, ebx
    916         setne   fRet
    917         push    ebx
    918         popfd
    919     }
    920 # endif
    921     return fRet;
    922 #endif /* !RT_ARCH_AMD64 */
    923 }
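
/*
 * Usage sketch (illustrative): guarding cpuid use on 32-bit hosts where very
 * old CPUs may lack the instruction; on AMD64 the check is constantly true.
 */
static uint32_t ExampleGetMaxStdCpuIdLeaf(void)
{
    uint32_t uEAX, uEBX, uECX, uEDX;
    if (!ASMHasCpuId())
        return 0;
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    return uEAX;                        /* highest supported standard leaf */
}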
    924 
    925 
    926 /**
    927  * Gets the APIC ID of the current CPU.
    928  *
    929  * @returns the APIC ID.
    930  */
    931 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    932 DECLASM(uint8_t) ASMGetApicId(void);
    933 #else
    934 DECLINLINE(uint8_t) ASMGetApicId(void)
    935 {
    936     RTCCUINTREG xBX;
    937 # if RT_INLINE_ASM_GNU_STYLE
    938 #  ifdef RT_ARCH_AMD64
    939     RTCCUINTREG uSpill;
    940     __asm__ ("cpuid"
    941              : "=a" (uSpill),
    942                "=b" (xBX)
    943              : "0" (1)
    944              : "rcx", "rdx");
    945 #  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    946     RTCCUINTREG uSpill;
    947     __asm__ ("mov   %%ebx,%1\n\t"
    948              "cpuid\n\t"
    949              "xchgl %%ebx,%1\n\t"
    950              : "=a" (uSpill),
    951                "=r" (xBX)
    952              : "0" (1)
    953              : "ecx", "edx");
    954 #  else
    955     RTCCUINTREG uSpill;
    956     __asm__ ("cpuid"
    957              : "=a" (uSpill),
    958                "=b" (xBX)
    959              : "0" (1)
    960              : "ecx", "edx");
    961 #  endif
    962 
    963 # elif RT_INLINE_ASM_USES_INTRIN
    964     int aInfo[4];
    965     __cpuid(aInfo, 1);
    966     xBX = aInfo[1];
    967 
    968 # else
    969     __asm
    970     {
    971         push    ebx
    972         mov     eax, 1
    973         cpuid
    974         mov     [xBX], ebx
    975         pop     ebx
    976     }
    977 # endif
    978     return (uint8_t)(xBX >> 24);
    979 }
    980 #endif
    981 
    982 
    983 /**
    984  * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
    985  *
    986  * @returns true/false.
    987  * @param   uEBX    EBX return from ASMCpuId(0)
    988  * @param   uECX    ECX return from ASMCpuId(0)
    989  * @param   uEDX    EDX return from ASMCpuId(0)
    990  */
    991 DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
    992 {
    993     return uEBX == UINT32_C(0x756e6547)
    994         && uECX == UINT32_C(0x6c65746e)
    995         && uEDX == UINT32_C(0x49656e69);
    996 }
    997 
    998 
    999 /**
    1000  * Tests if this is a genuine Intel CPU.
    1001  *
    1002  * @returns true/false.
    1003  * @remarks ASSUMES that cpuid is supported by the CPU.
    1004  */
    1005 DECLINLINE(bool) ASMIsIntelCpu(void)
    1006 {
    1007     uint32_t uEAX, uEBX, uECX, uEDX;
    1008     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    1009     return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
    1010 }
    1011 
    1012 
    1013 /**
    1014  * Tests if it is an authentic AMD CPU based on the ASMCpuId(0) output.
    1015  *
    1016  * @returns true/false.
    1017  * @param   uEBX    EBX return from ASMCpuId(0)
    1018  * @param   uECX    ECX return from ASMCpuId(0)
    1019  * @param   uEDX    EDX return from ASMCpuId(0)
    1020  */
    1021 DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
    1022 {
    1023     return uEBX == UINT32_C(0x68747541)
    1024         && uECX == UINT32_C(0x444d4163)
    1025         && uEDX == UINT32_C(0x69746e65);
    1026 }
    1027 
    1028 
    1029 /**
    1030  * Tests if this is an authentic AMD CPU.
    1031  *
    1032  * @returns true/false.
    1033  * @remarks ASSUMES that cpuid is supported by the CPU.
    1034  */
    1035 DECLINLINE(bool) ASMIsAmdCpu(void)
    1036 {
    1037     uint32_t uEAX, uEBX, uECX, uEDX;
    1038     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    1039     return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
    1040 }
    1041 
    1042 
    1043 /**
    1044  * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
    1045  *
    1046  * @returns Family.
    1047  * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
    1048  */
    1049 DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
    1050 {
    1051     return ((uEAX >> 8) & 0xf) == 0xf
    1052          ? ((uEAX >> 20) & 0x7f) + 0xf
    1053          : ((uEAX >> 8) & 0xf);
    1054 }
    1055 
    1056 
    1057 /**
    1058  * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
    1059  *
    1060  * @returns Model.
    1061  * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
    1062  */
    1063 DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
    1064 {
    1065     return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
    1066          ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
    1067          : ((uEAX >> 4) & 0xf);
    1068 }
    1069 
    1070 
    1071 /**
    1072  * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
    1073  *
    1074  * @returns Model.
    1075  * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
    1076  */
    1077 DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
    1078 {
    1079     return ((uEAX >> 8) & 0xf) == 0xf
    1080          ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
    1081          : ((uEAX >> 4) & 0xf);
    1082 }
    1083 
    1084 
    1085 /**
    1086  * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
    1087  *
    1088  * @returns Model.
    1089  * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
    1090  * @param   fIntel  Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
    1091  */
    1092 DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
    1093 {
    1094     return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
    1095          ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
    1096          : ((uEAX >> 4) & 0xf);
    1097 }
    1098 
    1099 
    1100 /**
    1101  * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
    1102  *
    1103  * @returns Stepping.
    1104  * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
    1105  */
    1106 DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
    1107 {
    1108     return uEAX & 0xf;
    1109 }
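
/*
 * Usage sketch (illustrative): combining the helpers above to decode the
 * leaf 1 signature. The vendor check feeds the extended model rule, which
 * differs between Intel and AMD as documented for ASMGetCpuModel().
 */
static void ExampleGetCpuSignature(uint32_t *puFamily, uint32_t *puModel, uint32_t *puStepping)
{
    uint32_t uEAX, uEBX, uECX, uEDX;
    bool     fIntel;

    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);

    ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
    *puFamily   = ASMGetCpuFamily(uEAX);
    *puModel    = ASMGetCpuModel(uEAX, fIntel);
    *puStepping = ASMGetCpuStepping(uEAX);
}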
    1110 
    1111 
    1112 /**
    1113  * Get cr0.
    1114  * @returns cr0.
    1115  */
    1116 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1117 DECLASM(RTCCUINTREG) ASMGetCR0(void);
    1118 #else
    1119 DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
    1120 {
    1121     RTCCUINTREG uCR0;
    1122 # if RT_INLINE_ASM_USES_INTRIN
    1123     uCR0 = __readcr0();
    1124 
    1125 # elif RT_INLINE_ASM_GNU_STYLE
    1126 #  ifdef RT_ARCH_AMD64
    1127     __asm__ __volatile__("movq  %%cr0, %0\n\t" : "=r" (uCR0));
    1128 #  else
    1129     __asm__ __volatile__("movl  %%cr0, %0\n\t" : "=r" (uCR0));
    1130 #  endif
    1131 # else
    1132     __asm
    1133     {
    1134 #  ifdef RT_ARCH_AMD64
    1135         mov     rax, cr0
    1136         mov     [uCR0], rax
    1137 #  else
    1138         mov     eax, cr0
    1139         mov     [uCR0], eax
    1140 #  endif
    1141     }
    1142 # endif
    1143     return uCR0;
    1144 }
    1145 #endif
    1146 
    1147 
    1148 /**
    1149  * Sets the CR0 register.
    1150  * @param   uCR0 The new CR0 value.
    1151  */
    1152 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1153 DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
    1154 #else
    1155 DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
    1156 {
    1157 # if RT_INLINE_ASM_USES_INTRIN
    1158     __writecr0(uCR0);
    1159 
    1160 # elif RT_INLINE_ASM_GNU_STYLE
    1161 #  ifdef RT_ARCH_AMD64
    1162     __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
    1163 #  else
    1164     __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
    1165 #  endif
    1166 # else
    1167     __asm
    1168     {
    1169 #  ifdef RT_ARCH_AMD64
    1170         mov     rax, [uCR0]
    1171         mov     cr0, rax
    1172 #  else
    1173         mov     eax, [uCR0]
    1174         mov     cr0, eax
    1175 #  endif
    1176     }
    1177 # endif
    1178 }
    1179 #endif
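
/*
 * Usage sketch (illustrative, ring-0 only): the usual read-modify-write
 * pattern for CR0. Bit 16 is CR0.WP (X86_CR0_WP in iprt/x86.h); the caller is
 * assumed to have interrupts disabled and to restore the returned value with
 * ASMSetCR0() afterwards.
 */
static RTCCUINTREG ExampleClearCr0WriteProtect(void)
{
    RTCCUINTREG const uOldCr0 = ASMGetCR0();
    ASMSetCR0(uOldCr0 & ~(RTCCUINTREG)0x10000); /* clear CR0.WP (bit 16) */
    return uOldCr0;
}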
    1180 
    1181 
    1182 /**
    1183  * Get cr2.
    1184  * @returns cr2.
    1185  */
    1186 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1187 DECLASM(RTCCUINTREG) ASMGetCR2(void);
    1188 #else
    1189 DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
    1190 {
    1191     RTCCUINTREG uCR2;
    1192 # if RT_INLINE_ASM_USES_INTRIN
    1193     uCR2 = __readcr2();
    1194 
    1195 # elif RT_INLINE_ASM_GNU_STYLE
    1196 #  ifdef RT_ARCH_AMD64
    1197     __asm__ __volatile__("movq  %%cr2, %0\n\t" : "=r" (uCR2));
    1198 #  else
    1199     __asm__ __volatile__("movl  %%cr2, %0\n\t" : "=r" (uCR2));
    1200 #  endif
    1201 # else
    1202     __asm
    1203     {
    1204 #  ifdef RT_ARCH_AMD64
    1205         mov     rax, cr2
    1206         mov     [uCR2], rax
    1207 #  else
    1208         mov     eax, cr2
    1209         mov     [uCR2], eax
    1210 #  endif
    1211     }
    1212 # endif
    1213     return uCR2;
    1214 }
    1215 #endif
    1216 
    1217 
    1218 /**
    1219  * Sets the CR2 register.
    1220  * @param   uCR2 The new CR2 value.
    1221  */
    1222 #if RT_INLINE_ASM_EXTERNAL
    1223 DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
    1224 #else
    1225 DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
    1226 {
    1227 # if RT_INLINE_ASM_GNU_STYLE
    1228 #  ifdef RT_ARCH_AMD64
    1229     __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
    1230 #  else
    1231     __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
    1232 #  endif
    1233 # else
    1234     __asm
    1235     {
    1236 #  ifdef RT_ARCH_AMD64
    1237         mov     rax, [uCR2]
    1238         mov     cr2, rax
    1239 #  else
    1240         mov     eax, [uCR2]
    1241         mov     cr2, eax
    1242 #  endif
    1243     }
    1244 # endif
    1245 }
    1246 #endif
    1247 
    1248 
    1249 /**
    1250  * Get cr3.
    1251  * @returns cr3.
    1252  */
    1253 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1254 DECLASM(RTCCUINTREG) ASMGetCR3(void);
    1255 #else
    1256 DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
    1257 {
    1258     RTCCUINTREG uCR3;
    1259 # if RT_INLINE_ASM_USES_INTRIN
    1260     uCR3 = __readcr3();
    1261 
    1262 # elif RT_INLINE_ASM_GNU_STYLE
    1263 #  ifdef RT_ARCH_AMD64
    1264     __asm__ __volatile__("movq  %%cr3, %0\n\t" : "=r" (uCR3));
    1265 #  else
    1266     __asm__ __volatile__("movl  %%cr3, %0\n\t" : "=r" (uCR3));
    1267 #  endif
    1268 # else
    1269     __asm
    1270     {
    1271 #  ifdef RT_ARCH_AMD64
    1272         mov     rax, cr3
    1273         mov     [uCR3], rax
    1274 #  else
    1275         mov     eax, cr3
    1276         mov     [uCR3], eax
    1277 #  endif
    1278     }
    1279 # endif
    1280     return uCR3;
    1281 }
    1282 #endif
    1283 
    1284 
    1285 /**
    1286  * Sets the CR3 register.
    1287  *
    1288  * @param   uCR3    New CR3 value.
    1289  */
    1290 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1291 DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
    1292 #else
    1293 DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
    1294 {
    1295 # if RT_INLINE_ASM_USES_INTRIN
    1296     __writecr3(uCR3);
    1297 
    1298 # elif RT_INLINE_ASM_GNU_STYLE
    1299 #  ifdef RT_ARCH_AMD64
    1300     __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
    1301 #  else
    1302     __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
    1303 #  endif
    1304 # else
    1305     __asm
    1306     {
    1307 #  ifdef RT_ARCH_AMD64
    1308         mov     rax, [uCR3]
    1309         mov     cr3, rax
    1310 #  else
    1311         mov     eax, [uCR3]
    1312         mov     cr3, eax
    1313 #  endif
    1314     }
    1315 # endif
    1316 }
    1317 #endif
    1318 
    1319 
    1320 /**
    1321  * Reloads the CR3 register.
    1322  */
    1323 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1324 DECLASM(void) ASMReloadCR3(void);
    1325 #else
    1326 DECLINLINE(void) ASMReloadCR3(void)
    1327 {
    1328 # if RT_INLINE_ASM_USES_INTRIN
    1329     __writecr3(__readcr3());
    1330 
    1331 # elif RT_INLINE_ASM_GNU_STYLE
    1332     RTCCUINTREG u;
    1333 #  ifdef RT_ARCH_AMD64
    1334     __asm__ __volatile__("movq %%cr3, %0\n\t"
    1335                          "movq %0, %%cr3\n\t"
    1336                          : "=r" (u));
    1337 #  else
    1338     __asm__ __volatile__("movl %%cr3, %0\n\t"
    1339                          "movl %0, %%cr3\n\t"
    1340                          : "=r" (u));
    1341 #  endif
    1342 # else
    1343     __asm
    1344     {
    1345 #  ifdef RT_ARCH_AMD64
    1346         mov     rax, cr3
    1347         mov     cr3, rax
    1348 #  else
    1349         mov     eax, cr3
    1350         mov     cr3, eax
    1351 #  endif
    1352     }
    1353 # endif
    1354 }
    1355 #endif
    1356 
    1357 
    1358 /**
    1359  * Get cr4.
    1360  * @returns cr4.
    1361  */
    1362 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1363 DECLASM(RTCCUINTREG) ASMGetCR4(void);
    1364 #else
    1365 DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
    1366 {
    1367     RTCCUINTREG uCR4;
    1368 # if RT_INLINE_ASM_USES_INTRIN
    1369     uCR4 = __readcr4();
    1370 
    1371 # elif RT_INLINE_ASM_GNU_STYLE
    1372 #  ifdef RT_ARCH_AMD64
    1373     __asm__ __volatile__("movq  %%cr4, %0\n\t" : "=r" (uCR4));
    1374 #  else
    1375     __asm__ __volatile__("movl  %%cr4, %0\n\t" : "=r" (uCR4));
    1376 #  endif
    1377 # else
    1378     __asm
    1379     {
    1380 #  ifdef RT_ARCH_AMD64
    1381         mov     rax, cr4
    1382         mov     [uCR4], rax
    1383 #  else
    1384         push    eax /* just in case */
    1385         /*mov     eax, cr4*/
    1386         _emit   0x0f
    1387         _emit   0x20
    1388         _emit   0xe0
    1389         mov     [uCR4], eax
    1390         pop     eax
    1391 #  endif
    1392     }
    1393 # endif
    1394     return uCR4;
    1395 }
    1396 #endif
    1397 
    1398 
    1399 /**
    1400  * Sets the CR4 register.
    1401  *
    1402  * @param   uCR4    New CR4 value.
    1403  */
    1404 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1405 DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
    1406 #else
    1407 DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
    1408 {
    1409 # if RT_INLINE_ASM_USES_INTRIN
    1410     __writecr4(uCR4);
    1411 
    1412 # elif RT_INLINE_ASM_GNU_STYLE
    1413 #  ifdef RT_ARCH_AMD64
    1414     __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
    1415 #  else
    1416     __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
    1417 #  endif
    1418 # else
    1419     __asm
    1420     {
    1421 #  ifdef RT_ARCH_AMD64
    1422         mov     rax, [uCR4]
    1423         mov     cr4, rax
    1424 #  else
    1425         mov     eax, [uCR4]
    1426         _emit   0x0F
    1427         _emit   0x22
    1428         _emit   0xE0        /* mov     cr4, eax */
    1429 #  endif
    1430     }
    1431 # endif
    1432 }
    1433 #endif
    1434 
    1435 
    1436 /**
    1437  * Get cr8.
    1438  * @returns cr8.
    1439  * @remark  The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
    1440  */
    1441 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1442 DECLASM(RTCCUINTREG) ASMGetCR8(void);
    1443 #else
    1444 DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
    1445 {
    1446 # ifdef RT_ARCH_AMD64
    1447     RTCCUINTREG uCR8;
    1448 #  if RT_INLINE_ASM_USES_INTRIN
    1449     uCR8 = __readcr8();
    1450 
    1451 #  elif RT_INLINE_ASM_GNU_STYLE
    1452     __asm__ __volatile__("movq  %%cr8, %0\n\t" : "=r" (uCR8));
    1453 #  else
    1454     __asm
    1455     {
    1456         mov     rax, cr8
    1457         mov     [uCR8], rax
    1458     }
    1459 #  endif
    1460     return uCR8;
    1461 # else /* !RT_ARCH_AMD64 */
    1462     return 0;
    1463 # endif /* !RT_ARCH_AMD64 */
    1464 }
    1465 #endif
    1466 
    1467 
    1468 /**
    1469  * Enables interrupts (EFLAGS.IF).
    1470  */
    1471 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1472 DECLASM(void) ASMIntEnable(void);
    1473 #else
    1474 DECLINLINE(void) ASMIntEnable(void)
    1475 {
    1476 # if RT_INLINE_ASM_GNU_STYLE
    1477     __asm("sti\n");
    1478 # elif RT_INLINE_ASM_USES_INTRIN
    1479     _enable();
    1480 # else
    1481     __asm sti
    1482 # endif
    1483 }
    1484 #endif
    1485 
    1486 
    1487 /**
    1488  * Disables interrupts (!EFLAGS.IF).
    1489  */
    1490 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1491 DECLASM(void) ASMIntDisable(void);
    1492 #else
    1493 DECLINLINE(void) ASMIntDisable(void)
    1494 {
    1495 # if RT_INLINE_ASM_GNU_STYLE
    1496     __asm("cli\n");
    1497 # elif RT_INLINE_ASM_USES_INTRIN
    1498     _disable();
    1499 # else
    1500     __asm cli
    1501 # endif
    1502 }
    1503 #endif
    1504 
    1505 
    1506 /**
    1507  * Disables interrupts and returns previous xFLAGS.
    1508  */
    1509 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1510 DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
    1511 #else
    1512 DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
    1513 {
    1514     RTCCUINTREG xFlags;
    1515 # if RT_INLINE_ASM_GNU_STYLE
    1516 #  ifdef RT_ARCH_AMD64
    1517     __asm__ __volatile__("pushfq\n\t"
    1518                          "cli\n\t"
    1519                          "popq  %0\n\t"
    1520                          : "=r" (xFlags));
    1521 #  else
    1522     __asm__ __volatile__("pushfl\n\t"
    1523                          "cli\n\t"
    1524                          "popl  %0\n\t"
    1525                          : "=r" (xFlags));
    1526 #  endif
    1527 # elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
    1528     xFlags = ASMGetFlags();
    1529     _disable();
    1530 # else
    1531     __asm {
    1532         pushfd
    1533         cli
    1534         pop  [xFlags]
    1535     }
    1536 # endif
    1537     return xFlags;
    1538 }
    1539 #endif
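
/*
 * Usage sketch (illustrative): the save/disable/restore idiom for a short
 * critical section. ASMSetFlags(), the EFLAGS writer declared elsewhere in
 * this header, is assumed to be the matching restore call.
 */
static void ExampleBriefCriticalSection(volatile uint32_t *pu32Counter)
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
    *pu32Counter += 1;              /* work that must not be interrupted */
    ASMSetFlags(fSavedFlags);       /* restores the previous EFLAGS.IF state */
}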
    1540 
    1541 
    1542 /**
    1543  * Are interrupts enabled?
    1544  *
    1545  * @returns true / false.
    1546  */
    1547 DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
    1548 {
    1549     RTCCUINTREG uFlags = ASMGetFlags();
    1550     return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
    1551 }
    1552 
    1553 
    1554 /**
    1555  * Halts the CPU until interrupted.
    1556  */
    1557 #if RT_INLINE_ASM_EXTERNAL
    1558 DECLASM(void) ASMHalt(void);
    1559 #else
    1560 DECLINLINE(void) ASMHalt(void)
    1561 {
    1562 # if RT_INLINE_ASM_GNU_STYLE
    1563     __asm__ __volatile__("hlt\n\t");
    1564 # else
    1565     __asm {
    1566         hlt
    1567     }
    1568 # endif
    1569 }
    1570 #endif
    1571 
    1572 
    1573 /**
    1574  * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
    1575  */
    1576 #if RT_INLINE_ASM_EXTERNAL
    1577 DECLASM(void) ASMNopPause(void);
    1578 #else
    1579 DECLINLINE(void) ASMNopPause(void)
    1580 {
    1581 # if RT_INLINE_ASM_GNU_STYLE
    1582     __asm__ __volatile__(".byte 0xf3,0x90\n\t");
    1583 # else
    1584     __asm {
    1585         _emit 0f3h
    1586         _emit 090h
    1587     }
    1588 # endif
    1589 }
    1590 #endif
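
/*
 * Usage sketch (illustrative): a polite spin-wait. PAUSE hints to SMT siblings
 * and the memory pipeline that this is a spin loop; the polled flag is a
 * placeholder.
 */
static void ExampleSpinUntilSet(uint32_t volatile *pfFlag)
{
    while (!*pfFlag)
        ASMNopPause();
}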
    1591 
    1592 
    1593 /**
    1594  * Reads a machine specific register.
    1595  *
    1596  * @returns Register content.
    1597  * @param   uRegister   Register to read.
    1598  */
    1599 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1600 DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
    1601 #else
    1602 DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
    1603 {
    1604     RTUINT64U u;
    1605 # if RT_INLINE_ASM_GNU_STYLE
    1606     __asm__ __volatile__("rdmsr\n\t"
    1607                          : "=a" (u.s.Lo),
    1608                            "=d" (u.s.Hi)
    1609                          : "c" (uRegister));
    1610 
    1611 # elif RT_INLINE_ASM_USES_INTRIN
    1612     u.u = __readmsr(uRegister);
    1613 
    1614 # else
    1615     __asm
    1616     {
    1617         mov     ecx, [uRegister]
    1618         rdmsr
    1619         mov     [u.s.Lo], eax
    1620         mov     [u.s.Hi], edx
    1621     }
    1622 # endif
    1623 
    1624     return u.u;
    1625 }
    1626 #endif
    1627 
    1628 
    1629 /**
    1630  * Writes a machine specific register.
    1631  *
    1632  *
    1633  * @param   uRegister   Register to write to.
    1634  * @param   u64Val      Value to write.
    1635  */
    1636 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1637 DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
    1638 #else
    1639 DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
    1640 {
    1641     RTUINT64U u;
    1642 
    1643     u.u = u64Val;
    1644 # if RT_INLINE_ASM_GNU_STYLE
    1645     __asm__ __volatile__("wrmsr\n\t"
    1646                          ::"a" (u.s.Lo),
    1647                            "d" (u.s.Hi),
    1648                            "c" (uRegister));
    1649 
    1650 # elif RT_INLINE_ASM_USES_INTRIN
    1651     __writemsr(uRegister, u.u);
    1652 
    1653 # else
    1654     __asm
    1655     {
    1656         mov     ecx, [uRegister]
    1657         mov     edx, [u.s.Hi]
    1658         mov     eax, [u.s.Lo]
    1659         wrmsr
    1660     }
    1661 # endif
    1662 }
    1663 #endif
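
/*
 * Usage sketch (illustrative, ring-0 only): read-modify-write of an MSR. The
 * register number and mask come from the caller; real code should use the
 * MSR_* constants from iprt/x86.h and be prepared for a #GP on MSRs the CPU
 * does not implement.
 */
static void ExampleSetMsrBits(uint32_t uMsr, uint64_t fOrMask)
{
    ASMWrMsr(uMsr, ASMRdMsr(uMsr) | fOrMask);
}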
    1664 
    1665 
    1666 /**
    1667  * Reads low part of a machine specific register.
    1668  *
    1669  * @returns Register content.
    1670  * @param   uRegister   Register to read.
    1671  */
    1672 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1673 DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
    1674 #else
    1675 DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
    1676 {
    1677     uint32_t u32;
    1678 # if RT_INLINE_ASM_GNU_STYLE
    1679     __asm__ __volatile__("rdmsr\n\t"
    1680                          : "=a" (u32)
    1681                          : "c" (uRegister)
    1682                          : "edx");
    1683 
    1684 # elif RT_INLINE_ASM_USES_INTRIN
    1685     u32 = (uint32_t)__readmsr(uRegister);
    1686 
    1687 # else
    1688     __asm
    1689     {
    1690         mov     ecx, [uRegister]
    1691         rdmsr
    1692         mov     [u32], eax
    1693     }
    1694 # endif
    1695 
    1696     return u32;
    1697 }
    1698 #endif
    1699 
    1700 
    1701 /**
    1702  * Reads high part of a machine specific register.
    1703  *
    1704  * @returns Register content.
    1705  * @param   uRegister   Register to read.
    1706  */
    1707 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1708 DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
    1709 #else
    1710 DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
    1711 {
    1712     uint32_t    u32;
    1713 # if RT_INLINE_ASM_GNU_STYLE
    1714     __asm__ __volatile__("rdmsr\n\t"
    1715                          : "=d" (u32)
    1716                          : "c" (uRegister)
    1717                          : "eax");
    1718 
    1719 # elif RT_INLINE_ASM_USES_INTRIN
    1720     u32 = (uint32_t)(__readmsr(uRegister) >> 32);
    1721 
    1722 # else
    1723     __asm
    1724     {
    1725         mov     ecx, [uRegister]
    1726         rdmsr
    1727         mov     [u32], edx
    1728     }
    1729 # endif
    1730 
    1731     return u32;
    1732 }
    1733 #endif
    1734 
    1735 
    1736 /**
    1737  * Gets dr0.
    1738  *
    1739  * @returns dr0.
    1740  */
    1741 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1742 DECLASM(RTCCUINTREG) ASMGetDR0(void);
    1743 #else
    1744 DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
    1745 {
    1746     RTCCUINTREG uDR0;
    1747 # if RT_INLINE_ASM_USES_INTRIN
    1748     uDR0 = __readdr(0);
    1749 # elif RT_INLINE_ASM_GNU_STYLE
    1750 #  ifdef RT_ARCH_AMD64
    1751     __asm__ __volatile__("movq   %%dr0, %0\n\t" : "=r" (uDR0));
    1752 #  else
    1753     __asm__ __volatile__("movl   %%dr0, %0\n\t" : "=r" (uDR0));
    1754 #  endif
    1755 # else
    1756     __asm
    1757     {
    1758 #  ifdef RT_ARCH_AMD64
    1759         mov     rax, dr0
    1760         mov     [uDR0], rax
    1761 #  else
    1762         mov     eax, dr0
    1763         mov     [uDR0], eax
    1764 #  endif
    1765     }
    1766 # endif
    1767     return uDR0;
    1768 }
    1769 #endif
    1770 
    1771 
    1772 /**
    1773  * Gets dr1.
    1774  *
    1775  * @returns dr1.
    1776  */
    1777 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1778 DECLASM(RTCCUINTREG) ASMGetDR1(void);
    1779 #else
    1780 DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
    1781 {
    1782     RTCCUINTREG uDR1;
    1783 # if RT_INLINE_ASM_USES_INTRIN
    1784     uDR1 = __readdr(1);
    1785 # elif RT_INLINE_ASM_GNU_STYLE
    1786 #  ifdef RT_ARCH_AMD64
    1787     __asm__ __volatile__("movq   %%dr1, %0\n\t" : "=r" (uDR1));
    1788 #  else
    1789     __asm__ __volatile__("movl   %%dr1, %0\n\t" : "=r" (uDR1));
    1790 #  endif
    1791 # else
    1792     __asm
    1793     {
    1794 #  ifdef RT_ARCH_AMD64
    1795         mov     rax, dr1
    1796         mov     [uDR1], rax
    1797 #  else
    1798         mov     eax, dr1
    1799         mov     [uDR1], eax
    1800 #  endif
    1801     }
    1802 # endif
    1803     return uDR1;
    1804 }
    1805 #endif
    1806 
    1807 
    1808 /**
    1809  * Gets dr2.
    1810  *
    1811  * @returns dr2.
    1812  */
    1813 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1814 DECLASM(RTCCUINTREG) ASMGetDR2(void);
    1815 #else
    1816 DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
    1817 {
    1818     RTCCUINTREG uDR2;
    1819 # if RT_INLINE_ASM_USES_INTRIN
    1820     uDR2 = __readdr(2);
    1821 # elif RT_INLINE_ASM_GNU_STYLE
    1822 #  ifdef RT_ARCH_AMD64
    1823     __asm__ __volatile__("movq   %%dr2, %0\n\t" : "=r" (uDR2));
    1824 #  else
    1825     __asm__ __volatile__("movl   %%dr2, %0\n\t" : "=r" (uDR2));
    1826 #  endif
    1827 # else
    1828     __asm
    1829     {
    1830 #  ifdef RT_ARCH_AMD64
    1831         mov     rax, dr2
    1832         mov     [uDR2], rax
    1833 #  else
    1834         mov     eax, dr2
    1835         mov     [uDR2], eax
    1836 #  endif
    1837     }
    1838 # endif
    1839     return uDR2;
    1840 }
    1841 #endif
    1842 
    1843 
    1844 /**
    1845  * Gets dr3.
    1846  *
    1847  * @returns dr3.
    1848  */
    1849 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1850 DECLASM(RTCCUINTREG) ASMGetDR3(void);
    1851 #else
    1852 DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
    1853 {
    1854     RTCCUINTREG uDR3;
    1855 # if RT_INLINE_ASM_USES_INTRIN
    1856     uDR3 = __readdr(3);
    1857 # elif RT_INLINE_ASM_GNU_STYLE
    1858 #  ifdef RT_ARCH_AMD64
    1859     __asm__ __volatile__("movq   %%dr3, %0\n\t" : "=r" (uDR3));
    1860 #  else
    1861     __asm__ __volatile__("movl   %%dr3, %0\n\t" : "=r" (uDR3));
    1862 #  endif
    1863 # else
    1864     __asm
    1865     {
    1866 #  ifdef RT_ARCH_AMD64
    1867         mov     rax, dr3
    1868         mov     [uDR3], rax
    1869 #  else
    1870         mov     eax, dr3
    1871         mov     [uDR3], eax
    1872 #  endif
    1873     }
    1874 # endif
    1875     return uDR3;
    1876 }
    1877 #endif
    1878 
    1879 
    1880 /**
    1881  * Gets dr6.
    1882  *
    1883  * @returns dr6.
    1884  */
    1885 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1886 DECLASM(RTCCUINTREG) ASMGetDR6(void);
    1887 #else
    1888 DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
    1889 {
    1890     RTCCUINTREG uDR6;
    1891 # if RT_INLINE_ASM_USES_INTRIN
    1892     uDR6 = __readdr(6);
    1893 # elif RT_INLINE_ASM_GNU_STYLE
    1894 #  ifdef RT_ARCH_AMD64
    1895     __asm__ __volatile__("movq   %%dr6, %0\n\t" : "=r" (uDR6));
    1896 #  else
    1897     __asm__ __volatile__("movl   %%dr6, %0\n\t" : "=r" (uDR6));
    1898 #  endif
    1899 # else
    1900     __asm
    1901     {
    1902 #  ifdef RT_ARCH_AMD64
    1903         mov     rax, dr6
    1904         mov     [uDR6], rax
    1905 #  else
    1906         mov     eax, dr6
    1907         mov     [uDR6], eax
    1908 #  endif
    1909     }
    1910 # endif
    1911     return uDR6;
    1912 }
    1913 #endif
    1914 
    1915 
    1916 /**
    1917  * Reads and clears DR6.
    1918  *
    1919  * @returns DR6.
    1920  */
    1921 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1922 DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
    1923 #else
    1924 DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
    1925 {
    1926     RTCCUINTREG uDR6;
    1927 # if RT_INLINE_ASM_USES_INTRIN
    1928     uDR6 = __readdr(6);
    1929     __writedr(6, 0xffff0ff0U);          /* Bits 31-16 and 11-4 are set, bits 15-12, 3-0 and 63-32 are clear. */
    1930 # elif RT_INLINE_ASM_GNU_STYLE
    1931     RTCCUINTREG uNewValue = 0xffff0ff0U;/* Bits 31-16 and 11-4 are set, bits 15-12, 3-0 and 63-32 are clear. */
    1932 #  ifdef RT_ARCH_AMD64
    1933     __asm__ __volatile__("movq   %%dr6, %0\n\t"
    1934                          "movq   %1, %%dr6\n\t"
    1935                          : "=r" (uDR6)
    1936                          : "r" (uNewValue));
    1937 #  else
    1938     __asm__ __volatile__("movl   %%dr6, %0\n\t"
    1939                          "movl   %1, %%dr6\n\t"
    1940                          : "=r" (uDR6)
    1941                          : "r" (uNewValue));
    1942 #  endif
    1943 # else
    1944     __asm
    1945     {
    1946 #  ifdef RT_ARCH_AMD64
    1947         mov     rax, dr6
    1948         mov     [uDR6], rax
    1949         mov     rcx, rax
    1950         mov     ecx, 0ffff0ff0h;        /* Bits 31-16 and 11-4 are set, bits 15-12, 3-0 and 63-32 are clear. */
    1951         mov     dr6, rcx
    1952 #  else
    1953         mov     eax, dr6
    1954         mov     [uDR6], eax
    1955         mov     ecx, 0ffff0ff0h;        /* Bits 31-16 and 11-4 are set, bits 15-12 and 3-0 are clear. */
    1956         mov     dr6, ecx
    1957 #  endif
    1958     }
    1959 # endif
    1960     return uDR6;
    1961 }
    1962 #endif
    1963 
    1964 
    1965 /**
    1966  * Gets dr7.
    1967  *
    1968  * @returns dr7.
    1969  */
    1970 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    1971 DECLASM(RTCCUINTREG) ASMGetDR7(void);
    1972 #else
    1973 DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
    1974 {
    1975     RTCCUINTREG uDR7;
    1976 # if RT_INLINE_ASM_USES_INTRIN
    1977     uDR7 = __readdr(7);
    1978 # elif RT_INLINE_ASM_GNU_STYLE
    1979 #  ifdef RT_ARCH_AMD64
    1980     __asm__ __volatile__("movq   %%dr7, %0\n\t" : "=r" (uDR7));
    1981 #  else
    1982     __asm__ __volatile__("movl   %%dr7, %0\n\t" : "=r" (uDR7));
    1983 #  endif
    1984 # else
    1985     __asm
    1986     {
    1987 #  ifdef RT_ARCH_AMD64
    1988         mov     rax, dr7
    1989         mov     [uDR7], rax
    1990 #  else
    1991         mov     eax, dr7
    1992         mov     [uDR7], eax
    1993 #  endif
    1994     }
    1995 # endif
    1996     return uDR7;
    1997 }
    1998 #endif
    1999 
    2000 
    2001 /**
    2002  * Sets dr0.
    2003  *
    2004  * @param   uDRVal   Debug register value to write
    2005  */
    2006 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2007 DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
    2008 #else
    2009 DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
    2010 {
    2011 # if RT_INLINE_ASM_USES_INTRIN
    2012     __writedr(0, uDRVal);
    2013 # elif RT_INLINE_ASM_GNU_STYLE
    2014 #  ifdef RT_ARCH_AMD64
    2015     __asm__ __volatile__("movq   %0, %%dr0\n\t" : : "r" (uDRVal));
    2016 #  else
    2017     __asm__ __volatile__("movl   %0, %%dr0\n\t" : : "r" (uDRVal));
    2018 #  endif
    2019 # else
    2020     __asm
    2021     {
    2022 #  ifdef RT_ARCH_AMD64
    2023         mov     rax, [uDRVal]
    2024         mov     dr0, rax
    2025 #  else
    2026         mov     eax, [uDRVal]
    2027         mov     dr0, eax
    2028 #  endif
    2029     }
    2030 # endif
    2031 }
    2032 #endif
    2033 
    2034 
    2035 /**
    2036  * Sets dr1.
    2037  *
    2038  * @param   uDRVal   Debug register value to write
    2039  */
    2040 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2041 DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
    2042 #else
    2043 DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
    2044 {
    2045 # if RT_INLINE_ASM_USES_INTRIN
    2046     __writedr(1, uDRVal);
    2047 # elif RT_INLINE_ASM_GNU_STYLE
    2048 #  ifdef RT_ARCH_AMD64
    2049     __asm__ __volatile__("movq   %0, %%dr1\n\t" : : "r" (uDRVal));
    2050 #  else
    2051     __asm__ __volatile__("movl   %0, %%dr1\n\t" : : "r" (uDRVal));
    2052 #  endif
    2053 # else
    2054     __asm
    2055     {
    2056 #  ifdef RT_ARCH_AMD64
    2057         mov     rax, [uDRVal]
    2058         mov     dr1, rax
    2059 #  else
    2060         mov     eax, [uDRVal]
    2061         mov     dr1, eax
    2062 #  endif
    2063     }
    2064 # endif
    2065 }
    2066 #endif
    2067 
    2068 
    2069 /**
    2070  * Sets dr2.
    2071  *
    2072  * @param   uDRVal   Debug register value to write
    2073  */
    2074 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2075 DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
    2076 #else
    2077 DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
    2078 {
    2079 # if RT_INLINE_ASM_USES_INTRIN
    2080     __writedr(2, uDRVal);
    2081 # elif RT_INLINE_ASM_GNU_STYLE
    2082 #  ifdef RT_ARCH_AMD64
    2083     __asm__ __volatile__("movq   %0, %%dr2\n\t" : : "r" (uDRVal));
    2084 #  else
    2085     __asm__ __volatile__("movl   %0, %%dr2\n\t" : : "r" (uDRVal));
    2086 #  endif
    2087 # else
    2088     __asm
    2089     {
    2090 #  ifdef RT_ARCH_AMD64
    2091         mov     rax, [uDRVal]
    2092         mov     dr2, rax
    2093 #  else
    2094         mov     eax, [uDRVal]
    2095         mov     dr2, eax
    2096 #  endif
    2097     }
    2098 # endif
    2099 }
    2100 #endif
    2101 
    2102 
    2103 /**
    2104  * Sets dr3.
    2105  *
    2106  * @param   uDRVal   Debug register value to write
    2107  */
    2108 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2109 DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
    2110 #else
    2111 DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
    2112 {
    2113 # if RT_INLINE_ASM_USES_INTRIN
    2114     __writedr(3, uDRVal);
    2115 # elif RT_INLINE_ASM_GNU_STYLE
    2116 #  ifdef RT_ARCH_AMD64
    2117     __asm__ __volatile__("movq   %0, %%dr3\n\t" : : "r" (uDRVal));
    2118 #  else
    2119     __asm__ __volatile__("movl   %0, %%dr3\n\t" : : "r" (uDRVal));
    2120 #  endif
    2121 # else
    2122     __asm
    2123     {
    2124 #  ifdef RT_ARCH_AMD64
    2125         mov     rax, [uDRVal]
    2126         mov     dr3, rax
    2127 #  else
    2128         mov     eax, [uDRVal]
    2129         mov     dr3, eax
    2130 #  endif
    2131     }
    2132 # endif
    2133 }
    2134 #endif
    2135 
    2136 
    2137 /**
    2138  * Sets dr6.
    2139  *
    2140  * @param   uDRVal   Debug register value to write
    2141  */
    2142 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2143 DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
    2144 #else
    2145 DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
    2146 {
    2147 # if RT_INLINE_ASM_USES_INTRIN
    2148     __writedr(6, uDRVal);
    2149 # elif RT_INLINE_ASM_GNU_STYLE
    2150 #  ifdef RT_ARCH_AMD64
    2151     __asm__ __volatile__("movq   %0, %%dr6\n\t" : : "r" (uDRVal));
    2152 #  else
    2153     __asm__ __volatile__("movl   %0, %%dr6\n\t" : : "r" (uDRVal));
    2154 #  endif
    2155 # else
    2156     __asm
    2157     {
    2158 #  ifdef RT_ARCH_AMD64
    2159         mov     rax, [uDRVal]
    2160         mov     dr6, rax
    2161 #  else
    2162         mov     eax, [uDRVal]
    2163         mov     dr6, eax
    2164 #  endif
    2165     }
    2166 # endif
    2167 }
    2168 #endif
    2169 
    2170 
    2171 /**
    2172  * Sets dr7.
    2173  *
    2174  * @param   uDRVal   Debug register value to write
    2175  */
    2176 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2177 DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
    2178 #else
    2179 DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
    2180 {
    2181 # if RT_INLINE_ASM_USES_INTRIN
    2182     __writedr(7, uDRVal);
    2183 # elif RT_INLINE_ASM_GNU_STYLE
    2184 #  ifdef RT_ARCH_AMD64
    2185     __asm__ __volatile__("movq   %0, %%dr7\n\t" : : "r" (uDRVal));
    2186 #  else
    2187     __asm__ __volatile__("movl   %0, %%dr7\n\t" : : "r" (uDRVal));
    2188 #  endif
    2189 # else
    2190     __asm
    2191     {
    2192 #  ifdef RT_ARCH_AMD64
    2193         mov     rax, [uDRVal]
    2194         mov     dr7, rax
    2195 #  else
    2196         mov     eax, [uDRVal]
    2197         mov     dr7, eax
    2198 #  endif
    2199     }
    2200 # endif
    2201 }
    2202 #endif
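
/*
 * Usage sketch (illustrative, ring-0 only): arming hardware breakpoint 0 as an
 * execution breakpoint. The DR7 layout used (bit 0 = L0, bits 17:16 = R/W0 and
 * bits 19:18 = LEN0, both zero for instruction breakpoints) follows the
 * architecture manuals; real code should use the X86_DR7_* constants from
 * iprt/x86.h and coordinate with other debug register users.
 */
static void ExampleArmExecBreakpoint0(RTCCUINTREG uLinearAddr)
{
    ASMSetDR0(uLinearAddr);                              /* address to trap on */
    ASMSetDR7(  (ASMGetDR7() & ~(RTCCUINTREG)0x000f0003) /* clear L0/G0, R/W0 and LEN0 */
              | 0x1);                                    /* L0: local enable, execute type */
}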
    2203 
    2204 
    2205 /**
    2206 189  * Compiler memory barrier.
    2207 190  *

    2226 209 #endif
    2227 210
    2228 
    2229 /**
    2230  * Writes an 8-bit unsigned integer to an I/O port, ordered.
    2231  *
    2232  * @param   Port    I/O port to write to.
    2233  * @param   u8      8-bit integer to write.
    2234  */
    2235 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2236 DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
    2237 #else
    2238 DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
    2239 {
    2240 # if RT_INLINE_ASM_GNU_STYLE
    2241     __asm__ __volatile__("outb %b1, %w0\n\t"
    2242                          :: "Nd" (Port),
    2243                             "a" (u8));
    2244 
    2245 # elif RT_INLINE_ASM_USES_INTRIN
    2246     __outbyte(Port, u8);
    2247 
    2248 # else
    2249     __asm
    2250     {
    2251         mov     dx, [Port]
    2252         mov     al, [u8]
    2253         out     dx, al
    2254     }
    2255 # endif
    2256 }
    2257 #endif
    2258 
    2259 
    2260 /**
    2261  * Reads an 8-bit unsigned integer from an I/O port, ordered.
    2262  *
    2263  * @returns 8-bit integer.
    2264  * @param   Port    I/O port to read from.
    2265  */
    2266 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2267 DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
    2268 #else
    2269 DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
    2270 {
    2271     uint8_t u8;
    2272 # if RT_INLINE_ASM_GNU_STYLE
    2273     __asm__ __volatile__("inb %w1, %b0\n\t"
    2274                          : "=a" (u8)
    2275                          : "Nd" (Port));
    2276 
    2277 # elif RT_INLINE_ASM_USES_INTRIN
    2278     u8 = __inbyte(Port);
    2279 
    2280 # else
    2281     __asm
    2282     {
    2283         mov     dx, [Port]
    2284         in      al, dx
    2285         mov     [u8], al
    2286     }
    2287 # endif
    2288     return u8;
    2289 }
    2290 #endif
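
/*
 * Usage sketch (illustrative, ring-0 only): reading a CMOS/RTC register via
 * the classic 0x70/0x71 index and data ports. NMI masking and locking against
 * other CMOS users are ignored; the point is the ASMOutU8/ASMInU8 pairing.
 */
static uint8_t ExampleReadCmosReg(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);           /* select the CMOS register */
    return ASMInU8(0x71);           /* fetch its current value */
}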
    2291 
    2292 
    2293 /**
    2294  * Writes a 16-bit unsigned integer to an I/O port, ordered.
    2295  *
    2296  * @param   Port    I/O port to write to.
    2297  * @param   u16     16-bit integer to write.
    2298  */
    2299 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2300 DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
    2301 #else
    2302 DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
    2303 {
    2304 # if RT_INLINE_ASM_GNU_STYLE
    2305     __asm__ __volatile__("outw %w1, %w0\n\t"
    2306                          :: "Nd" (Port),
    2307                             "a" (u16));
    2308 
    2309 # elif RT_INLINE_ASM_USES_INTRIN
    2310     __outword(Port, u16);
    2311 
    2312 # else
    2313     __asm
    2314     {
    2315         mov     dx, [Port]
    2316         mov     ax, [u16]
    2317         out     dx, ax
    2318     }
    2319 # endif
    2320 }
    2321 #endif
    2322 
    2323 
    2324 /**
    2325  * Reads a 16-bit unsigned integer from an I/O port, ordered.
    2326  *
    2327  * @returns 16-bit integer.
    2328  * @param   Port    I/O port to read from.
    2329  */
    2330 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2331 DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
    2332 #else
    2333 DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
    2334 {
    2335     uint16_t u16;
    2336 # if RT_INLINE_ASM_GNU_STYLE
    2337     __asm__ __volatile__("inw %w1, %w0\n\t"
    2338                          : "=a" (u16)
    2339                          : "Nd" (Port));
    2340 
    2341 # elif RT_INLINE_ASM_USES_INTRIN
    2342     u16 = __inword(Port);
    2343 
    2344 # else
    2345     __asm
    2346     {
    2347         mov     dx, [Port]
    2348         in      ax, dx
    2349         mov     [u16], ax
    2350     }
    2351 # endif
    2352     return u16;
    2353 }
    2354 #endif
    2355 
    2356 
    2357 /**
    2358  * Writes a 32-bit unsigned integer to an I/O port, ordered.
    2359  *
    2360  * @param   Port    I/O port to write to.
    2361  * @param   u32     32-bit integer to write.
    2362  */
    2363 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2364 DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
    2365 #else
    2366 DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
    2367 {
    2368 # if RT_INLINE_ASM_GNU_STYLE
    2369     __asm__ __volatile__("outl %1, %w0\n\t"
    2370                          :: "Nd" (Port),
    2371                             "a" (u32));
    2372 
    2373 # elif RT_INLINE_ASM_USES_INTRIN
    2374     __outdword(Port, u32);
    2375 
    2376 # else
    2377     __asm
    2378     {
    2379         mov     dx, [Port]
    2380         mov     eax, [u32]
    2381         out     dx, eax
    2382     }
    2383 # endif
    2384 }
    2385 #endif
    2386 
    2387 
    2388 /**
    2389  * Reads a 32-bit unsigned integer from an I/O port, ordered.
    2390  *
    2391  * @returns 32-bit integer.
    2392  * @param   Port    I/O port to read from.
    2393  */
    2394 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2395 DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
    2396 #else
    2397 DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
    2398 {
    2399     uint32_t u32;
    2400 # if RT_INLINE_ASM_GNU_STYLE
    2401     __asm__ __volatile__("inl %w1, %0\n\t"
    2402                          : "=a" (u32)
    2403                          : "Nd" (Port));
    2404 
    2405 # elif RT_INLINE_ASM_USES_INTRIN
    2406     u32 = __indword(Port);
    2407 
    2408 # else
    2409     __asm
    2410     {
    2411         mov     dx, [Port]
    2412         in      eax, dx
    2413         mov     [u32], eax
    2414     }
    2415 # endif
    2416     return u32;
    2417 }
    2418 #endif
    2419 
    2420 
    2421 /**
    2422  * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
    2423  *
    2424  * @param   Port    I/O port to write to.
    2425  * @param   pau8    Pointer to the string buffer.
    2426  * @param   c       The number of items to write.
    2427  */
    2428 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2429 DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
    2430 #else
    2431 DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
    2432 {
    2433 # if RT_INLINE_ASM_GNU_STYLE
    2434     __asm__ __volatile__("rep; outsb\n\t"
    2435                          : "+S" (pau8),
    2436                            "+c" (c)
    2437                          : "d" (Port));
    2438 
    2439 # elif RT_INLINE_ASM_USES_INTRIN
    2440     __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
    2441 
    2442 # else
    2443     __asm
    2444     {
    2445         mov     dx, [Port]
    2446         mov     ecx, [c]
    2447         mov     eax, [pau8]
    2448         xchg    esi, eax
    2449         rep outsb
    2450         xchg    esi, eax
    2451     }
    2452 # endif
    2453 }
    2454 #endif
    2455 
    2456 
    2457 /**
    2458  * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
    2459  *
    2460  * @param   Port    I/O port to read from.
    2461  * @param   pau8    Pointer to the string buffer (output).
    2462  * @param   c       The number of items to read.
    2463  */
    2464 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2465 DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
    2466 #else
    2467 DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
    2468 {
    2469 # if RT_INLINE_ASM_GNU_STYLE
    2470     __asm__ __volatile__("rep; insb\n\t"
    2471                          : "+D" (pau8),
    2472                            "+c" (c)
    2473                          : "d" (Port));
    2474 
    2475 # elif RT_INLINE_ASM_USES_INTRIN
    2476     __inbytestring(Port, pau8, (unsigned long)c);
    2477 
    2478 # else
    2479     __asm
    2480     {
    2481         mov     dx, [Port]
    2482         mov     ecx, [c]
    2483         mov     eax, [pau8]
    2484         xchg    edi, eax
    2485         rep insb
    2486         xchg    edi, eax
    2487     }
    2488 # endif
    2489 }
    2490 #endif
    2491 
    2492 
    2493 /**
    2494  * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
    2495  *
    2496  * @param   Port    I/O port to write to.
    2497  * @param   pau16   Pointer to the string buffer.
    2498  * @param   c       The number of items to write.
    2499  */
    2500 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2501 DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
    2502 #else
    2503 DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
    2504 {
    2505 # if RT_INLINE_ASM_GNU_STYLE
    2506     __asm__ __volatile__("rep; outsw\n\t"
    2507                          : "+S" (pau16),
    2508                            "+c" (c)
    2509                          : "d" (Port));
    2510 
    2511 # elif RT_INLINE_ASM_USES_INTRIN
    2512     __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
    2513 
    2514 # else
    2515     __asm
    2516     {
    2517         mov     dx, [Port]
    2518         mov     ecx, [c]
    2519         mov     eax, [pau16]
    2520         xchg    esi, eax
    2521         rep outsw
    2522         xchg    esi, eax
    2523     }
    2524 # endif
    2525 }
    2526 #endif
    2527 
    2528 
    2529 /**
    2530  * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
    2531  *
    2532  * @param   Port    I/O port to read from.
    2533  * @param   pau16   Pointer to the string buffer (output).
    2534  * @param   c       The number of items to read.
    2535  */
    2536 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2537 DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
    2538 #else
    2539 DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
    2540 {
    2541 # if RT_INLINE_ASM_GNU_STYLE
    2542     __asm__ __volatile__("rep; insw\n\t"
    2543                          : "+D" (pau16),
    2544                            "+c" (c)
    2545                          : "d" (Port));
    2546 
    2547 # elif RT_INLINE_ASM_USES_INTRIN
    2548     __inwordstring(Port, pau16, (unsigned long)c);
    2549 
    2550 # else
    2551     __asm
    2552     {
    2553         mov     dx, [Port]
    2554         mov     ecx, [c]
    2555         mov     eax, [pau16]
    2556         xchg    edi, eax
    2557         rep insw
    2558         xchg    edi, eax
    2559     }
    2560 # endif
    2561 }
    2562 #endif
    2563 
    2564 
    2565 /**
    2566  * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
    2567  *
    2568  * @param   Port    I/O port to write to.
    2569  * @param   pau32   Pointer to the string buffer.
    2570  * @param   c       The number of items to write.
    2571  */
    2572 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2573 DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
    2574 #else
    2575 DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
    2576 {
    2577 # if RT_INLINE_ASM_GNU_STYLE
    2578     __asm__ __volatile__("rep; outsl\n\t"
    2579                          : "+S" (pau32),
    2580                            "+c" (c)
    2581                          : "d" (Port));
    2582 
    2583 # elif RT_INLINE_ASM_USES_INTRIN
    2584     __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
    2585 
    2586 # else
    2587     __asm
    2588     {
    2589         mov     dx, [Port]
    2590         mov     ecx, [c]
    2591         mov     eax, [pau32]
    2592         xchg    esi, eax
    2593         rep outsd
    2594         xchg    esi, eax
    2595     }
    2596 # endif
    2597 }
    2598 #endif
    2599 
    2600 
    2601 /**
    2602  * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
    2603  *
    2604  * @param   Port    I/O port to read from.
    2605  * @param   pau32   Pointer to the string buffer (output).
    2606  * @param   c       The number of items to read.
    2607  */
    2608 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    2609 DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
    2610 #else
    2611 DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
    2612 {
    2613 # if RT_INLINE_ASM_GNU_STYLE
    2614     __asm__ __volatile__("rep; insl\n\t"
    2615                          : "+D" (pau32),
    2616                            "+c" (c)
    2617                          : "d" (Port));
    2618 
    2619 # elif RT_INLINE_ASM_USES_INTRIN
    2620     __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
    2621 
    2622 # else
    2623     __asm
    2624     {
    2625         mov     dx, [Port]
    2626         mov     ecx, [c]
    2627         mov     eax, [pau32]
    2628         xchg    edi, eax
    2629         rep insd
    2630         xchg    edi, eax
    2631     }
    2632 # endif
    2633 }
    2634 #endif
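
The 16- and 32-bit string variants are used the same way; a minimal sketch assuming an ATA-style data port at 0x1f0 (illustrative values only, one sector = 256 16-bit items):

    #include <iprt/asm-amd64-x86.h>

    static void readSector(uint16_t *pau16Buf /* 256 items = 512 bytes */)
    {
        ASMInStrU16(0x1f0, pau16Buf, 256);   /* rep insw  */
    }

    static void writeSector(uint16_t const *pau16Buf)
    {
        ASMOutStrU16(0x1f0, pau16Buf, 256);  /* rep outsw */
    }
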
    2635211
    2636212
     
    39751551
    39761552/**
    3977  * Memory load/store fence, waits for any pending writes and reads to complete.
    3978  * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
    3979  */
    3980 DECLINLINE(void) ASMMemoryFenceSSE2(void)
    3981 {
    3982 #if RT_INLINE_ASM_GNU_STYLE
    3983     __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
    3984 #elif RT_INLINE_ASM_USES_INTRIN
    3985     _mm_mfence();
    3986 #else
    3987     __asm
    3988     {
    3989         _emit   0x0f
    3990         _emit   0xae
    3991         _emit   0xf0
    3992     }
    3993 #endif
    3994 }
    3995 
    3996 
    3997 /**
    3998  * Memory store fence, waits for any writes to complete.
    3999  * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
    4000  */
    4001 DECLINLINE(void) ASMWriteFenceSSE(void)
    4002 {
    4003 #if RT_INLINE_ASM_GNU_STYLE
    4004     __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
    4005 #elif RT_INLINE_ASM_USES_INTRIN
    4006     _mm_sfence();
    4007 #else
    4008     __asm
    4009     {
    4010         _emit   0x0f
    4011         _emit   0xae
    4012         _emit   0xf8
    4013     }
    4014 #endif
    4015 }
    4016 
    4017 
    4018 /**
    4019  * Memory load fence, waits for any pending reads to complete.
    4020  * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
    4021  */
    4022 DECLINLINE(void) ASMReadFenceSSE2(void)
    4023 {
    4024 #if RT_INLINE_ASM_GNU_STYLE
    4025     __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
    4026 #elif RT_INLINE_ASM_USES_INTRIN
    4027     _mm_lfence();
    4028 #else
    4029     __asm
    4030     {
    4031         _emit   0x0f
    4032         _emit   0xae
    4033         _emit   0xe8
    4034     }
    4035 #endif
    4036 }
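
A short, hedged sketch of how the fences might be paired. Ordinary x86 loads and stores are already strongly ordered, so these matter mostly around non-temporal or write-combining accesses, and the caller is assumed to have checked the CPUID bits named above:

    #include <iprt/asm-amd64-x86.h>

    static void publishBuffer(volatile uint32_t *pu32Flag)
    {
        /* ... fill a shared buffer, possibly with non-temporal stores ... */
        ASMWriteFenceSSE();    /* sfence: make those stores globally visible first. */
        *pu32Flag = 1;         /* then signal the consumer. */
    }

    static void consumeBuffer(volatile uint32_t const *pu32Flag)
    {
        while (!*pu32Flag)
            /* spin */;
        ASMReadFenceSSE2();    /* lfence: do not start the data loads any earlier. */
        /* ... read the shared buffer ... */
    }
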
    4037 
    4038 
    4039 /**
    40401553 * Memory fence, waits for any pending writes and reads to complete.
    40411554 */
     
    49232436
    49242437
    4925 /**
    4926  * Invalidate page.
    4927  *
    4928  * @param   pv      Address of the page to invalidate.
    4929  */
    4930 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    4931 DECLASM(void) ASMInvalidatePage(void *pv);
    4932 #else
    4933 DECLINLINE(void) ASMInvalidatePage(void *pv)
    4934 {
    4935 # if RT_INLINE_ASM_USES_INTRIN
    4936     __invlpg(pv);
    4937 
    4938 # elif RT_INLINE_ASM_GNU_STYLE
    4939     __asm__ __volatile__("invlpg %0\n\t"
    4940                          : : "m" (*(uint8_t *)pv));
    4941 # else
    4942     __asm
    4943     {
    4944 #  ifdef RT_ARCH_AMD64
    4945         mov     rax, [pv]
    4946         invlpg  [rax]
    4947 #  else
    4948         mov     eax, [pv]
    4949         invlpg  [eax]
    4950 #  endif
    4951     }
    4952 # endif
    4953 }
    4954 #endif
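
A ring-0 flavoured sketch of the usual pattern around ASMInvalidatePage (the page-table update itself is elided):

    #include <iprt/asm-amd64-x86.h>

    static void changeMapping(void *pvPage)
    {
        /* ... modify or clear the PTE that maps pvPage (omitted) ... */
        ASMInvalidatePage(pvPage);  /* invlpg: drop the stale TLB entry for that page. */
    }
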
    4955 
    4956 
    4957 /**
    4958  * Write back the internal caches and invalidate them.
    4959  */
    4960 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
    4961 DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
    4962 #else
    4963 DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
    4964 {
    4965 # if RT_INLINE_ASM_USES_INTRIN
    4966     __wbinvd();
    4967 
    4968 # elif RT_INLINE_ASM_GNU_STYLE
    4969     __asm__ __volatile__("wbinvd");
    4970 # else
    4971     __asm
    4972     {
    4973         wbinvd
    4974     }
    4975 # endif
    4976 }
    4977 #endif
    4978 
    4979 
    4980 /**
    4981  * Invalidate internal and (perhaps) external caches without first
    4982  * flushing dirty cache lines. Use with extreme care.
    4983  */
    4984 #if RT_INLINE_ASM_EXTERNAL
    4985 DECLASM(void) ASMInvalidateInternalCaches(void);
    4986 #else
    4987 DECLINLINE(void) ASMInvalidateInternalCaches(void)
    4988 {
    4989 # if RT_INLINE_ASM_GNU_STYLE
    4990     __asm__ __volatile__("invd");
    4991 # else
    4992     __asm
    4993     {
    4994         invd
    4995     }
    4996 # endif
    4997 }
    4998 #endif
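
For completeness, a hedged sketch of where the heavyweight cache controls fit; wbinvd/invd are privileged instructions and rarely appropriate outside very low-level code:

    #include <iprt/asm-amd64-x86.h>

    static void flushForNonCoherentDevice(void)
    {
        /* Push all dirty lines to RAM and invalidate them, e.g. before a
           device that is not cache coherent reads the memory. */
        ASMWriteBackAndInvalidateCaches();
        /* ASMInvalidateInternalCaches() would throw dirty data away instead of
           writing it back; per the warning above, use it only with extreme care. */
    }
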
    4999 
    5000 
    50012438#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
    50022439# if PAGE_SIZE != 0x1000
     
    52702707            return (uint32_t *)pu32;
    52712708    return NULL;
    5272 }
    5273 #endif
    5274 
    5275 
    5276 /**
    5277  * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
    5278  *
    5279  * @returns u32F1 * u32F2.
    5280  */
    5281 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5282 DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
    5283 #else
    5284 DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
    5285 {
    5286 # ifdef RT_ARCH_AMD64
    5287     return (uint64_t)u32F1 * u32F2;
    5288 # else /* !RT_ARCH_AMD64 */
    5289     uint64_t u64;
    5290 #  if RT_INLINE_ASM_GNU_STYLE
    5291     __asm__ __volatile__("mull %%edx"
    5292                          : "=A" (u64)
    5293                          : "a" (u32F2), "d" (u32F1));
    5294 #  else
    5295     __asm
    5296     {
    5297         mov     edx, [u32F1]
    5298         mov     eax, [u32F2]
    5299         mul     edx
    5300         mov     dword ptr [u64], eax
    5301         mov     dword ptr [u64 + 4], edx
    5302     }
    5303 #  endif
    5304     return u64;
    5305 # endif /* !RT_ARCH_AMD64 */
    5306 }
    5307 #endif
    5308 
    5309 
    5310 /**
    5311  * Multiplies two signed 32-bit values returning a signed 64-bit result.
    5312  *
     5313  * @returns i32F1 * i32F2.
    5314  */
    5315 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5316 DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
    5317 #else
    5318 DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
    5319 {
    5320 # ifdef RT_ARCH_AMD64
    5321     return (int64_t)i32F1 * i32F2;
    5322 # else /* !RT_ARCH_AMD64 */
    5323     int64_t i64;
    5324 #  if RT_INLINE_ASM_GNU_STYLE
    5325     __asm__ __volatile__("imull %%edx"
    5326                          : "=A" (i64)
    5327                          : "a" (i32F2), "d" (i32F1));
    5328 #  else
    5329     __asm
    5330     {
    5331         mov     edx, [i32F1]
    5332         mov     eax, [i32F2]
    5333         imul    edx
    5334         mov     dword ptr [i64], eax
    5335         mov     dword ptr [i64 + 4], edx
    5336     }
    5337 #  endif
    5338     return i64;
    5339 # endif /* !RT_ARCH_AMD64 */
    5340 }
    5341 #endif
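
A tiny worked example of why these helpers exist: a plain 32-bit C multiplication truncates, whereas these return the full 64-bit product.

    #include <iprt/asm-amd64-x86.h>

    static void mulExamples(void)
    {
        uint64_t u64 = ASMMult2xU32RetU64(UINT32_C(0xffffffff), 2); /* 0x1fffffffe, not the truncated 0xfffffffe */
        int64_t  i64 = ASMMult2xS32RetS64(-3, 1000000000);          /* -3000000000, too big for an int32_t */
        (void)u64; (void)i64;
    }
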
    5342 
    5343 
    5344 /**
    5345  * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
    5346  *
    5347  * @returns u64 / u32.
    5348  */
    5349 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5350 DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
    5351 #else
    5352 DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
    5353 {
    5354 # ifdef RT_ARCH_AMD64
    5355     return (uint32_t)(u64 / u32);
    5356 # else /* !RT_ARCH_AMD64 */
    5357 #  if RT_INLINE_ASM_GNU_STYLE
    5358     RTCCUINTREG uDummy;
    5359     __asm__ __volatile__("divl %3"
    5360                          : "=a" (u32), "=d"(uDummy)
    5361                          : "A" (u64), "r" (u32));
    5362 #  else
    5363     __asm
    5364     {
    5365         mov     eax, dword ptr [u64]
    5366         mov     edx, dword ptr [u64 + 4]
    5367         mov     ecx, [u32]
    5368         div     ecx
    5369         mov     [u32], eax
    5370     }
    5371 #  endif
    5372     return u32;
    5373 # endif /* !RT_ARCH_AMD64 */
    5374 }
    5375 #endif
    5376 
    5377 
    5378 /**
    5379  * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
    5380  *
     5381  * @returns i64 / i32.
    5382  */
    5383 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5384 DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
    5385 #else
    5386 DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
    5387 {
    5388 # ifdef RT_ARCH_AMD64
    5389     return (int32_t)(i64 / i32);
    5390 # else /* !RT_ARCH_AMD64 */
    5391 #  if RT_INLINE_ASM_GNU_STYLE
    5392     RTCCUINTREG iDummy;
    5393     __asm__ __volatile__("idivl %3"
    5394                          : "=a" (i32), "=d"(iDummy)
    5395                          : "A" (i64), "r" (i32));
    5396 #  else
    5397     __asm
    5398     {
    5399         mov     eax, dword ptr [i64]
    5400         mov     edx, dword ptr [i64 + 4]
    5401         mov     ecx, [i32]
    5402         idiv    ecx
    5403         mov     [i32], eax
    5404     }
    5405 #  endif
    5406     return i32;
    5407 # endif /* !RT_ARCH_AMD64 */
    5408 }
    5409 #endif
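
A hedged usage sketch. One hardware fact to keep in mind: the underlying div/idiv raises a divide error (#DE) if the quotient does not fit in 32 bits, so callers keep the operands bounded.

    #include <iprt/asm-amd64-x86.h>

    /* Convert nanoseconds to whole microseconds.  Assumes cNs / 1000 fits in
       32 bits, i.e. cNs is below roughly 4.3e12 ns (about 71 minutes). */
    static uint32_t nsToMicroseconds(uint64_t cNs)
    {
        return ASMDivU64ByU32RetU32(cNs, 1000);
    }
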
    5410 
    5411 
    5412 /**
     5413  * Performs a 64-bit unsigned by 32-bit unsigned division and returns
     5414  * the 32-bit unsigned remainder.
    5415  *
    5416  * @returns u64 % u32.
    5417  *
     5418  * @remarks It is important that the quotient (u64 / u32) fits in 32 bits or we'll overflow and crash.
    5419  */
    5420 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5421 DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
    5422 #else
    5423 DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
    5424 {
    5425 # ifdef RT_ARCH_AMD64
    5426     return (uint32_t)(u64 % u32);
    5427 # else /* !RT_ARCH_AMD64 */
    5428 #  if RT_INLINE_ASM_GNU_STYLE
    5429     RTCCUINTREG uDummy;
    5430     __asm__ __volatile__("divl %3"
    5431                          : "=a" (uDummy), "=d"(u32)
    5432                          : "A" (u64), "r" (u32));
    5433 #  else
    5434     __asm
    5435     {
    5436         mov     eax, dword ptr [u64]
    5437         mov     edx, dword ptr [u64 + 4]
    5438         mov     ecx, [u32]
    5439         div     ecx
    5440         mov     [u32], edx
    5441     }
    5442 #  endif
    5443     return u32;
    5444 # endif /* !RT_ARCH_AMD64 */
    5445 }
    5446 #endif
    5447 
    5448 
    5449 /**
     5450  * Performs a 64-bit signed by 32-bit signed division and returns
     5451  * the 32-bit signed remainder.
    5452  *
     5453  * @returns i64 % i32.
    5454  *
     5455  * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit integer or we'll overflow and crash.
    5456  */
    5457 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
    5458 DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
    5459 #else
    5460 DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
    5461 {
    5462 # ifdef RT_ARCH_AMD64
    5463     return (int32_t)(i64 % i32);
    5464 # else /* !RT_ARCH_AMD64 */
    5465 #  if RT_INLINE_ASM_GNU_STYLE
    5466     RTCCUINTREG iDummy;
    5467     __asm__ __volatile__("idivl %3"
    5468                          : "=a" (iDummy), "=d"(i32)
    5469                          : "A" (i64), "r" (i32));
    5470 #  else
    5471     __asm
    5472     {
    5473         mov     eax, dword ptr [i64]
    5474         mov     edx, dword ptr [i64 + 4]
    5475         mov     ecx, [i32]
    5476         idiv    ecx
    5477         mov     [i32], edx
    5478     }
    5479 #  endif
    5480     return i32;
    5481 # endif /* !RT_ARCH_AMD64 */
    5482 }
    5483 #endif
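
The remainder helpers execute the same div/idiv as the division helpers, so the same quotient-must-fit caveat applies even though only the remainder is returned. A small hypothetical sketch:

    #include <iprt/asm-amd64-x86.h>

    /* Offset within a 4 KiB page; assumes offByte / 4096 fits in 32 bits,
       i.e. offByte is below 16 TiB. */
    static uint32_t offsetInPage(uint64_t offByte)
    {
        return ASMModU64ByU32RetU32(offByte, 4096);
    }
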
    5484 
    5485 
    5486 /**
     5487  * Multiplies a 64-bit integer by a 32-bit integer and divides the result by a 32-bit integer,
     5488  * using a 96-bit intermediate result.
    5489  * @note    Don't use 64-bit C arithmetic here since some gcc compilers generate references to
    5490  *          __udivdi3 and __umoddi3 even if this inline function is not used.
    5491  *
    5492  * @returns (u64A * u32B) / u32C.
    5493  * @param   u64A    The 64-bit value.
     5494  * @param   u32B    The 32-bit value to multiply A by.
    5495  * @param   u32C    The 32-bit value to divide A*B by.
    5496  */
    5497 #if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
    5498 DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
    5499 #else
    5500 DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
    5501 {
    5502 # if RT_INLINE_ASM_GNU_STYLE
    5503 #  ifdef RT_ARCH_AMD64
    5504     uint64_t u64Result, u64Spill;
    5505     __asm__ __volatile__("mulq %2\n\t"
    5506                          "divq %3\n\t"
    5507                          : "=a" (u64Result),
    5508                            "=d" (u64Spill)
    5509                          : "r" ((uint64_t)u32B),
    5510                            "r" ((uint64_t)u32C),
    5511                            "0" (u64A),
    5512                            "1" (0));
    5513     return u64Result;
    5514 #  else
    5515     uint32_t u32Dummy;
    5516     uint64_t u64Result;
    5517     __asm__ __volatile__("mull %%ecx       \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
    5518                                                     edx = u64Lo.hi = (u64A.lo * u32B).hi */
    5519                          "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
    5520                                                     eax = u64A.hi */
    5521                          "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
     5522                          "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
    5523                          "xchg %%edx,%%ecx \n\t" /* ecx = u32C
    5524                                                     edx = u32B */
    5525                          "mull %%edx       \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
    5526                                                     edx = u64Hi.hi = (u64A.hi * u32B).hi */
    5527                          "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
    5528                          "adcl $0,%%edx    \n\t" /* u64Hi.hi += carry */
    5529                          "divl %%ecx       \n\t" /* eax = u64Hi / u32C
    5530                                                     edx = u64Hi % u32C */
    5531                          "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
    5532                          "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
    5533                          "divl %%ecx       \n\t" /* u64Result.lo */
    5534                          "movl %%edi,%%edx \n\t" /* u64Result.hi */
    5535                          : "=A"(u64Result), "=c"(u32Dummy),
    5536                            "=S"(u32Dummy), "=D"(u32Dummy)
    5537                          : "a"((uint32_t)u64A),
    5538                            "S"((uint32_t)(u64A >> 32)),
    5539                            "c"(u32B),
    5540                            "D"(u32C));
    5541     return u64Result;
    5542 #  endif
    5543 # else
    5544     RTUINT64U   u;
    5545     uint64_t    u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
    5546     uint64_t    u64Hi = (uint64_t)(u64A >> 32)        * u32B;
    5547     u64Hi  += (u64Lo >> 32);
    5548     u.s.Hi = (uint32_t)(u64Hi / u32C);
    5549     u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
    5550     return u.u;
    5551 # endif
    55522709}
    55532710#endif
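
A typical use of the 96-bit helper is rescaling a large counter where a plain 64-bit product would overflow; a sketch with made-up units (raw ticks and a kHz frequency):

    #include <iprt/asm-amd64-x86.h>

    /* Nanoseconds = ticks * 1000000 / kHz.  The intermediate ticks * 1e6 can
       exceed 64 bits for large tick counts, which is exactly what the 96-bit
       intermediate result handles. */
    static uint64_t ticksToNs(uint64_t cTicks, uint32_t uFreqKHz)
    {
        return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000), uFreqKHz);
    }
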
     
    68263983
    68273984/** @} */
    6828 #endif
    6829 
     3985
     3986/*
     3987 * Include the architecture specific header.
     3988 */
     3989/** @todo drop this bit and require the asm-x86.h to be included explicitly
     3990 *        instead... */
     3991# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
     3992#  include <iprt/asm-amd64-x86.h>
     3993# endif
     3994
     3995#endif
     3996