Changeset 29245 in vbox
- Timestamp: May 9, 2010, 3:06:56 PM
- svn:sync-xref-src-repo-rev: 61327
- Location: trunk/include/iprt
- Files: 1 edited, 1 copied
trunk/include/iprt/asm-amd64-x86.h
r29234 r29245 1 1 /** @file 2 * IPRT - A ssembly Functions.2 * IPRT - AMD64 and x86 Specific Assembly Functions. 3 3 */ 4 4 5 5 /* 6 * Copyright (C) 2006-20 07Oracle Corporation6 * Copyright (C) 2006-2010 Oracle Corporation 7 7 * 8 8 * This file is part of VirtualBox Open Source Edition (OSE), as … … 24 24 */ 25 25 26 #ifndef ___iprt_asm_h 27 #define ___iprt_asm_h 28 29 #include <iprt/cdefs.h> 30 #include <iprt/types.h> 31 #include <iprt/assert.h> 32 /** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */ 33 /** @def RT_INLINE_ASM_USES_INTRIN 34 * Defined as 1 if we're using a _MSC_VER 1400. 35 * Otherwise defined as 0. 36 */ 37 38 /* Solaris 10 header ugliness */ 39 #ifdef u 40 #undef u 41 #endif 26 #ifndef ___iprt_asm_amd64_x86_h 27 #define ___iprt_asm_amd64_x86_h 28 29 /* We depend on several defines and pragmas that live in iprt/asm.h. */ 30 #include <iprt/asm.h> 42 31 43 32 #ifdef _MSC_VER 44 # if _MSC_VER >= 1400 45 # define RT_INLINE_ASM_USES_INTRIN 1 46 # include <intrin.h> 33 # if _MSC_VER >= 1400 && RT_INLINE_ASM_USES_INTRIN 47 34 /* Emit the intrinsics at all optimization levels. */ 48 35 # pragma intrinsic(_ReadWriteBarrier) … … 67 54 # pragma intrinsic(__invlpg) 68 55 # pragma intrinsic(__wbinvd) 69 # pragma intrinsic(__stosd)70 # pragma intrinsic(__stosw)71 # pragma intrinsic(__stosb)72 56 # pragma intrinsic(__readcr0) 73 57 # pragma intrinsic(__readcr2) … … 79 63 # pragma intrinsic(__readdr) 80 64 # pragma intrinsic(__writedr) 81 # pragma intrinsic(_BitScanForward) 82 # pragma intrinsic(_BitScanReverse) 83 # pragma intrinsic(_bittest) 84 # pragma intrinsic(_bittestandset) 85 # pragma intrinsic(_bittestandreset) 86 # pragma intrinsic(_bittestandcomplement) 87 # pragma intrinsic(_byteswap_ushort) 88 # pragma intrinsic(_byteswap_ulong) 89 # pragma intrinsic(_interlockedbittestandset) 90 # pragma intrinsic(_interlockedbittestandreset) 91 # pragma intrinsic(_InterlockedAnd) 92 # pragma intrinsic(_InterlockedOr) 93 # pragma intrinsic(_InterlockedIncrement) 94 # pragma intrinsic(_InterlockedDecrement) 95 # pragma intrinsic(_InterlockedExchange) 96 # pragma intrinsic(_InterlockedExchangeAdd) 97 # pragma intrinsic(_InterlockedCompareExchange) 98 # pragma intrinsic(_InterlockedCompareExchange64) 99 # ifdef RT_ARCH_AMD64 100 # pragma intrinsic(_mm_mfence) 101 # pragma intrinsic(_mm_sfence) 102 # pragma intrinsic(_mm_lfence) 103 # pragma intrinsic(__stosq) 65 # ifdef RT_ARCH_AMD64 104 66 # pragma intrinsic(__readcr8) 105 67 # pragma intrinsic(__writecr8) 106 # pragma intrinsic(_byteswap_uint64) 107 # pragma intrinsic(_InterlockedExchange64) 108 # endif 109 # endif 110 #endif 111 #ifndef RT_INLINE_ASM_USES_INTRIN 112 # define RT_INLINE_ASM_USES_INTRIN 0 113 #endif 114 115 116 /** @defgroup grp_asm ASM - Assembly Routines 117 * @ingroup grp_rt 118 * 119 * @remarks The difference between ordered and unordered atomic operations are that 120 * the former will complete outstanding reads and writes before continuing 121 * while the latter doesn't make any promisses about the order. Ordered 122 * operations doesn't, it seems, make any 100% promise wrt to whether 123 * the operation will complete before any subsequent memory access. 124 * (please, correct if wrong.) 125 * 126 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething 127 * are unordered (note the Uo). 128 * 129 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder 130 * or even optimize assembler instructions away. 
For instance, in the following code 131 * the second rdmsr instruction is optimized away because gcc treats that instruction 132 * as deterministic: 133 * 134 * @code 135 * static inline uint64_t rdmsr_low(int idx) 136 * { 137 * uint32_t low; 138 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); 139 * } 140 * ... 141 * uint32_t msr1 = rdmsr_low(1); 142 * foo(msr1); 143 * msr1 = rdmsr_low(1); 144 * bar(msr1); 145 * @endcode 146 * 147 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will 148 * use the result of the first call as input parameter for bar() as well. For rdmsr this 149 * is not acceptable as this instruction is _not_ deterministic. This applies to reading 150 * machine status information in general. 151 * 68 # endif 69 # endif 70 #endif 71 72 73 74 /** @defgroup grp_rt_asm_amd64_x86 AMD64 and x86 Specific ASM Routines 75 * @ingroup grp_rt_asm 152 76 * @{ 153 77 */ 154 155 /** @def RT_INLINE_ASM_GCC_4_3_X_X86156 * Used to work around some 4.3.x register allocation issues in this version of157 * the compiler. So far this workaround is still required for 4.4 and 4.5. */158 #ifdef __GNUC__159 # define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))160 #endif161 #ifndef RT_INLINE_ASM_GCC_4_3_X_X86162 # define RT_INLINE_ASM_GCC_4_3_X_X86 0163 #endif164 165 /** @def RT_INLINE_DONT_USE_CMPXCHG8B166 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up167 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC168 * mode, x86.169 *170 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b171 * when in PIC mode on x86.172 */173 #ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC174 # define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \175 ( (defined(PIC) || defined(__PIC__)) \176 && defined(RT_ARCH_X86) \177 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \178 || defined(RT_OS_DARWIN)) )179 #endif180 181 /** @def RT_INLINE_ASM_EXTERNAL182 * Defined as 1 if the compiler does not support inline assembly.183 * The ASM* functions will then be implemented in an external .asm file.184 *185 * @remark At the present time it's unconfirmed whether or not Microsoft skipped186 * inline assembly in their AMD64 compiler.187 */188 #if defined(_MSC_VER) && defined(RT_ARCH_AMD64)189 # define RT_INLINE_ASM_EXTERNAL 1190 #else191 # define RT_INLINE_ASM_EXTERNAL 0192 #endif193 194 /** @def RT_INLINE_ASM_GNU_STYLE195 * Defined as 1 if the compiler understands GNU style inline assembly.196 */197 #if defined(_MSC_VER)198 # define RT_INLINE_ASM_GNU_STYLE 0199 #else200 # define RT_INLINE_ASM_GNU_STYLE 1201 #endif202 203 78 204 79 /** @todo find a more proper place for this structure? 
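The cure hinted at above is to mark the asm statement __volatile__ so gcc keeps both reads. A minimal sketch of the corrected helper, assuming the same gcc inline-assembly dialect as the quoted example (the return statement missing from the quote is also added here, and the return type is narrowed to what is actually read):

    static inline uint32_t rdmsr_low(int idx)
    {
        uint32_t low;
        /* __volatile__ stops gcc from merging or optimizing away the instruction. */
        __asm__ __volatile__("rdmsr" : "=a" (low) : "c" (idx) : "edx");
        return low;
    }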
*/ … … 224 99 } RTGDTR, *PRTGDTR; 225 100 #pragma pack() 226 227 228 /** @def ASMReturnAddress229 * Gets the return address of the current (or calling if you like) function or method.230 */231 #ifdef _MSC_VER232 # ifdef __cplusplus233 extern "C"234 # endif235 void * _ReturnAddress(void);236 # pragma intrinsic(_ReturnAddress)237 # define ASMReturnAddress() _ReturnAddress()238 #elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)239 # define ASMReturnAddress() __builtin_return_address(0)240 #else241 # error "Unsupported compiler."242 #endif243 101 244 102 … … 2204 2062 2205 2063 /** 2206 * Compiler memory barrier.2207 *2208 * Ensure that the compiler does not use any cached (register/tmp stack) memory2209 * values or any outstanding writes when returning from this function.2210 *2211 * This function must be used if non-volatile data is modified by a2212 * device or the VMM. Typical cases are port access, MMIO access,2213 * trapping instruction, etc.2214 */2215 #if RT_INLINE_ASM_GNU_STYLE2216 # define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)2217 #elif RT_INLINE_ASM_USES_INTRIN2218 # define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)2219 #else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */2220 DECLINLINE(void) ASMCompilerBarrier(void)2221 {2222 __asm2223 {2224 }2225 }2226 #endif2227 2228 2229 /**2230 2064 * Writes a 8-bit unsigned integer to an I/O port, ordered. 2231 2065 * … … 2636 2470 2637 2471 /** 2638 * Atomically Exchange an unsigned 8-bit value, ordered. 2639 * 2640 * @returns Current *pu8 value 2641 * @param pu8 Pointer to the 8-bit variable to update. 2642 * @param u8 The 8-bit value to assign to *pu8. 2472 * Invalidate page. 2473 * 2474 * @param pv Address of the page to invalidate. 2475 */ 2476 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 2477 DECLASM(void) ASMInvalidatePage(void *pv); 2478 #else 2479 DECLINLINE(void) ASMInvalidatePage(void *pv) 2480 { 2481 # if RT_INLINE_ASM_USES_INTRIN 2482 __invlpg(pv); 2483 2484 # elif RT_INLINE_ASM_GNU_STYLE 2485 __asm__ __volatile__("invlpg %0\n\t" 2486 : : "m" (*(uint8_t *)pv)); 2487 # else 2488 __asm 2489 { 2490 # ifdef RT_ARCH_AMD64 2491 mov rax, [pv] 2492 invlpg [rax] 2493 # else 2494 mov eax, [pv] 2495 invlpg [eax] 2496 # endif 2497 } 2498 # endif 2499 } 2500 #endif 2501 2502 2503 /** 2504 * Write back the internal caches and invalidate them. 2505 */ 2506 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 2507 DECLASM(void) ASMWriteBackAndInvalidateCaches(void); 2508 #else 2509 DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void) 2510 { 2511 # if RT_INLINE_ASM_USES_INTRIN 2512 __wbinvd(); 2513 2514 # elif RT_INLINE_ASM_GNU_STYLE 2515 __asm__ __volatile__("wbinvd"); 2516 # else 2517 __asm 2518 { 2519 wbinvd 2520 } 2521 # endif 2522 } 2523 #endif 2524 2525 2526 /** 2527 * Invalidate internal and (perhaps) external caches without first 2528 * flushing dirty cache lines. Use with extreme care. 
2643 2529 */ 2644 2530 #if RT_INLINE_ASM_EXTERNAL 2645 DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8); 2646 #else 2647 DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8) 2648 { 2649 # if RT_INLINE_ASM_GNU_STYLE 2650 __asm__ __volatile__("xchgb %0, %1\n\t" 2651 : "=m" (*pu8), 2652 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */ 2653 : "1" (u8), 2654 "m" (*pu8)); 2655 # else 2656 __asm 2657 { 2658 # ifdef RT_ARCH_AMD64 2659 mov rdx, [pu8] 2660 mov al, [u8] 2661 xchg [rdx], al 2662 mov [u8], al 2663 # else 2664 mov edx, [pu8] 2665 mov al, [u8] 2666 xchg [edx], al 2667 mov [u8], al 2668 # endif 2669 } 2670 # endif 2671 return u8; 2672 } 2673 #endif 2674 2675 2676 /** 2677 * Atomically Exchange a signed 8-bit value, ordered. 2678 * 2679 * @returns Current *pu8 value 2680 * @param pi8 Pointer to the 8-bit variable to update. 2681 * @param i8 The 8-bit value to assign to *pi8. 2682 */ 2683 DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8) 2684 { 2685 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8); 2686 } 2687 2688 2689 /** 2690 * Atomically Exchange a bool value, ordered. 2691 * 2692 * @returns Current *pf value 2693 * @param pf Pointer to the 8-bit variable to update. 2694 * @param f The 8-bit value to assign to *pi8. 2695 */ 2696 DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f) 2697 { 2698 #ifdef _MSC_VER 2699 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f); 2700 #else 2701 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f); 2702 #endif 2703 } 2704 2705 2706 /** 2707 * Atomically Exchange an unsigned 16-bit value, ordered. 2708 * 2709 * @returns Current *pu16 value 2710 * @param pu16 Pointer to the 16-bit variable to update. 2711 * @param u16 The 16-bit value to assign to *pu16. 2712 */ 2713 #if RT_INLINE_ASM_EXTERNAL 2714 DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16); 2715 #else 2716 DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16) 2717 { 2718 # if RT_INLINE_ASM_GNU_STYLE 2719 __asm__ __volatile__("xchgw %0, %1\n\t" 2720 : "=m" (*pu16), 2721 "=r" (u16) 2722 : "1" (u16), 2723 "m" (*pu16)); 2724 # else 2725 __asm 2726 { 2727 # ifdef RT_ARCH_AMD64 2728 mov rdx, [pu16] 2729 mov ax, [u16] 2730 xchg [rdx], ax 2731 mov [u16], ax 2732 # else 2733 mov edx, [pu16] 2734 mov ax, [u16] 2735 xchg [edx], ax 2736 mov [u16], ax 2737 # endif 2738 } 2739 # endif 2740 return u16; 2741 } 2742 #endif 2743 2744 2745 /** 2746 * Atomically Exchange a signed 16-bit value, ordered. 2747 * 2748 * @returns Current *pu16 value 2749 * @param pi16 Pointer to the 16-bit variable to update. 2750 * @param i16 The 16-bit value to assign to *pi16. 2751 */ 2752 DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16) 2753 { 2754 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16); 2755 } 2756 2757 2758 /** 2759 * Atomically Exchange an unsigned 32-bit value, ordered. 2760 * 2761 * @returns Current *pu32 value 2762 * @param pu32 Pointer to the 32-bit variable to update. 2763 * @param u32 The 32-bit value to assign to *pu32. 
2764 */ 2765 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 2766 DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32); 2767 #else 2768 DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32) 2769 { 2770 # if RT_INLINE_ASM_GNU_STYLE 2771 __asm__ __volatile__("xchgl %0, %1\n\t" 2772 : "=m" (*pu32), 2773 "=r" (u32) 2774 : "1" (u32), 2775 "m" (*pu32)); 2776 2777 # elif RT_INLINE_ASM_USES_INTRIN 2778 u32 = _InterlockedExchange((long *)pu32, u32); 2779 2780 # else 2781 __asm 2782 { 2783 # ifdef RT_ARCH_AMD64 2784 mov rdx, [pu32] 2785 mov eax, u32 2786 xchg [rdx], eax 2787 mov [u32], eax 2788 # else 2789 mov edx, [pu32] 2790 mov eax, u32 2791 xchg [edx], eax 2792 mov [u32], eax 2793 # endif 2794 } 2795 # endif 2796 return u32; 2797 } 2798 #endif 2799 2800 2801 /** 2802 * Atomically Exchange a signed 32-bit value, ordered. 2803 * 2804 * @returns Current *pu32 value 2805 * @param pi32 Pointer to the 32-bit variable to update. 2806 * @param i32 The 32-bit value to assign to *pi32. 2807 */ 2808 DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32) 2809 { 2810 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32); 2811 } 2812 2813 2814 /** 2815 * Atomically Exchange an unsigned 64-bit value, ordered. 2816 * 2817 * @returns Current *pu64 value 2818 * @param pu64 Pointer to the 64-bit variable to update. 2819 * @param u64 The 64-bit value to assign to *pu64. 2820 */ 2821 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \ 2822 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 2823 DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64); 2824 #else 2825 DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64) 2826 { 2827 # if defined(RT_ARCH_AMD64) 2828 # if RT_INLINE_ASM_USES_INTRIN 2829 u64 = _InterlockedExchange64((__int64 *)pu64, u64); 2830 2831 # elif RT_INLINE_ASM_GNU_STYLE 2832 __asm__ __volatile__("xchgq %0, %1\n\t" 2833 : "=m" (*pu64), 2834 "=r" (u64) 2835 : "1" (u64), 2836 "m" (*pu64)); 2837 # else 2838 __asm 2839 { 2840 mov rdx, [pu64] 2841 mov rax, [u64] 2842 xchg [rdx], rax 2843 mov [u64], rax 2844 } 2845 # endif 2846 # else /* !RT_ARCH_AMD64 */ 2847 # if RT_INLINE_ASM_GNU_STYLE 2848 # if defined(PIC) || defined(__PIC__) 2849 uint32_t u32EBX = (uint32_t)u64; 2850 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/ 2851 "xchgl %%ebx, %3\n\t" 2852 "1:\n\t" 2853 "lock; cmpxchg8b (%5)\n\t" 2854 "jnz 1b\n\t" 2855 "movl %3, %%ebx\n\t" 2856 /*"xchgl %%esi, %5\n\t"*/ 2857 : "=A" (u64), 2858 "=m" (*pu64) 2859 : "0" (*pu64), 2860 "m" ( u32EBX ), 2861 "c" ( (uint32_t)(u64 >> 32) ), 2862 "S" (pu64)); 2863 # else /* !PIC */ 2864 __asm__ __volatile__("1:\n\t" 2865 "lock; cmpxchg8b %1\n\t" 2866 "jnz 1b\n\t" 2867 : "=A" (u64), 2868 "=m" (*pu64) 2869 : "0" (*pu64), 2870 "b" ( (uint32_t)u64 ), 2871 "c" ( (uint32_t)(u64 >> 32) )); 2872 # endif 2873 # else 2874 __asm 2875 { 2876 mov ebx, dword ptr [u64] 2877 mov ecx, dword ptr [u64 + 4] 2878 mov edi, pu64 2879 mov eax, dword ptr [edi] 2880 mov edx, dword ptr [edi + 4] 2881 retry: 2882 lock cmpxchg8b [edi] 2883 jnz retry 2884 mov dword ptr [u64], eax 2885 mov dword ptr [u64 + 4], edx 2886 } 2887 # endif 2888 # endif /* !RT_ARCH_AMD64 */ 2889 return u64; 2890 } 2891 #endif 2892 2893 2894 /** 2895 * Atomically Exchange an signed 64-bit value, ordered. 2896 * 2897 * @returns Current *pi64 value 2898 * @param pi64 Pointer to the 64-bit variable to update. 2899 * @param i64 The 64-bit value to assign to *pi64. 
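As a usage sketch for the exchange family above (after this change the generic ASMAtomic* operations presumably remain in iprt/asm.h, which this header now includes; the flag and function names below are hypothetical): the return value is the previous contents, which gives a simple one-shot claim pattern.

    #include <iprt/asm.h>   /* ASMAtomicXchgU32 */

    static volatile uint32_t g_fClaimed = 0;   /* hypothetical shared flag */

    /* Returns true for exactly one caller: the one that swaps out the 0. */
    static bool claimOnce(void)
    {
        return ASMAtomicXchgU32(&g_fClaimed, 1) == 0;
    }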
2900 */ 2901 DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64) 2902 { 2903 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64); 2904 } 2905 2906 2907 /** 2908 * Atomically Exchange a pointer value, ordered. 2909 * 2910 * @returns Current *ppv value 2911 * @param ppv Pointer to the pointer variable to update. 2912 * @param pv The pointer value to assign to *ppv. 2913 */ 2914 DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv) 2915 { 2916 #if ARCH_BITS == 32 2917 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv); 2918 #elif ARCH_BITS == 64 2919 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv); 2920 #else 2921 # error "ARCH_BITS is bogus" 2922 #endif 2923 } 2924 2925 2926 /** 2927 * Atomically Exchange a raw-mode context pointer value, ordered. 2928 * 2929 * @returns Current *ppv value 2930 * @param ppvRC Pointer to the pointer variable to update. 2931 * @param pvRC The pointer value to assign to *ppv. 2932 */ 2933 DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC) 2934 { 2935 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC); 2936 } 2937 2938 2939 /** 2940 * Atomically Exchange a ring-0 pointer value, ordered. 2941 * 2942 * @returns Current *ppv value 2943 * @param ppvR0 Pointer to the pointer variable to update. 2944 * @param pvR0 The pointer value to assign to *ppv. 2945 */ 2946 DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0) 2947 { 2948 #if R0_ARCH_BITS == 32 2949 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0); 2950 #elif R0_ARCH_BITS == 64 2951 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0); 2952 #else 2953 # error "R0_ARCH_BITS is bogus" 2954 #endif 2955 } 2956 2957 2958 /** 2959 * Atomically Exchange a ring-3 pointer value, ordered. 2960 * 2961 * @returns Current *ppv value 2962 * @param ppvR3 Pointer to the pointer variable to update. 2963 * @param pvR3 The pointer value to assign to *ppv. 2964 */ 2965 DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3) 2966 { 2967 #if R3_ARCH_BITS == 32 2968 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3); 2969 #elif R3_ARCH_BITS == 64 2970 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3); 2971 #else 2972 # error "R3_ARCH_BITS is bogus" 2973 #endif 2974 } 2975 2976 2977 /** @def ASMAtomicXchgHandle 2978 * Atomically Exchange a typical IPRT handle value, ordered. 2979 * 2980 * @param ph Pointer to the value to update. 2981 * @param hNew The new value to assigned to *pu. 2982 * @param phRes Where to store the current *ph value. 2983 * 2984 * @remarks This doesn't currently work for all handles (like RTFILE). 
2985 */ 2986 #if HC_ARCH_BITS == 32 2987 # define ASMAtomicXchgHandle(ph, hNew, phRes) \ 2988 do { \ 2989 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 2990 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \ 2991 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \ 2992 } while (0) 2993 #elif HC_ARCH_BITS == 64 2994 # define ASMAtomicXchgHandle(ph, hNew, phRes) \ 2995 do { \ 2996 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 2997 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \ 2998 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \ 2999 } while (0) 3000 #else 3001 # error HC_ARCH_BITS 3002 #endif 3003 3004 3005 /** 3006 * Atomically Exchange a value which size might differ 3007 * between platforms or compilers, ordered. 3008 * 3009 * @param pu Pointer to the variable to update. 3010 * @param uNew The value to assign to *pu. 3011 * @todo This is busted as its missing the result argument. 3012 */ 3013 #define ASMAtomicXchgSize(pu, uNew) \ 3014 do { \ 3015 switch (sizeof(*(pu))) { \ 3016 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \ 3017 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \ 3018 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \ 3019 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \ 3020 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \ 3021 } \ 3022 } while (0) 3023 3024 /** 3025 * Atomically Exchange a value which size might differ 3026 * between platforms or compilers, ordered. 3027 * 3028 * @param pu Pointer to the variable to update. 3029 * @param uNew The value to assign to *pu. 3030 * @param puRes Where to store the current *pu value. 3031 */ 3032 #define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \ 3033 do { \ 3034 switch (sizeof(*(pu))) { \ 3035 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \ 3036 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \ 3037 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \ 3038 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \ 3039 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \ 3040 } \ 3041 } while (0) 3042 3043 3044 3045 /** 3046 * Atomically Compare and Exchange an unsigned 8-bit value, ordered. 3047 * 3048 * @returns true if xchg was done. 3049 * @returns false if xchg wasn't done. 3050 * 3051 * @param pu8 Pointer to the value to update. 3052 * @param u8New The new value to assigned to *pu8. 3053 * @param u8Old The old value to *pu8 compare with. 
3054 */ 3055 #if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE 3056 DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old); 3057 #else 3058 DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old) 3059 { 3060 uint8_t u8Ret; 3061 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t" 3062 "setz %1\n\t" 3063 : "=m" (*pu8), 3064 "=qm" (u8Ret), 3065 "=a" (u8Old) 3066 : "q" (u8New), 3067 "2" (u8Old), 3068 "m" (*pu8)); 3069 return (bool)u8Ret; 3070 } 3071 #endif 3072 3073 3074 /** 3075 * Atomically Compare and Exchange a signed 8-bit value, ordered. 3076 * 3077 * @returns true if xchg was done. 3078 * @returns false if xchg wasn't done. 3079 * 3080 * @param pi8 Pointer to the value to update. 3081 * @param i8New The new value to assigned to *pi8. 3082 * @param i8Old The old value to *pi8 compare with. 3083 */ 3084 DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old) 3085 { 3086 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old); 3087 } 3088 3089 3090 /** 3091 * Atomically Compare and Exchange a bool value, ordered. 3092 * 3093 * @returns true if xchg was done. 3094 * @returns false if xchg wasn't done. 3095 * 3096 * @param pf Pointer to the value to update. 3097 * @param fNew The new value to assigned to *pf. 3098 * @param fOld The old value to *pf compare with. 3099 */ 3100 DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld) 3101 { 3102 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld); 3103 } 3104 3105 3106 /** 3107 * Atomically Compare and Exchange an unsigned 32-bit value, ordered. 3108 * 3109 * @returns true if xchg was done. 3110 * @returns false if xchg wasn't done. 3111 * 3112 * @param pu32 Pointer to the value to update. 3113 * @param u32New The new value to assigned to *pu32. 3114 * @param u32Old The old value to *pu32 compare with. 3115 */ 3116 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3117 DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old); 3118 #else 3119 DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old) 3120 { 3121 # if RT_INLINE_ASM_GNU_STYLE 3122 uint8_t u8Ret; 3123 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t" 3124 "setz %1\n\t" 3125 : "=m" (*pu32), 3126 "=qm" (u8Ret), 3127 "=a" (u32Old) 3128 : "r" (u32New), 3129 "2" (u32Old), 3130 "m" (*pu32)); 3131 return (bool)u8Ret; 3132 3133 # elif RT_INLINE_ASM_USES_INTRIN 3134 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old; 3135 3136 # else 3137 uint32_t u32Ret; 3138 __asm 3139 { 3140 # ifdef RT_ARCH_AMD64 3141 mov rdx, [pu32] 3142 # else 3143 mov edx, [pu32] 3144 # endif 3145 mov eax, [u32Old] 3146 mov ecx, [u32New] 3147 # ifdef RT_ARCH_AMD64 3148 lock cmpxchg [rdx], ecx 3149 # else 3150 lock cmpxchg [edx], ecx 3151 # endif 3152 setz al 3153 movzx eax, al 3154 mov [u32Ret], eax 3155 } 3156 return !!u32Ret; 3157 # endif 3158 } 3159 #endif 3160 3161 3162 /** 3163 * Atomically Compare and Exchange a signed 32-bit value, ordered. 3164 * 3165 * @returns true if xchg was done. 3166 * @returns false if xchg wasn't done. 3167 * 3168 * @param pi32 Pointer to the value to update. 3169 * @param i32New The new value to assigned to *pi32. 3170 * @param i32Old The old value to *pi32 compare with. 
3171 */ 3172 DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old) 3173 { 3174 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old); 3175 } 3176 3177 3178 /** 3179 * Atomically Compare and exchange an unsigned 64-bit value, ordered. 3180 * 3181 * @returns true if xchg was done. 3182 * @returns false if xchg wasn't done. 3183 * 3184 * @param pu64 Pointer to the 64-bit variable to update. 3185 * @param u64New The 64-bit value to assign to *pu64. 3186 * @param u64Old The value to compare with. 3187 */ 3188 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \ 3189 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 3190 DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old); 3191 #else 3192 DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old) 3193 { 3194 # if RT_INLINE_ASM_USES_INTRIN 3195 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old; 3196 3197 # elif defined(RT_ARCH_AMD64) 3198 # if RT_INLINE_ASM_GNU_STYLE 3199 uint8_t u8Ret; 3200 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t" 3201 "setz %1\n\t" 3202 : "=m" (*pu64), 3203 "=qm" (u8Ret), 3204 "=a" (u64Old) 3205 : "r" (u64New), 3206 "2" (u64Old), 3207 "m" (*pu64)); 3208 return (bool)u8Ret; 3209 # else 3210 bool fRet; 3211 __asm 3212 { 3213 mov rdx, [pu32] 3214 mov rax, [u64Old] 3215 mov rcx, [u64New] 3216 lock cmpxchg [rdx], rcx 3217 setz al 3218 mov [fRet], al 3219 } 3220 return fRet; 3221 # endif 3222 # else /* !RT_ARCH_AMD64 */ 3223 uint32_t u32Ret; 3224 # if RT_INLINE_ASM_GNU_STYLE 3225 # if defined(PIC) || defined(__PIC__) 3226 uint32_t u32EBX = (uint32_t)u64New; 3227 uint32_t u32Spill; 3228 __asm__ __volatile__("xchgl %%ebx, %4\n\t" 3229 "lock; cmpxchg8b (%6)\n\t" 3230 "setz %%al\n\t" 3231 "movl %4, %%ebx\n\t" 3232 "movzbl %%al, %%eax\n\t" 3233 : "=a" (u32Ret), 3234 "=d" (u32Spill), 3235 # if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403 3236 "+m" (*pu64) 3237 # else 3238 "=m" (*pu64) 3239 # endif 3240 : "A" (u64Old), 3241 "m" ( u32EBX ), 3242 "c" ( (uint32_t)(u64New >> 32) ), 3243 "S" (pu64)); 3244 # else /* !PIC */ 3245 uint32_t u32Spill; 3246 __asm__ __volatile__("lock; cmpxchg8b %2\n\t" 3247 "setz %%al\n\t" 3248 "movzbl %%al, %%eax\n\t" 3249 : "=a" (u32Ret), 3250 "=d" (u32Spill), 3251 "+m" (*pu64) 3252 : "A" (u64Old), 3253 "b" ( (uint32_t)u64New ), 3254 "c" ( (uint32_t)(u64New >> 32) )); 3255 # endif 3256 return (bool)u32Ret; 3257 # else 3258 __asm 3259 { 3260 mov ebx, dword ptr [u64New] 3261 mov ecx, dword ptr [u64New + 4] 3262 mov edi, [pu64] 3263 mov eax, dword ptr [u64Old] 3264 mov edx, dword ptr [u64Old + 4] 3265 lock cmpxchg8b [edi] 3266 setz al 3267 movzx eax, al 3268 mov dword ptr [u32Ret], eax 3269 } 3270 return !!u32Ret; 3271 # endif 3272 # endif /* !RT_ARCH_AMD64 */ 3273 } 3274 #endif 3275 3276 3277 /** 3278 * Atomically Compare and exchange a signed 64-bit value, ordered. 3279 * 3280 * @returns true if xchg was done. 3281 * @returns false if xchg wasn't done. 3282 * 3283 * @param pi64 Pointer to the 64-bit variable to update. 3284 * @param i64 The 64-bit value to assign to *pu64. 3285 * @param i64Old The value to compare with. 
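ASMAtomicCmpXchgU32/U64 return true only when the exchange happened, which is the building block for retry loops. A hedged sketch (atomicUpdateMaxU32 and its parameters are made up for illustration; the IPRT calls match the signatures in the hunks above):

    #include <iprt/asm.h>   /* ASMAtomicReadU32, ASMAtomicCmpXchgU32 */

    /* Raise *pu32Max to u32New unless another thread already stored a
       larger value; retry whenever the compare-exchange loses a race. */
    static void atomicUpdateMaxU32(volatile uint32_t *pu32Max, uint32_t u32New)
    {
        uint32_t u32Cur;
        do
            u32Cur = ASMAtomicReadU32(pu32Max);
        while (   u32New > u32Cur
               && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
    }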
3286 */ 3287 DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old) 3288 { 3289 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old); 3290 } 3291 3292 3293 /** 3294 * Atomically Compare and Exchange a pointer value, ordered. 3295 * 3296 * @returns true if xchg was done. 3297 * @returns false if xchg wasn't done. 3298 * 3299 * @param ppv Pointer to the value to update. 3300 * @param pvNew The new value to assigned to *ppv. 3301 * @param pvOld The old value to *ppv compare with. 3302 */ 3303 DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld) 3304 { 3305 #if ARCH_BITS == 32 3306 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld); 3307 #elif ARCH_BITS == 64 3308 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld); 3309 #else 3310 # error "ARCH_BITS is bogus" 3311 #endif 3312 } 3313 3314 3315 /** @def ASMAtomicCmpXchgHandle 3316 * Atomically Compare and Exchange a typical IPRT handle value, ordered. 3317 * 3318 * @param ph Pointer to the value to update. 3319 * @param hNew The new value to assigned to *pu. 3320 * @param hOld The old value to *pu compare with. 3321 * @param fRc Where to store the result. 3322 * 3323 * @remarks This doesn't currently work for all handles (like RTFILE). 3324 */ 3325 #if HC_ARCH_BITS == 32 3326 # define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \ 3327 do { \ 3328 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 3329 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \ 3330 } while (0) 3331 #elif HC_ARCH_BITS == 64 3332 # define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \ 3333 do { \ 3334 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 3335 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \ 3336 } while (0) 3337 #else 3338 # error HC_ARCH_BITS 3339 #endif 3340 3341 3342 /** @def ASMAtomicCmpXchgSize 3343 * Atomically Compare and Exchange a value which size might differ 3344 * between platforms or compilers, ordered. 3345 * 3346 * @param pu Pointer to the value to update. 3347 * @param uNew The new value to assigned to *pu. 3348 * @param uOld The old value to *pu compare with. 3349 * @param fRc Where to store the result. 3350 */ 3351 #define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \ 3352 do { \ 3353 switch (sizeof(*(pu))) { \ 3354 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \ 3355 break; \ 3356 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \ 3357 break; \ 3358 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \ 3359 (fRc) = false; \ 3360 break; \ 3361 } \ 3362 } while (0) 3363 3364 3365 /** 3366 * Atomically Compare and Exchange an unsigned 32-bit value, additionally 3367 * passes back old value, ordered. 3368 * 3369 * @returns true if xchg was done. 3370 * @returns false if xchg wasn't done. 3371 * 3372 * @param pu32 Pointer to the value to update. 3373 * @param u32New The new value to assigned to *pu32. 3374 * @param u32Old The old value to *pu32 compare with. 3375 * @param pu32Old Pointer store the old value at. 
3376 */ 3377 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3378 DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old); 3379 #else 3380 DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old) 3381 { 3382 # if RT_INLINE_ASM_GNU_STYLE 3383 uint8_t u8Ret; 3384 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t" 3385 "setz %1\n\t" 3386 : "=m" (*pu32), 3387 "=qm" (u8Ret), 3388 "=a" (*pu32Old) 3389 : "r" (u32New), 3390 "a" (u32Old), 3391 "m" (*pu32)); 3392 return (bool)u8Ret; 3393 3394 # elif RT_INLINE_ASM_USES_INTRIN 3395 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old; 3396 3397 # else 3398 uint32_t u32Ret; 3399 __asm 3400 { 3401 # ifdef RT_ARCH_AMD64 3402 mov rdx, [pu32] 3403 # else 3404 mov edx, [pu32] 3405 # endif 3406 mov eax, [u32Old] 3407 mov ecx, [u32New] 3408 # ifdef RT_ARCH_AMD64 3409 lock cmpxchg [rdx], ecx 3410 mov rdx, [pu32Old] 3411 mov [rdx], eax 3412 # else 3413 lock cmpxchg [edx], ecx 3414 mov edx, [pu32Old] 3415 mov [edx], eax 3416 # endif 3417 setz al 3418 movzx eax, al 3419 mov [u32Ret], eax 3420 } 3421 return !!u32Ret; 3422 # endif 3423 } 3424 #endif 3425 3426 3427 /** 3428 * Atomically Compare and Exchange a signed 32-bit value, additionally 3429 * passes back old value, ordered. 3430 * 3431 * @returns true if xchg was done. 3432 * @returns false if xchg wasn't done. 3433 * 3434 * @param pi32 Pointer to the value to update. 3435 * @param i32New The new value to assigned to *pi32. 3436 * @param i32Old The old value to *pi32 compare with. 3437 * @param pi32Old Pointer store the old value at. 3438 */ 3439 DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old) 3440 { 3441 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old); 3442 } 3443 3444 3445 /** 3446 * Atomically Compare and exchange an unsigned 64-bit value, additionally 3447 * passing back old value, ordered. 3448 * 3449 * @returns true if xchg was done. 3450 * @returns false if xchg wasn't done. 3451 * 3452 * @param pu64 Pointer to the 64-bit variable to update. 3453 * @param u64New The 64-bit value to assign to *pu64. 3454 * @param u64Old The value to compare with. 3455 * @param pu64Old Pointer store the old value at. 
3456 */ 3457 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \ 3458 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 3459 DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old); 3460 #else 3461 DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old) 3462 { 3463 # if RT_INLINE_ASM_USES_INTRIN 3464 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old; 3465 3466 # elif defined(RT_ARCH_AMD64) 3467 # if RT_INLINE_ASM_GNU_STYLE 3468 uint8_t u8Ret; 3469 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t" 3470 "setz %1\n\t" 3471 : "=m" (*pu64), 3472 "=qm" (u8Ret), 3473 "=a" (*pu64Old) 3474 : "r" (u64New), 3475 "a" (u64Old), 3476 "m" (*pu64)); 3477 return (bool)u8Ret; 3478 # else 3479 bool fRet; 3480 __asm 3481 { 3482 mov rdx, [pu32] 3483 mov rax, [u64Old] 3484 mov rcx, [u64New] 3485 lock cmpxchg [rdx], rcx 3486 mov rdx, [pu64Old] 3487 mov [rdx], rax 3488 setz al 3489 mov [fRet], al 3490 } 3491 return fRet; 3492 # endif 3493 # else /* !RT_ARCH_AMD64 */ 3494 # if RT_INLINE_ASM_GNU_STYLE 3495 uint64_t u64Ret; 3496 # if defined(PIC) || defined(__PIC__) 3497 /* NB: this code uses a memory clobber description, because the clean 3498 * solution with an output value for *pu64 makes gcc run out of registers. 3499 * This will cause suboptimal code, and anyone with a better solution is 3500 * welcome to improve this. */ 3501 __asm__ __volatile__("xchgl %%ebx, %1\n\t" 3502 "lock; cmpxchg8b %3\n\t" 3503 "xchgl %%ebx, %1\n\t" 3504 : "=A" (u64Ret) 3505 : "DS" ((uint32_t)u64New), 3506 "c" ((uint32_t)(u64New >> 32)), 3507 "m" (*pu64), 3508 "0" (u64Old) 3509 : "memory" ); 3510 # else /* !PIC */ 3511 __asm__ __volatile__("lock; cmpxchg8b %4\n\t" 3512 : "=A" (u64Ret), 3513 "=m" (*pu64) 3514 : "b" ((uint32_t)u64New), 3515 "c" ((uint32_t)(u64New >> 32)), 3516 "m" (*pu64), 3517 "0" (u64Old)); 3518 # endif 3519 *pu64Old = u64Ret; 3520 return u64Ret == u64Old; 3521 # else 3522 uint32_t u32Ret; 3523 __asm 3524 { 3525 mov ebx, dword ptr [u64New] 3526 mov ecx, dword ptr [u64New + 4] 3527 mov edi, [pu64] 3528 mov eax, dword ptr [u64Old] 3529 mov edx, dword ptr [u64Old + 4] 3530 lock cmpxchg8b [edi] 3531 mov ebx, [pu64Old] 3532 mov [ebx], eax 3533 setz al 3534 movzx eax, al 3535 add ebx, 4 3536 mov [ebx], edx 3537 mov dword ptr [u32Ret], eax 3538 } 3539 return !!u32Ret; 3540 # endif 3541 # endif /* !RT_ARCH_AMD64 */ 3542 } 3543 #endif 3544 3545 3546 /** 3547 * Atomically Compare and exchange a signed 64-bit value, additionally 3548 * passing back old value, ordered. 3549 * 3550 * @returns true if xchg was done. 3551 * @returns false if xchg wasn't done. 3552 * 3553 * @param pi64 Pointer to the 64-bit variable to update. 3554 * @param i64 The 64-bit value to assign to *pu64. 3555 * @param i64Old The value to compare with. 3556 * @param pi64Old Pointer store the old value at. 3557 */ 3558 DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old) 3559 { 3560 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old); 3561 } 3562 3563 /** @def ASMAtomicCmpXchgExHandle 3564 * Atomically Compare and Exchange a typical IPRT handle value, ordered. 3565 * 3566 * @param ph Pointer to the value to update. 3567 * @param hNew The new value to assigned to *pu. 3568 * @param hOld The old value to *pu compare with. 
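The Ex variants additionally hand back the value actually observed, which saves the separate re-read in a retry loop. A hedged sketch (atomicTestAndSetBits and fBits are illustrative names only):

    #include <iprt/asm.h>   /* ASMAtomicCmpXchgExU32 */

    /* Set the bits in fBits and report whether any of them were already set. */
    static bool atomicTestAndSetBits(volatile uint32_t *pu32, uint32_t fBits)
    {
        uint32_t u32Old = *pu32;            /* seed; corrected on failure */
        uint32_t u32Seen;
        while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBits, u32Old, &u32Seen))
            u32Old = u32Seen;               /* failed compare returned the current value */
        return (u32Old & fBits) != 0;
    }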
3569 * @param fRc Where to store the result. 3570 * @param phOldVal Pointer to where to store the old value. 3571 * 3572 * @remarks This doesn't currently work for all handles (like RTFILE). 3573 */ 3574 #if HC_ARCH_BITS == 32 3575 # define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \ 3576 do { \ 3577 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \ 3578 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \ 3579 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \ 3580 } while (0) 3581 #elif HC_ARCH_BITS == 64 3582 # define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \ 3583 do { \ 3584 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 3585 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \ 3586 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \ 3587 } while (0) 3588 #else 3589 # error HC_ARCH_BITS 3590 #endif 3591 3592 3593 /** @def ASMAtomicCmpXchgExSize 3594 * Atomically Compare and Exchange a value which size might differ 3595 * between platforms or compilers. Additionally passes back old value. 3596 * 3597 * @param pu Pointer to the value to update. 3598 * @param uNew The new value to assigned to *pu. 3599 * @param uOld The old value to *pu compare with. 3600 * @param fRc Where to store the result. 3601 * @param puOldVal Pointer to where to store the old value. 3602 */ 3603 #define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \ 3604 do { \ 3605 switch (sizeof(*(pu))) { \ 3606 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(uOldVal)); \ 3607 break; \ 3608 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(uOldVal)); \ 3609 break; \ 3610 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \ 3611 (fRc) = false; \ 3612 (uOldVal) = 0; \ 3613 break; \ 3614 } \ 3615 } while (0) 3616 3617 3618 /** 3619 * Atomically Compare and Exchange a pointer value, additionally 3620 * passing back old value, ordered. 3621 * 3622 * @returns true if xchg was done. 3623 * @returns false if xchg wasn't done. 3624 * 3625 * @param ppv Pointer to the value to update. 3626 * @param pvNew The new value to assigned to *ppv. 3627 * @param pvOld The old value to *ppv compare with. 3628 * @param ppvOld Pointer store the old value at. 3629 */ 3630 DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld) 3631 { 3632 #if ARCH_BITS == 32 3633 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld); 3634 #elif ARCH_BITS == 64 3635 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld); 3636 #else 3637 # error "ARCH_BITS is bogus" 3638 #endif 3639 } 3640 3641 3642 /** 3643 * Atomically exchanges and adds to a 32-bit value, ordered. 3644 * 3645 * @returns The old value. 3646 * @param pu32 Pointer to the value. 3647 * @param u32 Number to add. 
3648 */ 3649 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3650 DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32); 3651 #else 3652 DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32) 3653 { 3654 # if RT_INLINE_ASM_USES_INTRIN 3655 u32 = _InterlockedExchangeAdd((long *)pu32, u32); 3656 return u32; 3657 3658 # elif RT_INLINE_ASM_GNU_STYLE 3659 __asm__ __volatile__("lock; xaddl %0, %1\n\t" 3660 : "=r" (u32), 3661 "=m" (*pu32) 3662 : "0" (u32), 3663 "m" (*pu32) 3664 : "memory"); 3665 return u32; 3666 # else 3667 __asm 3668 { 3669 mov eax, [u32] 3670 # ifdef RT_ARCH_AMD64 3671 mov rdx, [pu32] 3672 lock xadd [rdx], eax 3673 # else 3674 mov edx, [pu32] 3675 lock xadd [edx], eax 3676 # endif 3677 mov [u32], eax 3678 } 3679 return u32; 3680 # endif 3681 } 3682 #endif 3683 3684 3685 /** 3686 * Atomically exchanges and adds to a signed 32-bit value, ordered. 3687 * 3688 * @returns The old value. 3689 * @param pi32 Pointer to the value. 3690 * @param i32 Number to add. 3691 */ 3692 DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32) 3693 { 3694 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32); 3695 } 3696 3697 3698 /** 3699 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered. 3700 * 3701 * @returns The old value. 3702 * @param pu32 Pointer to the value. 3703 * @param u32 Number to subtract. 3704 */ 3705 DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32) 3706 { 3707 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32); 3708 } 3709 3710 3711 /** 3712 * Atomically exchanges and subtracts to a signed 32-bit value, ordered. 3713 * 3714 * @returns The old value. 3715 * @param pi32 Pointer to the value. 3716 * @param i32 Number to subtract. 3717 */ 3718 DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32) 3719 { 3720 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32); 3721 } 3722 3723 3724 /** 3725 * Atomically increment a 32-bit value, ordered. 3726 * 3727 * @returns The new value. 3728 * @param pu32 Pointer to the value to increment. 3729 */ 3730 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3731 DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32); 3732 #else 3733 DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32) 3734 { 3735 uint32_t u32; 3736 # if RT_INLINE_ASM_USES_INTRIN 3737 u32 = _InterlockedIncrement((long *)pu32); 3738 return u32; 3739 3740 # elif RT_INLINE_ASM_GNU_STYLE 3741 __asm__ __volatile__("lock; xaddl %0, %1\n\t" 3742 : "=r" (u32), 3743 "=m" (*pu32) 3744 : "0" (1), 3745 "m" (*pu32) 3746 : "memory"); 3747 return u32+1; 3748 # else 3749 __asm 3750 { 3751 mov eax, 1 3752 # ifdef RT_ARCH_AMD64 3753 mov rdx, [pu32] 3754 lock xadd [rdx], eax 3755 # else 3756 mov edx, [pu32] 3757 lock xadd [edx], eax 3758 # endif 3759 mov u32, eax 3760 } 3761 return u32+1; 3762 # endif 3763 } 3764 #endif 3765 3766 3767 /** 3768 * Atomically increment a signed 32-bit value, ordered. 3769 * 3770 * @returns The new value. 3771 * @param pi32 Pointer to the value to increment. 3772 */ 3773 DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32) 3774 { 3775 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32); 3776 } 3777 3778 3779 /** 3780 * Atomically decrement an unsigned 32-bit value, ordered. 3781 * 3782 * @returns The new value. 3783 * @param pu32 Pointer to the value to decrement. 
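ASMAtomicIncU32 and ASMAtomicDecU32 return the new value, which is exactly what a reference count needs. A hedged sketch with a made-up MYOBJ structure:

    #include <iprt/asm.h>   /* ASMAtomicIncU32, ASMAtomicDecU32 */

    typedef struct MYOBJ
    {
        volatile uint32_t cRefs;    /* reference count */
        /* ... payload ... */
    } MYOBJ;

    static uint32_t myObjRetain(MYOBJ *pObj)
    {
        return ASMAtomicIncU32(&pObj->cRefs);           /* new count */
    }

    static uint32_t myObjRelease(MYOBJ *pObj)
    {
        uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs); /* new count */
        if (!cRefs)
        {
            /* last reference dropped: destroy pObj here */
        }
        return cRefs;
    }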
3784 */ 3785 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3786 DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32); 3787 #else 3788 DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32) 3789 { 3790 uint32_t u32; 3791 # if RT_INLINE_ASM_USES_INTRIN 3792 u32 = _InterlockedDecrement((long *)pu32); 3793 return u32; 3794 3795 # elif RT_INLINE_ASM_GNU_STYLE 3796 __asm__ __volatile__("lock; xaddl %0, %1\n\t" 3797 : "=r" (u32), 3798 "=m" (*pu32) 3799 : "0" (-1), 3800 "m" (*pu32) 3801 : "memory"); 3802 return u32-1; 3803 # else 3804 __asm 3805 { 3806 mov eax, -1 3807 # ifdef RT_ARCH_AMD64 3808 mov rdx, [pu32] 3809 lock xadd [rdx], eax 3810 # else 3811 mov edx, [pu32] 3812 lock xadd [edx], eax 3813 # endif 3814 mov u32, eax 3815 } 3816 return u32-1; 3817 # endif 3818 } 3819 #endif 3820 3821 3822 /** 3823 * Atomically decrement a signed 32-bit value, ordered. 3824 * 3825 * @returns The new value. 3826 * @param pi32 Pointer to the value to decrement. 3827 */ 3828 DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32) 3829 { 3830 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32); 3831 } 3832 3833 3834 /** 3835 * Atomically Or an unsigned 32-bit value, ordered. 3836 * 3837 * @param pu32 Pointer to the pointer variable to OR u32 with. 3838 * @param u32 The value to OR *pu32 with. 3839 */ 3840 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3841 DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32); 3842 #else 3843 DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32) 3844 { 3845 # if RT_INLINE_ASM_USES_INTRIN 3846 _InterlockedOr((long volatile *)pu32, (long)u32); 3847 3848 # elif RT_INLINE_ASM_GNU_STYLE 3849 __asm__ __volatile__("lock; orl %1, %0\n\t" 3850 : "=m" (*pu32) 3851 : "ir" (u32), 3852 "m" (*pu32)); 3853 # else 3854 __asm 3855 { 3856 mov eax, [u32] 3857 # ifdef RT_ARCH_AMD64 3858 mov rdx, [pu32] 3859 lock or [rdx], eax 3860 # else 3861 mov edx, [pu32] 3862 lock or [edx], eax 3863 # endif 3864 } 3865 # endif 3866 } 3867 #endif 3868 3869 3870 /** 3871 * Atomically Or a signed 32-bit value, ordered. 3872 * 3873 * @param pi32 Pointer to the pointer variable to OR u32 with. 3874 * @param i32 The value to OR *pu32 with. 3875 */ 3876 DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32) 3877 { 3878 ASMAtomicOrU32((uint32_t volatile *)pi32, i32); 3879 } 3880 3881 3882 /** 3883 * Atomically And an unsigned 32-bit value, ordered. 3884 * 3885 * @param pu32 Pointer to the pointer variable to AND u32 with. 3886 * @param u32 The value to AND *pu32 with. 3887 */ 3888 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3889 DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32); 3890 #else 3891 DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32) 3892 { 3893 # if RT_INLINE_ASM_USES_INTRIN 3894 _InterlockedAnd((long volatile *)pu32, u32); 3895 3896 # elif RT_INLINE_ASM_GNU_STYLE 3897 __asm__ __volatile__("lock; andl %1, %0\n\t" 3898 : "=m" (*pu32) 3899 : "ir" (u32), 3900 "m" (*pu32)); 3901 # else 3902 __asm 3903 { 3904 mov eax, [u32] 3905 # ifdef RT_ARCH_AMD64 3906 mov rdx, [pu32] 3907 lock and [rdx], eax 3908 # else 3909 mov edx, [pu32] 3910 lock and [edx], eax 3911 # endif 3912 } 3913 # endif 3914 } 3915 #endif 3916 3917 3918 /** 3919 * Atomically And a signed 32-bit value, ordered. 3920 * 3921 * @param pi32 Pointer to the pointer variable to AND i32 with. 3922 * @param i32 The value to AND *pi32 with. 
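ASMAtomicOrU32 and ASMAtomicAndU32 update individual bits without returning the previous value, which is enough for simple flag words. A hedged sketch with hypothetical flag names:

    #include <iprt/asm.h>   /* ASMAtomicOrU32, ASMAtomicAndU32 */

    #define MYDEV_F_BUSY    UINT32_C(0x00000001)
    #define MYDEV_F_ERROR   UINT32_C(0x00000002)

    static volatile uint32_t g_fMyDevFlags = 0;

    /* Set or clear one bit without disturbing concurrent updates to the others. */
    static void myDevMarkBusy(void)  { ASMAtomicOrU32(&g_fMyDevFlags, MYDEV_F_BUSY); }
    static void myDevClearBusy(void) { ASMAtomicAndU32(&g_fMyDevFlags, ~MYDEV_F_BUSY); }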
3923 */ 3924 DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32) 3925 { 3926 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32); 3927 } 3928 3929 3930 /** 3931 * Serialize Instruction. 3932 */ 3933 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 3934 DECLASM(void) ASMSerializeInstruction(void); 3935 #else 3936 DECLINLINE(void) ASMSerializeInstruction(void) 3937 { 3938 # if RT_INLINE_ASM_GNU_STYLE 3939 RTCCUINTREG xAX = 0; 3940 # ifdef RT_ARCH_AMD64 3941 __asm__ ("cpuid" 3942 : "=a" (xAX) 3943 : "0" (xAX) 3944 : "rbx", "rcx", "rdx"); 3945 # elif (defined(PIC) || defined(__PIC__)) && defined(__i386__) 3946 __asm__ ("push %%ebx\n\t" 3947 "cpuid\n\t" 3948 "pop %%ebx\n\t" 3949 : "=a" (xAX) 3950 : "0" (xAX) 3951 : "ecx", "edx"); 3952 # else 3953 __asm__ ("cpuid" 3954 : "=a" (xAX) 3955 : "0" (xAX) 3956 : "ebx", "ecx", "edx"); 3957 # endif 3958 3959 # elif RT_INLINE_ASM_USES_INTRIN 3960 int aInfo[4]; 3961 __cpuid(aInfo, 0); 3962 3963 # else 3964 __asm 3965 { 3966 push ebx 3967 xor eax, eax 3968 cpuid 3969 pop ebx 2531 DECLASM(void) ASMInvalidateInternalCaches(void); 2532 #else 2533 DECLINLINE(void) ASMInvalidateInternalCaches(void) 2534 { 2535 # if RT_INLINE_ASM_GNU_STYLE 2536 __asm__ __volatile__("invd"); 2537 # else 2538 __asm 2539 { 2540 invd 3970 2541 } 3971 2542 # endif … … 4037 2608 4038 2609 4039 /** 4040 * Memory fence, waits for any pending writes and reads to complete. 4041 */ 4042 DECLINLINE(void) ASMMemoryFence(void) 4043 { 4044 /** @todo use mfence? check if all cpus we care for support it. */ 4045 uint32_t volatile u32; 4046 ASMAtomicXchgU32(&u32, 0); 4047 } 4048 4049 4050 /** 4051 * Write fence, waits for any pending writes to complete. 4052 */ 4053 DECLINLINE(void) ASMWriteFence(void) 4054 { 4055 /** @todo use sfence? check if all cpus we care for support it. */ 4056 ASMMemoryFence(); 4057 } 4058 4059 4060 /** 4061 * Read fence, waits for any pending reads to complete. 4062 */ 4063 DECLINLINE(void) ASMReadFence(void) 4064 { 4065 /** @todo use lfence? check if all cpus we care for support it. */ 4066 ASMMemoryFence(); 4067 } 4068 4069 4070 /** 4071 * Atomically reads an unsigned 8-bit value, ordered. 4072 * 4073 * @returns Current *pu8 value 4074 * @param pu8 Pointer to the 8-bit variable to read. 4075 */ 4076 DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8) 4077 { 4078 ASMMemoryFence(); 4079 return *pu8; /* byte reads are atomic on x86 */ 4080 } 4081 4082 4083 /** 4084 * Atomically reads an unsigned 8-bit value, unordered. 4085 * 4086 * @returns Current *pu8 value 4087 * @param pu8 Pointer to the 8-bit variable to read. 4088 */ 4089 DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8) 4090 { 4091 return *pu8; /* byte reads are atomic on x86 */ 4092 } 4093 4094 4095 /** 4096 * Atomically reads a signed 8-bit value, ordered. 4097 * 4098 * @returns Current *pi8 value 4099 * @param pi8 Pointer to the 8-bit variable to read. 4100 */ 4101 DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8) 4102 { 4103 ASMMemoryFence(); 4104 return *pi8; /* byte reads are atomic on x86 */ 4105 } 4106 4107 4108 /** 4109 * Atomically reads a signed 8-bit value, unordered. 4110 * 4111 * @returns Current *pi8 value 4112 * @param pi8 Pointer to the 8-bit variable to read. 4113 */ 4114 DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8) 4115 { 4116 return *pi8; /* byte reads are atomic on x86 */ 4117 } 4118 4119 4120 /** 4121 * Atomically reads an unsigned 16-bit value, ordered. 
4122 * 4123 * @returns Current *pu16 value 4124 * @param pu16 Pointer to the 16-bit variable to read. 4125 */ 4126 DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16) 4127 { 4128 ASMMemoryFence(); 4129 Assert(!((uintptr_t)pu16 & 1)); 4130 return *pu16; 4131 } 4132 4133 4134 /** 4135 * Atomically reads an unsigned 16-bit value, unordered. 4136 * 4137 * @returns Current *pu16 value 4138 * @param pu16 Pointer to the 16-bit variable to read. 4139 */ 4140 DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16) 4141 { 4142 Assert(!((uintptr_t)pu16 & 1)); 4143 return *pu16; 4144 } 4145 4146 4147 /** 4148 * Atomically reads a signed 16-bit value, ordered. 4149 * 4150 * @returns Current *pi16 value 4151 * @param pi16 Pointer to the 16-bit variable to read. 4152 */ 4153 DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16) 4154 { 4155 ASMMemoryFence(); 4156 Assert(!((uintptr_t)pi16 & 1)); 4157 return *pi16; 4158 } 4159 4160 4161 /** 4162 * Atomically reads a signed 16-bit value, unordered. 4163 * 4164 * @returns Current *pi16 value 4165 * @param pi16 Pointer to the 16-bit variable to read. 4166 */ 4167 DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16) 4168 { 4169 Assert(!((uintptr_t)pi16 & 1)); 4170 return *pi16; 4171 } 4172 4173 4174 /** 4175 * Atomically reads an unsigned 32-bit value, ordered. 4176 * 4177 * @returns Current *pu32 value 4178 * @param pu32 Pointer to the 32-bit variable to read. 4179 */ 4180 DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32) 4181 { 4182 ASMMemoryFence(); 4183 Assert(!((uintptr_t)pu32 & 3)); 4184 return *pu32; 4185 } 4186 4187 4188 /** 4189 * Atomically reads an unsigned 32-bit value, unordered. 4190 * 4191 * @returns Current *pu32 value 4192 * @param pu32 Pointer to the 32-bit variable to read. 4193 */ 4194 DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32) 4195 { 4196 Assert(!((uintptr_t)pu32 & 3)); 4197 return *pu32; 4198 } 4199 4200 4201 /** 4202 * Atomically reads a signed 32-bit value, ordered. 4203 * 4204 * @returns Current *pi32 value 4205 * @param pi32 Pointer to the 32-bit variable to read. 4206 */ 4207 DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32) 4208 { 4209 ASMMemoryFence(); 4210 Assert(!((uintptr_t)pi32 & 3)); 4211 return *pi32; 4212 } 4213 4214 4215 /** 4216 * Atomically reads a signed 32-bit value, unordered. 4217 * 4218 * @returns Current *pi32 value 4219 * @param pi32 Pointer to the 32-bit variable to read. 4220 */ 4221 DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32) 4222 { 4223 Assert(!((uintptr_t)pi32 & 3)); 4224 return *pi32; 4225 } 4226 4227 4228 /** 4229 * Atomically reads an unsigned 64-bit value, ordered. 4230 * 4231 * @returns Current *pu64 value 4232 * @param pu64 Pointer to the 64-bit variable to read. 4233 * The memory pointed to must be writable. 4234 * @remark This will fault if the memory is read-only! 
4235 */ 4236 #if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \ 4237 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 4238 DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64); 4239 #else 4240 DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64) 4241 { 4242 uint64_t u64; 4243 # ifdef RT_ARCH_AMD64 4244 Assert(!((uintptr_t)pu64 & 7)); 4245 /*# if RT_INLINE_ASM_GNU_STYLE 4246 __asm__ __volatile__( "mfence\n\t" 4247 "movq %1, %0\n\t" 4248 : "=r" (u64) 4249 : "m" (*pu64)); 4250 # else 4251 __asm 4252 { 4253 mfence 4254 mov rdx, [pu64] 4255 mov rax, [rdx] 4256 mov [u64], rax 4257 } 4258 # endif*/ 4259 ASMMemoryFence(); 4260 u64 = *pu64; 4261 # else /* !RT_ARCH_AMD64 */ 4262 # if RT_INLINE_ASM_GNU_STYLE 4263 # if defined(PIC) || defined(__PIC__) 4264 uint32_t u32EBX = 0; 4265 Assert(!((uintptr_t)pu64 & 7)); 4266 __asm__ __volatile__("xchgl %%ebx, %3\n\t" 4267 "lock; cmpxchg8b (%5)\n\t" 4268 "movl %3, %%ebx\n\t" 4269 : "=A" (u64), 4270 # if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403 4271 "+m" (*pu64) 4272 # else 4273 "=m" (*pu64) 4274 # endif 4275 : "0" (0), 4276 "m" (u32EBX), 4277 "c" (0), 4278 "S" (pu64)); 4279 # else /* !PIC */ 4280 __asm__ __volatile__("lock; cmpxchg8b %1\n\t" 4281 : "=A" (u64), 4282 "+m" (*pu64) 4283 : "0" (0), 4284 "b" (0), 4285 "c" (0)); 4286 # endif 4287 # else 4288 Assert(!((uintptr_t)pu64 & 7)); 4289 __asm 4290 { 4291 xor eax, eax 4292 xor edx, edx 4293 mov edi, pu64 4294 xor ecx, ecx 4295 xor ebx, ebx 4296 lock cmpxchg8b [edi] 4297 mov dword ptr [u64], eax 4298 mov dword ptr [u64 + 4], edx 4299 } 4300 # endif 4301 # endif /* !RT_ARCH_AMD64 */ 4302 return u64; 4303 } 4304 #endif 4305 4306 4307 /** 4308 * Atomically reads an unsigned 64-bit value, unordered. 4309 * 4310 * @returns Current *pu64 value 4311 * @param pu64 Pointer to the 64-bit variable to read. 4312 * The memory pointed to must be writable. 4313 * @remark This will fault if the memory is read-only! 
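On 32-bit x86 the 64-bit read above is implemented with lock cmpxchg8b, which is why the variable must be writable even for a pure reader. A hedged sketch pairing it with the 64-bit exchange from earlier in this diff (the timestamp variable is made up):

    #include <iprt/asm.h>   /* ASMAtomicReadU64, ASMAtomicXchgU64 */

    static volatile uint64_t g_u64LastSeenTS;   /* hypothetical shared value */

    static void noteTimestamp(uint64_t u64Now)
    {
        ASMAtomicXchgU64(&g_u64LastSeenTS, u64Now);     /* ordered store */
    }

    static uint64_t lastTimestamp(void)
    {
        /* Works on 32-bit and 64-bit hosts; on x86 this issues
           lock cmpxchg8b, so g_u64LastSeenTS must not be read-only. */
        return ASMAtomicReadU64(&g_u64LastSeenTS);
    }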
4314 */ 4315 #if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \ 4316 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 4317 DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64); 4318 #else 4319 DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64) 4320 { 4321 uint64_t u64; 4322 # ifdef RT_ARCH_AMD64 4323 Assert(!((uintptr_t)pu64 & 7)); 4324 /*# if RT_INLINE_ASM_GNU_STYLE 4325 Assert(!((uintptr_t)pu64 & 7)); 4326 __asm__ __volatile__("movq %1, %0\n\t" 4327 : "=r" (u64) 4328 : "m" (*pu64)); 4329 # else 4330 __asm 4331 { 4332 mov rdx, [pu64] 4333 mov rax, [rdx] 4334 mov [u64], rax 4335 } 4336 # endif */ 4337 u64 = *pu64; 4338 # else /* !RT_ARCH_AMD64 */ 4339 # if RT_INLINE_ASM_GNU_STYLE 4340 # if defined(PIC) || defined(__PIC__) 4341 uint32_t u32EBX = 0; 4342 uint32_t u32Spill; 4343 Assert(!((uintptr_t)pu64 & 7)); 4344 __asm__ __volatile__("xor %%eax,%%eax\n\t" 4345 "xor %%ecx,%%ecx\n\t" 4346 "xor %%edx,%%edx\n\t" 4347 "xchgl %%ebx, %3\n\t" 4348 "lock; cmpxchg8b (%4)\n\t" 4349 "movl %3, %%ebx\n\t" 4350 : "=A" (u64), 4351 # if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403 4352 "+m" (*pu64), 4353 # else 4354 "=m" (*pu64), 4355 # endif 4356 "=c" (u32Spill) 4357 : "m" (u32EBX), 4358 "S" (pu64)); 4359 # else /* !PIC */ 4360 __asm__ __volatile__("lock; cmpxchg8b %1\n\t" 4361 : "=A" (u64), 4362 "+m" (*pu64) 4363 : "0" (0), 4364 "b" (0), 4365 "c" (0)); 4366 # endif 4367 # else 4368 Assert(!((uintptr_t)pu64 & 7)); 4369 __asm 4370 { 4371 xor eax, eax 4372 xor edx, edx 4373 mov edi, pu64 4374 xor ecx, ecx 4375 xor ebx, ebx 4376 lock cmpxchg8b [edi] 4377 mov dword ptr [u64], eax 4378 mov dword ptr [u64 + 4], edx 4379 } 4380 # endif 4381 # endif /* !RT_ARCH_AMD64 */ 4382 return u64; 4383 } 4384 #endif 4385 4386 4387 /** 4388 * Atomically reads a signed 64-bit value, ordered. 4389 * 4390 * @returns Current *pi64 value 4391 * @param pi64 Pointer to the 64-bit variable to read. 4392 * The memory pointed to must be writable. 4393 * @remark This will fault if the memory is read-only! 4394 */ 4395 DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64) 4396 { 4397 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64); 4398 } 4399 4400 4401 /** 4402 * Atomically reads a signed 64-bit value, unordered. 4403 * 4404 * @returns Current *pi64 value 4405 * @param pi64 Pointer to the 64-bit variable to read. 4406 * The memory pointed to must be writable. 4407 * @remark This will fault if the memory is read-only! 4408 */ 4409 DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64) 4410 { 4411 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64); 4412 } 4413 4414 4415 /** 4416 * Atomically reads a pointer value, ordered. 4417 * 4418 * @returns Current *pv value 4419 * @param ppv Pointer to the pointer variable to read. 4420 */ 4421 DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv) 4422 { 4423 #if ARCH_BITS == 32 4424 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv); 4425 #elif ARCH_BITS == 64 4426 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv); 4427 #else 4428 # error "ARCH_BITS is bogus" 4429 #endif 4430 } 4431 4432 4433 /** 4434 * Atomically reads a pointer value, unordered. 4435 * 4436 * @returns Current *pv value 4437 * @param ppv Pointer to the pointer variable to read. 
4438 */ 4439 DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv) 4440 { 4441 #if ARCH_BITS == 32 4442 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv); 4443 #elif ARCH_BITS == 64 4444 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv); 4445 #else 4446 # error "ARCH_BITS is bogus" 4447 #endif 4448 } 4449 4450 4451 /** 4452 * Atomically reads a boolean value, ordered. 4453 * 4454 * @returns Current *pf value 4455 * @param pf Pointer to the boolean variable to read. 4456 */ 4457 DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf) 4458 { 4459 ASMMemoryFence(); 4460 return *pf; /* byte reads are atomic on x86 */ 4461 } 4462 4463 4464 /** 4465 * Atomically reads a boolean value, unordered. 4466 * 4467 * @returns Current *pf value 4468 * @param pf Pointer to the boolean variable to read. 4469 */ 4470 DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf) 4471 { 4472 return *pf; /* byte reads are atomic on x86 */ 4473 } 4474 4475 4476 /** 4477 * Atomically read a typical IPRT handle value, ordered. 4478 * 4479 * @param ph Pointer to the handle variable to read. 4480 * @param phRes Where to store the result. 4481 * 4482 * @remarks This doesn't currently work for all handles (like RTFILE). 4483 */ 4484 #if HC_ARCH_BITS == 32 4485 # define ASMAtomicReadHandle(ph, phRes) \ 4486 do { \ 4487 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 4488 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \ 4489 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \ 4490 } while (0) 4491 #elif HC_ARCH_BITS == 64 4492 # define ASMAtomicReadHandle(ph, phRes) \ 4493 do { \ 4494 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 4495 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \ 4496 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \ 4497 } while (0) 4498 #else 4499 # error HC_ARCH_BITS 4500 #endif 4501 4502 4503 /** 4504 * Atomically read a typical IPRT handle value, unordered. 4505 * 4506 * @param ph Pointer to the handle variable to read. 4507 * @param phRes Where to store the result. 4508 * 4509 * @remarks This doesn't currently work for all handles (like RTFILE). 4510 */ 4511 #if HC_ARCH_BITS == 32 4512 # define ASMAtomicUoReadHandle(ph, phRes) \ 4513 do { \ 4514 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 4515 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \ 4516 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \ 4517 } while (0) 4518 #elif HC_ARCH_BITS == 64 4519 # define ASMAtomicUoReadHandle(ph, phRes) \ 4520 do { \ 4521 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 4522 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \ 4523 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \ 4524 } while (0) 4525 #else 4526 # error HC_ARCH_BITS 4527 #endif 4528 4529 4530 /** 4531 * Atomically read a value which size might differ 4532 * between platforms or compilers, ordered. 4533 * 4534 * @param pu Pointer to the variable to update. 4535 * @param puRes Where to store the result. 
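 *
 * A minimal usage sketch using size_t, whose width differs between 32-bit
 * and 64-bit targets (the variable names are made up):
 * @code
 *      static volatile size_t s_cbPending;
 *      size_t cbSnapshot;
 *      ASMAtomicReadSize(&s_cbPending, &cbSnapshot);
 * @endcode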
4536 */ 4537 #define ASMAtomicReadSize(pu, puRes) \ 4538 do { \ 4539 switch (sizeof(*(pu))) { \ 4540 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \ 4541 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \ 4542 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \ 4543 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \ 4544 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \ 4545 } \ 4546 } while (0) 4547 4548 4549 /** 4550 * Atomically read a value which size might differ 4551 * between platforms or compilers, unordered. 4552 * 4553 * @param pu Pointer to the variable to read. 4554 * @param puRes Where to store the result. 4555 */ 4556 #define ASMAtomicUoReadSize(pu, puRes) \ 4557 do { \ 4558 switch (sizeof(*(pu))) { \ 4559 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \ 4560 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \ 4561 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \ 4562 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \ 4563 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \ 4564 } \ 4565 } while (0) 4566 4567 4568 /** 4569 * Atomically writes an unsigned 8-bit value, ordered. 4570 * 4571 * @param pu8 Pointer to the 8-bit variable. 4572 * @param u8 The 8-bit value to assign to *pu8. 4573 */ 4574 DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8) 4575 { 4576 ASMAtomicXchgU8(pu8, u8); 4577 } 4578 4579 4580 /** 4581 * Atomically writes an unsigned 8-bit value, unordered. 4582 * 4583 * @param pu8 Pointer to the 8-bit variable. 4584 * @param u8 The 8-bit value to assign to *pu8. 4585 */ 4586 DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8) 4587 { 4588 *pu8 = u8; /* byte writes are atomic on x86 */ 4589 } 4590 4591 4592 /** 4593 * Atomically writes a signed 8-bit value, ordered. 4594 * 4595 * @param pi8 Pointer to the 8-bit variable to read. 4596 * @param i8 The 8-bit value to assign to *pi8. 4597 */ 4598 DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8) 4599 { 4600 ASMAtomicXchgS8(pi8, i8); 4601 } 4602 4603 4604 /** 4605 * Atomically writes a signed 8-bit value, unordered. 4606 * 4607 * @param pi8 Pointer to the 8-bit variable to read. 4608 * @param i8 The 8-bit value to assign to *pi8. 4609 */ 4610 DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8) 4611 { 4612 *pi8 = i8; /* byte writes are atomic on x86 */ 4613 } 4614 4615 4616 /** 4617 * Atomically writes an unsigned 16-bit value, ordered. 4618 * 4619 * @param pu16 Pointer to the 16-bit variable. 4620 * @param u16 The 16-bit value to assign to *pu16. 4621 */ 4622 DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16) 4623 { 4624 ASMAtomicXchgU16(pu16, u16); 4625 } 4626 4627 4628 /** 4629 * Atomically writes an unsigned 16-bit value, unordered. 4630 * 4631 * @param pu16 Pointer to the 16-bit variable. 4632 * @param u16 The 16-bit value to assign to *pu16. 4633 */ 4634 DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16) 4635 { 4636 Assert(!((uintptr_t)pu16 & 1)); 4637 *pu16 = u16; 4638 } 4639 4640 4641 /** 4642 * Atomically writes a signed 16-bit value, ordered. 
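 *
 * A minimal usage sketch (the variable is made up):
 * @code
 *      static volatile int16_t s_i16State;
 *      ASMAtomicWriteS16(&s_i16State, -1);
 * @endcode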
4643 * 4644 * @param pi16 Pointer to the 16-bit variable to read. 4645 * @param i16 The 16-bit value to assign to *pi16. 4646 */ 4647 DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16) 4648 { 4649 ASMAtomicXchgS16(pi16, i16); 4650 } 4651 4652 4653 /** 4654 * Atomically writes a signed 16-bit value, unordered. 4655 * 4656 * @param pi16 Pointer to the 16-bit variable to read. 4657 * @param i16 The 16-bit value to assign to *pi16. 4658 */ 4659 DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16) 4660 { 4661 Assert(!((uintptr_t)pi16 & 1)); 4662 *pi16 = i16; 4663 } 4664 4665 4666 /** 4667 * Atomically writes an unsigned 32-bit value, ordered. 4668 * 4669 * @param pu32 Pointer to the 32-bit variable. 4670 * @param u32 The 32-bit value to assign to *pu32. 4671 */ 4672 DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32) 4673 { 4674 ASMAtomicXchgU32(pu32, u32); 4675 } 4676 4677 4678 /** 4679 * Atomically writes an unsigned 32-bit value, unordered. 4680 * 4681 * @param pu32 Pointer to the 32-bit variable. 4682 * @param u32 The 32-bit value to assign to *pu32. 4683 */ 4684 DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32) 4685 { 4686 Assert(!((uintptr_t)pu32 & 3)); 4687 *pu32 = u32; 4688 } 4689 4690 4691 /** 4692 * Atomically writes a signed 32-bit value, ordered. 4693 * 4694 * @param pi32 Pointer to the 32-bit variable to read. 4695 * @param i32 The 32-bit value to assign to *pi32. 4696 */ 4697 DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32) 4698 { 4699 ASMAtomicXchgS32(pi32, i32); 4700 } 4701 4702 4703 /** 4704 * Atomically writes a signed 32-bit value, unordered. 4705 * 4706 * @param pi32 Pointer to the 32-bit variable to read. 4707 * @param i32 The 32-bit value to assign to *pi32. 4708 */ 4709 DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32) 4710 { 4711 Assert(!((uintptr_t)pi32 & 3)); 4712 *pi32 = i32; 4713 } 4714 4715 4716 /** 4717 * Atomically writes an unsigned 64-bit value, ordered. 4718 * 4719 * @param pu64 Pointer to the 64-bit variable. 4720 * @param u64 The 64-bit value to assign to *pu64. 4721 */ 4722 DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64) 4723 { 4724 ASMAtomicXchgU64(pu64, u64); 4725 } 4726 4727 4728 /** 4729 * Atomically writes an unsigned 64-bit value, unordered. 4730 * 4731 * @param pu64 Pointer to the 64-bit variable. 4732 * @param u64 The 64-bit value to assign to *pu64. 4733 */ 4734 DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64) 4735 { 4736 Assert(!((uintptr_t)pu64 & 7)); 4737 #if ARCH_BITS == 64 4738 *pu64 = u64; 4739 #else 4740 ASMAtomicXchgU64(pu64, u64); 4741 #endif 4742 } 4743 4744 4745 /** 4746 * Atomically writes a signed 64-bit value, ordered. 4747 * 4748 * @param pi64 Pointer to the 64-bit variable. 4749 * @param i64 The 64-bit value to assign to *pi64. 4750 */ 4751 DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64) 4752 { 4753 ASMAtomicXchgS64(pi64, i64); 4754 } 4755 4756 4757 /** 4758 * Atomically writes a signed 64-bit value, unordered. 4759 * 4760 * @param pi64 Pointer to the 64-bit variable. 4761 * @param i64 The 64-bit value to assign to *pi64. 
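 *
 * A minimal usage sketch (the variable is made up). Note that on 32-bit
 * hosts this still goes through ASMAtomicXchgS64, since a plain 64-bit
 * store would not be atomic there:
 * @code
 *      static volatile int64_t s_i64LastTimestamp;
 *      ASMAtomicUoWriteS64(&s_i64LastTimestamp, 0);
 * @endcode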
4762 */ 4763 DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64) 4764 { 4765 Assert(!((uintptr_t)pi64 & 7)); 4766 #if ARCH_BITS == 64 4767 *pi64 = i64; 4768 #else 4769 ASMAtomicXchgS64(pi64, i64); 4770 #endif 4771 } 4772 4773 4774 /** 4775 * Atomically writes a boolean value, unordered. 4776 * 4777 * @param pf Pointer to the boolean variable. 4778 * @param f The boolean value to assign to *pf. 4779 */ 4780 DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f) 4781 { 4782 ASMAtomicWriteU8((uint8_t volatile *)pf, f); 4783 } 4784 4785 4786 /** 4787 * Atomically writes a boolean value, unordered. 4788 * 4789 * @param pf Pointer to the boolean variable. 4790 * @param f The boolean value to assign to *pf. 4791 */ 4792 DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f) 4793 { 4794 *pf = f; /* byte writes are atomic on x86 */ 4795 } 4796 4797 4798 /** 4799 * Atomically writes a pointer value, ordered. 4800 * 4801 * @returns Current *pv value 4802 * @param ppv Pointer to the pointer variable. 4803 * @param pv The pointer value to assigne to *ppv. 4804 */ 4805 DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv) 4806 { 4807 #if ARCH_BITS == 32 4808 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv); 4809 #elif ARCH_BITS == 64 4810 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv); 4811 #else 4812 # error "ARCH_BITS is bogus" 4813 #endif 4814 } 4815 4816 4817 /** 4818 * Atomically writes a pointer value, unordered. 4819 * 4820 * @returns Current *pv value 4821 * @param ppv Pointer to the pointer variable. 4822 * @param pv The pointer value to assigne to *ppv. 4823 */ 4824 DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv) 4825 { 4826 #if ARCH_BITS == 32 4827 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv); 4828 #elif ARCH_BITS == 64 4829 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv); 4830 #else 4831 # error "ARCH_BITS is bogus" 4832 #endif 4833 } 4834 4835 4836 /** 4837 * Atomically write a typical IPRT handle value, ordered. 4838 * 4839 * @param ph Pointer to the variable to update. 4840 * @param hNew The value to assign to *ph. 4841 * 4842 * @remarks This doesn't currently work for all handles (like RTFILE). 4843 */ 4844 #if HC_ARCH_BITS == 32 4845 # define ASMAtomicWriteHandle(ph, hNew) \ 4846 do { \ 4847 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 4848 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \ 4849 } while (0) 4850 #elif HC_ARCH_BITS == 64 4851 # define ASMAtomicWriteHandle(ph, hNew) \ 4852 do { \ 4853 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 4854 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \ 4855 } while (0) 4856 #else 4857 # error HC_ARCH_BITS 4858 #endif 4859 4860 4861 /** 4862 * Atomically write a typical IPRT handle value, unordered. 4863 * 4864 * @param ph Pointer to the variable to update. 4865 * @param hNew The value to assign to *ph. 4866 * 4867 * @remarks This doesn't currently work for all handles (like RTFILE). 
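 *
 * A minimal usage sketch; RTSEMEVENT is used only because it is a typical
 * pointer sized IPRT handle type, and s_hSharedEvt is a made-up variable:
 * @code
 *      static RTSEMEVENT volatile s_hSharedEvt;
 *      ASMAtomicUoWriteHandle(&s_hSharedEvt, NIL_RTSEMEVENT);
 * @endcode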
4868 */ 4869 #if HC_ARCH_BITS == 32 4870 # define ASMAtomicUoWriteHandle(ph, hNew) \ 4871 do { \ 4872 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \ 4873 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \ 4874 } while (0) 4875 #elif HC_ARCH_BITS == 64 4876 # define ASMAtomicUoWriteHandle(ph, hNew) \ 4877 do { \ 4878 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \ 4879 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \ 4880 } while (0) 4881 #else 4882 # error HC_ARCH_BITS 4883 #endif 4884 4885 4886 /** 4887 * Atomically write a value which size might differ 4888 * between platforms or compilers, ordered. 4889 * 4890 * @param pu Pointer to the variable to update. 4891 * @param uNew The value to assign to *pu. 4892 */ 4893 #define ASMAtomicWriteSize(pu, uNew) \ 4894 do { \ 4895 switch (sizeof(*(pu))) { \ 4896 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \ 4897 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \ 4898 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \ 4899 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \ 4900 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \ 4901 } \ 4902 } while (0) 4903 4904 /** 4905 * Atomically write a value which size might differ 4906 * between platforms or compilers, unordered. 4907 * 4908 * @param pu Pointer to the variable to update. 4909 * @param uNew The value to assign to *pu. 4910 */ 4911 #define ASMAtomicUoWriteSize(pu, uNew) \ 4912 do { \ 4913 switch (sizeof(*(pu))) { \ 4914 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \ 4915 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \ 4916 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \ 4917 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \ 4918 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \ 4919 } \ 4920 } while (0) 4921 4922 4923 4924 4925 /** 4926 * Invalidate page. 4927 * 4928 * @param pv Address of the page to invalidate. 4929 */ 4930 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 4931 DECLASM(void) ASMInvalidatePage(void *pv); 4932 #else 4933 DECLINLINE(void) ASMInvalidatePage(void *pv) 4934 { 4935 # if RT_INLINE_ASM_USES_INTRIN 4936 __invlpg(pv); 4937 4938 # elif RT_INLINE_ASM_GNU_STYLE 4939 __asm__ __volatile__("invlpg %0\n\t" 4940 : : "m" (*(uint8_t *)pv)); 4941 # else 4942 __asm 4943 { 4944 # ifdef RT_ARCH_AMD64 4945 mov rax, [pv] 4946 invlpg [rax] 4947 # else 4948 mov eax, [pv] 4949 invlpg [eax] 4950 # endif 4951 } 4952 # endif 4953 } 4954 #endif 4955 4956 4957 /** 4958 * Write back the internal caches and invalidate them. 4959 */ 4960 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 4961 DECLASM(void) ASMWriteBackAndInvalidateCaches(void); 4962 #else 4963 DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void) 4964 { 4965 # if RT_INLINE_ASM_USES_INTRIN 4966 __wbinvd(); 4967 4968 # elif RT_INLINE_ASM_GNU_STYLE 4969 __asm__ __volatile__("wbinvd"); 4970 # else 4971 __asm 4972 { 4973 wbinvd 4974 } 4975 # endif 4976 } 4977 #endif 4978 4979 4980 /** 4981 * Invalidate internal and (perhaps) external caches without first 4982 * flushing dirty cache lines. Use with extreme care. 
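 *
 * @remarks As opposed to ASMWriteBackAndInvalidateCaches (WBINVD), this maps
 *          to the INVD instruction, i.e. any dirty cache lines are simply
 *          discarded rather than written back to memory first.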
4983 */ 4984 #if RT_INLINE_ASM_EXTERNAL 4985 DECLASM(void) ASMInvalidateInternalCaches(void); 4986 #else 4987 DECLINLINE(void) ASMInvalidateInternalCaches(void) 4988 { 4989 # if RT_INLINE_ASM_GNU_STYLE 4990 __asm__ __volatile__("invd"); 4991 # else 4992 __asm 4993 { 4994 invd 4995 } 4996 # endif 4997 } 4998 #endif 4999 5000 5001 #if defined(PAGE_SIZE) && !defined(NT_INCLUDED) 5002 # if PAGE_SIZE != 0x1000 5003 # error "PAGE_SIZE is not 0x1000!" 5004 # endif 5005 #endif 5006 5007 /** 5008 * Zeros a 4K memory page. 5009 * 5010 * @param pv Pointer to the memory block. This must be page aligned. 5011 */ 5012 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 5013 DECLASM(void) ASMMemZeroPage(volatile void *pv); 5014 # else 5015 DECLINLINE(void) ASMMemZeroPage(volatile void *pv) 5016 { 5017 # if RT_INLINE_ASM_USES_INTRIN 5018 # ifdef RT_ARCH_AMD64 5019 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8); 5020 # else 5021 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4); 5022 # endif 5023 5024 # elif RT_INLINE_ASM_GNU_STYLE 5025 RTCCUINTREG uDummy; 5026 # ifdef RT_ARCH_AMD64 5027 __asm__ __volatile__("rep stosq" 5028 : "=D" (pv), 5029 "=c" (uDummy) 5030 : "0" (pv), 5031 "c" (0x1000 >> 3), 5032 "a" (0) 5033 : "memory"); 5034 # else 5035 __asm__ __volatile__("rep stosl" 5036 : "=D" (pv), 5037 "=c" (uDummy) 5038 : "0" (pv), 5039 "c" (0x1000 >> 2), 5040 "a" (0) 5041 : "memory"); 5042 # endif 5043 # else 5044 __asm 5045 { 5046 # ifdef RT_ARCH_AMD64 5047 xor rax, rax 5048 mov ecx, 0200h 5049 mov rdi, [pv] 5050 rep stosq 5051 # else 5052 xor eax, eax 5053 mov ecx, 0400h 5054 mov edi, [pv] 5055 rep stosd 5056 # endif 5057 } 5058 # endif 5059 } 5060 # endif 5061 5062 5063 /** 5064 * Zeros a memory block with a 32-bit aligned size. 5065 * 5066 * @param pv Pointer to the memory block. 5067 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit! 5068 */ 5069 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 5070 DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb); 5071 #else 5072 DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb) 5073 { 5074 # if RT_INLINE_ASM_USES_INTRIN 5075 # ifdef RT_ARCH_AMD64 5076 if (!(cb & 7)) 5077 __stosq((unsigned __int64 *)pv, 0, cb / 8); 5078 else 5079 # endif 5080 __stosd((unsigned long *)pv, 0, cb / 4); 5081 5082 # elif RT_INLINE_ASM_GNU_STYLE 5083 __asm__ __volatile__("rep stosl" 5084 : "=D" (pv), 5085 "=c" (cb) 5086 : "0" (pv), 5087 "1" (cb >> 2), 5088 "a" (0) 5089 : "memory"); 5090 # else 5091 __asm 5092 { 5093 xor eax, eax 5094 # ifdef RT_ARCH_AMD64 5095 mov rcx, [cb] 5096 shr rcx, 2 5097 mov rdi, [pv] 5098 # else 5099 mov ecx, [cb] 5100 shr ecx, 2 5101 mov edi, [pv] 5102 # endif 5103 rep stosd 5104 } 5105 # endif 5106 } 5107 #endif 5108 5109 5110 /** 5111 * Fills a memory block with a 32-bit aligned size. 5112 * 5113 * @param pv Pointer to the memory block. 5114 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit! 5115 * @param u32 The value to fill with. 
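 *
 * A minimal usage sketch (the array is made up):
 * @code
 *      uint32_t au32Pattern[64];
 *      ASMMemFill32(au32Pattern, sizeof(au32Pattern), UINT32_C(0xfeedface));
 * @endcode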
5116 */ 5117 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 5118 DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32); 5119 #else 5120 DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32) 5121 { 5122 # if RT_INLINE_ASM_USES_INTRIN 5123 # ifdef RT_ARCH_AMD64 5124 if (!(cb & 7)) 5125 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8); 5126 else 5127 # endif 5128 __stosd((unsigned long *)pv, u32, cb / 4); 5129 5130 # elif RT_INLINE_ASM_GNU_STYLE 5131 __asm__ __volatile__("rep stosl" 5132 : "=D" (pv), 5133 "=c" (cb) 5134 : "0" (pv), 5135 "1" (cb >> 2), 5136 "a" (u32) 5137 : "memory"); 5138 # else 5139 __asm 5140 { 5141 # ifdef RT_ARCH_AMD64 5142 mov rcx, [cb] 5143 shr rcx, 2 5144 mov rdi, [pv] 5145 # else 5146 mov ecx, [cb] 5147 shr ecx, 2 5148 mov edi, [pv] 5149 # endif 5150 mov eax, [u32] 5151 rep stosd 5152 } 5153 # endif 5154 } 5155 #endif 5156 5157 5158 /** 5159 * Checks if a memory page is all zeros. 5160 * 5161 * @returns true / false. 5162 * 5163 * @param pvPage Pointer to the page. Must be aligned on 16 byte 5164 * boundrary 5165 */ 5166 DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage) 5167 { 5168 # if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */ 5169 union { RTCCUINTREG r; bool f; } uAX; 5170 RTCCUINTREG xCX, xDI; 5171 Assert(!((uintptr_t)pvPage & 15)); 5172 __asm__ __volatile__("repe; " 5173 # ifdef RT_ARCH_AMD64 5174 "scasq\n\t" 5175 # else 5176 "scasl\n\t" 5177 # endif 5178 "setnc %%al\n\t" 5179 : "=&c" (xCX), 5180 "=&D" (xDI), 5181 "=&a" (uAX.r) 5182 : "mr" (pvPage), 5183 # ifdef RT_ARCH_AMD64 5184 "0" (0x1000/8), 5185 # else 5186 "0" (0x1000/4), 5187 # endif 5188 "1" (pvPage), 5189 "2" (0)); 5190 return uAX.f; 5191 # else 5192 uintptr_t const *puPtr = (uintptr_t const *)pvPage; 5193 int cLeft = 0x1000 / sizeof(uintptr_t) / 8; 5194 Assert(!((uintptr_t)pvPage & 15)); 5195 for (;;) 5196 { 5197 if (puPtr[0]) return false; 5198 if (puPtr[4]) return false; 5199 5200 if (puPtr[2]) return false; 5201 if (puPtr[6]) return false; 5202 5203 if (puPtr[1]) return false; 5204 if (puPtr[5]) return false; 5205 5206 if (puPtr[3]) return false; 5207 if (puPtr[7]) return false; 5208 5209 if (!--cLeft) 5210 return true; 5211 puPtr += 8; 5212 } 5213 return true; 5214 # endif 5215 } 5216 5217 5218 /** 5219 * Checks if a memory block is filled with the specified byte. 5220 * 5221 * This is a sort of inverted memchr. 5222 * 5223 * @returns Pointer to the byte which doesn't equal u8. 5224 * @returns NULL if all equal to u8. 5225 * 5226 * @param pv Pointer to the memory block. 5227 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit! 5228 * @param u8 The value it's supposed to be filled with. 5229 * 5230 * @todo Fix name, it is a predicate function but it's not returning boolean! 5231 */ 5232 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 5233 DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8); 5234 #else 5235 DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8) 5236 { 5237 /** @todo rewrite this in inline assembly? */ 5238 uint8_t const *pb = (uint8_t const *)pv; 5239 for (; cb; cb--, pb++) 5240 if (RT_UNLIKELY(*pb != u8)) 5241 return (void *)pb; 5242 return NULL; 5243 } 5244 #endif 5245 5246 5247 /** 5248 * Checks if a memory block is filled with the specified 32-bit value. 5249 * 5250 * This is a sort of inverted memchr. 5251 * 5252 * @returns Pointer to the first value which doesn't equal u32. 5253 * @returns NULL if all equal to u32. 
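 *
 * A minimal usage sketch (pvBuf and cbBuf are assumed to be supplied by the
 * caller, cbBuf being a multiple of 4):
 * @code
 *      uint32_t *pu32Bad  = ASMMemIsAllU32(pvBuf, cbBuf, 0);
 *      bool      fAllZero = pu32Bad == NULL;
 * @endcode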
5254 * 5255 * @param pv Pointer to the memory block. 5256 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit! 5257 * @param u32 The value it's supposed to be filled with. 5258 * 5259 * @todo Fix name, it is a predicate function but it's not returning boolean! 5260 */ 5261 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN 5262 DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32); 5263 #else 5264 DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32) 5265 { 5266 /** @todo rewrite this in inline assembly? */ 5267 uint32_t const *pu32 = (uint32_t const *)pv; 5268 for (; cb; cb -= 4, pu32++) 5269 if (RT_UNLIKELY(*pu32 != u32)) 5270 return (uint32_t *)pu32; 5271 return NULL; 5272 } 5273 #endif 5274 2610 /** @name Interger Math Optimizations 2611 * @{ */ 5275 2612 5276 2613 /** … … 5553 2890 #endif 5554 2891 5555 5556 /**5557 * Probes a byte pointer for read access.5558 *5559 * While the function will not fault if the byte is not read accessible,5560 * the idea is to do this in a safe place like before acquiring locks5561 * and such like.5562 *5563 * Also, this functions guarantees that an eager compiler is not going5564 * to optimize the probing away.5565 *5566 * @param pvByte Pointer to the byte.5567 */5568 #if RT_INLINE_ASM_EXTERNAL5569 DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);5570 #else5571 DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)5572 {5573 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */5574 uint8_t u8;5575 # if RT_INLINE_ASM_GNU_STYLE5576 __asm__ __volatile__("movb (%1), %0\n\t"5577 : "=r" (u8)5578 : "r" (pvByte));5579 # else5580 __asm5581 {5582 # ifdef RT_ARCH_AMD645583 mov rax, [pvByte]5584 mov al, [rax]5585 # else5586 mov eax, [pvByte]5587 mov al, [eax]5588 # endif5589 mov [u8], al5590 }5591 # endif5592 return u8;5593 }5594 #endif5595 5596 /**5597 * Probes a buffer for read access page by page.5598 *5599 * While the function will fault if the buffer is not fully read5600 * accessible, the idea is to do this in a safe place like before5601 * acquiring locks and such like.5602 *5603 * Also, this functions guarantees that an eager compiler is not going5604 * to optimize the probing away.5605 *5606 * @param pvBuf Pointer to the buffer.5607 * @param cbBuf The size of the buffer in bytes. Must be >= 1.5608 */5609 DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)5610 {5611 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */5612 /* the first byte */5613 const uint8_t *pu8 = (const uint8_t *)pvBuf;5614 ASMProbeReadByte(pu8);5615 5616 /* the pages in between pages. 
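       (one probing read per 4K page; 0x1000 is used directly as the page size here)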
*/5617 while (cbBuf > /*PAGE_SIZE*/0x1000)5618 {5619 ASMProbeReadByte(pu8);5620 cbBuf -= /*PAGE_SIZE*/0x1000;5621 pu8 += /*PAGE_SIZE*/0x1000;5622 }5623 5624 /* the last byte */5625 ASMProbeReadByte(pu8 + cbBuf - 1);5626 }5627 5628 5629 /** @def ASMBreakpoint5630 * Debugger Breakpoint.5631 * @remark In the gnu world we add a nop instruction after the int3 to5632 * force gdb to remain at the int3 source line.5633 * @remark The L4 kernel will try make sense of the breakpoint, thus the jmp.5634 * @internal5635 */5636 #if RT_INLINE_ASM_GNU_STYLE5637 # ifndef __L4ENV__5638 # define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)5639 # else5640 # define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)5641 # endif5642 #else5643 # define ASMBreakpoint() __debugbreak()5644 #endif5645 5646 5647 5648 /** @defgroup grp_inline_bits Bit Operations5649 * @{5650 */5651 5652 5653 /**5654 * Sets a bit in a bitmap.5655 *5656 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.5657 * @param iBit The bit to set.5658 *5659 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.5660 * However, doing so will yield better performance as well as avoiding5661 * traps accessing the last bits in the bitmap.5662 */5663 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5664 DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);5665 #else5666 DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)5667 {5668 # if RT_INLINE_ASM_USES_INTRIN5669 _bittestandset((long *)pvBitmap, iBit);5670 5671 # elif RT_INLINE_ASM_GNU_STYLE5672 __asm__ __volatile__("btsl %1, %0"5673 : "=m" (*(volatile long *)pvBitmap)5674 : "Ir" (iBit),5675 "m" (*(volatile long *)pvBitmap)5676 : "memory");5677 # else5678 __asm5679 {5680 # ifdef RT_ARCH_AMD645681 mov rax, [pvBitmap]5682 mov edx, [iBit]5683 bts [rax], edx5684 # else5685 mov eax, [pvBitmap]5686 mov edx, [iBit]5687 bts [eax], edx5688 # endif5689 }5690 # endif5691 }5692 #endif5693 5694 5695 /**5696 * Atomically sets a bit in a bitmap, ordered.5697 *5698 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise5699 * the memory access isn't atomic!5700 * @param iBit The bit to set.5701 */5702 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5703 DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);5704 #else5705 DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)5706 {5707 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));5708 # if RT_INLINE_ASM_USES_INTRIN5709 _interlockedbittestandset((long *)pvBitmap, iBit);5710 # elif RT_INLINE_ASM_GNU_STYLE5711 __asm__ __volatile__("lock; btsl %1, %0"5712 : "=m" (*(volatile long *)pvBitmap)5713 : "Ir" (iBit),5714 "m" (*(volatile long *)pvBitmap)5715 : "memory");5716 # else5717 __asm5718 {5719 # ifdef RT_ARCH_AMD645720 mov rax, [pvBitmap]5721 mov edx, [iBit]5722 lock bts [rax], edx5723 # else5724 mov eax, [pvBitmap]5725 mov edx, [iBit]5726 lock bts [eax], edx5727 # endif5728 }5729 # endif5730 }5731 #endif5732 5733 5734 /**5735 * Clears a bit in a bitmap.5736 *5737 * @param pvBitmap Pointer to the bitmap.5738 * @param iBit The bit to clear.5739 *5740 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.5741 * However, doing so will yield better performance as well as avoiding5742 * traps accessing the last bits in the bitmap.5743 */5744 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5745 DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);5746 #else5747 DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)5748 {5749 # if RT_INLINE_ASM_USES_INTRIN5750 _bittestandreset((long *)pvBitmap, iBit);5751 5752 # elif RT_INLINE_ASM_GNU_STYLE5753 __asm__ __volatile__("btrl %1, %0"5754 : "=m" (*(volatile long *)pvBitmap)5755 : "Ir" (iBit),5756 "m" (*(volatile long *)pvBitmap)5757 : "memory");5758 # else5759 __asm5760 {5761 # ifdef RT_ARCH_AMD645762 mov rax, [pvBitmap]5763 mov edx, [iBit]5764 btr [rax], edx5765 # else5766 mov eax, [pvBitmap]5767 mov edx, [iBit]5768 btr [eax], edx5769 # endif5770 }5771 # endif5772 }5773 #endif5774 5775 5776 /**5777 * Atomically clears a bit in a bitmap, ordered.5778 *5779 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise5780 * the memory access isn't atomic!5781 * @param iBit The bit to toggle set.5782 * @remarks No memory barrier, take care on smp.5783 */5784 #if RT_INLINE_ASM_EXTERNAL5785 DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);5786 #else5787 DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)5788 {5789 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));5790 # if RT_INLINE_ASM_GNU_STYLE5791 __asm__ __volatile__("lock; btrl %1, %0"5792 : "=m" (*(volatile long *)pvBitmap)5793 : "Ir" (iBit),5794 "m" (*(volatile long *)pvBitmap)5795 : "memory");5796 # else5797 __asm5798 {5799 # ifdef RT_ARCH_AMD645800 mov rax, [pvBitmap]5801 mov edx, [iBit]5802 lock btr [rax], edx5803 # else5804 mov eax, [pvBitmap]5805 mov edx, [iBit]5806 lock btr [eax], edx5807 # endif5808 }5809 # endif5810 }5811 #endif5812 5813 5814 /**5815 * Toggles a bit in a bitmap.5816 *5817 * @param pvBitmap Pointer to the bitmap.5818 * @param iBit The bit to toggle.5819 *5820 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.5821 * However, doing so will yield better performance as well as avoiding5822 * traps accessing the last bits in the bitmap.5823 */5824 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5825 DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);5826 #else5827 DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)5828 {5829 # if RT_INLINE_ASM_USES_INTRIN5830 _bittestandcomplement((long *)pvBitmap, iBit);5831 # elif RT_INLINE_ASM_GNU_STYLE5832 __asm__ __volatile__("btcl %1, %0"5833 : "=m" (*(volatile long *)pvBitmap)5834 : "Ir" (iBit),5835 "m" (*(volatile long *)pvBitmap)5836 : "memory");5837 # else5838 __asm5839 {5840 # ifdef RT_ARCH_AMD645841 mov rax, [pvBitmap]5842 mov edx, [iBit]5843 btc [rax], edx5844 # else5845 mov eax, [pvBitmap]5846 mov edx, [iBit]5847 btc [eax], edx5848 # endif5849 }5850 # endif5851 }5852 #endif5853 5854 5855 /**5856 * Atomically toggles a bit in a bitmap, ordered.5857 *5858 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise5859 * the memory access isn't atomic!5860 * @param iBit The bit to test and set.5861 */5862 #if RT_INLINE_ASM_EXTERNAL5863 DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);5864 #else5865 DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)5866 {5867 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));5868 # if RT_INLINE_ASM_GNU_STYLE5869 __asm__ __volatile__("lock; btcl %1, %0"5870 : "=m" (*(volatile long *)pvBitmap)5871 : "Ir" (iBit),5872 "m" (*(volatile long *)pvBitmap)5873 : "memory");5874 # else5875 __asm5876 {5877 # ifdef RT_ARCH_AMD645878 mov rax, [pvBitmap]5879 mov edx, [iBit]5880 lock btc [rax], edx5881 # else5882 mov eax, [pvBitmap]5883 mov edx, [iBit]5884 lock btc [eax], edx5885 # endif5886 }5887 # endif5888 }5889 #endif5890 5891 5892 /**5893 * Tests and sets a bit in a bitmap.5894 *5895 * @returns true if the bit was set.5896 * @returns false if the bit was clear.5897 *5898 * @param pvBitmap Pointer to the bitmap.5899 * @param iBit The bit to test and set.5900 *5901 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.5902 * However, doing so will yield better performance as well as avoiding5903 * traps accessing the last bits in the bitmap.5904 */5905 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5906 DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);5907 #else5908 DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)5909 {5910 union { bool f; uint32_t u32; uint8_t u8; } rc;5911 # if RT_INLINE_ASM_USES_INTRIN5912 rc.u8 = _bittestandset((long *)pvBitmap, iBit);5913 5914 # elif RT_INLINE_ASM_GNU_STYLE5915 __asm__ __volatile__("btsl %2, %1\n\t"5916 "setc %b0\n\t"5917 "andl $1, %0\n\t"5918 : "=q" (rc.u32),5919 "=m" (*(volatile long *)pvBitmap)5920 : "Ir" (iBit),5921 "m" (*(volatile long *)pvBitmap)5922 : "memory");5923 # else5924 __asm5925 {5926 mov edx, [iBit]5927 # ifdef RT_ARCH_AMD645928 mov rax, [pvBitmap]5929 bts [rax], edx5930 # else5931 mov eax, [pvBitmap]5932 bts [eax], edx5933 # endif5934 setc al5935 and eax, 15936 mov [rc.u32], eax5937 }5938 # endif5939 return rc.f;5940 }5941 #endif5942 5943 5944 /**5945 * Atomically tests and sets a bit in a bitmap, ordered.5946 *5947 * @returns true if the bit was set.5948 * @returns false if the bit was clear.5949 *5950 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise5951 * the memory access isn't atomic!5952 * @param iBit The bit to set.5953 */5954 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN5955 DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);5956 #else5957 DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)5958 {5959 union { bool f; uint32_t u32; uint8_t u8; } rc;5960 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));5961 # if RT_INLINE_ASM_USES_INTRIN5962 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);5963 # elif RT_INLINE_ASM_GNU_STYLE5964 __asm__ __volatile__("lock; btsl %2, %1\n\t"5965 "setc %b0\n\t"5966 "andl $1, %0\n\t"5967 : "=q" (rc.u32),5968 "=m" (*(volatile long *)pvBitmap)5969 : "Ir" (iBit),5970 "m" (*(volatile long *)pvBitmap)5971 : "memory");5972 # else5973 __asm5974 {5975 mov edx, [iBit]5976 # ifdef RT_ARCH_AMD645977 mov rax, [pvBitmap]5978 lock bts [rax], edx5979 # else5980 mov eax, [pvBitmap]5981 lock bts [eax], edx5982 # endif5983 setc al5984 and eax, 15985 mov [rc.u32], eax5986 }5987 # endif5988 return rc.f;5989 }5990 #endif5991 5992 5993 /**5994 * Tests and clears a bit in a bitmap.5995 *5996 * @returns true if the bit was set.5997 * @returns false if the bit was clear.5998 *5999 * @param pvBitmap Pointer to the bitmap.6000 * @param iBit The bit to test and clear.6001 *6002 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.6003 * However, doing so will yield better performance as well as avoiding6004 * traps accessing the last bits in the bitmap.6005 */6006 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6007 DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);6008 #else6009 DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)6010 {6011 union { bool f; uint32_t u32; uint8_t u8; } rc;6012 # if RT_INLINE_ASM_USES_INTRIN6013 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);6014 6015 # elif RT_INLINE_ASM_GNU_STYLE6016 __asm__ __volatile__("btrl %2, %1\n\t"6017 "setc %b0\n\t"6018 "andl $1, %0\n\t"6019 : "=q" (rc.u32),6020 "=m" (*(volatile long *)pvBitmap)6021 : "Ir" (iBit),6022 "m" (*(volatile long *)pvBitmap)6023 : "memory");6024 # else6025 __asm6026 {6027 mov edx, [iBit]6028 # ifdef RT_ARCH_AMD646029 mov rax, [pvBitmap]6030 btr [rax], edx6031 # else6032 mov eax, [pvBitmap]6033 btr [eax], edx6034 # endif6035 setc al6036 and eax, 16037 mov [rc.u32], eax6038 }6039 # endif6040 return rc.f;6041 }6042 #endif6043 6044 6045 /**6046 * Atomically tests and clears a bit in a bitmap, ordered.6047 *6048 * @returns true if the bit was set.6049 * @returns false if the bit was clear.6050 *6051 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise6052 * the memory access isn't atomic!6053 * @param iBit The bit to test and clear.6054 *6055 * @remarks No memory barrier, take care on smp.6056 */6057 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6058 DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);6059 #else6060 DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)6061 {6062 union { bool f; uint32_t u32; uint8_t u8; } rc;6063 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));6064 # if RT_INLINE_ASM_USES_INTRIN6065 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);6066 6067 # elif RT_INLINE_ASM_GNU_STYLE6068 __asm__ __volatile__("lock; btrl %2, %1\n\t"6069 "setc %b0\n\t"6070 "andl $1, %0\n\t"6071 : "=q" (rc.u32),6072 "=m" (*(volatile long *)pvBitmap)6073 : "Ir" (iBit),6074 "m" (*(volatile long *)pvBitmap)6075 : "memory");6076 # else6077 __asm6078 {6079 mov edx, [iBit]6080 # ifdef RT_ARCH_AMD646081 mov rax, [pvBitmap]6082 lock btr [rax], edx6083 # else6084 mov eax, [pvBitmap]6085 lock btr [eax], edx6086 # endif6087 setc al6088 and eax, 16089 mov [rc.u32], eax6090 }6091 # endif6092 return rc.f;6093 }6094 #endif6095 6096 6097 /**6098 * Tests and toggles a bit in a bitmap.6099 *6100 * @returns true if the bit was set.6101 * @returns false if the bit was clear.6102 *6103 * @param pvBitmap Pointer to the bitmap.6104 * @param iBit The bit to test and toggle.6105 *6106 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.6107 * However, doing so will yield better performance as well as avoiding6108 * traps accessing the last bits in the bitmap.6109 */6110 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6111 DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);6112 #else6113 DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)6114 {6115 union { bool f; uint32_t u32; uint8_t u8; } rc;6116 # if RT_INLINE_ASM_USES_INTRIN6117 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);6118 6119 # elif RT_INLINE_ASM_GNU_STYLE6120 __asm__ __volatile__("btcl %2, %1\n\t"6121 "setc %b0\n\t"6122 "andl $1, %0\n\t"6123 : "=q" (rc.u32),6124 "=m" (*(volatile long *)pvBitmap)6125 : "Ir" (iBit),6126 "m" (*(volatile long *)pvBitmap)6127 : "memory");6128 # else6129 __asm6130 {6131 mov edx, [iBit]6132 # ifdef RT_ARCH_AMD646133 mov rax, [pvBitmap]6134 btc [rax], edx6135 # else6136 mov eax, [pvBitmap]6137 btc [eax], edx6138 # endif6139 setc al6140 and eax, 16141 mov [rc.u32], eax6142 }6143 # endif6144 return rc.f;6145 }6146 #endif6147 6148 6149 /**6150 * Atomically tests and toggles a bit in a bitmap, ordered.6151 *6152 * @returns true if the bit was set.6153 * @returns false if the bit was clear.6154 *6155 * @param pvBitmap Pointer to the bitmap. 
Must be 32-bit aligned, otherwise6156 * the memory access isn't atomic!6157 * @param iBit The bit to test and toggle.6158 */6159 #if RT_INLINE_ASM_EXTERNAL6160 DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);6161 #else6162 DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)6163 {6164 union { bool f; uint32_t u32; uint8_t u8; } rc;6165 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));6166 # if RT_INLINE_ASM_GNU_STYLE6167 __asm__ __volatile__("lock; btcl %2, %1\n\t"6168 "setc %b0\n\t"6169 "andl $1, %0\n\t"6170 : "=q" (rc.u32),6171 "=m" (*(volatile long *)pvBitmap)6172 : "Ir" (iBit),6173 "m" (*(volatile long *)pvBitmap)6174 : "memory");6175 # else6176 __asm6177 {6178 mov edx, [iBit]6179 # ifdef RT_ARCH_AMD646180 mov rax, [pvBitmap]6181 lock btc [rax], edx6182 # else6183 mov eax, [pvBitmap]6184 lock btc [eax], edx6185 # endif6186 setc al6187 and eax, 16188 mov [rc.u32], eax6189 }6190 # endif6191 return rc.f;6192 }6193 #endif6194 6195 6196 /**6197 * Tests if a bit in a bitmap is set.6198 *6199 * @returns true if the bit is set.6200 * @returns false if the bit is clear.6201 *6202 * @param pvBitmap Pointer to the bitmap.6203 * @param iBit The bit to test.6204 *6205 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.6206 * However, doing so will yield better performance as well as avoiding6207 * traps accessing the last bits in the bitmap.6208 */6209 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6210 DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);6211 #else6212 DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)6213 {6214 union { bool f; uint32_t u32; uint8_t u8; } rc;6215 # if RT_INLINE_ASM_USES_INTRIN6216 rc.u32 = _bittest((long *)pvBitmap, iBit);6217 # elif RT_INLINE_ASM_GNU_STYLE6218 6219 __asm__ __volatile__("btl %2, %1\n\t"6220 "setc %b0\n\t"6221 "andl $1, %0\n\t"6222 : "=q" (rc.u32)6223 : "m" (*(const volatile long *)pvBitmap),6224 "Ir" (iBit)6225 : "memory");6226 # else6227 __asm6228 {6229 mov edx, [iBit]6230 # ifdef RT_ARCH_AMD646231 mov rax, [pvBitmap]6232 bt [rax], edx6233 # else6234 mov eax, [pvBitmap]6235 bt [eax], edx6236 # endif6237 setc al6238 and eax, 16239 mov [rc.u32], eax6240 }6241 # endif6242 return rc.f;6243 }6244 #endif6245 6246 6247 /**6248 * Clears a bit range within a bitmap.6249 *6250 * @param pvBitmap Pointer to the bitmap.6251 * @param iBitStart The First bit to clear.6252 * @param iBitEnd The first bit not to clear.6253 */6254 DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)6255 {6256 if (iBitStart < iBitEnd)6257 {6258 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);6259 int iStart = iBitStart & ~31;6260 int iEnd = iBitEnd & ~31;6261 if (iStart == iEnd)6262 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);6263 else6264 {6265 /* bits in first dword. */6266 if (iBitStart & 31)6267 {6268 *pu32 &= (1 << (iBitStart & 31)) - 1;6269 pu32++;6270 iBitStart = iStart + 32;6271 }6272 6273 /* whole dword. */6274 if (iBitStart != iEnd)6275 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);6276 6277 /* bits in last dword. 
*/6278 if (iBitEnd & 31)6279 {6280 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);6281 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);6282 }6283 }6284 }6285 }6286 6287 6288 /**6289 * Sets a bit range within a bitmap.6290 *6291 * @param pvBitmap Pointer to the bitmap.6292 * @param iBitStart The First bit to set.6293 * @param iBitEnd The first bit not to set.6294 */6295 DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)6296 {6297 if (iBitStart < iBitEnd)6298 {6299 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);6300 int iStart = iBitStart & ~31;6301 int iEnd = iBitEnd & ~31;6302 if (iStart == iEnd)6303 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;6304 else6305 {6306 /* bits in first dword. */6307 if (iBitStart & 31)6308 {6309 *pu32 |= ~((1 << (iBitStart & 31)) - 1);6310 pu32++;6311 iBitStart = iStart + 32;6312 }6313 6314 /* whole dword. */6315 if (iBitStart != iEnd)6316 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);6317 6318 /* bits in last dword. */6319 if (iBitEnd & 31)6320 {6321 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);6322 *pu32 |= (1 << (iBitEnd & 31)) - 1;6323 }6324 }6325 }6326 }6327 6328 6329 /**6330 * Finds the first clear bit in a bitmap.6331 *6332 * @returns Index of the first zero bit.6333 * @returns -1 if no clear bit was found.6334 * @param pvBitmap Pointer to the bitmap.6335 * @param cBits The number of bits in the bitmap. Multiple of 32.6336 */6337 #if RT_INLINE_ASM_EXTERNAL6338 DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);6339 #else6340 DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)6341 {6342 if (cBits)6343 {6344 int32_t iBit;6345 # if RT_INLINE_ASM_GNU_STYLE6346 RTCCUINTREG uEAX, uECX, uEDI;6347 cBits = RT_ALIGN_32(cBits, 32);6348 __asm__ __volatile__("repe; scasl\n\t"6349 "je 1f\n\t"6350 # ifdef RT_ARCH_AMD646351 "lea -4(%%rdi), %%rdi\n\t"6352 "xorl (%%rdi), %%eax\n\t"6353 "subq %5, %%rdi\n\t"6354 # else6355 "lea -4(%%edi), %%edi\n\t"6356 "xorl (%%edi), %%eax\n\t"6357 "subl %5, %%edi\n\t"6358 # endif6359 "shll $3, %%edi\n\t"6360 "bsfl %%eax, %%edx\n\t"6361 "addl %%edi, %%edx\n\t"6362 "1:\t\n"6363 : "=d" (iBit),6364 "=&c" (uECX),6365 "=&D" (uEDI),6366 "=&a" (uEAX)6367 : "0" (0xffffffff),6368 "mr" (pvBitmap),6369 "1" (cBits >> 5),6370 "2" (pvBitmap),6371 "3" (0xffffffff));6372 # else6373 cBits = RT_ALIGN_32(cBits, 32);6374 __asm6375 {6376 # ifdef RT_ARCH_AMD646377 mov rdi, [pvBitmap]6378 mov rbx, rdi6379 # else6380 mov edi, [pvBitmap]6381 mov ebx, edi6382 # endif6383 mov edx, 0ffffffffh6384 mov eax, edx6385 mov ecx, [cBits]6386 shr ecx, 56387 repe scasd6388 je done6389 6390 # ifdef RT_ARCH_AMD646391 lea rdi, [rdi - 4]6392 xor eax, [rdi]6393 sub rdi, rbx6394 # else6395 lea edi, [edi - 4]6396 xor eax, [edi]6397 sub edi, ebx6398 # endif6399 shl edi, 36400 bsf edx, eax6401 add edx, edi6402 done:6403 mov [iBit], edx6404 }6405 # endif6406 return iBit;6407 }6408 return -1;6409 }6410 #endif6411 6412 6413 /**6414 * Finds the next clear bit in a bitmap.6415 *6416 * @returns Index of the first zero bit.6417 * @returns -1 if no clear bit was found.6418 * @param pvBitmap Pointer to the bitmap.6419 * @param cBits The number of bits in the bitmap. 
Multiple of 32.6420 * @param iBitPrev The bit returned from the last search.6421 * The search will start at iBitPrev + 1.6422 */6423 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6424 DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);6425 #else6426 DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)6427 {6428 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;6429 int iBit = ++iBitPrev & 31;6430 if (iBit)6431 {6432 /*6433 * Inspect the 32-bit word containing the unaligned bit.6434 */6435 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;6436 6437 # if RT_INLINE_ASM_USES_INTRIN6438 unsigned long ulBit = 0;6439 if (_BitScanForward(&ulBit, u32))6440 return ulBit + iBitPrev;6441 # else6442 # if RT_INLINE_ASM_GNU_STYLE6443 __asm__ __volatile__("bsf %1, %0\n\t"6444 "jnz 1f\n\t"6445 "movl $-1, %0\n\t"6446 "1:\n\t"6447 : "=r" (iBit)6448 : "r" (u32));6449 # else6450 __asm6451 {6452 mov edx, [u32]6453 bsf eax, edx6454 jnz done6455 mov eax, 0ffffffffh6456 done:6457 mov [iBit], eax6458 }6459 # endif6460 if (iBit >= 0)6461 return iBit + iBitPrev;6462 # endif6463 6464 /*6465 * Skip ahead and see if there is anything left to search.6466 */6467 iBitPrev |= 31;6468 iBitPrev++;6469 if (cBits <= (uint32_t)iBitPrev)6470 return -1;6471 }6472 6473 /*6474 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.6475 */6476 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);6477 if (iBit >= 0)6478 iBit += iBitPrev;6479 return iBit;6480 }6481 #endif6482 6483 6484 /**6485 * Finds the first set bit in a bitmap.6486 *6487 * @returns Index of the first set bit.6488 * @returns -1 if no clear bit was found.6489 * @param pvBitmap Pointer to the bitmap.6490 * @param cBits The number of bits in the bitmap. 
Multiple of 32.6491 */6492 #if RT_INLINE_ASM_EXTERNAL6493 DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);6494 #else6495 DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)6496 {6497 if (cBits)6498 {6499 int32_t iBit;6500 # if RT_INLINE_ASM_GNU_STYLE6501 RTCCUINTREG uEAX, uECX, uEDI;6502 cBits = RT_ALIGN_32(cBits, 32);6503 __asm__ __volatile__("repe; scasl\n\t"6504 "je 1f\n\t"6505 # ifdef RT_ARCH_AMD646506 "lea -4(%%rdi), %%rdi\n\t"6507 "movl (%%rdi), %%eax\n\t"6508 "subq %5, %%rdi\n\t"6509 # else6510 "lea -4(%%edi), %%edi\n\t"6511 "movl (%%edi), %%eax\n\t"6512 "subl %5, %%edi\n\t"6513 # endif6514 "shll $3, %%edi\n\t"6515 "bsfl %%eax, %%edx\n\t"6516 "addl %%edi, %%edx\n\t"6517 "1:\t\n"6518 : "=d" (iBit),6519 "=&c" (uECX),6520 "=&D" (uEDI),6521 "=&a" (uEAX)6522 : "0" (0xffffffff),6523 "mr" (pvBitmap),6524 "1" (cBits >> 5),6525 "2" (pvBitmap),6526 "3" (0));6527 # else6528 cBits = RT_ALIGN_32(cBits, 32);6529 __asm6530 {6531 # ifdef RT_ARCH_AMD646532 mov rdi, [pvBitmap]6533 mov rbx, rdi6534 # else6535 mov edi, [pvBitmap]6536 mov ebx, edi6537 # endif6538 mov edx, 0ffffffffh6539 xor eax, eax6540 mov ecx, [cBits]6541 shr ecx, 56542 repe scasd6543 je done6544 # ifdef RT_ARCH_AMD646545 lea rdi, [rdi - 4]6546 mov eax, [rdi]6547 sub rdi, rbx6548 # else6549 lea edi, [edi - 4]6550 mov eax, [edi]6551 sub edi, ebx6552 # endif6553 shl edi, 36554 bsf edx, eax6555 add edx, edi6556 done:6557 mov [iBit], edx6558 }6559 # endif6560 return iBit;6561 }6562 return -1;6563 }6564 #endif6565 6566 6567 /**6568 * Finds the next set bit in a bitmap.6569 *6570 * @returns Index of the next set bit.6571 * @returns -1 if no set bit was found.6572 * @param pvBitmap Pointer to the bitmap.6573 * @param cBits The number of bits in the bitmap. 
Multiple of 32.6574 * @param iBitPrev The bit returned from the last search.6575 * The search will start at iBitPrev + 1.6576 */6577 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN6578 DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);6579 #else6580 DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)6581 {6582 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;6583 int iBit = ++iBitPrev & 31;6584 if (iBit)6585 {6586 /*6587 * Inspect the 32-bit word containing the unaligned bit.6588 */6589 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;6590 6591 # if RT_INLINE_ASM_USES_INTRIN6592 unsigned long ulBit = 0;6593 if (_BitScanForward(&ulBit, u32))6594 return ulBit + iBitPrev;6595 # else6596 # if RT_INLINE_ASM_GNU_STYLE6597 __asm__ __volatile__("bsf %1, %0\n\t"6598 "jnz 1f\n\t"6599 "movl $-1, %0\n\t"6600 "1:\n\t"6601 : "=r" (iBit)6602 : "r" (u32));6603 # else6604 __asm6605 {6606 mov edx, [u32]6607 bsf eax, edx6608 jnz done6609 mov eax, 0ffffffffh6610 done:6611 mov [iBit], eax6612 }6613 # endif6614 if (iBit >= 0)6615 return iBit + iBitPrev;6616 # endif6617 6618 /*6619 * Skip ahead and see if there is anything left to search.6620 */6621 iBitPrev |= 31;6622 iBitPrev++;6623 if (cBits <= (uint32_t)iBitPrev)6624 return -1;6625 }6626 6627 /*6628 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.6629 */6630 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);6631 if (iBit >= 0)6632 iBit += iBitPrev;6633 return iBit;6634 }6635 #endif6636 6637 6638 /**6639 * Finds the first bit which is set in the given 32-bit integer.6640 * Bits are numbered from 1 (least significant) to 32.6641 *6642 * @returns index [1..32] of the first set bit.6643 * @returns 0 if all bits are cleared.6644 * @param u32 Integer to search for set bits.6645 * @remark Similar to ffs() in BSD.6646 */6647 DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)6648 {6649 # if RT_INLINE_ASM_USES_INTRIN6650 unsigned long iBit;6651 if (_BitScanForward(&iBit, u32))6652 iBit++;6653 else6654 iBit = 0;6655 # elif RT_INLINE_ASM_GNU_STYLE6656 uint32_t iBit;6657 __asm__ __volatile__("bsf %1, %0\n\t"6658 "jnz 1f\n\t"6659 "xorl %0, %0\n\t"6660 "jmp 2f\n"6661 "1:\n\t"6662 "incl %0\n"6663 "2:\n\t"6664 : "=r" (iBit)6665 : "rm" (u32));6666 # else6667 uint32_t iBit;6668 _asm6669 {6670 bsf eax, [u32]6671 jnz found6672 xor eax, eax6673 jmp done6674 found:6675 inc eax6676 done:6677 mov [iBit], eax6678 }6679 # endif6680 return iBit;6681 }6682 6683 6684 /**6685 * Finds the first bit which is set in the given 32-bit integer.6686 * Bits are numbered from 1 (least significant) to 32.6687 *6688 * @returns index [1..32] of the first set bit.6689 * @returns 0 if all bits are cleared.6690 * @param i32 Integer to search for set bits.6691 * @remark Similar to ffs() in BSD.6692 */6693 DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)6694 {6695 return ASMBitFirstSetU32((uint32_t)i32);6696 }6697 6698 6699 /**6700 * Finds the last bit which is set in the given 32-bit integer.6701 * Bits are numbered from 1 (least significant) to 32.6702 *6703 * @returns index [1..32] of the last set bit.6704 * @returns 0 if all bits are cleared.6705 * @param u32 Integer to search for set bits.6706 * @remark Similar to fls() in BSD.6707 */6708 DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)6709 {6710 # if RT_INLINE_ASM_USES_INTRIN6711 unsigned long iBit;6712 if (_BitScanReverse(&iBit, u32))6713 iBit++;6714 else6715 iBit = 0;6716 # elif 
RT_INLINE_ASM_GNU_STYLE6717 uint32_t iBit;6718 __asm__ __volatile__("bsrl %1, %0\n\t"6719 "jnz 1f\n\t"6720 "xorl %0, %0\n\t"6721 "jmp 2f\n"6722 "1:\n\t"6723 "incl %0\n"6724 "2:\n\t"6725 : "=r" (iBit)6726 : "rm" (u32));6727 # else6728 uint32_t iBit;6729 _asm6730 {6731 bsr eax, [u32]6732 jnz found6733 xor eax, eax6734 jmp done6735 found:6736 inc eax6737 done:6738 mov [iBit], eax6739 }6740 # endif6741 return iBit;6742 }6743 6744 6745 /**6746 * Finds the last bit which is set in the given 32-bit integer.6747 * Bits are numbered from 1 (least significant) to 32.6748 *6749 * @returns index [1..32] of the last set bit.6750 * @returns 0 if all bits are cleared.6751 * @param i32 Integer to search for set bits.6752 * @remark Similar to fls() in BSD.6753 */6754 DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)6755 {6756 return ASMBitLastSetU32((uint32_t)i32);6757 }6758 6759 /**6760 * Reverse the byte order of the given 16-bit integer.6761 *6762 * @returns Revert6763 * @param u16 16-bit integer value.6764 */6765 DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)6766 {6767 #if RT_INLINE_ASM_USES_INTRIN6768 u16 = _byteswap_ushort(u16);6769 #elif RT_INLINE_ASM_GNU_STYLE6770 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));6771 #else6772 _asm6773 {6774 mov ax, [u16]6775 ror ax, 86776 mov [u16], ax6777 }6778 #endif6779 return u16;6780 }6781 6782 /**6783 * Reverse the byte order of the given 32-bit integer.6784 *6785 * @returns Revert6786 * @param u32 32-bit integer value.6787 */6788 DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)6789 {6790 #if RT_INLINE_ASM_USES_INTRIN6791 u32 = _byteswap_ulong(u32);6792 #elif RT_INLINE_ASM_GNU_STYLE6793 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));6794 #else6795 _asm6796 {6797 mov eax, [u32]6798 bswap eax6799 mov [u32], eax6800 }6801 #endif6802 return u32;6803 }6804 6805 6806 /**6807 * Reverse the byte order of the given 64-bit integer.6808 *6809 * @returns Revert6810 * @param u64 64-bit integer value.6811 */6812 DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)6813 {6814 #if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN6815 u64 = _byteswap_uint64(u64);6816 #else6817 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 326818 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));6819 #endif6820 return u64;6821 }6822 6823 6824 2892 /** @} */ 6825 2893 6826 6827 2894 /** @} */ 6828 2895 #endif -
trunk/include/iprt/asm.h
r28800 r29245 4 4 5 5 /* 6 * Copyright (C) 2006-20 07Oracle Corporation6 * Copyright (C) 2006-2010 Oracle Corporation 7 7 * 8 8 * This file is part of VirtualBox Open Source Edition (OSE), as … … 48 48 # pragma intrinsic(_ReadWriteBarrier) 49 49 # pragma intrinsic(__cpuid) 50 # pragma intrinsic(_enable)51 # pragma intrinsic(_disable)52 # pragma intrinsic(__rdtsc)53 # pragma intrinsic(__readmsr)54 # pragma intrinsic(__writemsr)55 # pragma intrinsic(__outbyte)56 # pragma intrinsic(__outbytestring)57 # pragma intrinsic(__outword)58 # pragma intrinsic(__outwordstring)59 # pragma intrinsic(__outdword)60 # pragma intrinsic(__outdwordstring)61 # pragma intrinsic(__inbyte)62 # pragma intrinsic(__inbytestring)63 # pragma intrinsic(__inword)64 # pragma intrinsic(__inwordstring)65 # pragma intrinsic(__indword)66 # pragma intrinsic(__indwordstring)67 # pragma intrinsic(__invlpg)68 # pragma intrinsic(__wbinvd)69 50 # pragma intrinsic(__stosd) 70 51 # pragma intrinsic(__stosw) 71 52 # pragma intrinsic(__stosb) 72 # pragma intrinsic(__readcr0)73 # pragma intrinsic(__readcr2)74 # pragma intrinsic(__readcr3)75 # pragma intrinsic(__readcr4)76 # pragma intrinsic(__writecr0)77 # pragma intrinsic(__writecr3)78 # pragma intrinsic(__writecr4)79 # pragma intrinsic(__readdr)80 # pragma intrinsic(__writedr)81 53 # pragma intrinsic(_BitScanForward) 82 54 # pragma intrinsic(_BitScanReverse) … … 98 70 # pragma intrinsic(_InterlockedCompareExchange64) 99 71 # ifdef RT_ARCH_AMD64 100 # pragma intrinsic(_mm_mfence)101 # pragma intrinsic(_mm_sfence)102 # pragma intrinsic(_mm_lfence)103 72 # pragma intrinsic(__stosq) 104 # pragma intrinsic(__readcr8)105 # pragma intrinsic(__writecr8)106 73 # pragma intrinsic(_byteswap_uint64) 107 74 # pragma intrinsic(_InterlockedExchange64) … … 114 81 115 82 116 /** @defgroup grp_ asmASM - Assembly Routines83 /** @defgroup grp_rt_asm ASM - Assembly Routines 117 84 * @ingroup grp_rt 118 85 * … … 202 169 203 170 204 /** @todo find a more proper place for this structure? */205 #pragma pack(1)206 /** IDTR */207 typedef struct RTIDTR208 {209 /** Size of the IDT. */210 uint16_t cbIdt;211 /** Address of the IDT. */212 uintptr_t pIdt;213 } RTIDTR, *PRTIDTR;214 #pragma pack()215 216 #pragma pack(1)217 /** GDTR */218 typedef struct RTGDTR219 {220 /** Size of the GDT. */221 uint16_t cbGdt;222 /** Address of the GDT. */223 uintptr_t pGdt;224 } RTGDTR, *PRTGDTR;225 #pragma pack()226 227 228 171 /** @def ASMReturnAddress 229 172 * Gets the return address of the current (or calling if you like) function or method. 
… … 244 187 245 188 /** 246 * Gets the content of the IDTR CPU register.247 * @param pIdtr Where to store the IDTR contents.248 */249 #if RT_INLINE_ASM_EXTERNAL250 DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);251 #else252 DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)253 {254 # if RT_INLINE_ASM_GNU_STYLE255 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));256 # else257 __asm258 {259 # ifdef RT_ARCH_AMD64260 mov rax, [pIdtr]261 sidt [rax]262 # else263 mov eax, [pIdtr]264 sidt [eax]265 # endif266 }267 # endif268 }269 #endif270 271 272 /**273 * Sets the content of the IDTR CPU register.274 * @param pIdtr Where to load the IDTR contents from275 */276 #if RT_INLINE_ASM_EXTERNAL277 DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);278 #else279 DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)280 {281 # if RT_INLINE_ASM_GNU_STYLE282 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));283 # else284 __asm285 {286 # ifdef RT_ARCH_AMD64287 mov rax, [pIdtr]288 lidt [rax]289 # else290 mov eax, [pIdtr]291 lidt [eax]292 # endif293 }294 # endif295 }296 #endif297 298 299 /**300 * Gets the content of the GDTR CPU register.301 * @param pGdtr Where to store the GDTR contents.302 */303 #if RT_INLINE_ASM_EXTERNAL304 DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);305 #else306 DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)307 {308 # if RT_INLINE_ASM_GNU_STYLE309 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));310 # else311 __asm312 {313 # ifdef RT_ARCH_AMD64314 mov rax, [pGdtr]315 sgdt [rax]316 # else317 mov eax, [pGdtr]318 sgdt [eax]319 # endif320 }321 # endif322 }323 #endif324 325 /**326 * Get the cs register.327 * @returns cs.328 */329 #if RT_INLINE_ASM_EXTERNAL330 DECLASM(RTSEL) ASMGetCS(void);331 #else332 DECLINLINE(RTSEL) ASMGetCS(void)333 {334 RTSEL SelCS;335 # if RT_INLINE_ASM_GNU_STYLE336 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));337 # else338 __asm339 {340 mov ax, cs341 mov [SelCS], ax342 }343 # endif344 return SelCS;345 }346 #endif347 348 349 /**350 * Get the DS register.351 * @returns DS.352 */353 #if RT_INLINE_ASM_EXTERNAL354 DECLASM(RTSEL) ASMGetDS(void);355 #else356 DECLINLINE(RTSEL) ASMGetDS(void)357 {358 RTSEL SelDS;359 # if RT_INLINE_ASM_GNU_STYLE360 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));361 # else362 __asm363 {364 mov ax, ds365 mov [SelDS], ax366 }367 # endif368 return SelDS;369 }370 #endif371 372 373 /**374 * Get the ES register.375 * @returns ES.376 */377 #if RT_INLINE_ASM_EXTERNAL378 DECLASM(RTSEL) ASMGetES(void);379 #else380 DECLINLINE(RTSEL) ASMGetES(void)381 {382 RTSEL SelES;383 # if RT_INLINE_ASM_GNU_STYLE384 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));385 # else386 __asm387 {388 mov ax, es389 mov [SelES], ax390 }391 # endif392 return SelES;393 }394 #endif395 396 397 /**398 * Get the FS register.399 * @returns FS.400 */401 #if RT_INLINE_ASM_EXTERNAL402 DECLASM(RTSEL) ASMGetFS(void);403 #else404 DECLINLINE(RTSEL) ASMGetFS(void)405 {406 RTSEL SelFS;407 # if RT_INLINE_ASM_GNU_STYLE408 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));409 # else410 __asm411 {412 mov ax, fs413 mov [SelFS], ax414 }415 # endif416 return SelFS;417 }418 # endif419 420 421 /**422 * Get the GS register.423 * @returns GS.424 */425 #if RT_INLINE_ASM_EXTERNAL426 DECLASM(RTSEL) ASMGetGS(void);427 #else428 DECLINLINE(RTSEL) ASMGetGS(void)429 {430 RTSEL SelGS;431 # if RT_INLINE_ASM_GNU_STYLE432 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));433 # else434 __asm435 {436 mov ax, gs437 mov [SelGS], ax438 }439 # endif440 return SelGS;441 }442 #endif443 444 445 /**446 * Get the SS 
register.447 * @returns SS.448 */449 #if RT_INLINE_ASM_EXTERNAL450 DECLASM(RTSEL) ASMGetSS(void);451 #else452 DECLINLINE(RTSEL) ASMGetSS(void)453 {454 RTSEL SelSS;455 # if RT_INLINE_ASM_GNU_STYLE456 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));457 # else458 __asm459 {460 mov ax, ss461 mov [SelSS], ax462 }463 # endif464 return SelSS;465 }466 #endif467 468 469 /**470 * Get the TR register.471 * @returns TR.472 */473 #if RT_INLINE_ASM_EXTERNAL474 DECLASM(RTSEL) ASMGetTR(void);475 #else476 DECLINLINE(RTSEL) ASMGetTR(void)477 {478 RTSEL SelTR;479 # if RT_INLINE_ASM_GNU_STYLE480 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));481 # else482 __asm483 {484 str ax485 mov [SelTR], ax486 }487 # endif488 return SelTR;489 }490 #endif491 492 493 /**494 * Get the [RE]FLAGS register.495 * @returns [RE]FLAGS.496 */497 #if RT_INLINE_ASM_EXTERNAL498 DECLASM(RTCCUINTREG) ASMGetFlags(void);499 #else500 DECLINLINE(RTCCUINTREG) ASMGetFlags(void)501 {502 RTCCUINTREG uFlags;503 # if RT_INLINE_ASM_GNU_STYLE504 # ifdef RT_ARCH_AMD64505 __asm__ __volatile__("pushfq\n\t"506 "popq %0\n\t"507 : "=r" (uFlags));508 # else509 __asm__ __volatile__("pushfl\n\t"510 "popl %0\n\t"511 : "=r" (uFlags));512 # endif513 # else514 __asm515 {516 # ifdef RT_ARCH_AMD64517 pushfq518 pop [uFlags]519 # else520 pushfd521 pop [uFlags]522 # endif523 }524 # endif525 return uFlags;526 }527 #endif528 529 530 /**531 * Set the [RE]FLAGS register.532 * @param uFlags The new [RE]FLAGS value.533 */534 #if RT_INLINE_ASM_EXTERNAL535 DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);536 #else537 DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)538 {539 # if RT_INLINE_ASM_GNU_STYLE540 # ifdef RT_ARCH_AMD64541 __asm__ __volatile__("pushq %0\n\t"542 "popfq\n\t"543 : : "g" (uFlags));544 # else545 __asm__ __volatile__("pushl %0\n\t"546 "popfl\n\t"547 : : "g" (uFlags));548 # endif549 # else550 __asm551 {552 # ifdef RT_ARCH_AMD64553 push [uFlags]554 popfq555 # else556 push [uFlags]557 popfd558 # endif559 }560 # endif561 }562 #endif563 564 565 /**566 * Gets the content of the CPU timestamp counter register.567 *568 * @returns TSC.569 */570 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN571 DECLASM(uint64_t) ASMReadTSC(void);572 #else573 DECLINLINE(uint64_t) ASMReadTSC(void)574 {575 RTUINT64U u;576 # if RT_INLINE_ASM_GNU_STYLE577 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));578 # else579 # if RT_INLINE_ASM_USES_INTRIN580 u.u = __rdtsc();581 # else582 __asm583 {584 rdtsc585 mov [u.s.Lo], eax586 mov [u.s.Hi], edx587 }588 # endif589 # endif590 return u.u;591 }592 #endif593 594 595 /**596 * Performs the cpuid instruction returning all registers.597 *598 * @param uOperator CPUID operation (eax).599 * @param pvEAX Where to store eax.600 * @param pvEBX Where to store ebx.601 * @param pvECX Where to store ecx.602 * @param pvEDX Where to store edx.603 * @remark We're using void pointers to ease the use of special bitfield structures and such.604 */605 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN606 DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);607 #else608 DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)609 {610 # if RT_INLINE_ASM_GNU_STYLE611 # ifdef RT_ARCH_AMD64612 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;613 __asm__ ("cpuid\n\t"614 : "=a" (uRAX),615 "=b" (uRBX),616 "=c" (uRCX),617 "=d" (uRDX)618 : "0" (uOperator));619 *(uint32_t *)pvEAX = (uint32_t)uRAX;620 *(uint32_t *)pvEBX = (uint32_t)uRBX;621 *(uint32_t *)pvECX = 
(uint32_t)uRCX;622 *(uint32_t *)pvEDX = (uint32_t)uRDX;623 # else624 __asm__ ("xchgl %%ebx, %1\n\t"625 "cpuid\n\t"626 "xchgl %%ebx, %1\n\t"627 : "=a" (*(uint32_t *)pvEAX),628 "=r" (*(uint32_t *)pvEBX),629 "=c" (*(uint32_t *)pvECX),630 "=d" (*(uint32_t *)pvEDX)631 : "0" (uOperator));632 # endif633 634 # elif RT_INLINE_ASM_USES_INTRIN635 int aInfo[4];636 __cpuid(aInfo, uOperator);637 *(uint32_t *)pvEAX = aInfo[0];638 *(uint32_t *)pvEBX = aInfo[1];639 *(uint32_t *)pvECX = aInfo[2];640 *(uint32_t *)pvEDX = aInfo[3];641 642 # else643 uint32_t uEAX;644 uint32_t uEBX;645 uint32_t uECX;646 uint32_t uEDX;647 __asm648 {649 push ebx650 mov eax, [uOperator]651 cpuid652 mov [uEAX], eax653 mov [uEBX], ebx654 mov [uECX], ecx655 mov [uEDX], edx656 pop ebx657 }658 *(uint32_t *)pvEAX = uEAX;659 *(uint32_t *)pvEBX = uEBX;660 *(uint32_t *)pvECX = uECX;661 *(uint32_t *)pvEDX = uEDX;662 # endif663 }664 #endif665 666 667 /**668 * Performs the cpuid instruction returning all registers.669 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)670 *671 * @param uOperator CPUID operation (eax).672 * @param uIdxECX ecx index673 * @param pvEAX Where to store eax.674 * @param pvEBX Where to store ebx.675 * @param pvECX Where to store ecx.676 * @param pvEDX Where to store edx.677 * @remark We're using void pointers to ease the use of special bitfield structures and such.678 */679 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN680 DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);681 #else682 DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)683 {684 # if RT_INLINE_ASM_GNU_STYLE685 # ifdef RT_ARCH_AMD64686 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;687 __asm__ ("cpuid\n\t"688 : "=a" (uRAX),689 "=b" (uRBX),690 "=c" (uRCX),691 "=d" (uRDX)692 : "0" (uOperator),693 "2" (uIdxECX));694 *(uint32_t *)pvEAX = (uint32_t)uRAX;695 *(uint32_t *)pvEBX = (uint32_t)uRBX;696 *(uint32_t *)pvECX = (uint32_t)uRCX;697 *(uint32_t *)pvEDX = (uint32_t)uRDX;698 # else699 __asm__ ("xchgl %%ebx, %1\n\t"700 "cpuid\n\t"701 "xchgl %%ebx, %1\n\t"702 : "=a" (*(uint32_t *)pvEAX),703 "=r" (*(uint32_t *)pvEBX),704 "=c" (*(uint32_t *)pvECX),705 "=d" (*(uint32_t *)pvEDX)706 : "0" (uOperator),707 "2" (uIdxECX));708 # endif709 710 # elif RT_INLINE_ASM_USES_INTRIN711 int aInfo[4];712 /* ??? another intrinsic ??? 
*/713 __cpuid(aInfo, uOperator);714 *(uint32_t *)pvEAX = aInfo[0];715 *(uint32_t *)pvEBX = aInfo[1];716 *(uint32_t *)pvECX = aInfo[2];717 *(uint32_t *)pvEDX = aInfo[3];718 719 # else720 uint32_t uEAX;721 uint32_t uEBX;722 uint32_t uECX;723 uint32_t uEDX;724 __asm725 {726 push ebx727 mov eax, [uOperator]728 mov ecx, [uIdxECX]729 cpuid730 mov [uEAX], eax731 mov [uEBX], ebx732 mov [uECX], ecx733 mov [uEDX], edx734 pop ebx735 }736 *(uint32_t *)pvEAX = uEAX;737 *(uint32_t *)pvEBX = uEBX;738 *(uint32_t *)pvECX = uECX;739 *(uint32_t *)pvEDX = uEDX;740 # endif741 }742 #endif743 744 745 /**746 * Performs the cpuid instruction returning ecx and edx.747 *748 * @param uOperator CPUID operation (eax).749 * @param pvECX Where to store ecx.750 * @param pvEDX Where to store edx.751 * @remark We're using void pointers to ease the use of special bitfield structures and such.752 */753 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN754 DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);755 #else756 DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)757 {758 uint32_t uEBX;759 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);760 }761 #endif762 763 764 /**765 * Performs the cpuid instruction returning edx.766 *767 * @param uOperator CPUID operation (eax).768 * @returns EDX after cpuid operation.769 */770 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN771 DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);772 #else773 DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)774 {775 RTCCUINTREG xDX;776 # if RT_INLINE_ASM_GNU_STYLE777 # ifdef RT_ARCH_AMD64778 RTCCUINTREG uSpill;779 __asm__ ("cpuid"780 : "=a" (uSpill),781 "=d" (xDX)782 : "0" (uOperator)783 : "rbx", "rcx");784 # elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)785 __asm__ ("push %%ebx\n\t"786 "cpuid\n\t"787 "pop %%ebx\n\t"788 : "=a" (uOperator),789 "=d" (xDX)790 : "0" (uOperator)791 : "ecx");792 # else793 __asm__ ("cpuid"794 : "=a" (uOperator),795 "=d" (xDX)796 : "0" (uOperator)797 : "ebx", "ecx");798 # endif799 800 # elif RT_INLINE_ASM_USES_INTRIN801 int aInfo[4];802 __cpuid(aInfo, uOperator);803 xDX = aInfo[3];804 805 # else806 __asm807 {808 push ebx809 mov eax, [uOperator]810 cpuid811 mov [xDX], edx812 pop ebx813 }814 # endif815 return (uint32_t)xDX;816 }817 #endif818 819 820 /**821 * Performs the cpuid instruction returning ecx.822 *823 * @param uOperator CPUID operation (eax).824 * @returns ECX after cpuid operation.825 */826 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN827 DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);828 #else829 DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)830 {831 RTCCUINTREG xCX;832 # if RT_INLINE_ASM_GNU_STYLE833 # ifdef RT_ARCH_AMD64834 RTCCUINTREG uSpill;835 __asm__ ("cpuid"836 : "=a" (uSpill),837 "=c" (xCX)838 : "0" (uOperator)839 : "rbx", "rdx");840 # elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)841 __asm__ ("push %%ebx\n\t"842 "cpuid\n\t"843 "pop %%ebx\n\t"844 : "=a" (uOperator),845 "=c" (xCX)846 : "0" (uOperator)847 : "edx");848 # else849 __asm__ ("cpuid"850 : "=a" (uOperator),851 "=c" (xCX)852 : "0" (uOperator)853 : "ebx", "edx");854 855 # endif856 857 # elif RT_INLINE_ASM_USES_INTRIN858 int aInfo[4];859 __cpuid(aInfo, uOperator);860 xCX = aInfo[2];861 862 # else863 __asm864 {865 push ebx866 mov eax, [uOperator]867 cpuid868 mov [xCX], ecx869 pop ebx870 }871 # endif872 return (uint32_t)xCX;873 }874 #endif875 876 877 /**878 * Checks if the current CPU supports CPUID.879 *880 * @returns true if 
CPUID is supported.881 */882 DECLINLINE(bool) ASMHasCpuId(void)883 {884 #ifdef RT_ARCH_AMD64885 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */886 #else /* !RT_ARCH_AMD64 */887 bool fRet = false;888 # if RT_INLINE_ASM_GNU_STYLE889 uint32_t u1;890 uint32_t u2;891 __asm__ ("pushf\n\t"892 "pop %1\n\t"893 "mov %1, %2\n\t"894 "xorl $0x200000, %1\n\t"895 "push %1\n\t"896 "popf\n\t"897 "pushf\n\t"898 "pop %1\n\t"899 "cmpl %1, %2\n\t"900 "setne %0\n\t"901 "push %2\n\t"902 "popf\n\t"903 : "=m" (fRet), "=r" (u1), "=r" (u2));904 # else905 __asm906 {907 pushfd908 pop eax909 mov ebx, eax910 xor eax, 0200000h911 push eax912 popfd913 pushfd914 pop eax915 cmp eax, ebx916 setne fRet917 push ebx918 popfd919 }920 # endif921 return fRet;922 #endif /* !RT_ARCH_AMD64 */923 }924 925 926 /**927 * Gets the APIC ID of the current CPU.928 *929 * @returns the APIC ID.930 */931 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN932 DECLASM(uint8_t) ASMGetApicId(void);933 #else934 DECLINLINE(uint8_t) ASMGetApicId(void)935 {936 RTCCUINTREG xBX;937 # if RT_INLINE_ASM_GNU_STYLE938 # ifdef RT_ARCH_AMD64939 RTCCUINTREG uSpill;940 __asm__ ("cpuid"941 : "=a" (uSpill),942 "=b" (xBX)943 : "0" (1)944 : "rcx", "rdx");945 # elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)946 RTCCUINTREG uSpill;947 __asm__ ("mov %%ebx,%1\n\t"948 "cpuid\n\t"949 "xchgl %%ebx,%1\n\t"950 : "=a" (uSpill),951 "=r" (xBX)952 : "0" (1)953 : "ecx", "edx");954 # else955 RTCCUINTREG uSpill;956 __asm__ ("cpuid"957 : "=a" (uSpill),958 "=b" (xBX)959 : "0" (1)960 : "ecx", "edx");961 # endif962 963 # elif RT_INLINE_ASM_USES_INTRIN964 int aInfo[4];965 __cpuid(aInfo, 1);966 xBX = aInfo[1];967 968 # else969 __asm970 {971 push ebx972 mov eax, 1973 cpuid974 mov [xBX], ebx975 pop ebx976 }977 # endif978 return (uint8_t)(xBX >> 24);979 }980 #endif981 982 983 /**984 * Tests if it a genuine Intel CPU based on the ASMCpuId(0) output.985 *986 * @returns true/false.987 * @param uEBX EBX return from ASMCpuId(0)988 * @param uECX ECX return from ASMCpuId(0)989 * @param uEDX EDX return from ASMCpuId(0)990 */991 DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)992 {993 return uEBX == UINT32_C(0x756e6547)994 && uECX == UINT32_C(0x6c65746e)995 && uEDX == UINT32_C(0x49656e69);996 }997 998 999 /**1000 * Tests if this is a genuine Intel CPU.1001 *1002 * @returns true/false.1003 * @remarks ASSUMES that cpuid is supported by the CPU.1004 */1005 DECLINLINE(bool) ASMIsIntelCpu(void)1006 {1007 uint32_t uEAX, uEBX, uECX, uEDX;1008 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);1009 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);1010 }1011 1012 1013 /**1014 * Tests if it a authentic AMD CPU based on the ASMCpuId(0) output.1015 *1016 * @returns true/false.1017 * @param uEBX EBX return from ASMCpuId(0)1018 * @param uECX ECX return from ASMCpuId(0)1019 * @param uEDX EDX return from ASMCpuId(0)1020 */1021 DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)1022 {1023 return uEBX == UINT32_C(0x68747541)1024 && uECX == UINT32_C(0x444d4163)1025 && uEDX == UINT32_C(0x69746e65);1026 }1027 1028 1029 /**1030 * Tests if this is an authentic AMD CPU.1031 *1032 * @returns true/false.1033 * @remarks ASSUMES that cpuid is supported by the CPU.1034 */1035 DECLINLINE(bool) ASMIsAmdCpu(void)1036 {1037 uint32_t uEAX, uEBX, uECX, uEDX;1038 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);1039 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);1040 }1041 1042 1043 /**1044 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)1045 *1046 * @returns 
Family.1047 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).1048 */1049 DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)1050 {1051 return ((uEAX >> 8) & 0xf) == 0xf1052 ? ((uEAX >> 20) & 0x7f) + 0xf1053 : ((uEAX >> 8) & 0xf);1054 }1055 1056 1057 /**1058 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.1059 *1060 * @returns Model.1061 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).1062 */1063 DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)1064 {1065 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */1066 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)1067 : ((uEAX >> 4) & 0xf);1068 }1069 1070 1071 /**1072 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.1073 *1074 * @returns Model.1075 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).1076 */1077 DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)1078 {1079 return ((uEAX >> 8) & 0xf) == 0xf1080 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)1081 : ((uEAX >> 4) & 0xf);1082 }1083 1084 1085 /**1086 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)1087 *1088 * @returns Model.1089 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).1090 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().1091 */1092 DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)1093 {1094 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */1095 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)1096 : ((uEAX >> 4) & 0xf);1097 }1098 1099 1100 /**1101 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)1102 *1103 * @returns Model.1104 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).1105 */1106 DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)1107 {1108 return uEAX & 0xf;1109 }1110 1111 1112 /**1113 * Get cr0.1114 * @returns cr0.1115 */1116 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1117 DECLASM(RTCCUINTREG) ASMGetCR0(void);1118 #else1119 DECLINLINE(RTCCUINTREG) ASMGetCR0(void)1120 {1121 RTCCUINTREG uCR0;1122 # if RT_INLINE_ASM_USES_INTRIN1123 uCR0 = __readcr0();1124 1125 # elif RT_INLINE_ASM_GNU_STYLE1126 # ifdef RT_ARCH_AMD641127 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));1128 # else1129 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));1130 # endif1131 # else1132 __asm1133 {1134 # ifdef RT_ARCH_AMD641135 mov rax, cr01136 mov [uCR0], rax1137 # else1138 mov eax, cr01139 mov [uCR0], eax1140 # endif1141 }1142 # endif1143 return uCR0;1144 }1145 #endif1146 1147 1148 /**1149 * Sets the CR0 register.1150 * @param uCR0 The new CR0 value.1151 */1152 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1153 DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);1154 #else1155 DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)1156 {1157 # if RT_INLINE_ASM_USES_INTRIN1158 __writecr0(uCR0);1159 1160 # elif RT_INLINE_ASM_GNU_STYLE1161 # ifdef RT_ARCH_AMD641162 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));1163 # else1164 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));1165 # endif1166 # else1167 __asm1168 {1169 # ifdef RT_ARCH_AMD641170 mov rax, [uCR0]1171 mov cr0, rax1172 # else1173 mov eax, [uCR0]1174 mov cr0, eax1175 # endif1176 }1177 # endif1178 }1179 #endif1180 1181 1182 /**1183 * Get cr2.1184 * @returns cr2.1185 */1186 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1187 DECLASM(RTCCUINTREG) ASMGetCR2(void);1188 #else1189 DECLINLINE(RTCCUINTREG) ASMGetCR2(void)1190 {1191 
RTCCUINTREG uCR2;1192 # if RT_INLINE_ASM_USES_INTRIN1193 uCR2 = __readcr2();1194 1195 # elif RT_INLINE_ASM_GNU_STYLE1196 # ifdef RT_ARCH_AMD641197 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));1198 # else1199 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));1200 # endif1201 # else1202 __asm1203 {1204 # ifdef RT_ARCH_AMD641205 mov rax, cr21206 mov [uCR2], rax1207 # else1208 mov eax, cr21209 mov [uCR2], eax1210 # endif1211 }1212 # endif1213 return uCR2;1214 }1215 #endif1216 1217 1218 /**1219 * Sets the CR2 register.1220 * @param uCR2 The new CR0 value.1221 */1222 #if RT_INLINE_ASM_EXTERNAL1223 DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);1224 #else1225 DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)1226 {1227 # if RT_INLINE_ASM_GNU_STYLE1228 # ifdef RT_ARCH_AMD641229 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));1230 # else1231 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));1232 # endif1233 # else1234 __asm1235 {1236 # ifdef RT_ARCH_AMD641237 mov rax, [uCR2]1238 mov cr2, rax1239 # else1240 mov eax, [uCR2]1241 mov cr2, eax1242 # endif1243 }1244 # endif1245 }1246 #endif1247 1248 1249 /**1250 * Get cr3.1251 * @returns cr3.1252 */1253 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1254 DECLASM(RTCCUINTREG) ASMGetCR3(void);1255 #else1256 DECLINLINE(RTCCUINTREG) ASMGetCR3(void)1257 {1258 RTCCUINTREG uCR3;1259 # if RT_INLINE_ASM_USES_INTRIN1260 uCR3 = __readcr3();1261 1262 # elif RT_INLINE_ASM_GNU_STYLE1263 # ifdef RT_ARCH_AMD641264 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));1265 # else1266 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));1267 # endif1268 # else1269 __asm1270 {1271 # ifdef RT_ARCH_AMD641272 mov rax, cr31273 mov [uCR3], rax1274 # else1275 mov eax, cr31276 mov [uCR3], eax1277 # endif1278 }1279 # endif1280 return uCR3;1281 }1282 #endif1283 1284 1285 /**1286 * Sets the CR3 register.1287 *1288 * @param uCR3 New CR3 value.1289 */1290 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1291 DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);1292 #else1293 DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)1294 {1295 # if RT_INLINE_ASM_USES_INTRIN1296 __writecr3(uCR3);1297 1298 # elif RT_INLINE_ASM_GNU_STYLE1299 # ifdef RT_ARCH_AMD641300 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));1301 # else1302 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));1303 # endif1304 # else1305 __asm1306 {1307 # ifdef RT_ARCH_AMD641308 mov rax, [uCR3]1309 mov cr3, rax1310 # else1311 mov eax, [uCR3]1312 mov cr3, eax1313 # endif1314 }1315 # endif1316 }1317 #endif1318 1319 1320 /**1321 * Reloads the CR3 register.1322 */1323 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1324 DECLASM(void) ASMReloadCR3(void);1325 #else1326 DECLINLINE(void) ASMReloadCR3(void)1327 {1328 # if RT_INLINE_ASM_USES_INTRIN1329 __writecr3(__readcr3());1330 1331 # elif RT_INLINE_ASM_GNU_STYLE1332 RTCCUINTREG u;1333 # ifdef RT_ARCH_AMD641334 __asm__ __volatile__("movq %%cr3, %0\n\t"1335 "movq %0, %%cr3\n\t"1336 : "=r" (u));1337 # else1338 __asm__ __volatile__("movl %%cr3, %0\n\t"1339 "movl %0, %%cr3\n\t"1340 : "=r" (u));1341 # endif1342 # else1343 __asm1344 {1345 # ifdef RT_ARCH_AMD641346 mov rax, cr31347 mov cr3, rax1348 # else1349 mov eax, cr31350 mov cr3, eax1351 # endif1352 }1353 # endif1354 }1355 #endif1356 1357 1358 /**1359 * Get cr4.1360 * @returns cr4.1361 */1362 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1363 DECLASM(RTCCUINTREG) ASMGetCR4(void);1364 #else1365 DECLINLINE(RTCCUINTREG) ASMGetCR4(void)1366 {1367 RTCCUINTREG uCR4;1368 # if 
RT_INLINE_ASM_USES_INTRIN1369 uCR4 = __readcr4();1370 1371 # elif RT_INLINE_ASM_GNU_STYLE1372 # ifdef RT_ARCH_AMD641373 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));1374 # else1375 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));1376 # endif1377 # else1378 __asm1379 {1380 # ifdef RT_ARCH_AMD641381 mov rax, cr41382 mov [uCR4], rax1383 # else1384 push eax /* just in case */1385 /*mov eax, cr4*/1386 _emit 0x0f1387 _emit 0x201388 _emit 0xe01389 mov [uCR4], eax1390 pop eax1391 # endif1392 }1393 # endif1394 return uCR4;1395 }1396 #endif1397 1398 1399 /**1400 * Sets the CR4 register.1401 *1402 * @param uCR4 New CR4 value.1403 */1404 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1405 DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);1406 #else1407 DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)1408 {1409 # if RT_INLINE_ASM_USES_INTRIN1410 __writecr4(uCR4);1411 1412 # elif RT_INLINE_ASM_GNU_STYLE1413 # ifdef RT_ARCH_AMD641414 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));1415 # else1416 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));1417 # endif1418 # else1419 __asm1420 {1421 # ifdef RT_ARCH_AMD641422 mov rax, [uCR4]1423 mov cr4, rax1424 # else1425 mov eax, [uCR4]1426 _emit 0x0F1427 _emit 0x221428 _emit 0xE0 /* mov cr4, eax */1429 # endif1430 }1431 # endif1432 }1433 #endif1434 1435 1436 /**1437 * Get cr8.1438 * @returns cr8.1439 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.1440 */1441 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1442 DECLASM(RTCCUINTREG) ASMGetCR8(void);1443 #else1444 DECLINLINE(RTCCUINTREG) ASMGetCR8(void)1445 {1446 # ifdef RT_ARCH_AMD641447 RTCCUINTREG uCR8;1448 # if RT_INLINE_ASM_USES_INTRIN1449 uCR8 = __readcr8();1450 1451 # elif RT_INLINE_ASM_GNU_STYLE1452 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));1453 # else1454 __asm1455 {1456 mov rax, cr81457 mov [uCR8], rax1458 }1459 # endif1460 return uCR8;1461 # else /* !RT_ARCH_AMD64 */1462 return 0;1463 # endif /* !RT_ARCH_AMD64 */1464 }1465 #endif1466 1467 1468 /**1469 * Enables interrupts (EFLAGS.IF).1470 */1471 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1472 DECLASM(void) ASMIntEnable(void);1473 #else1474 DECLINLINE(void) ASMIntEnable(void)1475 {1476 # if RT_INLINE_ASM_GNU_STYLE1477 __asm("sti\n");1478 # elif RT_INLINE_ASM_USES_INTRIN1479 _enable();1480 # else1481 __asm sti1482 # endif1483 }1484 #endif1485 1486 1487 /**1488 * Disables interrupts (!EFLAGS.IF).1489 */1490 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1491 DECLASM(void) ASMIntDisable(void);1492 #else1493 DECLINLINE(void) ASMIntDisable(void)1494 {1495 # if RT_INLINE_ASM_GNU_STYLE1496 __asm("cli\n");1497 # elif RT_INLINE_ASM_USES_INTRIN1498 _disable();1499 # else1500 __asm cli1501 # endif1502 }1503 #endif1504 1505 1506 /**1507 * Disables interrupts and returns previous xFLAGS.1508 */1509 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1510 DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);1511 #else1512 DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)1513 {1514 RTCCUINTREG xFlags;1515 # if RT_INLINE_ASM_GNU_STYLE1516 # ifdef RT_ARCH_AMD641517 __asm__ __volatile__("pushfq\n\t"1518 "cli\n\t"1519 "popq %0\n\t"1520 : "=r" (xFlags));1521 # else1522 __asm__ __volatile__("pushfl\n\t"1523 "cli\n\t"1524 "popl %0\n\t"1525 : "=r" (xFlags));1526 # endif1527 # elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)1528 xFlags = ASMGetFlags();1529 _disable();1530 # else1531 __asm {1532 pushfd1533 cli1534 pop [xFlags]1535 }1536 # endif1537 
return xFlags;1538 }1539 #endif1540 1541 1542 /**1543 * Are interrupts enabled?1544 *1545 * @returns true / false.1546 */1547 DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)1548 {1549 RTCCUINTREG uFlags = ASMGetFlags();1550 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;1551 }1552 1553 1554 /**1555 * Halts the CPU until interrupted.1556 */1557 #if RT_INLINE_ASM_EXTERNAL1558 DECLASM(void) ASMHalt(void);1559 #else1560 DECLINLINE(void) ASMHalt(void)1561 {1562 # if RT_INLINE_ASM_GNU_STYLE1563 __asm__ __volatile__("hlt\n\t");1564 # else1565 __asm {1566 hlt1567 }1568 # endif1569 }1570 #endif1571 1572 1573 /**1574 * The PAUSE variant of NOP for helping hyperthreaded CPUs detecing spin locks.1575 */1576 #if RT_INLINE_ASM_EXTERNAL1577 DECLASM(void) ASMNopPause(void);1578 #else1579 DECLINLINE(void) ASMNopPause(void)1580 {1581 # if RT_INLINE_ASM_GNU_STYLE1582 __asm__ __volatile__(".byte 0xf3,0x90\n\t");1583 # else1584 __asm {1585 _emit 0f3h1586 _emit 090h1587 }1588 # endif1589 }1590 #endif1591 1592 1593 /**1594 * Reads a machine specific register.1595 *1596 * @returns Register content.1597 * @param uRegister Register to read.1598 */1599 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1600 DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);1601 #else1602 DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)1603 {1604 RTUINT64U u;1605 # if RT_INLINE_ASM_GNU_STYLE1606 __asm__ __volatile__("rdmsr\n\t"1607 : "=a" (u.s.Lo),1608 "=d" (u.s.Hi)1609 : "c" (uRegister));1610 1611 # elif RT_INLINE_ASM_USES_INTRIN1612 u.u = __readmsr(uRegister);1613 1614 # else1615 __asm1616 {1617 mov ecx, [uRegister]1618 rdmsr1619 mov [u.s.Lo], eax1620 mov [u.s.Hi], edx1621 }1622 # endif1623 1624 return u.u;1625 }1626 #endif1627 1628 1629 /**1630 * Writes a machine specific register.1631 *1632 * @returns Register content.1633 * @param uRegister Register to write to.1634 * @param u64Val Value to write.1635 */1636 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1637 DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);1638 #else1639 DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)1640 {1641 RTUINT64U u;1642 1643 u.u = u64Val;1644 # if RT_INLINE_ASM_GNU_STYLE1645 __asm__ __volatile__("wrmsr\n\t"1646 ::"a" (u.s.Lo),1647 "d" (u.s.Hi),1648 "c" (uRegister));1649 1650 # elif RT_INLINE_ASM_USES_INTRIN1651 __writemsr(uRegister, u.u);1652 1653 # else1654 __asm1655 {1656 mov ecx, [uRegister]1657 mov edx, [u.s.Hi]1658 mov eax, [u.s.Lo]1659 wrmsr1660 }1661 # endif1662 }1663 #endif1664 1665 1666 /**1667 * Reads low part of a machine specific register.1668 *1669 * @returns Register content.1670 * @param uRegister Register to read.1671 */1672 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1673 DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);1674 #else1675 DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)1676 {1677 uint32_t u32;1678 # if RT_INLINE_ASM_GNU_STYLE1679 __asm__ __volatile__("rdmsr\n\t"1680 : "=a" (u32)1681 : "c" (uRegister)1682 : "edx");1683 1684 # elif RT_INLINE_ASM_USES_INTRIN1685 u32 = (uint32_t)__readmsr(uRegister);1686 1687 #else1688 __asm1689 {1690 mov ecx, [uRegister]1691 rdmsr1692 mov [u32], eax1693 }1694 # endif1695 1696 return u32;1697 }1698 #endif1699 1700 1701 /**1702 * Reads high part of a machine specific register.1703 *1704 * @returns Register content.1705 * @param uRegister Register to read.1706 */1707 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1708 DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);1709 #else1710 DECLINLINE(uint32_t) 
ASMRdMsr_High(uint32_t uRegister)1711 {1712 uint32_t u32;1713 # if RT_INLINE_ASM_GNU_STYLE1714 __asm__ __volatile__("rdmsr\n\t"1715 : "=d" (u32)1716 : "c" (uRegister)1717 : "eax");1718 1719 # elif RT_INLINE_ASM_USES_INTRIN1720 u32 = (uint32_t)(__readmsr(uRegister) >> 32);1721 1722 # else1723 __asm1724 {1725 mov ecx, [uRegister]1726 rdmsr1727 mov [u32], edx1728 }1729 # endif1730 1731 return u32;1732 }1733 #endif1734 1735 1736 /**1737 * Gets dr0.1738 *1739 * @returns dr0.1740 */1741 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1742 DECLASM(RTCCUINTREG) ASMGetDR0(void);1743 #else1744 DECLINLINE(RTCCUINTREG) ASMGetDR0(void)1745 {1746 RTCCUINTREG uDR0;1747 # if RT_INLINE_ASM_USES_INTRIN1748 uDR0 = __readdr(0);1749 # elif RT_INLINE_ASM_GNU_STYLE1750 # ifdef RT_ARCH_AMD641751 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));1752 # else1753 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));1754 # endif1755 # else1756 __asm1757 {1758 # ifdef RT_ARCH_AMD641759 mov rax, dr01760 mov [uDR0], rax1761 # else1762 mov eax, dr01763 mov [uDR0], eax1764 # endif1765 }1766 # endif1767 return uDR0;1768 }1769 #endif1770 1771 1772 /**1773 * Gets dr1.1774 *1775 * @returns dr1.1776 */1777 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1778 DECLASM(RTCCUINTREG) ASMGetDR1(void);1779 #else1780 DECLINLINE(RTCCUINTREG) ASMGetDR1(void)1781 {1782 RTCCUINTREG uDR1;1783 # if RT_INLINE_ASM_USES_INTRIN1784 uDR1 = __readdr(1);1785 # elif RT_INLINE_ASM_GNU_STYLE1786 # ifdef RT_ARCH_AMD641787 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));1788 # else1789 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));1790 # endif1791 # else1792 __asm1793 {1794 # ifdef RT_ARCH_AMD641795 mov rax, dr11796 mov [uDR1], rax1797 # else1798 mov eax, dr11799 mov [uDR1], eax1800 # endif1801 }1802 # endif1803 return uDR1;1804 }1805 #endif1806 1807 1808 /**1809 * Gets dr2.1810 *1811 * @returns dr2.1812 */1813 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1814 DECLASM(RTCCUINTREG) ASMGetDR2(void);1815 #else1816 DECLINLINE(RTCCUINTREG) ASMGetDR2(void)1817 {1818 RTCCUINTREG uDR2;1819 # if RT_INLINE_ASM_USES_INTRIN1820 uDR2 = __readdr(2);1821 # elif RT_INLINE_ASM_GNU_STYLE1822 # ifdef RT_ARCH_AMD641823 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));1824 # else1825 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));1826 # endif1827 # else1828 __asm1829 {1830 # ifdef RT_ARCH_AMD641831 mov rax, dr21832 mov [uDR2], rax1833 # else1834 mov eax, dr21835 mov [uDR2], eax1836 # endif1837 }1838 # endif1839 return uDR2;1840 }1841 #endif1842 1843 1844 /**1845 * Gets dr3.1846 *1847 * @returns dr3.1848 */1849 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1850 DECLASM(RTCCUINTREG) ASMGetDR3(void);1851 #else1852 DECLINLINE(RTCCUINTREG) ASMGetDR3(void)1853 {1854 RTCCUINTREG uDR3;1855 # if RT_INLINE_ASM_USES_INTRIN1856 uDR3 = __readdr(3);1857 # elif RT_INLINE_ASM_GNU_STYLE1858 # ifdef RT_ARCH_AMD641859 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));1860 # else1861 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));1862 # endif1863 # else1864 __asm1865 {1866 # ifdef RT_ARCH_AMD641867 mov rax, dr31868 mov [uDR3], rax1869 # else1870 mov eax, dr31871 mov [uDR3], eax1872 # endif1873 }1874 # endif1875 return uDR3;1876 }1877 #endif1878 1879 1880 /**1881 * Gets dr6.1882 *1883 * @returns dr6.1884 */1885 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1886 DECLASM(RTCCUINTREG) ASMGetDR6(void);1887 #else1888 DECLINLINE(RTCCUINTREG) ASMGetDR6(void)1889 {1890 RTCCUINTREG uDR6;1891 
# if RT_INLINE_ASM_USES_INTRIN1892 uDR6 = __readdr(6);1893 # elif RT_INLINE_ASM_GNU_STYLE1894 # ifdef RT_ARCH_AMD641895 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));1896 # else1897 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));1898 # endif1899 # else1900 __asm1901 {1902 # ifdef RT_ARCH_AMD641903 mov rax, dr61904 mov [uDR6], rax1905 # else1906 mov eax, dr61907 mov [uDR6], eax1908 # endif1909 }1910 # endif1911 return uDR6;1912 }1913 #endif1914 1915 1916 /**1917 * Reads and clears DR6.1918 *1919 * @returns DR6.1920 */1921 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1922 DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);1923 #else1924 DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)1925 {1926 RTCCUINTREG uDR6;1927 # if RT_INLINE_ASM_USES_INTRIN1928 uDR6 = __readdr(6);1929 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */1930 # elif RT_INLINE_ASM_GNU_STYLE1931 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */1932 # ifdef RT_ARCH_AMD641933 __asm__ __volatile__("movq %%dr6, %0\n\t"1934 "movq %1, %%dr6\n\t"1935 : "=r" (uDR6)1936 : "r" (uNewValue));1937 # else1938 __asm__ __volatile__("movl %%dr6, %0\n\t"1939 "movl %1, %%dr6\n\t"1940 : "=r" (uDR6)1941 : "r" (uNewValue));1942 # endif1943 # else1944 __asm1945 {1946 # ifdef RT_ARCH_AMD641947 mov rax, dr61948 mov [uDR6], rax1949 mov rcx, rax1950 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */1951 mov dr6, rcx1952 # else1953 mov eax, dr61954 mov [uDR6], eax1955 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */1956 mov dr6, ecx1957 # endif1958 }1959 # endif1960 return uDR6;1961 }1962 #endif1963 1964 1965 /**1966 * Gets dr7.1967 *1968 * @returns dr7.1969 */1970 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN1971 DECLASM(RTCCUINTREG) ASMGetDR7(void);1972 #else1973 DECLINLINE(RTCCUINTREG) ASMGetDR7(void)1974 {1975 RTCCUINTREG uDR7;1976 # if RT_INLINE_ASM_USES_INTRIN1977 uDR7 = __readdr(7);1978 # elif RT_INLINE_ASM_GNU_STYLE1979 # ifdef RT_ARCH_AMD641980 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));1981 # else1982 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));1983 # endif1984 # else1985 __asm1986 {1987 # ifdef RT_ARCH_AMD641988 mov rax, dr71989 mov [uDR7], rax1990 # else1991 mov eax, dr71992 mov [uDR7], eax1993 # endif1994 }1995 # endif1996 return uDR7;1997 }1998 #endif1999 2000 2001 /**2002 * Sets dr0.2003 *2004 * @param uDRVal Debug register value to write2005 */2006 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2007 DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);2008 #else2009 DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)2010 {2011 # if RT_INLINE_ASM_USES_INTRIN2012 __writedr(0, uDRVal);2013 # elif RT_INLINE_ASM_GNU_STYLE2014 # ifdef RT_ARCH_AMD642015 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));2016 # else2017 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));2018 # endif2019 # else2020 __asm2021 {2022 # ifdef RT_ARCH_AMD642023 mov rax, [uDRVal]2024 mov dr0, rax2025 # else2026 mov eax, [uDRVal]2027 mov dr0, eax2028 # endif2029 }2030 # endif2031 }2032 #endif2033 2034 2035 /**2036 * Sets dr1.2037 *2038 * @param uDRVal Debug register value to write2039 */2040 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2041 DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);2042 #else2043 DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)2044 {2045 # if RT_INLINE_ASM_USES_INTRIN2046 __writedr(1, uDRVal);2047 # elif RT_INLINE_ASM_GNU_STYLE2048 # ifdef RT_ARCH_AMD642049 
__asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));2050 # else2051 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));2052 # endif2053 # else2054 __asm2055 {2056 # ifdef RT_ARCH_AMD642057 mov rax, [uDRVal]2058 mov dr1, rax2059 # else2060 mov eax, [uDRVal]2061 mov dr1, eax2062 # endif2063 }2064 # endif2065 }2066 #endif2067 2068 2069 /**2070 * Sets dr2.2071 *2072 * @param uDRVal Debug register value to write2073 */2074 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2075 DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);2076 #else2077 DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)2078 {2079 # if RT_INLINE_ASM_USES_INTRIN2080 __writedr(2, uDRVal);2081 # elif RT_INLINE_ASM_GNU_STYLE2082 # ifdef RT_ARCH_AMD642083 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));2084 # else2085 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));2086 # endif2087 # else2088 __asm2089 {2090 # ifdef RT_ARCH_AMD642091 mov rax, [uDRVal]2092 mov dr2, rax2093 # else2094 mov eax, [uDRVal]2095 mov dr2, eax2096 # endif2097 }2098 # endif2099 }2100 #endif2101 2102 2103 /**2104 * Sets dr3.2105 *2106 * @param uDRVal Debug register value to write2107 */2108 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2109 DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);2110 #else2111 DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)2112 {2113 # if RT_INLINE_ASM_USES_INTRIN2114 __writedr(3, uDRVal);2115 # elif RT_INLINE_ASM_GNU_STYLE2116 # ifdef RT_ARCH_AMD642117 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));2118 # else2119 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));2120 # endif2121 # else2122 __asm2123 {2124 # ifdef RT_ARCH_AMD642125 mov rax, [uDRVal]2126 mov dr3, rax2127 # else2128 mov eax, [uDRVal]2129 mov dr3, eax2130 # endif2131 }2132 # endif2133 }2134 #endif2135 2136 2137 /**2138 * Sets dr6.2139 *2140 * @param uDRVal Debug register value to write2141 */2142 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2143 DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);2144 #else2145 DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)2146 {2147 # if RT_INLINE_ASM_USES_INTRIN2148 __writedr(6, uDRVal);2149 # elif RT_INLINE_ASM_GNU_STYLE2150 # ifdef RT_ARCH_AMD642151 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));2152 # else2153 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));2154 # endif2155 # else2156 __asm2157 {2158 # ifdef RT_ARCH_AMD642159 mov rax, [uDRVal]2160 mov dr6, rax2161 # else2162 mov eax, [uDRVal]2163 mov dr6, eax2164 # endif2165 }2166 # endif2167 }2168 #endif2169 2170 2171 /**2172 * Sets dr7.2173 *2174 * @param uDRVal Debug register value to write2175 */2176 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2177 DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);2178 #else2179 DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)2180 {2181 # if RT_INLINE_ASM_USES_INTRIN2182 __writedr(7, uDRVal);2183 # elif RT_INLINE_ASM_GNU_STYLE2184 # ifdef RT_ARCH_AMD642185 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));2186 # else2187 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));2188 # endif2189 # else2190 __asm2191 {2192 # ifdef RT_ARCH_AMD642193 mov rax, [uDRVal]2194 mov dr7, rax2195 # else2196 mov eax, [uDRVal]2197 mov dr7, eax2198 # endif2199 }2200 # endif2201 }2202 #endif2203 2204 2205 /**2206 189 * Compiler memory barrier. 
2207 190 * … … 2226 209 #endif 2227 210 2228 2229 /**2230 * Writes a 8-bit unsigned integer to an I/O port, ordered.2231 *2232 * @param Port I/O port to write to.2233 * @param u8 8-bit integer to write.2234 */2235 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2236 DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);2237 #else2238 DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)2239 {2240 # if RT_INLINE_ASM_GNU_STYLE2241 __asm__ __volatile__("outb %b1, %w0\n\t"2242 :: "Nd" (Port),2243 "a" (u8));2244 2245 # elif RT_INLINE_ASM_USES_INTRIN2246 __outbyte(Port, u8);2247 2248 # else2249 __asm2250 {2251 mov dx, [Port]2252 mov al, [u8]2253 out dx, al2254 }2255 # endif2256 }2257 #endif2258 2259 2260 /**2261 * Reads a 8-bit unsigned integer from an I/O port, ordered.2262 *2263 * @returns 8-bit integer.2264 * @param Port I/O port to read from.2265 */2266 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2267 DECLASM(uint8_t) ASMInU8(RTIOPORT Port);2268 #else2269 DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)2270 {2271 uint8_t u8;2272 # if RT_INLINE_ASM_GNU_STYLE2273 __asm__ __volatile__("inb %w1, %b0\n\t"2274 : "=a" (u8)2275 : "Nd" (Port));2276 2277 # elif RT_INLINE_ASM_USES_INTRIN2278 u8 = __inbyte(Port);2279 2280 # else2281 __asm2282 {2283 mov dx, [Port]2284 in al, dx2285 mov [u8], al2286 }2287 # endif2288 return u8;2289 }2290 #endif2291 2292 2293 /**2294 * Writes a 16-bit unsigned integer to an I/O port, ordered.2295 *2296 * @param Port I/O port to write to.2297 * @param u16 16-bit integer to write.2298 */2299 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2300 DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);2301 #else2302 DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)2303 {2304 # if RT_INLINE_ASM_GNU_STYLE2305 __asm__ __volatile__("outw %w1, %w0\n\t"2306 :: "Nd" (Port),2307 "a" (u16));2308 2309 # elif RT_INLINE_ASM_USES_INTRIN2310 __outword(Port, u16);2311 2312 # else2313 __asm2314 {2315 mov dx, [Port]2316 mov ax, [u16]2317 out dx, ax2318 }2319 # endif2320 }2321 #endif2322 2323 2324 /**2325 * Reads a 16-bit unsigned integer from an I/O port, ordered.2326 *2327 * @returns 16-bit integer.2328 * @param Port I/O port to read from.2329 */2330 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2331 DECLASM(uint16_t) ASMInU16(RTIOPORT Port);2332 #else2333 DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)2334 {2335 uint16_t u16;2336 # if RT_INLINE_ASM_GNU_STYLE2337 __asm__ __volatile__("inw %w1, %w0\n\t"2338 : "=a" (u16)2339 : "Nd" (Port));2340 2341 # elif RT_INLINE_ASM_USES_INTRIN2342 u16 = __inword(Port);2343 2344 # else2345 __asm2346 {2347 mov dx, [Port]2348 in ax, dx2349 mov [u16], ax2350 }2351 # endif2352 return u16;2353 }2354 #endif2355 2356 2357 /**2358 * Writes a 32-bit unsigned integer to an I/O port, ordered.2359 *2360 * @param Port I/O port to write to.2361 * @param u32 32-bit integer to write.2362 */2363 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2364 DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);2365 #else2366 DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)2367 {2368 # if RT_INLINE_ASM_GNU_STYLE2369 __asm__ __volatile__("outl %1, %w0\n\t"2370 :: "Nd" (Port),2371 "a" (u32));2372 2373 # elif RT_INLINE_ASM_USES_INTRIN2374 __outdword(Port, u32);2375 2376 # else2377 __asm2378 {2379 mov dx, [Port]2380 mov eax, [u32]2381 out dx, eax2382 }2383 # endif2384 }2385 #endif2386 2387 2388 /**2389 * Reads a 32-bit unsigned integer from an I/O port, ordered.2390 *2391 * @returns 32-bit integer.2392 * @param Port I/O port to read from.2393 */2394 
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2395 DECLASM(uint32_t) ASMInU32(RTIOPORT Port);2396 #else2397 DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)2398 {2399 uint32_t u32;2400 # if RT_INLINE_ASM_GNU_STYLE2401 __asm__ __volatile__("inl %w1, %0\n\t"2402 : "=a" (u32)2403 : "Nd" (Port));2404 2405 # elif RT_INLINE_ASM_USES_INTRIN2406 u32 = __indword(Port);2407 2408 # else2409 __asm2410 {2411 mov dx, [Port]2412 in eax, dx2413 mov [u32], eax2414 }2415 # endif2416 return u32;2417 }2418 #endif2419 2420 2421 /**2422 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.2423 *2424 * @param Port I/O port to write to.2425 * @param pau8 Pointer to the string buffer.2426 * @param c The number of items to write.2427 */2428 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2429 DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);2430 #else2431 DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)2432 {2433 # if RT_INLINE_ASM_GNU_STYLE2434 __asm__ __volatile__("rep; outsb\n\t"2435 : "+S" (pau8),2436 "+c" (c)2437 : "d" (Port));2438 2439 # elif RT_INLINE_ASM_USES_INTRIN2440 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);2441 2442 # else2443 __asm2444 {2445 mov dx, [Port]2446 mov ecx, [c]2447 mov eax, [pau8]2448 xchg esi, eax2449 rep outsb2450 xchg esi, eax2451 }2452 # endif2453 }2454 #endif2455 2456 2457 /**2458 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.2459 *2460 * @param Port I/O port to read from.2461 * @param pau8 Pointer to the string buffer (output).2462 * @param c The number of items to read.2463 */2464 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2465 DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);2466 #else2467 DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)2468 {2469 # if RT_INLINE_ASM_GNU_STYLE2470 __asm__ __volatile__("rep; insb\n\t"2471 : "+D" (pau8),2472 "+c" (c)2473 : "d" (Port));2474 2475 # elif RT_INLINE_ASM_USES_INTRIN2476 __inbytestring(Port, pau8, (unsigned long)c);2477 2478 # else2479 __asm2480 {2481 mov dx, [Port]2482 mov ecx, [c]2483 mov eax, [pau8]2484 xchg edi, eax2485 rep insb2486 xchg edi, eax2487 }2488 # endif2489 }2490 #endif2491 2492 2493 /**2494 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.2495 *2496 * @param Port I/O port to write to.2497 * @param pau16 Pointer to the string buffer.2498 * @param c The number of items to write.2499 */2500 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2501 DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);2502 #else2503 DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)2504 {2505 # if RT_INLINE_ASM_GNU_STYLE2506 __asm__ __volatile__("rep; outsw\n\t"2507 : "+S" (pau16),2508 "+c" (c)2509 : "d" (Port));2510 2511 # elif RT_INLINE_ASM_USES_INTRIN2512 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);2513 2514 # else2515 __asm2516 {2517 mov dx, [Port]2518 mov ecx, [c]2519 mov eax, [pau16]2520 xchg esi, eax2521 rep outsw2522 xchg esi, eax2523 }2524 # endif2525 }2526 #endif2527 2528 2529 /**2530 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.2531 *2532 * @param Port I/O port to read from.2533 * @param pau16 Pointer to the string buffer (output).2534 * @param c The number of items to read.2535 */2536 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2537 DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);2538 #else2539 
DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)2540 {2541 # if RT_INLINE_ASM_GNU_STYLE2542 __asm__ __volatile__("rep; insw\n\t"2543 : "+D" (pau16),2544 "+c" (c)2545 : "d" (Port));2546 2547 # elif RT_INLINE_ASM_USES_INTRIN2548 __inwordstring(Port, pau16, (unsigned long)c);2549 2550 # else2551 __asm2552 {2553 mov dx, [Port]2554 mov ecx, [c]2555 mov eax, [pau16]2556 xchg edi, eax2557 rep insw2558 xchg edi, eax2559 }2560 # endif2561 }2562 #endif2563 2564 2565 /**2566 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.2567 *2568 * @param Port I/O port to write to.2569 * @param pau32 Pointer to the string buffer.2570 * @param c The number of items to write.2571 */2572 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2573 DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);2574 #else2575 DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)2576 {2577 # if RT_INLINE_ASM_GNU_STYLE2578 __asm__ __volatile__("rep; outsl\n\t"2579 : "+S" (pau32),2580 "+c" (c)2581 : "d" (Port));2582 2583 # elif RT_INLINE_ASM_USES_INTRIN2584 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);2585 2586 # else2587 __asm2588 {2589 mov dx, [Port]2590 mov ecx, [c]2591 mov eax, [pau32]2592 xchg esi, eax2593 rep outsd2594 xchg esi, eax2595 }2596 # endif2597 }2598 #endif2599 2600 2601 /**2602 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.2603 *2604 * @param Port I/O port to read from.2605 * @param pau32 Pointer to the string buffer (output).2606 * @param c The number of items to read.2607 */2608 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN2609 DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);2610 #else2611 DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)2612 {2613 # if RT_INLINE_ASM_GNU_STYLE2614 __asm__ __volatile__("rep; insl\n\t"2615 : "+D" (pau32),2616 "+c" (c)2617 : "d" (Port));2618 2619 # elif RT_INLINE_ASM_USES_INTRIN2620 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);2621 2622 # else2623 __asm2624 {2625 mov dx, [Port]2626 mov ecx, [c]2627 mov eax, [pau32]2628 xchg edi, eax2629 rep insd2630 xchg edi, eax2631 }2632 # endif2633 }2634 #endif2635 211 2636 212 … … 3975 1551 3976 1552 /** 3977 * Memory load/store fence, waits for any pending writes and reads to complete.3978 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.3979 */3980 DECLINLINE(void) ASMMemoryFenceSSE2(void)3981 {3982 #if RT_INLINE_ASM_GNU_STYLE3983 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");3984 #elif RT_INLINE_ASM_USES_INTRIN3985 _mm_mfence();3986 #else3987 __asm3988 {3989 _emit 0x0f3990 _emit 0xae3991 _emit 0xf03992 }3993 #endif3994 }3995 3996 3997 /**3998 * Memory store fence, waits for any writes to complete.3999 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.4000 */4001 DECLINLINE(void) ASMWriteFenceSSE(void)4002 {4003 #if RT_INLINE_ASM_GNU_STYLE4004 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");4005 #elif RT_INLINE_ASM_USES_INTRIN4006 _mm_sfence();4007 #else4008 __asm4009 {4010 _emit 0x0f4011 _emit 0xae4012 _emit 0xf84013 }4014 #endif4015 }4016 4017 4018 /**4019 * Memory load fence, waits for any pending reads to complete.4020 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.4021 */4022 DECLINLINE(void) ASMReadFenceSSE2(void)4023 {4024 #if RT_INLINE_ASM_GNU_STYLE4025 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");4026 #elif RT_INLINE_ASM_USES_INTRIN4027 _mm_lfence();4028 #else4029 __asm4030 {4031 _emit 
0x0f4032 _emit 0xae4033 _emit 0xe84034 }4035 #endif4036 }4037 4038 4039 /**4040 1553 * Memory fence, waits for any pending writes and reads to complete. 4041 1554 */ … … 4923 2436 4924 2437 4925 /**4926 * Invalidate page.4927 *4928 * @param pv Address of the page to invalidate.4929 */4930 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN4931 DECLASM(void) ASMInvalidatePage(void *pv);4932 #else4933 DECLINLINE(void) ASMInvalidatePage(void *pv)4934 {4935 # if RT_INLINE_ASM_USES_INTRIN4936 __invlpg(pv);4937 4938 # elif RT_INLINE_ASM_GNU_STYLE4939 __asm__ __volatile__("invlpg %0\n\t"4940 : : "m" (*(uint8_t *)pv));4941 # else4942 __asm4943 {4944 # ifdef RT_ARCH_AMD644945 mov rax, [pv]4946 invlpg [rax]4947 # else4948 mov eax, [pv]4949 invlpg [eax]4950 # endif4951 }4952 # endif4953 }4954 #endif4955 4956 4957 /**4958 * Write back the internal caches and invalidate them.4959 */4960 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN4961 DECLASM(void) ASMWriteBackAndInvalidateCaches(void);4962 #else4963 DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)4964 {4965 # if RT_INLINE_ASM_USES_INTRIN4966 __wbinvd();4967 4968 # elif RT_INLINE_ASM_GNU_STYLE4969 __asm__ __volatile__("wbinvd");4970 # else4971 __asm4972 {4973 wbinvd4974 }4975 # endif4976 }4977 #endif4978 4979 4980 /**4981 * Invalidate internal and (perhaps) external caches without first4982 * flushing dirty cache lines. Use with extreme care.4983 */4984 #if RT_INLINE_ASM_EXTERNAL4985 DECLASM(void) ASMInvalidateInternalCaches(void);4986 #else4987 DECLINLINE(void) ASMInvalidateInternalCaches(void)4988 {4989 # if RT_INLINE_ASM_GNU_STYLE4990 __asm__ __volatile__("invd");4991 # else4992 __asm4993 {4994 invd4995 }4996 # endif4997 }4998 #endif4999 5000 5001 2438 #if defined(PAGE_SIZE) && !defined(NT_INCLUDED) 5002 2439 # if PAGE_SIZE != 0x1000 … … 5270 2707 return (uint32_t *)pu32; 5271 2708 return NULL; 5272 }5273 #endif5274 5275 5276 /**5277 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.5278 *5279 * @returns u32F1 * u32F2.5280 */5281 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5282 DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);5283 #else5284 DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)5285 {5286 # ifdef RT_ARCH_AMD645287 return (uint64_t)u32F1 * u32F2;5288 # else /* !RT_ARCH_AMD64 */5289 uint64_t u64;5290 # if RT_INLINE_ASM_GNU_STYLE5291 __asm__ __volatile__("mull %%edx"5292 : "=A" (u64)5293 : "a" (u32F2), "d" (u32F1));5294 # else5295 __asm5296 {5297 mov edx, [u32F1]5298 mov eax, [u32F2]5299 mul edx5300 mov dword ptr [u64], eax5301 mov dword ptr [u64 + 4], edx5302 }5303 # endif5304 return u64;5305 # endif /* !RT_ARCH_AMD64 */5306 }5307 #endif5308 5309 5310 /**5311 * Multiplies two signed 32-bit values returning a signed 64-bit result.5312 *5313 * @returns u32F1 * u32F2.5314 */5315 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5316 DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);5317 #else5318 DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)5319 {5320 # ifdef RT_ARCH_AMD645321 return (int64_t)i32F1 * i32F2;5322 # else /* !RT_ARCH_AMD64 */5323 int64_t i64;5324 # if RT_INLINE_ASM_GNU_STYLE5325 __asm__ __volatile__("imull %%edx"5326 : "=A" (i64)5327 : "a" (i32F2), "d" (i32F1));5328 # else5329 __asm5330 {5331 mov edx, [i32F1]5332 mov eax, [i32F2]5333 imul edx5334 mov dword ptr [i64], eax5335 mov dword ptr [i64 + 4], edx5336 }5337 # endif5338 return i64;5339 # endif /* !RT_ARCH_AMD64 */5340 }5341 #endif5342 
5343 5344 /**5345 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.5346 *5347 * @returns u64 / u32.5348 */5349 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5350 DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);5351 #else5352 DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)5353 {5354 # ifdef RT_ARCH_AMD645355 return (uint32_t)(u64 / u32);5356 # else /* !RT_ARCH_AMD64 */5357 # if RT_INLINE_ASM_GNU_STYLE5358 RTCCUINTREG uDummy;5359 __asm__ __volatile__("divl %3"5360 : "=a" (u32), "=d"(uDummy)5361 : "A" (u64), "r" (u32));5362 # else5363 __asm5364 {5365 mov eax, dword ptr [u64]5366 mov edx, dword ptr [u64 + 4]5367 mov ecx, [u32]5368 div ecx5369 mov [u32], eax5370 }5371 # endif5372 return u32;5373 # endif /* !RT_ARCH_AMD64 */5374 }5375 #endif5376 5377 5378 /**5379 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.5380 *5381 * @returns u64 / u32.5382 */5383 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5384 DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);5385 #else5386 DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)5387 {5388 # ifdef RT_ARCH_AMD645389 return (int32_t)(i64 / i32);5390 # else /* !RT_ARCH_AMD64 */5391 # if RT_INLINE_ASM_GNU_STYLE5392 RTCCUINTREG iDummy;5393 __asm__ __volatile__("idivl %3"5394 : "=a" (i32), "=d"(iDummy)5395 : "A" (i64), "r" (i32));5396 # else5397 __asm5398 {5399 mov eax, dword ptr [i64]5400 mov edx, dword ptr [i64 + 4]5401 mov ecx, [i32]5402 idiv ecx5403 mov [i32], eax5404 }5405 # endif5406 return i32;5407 # endif /* !RT_ARCH_AMD64 */5408 }5409 #endif5410 5411 5412 /**5413 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,5414 * returning the rest.5415 *5416 * @returns u64 % u32.5417 *5418 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.5419 */5420 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5421 DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);5422 #else5423 DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)5424 {5425 # ifdef RT_ARCH_AMD645426 return (uint32_t)(u64 % u32);5427 # else /* !RT_ARCH_AMD64 */5428 # if RT_INLINE_ASM_GNU_STYLE5429 RTCCUINTREG uDummy;5430 __asm__ __volatile__("divl %3"5431 : "=a" (uDummy), "=d"(u32)5432 : "A" (u64), "r" (u32));5433 # else5434 __asm5435 {5436 mov eax, dword ptr [u64]5437 mov edx, dword ptr [u64 + 4]5438 mov ecx, [u32]5439 div ecx5440 mov [u32], edx5441 }5442 # endif5443 return u32;5444 # endif /* !RT_ARCH_AMD64 */5445 }5446 #endif5447 5448 5449 /**5450 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,5451 * returning the rest.5452 *5453 * @returns u64 % u32.5454 *5455 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.5456 */5457 #if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)5458 DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);5459 #else5460 DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)5461 {5462 # ifdef RT_ARCH_AMD645463 return (int32_t)(i64 % i32);5464 # else /* !RT_ARCH_AMD64 */5465 # if RT_INLINE_ASM_GNU_STYLE5466 RTCCUINTREG iDummy;5467 __asm__ __volatile__("idivl %3"5468 : "=a" (iDummy), "=d"(i32)5469 : "A" (i64), "r" (i32));5470 # else5471 __asm5472 {5473 mov eax, dword ptr [i64]5474 mov edx, dword ptr [i64 + 4]5475 mov ecx, [i32]5476 idiv ecx5477 mov [i32], edx5478 }5479 # endif5480 return i32;5481 # endif /* !RT_ARCH_AMD64 */5482 
}
5483 #endif
5484
5485
5486 /**
5487 * Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
5488 * using a 96 bit intermediate result.
5489 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5490 * __udivdi3 and __umoddi3 even if this inline function is not used.
5491 *
5492 * @returns (u64A * u32B) / u32C.
5493 * @param u64A The 64-bit value.
5494 * @param u32B The 32-bit value to multiply by A.
5495 * @param u32C The 32-bit value to divide A*B by.
5496 */
5497 #if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5498 DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5499 #else
5500 DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5501 {
5502 # if RT_INLINE_ASM_GNU_STYLE
5503 # ifdef RT_ARCH_AMD64
5504 uint64_t u64Result, u64Spill;
5505 __asm__ __volatile__("mulq %2\n\t"
5506 "divq %3\n\t"
5507 : "=a" (u64Result),
5508 "=d" (u64Spill)
5509 : "r" ((uint64_t)u32B),
5510 "r" ((uint64_t)u32C),
5511 "0" (u64A),
5512 "1" (0));
5513 return u64Result;
5514 # else
5515 uint32_t u32Dummy;
5516 uint64_t u64Result;
5517 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5518 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5519 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5520 eax = u64A.hi */
5521 "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
5522 edx = u32C */
5523 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5524 edx = u32B */
5525 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5526 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5527 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5528 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5529 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5530 edx = u64Hi % u32C */
5531 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5532 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5533 "divl %%ecx \n\t" /* u64Result.lo */
5534 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5535 : "=A"(u64Result), "=c"(u32Dummy),
5536 "=S"(u32Dummy), "=D"(u32Dummy)
5537 : "a"((uint32_t)u64A),
5538 "S"((uint32_t)(u64A >> 32)),
5539 "c"(u32B),
5540 "D"(u32C));
5541 return u64Result;
5542 # endif
5543 # else
5544 RTUINT64U u;
5545 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5546 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5547 u64Hi += (u64Lo >> 32);
5548 u.s.Hi = (uint32_t)(u64Hi / u32C);
5549 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5550 return u.u;
5551 # endif
5552 2709 }
5553 2710 #endif
… …
6826 3983
6827 3984 /** @} */
6828 #endif
6829 3985
3986 /*
3987 * Include the architecture specific header.
3988 */
3989 /** @todo drop this bit and require the asm-x86.h to be included explicitly
3990 * instead... */
3991 # if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3992 # include <iprt/asm-amd64-x86.h>
3993 # endif
3994
3995 #endif
3996
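
Editorial aside, not part of the changeset: the tail of this diff is the heart of the split. On AMD64 and x86 the generic iprt/asm.h still pulls in the new iprt/asm-amd64-x86.h, so existing includes keep compiling, while the @todo above signals that callers should eventually include the CPU-specific header themselves. A rough caller sketch under those assumptions (the helper name ExampleHasSse2 is invented for illustration):

    #include <iprt/asm.h>             /* generic helpers; on x86/amd64 this currently drags in asm-amd64-x86.h too */
    #include <iprt/asm-amd64-x86.h>   /* explicit include of the CPU-specific helpers, as the @todo suggests */

    /* Illustration only: query CPUID leaf 1 and test the SSE2 feature bit (EDX bit 26). */
    static int ExampleHasSse2(void)
    {
        uint32_t uEAX, uEBX, uECX, uEDX;
        if (!ASMHasCpuId())                        /* guard against pre-CPUID 32-bit CPUs */
            return 0;
        ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);   /* standard feature leaf */
        return (uEDX >> 26) & 1;                   /* X86_CPUID_FEATURE_EDX_SSE2 */
    }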