source: vbox/trunk/include/iprt/asm.h @ 53399

Last change on this file since 53399 was 52929, checked in by vboxsync, 10 years ago

iprt/asm.h: fixed warning, volatile not necessary here

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the _MSC_VER >= 1400 intrinsics.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84
85/** @defgroup grp_rt_asm ASM - Assembly Routines
86 * @ingroup grp_rt
87 *
88 * @remarks The difference between ordered and unordered atomic operations is that
89 * the former will complete outstanding reads and writes before continuing,
90 * while the latter makes no promises about the order. Ordered
91 * operations don't, it seems, make any 100% promise with respect to whether
92 * the operation will complete before any subsequent memory access.
93 * (Please correct this if it is wrong.)
94 *
95 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
96 * are unordered (note the Uo).
97 *
98 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
99 * or even optimize assembler instructions away. For instance, in the following code
100 * the second rdmsr instruction is optimized away because gcc treats that instruction
101 * as deterministic:
102 *
103 * @code
104 * static inline uint32_t rdmsr_low(int idx)
105 * {
106 * uint32_t low;
107 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
108 * return low;
 * }
109 * ...
110 * uint32_t msr1 = rdmsr_low(1);
111 * foo(msr1);
112 * msr1 = rdmsr_low(1);
113 * bar(msr1);
114 * @endcode
115 *
116 * The input parameter of rdmsr_low is the same for both calls, so gcc will
117 * reuse the result of the first call as the input for bar() as well. For rdmsr this
118 * is not acceptable, as this instruction is _not_ deterministic. The same applies to reading
119 * machine status information in general.
120 *
121 * @{
122 */
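/*
 * Illustrative sketch (not part of the original header): the same wrapper
 * written with __volatile__ and an explicit return.  Declaring the asm
 * volatile tells gcc it has side effects it cannot see, so neither rdmsr
 * may be optimized away:
 *
 *     static inline uint32_t rdmsr_low_volatile(int idx)
 *     {
 *         uint32_t low;
 *         __asm__ __volatile__("rdmsr" : "=a" (low) : "c" (idx) : "edx");
 *         return low;
 *     }
 */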
123
124
125/** @def RT_INLINE_ASM_GCC_4_3_X_X86
126 * Used to work around some 4.3.x register allocation issues in this version of
127 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
128#ifdef __GNUC__
129# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
130#endif
131#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
132# define RT_INLINE_ASM_GCC_4_3_X_X86 0
133#endif
134
135/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
136 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
137 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds (PIC
138 * mode, x86).
139 *
140 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
141 * when in PIC mode on x86.
142 */
143#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
144# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
145 ( (defined(PIC) || defined(__PIC__)) \
146 && defined(RT_ARCH_X86) \
147 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
148 || defined(RT_OS_DARWIN)) )
149#endif
150
151
152/** @def ASMReturnAddress
153 * Gets the return address of the current (or calling if you like) function or method.
154 */
155#ifdef _MSC_VER
156# ifdef __cplusplus
157extern "C"
158# endif
159void * _ReturnAddress(void);
160# pragma intrinsic(_ReturnAddress)
161# define ASMReturnAddress() _ReturnAddress()
162#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
163# define ASMReturnAddress() __builtin_return_address(0)
164#else
165# error "Unsupported compiler."
166#endif
167
168
169/**
170 * Compiler memory barrier.
171 *
172 * Ensures that the compiler does not use any cached (register/temporary stack) memory
173 * values or keep any outstanding writes pending when returning from this function.
174 *
175 * This function must be used if non-volatile data is modified by a
176 * device or the VMM. Typical cases are port access, MMIO access,
177 * trapping instructions, etc.
178 */
179#if RT_INLINE_ASM_GNU_STYLE
180# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
181#elif RT_INLINE_ASM_USES_INTRIN
182# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
183#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
184DECLINLINE(void) ASMCompilerBarrier(void)
185{
186 __asm
187 {
188 }
189}
190#endif
191
192
193/** @def ASMBreakpoint
194 * Debugger Breakpoint.
195 * @deprecated Use RT_BREAKPOINT instead.
196 * @internal
197 */
198#define ASMBreakpoint() RT_BREAKPOINT()
199
200
201/**
202 * Spinloop hint for platforms that have these, empty function on the other
203 * platforms.
204 *
205 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
206 * spin locks.
207 */
208#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
209DECLASM(void) ASMNopPause(void);
210#else
211DECLINLINE(void) ASMNopPause(void)
212{
213# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
214# if RT_INLINE_ASM_GNU_STYLE
215 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
216# else
217 __asm {
218 _emit 0f3h
219 _emit 090h
220 }
221# endif
222# else
223 /* dummy */
224# endif
225}
226#endif
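/*
 * Usage sketch (illustrative only, not part of the original header): a polite
 * polling loop that pauses between probes of a flag set by another thread;
 * g_fDone is a hypothetical variable and ASMAtomicReadBool is the ordered
 * boolean read declared further down in this header.
 *
 *     volatile bool g_fDone = false;
 *     ...
 *     while (!ASMAtomicReadBool(&g_fDone))
 *         ASMNopPause();
 */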
227
228
229/**
230 * Atomically Exchange an unsigned 8-bit value, ordered.
231 *
232 * @returns Current *pu8 value
233 * @param pu8 Pointer to the 8-bit variable to update.
234 * @param u8 The 8-bit value to assign to *pu8.
235 */
236#if RT_INLINE_ASM_EXTERNAL
237DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
238#else
239DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
240{
241# if RT_INLINE_ASM_GNU_STYLE
242 __asm__ __volatile__("xchgb %0, %1\n\t"
243 : "=m" (*pu8),
244 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
245 : "1" (u8),
246 "m" (*pu8));
247# else
248 __asm
249 {
250# ifdef RT_ARCH_AMD64
251 mov rdx, [pu8]
252 mov al, [u8]
253 xchg [rdx], al
254 mov [u8], al
255# else
256 mov edx, [pu8]
257 mov al, [u8]
258 xchg [edx], al
259 mov [u8], al
260# endif
261 }
262# endif
263 return u8;
264}
265#endif
266
267
268/**
269 * Atomically Exchange a signed 8-bit value, ordered.
270 *
271 * @returns Current *pi8 value
272 * @param pi8 Pointer to the 8-bit variable to update.
273 * @param i8 The 8-bit value to assign to *pi8.
274 */
275DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
276{
277 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
278}
279
280
281/**
282 * Atomically Exchange a bool value, ordered.
283 *
284 * @returns Current *pf value
285 * @param pf Pointer to the boolean variable to update.
286 * @param f The boolean value to assign to *pf.
287 */
288DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
289{
290#ifdef _MSC_VER
291 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
292#else
293 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
294#endif
295}
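/*
 * Usage sketch (illustrative only): a minimal test-and-set spinlock built on
 * ASMAtomicXchgBool; g_fLocked, MyLock and MyUnlock are hypothetical names.
 * The exchange returns the previous value, so a true result means somebody
 * else already holds the lock.
 *
 *     volatile bool g_fLocked = false;
 *
 *     void MyLock(void)
 *     {
 *         while (ASMAtomicXchgBool(&g_fLocked, true))
 *             ASMNopPause();
 *     }
 *
 *     void MyUnlock(void)
 *     {
 *         ASMAtomicXchgBool(&g_fLocked, false);
 *     }
 */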
296
297
298/**
299 * Atomically Exchange an unsigned 16-bit value, ordered.
300 *
301 * @returns Current *pu16 value
302 * @param pu16 Pointer to the 16-bit variable to update.
303 * @param u16 The 16-bit value to assign to *pu16.
304 */
305#if RT_INLINE_ASM_EXTERNAL
306DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
307#else
308DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
309{
310# if RT_INLINE_ASM_GNU_STYLE
311 __asm__ __volatile__("xchgw %0, %1\n\t"
312 : "=m" (*pu16),
313 "=r" (u16)
314 : "1" (u16),
315 "m" (*pu16));
316# else
317 __asm
318 {
319# ifdef RT_ARCH_AMD64
320 mov rdx, [pu16]
321 mov ax, [u16]
322 xchg [rdx], ax
323 mov [u16], ax
324# else
325 mov edx, [pu16]
326 mov ax, [u16]
327 xchg [edx], ax
328 mov [u16], ax
329# endif
330 }
331# endif
332 return u16;
333}
334#endif
335
336
337/**
338 * Atomically Exchange a signed 16-bit value, ordered.
339 *
340 * @returns Current *pi16 value
341 * @param pi16 Pointer to the 16-bit variable to update.
342 * @param i16 The 16-bit value to assign to *pi16.
343 */
344DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
345{
346 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
347}
348
349
350/**
351 * Atomically Exchange an unsigned 32-bit value, ordered.
352 *
353 * @returns Current *pu32 value
354 * @param pu32 Pointer to the 32-bit variable to update.
355 * @param u32 The 32-bit value to assign to *pu32.
356 */
357#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
358DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
359#else
360DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
361{
362# if RT_INLINE_ASM_GNU_STYLE
363 __asm__ __volatile__("xchgl %0, %1\n\t"
364 : "=m" (*pu32),
365 "=r" (u32)
366 : "1" (u32),
367 "m" (*pu32));
368
369# elif RT_INLINE_ASM_USES_INTRIN
370 u32 = _InterlockedExchange((long *)pu32, u32);
371
372# else
373 __asm
374 {
375# ifdef RT_ARCH_AMD64
376 mov rdx, [pu32]
377 mov eax, u32
378 xchg [rdx], eax
379 mov [u32], eax
380# else
381 mov edx, [pu32]
382 mov eax, u32
383 xchg [edx], eax
384 mov [u32], eax
385# endif
386 }
387# endif
388 return u32;
389}
390#endif
391
392
393/**
394 * Atomically Exchange a signed 32-bit value, ordered.
395 *
396 * @returns Current *pi32 value
397 * @param pi32 Pointer to the 32-bit variable to update.
398 * @param i32 The 32-bit value to assign to *pi32.
399 */
400DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
401{
402 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
403}
404
405
406/**
407 * Atomically Exchange an unsigned 64-bit value, ordered.
408 *
409 * @returns Current *pu64 value
410 * @param pu64 Pointer to the 64-bit variable to update.
411 * @param u64 The 64-bit value to assign to *pu64.
412 */
413#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
414 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
415DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
416#else
417DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
418{
419# if defined(RT_ARCH_AMD64)
420# if RT_INLINE_ASM_USES_INTRIN
421 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
422
423# elif RT_INLINE_ASM_GNU_STYLE
424 __asm__ __volatile__("xchgq %0, %1\n\t"
425 : "=m" (*pu64),
426 "=r" (u64)
427 : "1" (u64),
428 "m" (*pu64));
429# else
430 __asm
431 {
432 mov rdx, [pu64]
433 mov rax, [u64]
434 xchg [rdx], rax
435 mov [u64], rax
436 }
437# endif
438# else /* !RT_ARCH_AMD64 */
439# if RT_INLINE_ASM_GNU_STYLE
440# if defined(PIC) || defined(__PIC__)
441 uint32_t u32EBX = (uint32_t)u64;
442 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
443 "xchgl %%ebx, %3\n\t"
444 "1:\n\t"
445 "lock; cmpxchg8b (%5)\n\t"
446 "jnz 1b\n\t"
447 "movl %3, %%ebx\n\t"
448 /*"xchgl %%esi, %5\n\t"*/
449 : "=A" (u64),
450 "=m" (*pu64)
451 : "0" (*pu64),
452 "m" ( u32EBX ),
453 "c" ( (uint32_t)(u64 >> 32) ),
454 "S" (pu64));
455# else /* !PIC */
456 __asm__ __volatile__("1:\n\t"
457 "lock; cmpxchg8b %1\n\t"
458 "jnz 1b\n\t"
459 : "=A" (u64),
460 "=m" (*pu64)
461 : "0" (*pu64),
462 "b" ( (uint32_t)u64 ),
463 "c" ( (uint32_t)(u64 >> 32) ));
464# endif
465# else
466 __asm
467 {
468 mov ebx, dword ptr [u64]
469 mov ecx, dword ptr [u64 + 4]
470 mov edi, pu64
471 mov eax, dword ptr [edi]
472 mov edx, dword ptr [edi + 4]
473 retry:
474 lock cmpxchg8b [edi]
475 jnz retry
476 mov dword ptr [u64], eax
477 mov dword ptr [u64 + 4], edx
478 }
479# endif
480# endif /* !RT_ARCH_AMD64 */
481 return u64;
482}
483#endif
484
485
486/**
487 * Atomically Exchange a signed 64-bit value, ordered.
488 *
489 * @returns Current *pi64 value
490 * @param pi64 Pointer to the 64-bit variable to update.
491 * @param i64 The 64-bit value to assign to *pi64.
492 */
493DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
494{
495 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
496}
497
498
499/**
500 * Atomically Exchange a pointer value, ordered.
501 *
502 * @returns Current *ppv value
503 * @param ppv Pointer to the pointer variable to update.
504 * @param pv The pointer value to assign to *ppv.
505 */
506DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
507{
508#if ARCH_BITS == 32
509 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
510#elif ARCH_BITS == 64
511 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
512#else
513# error "ARCH_BITS is bogus"
514#endif
515}
516
517
518/**
519 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
520 *
521 * @returns Current *pv value
522 * @param ppv Pointer to the pointer variable to update.
523 * @param pv The pointer value to assign to *ppv.
524 * @param Type The type of *ppv, sans volatile.
525 */
526#ifdef __GNUC__
527# define ASMAtomicXchgPtrT(ppv, pv, Type) \
528 __extension__ \
529 ({\
530 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
531 Type const pvTypeChecked = (pv); \
532 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
533 pvTypeCheckedRet; \
534 })
535#else
536# define ASMAtomicXchgPtrT(ppv, pv, Type) \
537 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
538#endif
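/*
 * Usage sketch (illustrative only): detaching an entire lock-free list in one
 * atomic operation without any casts; MYNODE and g_pHead are hypothetical.
 *
 *     typedef struct MYNODE { struct MYNODE *pNext; int iValue; } MYNODE;
 *     MYNODE * volatile g_pHead;
 *     ...
 *     MYNODE *pDetached = ASMAtomicXchgPtrT(&g_pHead, NULL, MYNODE *);
 *     // pDetached now owns the whole chain and can be walked without locking.
 */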
539
540
541/**
542 * Atomically Exchange a raw-mode context pointer value, ordered.
543 *
544 * @returns Current *ppvRC value
545 * @param ppvRC Pointer to the pointer variable to update.
546 * @param pvRC The pointer value to assign to *ppvRC.
547 */
548DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
549{
550 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
551}
552
553
554/**
555 * Atomically Exchange a ring-0 pointer value, ordered.
556 *
557 * @returns Current *ppvR0 value
558 * @param ppvR0 Pointer to the pointer variable to update.
559 * @param pvR0 The pointer value to assign to *ppvR0.
560 */
561DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
562{
563#if R0_ARCH_BITS == 32
564 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
565#elif R0_ARCH_BITS == 64
566 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
567#else
568# error "R0_ARCH_BITS is bogus"
569#endif
570}
571
572
573/**
574 * Atomically Exchange a ring-3 pointer value, ordered.
575 *
576 * @returns Current *ppvR3 value
577 * @param ppvR3 Pointer to the pointer variable to update.
578 * @param pvR3 The pointer value to assign to *ppvR3.
579 */
580DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
581{
582#if R3_ARCH_BITS == 32
583 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
584#elif R3_ARCH_BITS == 64
585 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
586#else
587# error "R3_ARCH_BITS is bogus"
588#endif
589}
590
591
592/** @def ASMAtomicXchgHandle
593 * Atomically Exchange a typical IPRT handle value, ordered.
594 *
595 * @param ph Pointer to the value to update.
596 * @param hNew The new value to assign to *ph.
597 * @param phRes Where to store the current *ph value.
598 *
599 * @remarks This doesn't currently work for all handles (like RTFILE).
600 */
601#if HC_ARCH_BITS == 32
602# define ASMAtomicXchgHandle(ph, hNew, phRes) \
603 do { \
604 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
605 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
606 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
607 } while (0)
608#elif HC_ARCH_BITS == 64
609# define ASMAtomicXchgHandle(ph, hNew, phRes) \
610 do { \
611 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
612 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
613 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
614 } while (0)
615#else
616# error HC_ARCH_BITS
617#endif
618
619
620/**
621 * Atomically Exchange a value which size might differ
622 * between platforms or compilers, ordered.
623 *
624 * @param pu Pointer to the variable to update.
625 * @param uNew The value to assign to *pu.
626 * @todo This is busted as it's missing the result argument.
627 */
628#define ASMAtomicXchgSize(pu, uNew) \
629 do { \
630 switch (sizeof(*(pu))) { \
631 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
632 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
633 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
634 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
635 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
636 } \
637 } while (0)
638
639/**
640 * Atomically Exchange a value which size might differ
641 * between platforms or compilers, ordered.
642 *
643 * @param pu Pointer to the variable to update.
644 * @param uNew The value to assign to *pu.
645 * @param puRes Where to store the current *pu value.
646 */
647#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
648 do { \
649 switch (sizeof(*(pu))) { \
650 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
651 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
652 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
653 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
654 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
655 } \
656 } while (0)
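/*
 * Usage sketch (illustrative only): exchanging a variable whose width differs
 * between targets (RTGCPTR is 32-bit or 64-bit depending on the guest
 * architecture); the previous value lands in GCPtrOld.  g_GCPtrShared and
 * GCPtrNew are hypothetical.
 *
 *     RTGCPTR volatile g_GCPtrShared;
 *     RTGCPTR GCPtrOld;
 *     ASMAtomicXchgSizeCorrect(&g_GCPtrShared, GCPtrNew, &GCPtrOld);
 */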
657
658
659
660/**
661 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
662 *
663 * @returns true if xchg was done.
664 * @returns false if xchg wasn't done.
665 *
666 * @param pu8 Pointer to the value to update.
667 * @param u8New The new value to assign to *pu8.
668 * @param u8Old The old value to compare *pu8 with.
669 */
670#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
671DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
672#else
673DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
674{
675 uint8_t u8Ret;
676 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
677 "setz %1\n\t"
678 : "=m" (*pu8),
679 "=qm" (u8Ret),
680 "=a" (u8Old)
681 : "q" (u8New),
682 "2" (u8Old),
683 "m" (*pu8));
684 return (bool)u8Ret;
685}
686#endif
687
688
689/**
690 * Atomically Compare and Exchange a signed 8-bit value, ordered.
691 *
692 * @returns true if xchg was done.
693 * @returns false if xchg wasn't done.
694 *
695 * @param pi8 Pointer to the value to update.
696 * @param i8New The new value to assign to *pi8.
697 * @param i8Old The old value to compare *pi8 with.
698 */
699DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
700{
701 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
702}
703
704
705/**
706 * Atomically Compare and Exchange a bool value, ordered.
707 *
708 * @returns true if xchg was done.
709 * @returns false if xchg wasn't done.
710 *
711 * @param pf Pointer to the value to update.
712 * @param fNew The new value to assign to *pf.
713 * @param fOld The old value to compare *pf with.
714 */
715DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
716{
717 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
718}
719
720
721/**
722 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
723 *
724 * @returns true if xchg was done.
725 * @returns false if xchg wasn't done.
726 *
727 * @param pu32 Pointer to the value to update.
728 * @param u32New The new value to assign to *pu32.
729 * @param u32Old The old value to compare *pu32 with.
730 */
731#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
732DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
733#else
734DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
735{
736# if RT_INLINE_ASM_GNU_STYLE
737 uint8_t u8Ret;
738 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
739 "setz %1\n\t"
740 : "=m" (*pu32),
741 "=qm" (u8Ret),
742 "=a" (u32Old)
743 : "r" (u32New),
744 "2" (u32Old),
745 "m" (*pu32));
746 return (bool)u8Ret;
747
748# elif RT_INLINE_ASM_USES_INTRIN
749 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
750
751# else
752 uint32_t u32Ret;
753 __asm
754 {
755# ifdef RT_ARCH_AMD64
756 mov rdx, [pu32]
757# else
758 mov edx, [pu32]
759# endif
760 mov eax, [u32Old]
761 mov ecx, [u32New]
762# ifdef RT_ARCH_AMD64
763 lock cmpxchg [rdx], ecx
764# else
765 lock cmpxchg [edx], ecx
766# endif
767 setz al
768 movzx eax, al
769 mov [u32Ret], eax
770 }
771 return !!u32Ret;
772# endif
773}
774#endif
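/*
 * Usage sketch (illustrative only): the classic compare-and-exchange retry
 * loop, here raising a statistics high-water mark atomically.  MyUpdateMax is
 * a hypothetical helper; ASMAtomicReadU32 is declared further down in this
 * header.
 *
 *     void MyUpdateMax(volatile uint32_t *pu32Max, uint32_t u32New)
 *     {
 *         uint32_t u32Cur;
 *         do
 *         {
 *             u32Cur = ASMAtomicReadU32(pu32Max);
 *             if (u32New <= u32Cur)
 *                 return;     // already high enough, nothing to do
 *         } while (!ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
 *     }
 */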
775
776
777/**
778 * Atomically Compare and Exchange a signed 32-bit value, ordered.
779 *
780 * @returns true if xchg was done.
781 * @returns false if xchg wasn't done.
782 *
783 * @param pi32 Pointer to the value to update.
784 * @param i32New The new value to assign to *pi32.
785 * @param i32Old The old value to compare *pi32 with.
786 */
787DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
788{
789 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
790}
791
792
793/**
794 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
795 *
796 * @returns true if xchg was done.
797 * @returns false if xchg wasn't done.
798 *
799 * @param pu64 Pointer to the 64-bit variable to update.
800 * @param u64New The 64-bit value to assign to *pu64.
801 * @param u64Old The value to compare with.
802 */
803#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
804 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
805DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
806#else
807DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
808{
809# if RT_INLINE_ASM_USES_INTRIN
810 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
811
812# elif defined(RT_ARCH_AMD64)
813# if RT_INLINE_ASM_GNU_STYLE
814 uint8_t u8Ret;
815 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
816 "setz %1\n\t"
817 : "=m" (*pu64),
818 "=qm" (u8Ret),
819 "=a" (u64Old)
820 : "r" (u64New),
821 "2" (u64Old),
822 "m" (*pu64));
823 return (bool)u8Ret;
824# else
825 bool fRet;
826 __asm
827 {
828 mov rdx, [pu64]
829 mov rax, [u64Old]
830 mov rcx, [u64New]
831 lock cmpxchg [rdx], rcx
832 setz al
833 mov [fRet], al
834 }
835 return fRet;
836# endif
837# else /* !RT_ARCH_AMD64 */
838 uint32_t u32Ret;
839# if RT_INLINE_ASM_GNU_STYLE
840# if defined(PIC) || defined(__PIC__)
841 uint32_t u32EBX = (uint32_t)u64New;
842 uint32_t u32Spill;
843 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
844 "lock; cmpxchg8b (%6)\n\t"
845 "setz %%al\n\t"
846 "movl %4, %%ebx\n\t"
847 "movzbl %%al, %%eax\n\t"
848 : "=a" (u32Ret),
849 "=d" (u32Spill),
850# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
851 "+m" (*pu64)
852# else
853 "=m" (*pu64)
854# endif
855 : "A" (u64Old),
856 "m" ( u32EBX ),
857 "c" ( (uint32_t)(u64New >> 32) ),
858 "S" (pu64));
859# else /* !PIC */
860 uint32_t u32Spill;
861 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
862 "setz %%al\n\t"
863 "movzbl %%al, %%eax\n\t"
864 : "=a" (u32Ret),
865 "=d" (u32Spill),
866 "+m" (*pu64)
867 : "A" (u64Old),
868 "b" ( (uint32_t)u64New ),
869 "c" ( (uint32_t)(u64New >> 32) ));
870# endif
871 return (bool)u32Ret;
872# else
873 __asm
874 {
875 mov ebx, dword ptr [u64New]
876 mov ecx, dword ptr [u64New + 4]
877 mov edi, [pu64]
878 mov eax, dword ptr [u64Old]
879 mov edx, dword ptr [u64Old + 4]
880 lock cmpxchg8b [edi]
881 setz al
882 movzx eax, al
883 mov dword ptr [u32Ret], eax
884 }
885 return !!u32Ret;
886# endif
887# endif /* !RT_ARCH_AMD64 */
888}
889#endif
890
891
892/**
893 * Atomically Compare and exchange a signed 64-bit value, ordered.
894 *
895 * @returns true if xchg was done.
896 * @returns false if xchg wasn't done.
897 *
898 * @param pi64 Pointer to the 64-bit variable to update.
899 * @param i64 The 64-bit value to assign to *pi64.
900 * @param i64Old The value to compare with.
901 */
902DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
903{
904 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
905}
906
907
908/**
909 * Atomically Compare and Exchange a pointer value, ordered.
910 *
911 * @returns true if xchg was done.
912 * @returns false if xchg wasn't done.
913 *
914 * @param ppv Pointer to the value to update.
915 * @param pvNew The new value to assign to *ppv.
916 * @param pvOld The old value to compare *ppv with.
917 */
918DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
919{
920#if ARCH_BITS == 32
921 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
922#elif ARCH_BITS == 64
923 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
924#else
925# error "ARCH_BITS is bogus"
926#endif
927}
928
929
930/**
931 * Atomically Compare and Exchange a pointer value, ordered.
932 *
933 * @returns true if xchg was done.
934 * @returns false if xchg wasn't done.
935 *
936 * @param ppv Pointer to the value to update.
937 * @param pvNew The new value to assign to *ppv.
938 * @param pvOld The old value to compare *ppv with.
939 *
940 * @remarks This is relatively type safe on GCC platforms.
941 */
942#ifdef __GNUC__
943# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
944 __extension__ \
945 ({\
946 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
947 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
948 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
949 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
950 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
951 fMacroRet; \
952 })
953#else
954# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
955 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
956#endif
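/*
 * Usage sketch (illustrative only): lock-free push onto a singly linked list
 * using the type-safe wrapper; MYNODE is the hypothetical node type from the
 * ASMAtomicXchgPtrT example earlier in this header.
 *
 *     void MyPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 *     {
 *         MYNODE *pOld;
 *         do
 *         {
 *             pOld = *ppHead;              // plain read is fine as a CAS seed
 *             pNode->pNext = pOld;
 *         } while (!ASMAtomicCmpXchgPtr(ppHead, pNode, pOld));
 *     }
 */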
957
958
959/** @def ASMAtomicCmpXchgHandle
960 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
961 *
962 * @param ph Pointer to the value to update.
963 * @param hNew The new value to assign to *ph.
964 * @param hOld The old value to compare *ph with.
965 * @param fRc Where to store the result.
966 *
967 * @remarks This doesn't currently work for all handles (like RTFILE).
968 */
969#if HC_ARCH_BITS == 32
970# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
971 do { \
972 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
973 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
974 } while (0)
975#elif HC_ARCH_BITS == 64
976# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
977 do { \
978 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
979 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
980 } while (0)
981#else
982# error HC_ARCH_BITS
983#endif
984
985
986/** @def ASMAtomicCmpXchgSize
987 * Atomically Compare and Exchange a value which size might differ
988 * between platforms or compilers, ordered.
989 *
990 * @param pu Pointer to the value to update.
991 * @param uNew The new value to assign to *pu.
992 * @param uOld The old value to compare *pu with.
993 * @param fRc Where to store the result.
994 */
995#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
996 do { \
997 switch (sizeof(*(pu))) { \
998 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
999 break; \
1000 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1001 break; \
1002 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1003 (fRc) = false; \
1004 break; \
1005 } \
1006 } while (0)
1007
1008
1009/**
1010 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1011 * passes back old value, ordered.
1012 *
1013 * @returns true if xchg was done.
1014 * @returns false if xchg wasn't done.
1015 *
1016 * @param pu32 Pointer to the value to update.
1017 * @param u32New The new value to assign to *pu32.
1018 * @param u32Old The old value to compare *pu32 with.
1019 * @param pu32Old Pointer to where to store the old value.
1020 */
1021#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1022DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1023#else
1024DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1025{
1026# if RT_INLINE_ASM_GNU_STYLE
1027 uint8_t u8Ret;
1028 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1029 "setz %1\n\t"
1030 : "=m" (*pu32),
1031 "=qm" (u8Ret),
1032 "=a" (*pu32Old)
1033 : "r" (u32New),
1034 "a" (u32Old),
1035 "m" (*pu32));
1036 return (bool)u8Ret;
1037
1038# elif RT_INLINE_ASM_USES_INTRIN
1039 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1040
1041# else
1042 uint32_t u32Ret;
1043 __asm
1044 {
1045# ifdef RT_ARCH_AMD64
1046 mov rdx, [pu32]
1047# else
1048 mov edx, [pu32]
1049# endif
1050 mov eax, [u32Old]
1051 mov ecx, [u32New]
1052# ifdef RT_ARCH_AMD64
1053 lock cmpxchg [rdx], ecx
1054 mov rdx, [pu32Old]
1055 mov [rdx], eax
1056# else
1057 lock cmpxchg [edx], ecx
1058 mov edx, [pu32Old]
1059 mov [edx], eax
1060# endif
1061 setz al
1062 movzx eax, al
1063 mov [u32Ret], eax
1064 }
1065 return !!u32Ret;
1066# endif
1067}
1068#endif
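/*
 * Usage sketch (illustrative only): the Ex variant hands back the value that
 * was actually found, so the retry loop needs no separate re-read.  MySetBits
 * is a hypothetical helper that atomically ORs bits into a flags word.
 *
 *     void MySetBits(volatile uint32_t *pu32Flags, uint32_t fToSet)
 *     {
 *         uint32_t u32Old = ASMAtomicReadU32(pu32Flags);
 *         uint32_t u32Seen;
 *         while (!ASMAtomicCmpXchgExU32(pu32Flags, u32Old | fToSet, u32Old, &u32Seen))
 *             u32Old = u32Seen;            // retry with the value we just saw
 *     }
 */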
1069
1070
1071/**
1072 * Atomically Compare and Exchange a signed 32-bit value, additionally
1073 * passes back old value, ordered.
1074 *
1075 * @returns true if xchg was done.
1076 * @returns false if xchg wasn't done.
1077 *
1078 * @param pi32 Pointer to the value to update.
1079 * @param i32New The new value to assign to *pi32.
1080 * @param i32Old The old value to compare *pi32 with.
1081 * @param pi32Old Pointer to where to store the old value.
1082 */
1083DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1084{
1085 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1086}
1087
1088
1089/**
1090 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1091 * passing back old value, ordered.
1092 *
1093 * @returns true if xchg was done.
1094 * @returns false if xchg wasn't done.
1095 *
1096 * @param pu64 Pointer to the 64-bit variable to update.
1097 * @param u64New The 64-bit value to assign to *pu64.
1098 * @param u64Old The value to compare with.
1099 * @param pu64Old Pointer to where to store the old value.
1100 */
1101#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1102 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1103DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1104#else
1105DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1106{
1107# if RT_INLINE_ASM_USES_INTRIN
1108 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1109
1110# elif defined(RT_ARCH_AMD64)
1111# if RT_INLINE_ASM_GNU_STYLE
1112 uint8_t u8Ret;
1113 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1114 "setz %1\n\t"
1115 : "=m" (*pu64),
1116 "=qm" (u8Ret),
1117 "=a" (*pu64Old)
1118 : "r" (u64New),
1119 "a" (u64Old),
1120 "m" (*pu64));
1121 return (bool)u8Ret;
1122# else
1123 bool fRet;
1124 __asm
1125 {
1126 mov rdx, [pu64]
1127 mov rax, [u64Old]
1128 mov rcx, [u64New]
1129 lock cmpxchg [rdx], rcx
1130 mov rdx, [pu64Old]
1131 mov [rdx], rax
1132 setz al
1133 mov [fRet], al
1134 }
1135 return fRet;
1136# endif
1137# else /* !RT_ARCH_AMD64 */
1138# if RT_INLINE_ASM_GNU_STYLE
1139 uint64_t u64Ret;
1140# if defined(PIC) || defined(__PIC__)
1141 /* NB: this code uses a memory clobber description, because the clean
1142 * solution with an output value for *pu64 makes gcc run out of registers.
1143 * This will cause suboptimal code, and anyone with a better solution is
1144 * welcome to improve this. */
1145 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1146 "lock; cmpxchg8b %3\n\t"
1147 "xchgl %%ebx, %1\n\t"
1148 : "=A" (u64Ret)
1149 : "DS" ((uint32_t)u64New),
1150 "c" ((uint32_t)(u64New >> 32)),
1151 "m" (*pu64),
1152 "0" (u64Old)
1153 : "memory" );
1154# else /* !PIC */
1155 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1156 : "=A" (u64Ret),
1157 "=m" (*pu64)
1158 : "b" ((uint32_t)u64New),
1159 "c" ((uint32_t)(u64New >> 32)),
1160 "m" (*pu64),
1161 "0" (u64Old));
1162# endif
1163 *pu64Old = u64Ret;
1164 return u64Ret == u64Old;
1165# else
1166 uint32_t u32Ret;
1167 __asm
1168 {
1169 mov ebx, dword ptr [u64New]
1170 mov ecx, dword ptr [u64New + 4]
1171 mov edi, [pu64]
1172 mov eax, dword ptr [u64Old]
1173 mov edx, dword ptr [u64Old + 4]
1174 lock cmpxchg8b [edi]
1175 mov ebx, [pu64Old]
1176 mov [ebx], eax
1177 setz al
1178 movzx eax, al
1179 add ebx, 4
1180 mov [ebx], edx
1181 mov dword ptr [u32Ret], eax
1182 }
1183 return !!u32Ret;
1184# endif
1185# endif /* !RT_ARCH_AMD64 */
1186}
1187#endif
1188
1189
1190/**
1191 * Atomically Compare and exchange a signed 64-bit value, additionally
1192 * passing back old value, ordered.
1193 *
1194 * @returns true if xchg was done.
1195 * @returns false if xchg wasn't done.
1196 *
1197 * @param pi64 Pointer to the 64-bit variable to update.
1198 * @param i64 The 64-bit value to assign to *pi64.
1199 * @param i64Old The value to compare with.
1200 * @param pi64Old Pointer to where to store the old value.
1201 */
1202DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1203{
1204 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1205}
1206
1207/** @def ASMAtomicCmpXchgExHandle
1208 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1209 *
1210 * @param ph Pointer to the value to update.
1211 * @param hNew The new value to assign to *ph.
1212 * @param hOld The old value to compare *ph with.
1213 * @param fRc Where to store the result.
1214 * @param phOldVal Pointer to where to store the old value.
1215 *
1216 * @remarks This doesn't currently work for all handles (like RTFILE).
1217 */
1218#if HC_ARCH_BITS == 32
1219# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1220 do { \
1221 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1222 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1223 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1224 } while (0)
1225#elif HC_ARCH_BITS == 64
1226# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1227 do { \
1228 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1229 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1230 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1231 } while (0)
1232#else
1233# error HC_ARCH_BITS
1234#endif
1235
1236
1237/** @def ASMAtomicCmpXchgExSize
1238 * Atomically Compare and Exchange a value which size might differ
1239 * between platforms or compilers. Additionally passes back old value.
1240 *
1241 * @param pu Pointer to the value to update.
1242 * @param uNew The new value to assign to *pu.
1243 * @param uOld The old value to compare *pu with.
1244 * @param fRc Where to store the result.
1245 * @param puOldVal Pointer to where to store the old value.
1246 */
1247#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1248 do { \
1249 switch (sizeof(*(pu))) { \
1250 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1251 break; \
1252 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1253 break; \
1254 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1255 (fRc) = false; \
1256 *(puOldVal) = 0; \
1257 break; \
1258 } \
1259 } while (0)
1260
1261
1262/**
1263 * Atomically Compare and Exchange a pointer value, additionally
1264 * passing back old value, ordered.
1265 *
1266 * @returns true if xchg was done.
1267 * @returns false if xchg wasn't done.
1268 *
1269 * @param ppv Pointer to the value to update.
1270 * @param pvNew The new value to assign to *ppv.
1271 * @param pvOld The old value to compare *ppv with.
1272 * @param ppvOld Pointer to where to store the old value.
1273 */
1274DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1275{
1276#if ARCH_BITS == 32
1277 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1278#elif ARCH_BITS == 64
1279 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1280#else
1281# error "ARCH_BITS is bogus"
1282#endif
1283}
1284
1285
1286/**
1287 * Atomically Compare and Exchange a pointer value, additionally
1288 * passing back old value, ordered.
1289 *
1290 * @returns true if xchg was done.
1291 * @returns false if xchg wasn't done.
1292 *
1293 * @param ppv Pointer to the value to update.
1294 * @param pvNew The new value to assign to *ppv.
1295 * @param pvOld The old value to compare *ppv with.
1296 * @param ppvOld Pointer to where to store the old value.
1297 *
1298 * @remarks This is relatively type safe on GCC platforms.
1299 */
1300#ifdef __GNUC__
1301# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1302 __extension__ \
1303 ({\
1304 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1305 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1306 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1307 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1308 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1309 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1310 (void **)ppvOldTypeChecked); \
1311 fMacroRet; \
1312 })
1313#else
1314# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1315 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1316#endif
1317
1318
1319/**
1320 * Serialize Instruction.
1321 */
1322#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1323DECLASM(void) ASMSerializeInstruction(void);
1324#else
1325DECLINLINE(void) ASMSerializeInstruction(void)
1326{
1327# if RT_INLINE_ASM_GNU_STYLE
1328 RTCCUINTREG xAX = 0;
1329# ifdef RT_ARCH_AMD64
1330 __asm__ ("cpuid"
1331 : "=a" (xAX)
1332 : "0" (xAX)
1333 : "rbx", "rcx", "rdx");
1334# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1335 __asm__ ("push %%ebx\n\t"
1336 "cpuid\n\t"
1337 "pop %%ebx\n\t"
1338 : "=a" (xAX)
1339 : "0" (xAX)
1340 : "ecx", "edx");
1341# else
1342 __asm__ ("cpuid"
1343 : "=a" (xAX)
1344 : "0" (xAX)
1345 : "ebx", "ecx", "edx");
1346# endif
1347
1348# elif RT_INLINE_ASM_USES_INTRIN
1349 int aInfo[4];
1350 __cpuid(aInfo, 0);
1351
1352# else
1353 __asm
1354 {
1355 push ebx
1356 xor eax, eax
1357 cpuid
1358 pop ebx
1359 }
1360# endif
1361}
1362#endif
1363
1364
1365/**
1366 * Memory fence, waits for any pending writes and reads to complete.
1367 */
1368DECLINLINE(void) ASMMemoryFence(void)
1369{
1370 /** @todo use mfence? check if all cpus we care for support it. */
1371 uint32_t volatile u32;
1372 ASMAtomicXchgU32(&u32, 0);
1373}
1374
1375
1376/**
1377 * Write fence, waits for any pending writes to complete.
1378 */
1379DECLINLINE(void) ASMWriteFence(void)
1380{
1381 /** @todo use sfence? check if all cpus we care for support it. */
1382 ASMMemoryFence();
1383}
1384
1385
1386/**
1387 * Read fence, waits for any pending reads to complete.
1388 */
1389DECLINLINE(void) ASMReadFence(void)
1390{
1391 /** @todo use lfence? check if all cpus we care for support it. */
1392 ASMMemoryFence();
1393}
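/*
 * Usage sketch (illustrative only): a simple publish/consume pattern built on
 * the fences above and the atomic boolean helpers in this header.  g_u64Data,
 * g_fReady, CalcValue and Process are hypothetical.
 *
 *     // producer
 *     g_u64Data = CalcValue();             // plain store of the payload
 *     ASMWriteFence();                     // payload must be visible first
 *     ASMAtomicXchgBool(&g_fReady, true);  // then raise the flag (ordered)
 *
 *     // consumer
 *     while (!ASMAtomicReadBool(&g_fReady))
 *         ASMNopPause();
 *     Process(g_u64Data);                  // safe: the flag read was ordered
 */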
1394
1395
1396/**
1397 * Atomically reads an unsigned 8-bit value, ordered.
1398 *
1399 * @returns Current *pu8 value
1400 * @param pu8 Pointer to the 8-bit variable to read.
1401 */
1402DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1403{
1404 ASMMemoryFence();
1405 return *pu8; /* byte reads are atomic on x86 */
1406}
1407
1408
1409/**
1410 * Atomically reads an unsigned 8-bit value, unordered.
1411 *
1412 * @returns Current *pu8 value
1413 * @param pu8 Pointer to the 8-bit variable to read.
1414 */
1415DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1416{
1417 return *pu8; /* byte reads are atomic on x86 */
1418}
1419
1420
1421/**
1422 * Atomically reads a signed 8-bit value, ordered.
1423 *
1424 * @returns Current *pi8 value
1425 * @param pi8 Pointer to the 8-bit variable to read.
1426 */
1427DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1428{
1429 ASMMemoryFence();
1430 return *pi8; /* byte reads are atomic on x86 */
1431}
1432
1433
1434/**
1435 * Atomically reads a signed 8-bit value, unordered.
1436 *
1437 * @returns Current *pi8 value
1438 * @param pi8 Pointer to the 8-bit variable to read.
1439 */
1440DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1441{
1442 return *pi8; /* byte reads are atomic on x86 */
1443}
1444
1445
1446/**
1447 * Atomically reads an unsigned 16-bit value, ordered.
1448 *
1449 * @returns Current *pu16 value
1450 * @param pu16 Pointer to the 16-bit variable to read.
1451 */
1452DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1453{
1454 ASMMemoryFence();
1455 Assert(!((uintptr_t)pu16 & 1));
1456 return *pu16;
1457}
1458
1459
1460/**
1461 * Atomically reads an unsigned 16-bit value, unordered.
1462 *
1463 * @returns Current *pu16 value
1464 * @param pu16 Pointer to the 16-bit variable to read.
1465 */
1466DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1467{
1468 Assert(!((uintptr_t)pu16 & 1));
1469 return *pu16;
1470}
1471
1472
1473/**
1474 * Atomically reads a signed 16-bit value, ordered.
1475 *
1476 * @returns Current *pi16 value
1477 * @param pi16 Pointer to the 16-bit variable to read.
1478 */
1479DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1480{
1481 ASMMemoryFence();
1482 Assert(!((uintptr_t)pi16 & 1));
1483 return *pi16;
1484}
1485
1486
1487/**
1488 * Atomically reads a signed 16-bit value, unordered.
1489 *
1490 * @returns Current *pi16 value
1491 * @param pi16 Pointer to the 16-bit variable to read.
1492 */
1493DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1494{
1495 Assert(!((uintptr_t)pi16 & 1));
1496 return *pi16;
1497}
1498
1499
1500/**
1501 * Atomically reads an unsigned 32-bit value, ordered.
1502 *
1503 * @returns Current *pu32 value
1504 * @param pu32 Pointer to the 32-bit variable to read.
1505 */
1506DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1507{
1508 ASMMemoryFence();
1509 Assert(!((uintptr_t)pu32 & 3));
1510 return *pu32;
1511}
1512
1513
1514/**
1515 * Atomically reads an unsigned 32-bit value, unordered.
1516 *
1517 * @returns Current *pu32 value
1518 * @param pu32 Pointer to the 32-bit variable to read.
1519 */
1520DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1521{
1522 Assert(!((uintptr_t)pu32 & 3));
1523 return *pu32;
1524}
1525
1526
1527/**
1528 * Atomically reads a signed 32-bit value, ordered.
1529 *
1530 * @returns Current *pi32 value
1531 * @param pi32 Pointer to the 32-bit variable to read.
1532 */
1533DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1534{
1535 ASMMemoryFence();
1536 Assert(!((uintptr_t)pi32 & 3));
1537 return *pi32;
1538}
1539
1540
1541/**
1542 * Atomically reads a signed 32-bit value, unordered.
1543 *
1544 * @returns Current *pi32 value
1545 * @param pi32 Pointer to the 32-bit variable to read.
1546 */
1547DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1548{
1549 Assert(!((uintptr_t)pi32 & 3));
1550 return *pi32;
1551}
1552
1553
1554/**
1555 * Atomically reads an unsigned 64-bit value, ordered.
1556 *
1557 * @returns Current *pu64 value
1558 * @param pu64 Pointer to the 64-bit variable to read.
1559 * The memory pointed to must be writable.
1560 * @remark This will fault if the memory is read-only!
1561 */
1562#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1563 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1564DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1565#else
1566DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1567{
1568 uint64_t u64;
1569# ifdef RT_ARCH_AMD64
1570 Assert(!((uintptr_t)pu64 & 7));
1571/*# if RT_INLINE_ASM_GNU_STYLE
1572 __asm__ __volatile__( "mfence\n\t"
1573 "movq %1, %0\n\t"
1574 : "=r" (u64)
1575 : "m" (*pu64));
1576# else
1577 __asm
1578 {
1579 mfence
1580 mov rdx, [pu64]
1581 mov rax, [rdx]
1582 mov [u64], rax
1583 }
1584# endif*/
1585 ASMMemoryFence();
1586 u64 = *pu64;
1587# else /* !RT_ARCH_AMD64 */
1588# if RT_INLINE_ASM_GNU_STYLE
1589# if defined(PIC) || defined(__PIC__)
1590 uint32_t u32EBX = 0;
1591 Assert(!((uintptr_t)pu64 & 7));
1592 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1593 "lock; cmpxchg8b (%5)\n\t"
1594 "movl %3, %%ebx\n\t"
1595 : "=A" (u64),
1596# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1597 "+m" (*pu64)
1598# else
1599 "=m" (*pu64)
1600# endif
1601 : "0" (0ULL),
1602 "m" (u32EBX),
1603 "c" (0),
1604 "S" (pu64));
1605# else /* !PIC */
1606 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1607 : "=A" (u64),
1608 "+m" (*pu64)
1609 : "0" (0ULL),
1610 "b" (0),
1611 "c" (0));
1612# endif
1613# else
1614 Assert(!((uintptr_t)pu64 & 7));
1615 __asm
1616 {
1617 xor eax, eax
1618 xor edx, edx
1619 mov edi, pu64
1620 xor ecx, ecx
1621 xor ebx, ebx
1622 lock cmpxchg8b [edi]
1623 mov dword ptr [u64], eax
1624 mov dword ptr [u64 + 4], edx
1625 }
1626# endif
1627# endif /* !RT_ARCH_AMD64 */
1628 return u64;
1629}
1630#endif
1631
1632
1633/**
1634 * Atomically reads an unsigned 64-bit value, unordered.
1635 *
1636 * @returns Current *pu64 value
1637 * @param pu64 Pointer to the 64-bit variable to read.
1638 * The memory pointed to must be writable.
1639 * @remark This will fault if the memory is read-only!
1640 */
1641#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1642 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1643DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1644#else
1645DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1646{
1647 uint64_t u64;
1648# ifdef RT_ARCH_AMD64
1649 Assert(!((uintptr_t)pu64 & 7));
1650/*# if RT_INLINE_ASM_GNU_STYLE
1651 Assert(!((uintptr_t)pu64 & 7));
1652 __asm__ __volatile__("movq %1, %0\n\t"
1653 : "=r" (u64)
1654 : "m" (*pu64));
1655# else
1656 __asm
1657 {
1658 mov rdx, [pu64]
1659 mov rax, [rdx]
1660 mov [u64], rax
1661 }
1662# endif */
1663 u64 = *pu64;
1664# else /* !RT_ARCH_AMD64 */
1665# if RT_INLINE_ASM_GNU_STYLE
1666# if defined(PIC) || defined(__PIC__)
1667 uint32_t u32EBX = 0;
1668 uint32_t u32Spill;
1669 Assert(!((uintptr_t)pu64 & 7));
1670 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1671 "xor %%ecx,%%ecx\n\t"
1672 "xor %%edx,%%edx\n\t"
1673 "xchgl %%ebx, %3\n\t"
1674 "lock; cmpxchg8b (%4)\n\t"
1675 "movl %3, %%ebx\n\t"
1676 : "=A" (u64),
1677# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1678 "+m" (*pu64),
1679# else
1680 "=m" (*pu64),
1681# endif
1682 "=c" (u32Spill)
1683 : "m" (u32EBX),
1684 "S" (pu64));
1685# else /* !PIC */
1686 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1687 : "=A" (u64),
1688 "+m" (*pu64)
1689 : "0" (0ULL),
1690 "b" (0),
1691 "c" (0));
1692# endif
1693# else
1694 Assert(!((uintptr_t)pu64 & 7));
1695 __asm
1696 {
1697 xor eax, eax
1698 xor edx, edx
1699 mov edi, pu64
1700 xor ecx, ecx
1701 xor ebx, ebx
1702 lock cmpxchg8b [edi]
1703 mov dword ptr [u64], eax
1704 mov dword ptr [u64 + 4], edx
1705 }
1706# endif
1707# endif /* !RT_ARCH_AMD64 */
1708 return u64;
1709}
1710#endif
1711
1712
1713/**
1714 * Atomically reads a signed 64-bit value, ordered.
1715 *
1716 * @returns Current *pi64 value
1717 * @param pi64 Pointer to the 64-bit variable to read.
1718 * The memory pointed to must be writable.
1719 * @remark This will fault if the memory is read-only!
1720 */
1721DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1722{
1723 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1724}
1725
1726
1727/**
1728 * Atomically reads a signed 64-bit value, unordered.
1729 *
1730 * @returns Current *pi64 value
1731 * @param pi64 Pointer to the 64-bit variable to read.
1732 * The memory pointed to must be writable.
1733 * @remark This will fault if the memory is read-only!
1734 */
1735DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1736{
1737 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1738}
1739
1740
1741/**
1742 * Atomically reads a size_t value, ordered.
1743 *
1744 * @returns Current *pcb value
1745 * @param pcb Pointer to the size_t variable to read.
1746 */
1747DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1748{
1749#if ARCH_BITS == 64
1750 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1751#elif ARCH_BITS == 32
1752 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1753#else
1754# error "Unsupported ARCH_BITS value"
1755#endif
1756}
1757
1758
1759/**
1760 * Atomically reads a size_t value, unordered.
1761 *
1762 * @returns Current *pcb value
1763 * @param pcb Pointer to the size_t variable to read.
1764 */
1765DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1766{
1767#if ARCH_BITS == 64
1768 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1769#elif ARCH_BITS == 32
1770 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1771#else
1772# error "Unsupported ARCH_BITS value"
1773#endif
1774}
1775
1776
1777/**
1778 * Atomically reads a pointer value, ordered.
1779 *
1780 * @returns Current *pv value
1781 * @param ppv Pointer to the pointer variable to read.
1782 *
1783 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
1784 * requires less typing (no casts).
1785 */
1786DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1787{
1788#if ARCH_BITS == 32
1789 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1790#elif ARCH_BITS == 64
1791 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1792#else
1793# error "ARCH_BITS is bogus"
1794#endif
1795}
1796
1797/**
1798 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1799 *
1800 * @returns Current *pv value
1801 * @param ppv Pointer to the pointer variable to read.
1802 * @param Type The type of *ppv, sans volatile.
1803 */
1804#ifdef __GNUC__
1805# define ASMAtomicReadPtrT(ppv, Type) \
1806 __extension__ \
1807 ({\
1808 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1809 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1810 pvTypeChecked; \
1811 })
1812#else
1813# define ASMAtomicReadPtrT(ppv, Type) \
1814 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1815#endif
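/*
 * Usage sketch (illustrative only): snapshotting a shared pointer without the
 * casts the raw ASMAtomicReadPtr call would need; MYNODE, g_pHead and Process
 * are hypothetical.
 *
 *     MYNODE *pCur = ASMAtomicReadPtrT(&g_pHead, MYNODE *);
 *     if (pCur)
 *         Process(pCur);
 */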
1816
1817
1818/**
1819 * Atomically reads a pointer value, unordered.
1820 *
1821 * @returns Current *pv value
1822 * @param ppv Pointer to the pointer variable to read.
1823 *
1824 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
1825 * requires less typing (no casts).
1826 */
1827DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1828{
1829#if ARCH_BITS == 32
1830 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1831#elif ARCH_BITS == 64
1832 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1833#else
1834# error "ARCH_BITS is bogus"
1835#endif
1836}
1837
1838
1839/**
1840 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1841 *
1842 * @returns Current *pv value
1843 * @param ppv Pointer to the pointer variable to read.
1844 * @param Type The type of *ppv, sans volatile.
1845 */
1846#ifdef __GNUC__
1847# define ASMAtomicUoReadPtrT(ppv, Type) \
1848 __extension__ \
1849 ({\
1850 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1851 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
1852 pvTypeChecked; \
1853 })
1854#else
1855# define ASMAtomicUoReadPtrT(ppv, Type) \
1856 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
1857#endif
1858
1859
1860/**
1861 * Atomically reads a boolean value, ordered.
1862 *
1863 * @returns Current *pf value
1864 * @param pf Pointer to the boolean variable to read.
1865 */
1866DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1867{
1868 ASMMemoryFence();
1869 return *pf; /* byte reads are atomic on x86 */
1870}
1871
1872
1873/**
1874 * Atomically reads a boolean value, unordered.
1875 *
1876 * @returns Current *pf value
1877 * @param pf Pointer to the boolean variable to read.
1878 */
1879DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1880{
1881 return *pf; /* byte reads are atomic on x86 */
1882}
1883
1884
1885/**
1886 * Atomically read a typical IPRT handle value, ordered.
1887 *
1888 * @param ph Pointer to the handle variable to read.
1889 * @param phRes Where to store the result.
1890 *
1891 * @remarks This doesn't currently work for all handles (like RTFILE).
1892 */
1893#if HC_ARCH_BITS == 32
1894# define ASMAtomicReadHandle(ph, phRes) \
1895 do { \
1896 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1897 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1898 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1899 } while (0)
1900#elif HC_ARCH_BITS == 64
1901# define ASMAtomicReadHandle(ph, phRes) \
1902 do { \
1903 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1904 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1905 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1906 } while (0)
1907#else
1908# error HC_ARCH_BITS
1909#endif
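
/* Usage sketch for ASMAtomicReadHandle, assuming a pointer sized IPRT handle
 * type such as RTSEMEVENT from iprt/types.h; the g_hMyEvent name is
 * illustrative only:
 *
 *      static RTSEMEVENT volatile g_hMyEvent;
 *
 *      static RTSEMEVENT myGetEvent(void)
 *      {
 *          RTSEMEVENT hEvent;
 *          ASMAtomicReadHandle(&g_hMyEvent, &hEvent);
 *          return hEvent;
 *      }
 */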
1910
1911
1912/**
1913 * Atomically read a typical IPRT handle value, unordered.
1914 *
1915 * @param ph Pointer to the handle variable to read.
1916 * @param phRes Where to store the result.
1917 *
1918 * @remarks This doesn't currently work for all handles (like RTFILE).
1919 */
1920#if HC_ARCH_BITS == 32
1921# define ASMAtomicUoReadHandle(ph, phRes) \
1922 do { \
1923 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1924 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1925 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1926 } while (0)
1927#elif HC_ARCH_BITS == 64
1928# define ASMAtomicUoReadHandle(ph, phRes) \
1929 do { \
1930 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1931 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1932 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1933 } while (0)
1934#else
1935# error HC_ARCH_BITS
1936#endif
1937
1938
1939/**
1940 * Atomically read a value whose size might differ
1941 * between platforms or compilers, ordered.
1942 *
1943 * @param pu Pointer to the variable to read.
1944 * @param puRes Where to store the result.
1945 */
1946#define ASMAtomicReadSize(pu, puRes) \
1947 do { \
1948 switch (sizeof(*(pu))) { \
1949 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1950 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
1951 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
1952 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
1953 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1954 } \
1955 } while (0)
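
/* Sketch: ASMAtomicReadSize is handy for types such as size_t whose width
 * depends on the target; the names below are illustrative only:
 *
 *      static size_t volatile g_cbUsed;
 *
 *      static size_t myGetUsed(void)
 *      {
 *          size_t cbUsed;
 *          ASMAtomicReadSize(&g_cbUsed, &cbUsed);
 *          return cbUsed;
 *      }
 */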
1956
1957
1958/**
1959 * Atomically read a value whose size might differ
1960 * between platforms or compilers, unordered.
1961 *
1962 * @param pu Pointer to the variable to read.
1963 * @param puRes Where to store the result.
1964 */
1965#define ASMAtomicUoReadSize(pu, puRes) \
1966 do { \
1967 switch (sizeof(*(pu))) { \
1968 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1969 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
1970 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
1971 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
1972 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1973 } \
1974 } while (0)
1975
1976
1977/**
1978 * Atomically writes an unsigned 8-bit value, ordered.
1979 *
1980 * @param pu8 Pointer to the 8-bit variable.
1981 * @param u8 The 8-bit value to assign to *pu8.
1982 */
1983DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1984{
1985 ASMAtomicXchgU8(pu8, u8);
1986}
1987
1988
1989/**
1990 * Atomically writes an unsigned 8-bit value, unordered.
1991 *
1992 * @param pu8 Pointer to the 8-bit variable.
1993 * @param u8 The 8-bit value to assign to *pu8.
1994 */
1995DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
1996{
1997 *pu8 = u8; /* byte writes are atomic on x86 */
1998}
1999
2000
2001/**
2002 * Atomically writes a signed 8-bit value, ordered.
2003 *
2004 * @param pi8 Pointer to the 8-bit variable to write.
2005 * @param i8 The 8-bit value to assign to *pi8.
2006 */
2007DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2008{
2009 ASMAtomicXchgS8(pi8, i8);
2010}
2011
2012
2013/**
2014 * Atomically writes a signed 8-bit value, unordered.
2015 *
2016 * @param pi8 Pointer to the 8-bit variable to write.
2017 * @param i8 The 8-bit value to assign to *pi8.
2018 */
2019DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2020{
2021 *pi8 = i8; /* byte writes are atomic on x86 */
2022}
2023
2024
2025/**
2026 * Atomically writes an unsigned 16-bit value, ordered.
2027 *
2028 * @param pu16 Pointer to the 16-bit variable to write.
2029 * @param u16 The 16-bit value to assign to *pu16.
2030 */
2031DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2032{
2033 ASMAtomicXchgU16(pu16, u16);
2034}
2035
2036
2037/**
2038 * Atomically writes an unsigned 16-bit value, unordered.
2039 *
2040 * @param pu16 Pointer to the 16-bit variable to write.
2041 * @param u16 The 16-bit value to assign to *pu16.
2042 */
2043DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2044{
2045 Assert(!((uintptr_t)pu16 & 1));
2046 *pu16 = u16;
2047}
2048
2049
2050/**
2051 * Atomically writes a signed 16-bit value, ordered.
2052 *
2053 * @param pi16 Pointer to the 16-bit variable to write.
2054 * @param i16 The 16-bit value to assign to *pi16.
2055 */
2056DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2057{
2058 ASMAtomicXchgS16(pi16, i16);
2059}
2060
2061
2062/**
2063 * Atomically writes a signed 16-bit value, unordered.
2064 *
2065 * @param pi16 Pointer to the 16-bit variable to write.
2066 * @param i16 The 16-bit value to assign to *pi16.
2067 */
2068DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2069{
2070 Assert(!((uintptr_t)pi16 & 1));
2071 *pi16 = i16;
2072}
2073
2074
2075/**
2076 * Atomically writes an unsigned 32-bit value, ordered.
2077 *
2078 * @param pu32 Pointer to the 32-bit variable to write.
2079 * @param u32 The 32-bit value to assign to *pu32.
2080 */
2081DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2082{
2083 ASMAtomicXchgU32(pu32, u32);
2084}
2085
2086
2087/**
2088 * Atomically writes an unsigned 32-bit value, unordered.
2089 *
2090 * @param pu32 Pointer to the 32-bit variable to write.
2091 * @param u32 The 32-bit value to assign to *pu32.
2092 */
2093DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2094{
2095 Assert(!((uintptr_t)pu32 & 3));
2096 *pu32 = u32;
2097}
2098
2099
2100/**
2101 * Atomically writes a signed 32-bit value, ordered.
2102 *
2103 * @param pi32 Pointer to the 32-bit variable to write.
2104 * @param i32 The 32-bit value to assign to *pi32.
2105 */
2106DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2107{
2108 ASMAtomicXchgS32(pi32, i32);
2109}
2110
2111
2112/**
2113 * Atomically writes a signed 32-bit value, unordered.
2114 *
2115 * @param pi32 Pointer to the 32-bit variable to write.
2116 * @param i32 The 32-bit value to assign to *pi32.
2117 */
2118DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2119{
2120 Assert(!((uintptr_t)pi32 & 3));
2121 *pi32 = i32;
2122}
2123
2124
2125/**
2126 * Atomically writes an unsigned 64-bit value, ordered.
2127 *
2128 * @param pu64 Pointer to the 64-bit variable to write.
2129 * @param u64 The 64-bit value to assign to *pu64.
2130 */
2131DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2132{
2133 ASMAtomicXchgU64(pu64, u64);
2134}
2135
2136
2137/**
2138 * Atomically writes an unsigned 64-bit value, unordered.
2139 *
2140 * @param pu64 Pointer to the 64-bit variable to write.
2141 * @param u64 The 64-bit value to assign to *pu64.
2142 */
2143DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2144{
2145 Assert(!((uintptr_t)pu64 & 7));
2146#if ARCH_BITS == 64
2147 *pu64 = u64;
2148#else
2149 ASMAtomicXchgU64(pu64, u64);
2150#endif
2151}
2152
2153
2154/**
2155 * Atomically writes a signed 64-bit value, ordered.
2156 *
2157 * @param pi64 Pointer to the 64-bit variable to write.
2158 * @param i64 The 64-bit value to assign to *pi64.
2159 */
2160DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2161{
2162 ASMAtomicXchgS64(pi64, i64);
2163}
2164
2165
2166/**
2167 * Atomically writes a signed 64-bit value, unordered.
2168 *
2169 * @param pi64 Pointer to the 64-bit variable to write.
2170 * @param i64 The 64-bit value to assign to *pi64.
2171 */
2172DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2173{
2174 Assert(!((uintptr_t)pi64 & 7));
2175#if ARCH_BITS == 64
2176 *pi64 = i64;
2177#else
2178 ASMAtomicXchgS64(pi64, i64);
2179#endif
2180}
2181
2182
2183/**
2184 * Atomically writes a boolean value, ordered.
2185 *
2186 * @param pf Pointer to the boolean variable to write.
2187 * @param f The boolean value to assign to *pf.
2188 */
2189DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2190{
2191 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2192}
2193
2194
2195/**
2196 * Atomically writes a boolean value, unordered.
2197 *
2198 * @param pf Pointer to the boolean variable to write.
2199 * @param f The boolean value to assign to *pf.
2200 */
2201DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2202{
2203 *pf = f; /* byte writes are atomic on x86 */
2204}
2205
2206
2207/**
2208 * Atomically writes a pointer value, ordered.
2209 *
2210 * @param ppv Pointer to the pointer variable to write.
2211 * @param pv The pointer value to assign to *ppv.
2212 */
2213DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2214{
2215#if ARCH_BITS == 32
2216 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2217#elif ARCH_BITS == 64
2218 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2219#else
2220# error "ARCH_BITS is bogus"
2221#endif
2222}
2223
2224
2225/**
2226 * Atomically writes a pointer value, ordered.
2227 *
2228 * @param ppv Pointer to the pointer variable to write.
2229 * @param pv The pointer value to assign to *ppv. If NULL use
2230 * ASMAtomicWriteNullPtr or you'll land in trouble.
2231 *
2232 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2233 * NULL.
2234 */
2235#ifdef __GNUC__
2236# define ASMAtomicWritePtr(ppv, pv) \
2237 do \
2238 { \
2239 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2240 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2241 \
2242 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2243 AssertCompile(sizeof(pv) == sizeof(void *)); \
2244 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2245 \
2246 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2247 } while (0)
2248#else
2249# define ASMAtomicWritePtr(ppv, pv) \
2250 do \
2251 { \
2252 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2253 AssertCompile(sizeof(pv) == sizeof(void *)); \
2254 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2255 \
2256 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2257 } while (0)
2258#endif
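
/* Sketch of the usual publish pattern; the MYCFG type and names are
 * illustrative only. The producer finishes all initialization before the
 * ordered pointer write, so a reader using ASMAtomicReadPtrT never sees a
 * half initialized object:
 *
 *      typedef struct MYCFG { uint32_t cItems; } MYCFG;
 *      static MYCFG * volatile g_pMyCfg;
 *
 *      static void myPublishCfg(MYCFG *pCfg)
 *      {
 *          pCfg->cItems = 42;
 *          ASMAtomicWritePtr(&g_pMyCfg, pCfg);
 *      }
 */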
2259
2260
2261/**
2262 * Atomically sets a pointer to NULL, ordered.
2263 *
2264 * @param ppv Pointer to the pointer variable that should be set to NULL.
2265 *
2266 * @remarks This is relatively type safe on GCC platforms.
2267 */
2268#ifdef __GNUC__
2269# define ASMAtomicWriteNullPtr(ppv) \
2270 do \
2271 { \
2272 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2273 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2274 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2275 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2276 } while (0)
2277#else
2278# define ASMAtomicWriteNullPtr(ppv) \
2279 do \
2280 { \
2281 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2282 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2283 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2284 } while (0)
2285#endif
2286
2287
2288/**
2289 * Atomically writes a pointer value, unordered.
2290 *
2292 * @param ppv Pointer to the pointer variable.
2293 * @param pv The pointer value to assign to *ppv. If NULL use
2294 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2295 *
2296 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2297 * NULL.
2298 */
2299#ifdef __GNUC__
2300# define ASMAtomicUoWritePtr(ppv, pv) \
2301 do \
2302 { \
2303 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2304 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2305 \
2306 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2307 AssertCompile(sizeof(pv) == sizeof(void *)); \
2308 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2309 \
2310 *(ppvTypeChecked) = pvTypeChecked; \
2311 } while (0)
2312#else
2313# define ASMAtomicUoWritePtr(ppv, pv) \
2314 do \
2315 { \
2316 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2317 AssertCompile(sizeof(pv) == sizeof(void *)); \
2318 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2319 *(ppv) = pv; \
2320 } while (0)
2321#endif
2322
2323
2324/**
2325 * Atomically sets a pointer to NULL, unordered.
2326 *
2327 * @param ppv Pointer to the pointer variable that should be set to NULL.
2328 *
2329 * @remarks This is relatively type safe on GCC platforms.
2330 */
2331#ifdef __GNUC__
2332# define ASMAtomicUoWriteNullPtr(ppv) \
2333 do \
2334 { \
2335 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2336 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2337 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2338 *(ppvTypeChecked) = NULL; \
2339 } while (0)
2340#else
2341# define ASMAtomicUoWriteNullPtr(ppv) \
2342 do \
2343 { \
2344 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2345 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2346 *(ppv) = NULL; \
2347 } while (0)
2348#endif
2349
2350
2351/**
2352 * Atomically write a typical IPRT handle value, ordered.
2353 *
2354 * @param ph Pointer to the variable to update.
2355 * @param hNew The value to assign to *ph.
2356 *
2357 * @remarks This doesn't currently work for all handles (like RTFILE).
2358 */
2359#if HC_ARCH_BITS == 32
2360# define ASMAtomicWriteHandle(ph, hNew) \
2361 do { \
2362 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2363 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2364 } while (0)
2365#elif HC_ARCH_BITS == 64
2366# define ASMAtomicWriteHandle(ph, hNew) \
2367 do { \
2368 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2369 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2370 } while (0)
2371#else
2372# error HC_ARCH_BITS
2373#endif
2374
2375
2376/**
2377 * Atomically write a typical IPRT handle value, unordered.
2378 *
2379 * @param ph Pointer to the variable to update.
2380 * @param hNew The value to assign to *ph.
2381 *
2382 * @remarks This doesn't currently work for all handles (like RTFILE).
2383 */
2384#if HC_ARCH_BITS == 32
2385# define ASMAtomicUoWriteHandle(ph, hNew) \
2386 do { \
2387 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2388 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2389 } while (0)
2390#elif HC_ARCH_BITS == 64
2391# define ASMAtomicUoWriteHandle(ph, hNew) \
2392 do { \
2393 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2394 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2395 } while (0)
2396#else
2397# error HC_ARCH_BITS
2398#endif
2399
2400
2401/**
2402 * Atomically write a value whose size might differ
2403 * between platforms or compilers, ordered.
2404 *
2405 * @param pu Pointer to the variable to update.
2406 * @param uNew The value to assign to *pu.
2407 */
2408#define ASMAtomicWriteSize(pu, uNew) \
2409 do { \
2410 switch (sizeof(*(pu))) { \
2411 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2412 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2413 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2414 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2415 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2416 } \
2417 } while (0)
2418
2419/**
2420 * Atomically write a value whose size might differ
2421 * between platforms or compilers, unordered.
2422 *
2423 * @param pu Pointer to the variable to update.
2424 * @param uNew The value to assign to *pu.
2425 */
2426#define ASMAtomicUoWriteSize(pu, uNew) \
2427 do { \
2428 switch (sizeof(*(pu))) { \
2429 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2430 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2431 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2432 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2433 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2434 } \
2435 } while (0)
2436
2437
2438
2439/**
2440 * Atomically exchanges and adds to a 32-bit value, ordered.
2441 *
2442 * @returns The old value.
2443 * @param pu32 Pointer to the value.
2444 * @param u32 Number to add.
2445 */
2446#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2447DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2448#else
2449DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2450{
2451# if RT_INLINE_ASM_USES_INTRIN
2452 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2453 return u32;
2454
2455# elif RT_INLINE_ASM_GNU_STYLE
2456 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2457 : "=r" (u32),
2458 "=m" (*pu32)
2459 : "0" (u32),
2460 "m" (*pu32)
2461 : "memory");
2462 return u32;
2463# else
2464 __asm
2465 {
2466 mov eax, [u32]
2467# ifdef RT_ARCH_AMD64
2468 mov rdx, [pu32]
2469 lock xadd [rdx], eax
2470# else
2471 mov edx, [pu32]
2472 lock xadd [edx], eax
2473# endif
2474 mov [u32], eax
2475 }
2476 return u32;
2477# endif
2478}
2479#endif
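
/* Sketch: since ASMAtomicAddU32 returns the old value, it can hand out
 * unique, ascending ticket numbers; the names are illustrative only:
 *
 *      static uint32_t volatile g_uNextTicket = 0;
 *
 *      static uint32_t myTakeTicket(void)
 *      {
 *          return ASMAtomicAddU32(&g_uNextTicket, 1);
 *      }
 */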
2480
2481
2482/**
2483 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2484 *
2485 * @returns The old value.
2486 * @param pi32 Pointer to the value.
2487 * @param i32 Number to add.
2488 */
2489DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2490{
2491 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2492}
2493
2494
2495/**
2496 * Atomically exchanges and adds to a 64-bit value, ordered.
2497 *
2498 * @returns The old value.
2499 * @param pu64 Pointer to the value.
2500 * @param u64 Number to add.
2501 */
2502#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2503DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2504#else
2505DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2506{
2507# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2508 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2509 return u64;
2510
2511# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2512 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2513 : "=r" (u64),
2514 "=m" (*pu64)
2515 : "0" (u64),
2516 "m" (*pu64)
2517 : "memory");
2518 return u64;
2519# else
2520 uint64_t u64Old;
2521 for (;;)
2522 {
2523 uint64_t u64New;
2524 u64Old = ASMAtomicUoReadU64(pu64);
2525 u64New = u64Old + u64;
2526 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2527 break;
2528 ASMNopPause();
2529 }
2530 return u64Old;
2531# endif
2532}
2533#endif
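
/* The 32-bit fallback above shows the generic compare-exchange retry loop;
 * the same pattern works for read-modify-write operations that have no
 * dedicated helper. A sketch computing an atomic maximum (illustrative
 * only, not part of the IPRT API):
 *
 *      static void myAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64New)
 *      {
 *          for (;;)
 *          {
 *              uint64_t u64Old = ASMAtomicUoReadU64(pu64);
 *              if (u64Old >= u64New)
 *                  break;
 *              if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                  break;
 *              ASMNopPause();
 *          }
 *      }
 */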
2534
2535
2536/**
2537 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2538 *
2539 * @returns The old value.
2540 * @param pi64 Pointer to the value.
2541 * @param i64 Number to add.
2542 */
2543DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2544{
2545 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2546}
2547
2548
2549/**
2550 * Atomically exchanges and adds to a size_t value, ordered.
2551 *
2552 * @returns The old value.
2553 * @param pcb Pointer to the size_t value.
2554 * @param cb Number to add.
2555 */
2556DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2557{
2558#if ARCH_BITS == 64
2559 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2560#elif ARCH_BITS == 32
2561 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2562#else
2563# error "Unsupported ARCH_BITS value"
2564#endif
2565}
2566
2567
2568/**
2569 * Atomically exchanges and adds a value whose size might differ between
2570 * platforms or compilers, ordered.
2571 *
2572 * @param pu Pointer to the variable to update.
2573 * @param uNew The value to add to *pu.
2574 * @param puOld Where to store the old value.
2575 */
2576#define ASMAtomicAddSize(pu, uNew, puOld) \
2577 do { \
2578 switch (sizeof(*(pu))) { \
2579 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2580 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2581 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2582 } \
2583 } while (0)
2584
2585
2586/**
2587 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2588 *
2589 * @returns The old value.
2590 * @param pu32 Pointer to the value.
2591 * @param u32 Number to subtract.
2592 */
2593DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2594{
2595 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2596}
2597
2598
2599/**
2600 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2601 *
2602 * @returns The old value.
2603 * @param pi32 Pointer to the value.
2604 * @param i32 Number to subtract.
2605 */
2606DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2607{
2608 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2609}
2610
2611
2612/**
2613 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2614 *
2615 * @returns The old value.
2616 * @param pu64 Pointer to the value.
2617 * @param u64 Number to subtract.
2618 */
2619DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2620{
2621 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2622}
2623
2624
2625/**
2626 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2627 *
2628 * @returns The old value.
2629 * @param pi64 Pointer to the value.
2630 * @param i64 Number to subtract.
2631 */
2632DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2633{
2634 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2635}
2636
2637
2638/**
2639 * Atomically exchanges and subtracts from a size_t value, ordered.
2640 *
2641 * @returns The old value.
2642 * @param pcb Pointer to the size_t value.
2643 * @param cb Number to subtract.
2644 */
2645DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2646{
2647#if ARCH_BITS == 64
2648 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2649#elif ARCH_BITS == 32
2650 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2651#else
2652# error "Unsupported ARCH_BITS value"
2653#endif
2654}
2655
2656
2657/**
2658 * Atomically exchanges and subtracts a value whose size might differ between
2659 * platforms or compilers, ordered.
2660 *
2661 * @param pu Pointer to the variable to update.
2662 * @param uNew The value to subtract from *pu.
2663 * @param puOld Where to store the old value.
2664 */
2665#define ASMAtomicSubSize(pu, uNew, puOld) \
2666 do { \
2667 switch (sizeof(*(pu))) { \
2668 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2669 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2670 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2671 } \
2672 } while (0)
2673
2674
2675/**
2676 * Atomically increment a 32-bit value, ordered.
2677 *
2678 * @returns The new value.
2679 * @param pu32 Pointer to the value to increment.
2680 */
2681#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2682DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2683#else
2684DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2685{
2686 uint32_t u32;
2687# if RT_INLINE_ASM_USES_INTRIN
2688 u32 = _InterlockedIncrement((long *)pu32);
2689 return u32;
2690
2691# elif RT_INLINE_ASM_GNU_STYLE
2692 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2693 : "=r" (u32),
2694 "=m" (*pu32)
2695 : "0" (1),
2696 "m" (*pu32)
2697 : "memory");
2698 return u32+1;
2699# else
2700 __asm
2701 {
2702 mov eax, 1
2703# ifdef RT_ARCH_AMD64
2704 mov rdx, [pu32]
2705 lock xadd [rdx], eax
2706# else
2707 mov edx, [pu32]
2708 lock xadd [edx], eax
2709# endif
2710 mov u32, eax
2711 }
2712 return u32+1;
2713# endif
2714}
2715#endif
2716
2717
2718/**
2719 * Atomically increment a signed 32-bit value, ordered.
2720 *
2721 * @returns The new value.
2722 * @param pi32 Pointer to the value to increment.
2723 */
2724DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2725{
2726 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2727}
2728
2729
2730/**
2731 * Atomically increment a 64-bit value, ordered.
2732 *
2733 * @returns The new value.
2734 * @param pu64 Pointer to the value to increment.
2735 */
2736#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2737DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2738#else
2739DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2740{
2741# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2742 uint64_t u64;
2743 u64 = _InterlockedIncrement64((__int64 *)pu64);
2744 return u64;
2745
2746# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2747 uint64_t u64;
2748 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2749 : "=r" (u64),
2750 "=m" (*pu64)
2751 : "0" (1),
2752 "m" (*pu64)
2753 : "memory");
2754 return u64 + 1;
2755# else
2756 return ASMAtomicAddU64(pu64, 1) + 1;
2757# endif
2758}
2759#endif
2760
2761
2762/**
2763 * Atomically increment a signed 64-bit value, ordered.
2764 *
2765 * @returns The new value.
2766 * @param pi64 Pointer to the value to increment.
2767 */
2768DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2769{
2770 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2771}
2772
2773
2774/**
2775 * Atomically increment a size_t value, ordered.
2776 *
2777 * @returns The new value.
2778 * @param pcb Pointer to the value to increment.
2779 */
2780DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
2781{
2782#if ARCH_BITS == 64
2783 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2784#elif ARCH_BITS == 32
2785 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2786#else
2787# error "Unsupported ARCH_BITS value"
2788#endif
2789}
2790
2791
2792/**
2793 * Atomically decrement an unsigned 32-bit value, ordered.
2794 *
2795 * @returns The new value.
2796 * @param pu32 Pointer to the value to decrement.
2797 */
2798#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2799DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2800#else
2801DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2802{
2803 uint32_t u32;
2804# if RT_INLINE_ASM_USES_INTRIN
2805 u32 = _InterlockedDecrement((long *)pu32);
2806 return u32;
2807
2808# elif RT_INLINE_ASM_GNU_STYLE
2809 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2810 : "=r" (u32),
2811 "=m" (*pu32)
2812 : "0" (-1),
2813 "m" (*pu32)
2814 : "memory");
2815 return u32-1;
2816# else
2817 __asm
2818 {
2819 mov eax, -1
2820# ifdef RT_ARCH_AMD64
2821 mov rdx, [pu32]
2822 lock xadd [rdx], eax
2823# else
2824 mov edx, [pu32]
2825 lock xadd [edx], eax
2826# endif
2827 mov u32, eax
2828 }
2829 return u32-1;
2830# endif
2831}
2832#endif
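
/* Sketch of the typical reference counting use of ASMAtomicIncU32 and
 * ASMAtomicDecU32; the MYOBJ structure and the myDestroy cleanup routine
 * are hypothetical:
 *
 *      typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *      static void myDestroy(MYOBJ *pObj);
 *
 *      static void myRetain(MYOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      static void myRelease(MYOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              myDestroy(pObj);
 *      }
 */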
2833
2834
2835/**
2836 * Atomically decrement a signed 32-bit value, ordered.
2837 *
2838 * @returns The new value.
2839 * @param pi32 Pointer to the value to decrement.
2840 */
2841DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2842{
2843 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2844}
2845
2846
2847/**
2848 * Atomically decrement an unsigned 64-bit value, ordered.
2849 *
2850 * @returns The new value.
2851 * @param pu64 Pointer to the value to decrement.
2852 */
2853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2854DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2855#else
2856DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2857{
2858# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2859 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2860 return u64;
2861
2862# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2863 uint64_t u64;
2864 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2865 : "=r" (u64),
2866 "=m" (*pu64)
2867 : "0" (~(uint64_t)0),
2868 "m" (*pu64)
2869 : "memory");
2870 return u64-1;
2871# else
2872 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2873# endif
2874}
2875#endif
2876
2877
2878/**
2879 * Atomically decrement a signed 64-bit value, ordered.
2880 *
2881 * @returns The new value.
2882 * @param pi64 Pointer to the value to decrement.
2883 */
2884DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2885{
2886 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2887}
2888
2889
2890/**
2891 * Atomically decrement a size_t value, ordered.
2892 *
2893 * @returns The new value.
2894 * @param pcb Pointer to the value to decrement.
2895 */
2896DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
2897{
2898#if ARCH_BITS == 64
2899 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2900#elif ARCH_BITS == 32
2901 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2902#else
2903# error "Unsupported ARCH_BITS value"
2904#endif
2905}
2906
2907
2908/**
2909 * Atomically Or an unsigned 32-bit value, ordered.
2910 *
2911 * @param pu32 Pointer to the 32-bit value to OR u32 with.
2912 * @param u32 The value to OR *pu32 with.
2913 */
2914#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2915DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2916#else
2917DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2918{
2919# if RT_INLINE_ASM_USES_INTRIN
2920 _InterlockedOr((long volatile *)pu32, (long)u32);
2921
2922# elif RT_INLINE_ASM_GNU_STYLE
2923 __asm__ __volatile__("lock; orl %1, %0\n\t"
2924 : "=m" (*pu32)
2925 : "ir" (u32),
2926 "m" (*pu32));
2927# else
2928 __asm
2929 {
2930 mov eax, [u32]
2931# ifdef RT_ARCH_AMD64
2932 mov rdx, [pu32]
2933 lock or [rdx], eax
2934# else
2935 mov edx, [pu32]
2936 lock or [edx], eax
2937# endif
2938 }
2939# endif
2940}
2941#endif
2942
2943
2944/**
2945 * Atomically Or a signed 32-bit value, ordered.
2946 *
2947 * @param pi32 Pointer to the 32-bit value to OR i32 with.
2948 * @param i32 The value to OR *pi32 with.
2949 */
2950DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2951{
2952 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2953}
2954
2955
2956/**
2957 * Atomically Or an unsigned 64-bit value, ordered.
2958 *
2959 * @param pu64 Pointer to the 64-bit value to OR u64 with.
2960 * @param u64 The value to OR *pu64 with.
2961 */
2962#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2963DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2964#else
2965DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2966{
2967# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2968 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2969
2970# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2971 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2972 : "=m" (*pu64)
2973 : "r" (u64),
2974 "m" (*pu64));
2975# else
2976 for (;;)
2977 {
2978 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2979 uint64_t u64New = u64Old | u64;
2980 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2981 break;
2982 ASMNopPause();
2983 }
2984# endif
2985}
2986#endif
2987
2988
2989/**
2990 * Atomically Or a signed 64-bit value, ordered.
2991 *
2992 * @param pi64 Pointer to the 64-bit value to OR i64 with.
2993 * @param i64 The value to OR *pi64 with.
2994 */
2995DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
2996{
2997 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
2998}
2999
3000
3001/**
3002 * Atomically And an unsigned 32-bit value, ordered.
3003 *
3004 * @param pu32 Pointer to the 32-bit value to AND u32 with.
3005 * @param u32 The value to AND *pu32 with.
3006 */
3007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3008DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3009#else
3010DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3011{
3012# if RT_INLINE_ASM_USES_INTRIN
3013 _InterlockedAnd((long volatile *)pu32, u32);
3014
3015# elif RT_INLINE_ASM_GNU_STYLE
3016 __asm__ __volatile__("lock; andl %1, %0\n\t"
3017 : "=m" (*pu32)
3018 : "ir" (u32),
3019 "m" (*pu32));
3020# else
3021 __asm
3022 {
3023 mov eax, [u32]
3024# ifdef RT_ARCH_AMD64
3025 mov rdx, [pu32]
3026 lock and [rdx], eax
3027# else
3028 mov edx, [pu32]
3029 lock and [edx], eax
3030# endif
3031 }
3032# endif
3033}
3034#endif
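
/* Sketch of flag manipulation with ASMAtomicOrU32 / ASMAtomicAndU32; the
 * flag and variable names are illustrative only:
 *
 *      #define MYFLAG_BUSY  UINT32_C(0x00000001)
 *      static uint32_t volatile g_fMyFlags;
 *
 *      static void mySetBusy(void)
 *      {
 *          ASMAtomicOrU32(&g_fMyFlags, MYFLAG_BUSY);
 *      }
 *
 *      static void myClearBusy(void)
 *      {
 *          ASMAtomicAndU32(&g_fMyFlags, ~MYFLAG_BUSY);
 *      }
 */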
3035
3036
3037/**
3038 * Atomically And a signed 32-bit value, ordered.
3039 *
3040 * @param pi32 Pointer to the 32-bit value to AND i32 with.
3041 * @param i32 The value to AND *pi32 with.
3042 */
3043DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3044{
3045 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3046}
3047
3048
3049/**
3050 * Atomically And an unsigned 64-bit value, ordered.
3051 *
3052 * @param pu64 Pointer to the 64-bit value to AND u64 with.
3053 * @param u64 The value to AND *pu64 with.
3054 */
3055#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3056DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3057#else
3058DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3059{
3060# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3061 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3062
3063# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3064 __asm__ __volatile__("lock; andq %1, %0\n\t"
3065 : "=m" (*pu64)
3066 : "r" (u64),
3067 "m" (*pu64));
3068# else
3069 for (;;)
3070 {
3071 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3072 uint64_t u64New = u64Old & u64;
3073 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3074 break;
3075 ASMNopPause();
3076 }
3077# endif
3078}
3079#endif
3080
3081
3082/**
3083 * Atomically And a signed 64-bit value, ordered.
3084 *
3085 * @param pi64 Pointer to the 64-bit value to AND i64 with.
3086 * @param i64 The value to AND *pi64 with.
3087 */
3088DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3089{
3090 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3091}
3092
3093
3094/**
3095 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3096 *
3097 * @param pu32 Pointer to the 32-bit value to OR u32 with.
3098 * @param u32 The value to OR *pu32 with.
3099 */
3100#if RT_INLINE_ASM_EXTERNAL
3101DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3102#else
3103DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3104{
3105# if RT_INLINE_ASM_GNU_STYLE
3106 __asm__ __volatile__("orl %1, %0\n\t"
3107 : "=m" (*pu32)
3108 : "ir" (u32),
3109 "m" (*pu32));
3110# else
3111 __asm
3112 {
3113 mov eax, [u32]
3114# ifdef RT_ARCH_AMD64
3115 mov rdx, [pu32]
3116 or [rdx], eax
3117# else
3118 mov edx, [pu32]
3119 or [edx], eax
3120# endif
3121 }
3122# endif
3123}
3124#endif
3125
3126
3127/**
3128 * Atomically OR a signed 32-bit value, unordered.
3129 *
3130 * @param pi32 Pointer to the 32-bit value to OR i32 with.
3131 * @param i32 The value to OR *pi32 with.
3132 */
3133DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3134{
3135 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3136}
3137
3138
3139/**
3140 * Atomically OR an unsigned 64-bit value, unordered.
3141 *
3142 * @param pu64 Pointer to the 64-bit value to OR u64 with.
3143 * @param u64 The value to OR *pu64 with.
3144 */
3145#if RT_INLINE_ASM_EXTERNAL
3146DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3147#else
3148DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3149{
3150# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3151 __asm__ __volatile__("orq %1, %q0\n\t"
3152 : "=m" (*pu64)
3153 : "r" (u64),
3154 "m" (*pu64));
3155# else
3156 for (;;)
3157 {
3158 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3159 uint64_t u64New = u64Old | u64;
3160 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3161 break;
3162 ASMNopPause();
3163 }
3164# endif
3165}
3166#endif
3167
3168
3169/**
3170 * Atomically Or a signed 64-bit value, unordered.
3171 *
3172 * @param pi64 Pointer to the 64-bit value to OR i64 with.
3173 * @param i64 The value to OR *pi64 with.
3174 */
3175DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3176{
3177 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3178}
3179
3180
3181/**
3182 * Atomically And an unsigned 32-bit value, unordered.
3183 *
3184 * @param pu32 Pointer to the 32-bit value to AND u32 with.
3185 * @param u32 The value to AND *pu32 with.
3186 */
3187#if RT_INLINE_ASM_EXTERNAL
3188DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3189#else
3190DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3191{
3192# if RT_INLINE_ASM_GNU_STYLE
3193 __asm__ __volatile__("andl %1, %0\n\t"
3194 : "=m" (*pu32)
3195 : "ir" (u32),
3196 "m" (*pu32));
3197# else
3198 __asm
3199 {
3200 mov eax, [u32]
3201# ifdef RT_ARCH_AMD64
3202 mov rdx, [pu32]
3203 and [rdx], eax
3204# else
3205 mov edx, [pu32]
3206 and [edx], eax
3207# endif
3208 }
3209# endif
3210}
3211#endif
3212
3213
3214/**
3215 * Atomically And a signed 32-bit value, unordered.
3216 *
3217 * @param pi32 Pointer to the 32-bit value to AND i32 with.
3218 * @param i32 The value to AND *pi32 with.
3219 */
3220DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3221{
3222 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3223}
3224
3225
3226/**
3227 * Atomically And an unsigned 64-bit value, unordered.
3228 *
3229 * @param pu64 Pointer to the 64-bit value to AND u64 with.
3230 * @param u64 The value to AND *pu64 with.
3231 */
3232#if RT_INLINE_ASM_EXTERNAL
3233DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3234#else
3235DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3236{
3237# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3238 __asm__ __volatile__("andq %1, %0\n\t"
3239 : "=m" (*pu64)
3240 : "r" (u64),
3241 "m" (*pu64));
3242# else
3243 for (;;)
3244 {
3245 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3246 uint64_t u64New = u64Old & u64;
3247 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3248 break;
3249 ASMNopPause();
3250 }
3251# endif
3252}
3253#endif
3254
3255
3256/**
3257 * Atomically And a signed 64-bit value, unordered.
3258 *
3259 * @param pi64 Pointer to the 64-bit value to AND i64 with.
3260 * @param i64 The value to AND *pi64 with.
3261 */
3262DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3263{
3264 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3265}
3266
3267
3268/**
3269 * Atomically increment an unsigned 32-bit value, unordered.
3270 *
3271 * @returns the new value.
3272 * @param pu32 Pointer to the variable to increment.
3273 */
3274#if RT_INLINE_ASM_EXTERNAL
3275DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3276#else
3277DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3278{
3279 uint32_t u32;
3280# if RT_INLINE_ASM_GNU_STYLE
3281 __asm__ __volatile__("xaddl %0, %1\n\t"
3282 : "=r" (u32),
3283 "=m" (*pu32)
3284 : "0" (1),
3285 "m" (*pu32)
3286 : "memory");
3287 return u32 + 1;
3288# else
3289 __asm
3290 {
3291 mov eax, 1
3292# ifdef RT_ARCH_AMD64
3293 mov rdx, [pu32]
3294 xadd [rdx], eax
3295# else
3296 mov edx, [pu32]
3297 xadd [edx], eax
3298# endif
3299 mov u32, eax
3300 }
3301 return u32 + 1;
3302# endif
3303}
3304#endif
3305
3306
3307/**
3308 * Atomically decrement an unsigned 32-bit value, unordered.
3309 *
3310 * @returns the new value.
3311 * @param pu32 Pointer to the variable to decrement.
3312 */
3313#if RT_INLINE_ASM_EXTERNAL
3314DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3315#else
3316DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3317{
3318 uint32_t u32;
3319# if RT_INLINE_ASM_GNU_STYLE
3320 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3321 : "=r" (u32),
3322 "=m" (*pu32)
3323 : "0" (-1),
3324 "m" (*pu32)
3325 : "memory");
3326 return u32 - 1;
3327# else
3328 __asm
3329 {
3330 mov eax, -1
3331# ifdef RT_ARCH_AMD64
3332 mov rdx, [pu32]
3333 xadd [rdx], eax
3334# else
3335 mov edx, [pu32]
3336 xadd [edx], eax
3337# endif
3338 mov u32, eax
3339 }
3340 return u32 - 1;
3341# endif
3342}
3343#endif
3344
3345
3346/** @def RT_ASM_PAGE_SIZE
3347 * We try to avoid dragging iprt/param.h in here.
3348 * @internal
3349 */
3350#if defined(RT_ARCH_SPARC64)
3351# define RT_ASM_PAGE_SIZE 0x2000
3352# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3353# if PAGE_SIZE != 0x2000
3354# error "PAGE_SIZE is not 0x2000!"
3355# endif
3356# endif
3357#else
3358# define RT_ASM_PAGE_SIZE 0x1000
3359# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3360# if PAGE_SIZE != 0x1000
3361# error "PAGE_SIZE is not 0x1000!"
3362# endif
3363# endif
3364#endif
3365
3366/**
3367 * Zeros a 4K memory page.
3368 *
3369 * @param pv Pointer to the memory block. This must be page aligned.
3370 */
3371#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3372DECLASM(void) ASMMemZeroPage(volatile void *pv);
3373# else
3374DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3375{
3376# if RT_INLINE_ASM_USES_INTRIN
3377# ifdef RT_ARCH_AMD64
3378 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3379# else
3380 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3381# endif
3382
3383# elif RT_INLINE_ASM_GNU_STYLE
3384 RTCCUINTREG uDummy;
3385# ifdef RT_ARCH_AMD64
3386 __asm__ __volatile__("rep stosq"
3387 : "=D" (pv),
3388 "=c" (uDummy)
3389 : "0" (pv),
3390 "c" (RT_ASM_PAGE_SIZE >> 3),
3391 "a" (0)
3392 : "memory");
3393# else
3394 __asm__ __volatile__("rep stosl"
3395 : "=D" (pv),
3396 "=c" (uDummy)
3397 : "0" (pv),
3398 "c" (RT_ASM_PAGE_SIZE >> 2),
3399 "a" (0)
3400 : "memory");
3401# endif
3402# else
3403 __asm
3404 {
3405# ifdef RT_ARCH_AMD64
3406 xor rax, rax
3407 mov ecx, 0200h
3408 mov rdi, [pv]
3409 rep stosq
3410# else
3411 xor eax, eax
3412 mov ecx, 0400h
3413 mov edi, [pv]
3414 rep stosd
3415# endif
3416 }
3417# endif
3418}
3419# endif
3420
3421
3422/**
3423 * Zeros a memory block with a 32-bit aligned size.
3424 *
3425 * @param pv Pointer to the memory block.
3426 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3427 */
3428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3429DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3430#else
3431DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3432{
3433# if RT_INLINE_ASM_USES_INTRIN
3434# ifdef RT_ARCH_AMD64
3435 if (!(cb & 7))
3436 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3437 else
3438# endif
3439 __stosd((unsigned long *)pv, 0, cb / 4);
3440
3441# elif RT_INLINE_ASM_GNU_STYLE
3442 __asm__ __volatile__("rep stosl"
3443 : "=D" (pv),
3444 "=c" (cb)
3445 : "0" (pv),
3446 "1" (cb >> 2),
3447 "a" (0)
3448 : "memory");
3449# else
3450 __asm
3451 {
3452 xor eax, eax
3453# ifdef RT_ARCH_AMD64
3454 mov rcx, [cb]
3455 shr rcx, 2
3456 mov rdi, [pv]
3457# else
3458 mov ecx, [cb]
3459 shr ecx, 2
3460 mov edi, [pv]
3461# endif
3462 rep stosd
3463 }
3464# endif
3465}
3466#endif
3467
3468
3469/**
3470 * Fills a memory block with a 32-bit aligned size.
3471 *
3472 * @param pv Pointer to the memory block.
3473 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3474 * @param u32 The value to fill with.
3475 */
3476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3477DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3478#else
3479DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3480{
3481# if RT_INLINE_ASM_USES_INTRIN
3482# ifdef RT_ARCH_AMD64
3483 if (!(cb & 7))
3484 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3485 else
3486# endif
3487 __stosd((unsigned long *)pv, u32, cb / 4);
3488
3489# elif RT_INLINE_ASM_GNU_STYLE
3490 __asm__ __volatile__("rep stosl"
3491 : "=D" (pv),
3492 "=c" (cb)
3493 : "0" (pv),
3494 "1" (cb >> 2),
3495 "a" (u32)
3496 : "memory");
3497# else
3498 __asm
3499 {
3500# ifdef RT_ARCH_AMD64
3501 mov rcx, [cb]
3502 shr rcx, 2
3503 mov rdi, [pv]
3504# else
3505 mov ecx, [cb]
3506 shr ecx, 2
3507 mov edi, [pv]
3508# endif
3509 mov eax, [u32]
3510 rep stosd
3511 }
3512# endif
3513}
3514#endif
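
/* Sketch: both ASMMemZero32 and ASMMemFill32 require the byte count to be a
 * multiple of four, so size the buffer accordingly; the names are
 * illustrative only:
 *
 *      static uint32_t s_au32MyBuf[64];
 *
 *      static void myZeroBuf(void)
 *      {
 *          ASMMemZero32(s_au32MyBuf, sizeof(s_au32MyBuf));
 *      }
 *
 *      static void myPoisonBuf(void)
 *      {
 *          ASMMemFill32(s_au32MyBuf, sizeof(s_au32MyBuf), UINT32_C(0xdeadbeef));
 *      }
 */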
3515
3516
3517/**
3518 * Checks if a memory page is all zeros.
3519 *
3520 * @returns true / false.
3521 *
3522 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
3523 * boundary.
3524 */
3525DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3526{
3527# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3528 union { RTCCUINTREG r; bool f; } uAX;
3529 RTCCUINTREG xCX, xDI;
3530 Assert(!((uintptr_t)pvPage & 15));
3531 __asm__ __volatile__("repe; "
3532# ifdef RT_ARCH_AMD64
3533 "scasq\n\t"
3534# else
3535 "scasl\n\t"
3536# endif
3537 "setnc %%al\n\t"
3538 : "=&c" (xCX),
3539 "=&D" (xDI),
3540 "=&a" (uAX.r)
3541 : "mr" (pvPage),
3542# ifdef RT_ARCH_AMD64
3543 "0" (RT_ASM_PAGE_SIZE/8),
3544# else
3545 "0" (RT_ASM_PAGE_SIZE/4),
3546# endif
3547 "1" (pvPage),
3548 "2" (0));
3549 return uAX.f;
3550# else
3551 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3552 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3553 Assert(!((uintptr_t)pvPage & 15));
3554 for (;;)
3555 {
3556 if (puPtr[0]) return false;
3557 if (puPtr[4]) return false;
3558
3559 if (puPtr[2]) return false;
3560 if (puPtr[6]) return false;
3561
3562 if (puPtr[1]) return false;
3563 if (puPtr[5]) return false;
3564
3565 if (puPtr[3]) return false;
3566 if (puPtr[7]) return false;
3567
3568 if (!--cLeft)
3569 return true;
3570 puPtr += 8;
3571 }
3572 return true;
3573# endif
3574}
3575
3576
3577/**
3578 * Checks if a memory block is filled with the specified byte.
3579 *
3580 * This is a sort of inverted memchr.
3581 *
3582 * @returns Pointer to the byte which doesn't equal u8.
3583 * @returns NULL if all equal to u8.
3584 *
3585 * @param pv Pointer to the memory block.
3586 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3587 * @param u8 The value it's supposed to be filled with.
3588 *
3589 * @todo Fix name, it is a predicate function but it's not returning boolean!
3590 */
3591DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3592{
3593/** @todo rewrite this in inline assembly? */
3594 uint8_t const *pb = (uint8_t const *)pv;
3595 for (; cb; cb--, pb++)
3596 if (RT_UNLIKELY(*pb != u8))
3597 return (void *)pb;
3598 return NULL;
3599}
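
/* Sketch: ASMMemIsAll8 returns NULL when every byte matches, so a simple
 * "is this buffer still all zeros" predicate looks like this (illustrative
 * name):
 *
 *      static bool myIsBufZeroed(void const *pv, size_t cb)
 *      {
 *          return ASMMemIsAll8(pv, cb, 0) == NULL;
 *      }
 */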
3600
3601
3602/**
3603 * Checks if a memory block is filled with the specified 32-bit value.
3604 *
3605 * This is a sort of inverted memchr.
3606 *
3607 * @returns Pointer to the first value which doesn't equal u32.
3608 * @returns NULL if all equal to u32.
3609 *
3610 * @param pv Pointer to the memory block.
3611 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3612 * @param u32 The value it's supposed to be filled with.
3613 *
3614 * @todo Fix name, it is a predicate function but it's not returning boolean!
3615 */
3616DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3617{
3618/** @todo rewrite this in inline assembly? */
3619 uint32_t const *pu32 = (uint32_t const *)pv;
3620 for (; cb; cb -= 4, pu32++)
3621 if (RT_UNLIKELY(*pu32 != u32))
3622 return (uint32_t *)pu32;
3623 return NULL;
3624}
3625
3626
3627/**
3628 * Probes a byte pointer for read access.
3629 *
3630 * While the function will fault if the byte is not read accessible,
3631 * the idea is to do this in a safe place like before acquiring locks
3632 * and such like.
3633 *
3634 * Also, this function guarantees that an eager compiler is not going
3635 * to optimize the probing away.
3636 *
3637 * @param pvByte Pointer to the byte.
3638 */
3639#if RT_INLINE_ASM_EXTERNAL
3640DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3641#else
3642DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3643{
3644 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3645 uint8_t u8;
3646# if RT_INLINE_ASM_GNU_STYLE
3647 __asm__ __volatile__("movb (%1), %0\n\t"
3648 : "=r" (u8)
3649 : "r" (pvByte));
3650# else
3651 __asm
3652 {
3653# ifdef RT_ARCH_AMD64
3654 mov rax, [pvByte]
3655 mov al, [rax]
3656# else
3657 mov eax, [pvByte]
3658 mov al, [eax]
3659# endif
3660 mov [u8], al
3661 }
3662# endif
3663 return u8;
3664}
3665#endif
3666
3667/**
3668 * Probes a buffer for read access page by page.
3669 *
3670 * While the function will fault if the buffer is not fully read
3671 * accessible, the idea is to do this in a safe place like before
3672 * acquiring locks and such like.
3673 *
3674 * Also, this function guarantees that an eager compiler is not going
3675 * to optimize the probing away.
3676 *
3677 * @param pvBuf Pointer to the buffer.
3678 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3679 */
3680DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3681{
3682 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3683 /* the first byte */
3684 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3685 ASMProbeReadByte(pu8);
3686
3687 /* the pages in between the first and the last byte. */
3688 while (cbBuf > RT_ASM_PAGE_SIZE)
3689 {
3690 ASMProbeReadByte(pu8);
3691 cbBuf -= RT_ASM_PAGE_SIZE;
3692 pu8 += RT_ASM_PAGE_SIZE;
3693 }
3694
3695 /* the last byte */
3696 ASMProbeReadByte(pu8 + cbBuf - 1);
3697}
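
/* Sketch: probing a caller supplied buffer up front, so any page fault is
 * taken here rather than inside a critical section entered later; the names
 * are illustrative only:
 *
 *      static void myValidateRequest(const void *pvReq, size_t cbReq)
 *      {
 *          if (cbReq)
 *              ASMProbeReadBuffer(pvReq, cbReq);
 *      }
 */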
3698
3699
3700
3701/** @defgroup grp_inline_bits Bit Operations
3702 * @{
3703 */
3704
3705
3706/**
3707 * Sets a bit in a bitmap.
3708 *
3709 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3710 * @param iBit The bit to set.
3711 *
3712 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3713 * However, doing so will yield better performance as well as avoiding
3714 * traps accessing the last bits in the bitmap.
3715 */
3716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3717DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3718#else
3719DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3720{
3721# if RT_INLINE_ASM_USES_INTRIN
3722 _bittestandset((long *)pvBitmap, iBit);
3723
3724# elif RT_INLINE_ASM_GNU_STYLE
3725 __asm__ __volatile__("btsl %1, %0"
3726 : "=m" (*(volatile long *)pvBitmap)
3727 : "Ir" (iBit),
3728 "m" (*(volatile long *)pvBitmap)
3729 : "memory");
3730# else
3731 __asm
3732 {
3733# ifdef RT_ARCH_AMD64
3734 mov rax, [pvBitmap]
3735 mov edx, [iBit]
3736 bts [rax], edx
3737# else
3738 mov eax, [pvBitmap]
3739 mov edx, [iBit]
3740 bts [eax], edx
3741# endif
3742 }
3743# endif
3744}
3745#endif
3746
3747
3748/**
3749 * Atomically sets a bit in a bitmap, ordered.
3750 *
3751 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3752 * the memory access isn't atomic!
3753 * @param iBit The bit to set.
3754 */
3755#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3756DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3757#else
3758DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3759{
3760 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3761# if RT_INLINE_ASM_USES_INTRIN
3762 _interlockedbittestandset((long *)pvBitmap, iBit);
3763# elif RT_INLINE_ASM_GNU_STYLE
3764 __asm__ __volatile__("lock; btsl %1, %0"
3765 : "=m" (*(volatile long *)pvBitmap)
3766 : "Ir" (iBit),
3767 "m" (*(volatile long *)pvBitmap)
3768 : "memory");
3769# else
3770 __asm
3771 {
3772# ifdef RT_ARCH_AMD64
3773 mov rax, [pvBitmap]
3774 mov edx, [iBit]
3775 lock bts [rax], edx
3776# else
3777 mov eax, [pvBitmap]
3778 mov edx, [iBit]
3779 lock bts [eax], edx
3780# endif
3781 }
3782# endif
3783}
3784#endif
3785
3786
3787/**
3788 * Clears a bit in a bitmap.
3789 *
3790 * @param pvBitmap Pointer to the bitmap.
3791 * @param iBit The bit to clear.
3792 *
3793 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3794 * However, doing so will yield better performance as well as avoiding
3795 * traps accessing the last bits in the bitmap.
3796 */
3797#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3798DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3799#else
3800DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3801{
3802# if RT_INLINE_ASM_USES_INTRIN
3803 _bittestandreset((long *)pvBitmap, iBit);
3804
3805# elif RT_INLINE_ASM_GNU_STYLE
3806 __asm__ __volatile__("btrl %1, %0"
3807 : "=m" (*(volatile long *)pvBitmap)
3808 : "Ir" (iBit),
3809 "m" (*(volatile long *)pvBitmap)
3810 : "memory");
3811# else
3812 __asm
3813 {
3814# ifdef RT_ARCH_AMD64
3815 mov rax, [pvBitmap]
3816 mov edx, [iBit]
3817 btr [rax], edx
3818# else
3819 mov eax, [pvBitmap]
3820 mov edx, [iBit]
3821 btr [eax], edx
3822# endif
3823 }
3824# endif
3825}
3826#endif
3827
3828
3829/**
3830 * Atomically clears a bit in a bitmap, ordered.
3831 *
3832 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3833 * the memory access isn't atomic!
3834 * @param iBit The bit to clear.
3835 * @remarks No memory barrier, take care on SMP.
3836 */
3837#if RT_INLINE_ASM_EXTERNAL
3838DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3839#else
3840DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3841{
3842 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3843# if RT_INLINE_ASM_GNU_STYLE
3844 __asm__ __volatile__("lock; btrl %1, %0"
3845 : "=m" (*(volatile long *)pvBitmap)
3846 : "Ir" (iBit),
3847 "m" (*(volatile long *)pvBitmap)
3848 : "memory");
3849# else
3850 __asm
3851 {
3852# ifdef RT_ARCH_AMD64
3853 mov rax, [pvBitmap]
3854 mov edx, [iBit]
3855 lock btr [rax], edx
3856# else
3857 mov eax, [pvBitmap]
3858 mov edx, [iBit]
3859 lock btr [eax], edx
3860# endif
3861 }
3862# endif
3863}
3864#endif
3865
3866
3867/**
3868 * Toggles a bit in a bitmap.
3869 *
3870 * @param pvBitmap Pointer to the bitmap.
3871 * @param iBit The bit to toggle.
3872 *
3873 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3874 * However, doing so will yield better performance as well as avoiding
3875 * traps accessing the last bits in the bitmap.
3876 */
3877#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3878DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3879#else
3880DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3881{
3882# if RT_INLINE_ASM_USES_INTRIN
3883 _bittestandcomplement((long *)pvBitmap, iBit);
3884# elif RT_INLINE_ASM_GNU_STYLE
3885 __asm__ __volatile__("btcl %1, %0"
3886 : "=m" (*(volatile long *)pvBitmap)
3887 : "Ir" (iBit),
3888 "m" (*(volatile long *)pvBitmap)
3889 : "memory");
3890# else
3891 __asm
3892 {
3893# ifdef RT_ARCH_AMD64
3894 mov rax, [pvBitmap]
3895 mov edx, [iBit]
3896 btc [rax], edx
3897# else
3898 mov eax, [pvBitmap]
3899 mov edx, [iBit]
3900 btc [eax], edx
3901# endif
3902 }
3903# endif
3904}
3905#endif
3906
3907
3908/**
3909 * Atomically toggles a bit in a bitmap, ordered.
3910 *
3911 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3912 * the memory access isn't atomic!
3913 * @param iBit The bit to toggle.
3914 */
3915#if RT_INLINE_ASM_EXTERNAL
3916DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3917#else
3918DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3919{
3920 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3921# if RT_INLINE_ASM_GNU_STYLE
3922 __asm__ __volatile__("lock; btcl %1, %0"
3923 : "=m" (*(volatile long *)pvBitmap)
3924 : "Ir" (iBit),
3925 "m" (*(volatile long *)pvBitmap)
3926 : "memory");
3927# else
3928 __asm
3929 {
3930# ifdef RT_ARCH_AMD64
3931 mov rax, [pvBitmap]
3932 mov edx, [iBit]
3933 lock btc [rax], edx
3934# else
3935 mov eax, [pvBitmap]
3936 mov edx, [iBit]
3937 lock btc [eax], edx
3938# endif
3939 }
3940# endif
3941}
3942#endif
3943
3944
3945/**
3946 * Tests and sets a bit in a bitmap.
3947 *
3948 * @returns true if the bit was set.
3949 * @returns false if the bit was clear.
3950 *
3951 * @param pvBitmap Pointer to the bitmap.
3952 * @param iBit The bit to test and set.
3953 *
3954 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
3955 * However, aligning it yields better performance and avoids
3956 * traps when accessing the last bits in the bitmap.
3957 */
3958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3959DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3960#else
3961DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3962{
3963 union { bool f; uint32_t u32; uint8_t u8; } rc;
3964# if RT_INLINE_ASM_USES_INTRIN
3965 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3966
3967# elif RT_INLINE_ASM_GNU_STYLE
3968 __asm__ __volatile__("btsl %2, %1\n\t"
3969 "setc %b0\n\t"
3970 "andl $1, %0\n\t"
3971 : "=q" (rc.u32),
3972 "=m" (*(volatile long *)pvBitmap)
3973 : "Ir" (iBit),
3974 "m" (*(volatile long *)pvBitmap)
3975 : "memory");
3976# else
3977 __asm
3978 {
3979 mov edx, [iBit]
3980# ifdef RT_ARCH_AMD64
3981 mov rax, [pvBitmap]
3982 bts [rax], edx
3983# else
3984 mov eax, [pvBitmap]
3985 bts [eax], edx
3986# endif
3987 setc al
3988 and eax, 1
3989 mov [rc.u32], eax
3990 }
3991# endif
3992 return rc.f;
3993}
3994#endif
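/*
 * A minimal usage sketch; the non-atomic variant suits bitmaps that are private
 * to one thread or already protected by a lock. The bitmap, index and helper
 * below are hypothetical:
 *
 *      static uint32_t s_au32SeenBitmap[8];                    // 256 bits
 *      if (!ASMBitTestAndSet(&s_au32SeenBitmap[0], idxItem))   // first sighting?
 *          ProcessNewItem(idxItem);
 */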
3995
3996
3997/**
3998 * Atomically tests and sets a bit in a bitmap, ordered.
3999 *
4000 * @returns true if the bit was set.
4001 * @returns false if the bit was clear.
4002 *
4003 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4004 * the memory access isn't atomic!
4005 * @param iBit The bit to test and set.
4006 */
4007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4008DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4009#else
4010DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4011{
4012 union { bool f; uint32_t u32; uint8_t u8; } rc;
4013 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4014# if RT_INLINE_ASM_USES_INTRIN
4015 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4016# elif RT_INLINE_ASM_GNU_STYLE
4017 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4018 "setc %b0\n\t"
4019 "andl $1, %0\n\t"
4020 : "=q" (rc.u32),
4021 "=m" (*(volatile long *)pvBitmap)
4022 : "Ir" (iBit),
4023 "m" (*(volatile long *)pvBitmap)
4024 : "memory");
4025# else
4026 __asm
4027 {
4028 mov edx, [iBit]
4029# ifdef RT_ARCH_AMD64
4030 mov rax, [pvBitmap]
4031 lock bts [rax], edx
4032# else
4033 mov eax, [pvBitmap]
4034 lock bts [eax], edx
4035# endif
4036 setc al
4037 and eax, 1
4038 mov [rc.u32], eax
4039 }
4040# endif
4041 return rc.f;
4042}
4043#endif
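/*
 * A minimal usage sketch: lock-free claiming of a slot in a shared allocation
 * bitmap. The bitmap, its size and the retry policy are hypothetical:
 *
 *      static volatile uint32_t s_au32AllocBitmap[2];          // 64 slots, 32-bit aligned
 *
 *      int iSlot = ASMBitFirstClear(&s_au32AllocBitmap[0], 64);
 *      if (iSlot >= 0 && !ASMAtomicBitTestAndSet(&s_au32AllocBitmap[0], iSlot))
 *          return iSlot;       // the bit was clear and is now atomically ours
 *      return -1;              // raced or full; callers would typically retry
 */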
4044
4045
4046/**
4047 * Tests and clears a bit in a bitmap.
4048 *
4049 * @returns true if the bit was set.
4050 * @returns false if the bit was clear.
4051 *
4052 * @param pvBitmap Pointer to the bitmap.
4053 * @param iBit The bit to test and clear.
4054 *
4055 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4056 * However, aligning it yields better performance and avoids
4057 * traps when accessing the last bits in the bitmap.
4058 */
4059#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4060DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4061#else
4062DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4063{
4064 union { bool f; uint32_t u32; uint8_t u8; } rc;
4065# if RT_INLINE_ASM_USES_INTRIN
4066 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4067
4068# elif RT_INLINE_ASM_GNU_STYLE
4069 __asm__ __volatile__("btrl %2, %1\n\t"
4070 "setc %b0\n\t"
4071 "andl $1, %0\n\t"
4072 : "=q" (rc.u32),
4073 "=m" (*(volatile long *)pvBitmap)
4074 : "Ir" (iBit),
4075 "m" (*(volatile long *)pvBitmap)
4076 : "memory");
4077# else
4078 __asm
4079 {
4080 mov edx, [iBit]
4081# ifdef RT_ARCH_AMD64
4082 mov rax, [pvBitmap]
4083 btr [rax], edx
4084# else
4085 mov eax, [pvBitmap]
4086 btr [eax], edx
4087# endif
4088 setc al
4089 and eax, 1
4090 mov [rc.u32], eax
4091 }
4092# endif
4093 return rc.f;
4094}
4095#endif
4096
4097
4098/**
4099 * Atomically tests and clears a bit in a bitmap, ordered.
4100 *
4101 * @returns true if the bit was set.
4102 * @returns false if the bit was clear.
4103 *
4104 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4105 * the memory access isn't atomic!
4106 * @param iBit The bit to test and clear.
4107 *
4108 * @remarks No memory barrier, take care on smp.
4109 */
4110#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4111DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4112#else
4113DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4114{
4115 union { bool f; uint32_t u32; uint8_t u8; } rc;
4116 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4117# if RT_INLINE_ASM_USES_INTRIN
4118 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4119
4120# elif RT_INLINE_ASM_GNU_STYLE
4121 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4122 "setc %b0\n\t"
4123 "andl $1, %0\n\t"
4124 : "=q" (rc.u32),
4125 "=m" (*(volatile long *)pvBitmap)
4126 : "Ir" (iBit),
4127 "m" (*(volatile long *)pvBitmap)
4128 : "memory");
4129# else
4130 __asm
4131 {
4132 mov edx, [iBit]
4133# ifdef RT_ARCH_AMD64
4134 mov rax, [pvBitmap]
4135 lock btr [rax], edx
4136# else
4137 mov eax, [pvBitmap]
4138 lock btr [eax], edx
4139# endif
4140 setc al
4141 and eax, 1
4142 mov [rc.u32], eax
4143 }
4144# endif
4145 return rc.f;
4146}
4147#endif
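/*
 * A minimal usage sketch: consuming a pending flag exactly once even when
 * several threads race to service it. The flag word and handler are hypothetical:
 *
 *      static volatile uint32_t s_fPendingWork;
 *      if (ASMAtomicBitTestAndClear(&s_fPendingWork, 3))
 *          HandleTimerWork();  // only the thread that cleared the bit gets here
 */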
4148
4149
4150/**
4151 * Tests and toggles a bit in a bitmap.
4152 *
4153 * @returns true if the bit was set.
4154 * @returns false if the bit was clear.
4155 *
4156 * @param pvBitmap Pointer to the bitmap.
4157 * @param iBit The bit to test and toggle.
4158 *
4159 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4160 * However, aligning it yields better performance and avoids
4161 * traps when accessing the last bits in the bitmap.
4162 */
4163#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4164DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4165#else
4166DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4167{
4168 union { bool f; uint32_t u32; uint8_t u8; } rc;
4169# if RT_INLINE_ASM_USES_INTRIN
4170 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4171
4172# elif RT_INLINE_ASM_GNU_STYLE
4173 __asm__ __volatile__("btcl %2, %1\n\t"
4174 "setc %b0\n\t"
4175 "andl $1, %0\n\t"
4176 : "=q" (rc.u32),
4177 "=m" (*(volatile long *)pvBitmap)
4178 : "Ir" (iBit),
4179 "m" (*(volatile long *)pvBitmap)
4180 : "memory");
4181# else
4182 __asm
4183 {
4184 mov edx, [iBit]
4185# ifdef RT_ARCH_AMD64
4186 mov rax, [pvBitmap]
4187 btc [rax], edx
4188# else
4189 mov eax, [pvBitmap]
4190 btc [eax], edx
4191# endif
4192 setc al
4193 and eax, 1
4194 mov [rc.u32], eax
4195 }
4196# endif
4197 return rc.f;
4198}
4199#endif
4200
4201
4202/**
4203 * Atomically tests and toggles a bit in a bitmap, ordered.
4204 *
4205 * @returns true if the bit was set.
4206 * @returns false if the bit was clear.
4207 *
4208 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4209 * the memory access isn't atomic!
4210 * @param iBit The bit to test and toggle.
4211 */
4212#if RT_INLINE_ASM_EXTERNAL
4213DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4214#else
4215DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4216{
4217 union { bool f; uint32_t u32; uint8_t u8; } rc;
4218 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4219# if RT_INLINE_ASM_GNU_STYLE
4220 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4221 "setc %b0\n\t"
4222 "andl $1, %0\n\t"
4223 : "=q" (rc.u32),
4224 "=m" (*(volatile long *)pvBitmap)
4225 : "Ir" (iBit),
4226 "m" (*(volatile long *)pvBitmap)
4227 : "memory");
4228# else
4229 __asm
4230 {
4231 mov edx, [iBit]
4232# ifdef RT_ARCH_AMD64
4233 mov rax, [pvBitmap]
4234 lock btc [rax], edx
4235# else
4236 mov eax, [pvBitmap]
4237 lock btc [eax], edx
4238# endif
4239 setc al
4240 and eax, 1
4241 mov [rc.u32], eax
4242 }
4243# endif
4244 return rc.f;
4245}
4246#endif
4247
4248
4249/**
4250 * Tests if a bit in a bitmap is set.
4251 *
4252 * @returns true if the bit is set.
4253 * @returns false if the bit is clear.
4254 *
4255 * @param pvBitmap Pointer to the bitmap.
4256 * @param iBit The bit to test.
4257 *
4258 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4259 * However, aligning it yields better performance and avoids
4260 * traps when accessing the last bits in the bitmap.
4261 */
4262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4263DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4264#else
4265DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4266{
4267 union { bool f; uint32_t u32; uint8_t u8; } rc;
4268# if RT_INLINE_ASM_USES_INTRIN
4269 rc.u32 = _bittest((long *)pvBitmap, iBit);
4270# elif RT_INLINE_ASM_GNU_STYLE
4271
4272 __asm__ __volatile__("btl %2, %1\n\t"
4273 "setc %b0\n\t"
4274 "andl $1, %0\n\t"
4275 : "=q" (rc.u32)
4276 : "m" (*(const volatile long *)pvBitmap),
4277 "Ir" (iBit)
4278 : "memory");
4279# else
4280 __asm
4281 {
4282 mov edx, [iBit]
4283# ifdef RT_ARCH_AMD64
4284 mov rax, [pvBitmap]
4285 bt [rax], edx
4286# else
4287 mov eax, [pvBitmap]
4288 bt [eax], edx
4289# endif
4290 setc al
4291 and eax, 1
4292 mov [rc.u32], eax
4293 }
4294# endif
4295 return rc.f;
4296}
4297#endif
4298
4299
4300/**
4301 * Clears a bit range within a bitmap.
4302 *
4303 * @param pvBitmap Pointer to the bitmap.
4304 * @param iBitStart The first bit to clear.
4305 * @param iBitEnd The first bit not to clear.
4306 */
4307DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4308{
4309 if (iBitStart < iBitEnd)
4310 {
4311 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4312 int iStart = iBitStart & ~31;
4313 int iEnd = iBitEnd & ~31;
4314 if (iStart == iEnd)
4315 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4316 else
4317 {
4318 /* bits in first dword. */
4319 if (iBitStart & 31)
4320 {
4321 *pu32 &= (1 << (iBitStart & 31)) - 1;
4322 pu32++;
4323 iBitStart = iStart + 32;
4324 }
4325
4326 /* whole dword. */
4327 if (iBitStart != iEnd)
4328 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4329
4330 /* bits in last dword. */
4331 if (iBitEnd & 31)
4332 {
4333 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4334 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4335 }
4336 }
4337 }
4338}
4339
4340
4341/**
4342 * Sets a bit range within a bitmap.
4343 *
4344 * @param pvBitmap Pointer to the bitmap.
4345 * @param iBitStart The first bit to set.
4346 * @param iBitEnd The first bit not to set.
4347 */
4348DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4349{
4350 if (iBitStart < iBitEnd)
4351 {
4352 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4353 int iStart = iBitStart & ~31;
4354 int iEnd = iBitEnd & ~31;
4355 if (iStart == iEnd)
4356 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4357 else
4358 {
4359 /* bits in first dword. */
4360 if (iBitStart & 31)
4361 {
4362 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4363 pu32++;
4364 iBitStart = iStart + 32;
4365 }
4366
4367 /* whole dword. */
4368 if (iBitStart != iEnd)
4369 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4370
4371 /* bits in last dword. */
4372 if (iBitEnd & 31)
4373 {
4374 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4375 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4376 }
4377 }
4378 }
4379}
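/*
 * A minimal usage sketch: both range functions treat the range as half open,
 * i.e. bits [iBitStart, iBitEnd) are affected. The bitmap is hypothetical:
 *
 *      uint32_t au32Bitmap[4] = { 0 };         // 128 bits
 *      ASMBitSetRange(au32Bitmap, 4, 12);      // sets bits 4..11
 *      ASMBitClearRange(au32Bitmap, 8, 12);    // clears bits 8..11 again
 */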
4380
4381
4382/**
4383 * Finds the first clear bit in a bitmap.
4384 *
4385 * @returns Index of the first zero bit.
4386 * @returns -1 if no clear bit was found.
4387 * @param pvBitmap Pointer to the bitmap.
4388 * @param cBits The number of bits in the bitmap. Multiple of 32.
4389 */
4390#if RT_INLINE_ASM_EXTERNAL
4391DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4392#else
4393DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4394{
4395 if (cBits)
4396 {
4397 int32_t iBit;
4398# if RT_INLINE_ASM_GNU_STYLE
4399 RTCCUINTREG uEAX, uECX, uEDI;
4400 cBits = RT_ALIGN_32(cBits, 32);
4401 __asm__ __volatile__("repe; scasl\n\t"
4402 "je 1f\n\t"
4403# ifdef RT_ARCH_AMD64
4404 "lea -4(%%rdi), %%rdi\n\t"
4405 "xorl (%%rdi), %%eax\n\t"
4406 "subq %5, %%rdi\n\t"
4407# else
4408 "lea -4(%%edi), %%edi\n\t"
4409 "xorl (%%edi), %%eax\n\t"
4410 "subl %5, %%edi\n\t"
4411# endif
4412 "shll $3, %%edi\n\t"
4413 "bsfl %%eax, %%edx\n\t"
4414 "addl %%edi, %%edx\n\t"
4415 "1:\t\n"
4416 : "=d" (iBit),
4417 "=&c" (uECX),
4418 "=&D" (uEDI),
4419 "=&a" (uEAX)
4420 : "0" (0xffffffff),
4421 "mr" (pvBitmap),
4422 "1" (cBits >> 5),
4423 "2" (pvBitmap),
4424 "3" (0xffffffff));
4425# else
4426 cBits = RT_ALIGN_32(cBits, 32);
4427 __asm
4428 {
4429# ifdef RT_ARCH_AMD64
4430 mov rdi, [pvBitmap]
4431 mov rbx, rdi
4432# else
4433 mov edi, [pvBitmap]
4434 mov ebx, edi
4435# endif
4436 mov edx, 0ffffffffh
4437 mov eax, edx
4438 mov ecx, [cBits]
4439 shr ecx, 5
4440 repe scasd
4441 je done
4442
4443# ifdef RT_ARCH_AMD64
4444 lea rdi, [rdi - 4]
4445 xor eax, [rdi]
4446 sub rdi, rbx
4447# else
4448 lea edi, [edi - 4]
4449 xor eax, [edi]
4450 sub edi, ebx
4451# endif
4452 shl edi, 3
4453 bsf edx, eax
4454 add edx, edi
4455 done:
4456 mov [iBit], edx
4457 }
4458# endif
4459 return iBit;
4460 }
4461 return -1;
4462}
4463#endif
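/*
 * A minimal usage sketch; RT_ELEMENTS times 32 is a convenient way to pass a
 * bit count that is a multiple of 32. The bitmap and error handling below are
 * hypothetical:
 *
 *      int iFree = ASMBitFirstClear(s_au32UsedBitmap, RT_ELEMENTS(s_au32UsedBitmap) * 32);
 *      if (iFree < 0)
 *          return VERR_NO_MORE_HANDLES;        // all entries are in use
 */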
4464
4465
4466/**
4467 * Finds the next clear bit in a bitmap.
4468 *
4469 * @returns Index of the next clear bit.
4470 * @returns -1 if no clear bit was found.
4471 * @param pvBitmap Pointer to the bitmap.
4472 * @param cBits The number of bits in the bitmap. Multiple of 32.
4473 * @param iBitPrev The bit returned from the last search.
4474 * The search will start at iBitPrev + 1.
4475 */
4476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4477DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4478#else
4479DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4480{
4481 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4482 int iBit = ++iBitPrev & 31;
4483 if (iBit)
4484 {
4485 /*
4486 * Inspect the 32-bit word containing the unaligned bit.
4487 */
4488 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4489
4490# if RT_INLINE_ASM_USES_INTRIN
4491 unsigned long ulBit = 0;
4492 if (_BitScanForward(&ulBit, u32))
4493 return ulBit + iBitPrev;
4494# else
4495# if RT_INLINE_ASM_GNU_STYLE
4496 __asm__ __volatile__("bsf %1, %0\n\t"
4497 "jnz 1f\n\t"
4498 "movl $-1, %0\n\t"
4499 "1:\n\t"
4500 : "=r" (iBit)
4501 : "r" (u32));
4502# else
4503 __asm
4504 {
4505 mov edx, [u32]
4506 bsf eax, edx
4507 jnz done
4508 mov eax, 0ffffffffh
4509 done:
4510 mov [iBit], eax
4511 }
4512# endif
4513 if (iBit >= 0)
4514 return iBit + iBitPrev;
4515# endif
4516
4517 /*
4518 * Skip ahead and see if there is anything left to search.
4519 */
4520 iBitPrev |= 31;
4521 iBitPrev++;
4522 if (cBits <= (uint32_t)iBitPrev)
4523 return -1;
4524 }
4525
4526 /*
4527 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4528 */
4529 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4530 if (iBit >= 0)
4531 iBit += iBitPrev;
4532 return iBit;
4533}
4534#endif
4535
4536
4537/**
4538 * Finds the first set bit in a bitmap.
4539 *
4540 * @returns Index of the first set bit.
4541 * @returns -1 if no set bit was found.
4542 * @param pvBitmap Pointer to the bitmap.
4543 * @param cBits The number of bits in the bitmap. Multiple of 32.
4544 */
4545#if RT_INLINE_ASM_EXTERNAL
4546DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4547#else
4548DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4549{
4550 if (cBits)
4551 {
4552 int32_t iBit;
4553# if RT_INLINE_ASM_GNU_STYLE
4554 RTCCUINTREG uEAX, uECX, uEDI;
4555 cBits = RT_ALIGN_32(cBits, 32);
4556 __asm__ __volatile__("repe; scasl\n\t"
4557 "je 1f\n\t"
4558# ifdef RT_ARCH_AMD64
4559 "lea -4(%%rdi), %%rdi\n\t"
4560 "movl (%%rdi), %%eax\n\t"
4561 "subq %5, %%rdi\n\t"
4562# else
4563 "lea -4(%%edi), %%edi\n\t"
4564 "movl (%%edi), %%eax\n\t"
4565 "subl %5, %%edi\n\t"
4566# endif
4567 "shll $3, %%edi\n\t"
4568 "bsfl %%eax, %%edx\n\t"
4569 "addl %%edi, %%edx\n\t"
4570 "1:\t\n"
4571 : "=d" (iBit),
4572 "=&c" (uECX),
4573 "=&D" (uEDI),
4574 "=&a" (uEAX)
4575 : "0" (0xffffffff),
4576 "mr" (pvBitmap),
4577 "1" (cBits >> 5),
4578 "2" (pvBitmap),
4579 "3" (0));
4580# else
4581 cBits = RT_ALIGN_32(cBits, 32);
4582 __asm
4583 {
4584# ifdef RT_ARCH_AMD64
4585 mov rdi, [pvBitmap]
4586 mov rbx, rdi
4587# else
4588 mov edi, [pvBitmap]
4589 mov ebx, edi
4590# endif
4591 mov edx, 0ffffffffh
4592 xor eax, eax
4593 mov ecx, [cBits]
4594 shr ecx, 5
4595 repe scasd
4596 je done
4597# ifdef RT_ARCH_AMD64
4598 lea rdi, [rdi - 4]
4599 mov eax, [rdi]
4600 sub rdi, rbx
4601# else
4602 lea edi, [edi - 4]
4603 mov eax, [edi]
4604 sub edi, ebx
4605# endif
4606 shl edi, 3
4607 bsf edx, eax
4608 add edx, edi
4609 done:
4610 mov [iBit], edx
4611 }
4612# endif
4613 return iBit;
4614 }
4615 return -1;
4616}
4617#endif
4618
4619
4620/**
4621 * Finds the next set bit in a bitmap.
4622 *
4623 * @returns Index of the next set bit.
4624 * @returns -1 if no set bit was found.
4625 * @param pvBitmap Pointer to the bitmap.
4626 * @param cBits The number of bits in the bitmap. Multiple of 32.
4627 * @param iBitPrev The bit returned from the last search.
4628 * The search will start at iBitPrev + 1.
4629 */
4630#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4631DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4632#else
4633DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4634{
4635 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4636 int iBit = ++iBitPrev & 31;
4637 if (iBit)
4638 {
4639 /*
4640 * Inspect the 32-bit word containing the unaligned bit.
4641 */
4642 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4643
4644# if RT_INLINE_ASM_USES_INTRIN
4645 unsigned long ulBit = 0;
4646 if (_BitScanForward(&ulBit, u32))
4647 return ulBit + iBitPrev;
4648# else
4649# if RT_INLINE_ASM_GNU_STYLE
4650 __asm__ __volatile__("bsf %1, %0\n\t"
4651 "jnz 1f\n\t"
4652 "movl $-1, %0\n\t"
4653 "1:\n\t"
4654 : "=r" (iBit)
4655 : "r" (u32));
4656# else
4657 __asm
4658 {
4659 mov edx, [u32]
4660 bsf eax, edx
4661 jnz done
4662 mov eax, 0ffffffffh
4663 done:
4664 mov [iBit], eax
4665 }
4666# endif
4667 if (iBit >= 0)
4668 return iBit + iBitPrev;
4669# endif
4670
4671 /*
4672 * Skip ahead and see if there is anything left to search.
4673 */
4674 iBitPrev |= 31;
4675 iBitPrev++;
4676 if (cBits <= (uint32_t)iBitPrev)
4677 return -1;
4678 }
4679
4680 /*
4681 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
4682 */
4683 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4684 if (iBit >= 0)
4685 iBit += iBitPrev;
4686 return iBit;
4687}
4688#endif
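/*
 * A minimal usage sketch: the First/Next pair forms the usual pattern for
 * iterating over all set bits. pvBitmap, cBits and the callback are hypothetical:
 *
 *      for (int iBit = ASMBitFirstSet(pvBitmap, cBits);
 *           iBit >= 0;
 *           iBit = ASMBitNextSet(pvBitmap, cBits, iBit))
 *          ProcessSetBit(iBit);
 */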
4689
4690
4691/**
4692 * Finds the first bit which is set in the given 32-bit integer.
4693 * Bits are numbered from 1 (least significant) to 32.
4694 *
4695 * @returns index [1..32] of the first set bit.
4696 * @returns 0 if all bits are cleared.
4697 * @param u32 Integer to search for set bits.
4698 * @remark Similar to ffs() in BSD.
4699 */
4700#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4701DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4702#else
4703DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4704{
4705# if RT_INLINE_ASM_USES_INTRIN
4706 unsigned long iBit;
4707 if (_BitScanForward(&iBit, u32))
4708 iBit++;
4709 else
4710 iBit = 0;
4711# elif RT_INLINE_ASM_GNU_STYLE
4712 uint32_t iBit;
4713 __asm__ __volatile__("bsf %1, %0\n\t"
4714 "jnz 1f\n\t"
4715 "xorl %0, %0\n\t"
4716 "jmp 2f\n"
4717 "1:\n\t"
4718 "incl %0\n"
4719 "2:\n\t"
4720 : "=r" (iBit)
4721 : "rm" (u32));
4722# else
4723 uint32_t iBit;
4724 _asm
4725 {
4726 bsf eax, [u32]
4727 jnz found
4728 xor eax, eax
4729 jmp done
4730 found:
4731 inc eax
4732 done:
4733 mov [iBit], eax
4734 }
4735# endif
4736 return iBit;
4737}
4738#endif
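/*
 * Illustrative values, note the 1-based bit numbering:
 *
 *      ASMBitFirstSetU32(0x00000001) == 1
 *      ASMBitFirstSetU32(0x00000018) == 4      // lowest set bit is bit 3 (0-based)
 *      ASMBitFirstSetU32(0x00000000) == 0      // no bit set
 */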
4739
4740
4741/**
4742 * Finds the first bit which is set in the given 32-bit integer.
4743 * Bits are numbered from 1 (least significant) to 32.
4744 *
4745 * @returns index [1..32] of the first set bit.
4746 * @returns 0 if all bits are cleared.
4747 * @param i32 Integer to search for set bits.
4748 * @remark Similar to ffs() in BSD.
4749 */
4750DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4751{
4752 return ASMBitFirstSetU32((uint32_t)i32);
4753}
4754
4755
4756/**
4757 * Finds the last bit which is set in the given 32-bit integer.
4758 * Bits are numbered from 1 (least significant) to 32.
4759 *
4760 * @returns index [1..32] of the last set bit.
4761 * @returns 0 if all bits are cleared.
4762 * @param u32 Integer to search for set bits.
4763 * @remark Similar to fls() in BSD.
4764 */
4765#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4766DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4767#else
4768DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4769{
4770# if RT_INLINE_ASM_USES_INTRIN
4771 unsigned long iBit;
4772 if (_BitScanReverse(&iBit, u32))
4773 iBit++;
4774 else
4775 iBit = 0;
4776# elif RT_INLINE_ASM_GNU_STYLE
4777 uint32_t iBit;
4778 __asm__ __volatile__("bsrl %1, %0\n\t"
4779 "jnz 1f\n\t"
4780 "xorl %0, %0\n\t"
4781 "jmp 2f\n"
4782 "1:\n\t"
4783 "incl %0\n"
4784 "2:\n\t"
4785 : "=r" (iBit)
4786 : "rm" (u32));
4787# else
4788 uint32_t iBit;
4789 _asm
4790 {
4791 bsr eax, [u32]
4792 jnz found
4793 xor eax, eax
4794 jmp done
4795 found:
4796 inc eax
4797 done:
4798 mov [iBit], eax
4799 }
4800# endif
4801 return iBit;
4802}
4803#endif
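/*
 * Illustrative values; the result equals the number of significant bits in the
 * value, which makes it handy for computing bit widths:
 *
 *      ASMBitLastSetU32(0x00000001) ==  1
 *      ASMBitLastSetU32(0x00000018) ==  5      // highest set bit is bit 4 (0-based)
 *      ASMBitLastSetU32(0x80000000) == 32
 *      ASMBitLastSetU32(0x00000000) ==  0      // no bit set
 */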
4804
4805
4806/**
4807 * Finds the last bit which is set in the given 32-bit integer.
4808 * Bits are numbered from 1 (least significant) to 32.
4809 *
4810 * @returns index [1..32] of the last set bit.
4811 * @returns 0 if all bits are cleared.
4812 * @param i32 Integer to search for set bits.
4813 * @remark Similar to fls() in BSD.
4814 */
4815DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4816{
4817 return ASMBitLastSetU32((uint32_t)i32);
4818}
4819
4820/**
4821 * Reverse the byte order of the given 16-bit integer.
4822 *
4823 * @returns The byte-swapped value.
4824 * @param u16 16-bit integer value.
4825 */
4826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4827DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4828#else
4829DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4830{
4831# if RT_INLINE_ASM_USES_INTRIN
4832 u16 = _byteswap_ushort(u16);
4833# elif RT_INLINE_ASM_GNU_STYLE
4834 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4835# else
4836 _asm
4837 {
4838 mov ax, [u16]
4839 ror ax, 8
4840 mov [u16], ax
4841 }
4842# endif
4843 return u16;
4844}
4845#endif
4846
4847
4848/**
4849 * Reverse the byte order of the given 32-bit integer.
4850 *
4851 * @returns The byte-swapped value.
4852 * @param u32 32-bit integer value.
4853 */
4854#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4855DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4856#else
4857DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4858{
4859# if RT_INLINE_ASM_USES_INTRIN
4860 u32 = _byteswap_ulong(u32);
4861# elif RT_INLINE_ASM_GNU_STYLE
4862 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4863# else
4864 _asm
4865 {
4866 mov eax, [u32]
4867 bswap eax
4868 mov [u32], eax
4869 }
4870# endif
4871 return u32;
4872}
4873#endif
4874
4875
4876/**
4877 * Reverse the byte order of the given 64-bit integer.
4878 *
4879 * @returns The byte-swapped value.
4880 * @param u64 64-bit integer value.
4881 */
4882DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4883{
4884#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4885 u64 = _byteswap_uint64(u64);
4886#else
4887 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4888 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4889#endif
4890 return u64;
4891}
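/*
 * Illustrative values for the byte swap family, e.g. when converting between
 * host order and big-endian wire order on a little-endian host:
 *
 *      ASMByteSwapU16(0x1234)                       == 0x3412
 *      ASMByteSwapU32(0x12345678)                   == 0x78563412
 *      ASMByteSwapU64(UINT64_C(0x0123456789abcdef)) == UINT64_C(0xefcdab8967452301)
 */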
4892
4893
4894/**
4895 * Rotate 32-bit unsigned value to the left by @a cShift.
4896 *
4897 * @returns Rotated value.
4898 * @param u32 The value to rotate.
4899 * @param cShift How many bits to rotate by.
4900 */
4901DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
4902{
4903#if RT_INLINE_ASM_USES_INTRIN
4904 return _rotl(u32, cShift);
4905#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4906 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4907 return u32;
4908#else
4909 cShift &= 31;
4910 return (u32 << cShift) | (u32 >> (32 - cShift));
4911#endif
4912}
4913
4914
4915/**
4916 * Rotate 32-bit unsigned value to the right by @a cShift.
4917 *
4918 * @returns Rotated value.
4919 * @param u32 The value to rotate.
4920 * @param cShift How many bits to rotate by.
4921 */
4922DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
4923{
4924#if RT_INLINE_ASM_USES_INTRIN
4925 return _rotr(u32, cShift);
4926#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4927 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4928 return u32;
4929#else
4930 cShift &= 31;
4931 return (u32 >> cShift) | (u32 << (32 - cShift));
4932#endif
4933}
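/*
 * Illustrative values: bits rotated out of one end re-enter at the other:
 *
 *      ASMRotateLeftU32(0x80000001, 1)  == 0x00000003
 *      ASMRotateRightU32(0x00000003, 1) == 0x80000001
 */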
4934
4935
4936/**
4937 * Rotate 64-bit unsigned value to the left by @a cShift.
4938 *
4939 * @returns Rotated value.
4940 * @param u64 The value to rotate.
4941 * @param cShift How many bits to rotate by.
4942 */
4943DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
4944{
4945#if RT_INLINE_ASM_USES_INTRIN
4946 return _rotl64(u64, cShift);
4947#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4948 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4949 return u64;
4950#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4951 uint32_t uSpill;
4952 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4953 "jz 1f\n\t"
4954 "xchgl %%eax, %%edx\n\t"
4955 "1:\n\t"
4956 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4957 "jz 2f\n\t"
4958 "movl %%edx, %2\n\t" /* save the hi value in %3. */
4959 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
4960 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
4961 "2:\n\t" /* } */
4962 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
4963 : "0" (u64),
4964 "1" (cShift));
4965 return u64;
4966#else
4967 cShift &= 63;
4968 return (u64 << cShift) | (u64 >> (64 - cShift));
4969#endif
4970}
4971
4972
4973/**
4974 * Rotate 64-bit unsigned value to the right by @a cShift.
4975 *
4976 * @returns Rotated value.
4977 * @param u64 The value to rotate.
4978 * @param cShift How many bits to rotate by.
4979 */
4980DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
4981{
4982#if RT_INLINE_ASM_USES_INTRIN
4983 return _rotr64(u64, cShift);
4984#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4985 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4986 return u64;
4987#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4988 uint32_t uSpill;
4989 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4990 "jz 1f\n\t"
4991 "xchgl %%eax, %%edx\n\t"
4992 "1:\n\t"
4993 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4994 "jz 2f\n\t"
4995 "movl %%edx, %2\n\t" /* save the hi value in %3. */
4996 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
4997 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
4998 "2:\n\t" /* } */
4999 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5000 : "0" (u64),
5001 "1" (cShift));
5002 return u64;
5003#else
5004 cShift &= 63;
5005 return (u64 >> cShift) | (u64 << (64 - cShift));
5006#endif
5007}
5008
5009/** @} */
5010
5011
5012/** @} */
5013
5014#endif
5015