VirtualBox

source: vbox/trunk/include/iprt/asm.h@55420

Last change on this file since 55420 was 54269, checked in by vboxsync, 10 years ago

asm.h: Don't call assembly for ASMAtomicUoReadU64 on AMD64.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 142.5 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Visual C++ intrinsics (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84
85/** @defgroup grp_rt_asm ASM - Assembly Routines
86 * @ingroup grp_rt
87 *
88 * @remarks The difference between ordered and unordered atomic operations is that
89 * ordered operations complete outstanding reads and writes before continuing,
90 * while unordered operations make no promises about the order. Even ordered
91 * operations don't, it seems, make any 100% promise as to whether
92 * the operation will complete before any subsequent memory access.
93 * (Please correct this if it is wrong.)
94 *
95 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
96 * are unordered (note the Uo).
97 *
98 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
99 * or even optimize assembler instructions away. For instance, in the following code
100 * the second rdmsr instruction is optimized away because gcc treats that instruction
101 * as deterministic:
102 *
103 * @code
104 * static inline uint64_t rdmsr_low(int idx)
105 * {
106 * uint32_t low;
107 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
108 * }
109 * ...
110 * uint32_t msr1 = rdmsr_low(1);
111 * foo(msr1);
112 * msr1 = rdmsr_low(1);
113 * bar(msr1);
114 * @endcode
115 *
116 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
117 * use the result of the first call as input parameter for bar() as well. For rdmsr this
118 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
119 * machine status information in general.
120 *
121 * @{
122 */
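
/* Illustrative sketch (not part of the original header) of the naming
 * convention described above: the ordered variant is used when another
 * thread must observe prior writes first, the unordered (Uo) variant when a
 * possibly stale value is acceptable. The variable g_u32ExampleState is
 * hypothetical.
 * @code
 * uint32_t u32Prev = ASMAtomicXchgU32(&g_u32ExampleState, 1); // ordered update
 * uint32_t u32Peek = ASMAtomicUoReadU32(&g_u32ExampleState);  // cheap, possibly stale read
 * @endcode
 */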
123
124
125/** @def RT_INLINE_ASM_GCC_4_3_X_X86
126 * Used to work around some 4.3.x register allocation issues in this version of
127 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
128#ifdef __GNUC__
129# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
130#endif
131#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
132# define RT_INLINE_ASM_GCC_4_3_X_X86 0
133#endif
134
135/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
136 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
137 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds. PIC
138 * mode, x86.
139 *
140 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
141 * when in PIC mode on x86.
142 */
143#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
144# ifdef DOXYGEN_RUNNING
145# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
146# else
147# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
148 ( (defined(PIC) || defined(__PIC__)) \
149 && defined(RT_ARCH_X86) \
150 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
151 || defined(RT_OS_DARWIN)) )
152# endif
153#endif
154
155
156/** @def ASMReturnAddress
157 * Gets the return address of the current (or calling if you like) function or method.
158 */
159#ifdef _MSC_VER
160# ifdef __cplusplus
161extern "C"
162# endif
163void * _ReturnAddress(void);
164# pragma intrinsic(_ReturnAddress)
165# define ASMReturnAddress() _ReturnAddress()
166#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
167# define ASMReturnAddress() __builtin_return_address(0)
168#else
169# error "Unsupported compiler."
170#endif
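
/* Illustrative sketch (not part of the original header): using
 * ASMReturnAddress() to record the caller of an API for debug logging.
 * exampleRememberCaller and g_pvExampleLastCaller are hypothetical.
 * @code
 * static void *g_pvExampleLastCaller;
 *
 * DECLINLINE(void) exampleRememberCaller(void)
 * {
 *     g_pvExampleLastCaller = ASMReturnAddress();
 * }
 * @endcode
 */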
171
172
173/**
174 * Compiler memory barrier.
175 *
176 * Ensure that the compiler does not use any cached (register/tmp stack) memory
177 * values or any outstanding writes when returning from this function.
178 *
179 * This function must be used if non-volatile data is modified by a
180 * device or the VMM. Typical cases are port access, MMIO access,
181 * trapping instructions, etc.
182 */
183#if RT_INLINE_ASM_GNU_STYLE
184# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
185#elif RT_INLINE_ASM_USES_INTRIN
186# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
187#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
188DECLINLINE(void) ASMCompilerBarrier(void)
189{
190 __asm
191 {
192 }
193}
194#endif
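
/* Illustrative sketch (not part of the original header): re-reading a
 * non-volatile field that the VMM may have changed behind the compiler's
 * back. pExampleShared and ExampleTrapToVmm are hypothetical.
 * @code
 * uint32_t u32Status = pExampleShared->u32Status;
 * ExampleTrapToVmm();          // the VMM may update u32Status here
 * ASMCompilerBarrier();        // discard cached register/stack copies
 * u32Status = pExampleShared->u32Status;
 * @endcode
 */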
195
196
197/** @def ASMBreakpoint
198 * Debugger Breakpoint.
199 * @deprecated Use RT_BREAKPOINT instead.
200 * @internal
201 */
202#define ASMBreakpoint() RT_BREAKPOINT()
203
204
205/**
206 * Spinloop hint for platforms that have one; an empty function on the other
207 * platforms.
208 *
209 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
210 * spin locks.
211 */
212#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
213DECLASM(void) ASMNopPause(void);
214#else
215DECLINLINE(void) ASMNopPause(void)
216{
217# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
220# else
221 __asm {
222 _emit 0f3h
223 _emit 090h
224 }
225# endif
226# else
227 /* dummy */
228# endif
229}
230#endif
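
/* Illustrative sketch (not part of the original header): a polite busy-wait
 * on a flag, issuing PAUSE in each iteration. g_fExampleReady is
 * hypothetical.
 * @code
 * while (!ASMAtomicReadBool(&g_fExampleReady))
 *     ASMNopPause();
 * @endcode
 */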
231
232
233/**
234 * Atomically Exchange an unsigned 8-bit value, ordered.
235 *
236 * @returns Current *pu8 value
237 * @param pu8 Pointer to the 8-bit variable to update.
238 * @param u8 The 8-bit value to assign to *pu8.
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
242#else
243DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__("xchgb %0, %1\n\t"
247 : "=m" (*pu8),
248 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
249 : "1" (u8),
250 "m" (*pu8));
251# else
252 __asm
253 {
254# ifdef RT_ARCH_AMD64
255 mov rdx, [pu8]
256 mov al, [u8]
257 xchg [rdx], al
258 mov [u8], al
259# else
260 mov edx, [pu8]
261 mov al, [u8]
262 xchg [edx], al
263 mov [u8], al
264# endif
265 }
266# endif
267 return u8;
268}
269#endif
270
271
272/**
273 * Atomically Exchange a signed 8-bit value, ordered.
274 *
275 * @returns Current *pi8 value
276 * @param pi8 Pointer to the 8-bit variable to update.
277 * @param i8 The 8-bit value to assign to *pi8.
278 */
279DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
280{
281 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
282}
283
284
285/**
286 * Atomically Exchange a bool value, ordered.
287 *
288 * @returns Current *pf value
289 * @param pf Pointer to the boolean variable to update.
290 * @param f The boolean value to assign to *pf.
291 */
292DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
293{
294#ifdef _MSC_VER
295 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
296#else
297 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
298#endif
299}
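
/* Illustrative sketch (not part of the original header): a run-once guard
 * built on ASMAtomicXchgBool; only the first caller observes false.
 * g_fExampleInitDone and exampleDoInit are hypothetical.
 * @code
 * static volatile bool g_fExampleInitDone = false;
 *
 * DECLINLINE(void) exampleInitOnce(void)
 * {
 *     if (!ASMAtomicXchgBool(&g_fExampleInitDone, true))
 *         exampleDoInit();
 * }
 * @endcode
 */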
300
301
302/**
303 * Atomically Exchange an unsigned 16-bit value, ordered.
304 *
305 * @returns Current *pu16 value
306 * @param pu16 Pointer to the 16-bit variable to update.
307 * @param u16 The 16-bit value to assign to *pu16.
308 */
309#if RT_INLINE_ASM_EXTERNAL
310DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
311#else
312DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
313{
314# if RT_INLINE_ASM_GNU_STYLE
315 __asm__ __volatile__("xchgw %0, %1\n\t"
316 : "=m" (*pu16),
317 "=r" (u16)
318 : "1" (u16),
319 "m" (*pu16));
320# else
321 __asm
322 {
323# ifdef RT_ARCH_AMD64
324 mov rdx, [pu16]
325 mov ax, [u16]
326 xchg [rdx], ax
327 mov [u16], ax
328# else
329 mov edx, [pu16]
330 mov ax, [u16]
331 xchg [edx], ax
332 mov [u16], ax
333# endif
334 }
335# endif
336 return u16;
337}
338#endif
339
340
341/**
342 * Atomically Exchange a signed 16-bit value, ordered.
343 *
344 * @returns Current *pi16 value
345 * @param pi16 Pointer to the 16-bit variable to update.
346 * @param i16 The 16-bit value to assign to *pi16.
347 */
348DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
349{
350 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
351}
352
353
354/**
355 * Atomically Exchange an unsigned 32-bit value, ordered.
356 *
357 * @returns Current *pu32 value
358 * @param pu32 Pointer to the 32-bit variable to update.
359 * @param u32 The 32-bit value to assign to *pu32.
360 */
361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
362DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
363#else
364DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
365{
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("xchgl %0, %1\n\t"
368 : "=m" (*pu32),
369 "=r" (u32)
370 : "1" (u32),
371 "m" (*pu32));
372
373# elif RT_INLINE_ASM_USES_INTRIN
374 u32 = _InterlockedExchange((long *)pu32, u32);
375
376# else
377 __asm
378 {
379# ifdef RT_ARCH_AMD64
380 mov rdx, [pu32]
381 mov eax, u32
382 xchg [rdx], eax
383 mov [u32], eax
384# else
385 mov edx, [pu32]
386 mov eax, u32
387 xchg [edx], eax
388 mov [u32], eax
389# endif
390 }
391# endif
392 return u32;
393}
394#endif
395
396
397/**
398 * Atomically Exchange a signed 32-bit value, ordered.
399 *
400 * @returns Current *pi32 value
401 * @param pi32 Pointer to the 32-bit variable to update.
402 * @param i32 The 32-bit value to assign to *pi32.
403 */
404DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
405{
406 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
407}
408
409
410/**
411 * Atomically Exchange an unsigned 64-bit value, ordered.
412 *
413 * @returns Current *pu64 value
414 * @param pu64 Pointer to the 64-bit variable to update.
415 * @param u64 The 64-bit value to assign to *pu64.
416 */
417#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
418 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
419DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
420#else
421DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
422{
423# if defined(RT_ARCH_AMD64)
424# if RT_INLINE_ASM_USES_INTRIN
425 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
426
427# elif RT_INLINE_ASM_GNU_STYLE
428 __asm__ __volatile__("xchgq %0, %1\n\t"
429 : "=m" (*pu64),
430 "=r" (u64)
431 : "1" (u64),
432 "m" (*pu64));
433# else
434 __asm
435 {
436 mov rdx, [pu64]
437 mov rax, [u64]
438 xchg [rdx], rax
439 mov [u64], rax
440 }
441# endif
442# else /* !RT_ARCH_AMD64 */
443# if RT_INLINE_ASM_GNU_STYLE
444# if defined(PIC) || defined(__PIC__)
445 uint32_t u32EBX = (uint32_t)u64;
446 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
447 "xchgl %%ebx, %3\n\t"
448 "1:\n\t"
449 "lock; cmpxchg8b (%5)\n\t"
450 "jnz 1b\n\t"
451 "movl %3, %%ebx\n\t"
452 /*"xchgl %%esi, %5\n\t"*/
453 : "=A" (u64),
454 "=m" (*pu64)
455 : "0" (*pu64),
456 "m" ( u32EBX ),
457 "c" ( (uint32_t)(u64 >> 32) ),
458 "S" (pu64));
459# else /* !PIC */
460 __asm__ __volatile__("1:\n\t"
461 "lock; cmpxchg8b %1\n\t"
462 "jnz 1b\n\t"
463 : "=A" (u64),
464 "=m" (*pu64)
465 : "0" (*pu64),
466 "b" ( (uint32_t)u64 ),
467 "c" ( (uint32_t)(u64 >> 32) ));
468# endif
469# else
470 __asm
471 {
472 mov ebx, dword ptr [u64]
473 mov ecx, dword ptr [u64 + 4]
474 mov edi, pu64
475 mov eax, dword ptr [edi]
476 mov edx, dword ptr [edi + 4]
477 retry:
478 lock cmpxchg8b [edi]
479 jnz retry
480 mov dword ptr [u64], eax
481 mov dword ptr [u64 + 4], edx
482 }
483# endif
484# endif /* !RT_ARCH_AMD64 */
485 return u64;
486}
487#endif
488
489
490/**
491 * Atomically Exchange a signed 64-bit value, ordered.
492 *
493 * @returns Current *pi64 value
494 * @param pi64 Pointer to the 64-bit variable to update.
495 * @param i64 The 64-bit value to assign to *pi64.
496 */
497DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
498{
499 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
500}
501
502
503/**
504 * Atomically Exchange a pointer value, ordered.
505 *
506 * @returns Current *ppv value
507 * @param ppv Pointer to the pointer variable to update.
508 * @param pv The pointer value to assign to *ppv.
509 */
510DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
511{
512#if ARCH_BITS == 32
513 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
514#elif ARCH_BITS == 64
515 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
516#else
517# error "ARCH_BITS is bogus"
518#endif
519}
520
521
522/**
523 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
524 *
525 * @returns Current *ppv value
526 * @param ppv Pointer to the pointer variable to update.
527 * @param pv The pointer value to assign to *ppv.
528 * @param Type The type of *ppv, sans volatile.
529 */
530#ifdef __GNUC__
531# define ASMAtomicXchgPtrT(ppv, pv, Type) \
532 __extension__ \
533 ({\
534 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
535 Type const pvTypeChecked = (pv); \
536 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
537 pvTypeCheckedRet; \
538 })
539#else
540# define ASMAtomicXchgPtrT(ppv, pv, Type) \
541 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
542#endif
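
/* Illustrative sketch (not part of the original header): detaching a node
 * from a shared slot with the type-safe exchanger so exactly one thread
 * frees it. PEXAMPLENODE, g_pExampleHead and exampleFreeNode are
 * hypothetical.
 * @code
 * PEXAMPLENODE pNode = ASMAtomicXchgPtrT(&g_pExampleHead, NULL, PEXAMPLENODE);
 * if (pNode)
 *     exampleFreeNode(pNode);
 * @endcode
 */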
543
544
545/**
546 * Atomically Exchange a raw-mode context pointer value, ordered.
547 *
548 * @returns Current *ppvRC value
549 * @param ppvRC Pointer to the pointer variable to update.
550 * @param pvRC The pointer value to assign to *ppvRC.
551 */
552DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
553{
554 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
555}
556
557
558/**
559 * Atomically Exchange a ring-0 pointer value, ordered.
560 *
561 * @returns Current *ppvR0 value
562 * @param ppvR0 Pointer to the pointer variable to update.
563 * @param pvR0 The pointer value to assign to *ppvR0.
564 */
565DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
566{
567#if R0_ARCH_BITS == 32
568 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
569#elif R0_ARCH_BITS == 64
570 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
571#else
572# error "R0_ARCH_BITS is bogus"
573#endif
574}
575
576
577/**
578 * Atomically Exchange a ring-3 pointer value, ordered.
579 *
580 * @returns Current *ppvR3 value
581 * @param ppvR3 Pointer to the pointer variable to update.
582 * @param pvR3 The pointer value to assign to *ppvR3.
583 */
584DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
585{
586#if R3_ARCH_BITS == 32
587 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
588#elif R3_ARCH_BITS == 64
589 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
590#else
591# error "R3_ARCH_BITS is bogus"
592#endif
593}
594
595
596/** @def ASMAtomicXchgHandle
597 * Atomically Exchange a typical IPRT handle value, ordered.
598 *
599 * @param ph Pointer to the value to update.
600 * @param hNew The new value to assign to *ph.
601 * @param phRes Where to store the current *ph value.
602 *
603 * @remarks This doesn't currently work for all handles (like RTFILE).
604 */
605#if HC_ARCH_BITS == 32
606# define ASMAtomicXchgHandle(ph, hNew, phRes) \
607 do { \
608 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
609 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
610 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
611 } while (0)
612#elif HC_ARCH_BITS == 64
613# define ASMAtomicXchgHandle(ph, hNew, phRes) \
614 do { \
615 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
616 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
617 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
618 } while (0)
619#else
620# error HC_ARCH_BITS
621#endif
622
623
624/**
625 * Atomically Exchange a value whose size might differ
626 * between platforms or compilers, ordered.
627 *
628 * @param pu Pointer to the variable to update.
629 * @param uNew The value to assign to *pu.
630 * @todo This is busted as it's missing the result argument.
631 */
632#define ASMAtomicXchgSize(pu, uNew) \
633 do { \
634 switch (sizeof(*(pu))) { \
635 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
636 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
637 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
638 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
639 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
640 } \
641 } while (0)
642
643/**
644 * Atomically Exchange a value whose size might differ
645 * between platforms or compilers, ordered.
646 *
647 * @param pu Pointer to the variable to update.
648 * @param uNew The value to assign to *pu.
649 * @param puRes Where to store the current *pu value.
650 */
651#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
652 do { \
653 switch (sizeof(*(pu))) { \
654 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
655 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
656 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
657 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
658 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
659 } \
660 } while (0)
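
/* Illustrative sketch (not part of the original header): exchanging a
 * variable whose width differs between targets (a size_t here) while
 * capturing the previous value. g_cbExampleUsed is hypothetical.
 * @code
 * size_t cbOld;
 * ASMAtomicXchgSizeCorrect(&g_cbExampleUsed, (size_t)0, &cbOld);
 * @endcode
 */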
661
662
663
664/**
665 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
666 *
667 * @returns true if xchg was done.
668 * @returns false if xchg wasn't done.
669 *
670 * @param pu8 Pointer to the value to update.
671 * @param u8New The new value to assign to *pu8.
672 * @param u8Old The old value to compare *pu8 with.
673 */
674#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
675DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
676#else
677DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
678{
679 uint8_t u8Ret;
680 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
681 "setz %1\n\t"
682 : "=m" (*pu8),
683 "=qm" (u8Ret),
684 "=a" (u8Old)
685 : "q" (u8New),
686 "2" (u8Old),
687 "m" (*pu8));
688 return (bool)u8Ret;
689}
690#endif
691
692
693/**
694 * Atomically Compare and Exchange a signed 8-bit value, ordered.
695 *
696 * @returns true if xchg was done.
697 * @returns false if xchg wasn't done.
698 *
699 * @param pi8 Pointer to the value to update.
700 * @param i8New The new value to assign to *pi8.
701 * @param i8Old The old value to compare *pi8 with.
702 */
703DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
704{
705 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
706}
707
708
709/**
710 * Atomically Compare and Exchange a bool value, ordered.
711 *
712 * @returns true if xchg was done.
713 * @returns false if xchg wasn't done.
714 *
715 * @param pf Pointer to the value to update.
716 * @param fNew The new value to assign to *pf.
717 * @param fOld The old value to compare *pf with.
718 */
719DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
720{
721 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
722}
723
724
725/**
726 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
727 *
728 * @returns true if xchg was done.
729 * @returns false if xchg wasn't done.
730 *
731 * @param pu32 Pointer to the value to update.
732 * @param u32New The new value to assign to *pu32.
733 * @param u32Old The old value to compare *pu32 with.
734 */
735#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
736DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
737#else
738DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
739{
740# if RT_INLINE_ASM_GNU_STYLE
741 uint8_t u8Ret;
742 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
743 "setz %1\n\t"
744 : "=m" (*pu32),
745 "=qm" (u8Ret),
746 "=a" (u32Old)
747 : "r" (u32New),
748 "2" (u32Old),
749 "m" (*pu32));
750 return (bool)u8Ret;
751
752# elif RT_INLINE_ASM_USES_INTRIN
753 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
754
755# else
756 uint32_t u32Ret;
757 __asm
758 {
759# ifdef RT_ARCH_AMD64
760 mov rdx, [pu32]
761# else
762 mov edx, [pu32]
763# endif
764 mov eax, [u32Old]
765 mov ecx, [u32New]
766# ifdef RT_ARCH_AMD64
767 lock cmpxchg [rdx], ecx
768# else
769 lock cmpxchg [edx], ecx
770# endif
771 setz al
772 movzx eax, al
773 mov [u32Ret], eax
774 }
775 return !!u32Ret;
776# endif
777}
778#endif
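
/* Illustrative sketch (not part of the original header): a minimal try-lock
 * built on ASMAtomicCmpXchgU32; the exchange only succeeds while the lock
 * word is still 0. g_u32ExampleLock, exampleTryLock and exampleUnlock are
 * hypothetical.
 * @code
 * static volatile uint32_t g_u32ExampleLock = 0;
 *
 * DECLINLINE(bool) exampleTryLock(void)
 * {
 *     return ASMAtomicCmpXchgU32(&g_u32ExampleLock, 1, 0);
 * }
 *
 * DECLINLINE(void) exampleUnlock(void)
 * {
 *     ASMAtomicXchgU32(&g_u32ExampleLock, 0);
 * }
 * @endcode
 */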
779
780
781/**
782 * Atomically Compare and Exchange a signed 32-bit value, ordered.
783 *
784 * @returns true if xchg was done.
785 * @returns false if xchg wasn't done.
786 *
787 * @param pi32 Pointer to the value to update.
788 * @param i32New The new value to assign to *pi32.
789 * @param i32Old The old value to compare *pi32 with.
790 */
791DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
792{
793 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
794}
795
796
797/**
798 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
799 *
800 * @returns true if xchg was done.
801 * @returns false if xchg wasn't done.
802 *
803 * @param pu64 Pointer to the 64-bit variable to update.
804 * @param u64New The 64-bit value to assign to *pu64.
805 * @param u64Old The value to compare with.
806 */
807#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
808 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
809DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
810#else
811DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
812{
813# if RT_INLINE_ASM_USES_INTRIN
814 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
815
816# elif defined(RT_ARCH_AMD64)
817# if RT_INLINE_ASM_GNU_STYLE
818 uint8_t u8Ret;
819 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
820 "setz %1\n\t"
821 : "=m" (*pu64),
822 "=qm" (u8Ret),
823 "=a" (u64Old)
824 : "r" (u64New),
825 "2" (u64Old),
826 "m" (*pu64));
827 return (bool)u8Ret;
828# else
829 bool fRet;
830 __asm
831 {
832 mov rdx, [pu64]
833 mov rax, [u64Old]
834 mov rcx, [u64New]
835 lock cmpxchg [rdx], rcx
836 setz al
837 mov [fRet], al
838 }
839 return fRet;
840# endif
841# else /* !RT_ARCH_AMD64 */
842 uint32_t u32Ret;
843# if RT_INLINE_ASM_GNU_STYLE
844# if defined(PIC) || defined(__PIC__)
845 uint32_t u32EBX = (uint32_t)u64New;
846 uint32_t u32Spill;
847 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
848 "lock; cmpxchg8b (%6)\n\t"
849 "setz %%al\n\t"
850 "movl %4, %%ebx\n\t"
851 "movzbl %%al, %%eax\n\t"
852 : "=a" (u32Ret),
853 "=d" (u32Spill),
854# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
855 "+m" (*pu64)
856# else
857 "=m" (*pu64)
858# endif
859 : "A" (u64Old),
860 "m" ( u32EBX ),
861 "c" ( (uint32_t)(u64New >> 32) ),
862 "S" (pu64));
863# else /* !PIC */
864 uint32_t u32Spill;
865 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
866 "setz %%al\n\t"
867 "movzbl %%al, %%eax\n\t"
868 : "=a" (u32Ret),
869 "=d" (u32Spill),
870 "+m" (*pu64)
871 : "A" (u64Old),
872 "b" ( (uint32_t)u64New ),
873 "c" ( (uint32_t)(u64New >> 32) ));
874# endif
875 return (bool)u32Ret;
876# else
877 __asm
878 {
879 mov ebx, dword ptr [u64New]
880 mov ecx, dword ptr [u64New + 4]
881 mov edi, [pu64]
882 mov eax, dword ptr [u64Old]
883 mov edx, dword ptr [u64Old + 4]
884 lock cmpxchg8b [edi]
885 setz al
886 movzx eax, al
887 mov dword ptr [u32Ret], eax
888 }
889 return !!u32Ret;
890# endif
891# endif /* !RT_ARCH_AMD64 */
892}
893#endif
894
895
896/**
897 * Atomically Compare and exchange a signed 64-bit value, ordered.
898 *
899 * @returns true if xchg was done.
900 * @returns false if xchg wasn't done.
901 *
902 * @param pi64 Pointer to the 64-bit variable to update.
903 * @param i64 The 64-bit value to assign to *pi64.
904 * @param i64Old The value to compare with.
905 */
906DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
907{
908 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
909}
910
911
912/**
913 * Atomically Compare and Exchange a pointer value, ordered.
914 *
915 * @returns true if xchg was done.
916 * @returns false if xchg wasn't done.
917 *
918 * @param ppv Pointer to the value to update.
919 * @param pvNew The new value to assign to *ppv.
920 * @param pvOld The old value to compare *ppv with.
921 */
922DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
923{
924#if ARCH_BITS == 32
925 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
926#elif ARCH_BITS == 64
927 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
928#else
929# error "ARCH_BITS is bogus"
930#endif
931}
932
933
934/**
935 * Atomically Compare and Exchange a pointer value, ordered.
936 *
937 * @returns true if xchg was done.
938 * @returns false if xchg wasn't done.
939 *
940 * @param ppv Pointer to the value to update.
941 * @param pvNew The new value to assign to *ppv.
942 * @param pvOld The old value to compare *ppv with.
943 *
944 * @remarks This is relatively type safe on GCC platforms.
945 */
946#ifdef __GNUC__
947# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
948 __extension__ \
949 ({\
950 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
951 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
952 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
953 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
954 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
955 fMacroRet; \
956 })
957#else
958# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
959 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
960#endif
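
/* Illustrative sketch (not part of the original header): lazy, lock-free
 * installation of a singleton; the losing thread destroys its instance.
 * g_pExampleSingleton, exampleCreate and exampleDestroy are hypothetical.
 * @code
 * PEXAMPLE pNew = exampleCreate();
 * if (!ASMAtomicCmpXchgPtr(&g_pExampleSingleton, pNew, NULL))
 *     exampleDestroy(pNew); // another thread won the race
 * @endcode
 */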
961
962
963/** @def ASMAtomicCmpXchgHandle
964 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
965 *
966 * @param ph Pointer to the value to update.
967 * @param hNew The new value to assign to *ph.
968 * @param hOld The old value to compare *ph with.
969 * @param fRc Where to store the result.
970 *
971 * @remarks This doesn't currently work for all handles (like RTFILE).
972 */
973#if HC_ARCH_BITS == 32
974# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
975 do { \
976 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
977 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
978 } while (0)
979#elif HC_ARCH_BITS == 64
980# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
981 do { \
982 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
983 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
984 } while (0)
985#else
986# error HC_ARCH_BITS
987#endif
988
989
990/** @def ASMAtomicCmpXchgSize
991 * Atomically Compare and Exchange a value whose size might differ
992 * between platforms or compilers, ordered.
993 *
994 * @param pu Pointer to the value to update.
995 * @param uNew The new value to assign to *pu.
996 * @param uOld The old value to compare *pu with.
997 * @param fRc Where to store the result.
998 */
999#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1000 do { \
1001 switch (sizeof(*(pu))) { \
1002 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1003 break; \
1004 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1005 break; \
1006 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1007 (fRc) = false; \
1008 break; \
1009 } \
1010 } while (0)
1011
1012
1013/**
1014 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1015 * passes back old value, ordered.
1016 *
1017 * @returns true if xchg was done.
1018 * @returns false if xchg wasn't done.
1019 *
1020 * @param pu32 Pointer to the value to update.
1021 * @param u32New The new value to assign to *pu32.
1022 * @param u32Old The old value to compare *pu32 with.
1023 * @param pu32Old Pointer to store the old value at.
1024 */
1025#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1026DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1027#else
1028DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1029{
1030# if RT_INLINE_ASM_GNU_STYLE
1031 uint8_t u8Ret;
1032 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1033 "setz %1\n\t"
1034 : "=m" (*pu32),
1035 "=qm" (u8Ret),
1036 "=a" (*pu32Old)
1037 : "r" (u32New),
1038 "a" (u32Old),
1039 "m" (*pu32));
1040 return (bool)u8Ret;
1041
1042# elif RT_INLINE_ASM_USES_INTRIN
1043 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1044
1045# else
1046 uint32_t u32Ret;
1047 __asm
1048 {
1049# ifdef RT_ARCH_AMD64
1050 mov rdx, [pu32]
1051# else
1052 mov edx, [pu32]
1053# endif
1054 mov eax, [u32Old]
1055 mov ecx, [u32New]
1056# ifdef RT_ARCH_AMD64
1057 lock cmpxchg [rdx], ecx
1058 mov rdx, [pu32Old]
1059 mov [rdx], eax
1060# else
1061 lock cmpxchg [edx], ecx
1062 mov edx, [pu32Old]
1063 mov [edx], eax
1064# endif
1065 setz al
1066 movzx eax, al
1067 mov [u32Ret], eax
1068 }
1069 return !!u32Ret;
1070# endif
1071}
1072#endif
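
/* Illustrative sketch (not part of the original header): a compare-exchange
 * retry loop that keeps the maximum value seen, reusing the old value the
 * Ex variant passes back instead of re-reading on every iteration.
 * exampleAtomicMaxU32 is hypothetical.
 * @code
 * DECLINLINE(void) exampleAtomicMaxU32(volatile uint32_t *pu32Max, uint32_t u32New)
 * {
 *     uint32_t u32Old = ASMAtomicUoReadU32(pu32Max);
 *     while (   u32New > u32Old
 *            && !ASMAtomicCmpXchgExU32(pu32Max, u32New, u32Old, &u32Old))
 *         ; // u32Old now holds the refreshed value, try again
 * }
 * @endcode
 */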
1073
1074
1075/**
1076 * Atomically Compare and Exchange a signed 32-bit value, additionally
1077 * passes back old value, ordered.
1078 *
1079 * @returns true if xchg was done.
1080 * @returns false if xchg wasn't done.
1081 *
1082 * @param pi32 Pointer to the value to update.
1083 * @param i32New The new value to assign to *pi32.
1084 * @param i32Old The old value to compare *pi32 with.
1085 * @param pi32Old Pointer to store the old value at.
1086 */
1087DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1088{
1089 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1090}
1091
1092
1093/**
1094 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1095 * passing back old value, ordered.
1096 *
1097 * @returns true if xchg was done.
1098 * @returns false if xchg wasn't done.
1099 *
1100 * @param pu64 Pointer to the 64-bit variable to update.
1101 * @param u64New The 64-bit value to assign to *pu64.
1102 * @param u64Old The value to compare with.
1103 * @param pu64Old Pointer to store the old value at.
1104 */
1105#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1106 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1107DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1108#else
1109DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1110{
1111# if RT_INLINE_ASM_USES_INTRIN
1112 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1113
1114# elif defined(RT_ARCH_AMD64)
1115# if RT_INLINE_ASM_GNU_STYLE
1116 uint8_t u8Ret;
1117 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1118 "setz %1\n\t"
1119 : "=m" (*pu64),
1120 "=qm" (u8Ret),
1121 "=a" (*pu64Old)
1122 : "r" (u64New),
1123 "a" (u64Old),
1124 "m" (*pu64));
1125 return (bool)u8Ret;
1126# else
1127 bool fRet;
1128 __asm
1129 {
1130 mov rdx, [pu64]
1131 mov rax, [u64Old]
1132 mov rcx, [u64New]
1133 lock cmpxchg [rdx], rcx
1134 mov rdx, [pu64Old]
1135 mov [rdx], rax
1136 setz al
1137 mov [fRet], al
1138 }
1139 return fRet;
1140# endif
1141# else /* !RT_ARCH_AMD64 */
1142# if RT_INLINE_ASM_GNU_STYLE
1143 uint64_t u64Ret;
1144# if defined(PIC) || defined(__PIC__)
1145 /* NB: this code uses a memory clobber description, because the clean
1146 * solution with an output value for *pu64 makes gcc run out of registers.
1147 * This will cause suboptimal code, and anyone with a better solution is
1148 * welcome to improve this. */
1149 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1150 "lock; cmpxchg8b %3\n\t"
1151 "xchgl %%ebx, %1\n\t"
1152 : "=A" (u64Ret)
1153 : "DS" ((uint32_t)u64New),
1154 "c" ((uint32_t)(u64New >> 32)),
1155 "m" (*pu64),
1156 "0" (u64Old)
1157 : "memory" );
1158# else /* !PIC */
1159 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1160 : "=A" (u64Ret),
1161 "=m" (*pu64)
1162 : "b" ((uint32_t)u64New),
1163 "c" ((uint32_t)(u64New >> 32)),
1164 "m" (*pu64),
1165 "0" (u64Old));
1166# endif
1167 *pu64Old = u64Ret;
1168 return u64Ret == u64Old;
1169# else
1170 uint32_t u32Ret;
1171 __asm
1172 {
1173 mov ebx, dword ptr [u64New]
1174 mov ecx, dword ptr [u64New + 4]
1175 mov edi, [pu64]
1176 mov eax, dword ptr [u64Old]
1177 mov edx, dword ptr [u64Old + 4]
1178 lock cmpxchg8b [edi]
1179 mov ebx, [pu64Old]
1180 mov [ebx], eax
1181 setz al
1182 movzx eax, al
1183 add ebx, 4
1184 mov [ebx], edx
1185 mov dword ptr [u32Ret], eax
1186 }
1187 return !!u32Ret;
1188# endif
1189# endif /* !RT_ARCH_AMD64 */
1190}
1191#endif
1192
1193
1194/**
1195 * Atomically Compare and exchange a signed 64-bit value, additionally
1196 * passing back old value, ordered.
1197 *
1198 * @returns true if xchg was done.
1199 * @returns false if xchg wasn't done.
1200 *
1201 * @param pi64 Pointer to the 64-bit variable to update.
1202 * @param i64 The 64-bit value to assign to *pi64.
1203 * @param i64Old The value to compare with.
1204 * @param pi64Old Pointer to store the old value at.
1205 */
1206DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1207{
1208 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1209}
1210
1211/** @def ASMAtomicCmpXchgExHandle
1212 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1213 *
1214 * @param ph Pointer to the value to update.
1215 * @param hNew The new value to assign to *ph.
1216 * @param hOld The old value to compare *ph with.
1217 * @param fRc Where to store the result.
1218 * @param phOldVal Pointer to where to store the old value.
1219 *
1220 * @remarks This doesn't currently work for all handles (like RTFILE).
1221 */
1222#if HC_ARCH_BITS == 32
1223# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1224 do { \
1225 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1226 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1227 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1228 } while (0)
1229#elif HC_ARCH_BITS == 64
1230# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1231 do { \
1232 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1233 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1234 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1235 } while (0)
1236#else
1237# error HC_ARCH_BITS
1238#endif
1239
1240
1241/** @def ASMAtomicCmpXchgExSize
1242 * Atomically Compare and Exchange a value whose size might differ
1243 * between platforms or compilers. Additionally passes back old value.
1244 *
1245 * @param pu Pointer to the value to update.
1246 * @param uNew The new value to assign to *pu.
1247 * @param uOld The old value to compare *pu with.
1248 * @param fRc Where to store the result.
1249 * @param puOldVal Pointer to where to store the old value.
1250 */
1251#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1252 do { \
1253 switch (sizeof(*(pu))) { \
1254 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1255 break; \
1256 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1257 break; \
1258 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1259 (fRc) = false; \
1260 *(puOldVal) = 0; \
1261 break; \
1262 } \
1263 } while (0)
1264
1265
1266/**
1267 * Atomically Compare and Exchange a pointer value, additionally
1268 * passing back old value, ordered.
1269 *
1270 * @returns true if xchg was done.
1271 * @returns false if xchg wasn't done.
1272 *
1273 * @param ppv Pointer to the value to update.
1274 * @param pvNew The new value to assign to *ppv.
1275 * @param pvOld The old value to compare *ppv with.
1276 * @param ppvOld Pointer to store the old value at.
1277 */
1278DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1279{
1280#if ARCH_BITS == 32
1281 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1282#elif ARCH_BITS == 64
1283 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1284#else
1285# error "ARCH_BITS is bogus"
1286#endif
1287}
1288
1289
1290/**
1291 * Atomically Compare and Exchange a pointer value, additionally
1292 * passing back old value, ordered.
1293 *
1294 * @returns true if xchg was done.
1295 * @returns false if xchg wasn't done.
1296 *
1297 * @param ppv Pointer to the value to update.
1298 * @param pvNew The new value to assign to *ppv.
1299 * @param pvOld The old value to compare *ppv with.
1300 * @param ppvOld Pointer to store the old value at.
1301 *
1302 * @remarks This is relatively type safe on GCC platforms.
1303 */
1304#ifdef __GNUC__
1305# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1306 __extension__ \
1307 ({\
1308 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1309 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1310 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1311 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1312 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1313 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1314 (void **)ppvOldTypeChecked); \
1315 fMacroRet; \
1316 })
1317#else
1318# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1319 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1320#endif
1321
1322
1323/**
1324 * Serialize Instruction.
1325 */
1326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1327DECLASM(void) ASMSerializeInstruction(void);
1328#else
1329DECLINLINE(void) ASMSerializeInstruction(void)
1330{
1331# if RT_INLINE_ASM_GNU_STYLE
1332 RTCCUINTREG xAX = 0;
1333# ifdef RT_ARCH_AMD64
1334 __asm__ ("cpuid"
1335 : "=a" (xAX)
1336 : "0" (xAX)
1337 : "rbx", "rcx", "rdx");
1338# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1339 __asm__ ("push %%ebx\n\t"
1340 "cpuid\n\t"
1341 "pop %%ebx\n\t"
1342 : "=a" (xAX)
1343 : "0" (xAX)
1344 : "ecx", "edx");
1345# else
1346 __asm__ ("cpuid"
1347 : "=a" (xAX)
1348 : "0" (xAX)
1349 : "ebx", "ecx", "edx");
1350# endif
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 int aInfo[4];
1354 __cpuid(aInfo, 0);
1355
1356# else
1357 __asm
1358 {
1359 push ebx
1360 xor eax, eax
1361 cpuid
1362 pop ebx
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Memory fence, waits for any pending writes and reads to complete.
1371 */
1372DECLINLINE(void) ASMMemoryFence(void)
1373{
1374 /** @todo use mfence? check if all cpus we care for support it. */
1375 uint32_t volatile u32;
1376 ASMAtomicXchgU32(&u32, 0);
1377}
1378
1379
1380/**
1381 * Write fence, waits for any pending writes to complete.
1382 */
1383DECLINLINE(void) ASMWriteFence(void)
1384{
1385 /** @todo use sfence? check if all cpus we care for support it. */
1386 ASMMemoryFence();
1387}
1388
1389
1390/**
1391 * Read fence, waits for any pending reads to complete.
1392 */
1393DECLINLINE(void) ASMReadFence(void)
1394{
1395 /** @todo use lfence? check if all cpus we care for support it. */
1396 ASMMemoryFence();
1397}
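
/* Illustrative sketch (not part of the original header): pairing the write
 * and read fences in a simple producer/consumer handshake over two volatile
 * variables. g_ExampleData, g_fExampleReady and exampleUse are hypothetical.
 * @code
 * // producer
 * g_ExampleData = 42;
 * ASMWriteFence();            // data must be visible before the flag
 * g_fExampleReady = true;
 *
 * // consumer
 * if (g_fExampleReady)
 * {
 *     ASMReadFence();         // don't read the data ahead of the flag
 *     exampleUse(g_ExampleData);
 * }
 * @endcode
 */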
1398
1399
1400/**
1401 * Atomically reads an unsigned 8-bit value, ordered.
1402 *
1403 * @returns Current *pu8 value
1404 * @param pu8 Pointer to the 8-bit variable to read.
1405 */
1406DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1407{
1408 ASMMemoryFence();
1409 return *pu8; /* byte reads are atomic on x86 */
1410}
1411
1412
1413/**
1414 * Atomically reads an unsigned 8-bit value, unordered.
1415 *
1416 * @returns Current *pu8 value
1417 * @param pu8 Pointer to the 8-bit variable to read.
1418 */
1419DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1420{
1421 return *pu8; /* byte reads are atomic on x86 */
1422}
1423
1424
1425/**
1426 * Atomically reads a signed 8-bit value, ordered.
1427 *
1428 * @returns Current *pi8 value
1429 * @param pi8 Pointer to the 8-bit variable to read.
1430 */
1431DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1432{
1433 ASMMemoryFence();
1434 return *pi8; /* byte reads are atomic on x86 */
1435}
1436
1437
1438/**
1439 * Atomically reads a signed 8-bit value, unordered.
1440 *
1441 * @returns Current *pi8 value
1442 * @param pi8 Pointer to the 8-bit variable to read.
1443 */
1444DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1445{
1446 return *pi8; /* byte reads are atomic on x86 */
1447}
1448
1449
1450/**
1451 * Atomically reads an unsigned 16-bit value, ordered.
1452 *
1453 * @returns Current *pu16 value
1454 * @param pu16 Pointer to the 16-bit variable to read.
1455 */
1456DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1457{
1458 ASMMemoryFence();
1459 Assert(!((uintptr_t)pu16 & 1));
1460 return *pu16;
1461}
1462
1463
1464/**
1465 * Atomically reads an unsigned 16-bit value, unordered.
1466 *
1467 * @returns Current *pu16 value
1468 * @param pu16 Pointer to the 16-bit variable to read.
1469 */
1470DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1471{
1472 Assert(!((uintptr_t)pu16 & 1));
1473 return *pu16;
1474}
1475
1476
1477/**
1478 * Atomically reads a signed 16-bit value, ordered.
1479 *
1480 * @returns Current *pi16 value
1481 * @param pi16 Pointer to the 16-bit variable to read.
1482 */
1483DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1484{
1485 ASMMemoryFence();
1486 Assert(!((uintptr_t)pi16 & 1));
1487 return *pi16;
1488}
1489
1490
1491/**
1492 * Atomically reads a signed 16-bit value, unordered.
1493 *
1494 * @returns Current *pi16 value
1495 * @param pi16 Pointer to the 16-bit variable to read.
1496 */
1497DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1498{
1499 Assert(!((uintptr_t)pi16 & 1));
1500 return *pi16;
1501}
1502
1503
1504/**
1505 * Atomically reads an unsigned 32-bit value, ordered.
1506 *
1507 * @returns Current *pu32 value
1508 * @param pu32 Pointer to the 32-bit variable to read.
1509 */
1510DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1511{
1512 ASMMemoryFence();
1513 Assert(!((uintptr_t)pu32 & 3));
1514 return *pu32;
1515}
1516
1517
1518/**
1519 * Atomically reads an unsigned 32-bit value, unordered.
1520 *
1521 * @returns Current *pu32 value
1522 * @param pu32 Pointer to the 32-bit variable to read.
1523 */
1524DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1525{
1526 Assert(!((uintptr_t)pu32 & 3));
1527 return *pu32;
1528}
1529
1530
1531/**
1532 * Atomically reads a signed 32-bit value, ordered.
1533 *
1534 * @returns Current *pi32 value
1535 * @param pi32 Pointer to the 32-bit variable to read.
1536 */
1537DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1538{
1539 ASMMemoryFence();
1540 Assert(!((uintptr_t)pi32 & 3));
1541 return *pi32;
1542}
1543
1544
1545/**
1546 * Atomically reads a signed 32-bit value, unordered.
1547 *
1548 * @returns Current *pi32 value
1549 * @param pi32 Pointer to the 32-bit variable to read.
1550 */
1551DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1552{
1553 Assert(!((uintptr_t)pi32 & 3));
1554 return *pi32;
1555}
1556
1557
1558/**
1559 * Atomically reads an unsigned 64-bit value, ordered.
1560 *
1561 * @returns Current *pu64 value
1562 * @param pu64 Pointer to the 64-bit variable to read.
1563 * The memory pointed to must be writable.
1564 * @remark This will fault if the memory is read-only!
1565 */
1566#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1567 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1568DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1569#else
1570DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1571{
1572 uint64_t u64;
1573# ifdef RT_ARCH_AMD64
1574 Assert(!((uintptr_t)pu64 & 7));
1575/*# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__( "mfence\n\t"
1577 "movq %1, %0\n\t"
1578 : "=r" (u64)
1579 : "m" (*pu64));
1580# else
1581 __asm
1582 {
1583 mfence
1584 mov rdx, [pu64]
1585 mov rax, [rdx]
1586 mov [u64], rax
1587 }
1588# endif*/
1589 ASMMemoryFence();
1590 u64 = *pu64;
1591# else /* !RT_ARCH_AMD64 */
1592# if RT_INLINE_ASM_GNU_STYLE
1593# if defined(PIC) || defined(__PIC__)
1594 uint32_t u32EBX = 0;
1595 Assert(!((uintptr_t)pu64 & 7));
1596 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1597 "lock; cmpxchg8b (%5)\n\t"
1598 "movl %3, %%ebx\n\t"
1599 : "=A" (u64),
1600# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1601 "+m" (*pu64)
1602# else
1603 "=m" (*pu64)
1604# endif
1605 : "0" (0ULL),
1606 "m" (u32EBX),
1607 "c" (0),
1608 "S" (pu64));
1609# else /* !PIC */
1610 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1611 : "=A" (u64),
1612 "+m" (*pu64)
1613 : "0" (0ULL),
1614 "b" (0),
1615 "c" (0));
1616# endif
1617# else
1618 Assert(!((uintptr_t)pu64 & 7));
1619 __asm
1620 {
1621 xor eax, eax
1622 xor edx, edx
1623 mov edi, pu64
1624 xor ecx, ecx
1625 xor ebx, ebx
1626 lock cmpxchg8b [edi]
1627 mov dword ptr [u64], eax
1628 mov dword ptr [u64 + 4], edx
1629 }
1630# endif
1631# endif /* !RT_ARCH_AMD64 */
1632 return u64;
1633}
1634#endif
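
/* Illustrative sketch (not part of the original header): taking a torn-free
 * snapshot of a 64-bit counter another thread updates; note the remark
 * above that the variable must be writable, since cmpxchg8b is used to read
 * it on 32-bit hosts. g_cExampleCalls is hypothetical.
 * @code
 * uint64_t cCalls = ASMAtomicReadU64(&g_cExampleCalls);
 * @endcode
 */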
1635
1636
1637/**
1638 * Atomically reads an unsigned 64-bit value, unordered.
1639 *
1640 * @returns Current *pu64 value
1641 * @param pu64 Pointer to the 64-bit variable to read.
1642 * The memory pointed to must be writable.
1643 * @remark This will fault if the memory is read-only!
1644 */
1645#if !defined(RT_ARCH_AMD64) \
1646 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1647 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1648DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1649#else
1650DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1651{
1652 uint64_t u64;
1653# ifdef RT_ARCH_AMD64
1654 Assert(!((uintptr_t)pu64 & 7));
1655/*# if RT_INLINE_ASM_GNU_STYLE
1656 Assert(!((uintptr_t)pu64 & 7));
1657 __asm__ __volatile__("movq %1, %0\n\t"
1658 : "=r" (u64)
1659 : "m" (*pu64));
1660# else
1661 __asm
1662 {
1663 mov rdx, [pu64]
1664 mov rax, [rdx]
1665 mov [u64], rax
1666 }
1667# endif */
1668 u64 = *pu64;
1669# else /* !RT_ARCH_AMD64 */
1670# if RT_INLINE_ASM_GNU_STYLE
1671# if defined(PIC) || defined(__PIC__)
1672 uint32_t u32EBX = 0;
1673 uint32_t u32Spill;
1674 Assert(!((uintptr_t)pu64 & 7));
1675 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1676 "xor %%ecx,%%ecx\n\t"
1677 "xor %%edx,%%edx\n\t"
1678 "xchgl %%ebx, %3\n\t"
1679 "lock; cmpxchg8b (%4)\n\t"
1680 "movl %3, %%ebx\n\t"
1681 : "=A" (u64),
1682# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1683 "+m" (*pu64),
1684# else
1685 "=m" (*pu64),
1686# endif
1687 "=c" (u32Spill)
1688 : "m" (u32EBX),
1689 "S" (pu64));
1690# else /* !PIC */
1691 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1692 : "=A" (u64),
1693 "+m" (*pu64)
1694 : "0" (0ULL),
1695 "b" (0),
1696 "c" (0));
1697# endif
1698# else
1699 Assert(!((uintptr_t)pu64 & 7));
1700 __asm
1701 {
1702 xor eax, eax
1703 xor edx, edx
1704 mov edi, pu64
1705 xor ecx, ecx
1706 xor ebx, ebx
1707 lock cmpxchg8b [edi]
1708 mov dword ptr [u64], eax
1709 mov dword ptr [u64 + 4], edx
1710 }
1711# endif
1712# endif /* !RT_ARCH_AMD64 */
1713 return u64;
1714}
1715#endif
1716
1717
1718/**
1719 * Atomically reads a signed 64-bit value, ordered.
1720 *
1721 * @returns Current *pi64 value
1722 * @param pi64 Pointer to the 64-bit variable to read.
1723 * The memory pointed to must be writable.
1724 * @remark This will fault if the memory is read-only!
1725 */
1726DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1727{
1728 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1729}
1730
1731
1732/**
1733 * Atomically reads a signed 64-bit value, unordered.
1734 *
1735 * @returns Current *pi64 value
1736 * @param pi64 Pointer to the 64-bit variable to read.
1737 * The memory pointed to must be writable.
1738 * @remark This will fault if the memory is read-only!
1739 */
1740DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1741{
1742 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1743}
1744
1745
1746/**
1747 * Atomically reads a size_t value, ordered.
1748 *
1749 * @returns Current *pcb value
1750 * @param pcb Pointer to the size_t variable to read.
1751 */
1752DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1753{
1754#if ARCH_BITS == 64
1755 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1756#elif ARCH_BITS == 32
1757 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1758#else
1759# error "Unsupported ARCH_BITS value"
1760#endif
1761}
1762
1763
1764/**
1765 * Atomically reads a size_t value, unordered.
1766 *
1767 * @returns Current *pcb value
1768 * @param pcb Pointer to the size_t variable to read.
1769 */
1770DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1771{
1772#if ARCH_BITS == 64
1773 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1774#elif ARCH_BITS == 32
1775 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1776#else
1777# error "Unsupported ARCH_BITS value"
1778#endif
1779}
1780
1781
1782/**
1783 * Atomically reads a pointer value, ordered.
1784 *
1785 * @returns Current *pv value
1786 * @param ppv Pointer to the pointer variable to read.
1787 *
1788 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
1789 * requires less typing (no casts).
1790 */
1791DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1792{
1793#if ARCH_BITS == 32
1794 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1795#elif ARCH_BITS == 64
1796 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1797#else
1798# error "ARCH_BITS is bogus"
1799#endif
1800}
1801
1802/**
1803 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1804 *
1805 * @returns Current *pv value
1806 * @param ppv Pointer to the pointer variable to read.
1807 * @param Type The type of *ppv, sans volatile.
1808 */
1809#ifdef __GNUC__
1810# define ASMAtomicReadPtrT(ppv, Type) \
1811 __extension__ \
1812 ({\
1813 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1814 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1815 pvTypeChecked; \
1816 })
1817#else
1818# define ASMAtomicReadPtrT(ppv, Type) \
1819 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1820#endif
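
/* Illustrative sketch (not part of the original header): taking a type-safe
 * snapshot of a shared pointer before using it. PEXAMPLESTATE,
 * g_pExampleState and exampleUseState are hypothetical.
 * @code
 * PEXAMPLESTATE pState = ASMAtomicReadPtrT(&g_pExampleState, PEXAMPLESTATE);
 * if (pState)
 *     exampleUseState(pState);
 * @endcode
 */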
1821
1822
1823/**
1824 * Atomically reads a pointer value, unordered.
1825 *
1826 * @returns Current *pv value
1827 * @param ppv Pointer to the pointer variable to read.
1828 *
1829 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
1830 * requires less typing (no casts).
1831 */
1832DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1833{
1834#if ARCH_BITS == 32
1835 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1836#elif ARCH_BITS == 64
1837 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1838#else
1839# error "ARCH_BITS is bogus"
1840#endif
1841}
1842
1843
1844/**
1845 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1846 *
1847 * @returns Current *pv value
1848 * @param ppv Pointer to the pointer variable to read.
1849 * @param Type The type of *ppv, sans volatile.
1850 */
1851#ifdef __GNUC__
1852# define ASMAtomicUoReadPtrT(ppv, Type) \
1853 __extension__ \
1854 ({\
1855 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1856 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
1857 pvTypeChecked; \
1858 })
1859#else
1860# define ASMAtomicUoReadPtrT(ppv, Type) \
1861 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
1862#endif
1863
1864
1865/**
1866 * Atomically reads a boolean value, ordered.
1867 *
1868 * @returns Current *pf value
1869 * @param pf Pointer to the boolean variable to read.
1870 */
1871DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1872{
1873 ASMMemoryFence();
1874 return *pf; /* byte reads are atomic on x86 */
1875}
1876
1877
1878/**
1879 * Atomically reads a boolean value, unordered.
1880 *
1881 * @returns Current *pf value
1882 * @param pf Pointer to the boolean variable to read.
1883 */
1884DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1885{
1886 return *pf; /* byte reads are atomic on x86 */
1887}
1888
1889
1890/**
1891 * Atomically read a typical IPRT handle value, ordered.
1892 *
1893 * @param ph Pointer to the handle variable to read.
1894 * @param phRes Where to store the result.
1895 *
1896 * @remarks This doesn't currently work for all handles (like RTFILE).
1897 */
1898#if HC_ARCH_BITS == 32
1899# define ASMAtomicReadHandle(ph, phRes) \
1900 do { \
1901 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1902 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1903 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1904 } while (0)
1905#elif HC_ARCH_BITS == 64
1906# define ASMAtomicReadHandle(ph, phRes) \
1907 do { \
1908 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1909 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1910 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1911 } while (0)
1912#else
1913# error HC_ARCH_BITS
1914#endif
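
/*
 * Usage sketch (illustrative only): reading a shared IPRT handle with ASMAtomicReadHandle.
 * It assumes RTSEMEVENT (a pointer-sized handle type from iprt/types.h, which this header
 * already pulls in) satisfies the AssertCompile above; the variable and function names are
 * hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     static RTSEMEVENT volatile g_hEvtShutdown;
 *
 *     static bool myHaveShutdownEvent(void)
 *     {
 *         RTSEMEVENT hEvt;
 *         ASMAtomicReadHandle(&g_hEvtShutdown, &hEvt);   // ordered read into hEvt
 *         return hEvt != NIL_RTSEMEVENT;
 *     }
 */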
1915
1916
1917/**
1918 * Atomically read a typical IPRT handle value, unordered.
1919 *
1920 * @param ph Pointer to the handle variable to read.
1921 * @param phRes Where to store the result.
1922 *
1923 * @remarks This doesn't currently work for all handles (like RTFILE).
1924 */
1925#if HC_ARCH_BITS == 32
1926# define ASMAtomicUoReadHandle(ph, phRes) \
1927 do { \
1928 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1929 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1930 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1931 } while (0)
1932#elif HC_ARCH_BITS == 64
1933# define ASMAtomicUoReadHandle(ph, phRes) \
1934 do { \
1935 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1936 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1937 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1938 } while (0)
1939#else
1940# error HC_ARCH_BITS
1941#endif
1942
1943
1944/**
1945 * Atomically read a value whose size might differ
1946 * between platforms or compilers, ordered.
1947 *
1948 * @param pu Pointer to the variable to read.
1949 * @param puRes Where to store the result.
1950 */
1951#define ASMAtomicReadSize(pu, puRes) \
1952 do { \
1953 switch (sizeof(*(pu))) { \
1954 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1955 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
1956 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
1957 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
1958 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1959 } \
1960 } while (0)
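
/*
 * Usage sketch (illustrative only): ASMAtomicReadSize with a size_t, whose width differs
 * between 32-bit and 64-bit builds; the macro dispatches to ASMAtomicReadU32 or
 * ASMAtomicReadU64 accordingly.  The variable and function names are hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     static size_t volatile g_cbUsed;
 *
 *     static size_t myGetUsed(void)
 *     {
 *         size_t cbUsed;
 *         ASMAtomicReadSize(&g_cbUsed, &cbUsed);
 *         return cbUsed;
 *     }
 */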
1961
1962
1963/**
1964 * Atomically read a value whose size might differ
1965 * between platforms or compilers, unordered.
1966 *
1967 * @param pu Pointer to the variable to read.
1968 * @param puRes Where to store the result.
1969 */
1970#define ASMAtomicUoReadSize(pu, puRes) \
1971 do { \
1972 switch (sizeof(*(pu))) { \
1973 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1974 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
1975 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
1976 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
1977            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1978 } \
1979 } while (0)
1980
1981
1982/**
1983 * Atomically writes an unsigned 8-bit value, ordered.
1984 *
1985 * @param pu8 Pointer to the 8-bit variable.
1986 * @param u8 The 8-bit value to assign to *pu8.
1987 */
1988DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1989{
1990 ASMAtomicXchgU8(pu8, u8);
1991}
1992
1993
1994/**
1995 * Atomically writes an unsigned 8-bit value, unordered.
1996 *
1997 * @param pu8 Pointer to the 8-bit variable.
1998 * @param u8 The 8-bit value to assign to *pu8.
1999 */
2000DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2001{
2002 *pu8 = u8; /* byte writes are atomic on x86 */
2003}
2004
2005
2006/**
2007 * Atomically writes a signed 8-bit value, ordered.
2008 *
2009 * @param   pi8     Pointer to the 8-bit variable to write.
2010 * @param i8 The 8-bit value to assign to *pi8.
2011 */
2012DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2013{
2014 ASMAtomicXchgS8(pi8, i8);
2015}
2016
2017
2018/**
2019 * Atomically writes a signed 8-bit value, unordered.
2020 *
2021 * @param pi8 Pointer to the 8-bit variable to write.
2022 * @param i8 The 8-bit value to assign to *pi8.
2023 */
2024DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2025{
2026 *pi8 = i8; /* byte writes are atomic on x86 */
2027}
2028
2029
2030/**
2031 * Atomically writes an unsigned 16-bit value, ordered.
2032 *
2033 * @param pu16 Pointer to the 16-bit variable to write.
2034 * @param u16 The 16-bit value to assign to *pu16.
2035 */
2036DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2037{
2038 ASMAtomicXchgU16(pu16, u16);
2039}
2040
2041
2042/**
2043 * Atomically writes an unsigned 16-bit value, unordered.
2044 *
2045 * @param pu16 Pointer to the 16-bit variable to write.
2046 * @param u16 The 16-bit value to assign to *pu16.
2047 */
2048DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2049{
2050 Assert(!((uintptr_t)pu16 & 1));
2051 *pu16 = u16;
2052}
2053
2054
2055/**
2056 * Atomically writes a signed 16-bit value, ordered.
2057 *
2058 * @param pi16 Pointer to the 16-bit variable to write.
2059 * @param i16 The 16-bit value to assign to *pi16.
2060 */
2061DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2062{
2063 ASMAtomicXchgS16(pi16, i16);
2064}
2065
2066
2067/**
2068 * Atomically writes a signed 16-bit value, unordered.
2069 *
2070 * @param pi16 Pointer to the 16-bit variable to write.
2071 * @param i16 The 16-bit value to assign to *pi16.
2072 */
2073DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2074{
2075 Assert(!((uintptr_t)pi16 & 1));
2076 *pi16 = i16;
2077}
2078
2079
2080/**
2081 * Atomically writes an unsigned 32-bit value, ordered.
2082 *
2083 * @param pu32 Pointer to the 32-bit variable to write.
2084 * @param u32 The 32-bit value to assign to *pu32.
2085 */
2086DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2087{
2088 ASMAtomicXchgU32(pu32, u32);
2089}
2090
2091
2092/**
2093 * Atomically writes an unsigned 32-bit value, unordered.
2094 *
2095 * @param pu32 Pointer to the 32-bit variable to write.
2096 * @param u32 The 32-bit value to assign to *pu32.
2097 */
2098DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2099{
2100 Assert(!((uintptr_t)pu32 & 3));
2101 *pu32 = u32;
2102}
2103
2104
2105/**
2106 * Atomically writes a signed 32-bit value, ordered.
2107 *
2108 * @param pi32 Pointer to the 32-bit variable to write.
2109 * @param i32 The 32-bit value to assign to *pi32.
2110 */
2111DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2112{
2113 ASMAtomicXchgS32(pi32, i32);
2114}
2115
2116
2117/**
2118 * Atomically writes a signed 32-bit value, unordered.
2119 *
2120 * @param pi32 Pointer to the 32-bit variable to write.
2121 * @param i32 The 32-bit value to assign to *pi32.
2122 */
2123DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2124{
2125 Assert(!((uintptr_t)pi32 & 3));
2126 *pi32 = i32;
2127}
2128
2129
2130/**
2131 * Atomically writes an unsigned 64-bit value, ordered.
2132 *
2133 * @param pu64 Pointer to the 64-bit variable to write.
2134 * @param u64 The 64-bit value to assign to *pu64.
2135 */
2136DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2137{
2138 ASMAtomicXchgU64(pu64, u64);
2139}
2140
2141
2142/**
2143 * Atomically writes an unsigned 64-bit value, unordered.
2144 *
2145 * @param pu64 Pointer to the 64-bit variable to write.
2146 * @param u64 The 64-bit value to assign to *pu64.
2147 */
2148DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2149{
2150 Assert(!((uintptr_t)pu64 & 7));
2151#if ARCH_BITS == 64
2152 *pu64 = u64;
2153#else
2154 ASMAtomicXchgU64(pu64, u64);
2155#endif
2156}
2157
2158
2159/**
2160 * Atomically writes a signed 64-bit value, ordered.
2161 *
2162 * @param pi64 Pointer to the 64-bit variable to write.
2163 * @param i64 The 64-bit value to assign to *pi64.
2164 */
2165DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2166{
2167 ASMAtomicXchgS64(pi64, i64);
2168}
2169
2170
2171/**
2172 * Atomically writes a signed 64-bit value, unordered.
2173 *
2174 * @param pi64 Pointer to the 64-bit variable to write.
2175 * @param i64 The 64-bit value to assign to *pi64.
2176 */
2177DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2178{
2179 Assert(!((uintptr_t)pi64 & 7));
2180#if ARCH_BITS == 64
2181 *pi64 = i64;
2182#else
2183 ASMAtomicXchgS64(pi64, i64);
2184#endif
2185}
2186
2187
2188/**
2189 * Atomically writes a boolean value, ordered.
2190 *
2191 * @param pf Pointer to the boolean variable to write.
2192 * @param f The boolean value to assign to *pf.
2193 */
2194DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2195{
2196 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2197}
2198
2199
2200/**
2201 * Atomically writes a boolean value, unordered.
2202 *
2203 * @param pf Pointer to the boolean variable to write.
2204 * @param f The boolean value to assign to *pf.
2205 */
2206DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2207{
2208 *pf = f; /* byte writes are atomic on x86 */
2209}
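
/*
 * Usage sketch (illustrative only): the common stop-flag pattern built from the ordered
 * boolean read/write helpers.  The g_fShutdown variable and function names are
 * hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     static bool volatile g_fShutdown;
 *
 *     static void myRequestShutdown(void)
 *     {
 *         ASMAtomicWriteBool(&g_fShutdown, true);     // ordered write, seen by workers
 *     }
 *
 *     static bool myWorkerShouldQuit(void)
 *     {
 *         return ASMAtomicReadBool(&g_fShutdown);     // ordered read in the worker loop
 *     }
 */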
2210
2211
2212/**
2213 * Atomically writes a pointer value, ordered.
2214 *
2215 * @param ppv Pointer to the pointer variable to write.
2216 * @param pv The pointer value to assign to *ppv.
2217 */
2218DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2219{
2220#if ARCH_BITS == 32
2221 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2222#elif ARCH_BITS == 64
2223 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2224#else
2225# error "ARCH_BITS is bogus"
2226#endif
2227}
2228
2229
2230/**
2231 * Atomically writes a pointer value, ordered.
2232 *
2233 * @param ppv Pointer to the pointer variable to write.
2234 * @param pv The pointer value to assign to *ppv. If NULL use
2235 * ASMAtomicWriteNullPtr or you'll land in trouble.
2236 *
2237 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2238 * NULL.
2239 */
2240#ifdef __GNUC__
2241# define ASMAtomicWritePtr(ppv, pv) \
2242 do \
2243 { \
2244 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2245 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2246 \
2247 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2248 AssertCompile(sizeof(pv) == sizeof(void *)); \
2249 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2250 \
2251 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2252 } while (0)
2253#else
2254# define ASMAtomicWritePtr(ppv, pv) \
2255 do \
2256 { \
2257 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2258 AssertCompile(sizeof(pv) == sizeof(void *)); \
2259 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2260 \
2261 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2262 } while (0)
2263#endif
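
/*
 * Usage sketch (illustrative only): the initialize-then-publish pattern the ordered
 * pointer write is intended for.  It assumes RTMemAllocZ from iprt/mem.h and the status
 * codes from iprt/err.h; the MYCFG type and names are hypothetical.
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/err.h>
 *     #include <iprt/mem.h>
 *
 *     typedef struct MYCFG { uint32_t uVersion; } MYCFG;
 *     static MYCFG * volatile g_pCfg;
 *
 *     static int myPublishConfig(uint32_t uVersion)
 *     {
 *         MYCFG *pCfg = (MYCFG *)RTMemAllocZ(sizeof(*pCfg));
 *         if (!pCfg)
 *             return VERR_NO_MEMORY;
 *         pCfg->uVersion = uVersion;           // initialize fully first ...
 *         ASMAtomicWritePtr(&g_pCfg, pCfg);    // ... then publish with ordered semantics
 *         return VINF_SUCCESS;
 *     }
 */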
2264
2265
2266/**
2267 * Atomically sets a pointer to NULL, ordered.
2268 *
2269 * @param ppv Pointer to the pointer variable that should be set to NULL.
2270 *
2271 * @remarks This is relatively type safe on GCC platforms.
2272 */
2273#ifdef __GNUC__
2274# define ASMAtomicWriteNullPtr(ppv) \
2275 do \
2276 { \
2277 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2278 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2279 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2280 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2281 } while (0)
2282#else
2283# define ASMAtomicWriteNullPtr(ppv) \
2284 do \
2285 { \
2286 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2287 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2288 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2289 } while (0)
2290#endif
2291
2292
2293/**
2294 * Atomically writes a pointer value, unordered.
2295 *
2296 *
2297 * @param ppv Pointer to the pointer variable.
2298 * @param pv The pointer value to assign to *ppv. If NULL use
2299 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2300 *
2301 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2302 * NULL.
2303 */
2304#ifdef __GNUC__
2305# define ASMAtomicUoWritePtr(ppv, pv) \
2306 do \
2307 { \
2308 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2309 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2310 \
2311 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2312 AssertCompile(sizeof(pv) == sizeof(void *)); \
2313 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2314 \
2315 *(ppvTypeChecked) = pvTypeChecked; \
2316 } while (0)
2317#else
2318# define ASMAtomicUoWritePtr(ppv, pv) \
2319 do \
2320 { \
2321 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2322 AssertCompile(sizeof(pv) == sizeof(void *)); \
2323 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2324 *(ppv) = pv; \
2325 } while (0)
2326#endif
2327
2328
2329/**
2330 * Atomically sets a pointer to NULL, unordered.
2331 *
2332 * @param ppv Pointer to the pointer variable that should be set to NULL.
2333 *
2334 * @remarks This is relatively type safe on GCC platforms.
2335 */
2336#ifdef __GNUC__
2337# define ASMAtomicUoWriteNullPtr(ppv) \
2338 do \
2339 { \
2340 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2341 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2342 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2343 *(ppvTypeChecked) = NULL; \
2344 } while (0)
2345#else
2346# define ASMAtomicUoWriteNullPtr(ppv) \
2347 do \
2348 { \
2349 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2350 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2351 *(ppv) = NULL; \
2352 } while (0)
2353#endif
2354
2355
2356/**
2357 * Atomically write a typical IPRT handle value, ordered.
2358 *
2359 * @param ph Pointer to the variable to update.
2360 * @param hNew The value to assign to *ph.
2361 *
2362 * @remarks This doesn't currently work for all handles (like RTFILE).
2363 */
2364#if HC_ARCH_BITS == 32
2365# define ASMAtomicWriteHandle(ph, hNew) \
2366 do { \
2367 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2368 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2369 } while (0)
2370#elif HC_ARCH_BITS == 64
2371# define ASMAtomicWriteHandle(ph, hNew) \
2372 do { \
2373 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2374 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2375 } while (0)
2376#else
2377# error HC_ARCH_BITS
2378#endif
2379
2380
2381/**
2382 * Atomically write a typical IPRT handle value, unordered.
2383 *
2384 * @param ph Pointer to the variable to update.
2385 * @param hNew The value to assign to *ph.
2386 *
2387 * @remarks This doesn't currently work for all handles (like RTFILE).
2388 */
2389#if HC_ARCH_BITS == 32
2390# define ASMAtomicUoWriteHandle(ph, hNew) \
2391 do { \
2392 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2393        ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2394 } while (0)
2395#elif HC_ARCH_BITS == 64
2396# define ASMAtomicUoWriteHandle(ph, hNew) \
2397 do { \
2398 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2399        ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2400 } while (0)
2401#else
2402# error HC_ARCH_BITS
2403#endif
2404
2405
2406/**
2407 * Atomically write a value whose size might differ
2408 * between platforms or compilers, ordered.
2409 *
2410 * @param pu Pointer to the variable to update.
2411 * @param uNew The value to assign to *pu.
2412 */
2413#define ASMAtomicWriteSize(pu, uNew) \
2414 do { \
2415 switch (sizeof(*(pu))) { \
2416 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2417 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2418 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2419 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2420 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2421 } \
2422 } while (0)
2423
2424/**
2425 * Atomically write a value whose size might differ
2426 * between platforms or compilers, unordered.
2427 *
2428 * @param pu Pointer to the variable to update.
2429 * @param uNew The value to assign to *pu.
2430 */
2431#define ASMAtomicUoWriteSize(pu, uNew) \
2432 do { \
2433 switch (sizeof(*(pu))) { \
2434 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2435 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2436 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2437 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2438            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2439 } \
2440 } while (0)
2441
2442
2443
2444/**
2445 * Atomically exchanges and adds to a 32-bit value, ordered.
2446 *
2447 * @returns The old value.
2448 * @param pu32 Pointer to the value.
2449 * @param u32 Number to add.
2450 */
2451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2452DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2453#else
2454DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2455{
2456# if RT_INLINE_ASM_USES_INTRIN
2457 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2458 return u32;
2459
2460# elif RT_INLINE_ASM_GNU_STYLE
2461 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2462 : "=r" (u32),
2463 "=m" (*pu32)
2464 : "0" (u32),
2465 "m" (*pu32)
2466 : "memory");
2467 return u32;
2468# else
2469 __asm
2470 {
2471 mov eax, [u32]
2472# ifdef RT_ARCH_AMD64
2473 mov rdx, [pu32]
2474 lock xadd [rdx], eax
2475# else
2476 mov edx, [pu32]
2477 lock xadd [edx], eax
2478# endif
2479 mov [u32], eax
2480 }
2481 return u32;
2482# endif
2483}
2484#endif
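
/*
 * Usage sketch (illustrative only): fetch-and-add with ASMAtomicAddU32, here handing out
 * unique sequence numbers.  The variable and function names are hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint32_t volatile g_uNextSeqNo;
 *
 *     static uint32_t myGrabSeqNo(void)
 *     {
 *         // The return value is the counter *before* the addition, so concurrent
 *         // callers each get a distinct number.
 *         return ASMAtomicAddU32(&g_uNextSeqNo, 1);
 *     }
 */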
2485
2486
2487/**
2488 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2489 *
2490 * @returns The old value.
2491 * @param pi32 Pointer to the value.
2492 * @param i32 Number to add.
2493 */
2494DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2495{
2496 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2497}
2498
2499
2500/**
2501 * Atomically exchanges and adds to a 64-bit value, ordered.
2502 *
2503 * @returns The old value.
2504 * @param pu64 Pointer to the value.
2505 * @param u64 Number to add.
2506 */
2507#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2508DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2509#else
2510DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2511{
2512# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2513 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2514 return u64;
2515
2516# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2517 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2518 : "=r" (u64),
2519 "=m" (*pu64)
2520 : "0" (u64),
2521 "m" (*pu64)
2522 : "memory");
2523 return u64;
2524# else
2525 uint64_t u64Old;
2526 for (;;)
2527 {
2528 uint64_t u64New;
2529 u64Old = ASMAtomicUoReadU64(pu64);
2530 u64New = u64Old + u64;
2531 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2532 break;
2533 ASMNopPause();
2534 }
2535 return u64Old;
2536# endif
2537}
2538#endif
2539
2540
2541/**
2542 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2543 *
2544 * @returns The old value.
2545 * @param pi64 Pointer to the value.
2546 * @param i64 Number to add.
2547 */
2548DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2549{
2550 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2551}
2552
2553
2554/**
2555 * Atomically exchanges and adds to a size_t value, ordered.
2556 *
2557 * @returns The old value.
2558 * @param pcb Pointer to the size_t value.
2559 * @param cb Number to add.
2560 */
2561DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2562{
2563#if ARCH_BITS == 64
2564 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2565#elif ARCH_BITS == 32
2566 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2567#else
2568# error "Unsupported ARCH_BITS value"
2569#endif
2570}
2571
2572
2573/**
2574 * Atomically exchanges and adds a value whose size might differ between
2575 * platforms or compilers, ordered.
2576 *
2577 * @param pu Pointer to the variable to update.
2578 * @param uNew The value to add to *pu.
2579 * @param puOld Where to store the old value.
2580 */
2581#define ASMAtomicAddSize(pu, uNew, puOld) \
2582 do { \
2583 switch (sizeof(*(pu))) { \
2584 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2585 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2586 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2587 } \
2588 } while (0)
2589
2590
2591/**
2592 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2593 *
2594 * @returns The old value.
2595 * @param pu32 Pointer to the value.
2596 * @param u32 Number to subtract.
2597 */
2598DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2599{
2600 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2601}
2602
2603
2604/**
2605 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2606 *
2607 * @returns The old value.
2608 * @param pi32 Pointer to the value.
2609 * @param i32 Number to subtract.
2610 */
2611DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2612{
2613 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2614}
2615
2616
2617/**
2618 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2619 *
2620 * @returns The old value.
2621 * @param pu64 Pointer to the value.
2622 * @param u64 Number to subtract.
2623 */
2624DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2625{
2626 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2627}
2628
2629
2630/**
2631 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2632 *
2633 * @returns The old value.
2634 * @param pi64 Pointer to the value.
2635 * @param i64 Number to subtract.
2636 */
2637DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2638{
2639 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2640}
2641
2642
2643/**
2644 * Atomically exchanges and subtracts from a size_t value, ordered.
2645 *
2646 * @returns The old value.
2647 * @param pcb Pointer to the size_t value.
2648 * @param cb Number to subtract.
2649 */
2650DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2651{
2652#if ARCH_BITS == 64
2653 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2654#elif ARCH_BITS == 32
2655 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2656#else
2657# error "Unsupported ARCH_BITS value"
2658#endif
2659}
2660
2661
2662/**
2663 * Atomically exchanges and subtracts a value whose size might differ between
2664 * platforms or compilers, ordered.
2665 *
2666 * @param pu Pointer to the variable to update.
2667 * @param   uNew    The value to subtract from *pu.
2668 * @param puOld Where to store the old value.
2669 */
2670#define ASMAtomicSubSize(pu, uNew, puOld) \
2671 do { \
2672 switch (sizeof(*(pu))) { \
2673 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2674 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2675 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2676 } \
2677 } while (0)
2678
2679
2680/**
2681 * Atomically increment a 32-bit value, ordered.
2682 *
2683 * @returns The new value.
2684 * @param pu32 Pointer to the value to increment.
2685 */
2686#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2687DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2688#else
2689DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2690{
2691 uint32_t u32;
2692# if RT_INLINE_ASM_USES_INTRIN
2693 u32 = _InterlockedIncrement((long *)pu32);
2694 return u32;
2695
2696# elif RT_INLINE_ASM_GNU_STYLE
2697 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2698 : "=r" (u32),
2699 "=m" (*pu32)
2700 : "0" (1),
2701 "m" (*pu32)
2702 : "memory");
2703 return u32+1;
2704# else
2705 __asm
2706 {
2707 mov eax, 1
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 lock xadd [rdx], eax
2711# else
2712 mov edx, [pu32]
2713 lock xadd [edx], eax
2714# endif
2715 mov u32, eax
2716 }
2717 return u32+1;
2718# endif
2719}
2720#endif
2721
2722
2723/**
2724 * Atomically increment a signed 32-bit value, ordered.
2725 *
2726 * @returns The new value.
2727 * @param pi32 Pointer to the value to increment.
2728 */
2729DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2730{
2731 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2732}
2733
2734
2735/**
2736 * Atomically increment a 64-bit value, ordered.
2737 *
2738 * @returns The new value.
2739 * @param pu64 Pointer to the value to increment.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2742DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2743#else
2744DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2745{
2746# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2747 uint64_t u64;
2748 u64 = _InterlockedIncrement64((__int64 *)pu64);
2749 return u64;
2750
2751# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2752 uint64_t u64;
2753 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2754 : "=r" (u64),
2755 "=m" (*pu64)
2756 : "0" (1),
2757 "m" (*pu64)
2758 : "memory");
2759 return u64 + 1;
2760# else
2761 return ASMAtomicAddU64(pu64, 1) + 1;
2762# endif
2763}
2764#endif
2765
2766
2767/**
2768 * Atomically increment a signed 64-bit value, ordered.
2769 *
2770 * @returns The new value.
2771 * @param pi64 Pointer to the value to increment.
2772 */
2773DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2774{
2775 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2776}
2777
2778
2779/**
2780 * Atomically increment a size_t value, ordered.
2781 *
2782 * @returns The new value.
2783 * @param pcb Pointer to the value to increment.
2784 */
2785DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
2786{
2787#if ARCH_BITS == 64
2788 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2789#elif ARCH_BITS == 32
2790 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2791#else
2792# error "Unsupported ARCH_BITS value"
2793#endif
2794}
2795
2796
2797/**
2798 * Atomically decrement an unsigned 32-bit value, ordered.
2799 *
2800 * @returns The new value.
2801 * @param pu32 Pointer to the value to decrement.
2802 */
2803#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2804DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2805#else
2806DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2807{
2808 uint32_t u32;
2809# if RT_INLINE_ASM_USES_INTRIN
2810 u32 = _InterlockedDecrement((long *)pu32);
2811 return u32;
2812
2813# elif RT_INLINE_ASM_GNU_STYLE
2814 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2815 : "=r" (u32),
2816 "=m" (*pu32)
2817 : "0" (-1),
2818 "m" (*pu32)
2819 : "memory");
2820 return u32-1;
2821# else
2822 __asm
2823 {
2824 mov eax, -1
2825# ifdef RT_ARCH_AMD64
2826 mov rdx, [pu32]
2827 lock xadd [rdx], eax
2828# else
2829 mov edx, [pu32]
2830 lock xadd [edx], eax
2831# endif
2832 mov u32, eax
2833 }
2834 return u32-1;
2835# endif
2836}
2837#endif
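
/*
 * Usage sketch (illustrative only): the reference-counting pattern ASMAtomicIncU32 and
 * ASMAtomicDecU32 are typically used for (both return the new count).  It assumes
 * RTMemFree from iprt/mem.h; the MYOBJ type and names are hypothetical.
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/mem.h>
 *
 *     typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *     static uint32_t myObjRetain(MYOBJ *pObj)
 *     {
 *         return ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     static uint32_t myObjRelease(MYOBJ *pObj)
 *     {
 *         uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs);
 *         if (cRefs == 0)
 *             RTMemFree(pObj);                 // last reference is gone
 *         return cRefs;
 *     }
 */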
2838
2839
2840/**
2841 * Atomically decrement a signed 32-bit value, ordered.
2842 *
2843 * @returns The new value.
2844 * @param pi32 Pointer to the value to decrement.
2845 */
2846DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2847{
2848 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2849}
2850
2851
2852/**
2853 * Atomically decrement an unsigned 64-bit value, ordered.
2854 *
2855 * @returns The new value.
2856 * @param pu64 Pointer to the value to decrement.
2857 */
2858#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2859DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2860#else
2861DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2862{
2863# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2864 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2865 return u64;
2866
2867# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2868 uint64_t u64;
2869 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2870 : "=r" (u64),
2871 "=m" (*pu64)
2872 : "0" (~(uint64_t)0),
2873 "m" (*pu64)
2874 : "memory");
2875 return u64-1;
2876# else
2877 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2878# endif
2879}
2880#endif
2881
2882
2883/**
2884 * Atomically decrement a signed 64-bit value, ordered.
2885 *
2886 * @returns The new value.
2887 * @param pi64 Pointer to the value to decrement.
2888 */
2889DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2890{
2891 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2892}
2893
2894
2895/**
2896 * Atomically decrement a size_t value, ordered.
2897 *
2898 * @returns The new value.
2899 * @param pcb Pointer to the value to decrement.
2900 */
2901DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
2902{
2903#if ARCH_BITS == 64
2904 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2905#elif ARCH_BITS == 32
2906 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2907#else
2908# error "Unsupported ARCH_BITS value"
2909#endif
2910}
2911
2912
2913/**
2914 * Atomically Or an unsigned 32-bit value, ordered.
2915 *
2916 * @param   pu32   Pointer to the 32-bit variable to OR u32 with.
2917 * @param u32 The value to OR *pu32 with.
2918 */
2919#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2920DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2921#else
2922DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2923{
2924# if RT_INLINE_ASM_USES_INTRIN
2925 _InterlockedOr((long volatile *)pu32, (long)u32);
2926
2927# elif RT_INLINE_ASM_GNU_STYLE
2928 __asm__ __volatile__("lock; orl %1, %0\n\t"
2929 : "=m" (*pu32)
2930 : "ir" (u32),
2931 "m" (*pu32));
2932# else
2933 __asm
2934 {
2935 mov eax, [u32]
2936# ifdef RT_ARCH_AMD64
2937 mov rdx, [pu32]
2938 lock or [rdx], eax
2939# else
2940 mov edx, [pu32]
2941 lock or [edx], eax
2942# endif
2943 }
2944# endif
2945}
2946#endif
2947
2948
2949/**
2950 * Atomically Or a signed 32-bit value, ordered.
2951 *
2952 * @param   pi32   Pointer to the 32-bit variable to OR i32 with.
2953 * @param   i32    The value to OR *pi32 with.
2954 */
2955DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2956{
2957 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2958}
2959
2960
2961/**
2962 * Atomically Or an unsigned 64-bit value, ordered.
2963 *
2964 * @param   pu64   Pointer to the 64-bit variable to OR u64 with.
2965 * @param u64 The value to OR *pu64 with.
2966 */
2967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2968DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2969#else
2970DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2971{
2972# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2973 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2974
2975# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2976 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2977 : "=m" (*pu64)
2978 : "r" (u64),
2979 "m" (*pu64));
2980# else
2981 for (;;)
2982 {
2983 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2984 uint64_t u64New = u64Old | u64;
2985 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2986 break;
2987 ASMNopPause();
2988 }
2989# endif
2990}
2991#endif
2992
2993
2994/**
2995 * Atomically Or a signed 64-bit value, ordered.
2996 *
2997 * @param   pi64   Pointer to the 64-bit variable to OR i64 with.
2998 * @param   i64    The value to OR *pi64 with.
2999 */
3000DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3001{
3002 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3003}
3004
3005
3006/**
3007 * Atomically And an unsigned 32-bit value, ordered.
3008 *
3009 * @param   pu32   Pointer to the 32-bit variable to AND u32 with.
3010 * @param u32 The value to AND *pu32 with.
3011 */
3012#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3013DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3014#else
3015DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3016{
3017# if RT_INLINE_ASM_USES_INTRIN
3018 _InterlockedAnd((long volatile *)pu32, u32);
3019
3020# elif RT_INLINE_ASM_GNU_STYLE
3021 __asm__ __volatile__("lock; andl %1, %0\n\t"
3022 : "=m" (*pu32)
3023 : "ir" (u32),
3024 "m" (*pu32));
3025# else
3026 __asm
3027 {
3028 mov eax, [u32]
3029# ifdef RT_ARCH_AMD64
3030 mov rdx, [pu32]
3031 lock and [rdx], eax
3032# else
3033 mov edx, [pu32]
3034 lock and [edx], eax
3035# endif
3036 }
3037# endif
3038}
3039#endif
3040
3041
3042/**
3043 * Atomically And a signed 32-bit value, ordered.
3044 *
3045 * @param   pi32   Pointer to the 32-bit variable to AND i32 with.
3046 * @param i32 The value to AND *pi32 with.
3047 */
3048DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3049{
3050 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3051}
3052
3053
3054/**
3055 * Atomically And an unsigned 64-bit value, ordered.
3056 *
3057 * @param   pu64   Pointer to the 64-bit variable to AND u64 with.
3058 * @param u64 The value to AND *pu64 with.
3059 */
3060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3061DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3062#else
3063DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3064{
3065# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3066 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3067
3068# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3069 __asm__ __volatile__("lock; andq %1, %0\n\t"
3070 : "=m" (*pu64)
3071 : "r" (u64),
3072 "m" (*pu64));
3073# else
3074 for (;;)
3075 {
3076 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3077 uint64_t u64New = u64Old & u64;
3078 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3079 break;
3080 ASMNopPause();
3081 }
3082# endif
3083}
3084#endif
3085
3086
3087/**
3088 * Atomically And a signed 64-bit value, ordered.
3089 *
3090 * @param   pi64   Pointer to the 64-bit variable to AND i64 with.
3091 * @param i64 The value to AND *pi64 with.
3092 */
3093DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3094{
3095 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3096}
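
/*
 * Usage sketch (illustrative only): the ordered OR/AND helpers used as atomic "set bits"
 * and "clear bits" operations on a shared flag word.  The flag value and names are
 * hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     #define MYFLAG_BUSY     UINT32_C(0x00000001)
 *
 *     static uint32_t volatile g_fFlags;
 *
 *     static void mySetBusy(void)
 *     {
 *         ASMAtomicOrU32(&g_fFlags, MYFLAG_BUSY);     // atomically set the bit
 *     }
 *
 *     static void myClearBusy(void)
 *     {
 *         ASMAtomicAndU32(&g_fFlags, ~MYFLAG_BUSY);   // atomically clear the bit
 *     }
 */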
3097
3098
3099/**
3100 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3101 *
3102 * @param   pu32   Pointer to the 32-bit variable to OR u32 with.
3103 * @param u32 The value to OR *pu32 with.
3104 */
3105#if RT_INLINE_ASM_EXTERNAL
3106DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3107#else
3108DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3109{
3110# if RT_INLINE_ASM_GNU_STYLE
3111 __asm__ __volatile__("orl %1, %0\n\t"
3112 : "=m" (*pu32)
3113 : "ir" (u32),
3114 "m" (*pu32));
3115# else
3116 __asm
3117 {
3118 mov eax, [u32]
3119# ifdef RT_ARCH_AMD64
3120 mov rdx, [pu32]
3121 or [rdx], eax
3122# else
3123 mov edx, [pu32]
3124 or [edx], eax
3125# endif
3126 }
3127# endif
3128}
3129#endif
3130
3131
3132/**
3133 * Atomically OR a signed 32-bit value, unordered.
3134 *
3135 * @param   pi32   Pointer to the 32-bit variable to OR i32 with.
3136 * @param   i32    The value to OR *pi32 with.
3137 */
3138DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3139{
3140 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3141}
3142
3143
3144/**
3145 * Atomically OR an unsigned 64-bit value, unordered.
3146 *
3147 * @param   pu64   Pointer to the 64-bit variable to OR u64 with.
3148 * @param u64 The value to OR *pu64 with.
3149 */
3150#if RT_INLINE_ASM_EXTERNAL
3151DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3152#else
3153DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3154{
3155# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3156 __asm__ __volatile__("orq %1, %q0\n\t"
3157 : "=m" (*pu64)
3158 : "r" (u64),
3159 "m" (*pu64));
3160# else
3161 for (;;)
3162 {
3163 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3164 uint64_t u64New = u64Old | u64;
3165 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3166 break;
3167 ASMNopPause();
3168 }
3169# endif
3170}
3171#endif
3172
3173
3174/**
3175 * Atomically Or a signed 64-bit value, unordered.
3176 *
3177 * @param   pi64   Pointer to the 64-bit variable to OR i64 with.
3178 * @param   i64    The value to OR *pi64 with.
3179 */
3180DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3181{
3182 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3183}
3184
3185
3186/**
3187 * Atomically And an unsigned 32-bit value, unordered.
3188 *
3189 * @param   pu32   Pointer to the 32-bit variable to AND u32 with.
3190 * @param u32 The value to AND *pu32 with.
3191 */
3192#if RT_INLINE_ASM_EXTERNAL
3193DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3194#else
3195DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3196{
3197# if RT_INLINE_ASM_GNU_STYLE
3198 __asm__ __volatile__("andl %1, %0\n\t"
3199 : "=m" (*pu32)
3200 : "ir" (u32),
3201 "m" (*pu32));
3202# else
3203 __asm
3204 {
3205 mov eax, [u32]
3206# ifdef RT_ARCH_AMD64
3207 mov rdx, [pu32]
3208 and [rdx], eax
3209# else
3210 mov edx, [pu32]
3211 and [edx], eax
3212# endif
3213 }
3214# endif
3215}
3216#endif
3217
3218
3219/**
3220 * Atomically And a signed 32-bit value, unordered.
3221 *
3222 * @param   pi32   Pointer to the 32-bit variable to AND i32 with.
3223 * @param i32 The value to AND *pi32 with.
3224 */
3225DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3226{
3227 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3228}
3229
3230
3231/**
3232 * Atomically And an unsigned 64-bit value, unordered.
3233 *
3234 * @param   pu64   Pointer to the 64-bit variable to AND u64 with.
3235 * @param u64 The value to AND *pu64 with.
3236 */
3237#if RT_INLINE_ASM_EXTERNAL
3238DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3239#else
3240DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3241{
3242# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3243 __asm__ __volatile__("andq %1, %0\n\t"
3244 : "=m" (*pu64)
3245 : "r" (u64),
3246 "m" (*pu64));
3247# else
3248 for (;;)
3249 {
3250 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3251 uint64_t u64New = u64Old & u64;
3252 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3253 break;
3254 ASMNopPause();
3255 }
3256# endif
3257}
3258#endif
3259
3260
3261/**
3262 * Atomically And a signed 64-bit value, unordered.
3263 *
3264 * @param   pi64   Pointer to the 64-bit variable to AND i64 with.
3265 * @param i64 The value to AND *pi64 with.
3266 */
3267DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3268{
3269 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3270}
3271
3272
3273/**
3274 * Atomically increment an unsigned 32-bit value, unordered.
3275 *
3276 * @returns the new value.
3277 * @param pu32 Pointer to the variable to increment.
3278 */
3279#if RT_INLINE_ASM_EXTERNAL
3280DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3281#else
3282DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3283{
3284 uint32_t u32;
3285# if RT_INLINE_ASM_GNU_STYLE
3286 __asm__ __volatile__("xaddl %0, %1\n\t"
3287 : "=r" (u32),
3288 "=m" (*pu32)
3289 : "0" (1),
3290 "m" (*pu32)
3291 : "memory");
3292 return u32 + 1;
3293# else
3294 __asm
3295 {
3296 mov eax, 1
3297# ifdef RT_ARCH_AMD64
3298 mov rdx, [pu32]
3299 xadd [rdx], eax
3300# else
3301 mov edx, [pu32]
3302 xadd [edx], eax
3303# endif
3304 mov u32, eax
3305 }
3306 return u32 + 1;
3307# endif
3308}
3309#endif
3310
3311
3312/**
3313 * Atomically decrement an unsigned 32-bit value, unordered.
3314 *
3315 * @returns the new value.
3316 * @param pu32 Pointer to the variable to decrement.
3317 */
3318#if RT_INLINE_ASM_EXTERNAL
3319DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3320#else
3321DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3322{
3323 uint32_t u32;
3324# if RT_INLINE_ASM_GNU_STYLE
3325 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3326 : "=r" (u32),
3327 "=m" (*pu32)
3328 : "0" (-1),
3329 "m" (*pu32)
3330 : "memory");
3331 return u32 - 1;
3332# else
3333 __asm
3334 {
3335 mov eax, -1
3336# ifdef RT_ARCH_AMD64
3337 mov rdx, [pu32]
3338 xadd [rdx], eax
3339# else
3340 mov edx, [pu32]
3341 xadd [edx], eax
3342# endif
3343 mov u32, eax
3344 }
3345 return u32 - 1;
3346# endif
3347}
3348#endif
3349
3350
3351/** @def RT_ASM_PAGE_SIZE
3352 * We try to avoid dragging in iprt/param.h here.
3353 * @internal
3354 */
3355#if defined(RT_ARCH_SPARC64)
3356# define RT_ASM_PAGE_SIZE 0x2000
3357# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3358# if PAGE_SIZE != 0x2000
3359# error "PAGE_SIZE is not 0x2000!"
3360# endif
3361# endif
3362#else
3363# define RT_ASM_PAGE_SIZE 0x1000
3364# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3365# if PAGE_SIZE != 0x1000
3366# error "PAGE_SIZE is not 0x1000!"
3367# endif
3368# endif
3369#endif
3370
3371/**
3372 * Zeros a 4K memory page.
3373 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3374 * @param pv Pointer to the memory block. This must be page aligned.
3375 */
3376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3377DECLASM(void) ASMMemZeroPage(volatile void *pv);
3378# else
3379DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3380{
3381# if RT_INLINE_ASM_USES_INTRIN
3382# ifdef RT_ARCH_AMD64
3383 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3384# else
3385 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3386# endif
3387
3388# elif RT_INLINE_ASM_GNU_STYLE
3389 RTCCUINTREG uDummy;
3390# ifdef RT_ARCH_AMD64
3391 __asm__ __volatile__("rep stosq"
3392 : "=D" (pv),
3393 "=c" (uDummy)
3394 : "0" (pv),
3395 "c" (RT_ASM_PAGE_SIZE >> 3),
3396 "a" (0)
3397 : "memory");
3398# else
3399 __asm__ __volatile__("rep stosl"
3400 : "=D" (pv),
3401 "=c" (uDummy)
3402 : "0" (pv),
3403 "c" (RT_ASM_PAGE_SIZE >> 2),
3404 "a" (0)
3405 : "memory");
3406# endif
3407# else
3408 __asm
3409 {
3410# ifdef RT_ARCH_AMD64
3411 xor rax, rax
3412 mov ecx, 0200h
3413 mov rdi, [pv]
3414 rep stosq
3415# else
3416 xor eax, eax
3417 mov ecx, 0400h
3418 mov edi, [pv]
3419 rep stosd
3420# endif
3421 }
3422# endif
3423}
3424# endif
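
/*
 * Usage sketch (illustrative only): zeroing a freshly allocated page.  It assumes a
 * page-aligned allocation such as RTMemPageAlloc from iprt/mem.h and PAGE_SIZE from
 * iprt/param.h; the function name is hypothetical.
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/mem.h>
 *     #include <iprt/param.h>
 *
 *     static void *myAllocZeroedPage(void)
 *     {
 *         void *pvPage = RTMemPageAlloc(PAGE_SIZE);   // page aligned by contract
 *         if (pvPage)
 *             ASMMemZeroPage(pvPage);
 *         return pvPage;
 *     }
 */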
3425
3426
3427/**
3428 * Zeros a memory block with a 32-bit aligned size.
3429 *
3430 * @param pv Pointer to the memory block.
3431 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3432 */
3433#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3434DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3435#else
3436DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3437{
3438# if RT_INLINE_ASM_USES_INTRIN
3439# ifdef RT_ARCH_AMD64
3440 if (!(cb & 7))
3441 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3442 else
3443# endif
3444 __stosd((unsigned long *)pv, 0, cb / 4);
3445
3446# elif RT_INLINE_ASM_GNU_STYLE
3447 __asm__ __volatile__("rep stosl"
3448 : "=D" (pv),
3449 "=c" (cb)
3450 : "0" (pv),
3451 "1" (cb >> 2),
3452 "a" (0)
3453 : "memory");
3454# else
3455 __asm
3456 {
3457 xor eax, eax
3458# ifdef RT_ARCH_AMD64
3459 mov rcx, [cb]
3460 shr rcx, 2
3461 mov rdi, [pv]
3462# else
3463 mov ecx, [cb]
3464 shr ecx, 2
3465 mov edi, [pv]
3466# endif
3467 rep stosd
3468 }
3469# endif
3470}
3471#endif
3472
3473
3474/**
3475 * Fills a memory block with a 32-bit aligned size.
3476 *
3477 * @param pv Pointer to the memory block.
3478 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3479 * @param u32 The value to fill with.
3480 */
3481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3482DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3483#else
3484DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3485{
3486# if RT_INLINE_ASM_USES_INTRIN
3487# ifdef RT_ARCH_AMD64
3488 if (!(cb & 7))
3489 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3490 else
3491# endif
3492 __stosd((unsigned long *)pv, u32, cb / 4);
3493
3494# elif RT_INLINE_ASM_GNU_STYLE
3495 __asm__ __volatile__("rep stosl"
3496 : "=D" (pv),
3497 "=c" (cb)
3498 : "0" (pv),
3499 "1" (cb >> 2),
3500 "a" (u32)
3501 : "memory");
3502# else
3503 __asm
3504 {
3505# ifdef RT_ARCH_AMD64
3506 mov rcx, [cb]
3507 shr rcx, 2
3508 mov rdi, [pv]
3509# else
3510 mov ecx, [cb]
3511 shr ecx, 2
3512 mov edi, [pv]
3513# endif
3514 mov eax, [u32]
3515 rep stosd
3516 }
3517# endif
3518}
3519#endif
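
/*
 * Usage sketch (illustrative only): poisoning a scratch buffer with a recognizable
 * pattern using ASMMemFill32; the byte count must be a multiple of four.  The buffer
 * and function names are hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint32_t g_au32Scratch[256];
 *
 *     static void myPoisonScratch(void)
 *     {
 *         ASMMemFill32(g_au32Scratch, sizeof(g_au32Scratch), UINT32_C(0xdeadbeef));
 *     }
 */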
3520
3521
3522/**
3523 * Checks if a memory page is all zeros.
3524 *
3525 * @returns true / false.
3526 *
3527 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3528 * boundary
3529 */
3530DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3531{
3532# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3533 union { RTCCUINTREG r; bool f; } uAX;
3534 RTCCUINTREG xCX, xDI;
3535 Assert(!((uintptr_t)pvPage & 15));
3536 __asm__ __volatile__("repe; "
3537# ifdef RT_ARCH_AMD64
3538 "scasq\n\t"
3539# else
3540 "scasl\n\t"
3541# endif
3542 "setnc %%al\n\t"
3543 : "=&c" (xCX),
3544 "=&D" (xDI),
3545 "=&a" (uAX.r)
3546 : "mr" (pvPage),
3547# ifdef RT_ARCH_AMD64
3548 "0" (RT_ASM_PAGE_SIZE/8),
3549# else
3550 "0" (RT_ASM_PAGE_SIZE/4),
3551# endif
3552 "1" (pvPage),
3553 "2" (0));
3554 return uAX.f;
3555# else
3556 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3557 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3558 Assert(!((uintptr_t)pvPage & 15));
3559 for (;;)
3560 {
3561 if (puPtr[0]) return false;
3562 if (puPtr[4]) return false;
3563
3564 if (puPtr[2]) return false;
3565 if (puPtr[6]) return false;
3566
3567 if (puPtr[1]) return false;
3568 if (puPtr[5]) return false;
3569
3570 if (puPtr[3]) return false;
3571 if (puPtr[7]) return false;
3572
3573 if (!--cLeft)
3574 return true;
3575 puPtr += 8;
3576 }
3577 return true;
3578# endif
3579}
3580
3581
3582/**
3583 * Checks if a memory block is filled with the specified byte.
3584 *
3585 * This is a sort of inverted memchr.
3586 *
3587 * @returns Pointer to the byte which doesn't equal u8.
3588 * @returns NULL if all equal to u8.
3589 *
3590 * @param pv Pointer to the memory block.
3591 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3592 * @param u8 The value it's supposed to be filled with.
3593 *
3594 * @todo Fix name, it is a predicate function but it's not returning boolean!
3595 */
3596DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3597{
3598/** @todo rewrite this in inline assembly? */
3599 uint8_t const *pb = (uint8_t const *)pv;
3600 for (; cb; cb--, pb++)
3601 if (RT_UNLIKELY(*pb != u8))
3602 return (void *)pb;
3603 return NULL;
3604}
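
/*
 * Usage sketch (illustrative only): using ASMMemIsAll8 to check that a structure's
 * padding is still all zeros (NULL means every byte matched).  The MYDESC type and
 * function name are hypothetical.
 *
 *     #include <iprt/asm.h>
 *
 *     typedef struct MYDESC { uint32_t u32Magic; uint8_t abPadding[12]; } MYDESC;
 *
 *     static bool myDescPaddingIsZero(MYDESC const *pDesc)
 *     {
 *         return ASMMemIsAll8(pDesc->abPadding, sizeof(pDesc->abPadding), 0) == NULL;
 *     }
 */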
3605
3606
3607/**
3608 * Checks if a memory block is filled with the specified 32-bit value.
3609 *
3610 * This is a sort of inverted memchr.
3611 *
3612 * @returns Pointer to the first value which doesn't equal u32.
3613 * @returns NULL if all equal to u32.
3614 *
3615 * @param pv Pointer to the memory block.
3616 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3617 * @param u32 The value it's supposed to be filled with.
3618 *
3619 * @todo Fix name, it is a predicate function but it's not returning boolean!
3620 */
3621DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3622{
3623/** @todo rewrite this in inline assembly? */
3624 uint32_t const *pu32 = (uint32_t const *)pv;
3625 for (; cb; cb -= 4, pu32++)
3626 if (RT_UNLIKELY(*pu32 != u32))
3627 return (uint32_t *)pu32;
3628 return NULL;
3629}
3630
3631
3632/**
3633 * Probes a byte pointer for read access.
3634 *
3635 * While the function will fault if the byte is not read accessible,
3636 * the idea is to do this in a safe place like before acquiring locks
3637 * and such like.
3638 *
3639 * Also, this function guarantees that an eager compiler is not going
3640 * to optimize the probing away.
3641 *
3642 * @param pvByte Pointer to the byte.
3643 */
3644#if RT_INLINE_ASM_EXTERNAL
3645DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3646#else
3647DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3648{
3649 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3650 uint8_t u8;
3651# if RT_INLINE_ASM_GNU_STYLE
3652 __asm__ __volatile__("movb (%1), %0\n\t"
3653 : "=r" (u8)
3654 : "r" (pvByte));
3655# else
3656 __asm
3657 {
3658# ifdef RT_ARCH_AMD64
3659 mov rax, [pvByte]
3660 mov al, [rax]
3661# else
3662 mov eax, [pvByte]
3663 mov al, [eax]
3664# endif
3665 mov [u8], al
3666 }
3667# endif
3668 return u8;
3669}
3670#endif
3671
3672/**
3673 * Probes a buffer for read access page by page.
3674 *
3675 * While the function will fault if the buffer is not fully read
3676 * accessible, the idea is to do this in a safe place like before
3677 * acquiring locks and such like.
3678 *
3679 * Also, this function guarantees that an eager compiler is not going
3680 * to optimize the probing away.
3681 *
3682 * @param pvBuf Pointer to the buffer.
3683 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3684 */
3685DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3686{
3687 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3688 /* the first byte */
3689 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3690 ASMProbeReadByte(pu8);
3691
3692    /* the pages in between. */
3693 while (cbBuf > RT_ASM_PAGE_SIZE)
3694 {
3695 ASMProbeReadByte(pu8);
3696 cbBuf -= RT_ASM_PAGE_SIZE;
3697 pu8 += RT_ASM_PAGE_SIZE;
3698 }
3699
3700 /* the last byte */
3701 ASMProbeReadByte(pu8 + cbBuf - 1);
3702}
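
/*
 * Usage sketch (illustrative only): probing a caller supplied buffer page by page before
 * taking a lock, so that a bad pointer faults in a safe context.  The critical section
 * calls are assumed to come from iprt/critsect.h; the names are hypothetical and cbSrc
 * must be at least 1.
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/critsect.h>
 *
 *     static int myCopyInUnderLock(RTCRITSECT *pCritSect, const void *pvSrc, size_t cbSrc)
 *     {
 *         ASMProbeReadBuffer(pvSrc, cbSrc);    // fault here, not while holding the lock
 *         RTCritSectEnter(pCritSect);
 *         // ... copy pvSrc into the protected structure ...
 *         return RTCritSectLeave(pCritSect);
 *     }
 */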
3703
3704
3705
3706/** @defgroup grp_inline_bits Bit Operations
3707 * @{
3708 */
3709
3710
3711/**
3712 * Sets a bit in a bitmap.
3713 *
3714 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3715 * @param iBit The bit to set.
3716 *
3717 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3718 * However, doing so will yield better performance as well as avoiding
3719 * traps accessing the last bits in the bitmap.
3720 */
3721#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3722DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3723#else
3724DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3725{
3726# if RT_INLINE_ASM_USES_INTRIN
3727 _bittestandset((long *)pvBitmap, iBit);
3728
3729# elif RT_INLINE_ASM_GNU_STYLE
3730 __asm__ __volatile__("btsl %1, %0"
3731 : "=m" (*(volatile long *)pvBitmap)
3732 : "Ir" (iBit),
3733 "m" (*(volatile long *)pvBitmap)
3734 : "memory");
3735# else
3736 __asm
3737 {
3738# ifdef RT_ARCH_AMD64
3739 mov rax, [pvBitmap]
3740 mov edx, [iBit]
3741 bts [rax], edx
3742# else
3743 mov eax, [pvBitmap]
3744 mov edx, [iBit]
3745 bts [eax], edx
3746# endif
3747 }
3748# endif
3749}
3750#endif
3751
3752
3753/**
3754 * Atomically sets a bit in a bitmap, ordered.
3755 *
3756 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3757 * the memory access isn't atomic!
3758 * @param iBit The bit to set.
3759 */
3760#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3761DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3762#else
3763DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3764{
3765 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3766# if RT_INLINE_ASM_USES_INTRIN
3767 _interlockedbittestandset((long *)pvBitmap, iBit);
3768# elif RT_INLINE_ASM_GNU_STYLE
3769 __asm__ __volatile__("lock; btsl %1, %0"
3770 : "=m" (*(volatile long *)pvBitmap)
3771 : "Ir" (iBit),
3772 "m" (*(volatile long *)pvBitmap)
3773 : "memory");
3774# else
3775 __asm
3776 {
3777# ifdef RT_ARCH_AMD64
3778 mov rax, [pvBitmap]
3779 mov edx, [iBit]
3780 lock bts [rax], edx
3781# else
3782 mov eax, [pvBitmap]
3783 mov edx, [iBit]
3784 lock bts [eax], edx
3785# endif
3786 }
3787# endif
3788}
3789#endif
3790
3791
3792/**
3793 * Clears a bit in a bitmap.
3794 *
3795 * @param pvBitmap Pointer to the bitmap.
3796 * @param iBit The bit to clear.
3797 *
3798 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3799 * However, doing so will yield better performance as well as avoiding
3800 * traps accessing the last bits in the bitmap.
3801 */
3802#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3803DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3804#else
3805DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3806{
3807# if RT_INLINE_ASM_USES_INTRIN
3808 _bittestandreset((long *)pvBitmap, iBit);
3809
3810# elif RT_INLINE_ASM_GNU_STYLE
3811 __asm__ __volatile__("btrl %1, %0"
3812 : "=m" (*(volatile long *)pvBitmap)
3813 : "Ir" (iBit),
3814 "m" (*(volatile long *)pvBitmap)
3815 : "memory");
3816# else
3817 __asm
3818 {
3819# ifdef RT_ARCH_AMD64
3820 mov rax, [pvBitmap]
3821 mov edx, [iBit]
3822 btr [rax], edx
3823# else
3824 mov eax, [pvBitmap]
3825 mov edx, [iBit]
3826 btr [eax], edx
3827# endif
3828 }
3829# endif
3830}
3831#endif
3832
3833
3834/**
3835 * Atomically clears a bit in a bitmap, ordered.
3836 *
3837 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3838 * the memory access isn't atomic!
3839 * @param   iBit    The bit to clear.
3840 * @remarks No memory barrier, take care on SMP.
3841 */
3842#if RT_INLINE_ASM_EXTERNAL
3843DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3844#else
3845DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3846{
3847 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3848# if RT_INLINE_ASM_GNU_STYLE
3849 __asm__ __volatile__("lock; btrl %1, %0"
3850 : "=m" (*(volatile long *)pvBitmap)
3851 : "Ir" (iBit),
3852 "m" (*(volatile long *)pvBitmap)
3853 : "memory");
3854# else
3855 __asm
3856 {
3857# ifdef RT_ARCH_AMD64
3858 mov rax, [pvBitmap]
3859 mov edx, [iBit]
3860 lock btr [rax], edx
3861# else
3862 mov eax, [pvBitmap]
3863 mov edx, [iBit]
3864 lock btr [eax], edx
3865# endif
3866 }
3867# endif
3868}
3869#endif
3870
3871
3872/**
3873 * Toggles a bit in a bitmap.
3874 *
3875 * @param pvBitmap Pointer to the bitmap.
3876 * @param iBit The bit to toggle.
3877 *
3878 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3879 * However, doing so will yield better performance as well as avoiding
3880 * traps accessing the last bits in the bitmap.
3881 */
3882#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3883DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3884#else
3885DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3886{
3887# if RT_INLINE_ASM_USES_INTRIN
3888 _bittestandcomplement((long *)pvBitmap, iBit);
3889# elif RT_INLINE_ASM_GNU_STYLE
3890 __asm__ __volatile__("btcl %1, %0"
3891 : "=m" (*(volatile long *)pvBitmap)
3892 : "Ir" (iBit),
3893 "m" (*(volatile long *)pvBitmap)
3894 : "memory");
3895# else
3896 __asm
3897 {
3898# ifdef RT_ARCH_AMD64
3899 mov rax, [pvBitmap]
3900 mov edx, [iBit]
3901 btc [rax], edx
3902# else
3903 mov eax, [pvBitmap]
3904 mov edx, [iBit]
3905 btc [eax], edx
3906# endif
3907 }
3908# endif
3909}
3910#endif
3911
3912
3913/**
3914 * Atomically toggles a bit in a bitmap, ordered.
3915 *
3916 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3917 * the memory access isn't atomic!
3918 * @param   iBit    The bit to toggle.
3919 */
3920#if RT_INLINE_ASM_EXTERNAL
3921DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3922#else
3923DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3924{
3925 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3926# if RT_INLINE_ASM_GNU_STYLE
3927 __asm__ __volatile__("lock; btcl %1, %0"
3928 : "=m" (*(volatile long *)pvBitmap)
3929 : "Ir" (iBit),
3930 "m" (*(volatile long *)pvBitmap)
3931 : "memory");
3932# else
3933 __asm
3934 {
3935# ifdef RT_ARCH_AMD64
3936 mov rax, [pvBitmap]
3937 mov edx, [iBit]
3938 lock btc [rax], edx
3939# else
3940 mov eax, [pvBitmap]
3941 mov edx, [iBit]
3942 lock btc [eax], edx
3943# endif
3944 }
3945# endif
3946}
3947#endif
3948
3949
3950/**
3951 * Tests and sets a bit in a bitmap.
3952 *
3953 * @returns true if the bit was set.
3954 * @returns false if the bit was clear.
3955 *
3956 * @param pvBitmap Pointer to the bitmap.
3957 * @param iBit The bit to test and set.
3958 *
3959 * @remarks 32-bit alignment of pvBitmap is not a strict requirement; however,
3960 * aligned accesses perform better and avoid traps when accessing the
3961 * last bits in the bitmap.
3962 */
3963#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3964DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3965#else
3966DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3967{
3968 union { bool f; uint32_t u32; uint8_t u8; } rc;
3969# if RT_INLINE_ASM_USES_INTRIN
3970 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3971
3972# elif RT_INLINE_ASM_GNU_STYLE
3973 __asm__ __volatile__("btsl %2, %1\n\t"
3974 "setc %b0\n\t"
3975 "andl $1, %0\n\t"
3976 : "=q" (rc.u32),
3977 "=m" (*(volatile long *)pvBitmap)
3978 : "Ir" (iBit),
3979 "m" (*(volatile long *)pvBitmap)
3980 : "memory");
3981# else
3982 __asm
3983 {
3984 mov edx, [iBit]
3985# ifdef RT_ARCH_AMD64
3986 mov rax, [pvBitmap]
3987 bts [rax], edx
3988# else
3989 mov eax, [pvBitmap]
3990 bts [eax], edx
3991# endif
3992 setc al
3993 and eax, 1
3994 mov [rc.u32], eax
3995 }
3996# endif
3997 return rc.f;
3998}
3999#endif
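
/* Usage sketch for ASMBitTestAndSet: a "first time through?" check on a
 * bitmap that only one thread touches (no lock prefix is emitted). The
 * bitmap, its size and idItem are illustrative assumptions.
 * @code
 *     static uint32_t s_bmSeen[8];                    // tracks 256 item IDs.
 *     uint32_t idItem = 42;                           // comes from elsewhere in real code.
 *     if (!ASMBitTestAndSet(&s_bmSeen[0], idItem))    // returns the old value, sets the bit.
 *     {
 *         // first time we see idItem - do the one-time work here.
 *     }
 * @endcode
 */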
4000
4001
4002/**
4003 * Atomically tests and sets a bit in a bitmap, ordered.
4004 *
4005 * @returns true if the bit was set.
4006 * @returns false if the bit was clear.
4007 *
4008 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4009 * the memory access isn't atomic!
4010 * @param iBit The bit to test and set.
4011 */
4012#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4013DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4014#else
4015DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4016{
4017 union { bool f; uint32_t u32; uint8_t u8; } rc;
4018 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4019# if RT_INLINE_ASM_USES_INTRIN
4020 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4021# elif RT_INLINE_ASM_GNU_STYLE
4022 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4023 "setc %b0\n\t"
4024 "andl $1, %0\n\t"
4025 : "=q" (rc.u32),
4026 "=m" (*(volatile long *)pvBitmap)
4027 : "Ir" (iBit),
4028 "m" (*(volatile long *)pvBitmap)
4029 : "memory");
4030# else
4031 __asm
4032 {
4033 mov edx, [iBit]
4034# ifdef RT_ARCH_AMD64
4035 mov rax, [pvBitmap]
4036 lock bts [rax], edx
4037# else
4038 mov eax, [pvBitmap]
4039 lock bts [eax], edx
4040# endif
4041 setc al
4042 and eax, 1
4043 mov [rc.u32], eax
4044 }
4045# endif
4046 return rc.f;
4047}
4048#endif
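
/* Usage sketch for ASMAtomicBitTestAndSet: claiming a slot in a shared
 * allocation bitmap; exactly one thread sees false (bit previously clear)
 * and thus owns the slot. Names and sizes are illustrative assumptions.
 * @code
 *     static volatile uint32_t s_bmSlotsInUse[2];     // 64 slots, 32-bit aligned.
 *     int32_t iSlot = ASMBitFirstClear(&s_bmSlotsInUse[0], 64);
 *     if (iSlot >= 0 && !ASMAtomicBitTestAndSet(&s_bmSlotsInUse[0], iSlot))
 *     {
 *         // this thread now owns slot iSlot.
 *     }
 * @endcode
 */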
4049
4050
4051/**
4052 * Tests and clears a bit in a bitmap.
4053 *
4054 * @returns true if the bit was set.
4055 * @returns false if the bit was clear.
4056 *
4057 * @param pvBitmap Pointer to the bitmap.
4058 * @param iBit The bit to test and clear.
4059 *
4060 * @remarks 32-bit alignment of pvBitmap is not a strict requirement; however,
4061 * aligned accesses perform better and avoid traps when accessing the
4062 * last bits in the bitmap.
4063 */
4064#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4065DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4066#else
4067DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4068{
4069 union { bool f; uint32_t u32; uint8_t u8; } rc;
4070# if RT_INLINE_ASM_USES_INTRIN
4071 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4072
4073# elif RT_INLINE_ASM_GNU_STYLE
4074 __asm__ __volatile__("btrl %2, %1\n\t"
4075 "setc %b0\n\t"
4076 "andl $1, %0\n\t"
4077 : "=q" (rc.u32),
4078 "=m" (*(volatile long *)pvBitmap)
4079 : "Ir" (iBit),
4080 "m" (*(volatile long *)pvBitmap)
4081 : "memory");
4082# else
4083 __asm
4084 {
4085 mov edx, [iBit]
4086# ifdef RT_ARCH_AMD64
4087 mov rax, [pvBitmap]
4088 btr [rax], edx
4089# else
4090 mov eax, [pvBitmap]
4091 btr [eax], edx
4092# endif
4093 setc al
4094 and eax, 1
4095 mov [rc.u32], eax
4096 }
4097# endif
4098 return rc.f;
4099}
4100#endif
4101
4102
4103/**
4104 * Atomically tests and clears a bit in a bitmap, ordered.
4105 *
4106 * @returns true if the bit was set.
4107 * @returns false if the bit was clear.
4108 *
4109 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4110 * the memory access isn't atomic!
4111 * @param iBit The bit to test and clear.
4112 *
4113 * @remarks No memory barrier, take care on smp.
4114 */
4115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4116DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4117#else
4118DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4119{
4120 union { bool f; uint32_t u32; uint8_t u8; } rc;
4121 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4122# if RT_INLINE_ASM_USES_INTRIN
4123 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4124
4125# elif RT_INLINE_ASM_GNU_STYLE
4126 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4127 "setc %b0\n\t"
4128 "andl $1, %0\n\t"
4129 : "=q" (rc.u32),
4130 "=m" (*(volatile long *)pvBitmap)
4131 : "Ir" (iBit),
4132 "m" (*(volatile long *)pvBitmap)
4133 : "memory");
4134# else
4135 __asm
4136 {
4137 mov edx, [iBit]
4138# ifdef RT_ARCH_AMD64
4139 mov rax, [pvBitmap]
4140 lock btr [rax], edx
4141# else
4142 mov eax, [pvBitmap]
4143 lock btr [eax], edx
4144# endif
4145 setc al
4146 and eax, 1
4147 mov [rc.u32], eax
4148 }
4149# endif
4150 return rc.f;
4151}
4152#endif
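
/* Usage sketch for ASMAtomicBitTestAndClear: consuming a work-pending flag
 * set by a producer thread; the consumer only acts when the bit really was
 * set. The bitmap variable and bit number are illustrative assumptions.
 * @code
 *     static volatile uint32_t s_bmWorkPending = 0;
 *     if (ASMAtomicBitTestAndClear(&s_bmWorkPending, 3))
 *     {
 *         // the flag was set and is now cleared; process work item 3.
 *     }
 * @endcode
 */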
4153
4154
4155/**
4156 * Tests and toggles a bit in a bitmap.
4157 *
4158 * @returns true if the bit was set.
4159 * @returns false if the bit was clear.
4160 *
4161 * @param pvBitmap Pointer to the bitmap.
4162 * @param iBit The bit to test and toggle.
4163 *
4164 * @remarks 32-bit alignment of pvBitmap is not a strict requirement; however,
4165 * aligned accesses perform better and avoid traps when accessing the
4166 * last bits in the bitmap.
4167 */
4168#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4169DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4170#else
4171DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4172{
4173 union { bool f; uint32_t u32; uint8_t u8; } rc;
4174# if RT_INLINE_ASM_USES_INTRIN
4175 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4176
4177# elif RT_INLINE_ASM_GNU_STYLE
4178 __asm__ __volatile__("btcl %2, %1\n\t"
4179 "setc %b0\n\t"
4180 "andl $1, %0\n\t"
4181 : "=q" (rc.u32),
4182 "=m" (*(volatile long *)pvBitmap)
4183 : "Ir" (iBit),
4184 "m" (*(volatile long *)pvBitmap)
4185 : "memory");
4186# else
4187 __asm
4188 {
4189 mov edx, [iBit]
4190# ifdef RT_ARCH_AMD64
4191 mov rax, [pvBitmap]
4192 btc [rax], edx
4193# else
4194 mov eax, [pvBitmap]
4195 btc [eax], edx
4196# endif
4197 setc al
4198 and eax, 1
4199 mov [rc.u32], eax
4200 }
4201# endif
4202 return rc.f;
4203}
4204#endif
4205
4206
4207/**
4208 * Atomically tests and toggles a bit in a bitmap, ordered.
4209 *
4210 * @returns true if the bit was set.
4211 * @returns false if the bit was clear.
4212 *
4213 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4214 * the memory access isn't atomic!
4215 * @param iBit The bit to test and toggle.
4216 */
4217#if RT_INLINE_ASM_EXTERNAL
4218DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4219#else
4220DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4221{
4222 union { bool f; uint32_t u32; uint8_t u8; } rc;
4223 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4224# if RT_INLINE_ASM_GNU_STYLE
4225 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4226 "setc %b0\n\t"
4227 "andl $1, %0\n\t"
4228 : "=q" (rc.u32),
4229 "=m" (*(volatile long *)pvBitmap)
4230 : "Ir" (iBit),
4231 "m" (*(volatile long *)pvBitmap)
4232 : "memory");
4233# else
4234 __asm
4235 {
4236 mov edx, [iBit]
4237# ifdef RT_ARCH_AMD64
4238 mov rax, [pvBitmap]
4239 lock btc [rax], edx
4240# else
4241 mov eax, [pvBitmap]
4242 lock btc [eax], edx
4243# endif
4244 setc al
4245 and eax, 1
4246 mov [rc.u32], eax
4247 }
4248# endif
4249 return rc.f;
4250}
4251#endif
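
/* Usage sketch for ASMAtomicBitTestAndToggle: flipping a shared selector bit
 * (e.g. which of two buffers is "current") and learning its previous state in
 * one atomic step. The variable is an illustrative assumption.
 * @code
 *     static volatile uint32_t s_fSelector = 0;
 *     bool fWasSet = ASMAtomicBitTestAndToggle(&s_fSelector, 0);
 *     // fWasSet tells which buffer was current before the flip.
 * @endcode
 */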
4252
4253
4254/**
4255 * Tests if a bit in a bitmap is set.
4256 *
4257 * @returns true if the bit is set.
4258 * @returns false if the bit is clear.
4259 *
4260 * @param pvBitmap Pointer to the bitmap.
4261 * @param iBit The bit to test.
4262 *
4263 * @remarks 32-bit alignment of pvBitmap is not a strict requirement; however,
4264 * aligned accesses perform better and avoid traps when accessing the
4265 * last bits in the bitmap.
4266 */
4267#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4268DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4269#else
4270DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4271{
4272 union { bool f; uint32_t u32; uint8_t u8; } rc;
4273# if RT_INLINE_ASM_USES_INTRIN
4274 rc.u32 = _bittest((long *)pvBitmap, iBit);
4275# elif RT_INLINE_ASM_GNU_STYLE
4276
4277 __asm__ __volatile__("btl %2, %1\n\t"
4278 "setc %b0\n\t"
4279 "andl $1, %0\n\t"
4280 : "=q" (rc.u32)
4281 : "m" (*(const volatile long *)pvBitmap),
4282 "Ir" (iBit)
4283 : "memory");
4284# else
4285 __asm
4286 {
4287 mov edx, [iBit]
4288# ifdef RT_ARCH_AMD64
4289 mov rax, [pvBitmap]
4290 bt [rax], edx
4291# else
4292 mov eax, [pvBitmap]
4293 bt [eax], edx
4294# endif
4295 setc al
4296 and eax, 1
4297 mov [rc.u32], eax
4298 }
4299# endif
4300 return rc.f;
4301}
4302#endif
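
/* Usage sketch for ASMBitTest: a plain read-only check of one bit; nothing is
 * modified. The bitmap layout is an illustrative assumption.
 * @code
 *     static uint32_t s_bmFeatures[4];        // 128 feature bits.
 *     if (ASMBitTest(&s_bmFeatures[0], 17))
 *     {
 *         // feature 17 is enabled.
 *     }
 * @endcode
 */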
4303
4304
4305/**
4306 * Clears a bit range within a bitmap.
4307 *
4308 * @param pvBitmap Pointer to the bitmap.
4309 * @param iBitStart The first bit to clear.
4310 * @param iBitEnd The first bit not to clear.
4311 */
4312DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4313{
4314 if (iBitStart < iBitEnd)
4315 {
4316 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4317 int iStart = iBitStart & ~31;
4318 int iEnd = iBitEnd & ~31;
4319 if (iStart == iEnd)
4320 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4321 else
4322 {
4323 /* bits in first dword. */
4324 if (iBitStart & 31)
4325 {
4326 *pu32 &= (1 << (iBitStart & 31)) - 1;
4327 pu32++;
4328 iBitStart = iStart + 32;
4329 }
4330
4331 /* whole dword. */
4332 if (iBitStart != iEnd)
4333 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4334
4335 /* bits in last dword. */
4336 if (iBitEnd & 31)
4337 {
4338 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4339 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4340 }
4341 }
4342 }
4343}
4344
4345
4346/**
4347 * Sets a bit range within a bitmap.
4348 *
4349 * @param pvBitmap Pointer to the bitmap.
4350 * @param iBitStart The first bit to set.
4351 * @param iBitEnd The first bit not to set.
4352 */
4353DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4354{
4355 if (iBitStart < iBitEnd)
4356 {
4357 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4358 int iStart = iBitStart & ~31;
4359 int iEnd = iBitEnd & ~31;
4360 if (iStart == iEnd)
4361 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4362 else
4363 {
4364 /* bits in first dword. */
4365 if (iBitStart & 31)
4366 {
4367 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4368 pu32++;
4369 iBitStart = iStart + 32;
4370 }
4371
4372 /* whole dword. */
4373 if (iBitStart != iEnd)
4374 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4375
4376 /* bits in last dword. */
4377 if (iBitEnd & 31)
4378 {
4379 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4380 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4381 }
4382 }
4383 }
4384}
4385
4386
4387/**
4388 * Finds the first clear bit in a bitmap.
4389 *
4390 * @returns Index of the first zero bit.
4391 * @returns -1 if no clear bit was found.
4392 * @param pvBitmap Pointer to the bitmap.
4393 * @param cBits The number of bits in the bitmap. Multiple of 32.
4394 */
4395#if RT_INLINE_ASM_EXTERNAL
4396DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4397#else
4398DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4399{
4400 if (cBits)
4401 {
4402 int32_t iBit;
4403# if RT_INLINE_ASM_GNU_STYLE
4404 RTCCUINTREG uEAX, uECX, uEDI;
4405 cBits = RT_ALIGN_32(cBits, 32);
4406 __asm__ __volatile__("repe; scasl\n\t"
4407 "je 1f\n\t"
4408# ifdef RT_ARCH_AMD64
4409 "lea -4(%%rdi), %%rdi\n\t"
4410 "xorl (%%rdi), %%eax\n\t"
4411 "subq %5, %%rdi\n\t"
4412# else
4413 "lea -4(%%edi), %%edi\n\t"
4414 "xorl (%%edi), %%eax\n\t"
4415 "subl %5, %%edi\n\t"
4416# endif
4417 "shll $3, %%edi\n\t"
4418 "bsfl %%eax, %%edx\n\t"
4419 "addl %%edi, %%edx\n\t"
4420 "1:\t\n"
4421 : "=d" (iBit),
4422 "=&c" (uECX),
4423 "=&D" (uEDI),
4424 "=&a" (uEAX)
4425 : "0" (0xffffffff),
4426 "mr" (pvBitmap),
4427 "1" (cBits >> 5),
4428 "2" (pvBitmap),
4429 "3" (0xffffffff));
4430# else
4431 cBits = RT_ALIGN_32(cBits, 32);
4432 __asm
4433 {
4434# ifdef RT_ARCH_AMD64
4435 mov rdi, [pvBitmap]
4436 mov rbx, rdi
4437# else
4438 mov edi, [pvBitmap]
4439 mov ebx, edi
4440# endif
4441 mov edx, 0ffffffffh
4442 mov eax, edx
4443 mov ecx, [cBits]
4444 shr ecx, 5
4445 repe scasd
4446 je done
4447
4448# ifdef RT_ARCH_AMD64
4449 lea rdi, [rdi - 4]
4450 xor eax, [rdi]
4451 sub rdi, rbx
4452# else
4453 lea edi, [edi - 4]
4454 xor eax, [edi]
4455 sub edi, ebx
4456# endif
4457 shl edi, 3
4458 bsf edx, eax
4459 add edx, edi
4460 done:
4461 mov [iBit], edx
4462 }
4463# endif
4464 return iBit;
4465 }
4466 return -1;
4467}
4468#endif
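
/* Usage sketch for ASMBitFirstClear: locating a free entry in a bitmap;
 * -1 means everything is in use. The size is an illustrative assumption
 * (cBits must be a multiple of 32).
 * @code
 *     static uint32_t s_bmInUse[4];                   // 128 entries.
 *     int32_t iFree = ASMBitFirstClear(&s_bmInUse[0], 128);
 *     if (iFree >= 0)
 *     {
 *         // entry iFree is available.
 *     }
 * @endcode
 */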
4469
4470
4471/**
4472 * Finds the next clear bit in a bitmap.
4473 *
4474 * @returns Index of the next clear bit.
4475 * @returns -1 if no clear bit was found.
4476 * @param pvBitmap Pointer to the bitmap.
4477 * @param cBits The number of bits in the bitmap. Multiple of 32.
4478 * @param iBitPrev The bit returned from the last search.
4479 * The search will start at iBitPrev + 1.
4480 */
4481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4482DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4483#else
4484DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4485{
4486 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4487 int iBit = ++iBitPrev & 31;
4488 if (iBit)
4489 {
4490 /*
4491 * Inspect the 32-bit word containing the unaligned bit.
4492 */
4493 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4494
4495# if RT_INLINE_ASM_USES_INTRIN
4496 unsigned long ulBit = 0;
4497 if (_BitScanForward(&ulBit, u32))
4498 return ulBit + iBitPrev;
4499# else
4500# if RT_INLINE_ASM_GNU_STYLE
4501 __asm__ __volatile__("bsf %1, %0\n\t"
4502 "jnz 1f\n\t"
4503 "movl $-1, %0\n\t"
4504 "1:\n\t"
4505 : "=r" (iBit)
4506 : "r" (u32));
4507# else
4508 __asm
4509 {
4510 mov edx, [u32]
4511 bsf eax, edx
4512 jnz done
4513 mov eax, 0ffffffffh
4514 done:
4515 mov [iBit], eax
4516 }
4517# endif
4518 if (iBit >= 0)
4519 return iBit + iBitPrev;
4520# endif
4521
4522 /*
4523 * Skip ahead and see if there is anything left to search.
4524 */
4525 iBitPrev |= 31;
4526 iBitPrev++;
4527 if (cBits <= (uint32_t)iBitPrev)
4528 return -1;
4529 }
4530
4531 /*
4532 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4533 */
4534 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4535 if (iBit >= 0)
4536 iBit += iBitPrev;
4537 return iBit;
4538}
4539#endif
4540
4541
4542/**
4543 * Finds the first set bit in a bitmap.
4544 *
4545 * @returns Index of the first set bit.
4546 * @returns -1 if no set bit was found.
4547 * @param pvBitmap Pointer to the bitmap.
4548 * @param cBits The number of bits in the bitmap. Multiple of 32.
4549 */
4550#if RT_INLINE_ASM_EXTERNAL
4551DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4552#else
4553DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4554{
4555 if (cBits)
4556 {
4557 int32_t iBit;
4558# if RT_INLINE_ASM_GNU_STYLE
4559 RTCCUINTREG uEAX, uECX, uEDI;
4560 cBits = RT_ALIGN_32(cBits, 32);
4561 __asm__ __volatile__("repe; scasl\n\t"
4562 "je 1f\n\t"
4563# ifdef RT_ARCH_AMD64
4564 "lea -4(%%rdi), %%rdi\n\t"
4565 "movl (%%rdi), %%eax\n\t"
4566 "subq %5, %%rdi\n\t"
4567# else
4568 "lea -4(%%edi), %%edi\n\t"
4569 "movl (%%edi), %%eax\n\t"
4570 "subl %5, %%edi\n\t"
4571# endif
4572 "shll $3, %%edi\n\t"
4573 "bsfl %%eax, %%edx\n\t"
4574 "addl %%edi, %%edx\n\t"
4575 "1:\t\n"
4576 : "=d" (iBit),
4577 "=&c" (uECX),
4578 "=&D" (uEDI),
4579 "=&a" (uEAX)
4580 : "0" (0xffffffff),
4581 "mr" (pvBitmap),
4582 "1" (cBits >> 5),
4583 "2" (pvBitmap),
4584 "3" (0));
4585# else
4586 cBits = RT_ALIGN_32(cBits, 32);
4587 __asm
4588 {
4589# ifdef RT_ARCH_AMD64
4590 mov rdi, [pvBitmap]
4591 mov rbx, rdi
4592# else
4593 mov edi, [pvBitmap]
4594 mov ebx, edi
4595# endif
4596 mov edx, 0ffffffffh
4597 xor eax, eax
4598 mov ecx, [cBits]
4599 shr ecx, 5
4600 repe scasd
4601 je done
4602# ifdef RT_ARCH_AMD64
4603 lea rdi, [rdi - 4]
4604 mov eax, [rdi]
4605 sub rdi, rbx
4606# else
4607 lea edi, [edi - 4]
4608 mov eax, [edi]
4609 sub edi, ebx
4610# endif
4611 shl edi, 3
4612 bsf edx, eax
4613 add edx, edi
4614 done:
4615 mov [iBit], edx
4616 }
4617# endif
4618 return iBit;
4619 }
4620 return -1;
4621}
4622#endif
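
/* Usage sketch for ASMBitFirstSet: finding the lowest numbered pending item
 * in a bitmap; -1 means nothing is pending. The bitmap is an illustrative
 * assumption (cBits must be a multiple of 32).
 * @code
 *     static uint32_t s_bmPending[2];                 // 64 items.
 *     int32_t iPending = ASMBitFirstSet(&s_bmPending[0], 64);
 *     if (iPending >= 0)
 *     {
 *         // handle item iPending.
 *     }
 * @endcode
 */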
4623
4624
4625/**
4626 * Finds the next set bit in a bitmap.
4627 *
4628 * @returns Index of the next set bit.
4629 * @returns -1 if no set bit was found.
4630 * @param pvBitmap Pointer to the bitmap.
4631 * @param cBits The number of bits in the bitmap. Multiple of 32.
4632 * @param iBitPrev The bit returned from the last search.
4633 * The search will start at iBitPrev + 1.
4634 */
4635#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4636DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4637#else
4638DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4639{
4640 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4641 int iBit = ++iBitPrev & 31;
4642 if (iBit)
4643 {
4644 /*
4645 * Inspect the 32-bit word containing the unaligned bit.
4646 */
4647 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4648
4649# if RT_INLINE_ASM_USES_INTRIN
4650 unsigned long ulBit = 0;
4651 if (_BitScanForward(&ulBit, u32))
4652 return ulBit + iBitPrev;
4653# else
4654# if RT_INLINE_ASM_GNU_STYLE
4655 __asm__ __volatile__("bsf %1, %0\n\t"
4656 "jnz 1f\n\t"
4657 "movl $-1, %0\n\t"
4658 "1:\n\t"
4659 : "=r" (iBit)
4660 : "r" (u32));
4661# else
4662 __asm
4663 {
4664 mov edx, [u32]
4665 bsf eax, edx
4666 jnz done
4667 mov eax, 0ffffffffh
4668 done:
4669 mov [iBit], eax
4670 }
4671# endif
4672 if (iBit >= 0)
4673 return iBit + iBitPrev;
4674# endif
4675
4676 /*
4677 * Skip ahead and see if there is anything left to search.
4678 */
4679 iBitPrev |= 31;
4680 iBitPrev++;
4681 if (cBits <= (uint32_t)iBitPrev)
4682 return -1;
4683 }
4684
4685 /*
4686 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
4687 */
4688 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4689 if (iBit >= 0)
4690 iBit += iBitPrev;
4691 return iBit;
4692}
4693#endif
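
/* Usage sketch for ASMBitNextSet: walking every set bit in a bitmap by
 * feeding the previous hit back in as iBitPrev. The bitmap is an illustrative
 * assumption (cBits must be a multiple of 32).
 * @code
 *     static uint32_t s_bmPending[2];                 // 64 items.
 *     int32_t iBit = ASMBitFirstSet(&s_bmPending[0], 64);
 *     while (iBit >= 0)
 *     {
 *         // ... process item iBit ...
 *         iBit = ASMBitNextSet(&s_bmPending[0], 64, iBit);
 *     }
 * @endcode
 */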
4694
4695
4696/**
4697 * Finds the first bit which is set in the given 32-bit integer.
4698 * Bits are numbered from 1 (least significant) to 32.
4699 *
4700 * @returns index [1..32] of the first set bit.
4701 * @returns 0 if all bits are cleared.
4702 * @param u32 Integer to search for set bits.
4703 * @remark Similar to ffs() in BSD.
4704 */
4705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4706DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4707#else
4708DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4709{
4710# if RT_INLINE_ASM_USES_INTRIN
4711 unsigned long iBit;
4712 if (_BitScanForward(&iBit, u32))
4713 iBit++;
4714 else
4715 iBit = 0;
4716# elif RT_INLINE_ASM_GNU_STYLE
4717 uint32_t iBit;
4718 __asm__ __volatile__("bsf %1, %0\n\t"
4719 "jnz 1f\n\t"
4720 "xorl %0, %0\n\t"
4721 "jmp 2f\n"
4722 "1:\n\t"
4723 "incl %0\n"
4724 "2:\n\t"
4725 : "=r" (iBit)
4726 : "rm" (u32));
4727# else
4728 uint32_t iBit;
4729 _asm
4730 {
4731 bsf eax, [u32]
4732 jnz found
4733 xor eax, eax
4734 jmp done
4735 found:
4736 inc eax
4737 done:
4738 mov [iBit], eax
4739 }
4740# endif
4741 return iBit;
4742}
4743#endif
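
/* Usage sketch for ASMBitFirstSetU32: note the 1-based result, so 0 can mean
 * "no bit set"; subtract 1 to get a 0-based bit index.
 * @code
 *     unsigned iBit = ASMBitFirstSetU32(UINT32_C(0x00000050)); // returns 5 (bit 4 is the lowest set bit).
 *     if (iBit)
 *     {
 *         unsigned iBit0Based = iBit - 1;                      // 4
 *     }
 * @endcode
 */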
4744
4745
4746/**
4747 * Finds the first bit which is set in the given 32-bit integer.
4748 * Bits are numbered from 1 (least significant) to 32.
4749 *
4750 * @returns index [1..32] of the first set bit.
4751 * @returns 0 if all bits are cleared.
4752 * @param i32 Integer to search for set bits.
4753 * @remark Similar to ffs() in BSD.
4754 */
4755DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4756{
4757 return ASMBitFirstSetU32((uint32_t)i32);
4758}
4759
4760
4761/**
4762 * Finds the last bit which is set in the given 32-bit integer.
4763 * Bits are numbered from 1 (least significant) to 32.
4764 *
4765 * @returns index [1..32] of the last set bit.
4766 * @returns 0 if all bits are cleared.
4767 * @param u32 Integer to search for set bits.
4768 * @remark Similar to fls() in BSD.
4769 */
4770#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4771DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4772#else
4773DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4774{
4775# if RT_INLINE_ASM_USES_INTRIN
4776 unsigned long iBit;
4777 if (_BitScanReverse(&iBit, u32))
4778 iBit++;
4779 else
4780 iBit = 0;
4781# elif RT_INLINE_ASM_GNU_STYLE
4782 uint32_t iBit;
4783 __asm__ __volatile__("bsrl %1, %0\n\t"
4784 "jnz 1f\n\t"
4785 "xorl %0, %0\n\t"
4786 "jmp 2f\n"
4787 "1:\n\t"
4788 "incl %0\n"
4789 "2:\n\t"
4790 : "=r" (iBit)
4791 : "rm" (u32));
4792# else
4793 uint32_t iBit;
4794 _asm
4795 {
4796 bsr eax, [u32]
4797 jnz found
4798 xor eax, eax
4799 jmp done
4800 found:
4801 inc eax
4802 done:
4803 mov [iBit], eax
4804 }
4805# endif
4806 return iBit;
4807}
4808#endif
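
/* Usage sketch for ASMBitLastSetU32: because the result is 1-based, it doubles
 * as the number of significant bits, i.e. floor(log2(u32)) + 1 for non-zero input.
 * @code
 *     unsigned cSignificantBits = ASMBitLastSetU32(UINT32_C(0x00000050)); // returns 7 (bit 6 is the highest set bit).
 * @endcode
 */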
4809
4810
4811/**
4812 * Finds the last bit which is set in the given 32-bit integer.
4813 * Bits are numbered from 1 (least significant) to 32.
4814 *
4815 * @returns index [1..32] of the last set bit.
4816 * @returns 0 if all bits are cleared.
4817 * @param i32 Integer to search for set bits.
4818 * @remark Similar to fls() in BSD.
4819 */
4820DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4821{
4822 return ASMBitLastSetU32((uint32_t)i32);
4823}
4824
4825/**
4826 * Reverse the byte order of the given 16-bit integer.
4827 *
4828 * @returns The byte-swapped value.
4829 * @param u16 16-bit integer value.
4830 */
4831#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4832DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4833#else
4834DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4835{
4836# if RT_INLINE_ASM_USES_INTRIN
4837 u16 = _byteswap_ushort(u16);
4838# elif RT_INLINE_ASM_GNU_STYLE
4839 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4840# else
4841 _asm
4842 {
4843 mov ax, [u16]
4844 ror ax, 8
4845 mov [u16], ax
4846 }
4847# endif
4848 return u16;
4849}
4850#endif
4851
4852
4853/**
4854 * Reverse the byte order of the given 32-bit integer.
4855 *
4856 * @returns The byte-swapped value.
4857 * @param u32 32-bit integer value.
4858 */
4859#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4860DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4861#else
4862DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4863{
4864# if RT_INLINE_ASM_USES_INTRIN
4865 u32 = _byteswap_ulong(u32);
4866# elif RT_INLINE_ASM_GNU_STYLE
4867 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4868# else
4869 _asm
4870 {
4871 mov eax, [u32]
4872 bswap eax
4873 mov [u32], eax
4874 }
4875# endif
4876 return u32;
4877}
4878#endif
4879
4880
4881/**
4882 * Reverse the byte order of the given 64-bit integer.
4883 *
4884 * @returns The byte-swapped value.
4885 * @param u64 64-bit integer value.
4886 */
4887DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4888{
4889#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4890 u64 = _byteswap_uint64(u64);
4891#else
4892 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4893 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4894#endif
4895 return u64;
4896}
4897
4898
4899/**
4900 * Rotate 32-bit unsigned value to the left by @a cShift.
4901 *
4902 * @returns Rotated value.
4903 * @param u32 The value to rotate.
4904 * @param cShift How many bits to rotate by.
4905 */
4906DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
4907{
4908#if RT_INLINE_ASM_USES_INTRIN
4909 return _rotl(u32, cShift);
4910#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4911 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4912 return u32;
4913#else
4914 cShift &= 31;
4915 return (u32 << cShift) | (u32 >> (32 - cShift));
4916#endif
4917}
4918
4919
4920/**
4921 * Rotate 32-bit unsigned value to the right by @a cShift.
4922 *
4923 * @returns Rotated value.
4924 * @param u32 The value to rotate.
4925 * @param cShift How many bits to rotate by.
4926 */
4927DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
4928{
4929#if RT_INLINE_ASM_USES_INTRIN
4930 return _rotr(u32, cShift);
4931#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4932 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4933 return u32;
4934#else
4935 cShift &= 31;
4936 return (u32 >> cShift) | (u32 << (32 - cShift));
4937#endif
4938}
4939
4940
4941/**
4942 * Rotate 64-bit unsigned value to the left by @a cShift.
4943 *
4944 * @returns Rotated value.
4945 * @param u64 The value to rotate.
4946 * @param cShift How many bits to rotate by.
4947 */
4948DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
4949{
4950#if RT_INLINE_ASM_USES_INTRIN
4951 return _rotl64(u64, cShift);
4952#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4953 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4954 return u64;
4955#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4956 uint32_t uSpill;
4957 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4958 "jz 1f\n\t"
4959 "xchgl %%eax, %%edx\n\t"
4960 "1:\n\t"
4961 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4962 "jz 2f\n\t"
4963 "movl %%edx, %2\n\t" /* save the hi value in %3. */
4964 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
4965 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
4966 "2:\n\t" /* } */
4967 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
4968 : "0" (u64),
4969 "1" (cShift));
4970 return u64;
4971#else
4972 cShift &= 63;
4973 return (u64 << cShift) | (u64 >> (64 - cShift));
4974#endif
4975}
4976
4977
4978/**
4979 * Rotate 64-bit unsigned value to the right by @a cShift.
4980 *
4981 * @returns Rotated value.
4982 * @param u64 The value to rotate.
4983 * @param cShift How many bits to rotate by.
4984 */
4985DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
4986{
4987#if RT_INLINE_ASM_USES_INTRIN
4988 return _rotr64(u64, cShift);
4989#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4990 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4991 return u64;
4992#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4993 uint32_t uSpill;
4994 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4995 "jz 1f\n\t"
4996 "xchgl %%eax, %%edx\n\t"
4997 "1:\n\t"
4998 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4999 "jz 2f\n\t"
5000 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5001 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5002 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5003 "2:\n\t" /* } */
5004 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5005 : "0" (u64),
5006 "1" (cShift));
5007 return u64;
5008#else
5009 cShift &= 63;
5010 return (u64 >> cShift) | (u64 << (64 - cShift));
5011#endif
5012}
5013
5014/** @} */
5015
5016
5017/** @} */
5018
5019#endif
5020