VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 56009

Last change on this file since 56009 was 55949, checked in by vboxsync, 10 years ago

iprt/asm.h: Replaced two RT_UNLIKELY occurances with RT_LIKELY.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 142.6 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
/* Solaris 10 header ugliness: if a macro named 'u' is defined at this point,
   remove it before the rest of this header is processed. */
#if defined(u)
# undef u
#endif
41
#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
# include <intrin.h>
   /* Emit the intrinsics at all optimization levels. */

   /* Barrier, cpuid and string stores. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)

   /* Bit scanning and bit testing. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)

   /* Byte swapping. */
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)

   /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)

   /* Rotates. */
# pragma intrinsic(_rotl)
# pragma intrinsic(_rotr)
# pragma intrinsic(_rotl64)
# pragma intrinsic(_rotr64)

   /* Additional intrinsics requested only when RT_ARCH_AMD64 is defined. */
# ifdef RT_ARCH_AMD64
#  pragma intrinsic(__stosq)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
#  pragma intrinsic(_InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedAnd64)
#  pragma intrinsic(_InterlockedOr64)
#  pragma intrinsic(_InterlockedIncrement64)
#  pragma intrinsic(_InterlockedDecrement64)
# endif
#endif
83
84
/** @defgroup grp_rt_asm    ASM - Assembly Routines
 * @ingroup grp_rt
 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (please, correct if wrong.)
 *
 *          ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
 *          are unordered (note the Uo).
 *
 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
 *          or even optimize assembler instructions away. For instance, in the following code
 *          the second rdmsr instruction is optimized away because gcc treats that instruction
 *          as deterministic:
 *
 *            @code
 *            static inline uint64_t rdmsr_low(int idx)
 *            {
 *                uint32_t low;
 *                __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *                return low;
 *            }
 *            ...
 *            uint32_t msr1 = rdmsr_low(1);
 *            foo(msr1);
 *            msr1 = rdmsr_low(1);
 *            bar(msr1);
 *            @endcode
 *
 *          The input parameter of rdmsr_low is the same for both calls and therefore gcc will
 *          use the result of the first call as input parameter for bar() as well. For rdmsr this
 *          is not acceptable as this instruction is _not_ deterministic. This applies to reading
 *          machine status information in general.
 *
 * @{
 */
123
124
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler. So far this workaround is still required for 4.4 and 4.5.
 *
 * The condition is evaluated once, here, so that the macro always expands to
 * a plain 0 or 1.  Putting the 'defined' operator inside the macro body would
 * make every \#if that uses the macro rely on 'defined' appearing through
 * macro expansion, which the C standard leaves undefined, and would make the
 * macro unusable in ordinary C expressions. */
#ifdef __GNUC__
# if __GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__)
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# else
#  define RT_INLINE_ASM_GCC_4_3_X_X86 0
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
134
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * Unless the includer has already defined it, the macro is set to 1 when
 * DOXYGEN_RUNNING is defined, or when building PIC code for RT_ARCH_X86 with
 * either an affected gcc (RT_INLINE_ASM_GCC_4_3_X_X86) or RT_OS_DARWIN;
 * otherwise it is 0.  The test is done once, here, so the macro expands to a
 * plain 0 or 1 rather than to an expression containing 'defined'.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# ifdef DOXYGEN_RUNNING
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif    (defined(PIC) || defined(__PIC__)) \
       && defined(RT_ARCH_X86) \
       && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
           || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
154
155
/** @def ASMReturnAddress
 * Yields the return address of the function (or method) it is used in, i.e.
 * the address execution continues at in the caller.
 *
 * Maps to the _ReturnAddress() intrinsic when _MSC_VER is defined and to
 * __builtin_return_address(0) for GNU C (and for Doxygen runs); any other
 * compiler is rejected at preprocessing time.
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
171
172
173/**
174 * Compiler memory barrier.
175 *
176 * Ensure that the compiler does not use any cached (register/tmp stack) memory
177 * values or any outstanding writes when returning from this function.
178 *
179 * This function must be used if non-volatile data is modified by a
180 * device or the VMM. Typical cases are port access, MMIO access,
181 * trapping instruction, etc.
182 */
183#if RT_INLINE_ASM_GNU_STYLE
184# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
185#elif RT_INLINE_ASM_USES_INTRIN
186# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
187#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
188DECLINLINE(void) ASMCompilerBarrier(void)
189{
190 __asm
191 {
192 }
193}
194#endif
195
196
/** @def ASMBreakpoint
 * Debugger breakpoint; simply forwards to RT_BREAKPOINT().
 *
 * @deprecated  Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint()         RT_BREAKPOINT()
203
204
205/**
206 * Spinloop hint for platforms that have these, empty function on the other
207 * platforms.
208 *
209 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
210 * spin locks.
211 */
212#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
213DECLASM(void) ASMNopPause(void);
214#else
215DECLINLINE(void) ASMNopPause(void)
216{
217# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
220# else
221 __asm {
222 _emit 0f3h
223 _emit 090h
224 }
225# endif
226# else
227 /* dummy */
228# endif
229}
230#endif
231
232
233/**
234 * Atomically Exchange an unsigned 8-bit value, ordered.
235 *
236 * @returns Current *pu8 value
237 * @param pu8 Pointer to the 8-bit variable to update.
238 * @param u8 The 8-bit value to assign to *pu8.
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
242#else
243DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__("xchgb %0, %1\n\t"
247 : "=m" (*pu8),
248 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
249 : "1" (u8),
250 "m" (*pu8));
251# else
252 __asm
253 {
254# ifdef RT_ARCH_AMD64
255 mov rdx, [pu8]
256 mov al, [u8]
257 xchg [rdx], al
258 mov [u8], al
259# else
260 mov edx, [pu8]
261 mov al, [u8]
262 xchg [edx], al
263 mov [u8], al
264# endif
265 }
266# endif
267 return u8;
268}
269#endif
270
271
272/**
273 * Atomically Exchange a signed 8-bit value, ordered.
274 *
275 * @returns Current *pu8 value
276 * @param pi8 Pointer to the 8-bit variable to update.
277 * @param i8 The 8-bit value to assign to *pi8.
278 */
279DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
280{
281 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
282}
283
284
285/**
286 * Atomically Exchange a bool value, ordered.
287 *
288 * @returns Current *pf value
289 * @param pf Pointer to the 8-bit variable to update.
290 * @param f The 8-bit value to assign to *pi8.
291 */
292DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
293{
294#ifdef _MSC_VER
295 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
296#else
297 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
298#endif
299}
300
301
302/**
303 * Atomically Exchange an unsigned 16-bit value, ordered.
304 *
305 * @returns Current *pu16 value
306 * @param pu16 Pointer to the 16-bit variable to update.
307 * @param u16 The 16-bit value to assign to *pu16.
308 */
309#if RT_INLINE_ASM_EXTERNAL
310DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
311#else
312DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
313{
314# if RT_INLINE_ASM_GNU_STYLE
315 __asm__ __volatile__("xchgw %0, %1\n\t"
316 : "=m" (*pu16),
317 "=r" (u16)
318 : "1" (u16),
319 "m" (*pu16));
320# else
321 __asm
322 {
323# ifdef RT_ARCH_AMD64
324 mov rdx, [pu16]
325 mov ax, [u16]
326 xchg [rdx], ax
327 mov [u16], ax
328# else
329 mov edx, [pu16]
330 mov ax, [u16]
331 xchg [edx], ax
332 mov [u16], ax
333# endif
334 }
335# endif
336 return u16;
337}
338#endif
339
340
341/**
342 * Atomically Exchange a signed 16-bit value, ordered.
343 *
344 * @returns Current *pu16 value
345 * @param pi16 Pointer to the 16-bit variable to update.
346 * @param i16 The 16-bit value to assign to *pi16.
347 */
348DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
349{
350 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
351}
352
353
354/**
355 * Atomically Exchange an unsigned 32-bit value, ordered.
356 *
357 * @returns Current *pu32 value
358 * @param pu32 Pointer to the 32-bit variable to update.
359 * @param u32 The 32-bit value to assign to *pu32.
360 */
361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
362DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
363#else
364DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
365{
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("xchgl %0, %1\n\t"
368 : "=m" (*pu32),
369 "=r" (u32)
370 : "1" (u32),
371 "m" (*pu32));
372
373# elif RT_INLINE_ASM_USES_INTRIN
374 u32 = _InterlockedExchange((long *)pu32, u32);
375
376# else
377 __asm
378 {
379# ifdef RT_ARCH_AMD64
380 mov rdx, [pu32]
381 mov eax, u32
382 xchg [rdx], eax
383 mov [u32], eax
384# else
385 mov edx, [pu32]
386 mov eax, u32
387 xchg [edx], eax
388 mov [u32], eax
389# endif
390 }
391# endif
392 return u32;
393}
394#endif
395
396
397/**
398 * Atomically Exchange a signed 32-bit value, ordered.
399 *
400 * @returns Current *pu32 value
401 * @param pi32 Pointer to the 32-bit variable to update.
402 * @param i32 The 32-bit value to assign to *pi32.
403 */
404DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
405{
406 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
407}
408
409
410/**
411 * Atomically Exchange an unsigned 64-bit value, ordered.
412 *
413 * @returns Current *pu64 value
414 * @param pu64 Pointer to the 64-bit variable to update.
415 * @param u64 The 64-bit value to assign to *pu64.
416 */
417#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
418 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
419DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
420#else
421DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
422{
423# if defined(RT_ARCH_AMD64)
424# if RT_INLINE_ASM_USES_INTRIN
425 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
426
427# elif RT_INLINE_ASM_GNU_STYLE
428 __asm__ __volatile__("xchgq %0, %1\n\t"
429 : "=m" (*pu64),
430 "=r" (u64)
431 : "1" (u64),
432 "m" (*pu64));
433# else
434 __asm
435 {
436 mov rdx, [pu64]
437 mov rax, [u64]
438 xchg [rdx], rax
439 mov [u64], rax
440 }
441# endif
442# else /* !RT_ARCH_AMD64 */
443# if RT_INLINE_ASM_GNU_STYLE
444# if defined(PIC) || defined(__PIC__)
445 uint32_t u32EBX = (uint32_t)u64;
446 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
447 "xchgl %%ebx, %3\n\t"
448 "1:\n\t"
449 "lock; cmpxchg8b (%5)\n\t"
450 "jnz 1b\n\t"
451 "movl %3, %%ebx\n\t"
452 /*"xchgl %%esi, %5\n\t"*/
453 : "=A" (u64),
454 "=m" (*pu64)
455 : "0" (*pu64),
456 "m" ( u32EBX ),
457 "c" ( (uint32_t)(u64 >> 32) ),
458 "S" (pu64));
459# else /* !PIC */
460 __asm__ __volatile__("1:\n\t"
461 "lock; cmpxchg8b %1\n\t"
462 "jnz 1b\n\t"
463 : "=A" (u64),
464 "=m" (*pu64)
465 : "0" (*pu64),
466 "b" ( (uint32_t)u64 ),
467 "c" ( (uint32_t)(u64 >> 32) ));
468# endif
469# else
470 __asm
471 {
472 mov ebx, dword ptr [u64]
473 mov ecx, dword ptr [u64 + 4]
474 mov edi, pu64
475 mov eax, dword ptr [edi]
476 mov edx, dword ptr [edi + 4]
477 retry:
478 lock cmpxchg8b [edi]
479 jnz retry
480 mov dword ptr [u64], eax
481 mov dword ptr [u64 + 4], edx
482 }
483# endif
484# endif /* !RT_ARCH_AMD64 */
485 return u64;
486}
487#endif
488
489
490/**
491 * Atomically Exchange an signed 64-bit value, ordered.
492 *
493 * @returns Current *pi64 value
494 * @param pi64 Pointer to the 64-bit variable to update.
495 * @param i64 The 64-bit value to assign to *pi64.
496 */
497DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
498{
499 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
500}
501
502
503/**
504 * Atomically Exchange a pointer value, ordered.
505 *
506 * @returns Current *ppv value
507 * @param ppv Pointer to the pointer variable to update.
508 * @param pv The pointer value to assign to *ppv.
509 */
510DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
511{
512#if ARCH_BITS == 32
513 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
514#elif ARCH_BITS == 64
515 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
516#else
517# error "ARCH_BITS is bogus"
518#endif
519}
520
521
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 *
 * With GNU C the arguments are first assigned to correctly typed temporaries,
 * so a mismatching @a ppv / @a pv / @a Type combination is diagnosed by the
 * compiler.  Other compilers get a plain cast.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to assign to *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type                         const pvTypeChecked  = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#endif
543
544
545/**
546 * Atomically Exchange a raw-mode context pointer value, ordered.
547 *
548 * @returns Current *ppv value
549 * @param ppvRC Pointer to the pointer variable to update.
550 * @param pvRC The pointer value to assign to *ppv.
551 */
552DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
553{
554 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
555}
556
557
558/**
559 * Atomically Exchange a ring-0 pointer value, ordered.
560 *
561 * @returns Current *ppv value
562 * @param ppvR0 Pointer to the pointer variable to update.
563 * @param pvR0 The pointer value to assign to *ppv.
564 */
565DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
566{
567#if R0_ARCH_BITS == 32
568 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
569#elif R0_ARCH_BITS == 64
570 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
571#else
572# error "R0_ARCH_BITS is bogus"
573#endif
574}
575
576
577/**
578 * Atomically Exchange a ring-3 pointer value, ordered.
579 *
580 * @returns Current *ppv value
581 * @param ppvR3 Pointer to the pointer variable to update.
582 * @param pvR3 The pointer value to assign to *ppv.
583 */
584DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
585{
586#if R3_ARCH_BITS == 32
587 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
588#elif R3_ARCH_BITS == 64
589 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
590#else
591# error "R3_ARCH_BITS is bogus"
592#endif
593}
594
595
596/** @def ASMAtomicXchgHandle
597 * Atomically Exchange a typical IPRT handle value, ordered.
598 *
599 * @param ph Pointer to the value to update.
600 * @param hNew The new value to assigned to *pu.
601 * @param phRes Where to store the current *ph value.
602 *
603 * @remarks This doesn't currently work for all handles (like RTFILE).
604 */
605#if HC_ARCH_BITS == 32
606# define ASMAtomicXchgHandle(ph, hNew, phRes) \
607 do { \
608 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
609 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
610 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
611 } while (0)
612#elif HC_ARCH_BITS == 64
613# define ASMAtomicXchgHandle(ph, hNew, phRes) \
614 do { \
615 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
616 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
617 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
618 } while (0)
619#else
620# error HC_ARCH_BITS
621#endif
622
623
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and
 * discards the previous value.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
642
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange and stores
 * the previous value through @a puRes using the same width.  Any other size
 * triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
661
662
663
664/**
665 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
666 *
667 * @returns true if xchg was done.
668 * @returns false if xchg wasn't done.
669 *
670 * @param pu8 Pointer to the value to update.
671 * @param u8New The new value to assigned to *pu8.
672 * @param u8Old The old value to *pu8 compare with.
673 */
674#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
675DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
676#else
677DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
678{
679 uint8_t u8Ret;
680 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
681 "setz %1\n\t"
682 : "=m" (*pu8),
683 "=qm" (u8Ret),
684 "=a" (u8Old)
685 : "q" (u8New),
686 "2" (u8Old),
687 "m" (*pu8));
688 return (bool)u8Ret;
689}
690#endif
691
692
693/**
694 * Atomically Compare and Exchange a signed 8-bit value, ordered.
695 *
696 * @returns true if xchg was done.
697 * @returns false if xchg wasn't done.
698 *
699 * @param pi8 Pointer to the value to update.
700 * @param i8New The new value to assigned to *pi8.
701 * @param i8Old The old value to *pi8 compare with.
702 */
703DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
704{
705 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
706}
707
708
709/**
710 * Atomically Compare and Exchange a bool value, ordered.
711 *
712 * @returns true if xchg was done.
713 * @returns false if xchg wasn't done.
714 *
715 * @param pf Pointer to the value to update.
716 * @param fNew The new value to assigned to *pf.
717 * @param fOld The old value to *pf compare with.
718 */
719DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
720{
721 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
722}
723
724
725/**
726 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
727 *
728 * @returns true if xchg was done.
729 * @returns false if xchg wasn't done.
730 *
731 * @param pu32 Pointer to the value to update.
732 * @param u32New The new value to assigned to *pu32.
733 * @param u32Old The old value to *pu32 compare with.
734 */
735#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
736DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
737#else
738DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
739{
740# if RT_INLINE_ASM_GNU_STYLE
741 uint8_t u8Ret;
742 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
743 "setz %1\n\t"
744 : "=m" (*pu32),
745 "=qm" (u8Ret),
746 "=a" (u32Old)
747 : "r" (u32New),
748 "2" (u32Old),
749 "m" (*pu32));
750 return (bool)u8Ret;
751
752# elif RT_INLINE_ASM_USES_INTRIN
753 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
754
755# else
756 uint32_t u32Ret;
757 __asm
758 {
759# ifdef RT_ARCH_AMD64
760 mov rdx, [pu32]
761# else
762 mov edx, [pu32]
763# endif
764 mov eax, [u32Old]
765 mov ecx, [u32New]
766# ifdef RT_ARCH_AMD64
767 lock cmpxchg [rdx], ecx
768# else
769 lock cmpxchg [edx], ecx
770# endif
771 setz al
772 movzx eax, al
773 mov [u32Ret], eax
774 }
775 return !!u32Ret;
776# endif
777}
778#endif
779
780
781/**
782 * Atomically Compare and Exchange a signed 32-bit value, ordered.
783 *
784 * @returns true if xchg was done.
785 * @returns false if xchg wasn't done.
786 *
787 * @param pi32 Pointer to the value to update.
788 * @param i32New The new value to assigned to *pi32.
789 * @param i32Old The old value to *pi32 compare with.
790 */
791DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
792{
793 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
794}
795
796
797/**
798 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
799 *
800 * @returns true if xchg was done.
801 * @returns false if xchg wasn't done.
802 *
803 * @param pu64 Pointer to the 64-bit variable to update.
804 * @param u64New The 64-bit value to assign to *pu64.
805 * @param u64Old The value to compare with.
806 */
807#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
808 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
809DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
810#else
811DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
812{
813# if RT_INLINE_ASM_USES_INTRIN
814 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
815
816# elif defined(RT_ARCH_AMD64)
817# if RT_INLINE_ASM_GNU_STYLE
818 uint8_t u8Ret;
819 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
820 "setz %1\n\t"
821 : "=m" (*pu64),
822 "=qm" (u8Ret),
823 "=a" (u64Old)
824 : "r" (u64New),
825 "2" (u64Old),
826 "m" (*pu64));
827 return (bool)u8Ret;
828# else
829 bool fRet;
830 __asm
831 {
832 mov rdx, [pu32]
833 mov rax, [u64Old]
834 mov rcx, [u64New]
835 lock cmpxchg [rdx], rcx
836 setz al
837 mov [fRet], al
838 }
839 return fRet;
840# endif
841# else /* !RT_ARCH_AMD64 */
842 uint32_t u32Ret;
843# if RT_INLINE_ASM_GNU_STYLE
844# if defined(PIC) || defined(__PIC__)
845 uint32_t u32EBX = (uint32_t)u64New;
846 uint32_t u32Spill;
847 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
848 "lock; cmpxchg8b (%6)\n\t"
849 "setz %%al\n\t"
850 "movl %4, %%ebx\n\t"
851 "movzbl %%al, %%eax\n\t"
852 : "=a" (u32Ret),
853 "=d" (u32Spill),
854# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
855 "+m" (*pu64)
856# else
857 "=m" (*pu64)
858# endif
859 : "A" (u64Old),
860 "m" ( u32EBX ),
861 "c" ( (uint32_t)(u64New >> 32) ),
862 "S" (pu64));
863# else /* !PIC */
864 uint32_t u32Spill;
865 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
866 "setz %%al\n\t"
867 "movzbl %%al, %%eax\n\t"
868 : "=a" (u32Ret),
869 "=d" (u32Spill),
870 "+m" (*pu64)
871 : "A" (u64Old),
872 "b" ( (uint32_t)u64New ),
873 "c" ( (uint32_t)(u64New >> 32) ));
874# endif
875 return (bool)u32Ret;
876# else
877 __asm
878 {
879 mov ebx, dword ptr [u64New]
880 mov ecx, dword ptr [u64New + 4]
881 mov edi, [pu64]
882 mov eax, dword ptr [u64Old]
883 mov edx, dword ptr [u64Old + 4]
884 lock cmpxchg8b [edi]
885 setz al
886 movzx eax, al
887 mov dword ptr [u32Ret], eax
888 }
889 return !!u32Ret;
890# endif
891# endif /* !RT_ARCH_AMD64 */
892}
893#endif
894
895
896/**
897 * Atomically Compare and exchange a signed 64-bit value, ordered.
898 *
899 * @returns true if xchg was done.
900 * @returns false if xchg wasn't done.
901 *
902 * @param pi64 Pointer to the 64-bit variable to update.
903 * @param i64 The 64-bit value to assign to *pu64.
904 * @param i64Old The value to compare with.
905 */
906DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
907{
908 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
909}
910
911
912/**
913 * Atomically Compare and Exchange a pointer value, ordered.
914 *
915 * @returns true if xchg was done.
916 * @returns false if xchg wasn't done.
917 *
918 * @param ppv Pointer to the value to update.
919 * @param pvNew The new value to assigned to *ppv.
920 * @param pvOld The old value to *ppv compare with.
921 */
922DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
923{
924#if ARCH_BITS == 32
925 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
926#elif ARCH_BITS == 64
927 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
928#else
929# error "ARCH_BITS is bogus"
930#endif
931}
932
933
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * Wrapper around ASMAtomicCmpXchgPtrVoid that takes care of the casting.
 * With GNU C the three arguments are first assigned to temporaries typed
 * after *ppv, so mismatching pointer types are diagnosed by the compiler.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#endif
961
962
963/** @def ASMAtomicCmpXchgHandle
964 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
965 *
966 * @param ph Pointer to the value to update.
967 * @param hNew The new value to assigned to *pu.
968 * @param hOld The old value to *pu compare with.
969 * @param fRc Where to store the result.
970 *
971 * @remarks This doesn't currently work for all handles (like RTFILE).
972 */
973#if HC_ARCH_BITS == 32
974# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
975 do { \
976 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
977 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
978 } while (0)
979#elif HC_ARCH_BITS == 64
980# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
981 do { \
982 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
983 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
984 } while (0)
985#else
986# error HC_ARCH_BITS
987#endif
988
989
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)); only 4 and 8 byte variables are handled.  Any
 * other size triggers an assertion and sets @a fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if the exchange was done).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1011
1012
1013/**
1014 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1015 * passes back old value, ordered.
1016 *
1017 * @returns true if xchg was done.
1018 * @returns false if xchg wasn't done.
1019 *
1020 * @param pu32 Pointer to the value to update.
1021 * @param u32New The new value to assigned to *pu32.
1022 * @param u32Old The old value to *pu32 compare with.
1023 * @param pu32Old Pointer store the old value at.
1024 */
1025#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1026DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1027#else
1028DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1029{
1030# if RT_INLINE_ASM_GNU_STYLE
1031 uint8_t u8Ret;
1032 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1033 "setz %1\n\t"
1034 : "=m" (*pu32),
1035 "=qm" (u8Ret),
1036 "=a" (*pu32Old)
1037 : "r" (u32New),
1038 "a" (u32Old),
1039 "m" (*pu32));
1040 return (bool)u8Ret;
1041
1042# elif RT_INLINE_ASM_USES_INTRIN
1043 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1044
1045# else
1046 uint32_t u32Ret;
1047 __asm
1048 {
1049# ifdef RT_ARCH_AMD64
1050 mov rdx, [pu32]
1051# else
1052 mov edx, [pu32]
1053# endif
1054 mov eax, [u32Old]
1055 mov ecx, [u32New]
1056# ifdef RT_ARCH_AMD64
1057 lock cmpxchg [rdx], ecx
1058 mov rdx, [pu32Old]
1059 mov [rdx], eax
1060# else
1061 lock cmpxchg [edx], ecx
1062 mov edx, [pu32Old]
1063 mov [edx], eax
1064# endif
1065 setz al
1066 movzx eax, al
1067 mov [u32Ret], eax
1068 }
1069 return !!u32Ret;
1070# endif
1071}
1072#endif
1073
1074
1075/**
1076 * Atomically Compare and Exchange a signed 32-bit value, additionally
1077 * passes back old value, ordered.
1078 *
1079 * @returns true if xchg was done.
1080 * @returns false if xchg wasn't done.
1081 *
1082 * @param pi32 Pointer to the value to update.
1083 * @param i32New The new value to assigned to *pi32.
1084 * @param i32Old The old value to *pi32 compare with.
1085 * @param pi32Old Pointer store the old value at.
1086 */
1087DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1088{
1089 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1090}
1091
1092
1093/**
1094 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1095 * passing back old value, ordered.
1096 *
1097 * @returns true if xchg was done.
1098 * @returns false if xchg wasn't done.
1099 *
1100 * @param pu64 Pointer to the 64-bit variable to update.
1101 * @param u64New The 64-bit value to assign to *pu64.
1102 * @param u64Old The value to compare with.
1103 * @param pu64Old Pointer store the old value at.
1104 */
1105#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1106 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1107DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1108#else
1109DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1110{
1111# if RT_INLINE_ASM_USES_INTRIN
1112 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1113
1114# elif defined(RT_ARCH_AMD64)
1115# if RT_INLINE_ASM_GNU_STYLE
1116 uint8_t u8Ret;
1117 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1118 "setz %1\n\t"
1119 : "=m" (*pu64),
1120 "=qm" (u8Ret),
1121 "=a" (*pu64Old)
1122 : "r" (u64New),
1123 "a" (u64Old),
1124 "m" (*pu64));
1125 return (bool)u8Ret;
1126# else
1127 bool fRet;
1128 __asm
1129 {
1130 mov rdx, [pu32]
1131 mov rax, [u64Old]
1132 mov rcx, [u64New]
1133 lock cmpxchg [rdx], rcx
1134 mov rdx, [pu64Old]
1135 mov [rdx], rax
1136 setz al
1137 mov [fRet], al
1138 }
1139 return fRet;
1140# endif
1141# else /* !RT_ARCH_AMD64 */
1142# if RT_INLINE_ASM_GNU_STYLE
1143 uint64_t u64Ret;
1144# if defined(PIC) || defined(__PIC__)
1145 /* NB: this code uses a memory clobber description, because the clean
1146 * solution with an output value for *pu64 makes gcc run out of registers.
1147 * This will cause suboptimal code, and anyone with a better solution is
1148 * welcome to improve this. */
1149 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1150 "lock; cmpxchg8b %3\n\t"
1151 "xchgl %%ebx, %1\n\t"
1152 : "=A" (u64Ret)
1153 : "DS" ((uint32_t)u64New),
1154 "c" ((uint32_t)(u64New >> 32)),
1155 "m" (*pu64),
1156 "0" (u64Old)
1157 : "memory" );
1158# else /* !PIC */
1159 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1160 : "=A" (u64Ret),
1161 "=m" (*pu64)
1162 : "b" ((uint32_t)u64New),
1163 "c" ((uint32_t)(u64New >> 32)),
1164 "m" (*pu64),
1165 "0" (u64Old));
1166# endif
1167 *pu64Old = u64Ret;
1168 return u64Ret == u64Old;
1169# else
1170 uint32_t u32Ret;
1171 __asm
1172 {
1173 mov ebx, dword ptr [u64New]
1174 mov ecx, dword ptr [u64New + 4]
1175 mov edi, [pu64]
1176 mov eax, dword ptr [u64Old]
1177 mov edx, dword ptr [u64Old + 4]
1178 lock cmpxchg8b [edi]
1179 mov ebx, [pu64Old]
1180 mov [ebx], eax
1181 setz al
1182 movzx eax, al
1183 add ebx, 4
1184 mov [ebx], edx
1185 mov dword ptr [u32Ret], eax
1186 }
1187 return !!u32Ret;
1188# endif
1189# endif /* !RT_ARCH_AMD64 */
1190}
1191#endif
1192
1193
1194/**
1195 * Atomically Compare and exchange a signed 64-bit value, additionally
1196 * passing back old value, ordered.
1197 *
1198 * @returns true if xchg was done.
1199 * @returns false if xchg wasn't done.
1200 *
1201 * @param pi64 Pointer to the 64-bit variable to update.
1202 * @param i64 The 64-bit value to assign to *pu64.
1203 * @param i64Old The value to compare with.
1204 * @param pi64Old Pointer store the old value at.
1205 */
1206DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1207{
1208 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1209}
1210
1211/** @def ASMAtomicCmpXchgExHandle
1212 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1213 *
1214 * @param ph Pointer to the value to update.
1215 * @param hNew The new value to assigned to *pu.
1216 * @param hOld The old value to *pu compare with.
1217 * @param fRc Where to store the result.
1218 * @param phOldVal Pointer to where to store the old value.
1219 *
1220 * @remarks This doesn't currently work for all handles (like RTFILE).
1221 */
1222#if HC_ARCH_BITS == 32
1223# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1224 do { \
1225 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1226 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1227 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1228 } while (0)
1229#elif HC_ARCH_BITS == 64
1230# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1231 do { \
1232 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1233 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1234 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1235 } while (0)
1236#else
1237# error HC_ARCH_BITS
1238#endif
1239
1240
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers.  Additionally passes back the old value.
 *
 * Dispatches on sizeof(*pu): 4-byte values go to ASMAtomicCmpXchgExU32 and
 * 8-byte values to ASMAtomicCmpXchgExU64.  Any other size triggers an
 * assertion, sets @a fRc to false and stores 0 in *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d specifier. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1264
1265
1266/**
1267 * Atomically Compare and Exchange a pointer value, additionally
1268 * passing back old value, ordered.
1269 *
1270 * @returns true if xchg was done.
1271 * @returns false if xchg wasn't done.
1272 *
1273 * @param ppv Pointer to the value to update.
1274 * @param pvNew The new value to assigned to *ppv.
1275 * @param pvOld The old value to *ppv compare with.
1276 * @param ppvOld Pointer store the old value at.
1277 */
1278DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1279{
1280#if ARCH_BITS == 32
1281 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1282#elif ARCH_BITS == 64
1283 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1284#else
1285# error "ARCH_BITS is bogus"
1286#endif
1287}
1288
1289
/**
 * Atomic compare-and-exchange of a pointer value which also hands back the
 * pointer found in memory, ordered.
 *
 * Wrapper around ASMAtomicCmpXchgExPtrVoid that supplies the casts.
 *
 * @returns true if the exchange was done, false if it was not.
 *
 * @param   ppv         Pointer to the pointer variable to update.
 * @param   pvNew       The value to store in *ppv on a match.
 * @param   pvOld       The value *ppv is compared against.
 * @param   ppvOld      Where to store the value that was found in *ppv.
 *
 * @remarks This is relatively type safe on GCC platforms: each argument is
 *          first assigned to a temporary typed after *ppv.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldChecked = (ppvOld); \
        bool fXchgDone = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvChecked, \
                                                   (void *)pvNewChecked, (void *)pvOldChecked, \
                                                   (void **)ppvOldChecked); \
        fXchgDone; \
     })
#endif
1321
1322
1323/**
1324 * Serialize Instruction.
1325 */
1326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1327DECLASM(void) ASMSerializeInstruction(void);
1328#else
1329DECLINLINE(void) ASMSerializeInstruction(void)
1330{
1331# if RT_INLINE_ASM_GNU_STYLE
1332 RTCCUINTREG xAX = 0;
1333# ifdef RT_ARCH_AMD64
1334 __asm__ ("cpuid"
1335 : "=a" (xAX)
1336 : "0" (xAX)
1337 : "rbx", "rcx", "rdx");
1338# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1339 __asm__ ("push %%ebx\n\t"
1340 "cpuid\n\t"
1341 "pop %%ebx\n\t"
1342 : "=a" (xAX)
1343 : "0" (xAX)
1344 : "ecx", "edx");
1345# else
1346 __asm__ ("cpuid"
1347 : "=a" (xAX)
1348 : "0" (xAX)
1349 : "ebx", "ecx", "edx");
1350# endif
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 int aInfo[4];
1354 __cpuid(aInfo, 0);
1355
1356# else
1357 __asm
1358 {
1359 push ebx
1360 xor eax, eax
1361 cpuid
1362 pop ebx
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Memory fence, waits for any pending writes and reads to complete.
1371 */
1372DECLINLINE(void) ASMMemoryFence(void)
1373{
1374 /** @todo use mfence? check if all cpus we care for support it. */
1375 uint32_t volatile u32;
1376 ASMAtomicXchgU32(&u32, 0);
1377}
1378
1379
1380/**
1381 * Write fence, waits for any pending writes to complete.
1382 */
1383DECLINLINE(void) ASMWriteFence(void)
1384{
1385 /** @todo use sfence? check if all cpus we care for support it. */
1386 ASMMemoryFence();
1387}
1388
1389
1390/**
1391 * Read fence, waits for any pending reads to complete.
1392 */
1393DECLINLINE(void) ASMReadFence(void)
1394{
1395 /** @todo use lfence? check if all cpus we care for support it. */
1396 ASMMemoryFence();
1397}
1398
1399
1400/**
1401 * Atomically reads an unsigned 8-bit value, ordered.
1402 *
1403 * @returns Current *pu8 value
1404 * @param pu8 Pointer to the 8-bit variable to read.
1405 */
1406DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1407{
1408 ASMMemoryFence();
1409 return *pu8; /* byte reads are atomic on x86 */
1410}
1411
1412
1413/**
1414 * Atomically reads an unsigned 8-bit value, unordered.
1415 *
1416 * @returns Current *pu8 value
1417 * @param pu8 Pointer to the 8-bit variable to read.
1418 */
1419DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1420{
1421 return *pu8; /* byte reads are atomic on x86 */
1422}
1423
1424
1425/**
1426 * Atomically reads a signed 8-bit value, ordered.
1427 *
1428 * @returns Current *pi8 value
1429 * @param pi8 Pointer to the 8-bit variable to read.
1430 */
1431DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1432{
1433 ASMMemoryFence();
1434 return *pi8; /* byte reads are atomic on x86 */
1435}
1436
1437
1438/**
1439 * Atomically reads a signed 8-bit value, unordered.
1440 *
1441 * @returns Current *pi8 value
1442 * @param pi8 Pointer to the 8-bit variable to read.
1443 */
1444DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1445{
1446 return *pi8; /* byte reads are atomic on x86 */
1447}
1448
1449
1450/**
1451 * Atomically reads an unsigned 16-bit value, ordered.
1452 *
1453 * @returns Current *pu16 value
1454 * @param pu16 Pointer to the 16-bit variable to read.
1455 */
1456DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1457{
1458 ASMMemoryFence();
1459 Assert(!((uintptr_t)pu16 & 1));
1460 return *pu16;
1461}
1462
1463
1464/**
1465 * Atomically reads an unsigned 16-bit value, unordered.
1466 *
1467 * @returns Current *pu16 value
1468 * @param pu16 Pointer to the 16-bit variable to read.
1469 */
1470DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1471{
1472 Assert(!((uintptr_t)pu16 & 1));
1473 return *pu16;
1474}
1475
1476
1477/**
1478 * Atomically reads a signed 16-bit value, ordered.
1479 *
1480 * @returns Current *pi16 value
1481 * @param pi16 Pointer to the 16-bit variable to read.
1482 */
1483DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1484{
1485 ASMMemoryFence();
1486 Assert(!((uintptr_t)pi16 & 1));
1487 return *pi16;
1488}
1489
1490
1491/**
1492 * Atomically reads a signed 16-bit value, unordered.
1493 *
1494 * @returns Current *pi16 value
1495 * @param pi16 Pointer to the 16-bit variable to read.
1496 */
1497DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1498{
1499 Assert(!((uintptr_t)pi16 & 1));
1500 return *pi16;
1501}
1502
1503
1504/**
1505 * Atomically reads an unsigned 32-bit value, ordered.
1506 *
1507 * @returns Current *pu32 value
1508 * @param pu32 Pointer to the 32-bit variable to read.
1509 */
1510DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1511{
1512 ASMMemoryFence();
1513 Assert(!((uintptr_t)pu32 & 3));
1514 return *pu32;
1515}
1516
1517
1518/**
1519 * Atomically reads an unsigned 32-bit value, unordered.
1520 *
1521 * @returns Current *pu32 value
1522 * @param pu32 Pointer to the 32-bit variable to read.
1523 */
1524DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1525{
1526 Assert(!((uintptr_t)pu32 & 3));
1527 return *pu32;
1528}
1529
1530
1531/**
1532 * Atomically reads a signed 32-bit value, ordered.
1533 *
1534 * @returns Current *pi32 value
1535 * @param pi32 Pointer to the 32-bit variable to read.
1536 */
1537DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1538{
1539 ASMMemoryFence();
1540 Assert(!((uintptr_t)pi32 & 3));
1541 return *pi32;
1542}
1543
1544
1545/**
1546 * Atomically reads a signed 32-bit value, unordered.
1547 *
1548 * @returns Current *pi32 value
1549 * @param pi32 Pointer to the 32-bit variable to read.
1550 */
1551DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1552{
1553 Assert(!((uintptr_t)pi32 & 3));
1554 return *pi32;
1555}
1556
1557
1558/**
1559 * Atomically reads an unsigned 64-bit value, ordered.
1560 *
1561 * @returns Current *pu64 value
1562 * @param pu64 Pointer to the 64-bit variable to read.
1563 * The memory pointed to must be writable.
1564 * @remark This will fault if the memory is read-only!
1565 */
1566#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1567 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1568DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1569#else
1570DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1571{
1572 uint64_t u64;
1573# ifdef RT_ARCH_AMD64
1574 Assert(!((uintptr_t)pu64 & 7));
1575/*# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__( "mfence\n\t"
1577 "movq %1, %0\n\t"
1578 : "=r" (u64)
1579 : "m" (*pu64));
1580# else
1581 __asm
1582 {
1583 mfence
1584 mov rdx, [pu64]
1585 mov rax, [rdx]
1586 mov [u64], rax
1587 }
1588# endif*/
1589 ASMMemoryFence();
1590 u64 = *pu64;
1591# else /* !RT_ARCH_AMD64 */
1592# if RT_INLINE_ASM_GNU_STYLE
1593# if defined(PIC) || defined(__PIC__)
1594 uint32_t u32EBX = 0;
1595 Assert(!((uintptr_t)pu64 & 7));
1596 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1597 "lock; cmpxchg8b (%5)\n\t"
1598 "movl %3, %%ebx\n\t"
1599 : "=A" (u64),
1600# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1601 "+m" (*pu64)
1602# else
1603 "=m" (*pu64)
1604# endif
1605 : "0" (0ULL),
1606 "m" (u32EBX),
1607 "c" (0),
1608 "S" (pu64));
1609# else /* !PIC */
1610 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1611 : "=A" (u64),
1612 "+m" (*pu64)
1613 : "0" (0ULL),
1614 "b" (0),
1615 "c" (0));
1616# endif
1617# else
1618 Assert(!((uintptr_t)pu64 & 7));
1619 __asm
1620 {
1621 xor eax, eax
1622 xor edx, edx
1623 mov edi, pu64
1624 xor ecx, ecx
1625 xor ebx, ebx
1626 lock cmpxchg8b [edi]
1627 mov dword ptr [u64], eax
1628 mov dword ptr [u64 + 4], edx
1629 }
1630# endif
1631# endif /* !RT_ARCH_AMD64 */
1632 return u64;
1633}
1634#endif
1635
1636
1637/**
1638 * Atomically reads an unsigned 64-bit value, unordered.
1639 *
1640 * @returns Current *pu64 value
1641 * @param pu64 Pointer to the 64-bit variable to read.
1642 * The memory pointed to must be writable.
1643 * @remark This will fault if the memory is read-only!
1644 */
1645#if !defined(RT_ARCH_AMD64) \
1646 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1647 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1648DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1649#else
1650DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1651{
1652 uint64_t u64;
1653# ifdef RT_ARCH_AMD64
1654 Assert(!((uintptr_t)pu64 & 7));
1655/*# if RT_INLINE_ASM_GNU_STYLE
1656 Assert(!((uintptr_t)pu64 & 7));
1657 __asm__ __volatile__("movq %1, %0\n\t"
1658 : "=r" (u64)
1659 : "m" (*pu64));
1660# else
1661 __asm
1662 {
1663 mov rdx, [pu64]
1664 mov rax, [rdx]
1665 mov [u64], rax
1666 }
1667# endif */
1668 u64 = *pu64;
1669# else /* !RT_ARCH_AMD64 */
1670# if RT_INLINE_ASM_GNU_STYLE
1671# if defined(PIC) || defined(__PIC__)
1672 uint32_t u32EBX = 0;
1673 uint32_t u32Spill;
1674 Assert(!((uintptr_t)pu64 & 7));
1675 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1676 "xor %%ecx,%%ecx\n\t"
1677 "xor %%edx,%%edx\n\t"
1678 "xchgl %%ebx, %3\n\t"
1679 "lock; cmpxchg8b (%4)\n\t"
1680 "movl %3, %%ebx\n\t"
1681 : "=A" (u64),
1682# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1683 "+m" (*pu64),
1684# else
1685 "=m" (*pu64),
1686# endif
1687 "=c" (u32Spill)
1688 : "m" (u32EBX),
1689 "S" (pu64));
1690# else /* !PIC */
1691 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1692 : "=A" (u64),
1693 "+m" (*pu64)
1694 : "0" (0ULL),
1695 "b" (0),
1696 "c" (0));
1697# endif
1698# else
1699 Assert(!((uintptr_t)pu64 & 7));
1700 __asm
1701 {
1702 xor eax, eax
1703 xor edx, edx
1704 mov edi, pu64
1705 xor ecx, ecx
1706 xor ebx, ebx
1707 lock cmpxchg8b [edi]
1708 mov dword ptr [u64], eax
1709 mov dword ptr [u64 + 4], edx
1710 }
1711# endif
1712# endif /* !RT_ARCH_AMD64 */
1713 return u64;
1714}
1715#endif
1716
1717
1718/**
1719 * Atomically reads a signed 64-bit value, ordered.
1720 *
1721 * @returns Current *pi64 value
1722 * @param pi64 Pointer to the 64-bit variable to read.
1723 * The memory pointed to must be writable.
1724 * @remark This will fault if the memory is read-only!
1725 */
1726DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1727{
1728 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1729}
1730
1731
1732/**
1733 * Atomically reads a signed 64-bit value, unordered.
1734 *
1735 * @returns Current *pi64 value
1736 * @param pi64 Pointer to the 64-bit variable to read.
1737 * The memory pointed to must be writable.
1738 * @remark This will fault if the memory is read-only!
1739 */
1740DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1741{
1742 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1743}
1744
1745
1746/**
1747 * Atomically reads a size_t value, ordered.
1748 *
1749 * @returns Current *pcb value
1750 * @param pcb Pointer to the size_t variable to read.
1751 */
1752DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1753{
1754#if ARCH_BITS == 64
1755 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1756#elif ARCH_BITS == 32
1757 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1758#else
1759# error "Unsupported ARCH_BITS value"
1760#endif
1761}
1762
1763
1764/**
1765 * Atomically reads a size_t value, unordered.
1766 *
1767 * @returns Current *pcb value
1768 * @param pcb Pointer to the size_t variable to read.
1769 */
1770DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1771{
1772#if ARCH_BITS == 64
1773 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1774#elif ARCH_BITS == 32
1775 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1776#else
1777# error "Unsupported ARCH_BITS value"
1778#endif
1779}
1780
1781
1782/**
1783 * Atomically reads a pointer value, ordered.
1784 *
1785 * @returns Current *pv value
1786 * @param ppv Pointer to the pointer variable to read.
1787 *
1788 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1789 * requires less typing (no casts).
1790 */
1791DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1792{
1793#if ARCH_BITS == 32
1794 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1795#elif ARCH_BITS == 64
1796 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1797#else
1798# error "ARCH_BITS is bogus"
1799#endif
1800}
1801
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * With GCC the argument is first assigned to a temporary typed after *ppv;
 * other compilers just get the casts.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvChecked = (ppv); \
        Type pvChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvChecked); \
        pvChecked; \
     })
#endif
1821
1822
1823/**
1824 * Atomically reads a pointer value, unordered.
1825 *
1826 * @returns Current *pv value
1827 * @param ppv Pointer to the pointer variable to read.
1828 *
1829 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1830 * requires less typing (no casts).
1831 */
1832DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1833{
1834#if ARCH_BITS == 32
1835 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1836#elif ARCH_BITS == 64
1837 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1838#else
1839# error "ARCH_BITS is bogus"
1840#endif
1841}
1842
1843
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * With GCC the argument is first assigned to a temporary typed after *ppv;
 * other compilers just get the casts.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvChecked = (ppv); \
        Type pvChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvChecked); \
        pvChecked; \
     })
#endif
1863
1864
1865/**
1866 * Atomically reads a boolean value, ordered.
1867 *
1868 * @returns Current *pf value
1869 * @param pf Pointer to the boolean variable to read.
1870 */
1871DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1872{
1873 ASMMemoryFence();
1874 return *pf; /* byte reads are atomic on x86 */
1875}
1876
1877
1878/**
1879 * Atomically reads a boolean value, unordered.
1880 *
1881 * @returns Current *pf value
1882 * @param pf Pointer to the boolean variable to read.
1883 */
1884DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1885{
1886 return *pf; /* byte reads are atomic on x86 */
1887}
1888
1889
1890/**
1891 * Atomically read a typical IPRT handle value, ordered.
1892 *
1893 * @param ph Pointer to the handle variable to read.
1894 * @param phRes Where to store the result.
1895 *
1896 * @remarks This doesn't currently work for all handles (like RTFILE).
1897 */
1898#if HC_ARCH_BITS == 32
1899# define ASMAtomicReadHandle(ph, phRes) \
1900 do { \
1901 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1902 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1903 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1904 } while (0)
1905#elif HC_ARCH_BITS == 64
1906# define ASMAtomicReadHandle(ph, phRes) \
1907 do { \
1908 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1909 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1910 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1911 } while (0)
1912#else
1913# error HC_ARCH_BITS
1914#endif
1915
1916
1917/**
1918 * Atomically read a typical IPRT handle value, unordered.
1919 *
1920 * @param ph Pointer to the handle variable to read.
1921 * @param phRes Where to store the result.
1922 *
1923 * @remarks This doesn't currently work for all handles (like RTFILE).
1924 */
1925#if HC_ARCH_BITS == 32
1926# define ASMAtomicUoReadHandle(ph, phRes) \
1927 do { \
1928 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1929 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1930 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1931 } while (0)
1932#elif HC_ARCH_BITS == 64
1933# define ASMAtomicUoReadHandle(ph, phRes) \
1934 do { \
1935 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1936 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1937 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1938 } while (0)
1939#else
1940# error HC_ARCH_BITS
1941#endif
1942
1943
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered reader.  Any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d specifier. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
1961
1962
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered reader.
 * Any other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* The message now names this macro; sizeof is cast to match %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
1980
1981
1982/**
1983 * Atomically writes an unsigned 8-bit value, ordered.
1984 *
1985 * @param pu8 Pointer to the 8-bit variable.
1986 * @param u8 The 8-bit value to assign to *pu8.
1987 */
1988DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1989{
1990 ASMAtomicXchgU8(pu8, u8);
1991}
1992
1993
1994/**
1995 * Atomically writes an unsigned 8-bit value, unordered.
1996 *
1997 * @param pu8 Pointer to the 8-bit variable.
1998 * @param u8 The 8-bit value to assign to *pu8.
1999 */
2000DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2001{
2002 *pu8 = u8; /* byte writes are atomic on x86 */
2003}
2004
2005
2006/**
2007 * Atomically writes a signed 8-bit value, ordered.
2008 *
2009 * @param pi8 Pointer to the 8-bit variable to read.
2010 * @param i8 The 8-bit value to assign to *pi8.
2011 */
2012DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2013{
2014 ASMAtomicXchgS8(pi8, i8);
2015}
2016
2017
2018/**
2019 * Atomically writes a signed 8-bit value, unordered.
2020 *
2021 * @param pi8 Pointer to the 8-bit variable to write.
2022 * @param i8 The 8-bit value to assign to *pi8.
2023 */
2024DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2025{
2026 *pi8 = i8; /* byte writes are atomic on x86 */
2027}
2028
2029
2030/**
2031 * Atomically writes an unsigned 16-bit value, ordered.
2032 *
2033 * @param pu16 Pointer to the 16-bit variable to write.
2034 * @param u16 The 16-bit value to assign to *pu16.
2035 */
2036DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2037{
2038 ASMAtomicXchgU16(pu16, u16);
2039}
2040
2041
2042/**
2043 * Atomically writes an unsigned 16-bit value, unordered.
2044 *
2045 * @param pu16 Pointer to the 16-bit variable to write.
2046 * @param u16 The 16-bit value to assign to *pu16.
2047 */
2048DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2049{
2050 Assert(!((uintptr_t)pu16 & 1));
2051 *pu16 = u16;
2052}
2053
2054
2055/**
2056 * Atomically writes a signed 16-bit value, ordered.
2057 *
2058 * @param pi16 Pointer to the 16-bit variable to write.
2059 * @param i16 The 16-bit value to assign to *pi16.
2060 */
2061DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2062{
2063 ASMAtomicXchgS16(pi16, i16);
2064}
2065
2066
2067/**
2068 * Atomically writes a signed 16-bit value, unordered.
2069 *
2070 * @param pi16 Pointer to the 16-bit variable to write.
2071 * @param i16 The 16-bit value to assign to *pi16.
2072 */
2073DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2074{
2075 Assert(!((uintptr_t)pi16 & 1));
2076 *pi16 = i16;
2077}
2078
2079
2080/**
2081 * Atomically writes an unsigned 32-bit value, ordered.
2082 *
2083 * @param pu32 Pointer to the 32-bit variable to write.
2084 * @param u32 The 32-bit value to assign to *pu32.
2085 */
2086DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2087{
2088 ASMAtomicXchgU32(pu32, u32);
2089}
2090
2091
2092/**
2093 * Atomically writes an unsigned 32-bit value, unordered.
2094 *
2095 * @param pu32 Pointer to the 32-bit variable to write.
2096 * @param u32 The 32-bit value to assign to *pu32.
2097 */
2098DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2099{
2100 Assert(!((uintptr_t)pu32 & 3));
2101 *pu32 = u32;
2102}
2103
2104
2105/**
2106 * Atomically writes a signed 32-bit value, ordered.
2107 *
2108 * @param pi32 Pointer to the 32-bit variable to write.
2109 * @param i32 The 32-bit value to assign to *pi32.
2110 */
2111DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2112{
2113 ASMAtomicXchgS32(pi32, i32);
2114}
2115
2116
2117/**
2118 * Atomically writes a signed 32-bit value, unordered.
2119 *
2120 * @param pi32 Pointer to the 32-bit variable to write.
2121 * @param i32 The 32-bit value to assign to *pi32.
2122 */
2123DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2124{
2125 Assert(!((uintptr_t)pi32 & 3));
2126 *pi32 = i32;
2127}
2128
2129
2130/**
2131 * Atomically writes an unsigned 64-bit value, ordered.
2132 *
2133 * @param pu64 Pointer to the 64-bit variable to write.
2134 * @param u64 The 64-bit value to assign to *pu64.
2135 */
2136DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2137{
2138 ASMAtomicXchgU64(pu64, u64);
2139}
2140
2141
2142/**
2143 * Atomically writes an unsigned 64-bit value, unordered.
2144 *
2145 * @param pu64 Pointer to the 64-bit variable to write.
2146 * @param u64 The 64-bit value to assign to *pu64.
2147 */
2148DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2149{
2150 Assert(!((uintptr_t)pu64 & 7));
2151#if ARCH_BITS == 64
2152 *pu64 = u64;
2153#else
2154 ASMAtomicXchgU64(pu64, u64);
2155#endif
2156}
2157
2158
2159/**
2160 * Atomically writes a signed 64-bit value, ordered.
2161 *
2162 * @param pi64 Pointer to the 64-bit variable to write.
2163 * @param i64 The 64-bit value to assign to *pi64.
2164 */
2165DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2166{
2167 ASMAtomicXchgS64(pi64, i64);
2168}
2169
2170
2171/**
2172 * Atomically writes a signed 64-bit value, unordered.
2173 *
2174 * @param pi64 Pointer to the 64-bit variable to write.
2175 * @param i64 The 64-bit value to assign to *pi64.
2176 */
2177DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2178{
2179 Assert(!((uintptr_t)pi64 & 7));
2180#if ARCH_BITS == 64
2181 *pi64 = i64;
2182#else
2183 ASMAtomicXchgS64(pi64, i64);
2184#endif
2185}
2186
2187
2188/**
 * Atomically writes a boolean value, ordered.
2190 *
2191 * @param pf Pointer to the boolean variable to write.
2192 * @param f The boolean value to assign to *pf.
2193 */
2194DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2195{
2196 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2197}
2198
2199
2200/**
2201 * Atomically writes a boolean value, unordered.
2202 *
2203 * @param pf Pointer to the boolean variable to write.
2204 * @param f The boolean value to assign to *pf.
2205 */
2206DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2207{
2208 *pf = f; /* byte writes are atomic on x86 */
2209}
2210
2211
2212/**
2213 * Atomically writes a pointer value, ordered.
2214 *
2215 * @param ppv Pointer to the pointer variable to write.
2216 * @param pv The pointer value to assign to *ppv.
2217 */
2218DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2219{
2220#if ARCH_BITS == 32
2221 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2222#elif ARCH_BITS == 64
2223 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2224#else
2225# error "ARCH_BITS is bogus"
2226#endif
2227}
2228
2229
2230/**
2231 * Atomically writes a pointer value, ordered.
2232 *
2233 * @param ppv Pointer to the pointer variable to write.
2234 * @param pv The pointer value to assign to *ppv. If NULL use
2235 * ASMAtomicWriteNullPtr or you'll land in trouble.
2236 *
2237 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2238 * NULL.
2239 */
#ifdef __GNUC__
/* GCC flavour: the typed temporaries let the compiler check that pv is
   assignable to *ppv.  Macro arguments are parenthesized wherever they are
   used so expression arguments (e.g. "p + 1") are checked as a whole. */
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
/* Generic flavour: size and alignment checks only, no type checking. */
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2264
2265
2266/**
2267 * Atomically sets a pointer to NULL, ordered.
2268 *
2269 * @param ppv Pointer to the pointer variable that should be set to NULL.
2270 *
2271 * @remarks This is relatively type safe on GCC platforms.
2272 */
#ifdef __GNUC__
/* GCC flavour: the typed temporary checks that ppv really is a pointer to a
   pointer-sized object.  It is volatile-qualified like in ASMAtomicWritePtr
   so that pointers to volatile variables are accepted without a qualifier
   warning.  Macro arguments are parenthesized wherever they are used. */
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
/* Generic flavour: size and alignment checks only. */
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2291
2292
/**
 * Atomically writes a pointer value, unordered.
 *
 * This macro expands to a statement and does not yield a value.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
/* GCC flavour: the typed temporaries let the compiler check that pv is
   assignable to *ppv; the store itself is a plain assignment. */
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
/* Generic flavour: size and alignment checks, then a plain assignment. */
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2327
2328
2329/**
2330 * Atomically sets a pointer to NULL, unordered.
2331 *
2332 * @param ppv Pointer to the pointer variable that should be set to NULL.
2333 *
2334 * @remarks This is relatively type safe on GCC platforms.
2335 */
#ifdef __GNUC__
/* GCC flavour: the typed temporary checks the pointer type; the store itself
   is a plain assignment of NULL.  Macro arguments are parenthesized wherever
   they are used. */
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
/* Generic flavour: size and alignment checks, then a plain assignment. */
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2354
2355
2356/**
2357 * Atomically write a typical IPRT handle value, ordered.
2358 *
2359 * @param ph Pointer to the variable to update.
2360 * @param hNew The value to assign to *ph.
2361 *
2362 * @remarks This doesn't currently work for all handles (like RTFILE).
2363 */
2364#if HC_ARCH_BITS == 32
2365# define ASMAtomicWriteHandle(ph, hNew) \
2366 do { \
2367 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2368 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2369 } while (0)
2370#elif HC_ARCH_BITS == 64
2371# define ASMAtomicWriteHandle(ph, hNew) \
2372 do { \
2373 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2374 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2375 } while (0)
2376#else
2377# error HC_ARCH_BITS
2378#endif
2379
2380
2381/**
2382 * Atomically write a typical IPRT handle value, unordered.
2383 *
2384 * @param ph Pointer to the variable to update.
2385 * @param hNew The value to assign to *ph.
2386 *
2387 * @remarks This doesn't currently work for all handles (like RTFILE).
2388 */
2389#if HC_ARCH_BITS == 32
2390# define ASMAtomicUoWriteHandle(ph, hNew) \
2391 do { \
2392 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2393 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2394 } while (0)
2395#elif HC_ARCH_BITS == 64
2396# define ASMAtomicUoWriteHandle(ph, hNew) \
2397 do { \
2398 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2399 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2400 } while (0)
2401#else
2402# error HC_ARCH_BITS
2403#endif
2404
2405
2406/**
2407 * Atomically write a value which size might differ
2408 * between platforms or compilers, ordered.
2409 *
2410 * @param pu Pointer to the variable to update.
2411 * @param uNew The value to assign to *pu.
2412 */
/* Dispatches on sizeof(*(pu)) to the ordered writer of the matching width. */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2423
2424/**
2425 * Atomically write a value which size might differ
2426 * between platforms or compilers, unordered.
2427 *
2428 * @param pu Pointer to the variable to update.
2429 * @param uNew The value to assign to *pu.
2430 */
/* Dispatches on sizeof(*(pu)) to the unordered writer of the matching width.
   The assertion message names this macro (it used to name the ordered one). */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2441
2442
2443
2444/**
2445 * Atomically exchanges and adds to a 32-bit value, ordered.
2446 *
2447 * @returns The old value.
2448 * @param pu32 Pointer to the value.
2449 * @param u32 Number to add.
2450 */
2451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2452DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2453#else
2454DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2455{
2456# if RT_INLINE_ASM_USES_INTRIN
2457 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2458 return u32;
2459
2460# elif RT_INLINE_ASM_GNU_STYLE
2461 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2462 : "=r" (u32),
2463 "=m" (*pu32)
2464 : "0" (u32),
2465 "m" (*pu32)
2466 : "memory");
2467 return u32;
2468# else
2469 __asm
2470 {
2471 mov eax, [u32]
2472# ifdef RT_ARCH_AMD64
2473 mov rdx, [pu32]
2474 lock xadd [rdx], eax
2475# else
2476 mov edx, [pu32]
2477 lock xadd [edx], eax
2478# endif
2479 mov [u32], eax
2480 }
2481 return u32;
2482# endif
2483}
2484#endif
2485
2486
2487/**
2488 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2489 *
2490 * @returns The old value.
2491 * @param pi32 Pointer to the value.
2492 * @param i32 Number to add.
2493 */
2494DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2495{
2496 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2497}
2498
2499
2500/**
2501 * Atomically exchanges and adds to a 64-bit value, ordered.
2502 *
2503 * @returns The old value.
2504 * @param pu64 Pointer to the value.
2505 * @param u64 Number to add.
2506 */
2507#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2508DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2509#else
2510DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2511{
2512# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2513 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2514 return u64;
2515
2516# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2517 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2518 : "=r" (u64),
2519 "=m" (*pu64)
2520 : "0" (u64),
2521 "m" (*pu64)
2522 : "memory");
2523 return u64;
2524# else
2525 uint64_t u64Old;
2526 for (;;)
2527 {
2528 uint64_t u64New;
2529 u64Old = ASMAtomicUoReadU64(pu64);
2530 u64New = u64Old + u64;
2531 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2532 break;
2533 ASMNopPause();
2534 }
2535 return u64Old;
2536# endif
2537}
2538#endif
2539
2540
2541/**
2542 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2543 *
2544 * @returns The old value.
2545 * @param pi64 Pointer to the value.
2546 * @param i64 Number to add.
2547 */
2548DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2549{
2550 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2551}
2552
2553
2554/**
2555 * Atomically exchanges and adds to a size_t value, ordered.
2556 *
2557 * @returns The old value.
2558 * @param pcb Pointer to the size_t value.
2559 * @param cb Number to add.
2560 */
2561DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2562{
2563#if ARCH_BITS == 64
2564 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2565#elif ARCH_BITS == 32
2566 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2567#else
2568# error "Unsupported ARCH_BITS value"
2569#endif
2570}
2571
2572
2573/**
2574 * Atomically exchanges and adds a value which size might differ between
2575 * platforms or compilers, ordered.
2576 *
2577 * @param pu Pointer to the variable to update.
2578 * @param uNew The value to add to *pu.
2579 * @param puOld Where to store the old value.
2580 */
/* Dispatches on sizeof(*(pu)); only 4 and 8 byte variables are handled. */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2589
2590
2591/**
2592 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2593 *
2594 * @returns The old value.
2595 * @param pu32 Pointer to the value.
2596 * @param u32 Number to subtract.
2597 */
2598DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2599{
2600 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2601}
2602
2603
2604/**
2605 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2606 *
2607 * @returns The old value.
2608 * @param pi32 Pointer to the value.
2609 * @param i32 Number to subtract.
2610 */
2611DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2612{
2613 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2614}
2615
2616
2617/**
2618 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2619 *
2620 * @returns The old value.
2621 * @param pu64 Pointer to the value.
2622 * @param u64 Number to subtract.
2623 */
2624DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2625{
2626 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2627}
2628
2629
2630/**
2631 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2632 *
2633 * @returns The old value.
2634 * @param pi64 Pointer to the value.
2635 * @param i64 Number to subtract.
2636 */
2637DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2638{
2639 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2640}
2641
2642
2643/**
2644 * Atomically exchanges and subtracts to a size_t value, ordered.
2645 *
2646 * @returns The old value.
2647 * @param pcb Pointer to the size_t value.
2648 * @param cb Number to subtract.
2649 */
2650DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2651{
2652#if ARCH_BITS == 64
2653 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2654#elif ARCH_BITS == 32
2655 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2656#else
2657# error "Unsupported ARCH_BITS value"
2658#endif
2659}
2660
2661
2662/**
2663 * Atomically exchanges and subtracts a value which size might differ between
2664 * platforms or compilers, ordered.
2665 *
2666 * @param pu Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
2668 * @param puOld Where to store the old value.
2669 */
/* Dispatches on sizeof(*(pu)); only 4 and 8 byte variables are handled. */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2678
2679
2680/**
2681 * Atomically increment a 32-bit value, ordered.
2682 *
2683 * @returns The new value.
2684 * @param pu32 Pointer to the value to increment.
2685 */
2686#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2687DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2688#else
2689DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2690{
2691 uint32_t u32;
2692# if RT_INLINE_ASM_USES_INTRIN
2693 u32 = _InterlockedIncrement((long *)pu32);
2694 return u32;
2695
2696# elif RT_INLINE_ASM_GNU_STYLE
2697 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2698 : "=r" (u32),
2699 "=m" (*pu32)
2700 : "0" (1),
2701 "m" (*pu32)
2702 : "memory");
2703 return u32+1;
2704# else
2705 __asm
2706 {
2707 mov eax, 1
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 lock xadd [rdx], eax
2711# else
2712 mov edx, [pu32]
2713 lock xadd [edx], eax
2714# endif
2715 mov u32, eax
2716 }
2717 return u32+1;
2718# endif
2719}
2720#endif
2721
2722
2723/**
2724 * Atomically increment a signed 32-bit value, ordered.
2725 *
2726 * @returns The new value.
2727 * @param pi32 Pointer to the value to increment.
2728 */
2729DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2730{
2731 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2732}
2733
2734
2735/**
2736 * Atomically increment a 64-bit value, ordered.
2737 *
2738 * @returns The new value.
2739 * @param pu64 Pointer to the value to increment.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2742DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2743#else
2744DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2745{
2746# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2747 uint64_t u64;
2748 u64 = _InterlockedIncrement64((__int64 *)pu64);
2749 return u64;
2750
2751# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2752 uint64_t u64;
2753 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2754 : "=r" (u64),
2755 "=m" (*pu64)
2756 : "0" (1),
2757 "m" (*pu64)
2758 : "memory");
2759 return u64 + 1;
2760# else
2761 return ASMAtomicAddU64(pu64, 1) + 1;
2762# endif
2763}
2764#endif
2765
2766
2767/**
2768 * Atomically increment a signed 64-bit value, ordered.
2769 *
2770 * @returns The new value.
2771 * @param pi64 Pointer to the value to increment.
2772 */
2773DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2774{
2775 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2776}
2777
2778
2779/**
2780 * Atomically increment a size_t value, ordered.
2781 *
2782 * @returns The new value.
2783 * @param pcb Pointer to the value to increment.
2784 */
2785DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
2786{
2787#if ARCH_BITS == 64
2788 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2789#elif ARCH_BITS == 32
2790 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2791#else
2792# error "Unsupported ARCH_BITS value"
2793#endif
2794}
2795
2796
2797/**
2798 * Atomically decrement an unsigned 32-bit value, ordered.
2799 *
2800 * @returns The new value.
2801 * @param pu32 Pointer to the value to decrement.
2802 */
2803#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2804DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2805#else
2806DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2807{
2808 uint32_t u32;
2809# if RT_INLINE_ASM_USES_INTRIN
2810 u32 = _InterlockedDecrement((long *)pu32);
2811 return u32;
2812
2813# elif RT_INLINE_ASM_GNU_STYLE
2814 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2815 : "=r" (u32),
2816 "=m" (*pu32)
2817 : "0" (-1),
2818 "m" (*pu32)
2819 : "memory");
2820 return u32-1;
2821# else
2822 __asm
2823 {
2824 mov eax, -1
2825# ifdef RT_ARCH_AMD64
2826 mov rdx, [pu32]
2827 lock xadd [rdx], eax
2828# else
2829 mov edx, [pu32]
2830 lock xadd [edx], eax
2831# endif
2832 mov u32, eax
2833 }
2834 return u32-1;
2835# endif
2836}
2837#endif
2838
2839
2840/**
2841 * Atomically decrement a signed 32-bit value, ordered.
2842 *
2843 * @returns The new value.
2844 * @param pi32 Pointer to the value to decrement.
2845 */
2846DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2847{
2848 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2849}
2850
2851
2852/**
2853 * Atomically decrement an unsigned 64-bit value, ordered.
2854 *
2855 * @returns The new value.
2856 * @param pu64 Pointer to the value to decrement.
2857 */
2858#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2859DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2860#else
2861DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2862{
2863# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2864 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2865 return u64;
2866
2867# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2868 uint64_t u64;
2869 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2870 : "=r" (u64),
2871 "=m" (*pu64)
2872 : "0" (~(uint64_t)0),
2873 "m" (*pu64)
2874 : "memory");
2875 return u64-1;
2876# else
2877 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2878# endif
2879}
2880#endif
2881
2882
2883/**
2884 * Atomically decrement a signed 64-bit value, ordered.
2885 *
2886 * @returns The new value.
2887 * @param pi64 Pointer to the value to decrement.
2888 */
2889DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2890{
2891 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2892}
2893
2894
2895/**
2896 * Atomically decrement a size_t value, ordered.
2897 *
2898 * @returns The new value.
2899 * @param pcb Pointer to the value to decrement.
2900 */
2901DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
2902{
2903#if ARCH_BITS == 64
2904 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2905#elif ARCH_BITS == 32
2906 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2907#else
2908# error "Unsupported ARCH_BITS value"
2909#endif
2910}
2911
2912
2913/**
2914 * Atomically Or an unsigned 32-bit value, ordered.
2915 *
2916 * @param pu32 Pointer to the pointer variable to OR u32 with.
2917 * @param u32 The value to OR *pu32 with.
2918 */
2919#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2920DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2921#else
2922DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2923{
2924# if RT_INLINE_ASM_USES_INTRIN
2925 _InterlockedOr((long volatile *)pu32, (long)u32);
2926
2927# elif RT_INLINE_ASM_GNU_STYLE
2928 __asm__ __volatile__("lock; orl %1, %0\n\t"
2929 : "=m" (*pu32)
2930 : "ir" (u32),
2931 "m" (*pu32));
2932# else
2933 __asm
2934 {
2935 mov eax, [u32]
2936# ifdef RT_ARCH_AMD64
2937 mov rdx, [pu32]
2938 lock or [rdx], eax
2939# else
2940 mov edx, [pu32]
2941 lock or [edx], eax
2942# endif
2943 }
2944# endif
2945}
2946#endif
2947
2948
2949/**
2950 * Atomically Or a signed 32-bit value, ordered.
2951 *
 * @param   pi32    Pointer to the variable to OR i32 with.
 * @param   i32     The value to OR *pi32 with.
2954 */
2955DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2956{
2957 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2958}
2959
2960
2961/**
2962 * Atomically Or an unsigned 64-bit value, ordered.
2963 *
2964 * @param pu64 Pointer to the pointer variable to OR u64 with.
2965 * @param u64 The value to OR *pu64 with.
2966 */
2967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2968DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2969#else
2970DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2971{
2972# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2973 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2974
2975# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2976 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2977 : "=m" (*pu64)
2978 : "r" (u64),
2979 "m" (*pu64));
2980# else
2981 for (;;)
2982 {
2983 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2984 uint64_t u64New = u64Old | u64;
2985 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2986 break;
2987 ASMNopPause();
2988 }
2989# endif
2990}
2991#endif
2992
2993
2994/**
2995 * Atomically Or a signed 64-bit value, ordered.
2996 *
 * @param   pi64    Pointer to the variable to OR i64 with.
 * @param   i64     The value to OR *pi64 with.
2999 */
3000DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3001{
3002 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3003}
3004
3005
3006/**
3007 * Atomically And an unsigned 32-bit value, ordered.
3008 *
3009 * @param pu32 Pointer to the pointer variable to AND u32 with.
3010 * @param u32 The value to AND *pu32 with.
3011 */
3012#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3013DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3014#else
3015DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3016{
3017# if RT_INLINE_ASM_USES_INTRIN
3018 _InterlockedAnd((long volatile *)pu32, u32);
3019
3020# elif RT_INLINE_ASM_GNU_STYLE
3021 __asm__ __volatile__("lock; andl %1, %0\n\t"
3022 : "=m" (*pu32)
3023 : "ir" (u32),
3024 "m" (*pu32));
3025# else
3026 __asm
3027 {
3028 mov eax, [u32]
3029# ifdef RT_ARCH_AMD64
3030 mov rdx, [pu32]
3031 lock and [rdx], eax
3032# else
3033 mov edx, [pu32]
3034 lock and [edx], eax
3035# endif
3036 }
3037# endif
3038}
3039#endif
3040
3041
3042/**
3043 * Atomically And a signed 32-bit value, ordered.
3044 *
3045 * @param pi32 Pointer to the pointer variable to AND i32 with.
3046 * @param i32 The value to AND *pi32 with.
3047 */
3048DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3049{
3050 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3051}
3052
3053
3054/**
3055 * Atomically And an unsigned 64-bit value, ordered.
3056 *
3057 * @param pu64 Pointer to the pointer variable to AND u64 with.
3058 * @param u64 The value to AND *pu64 with.
3059 */
3060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3061DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3062#else
3063DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3064{
3065# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3066 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3067
3068# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3069 __asm__ __volatile__("lock; andq %1, %0\n\t"
3070 : "=m" (*pu64)
3071 : "r" (u64),
3072 "m" (*pu64));
3073# else
3074 for (;;)
3075 {
3076 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3077 uint64_t u64New = u64Old & u64;
3078 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3079 break;
3080 ASMNopPause();
3081 }
3082# endif
3083}
3084#endif
3085
3086
3087/**
3088 * Atomically And a signed 64-bit value, ordered.
3089 *
3090 * @param pi64 Pointer to the pointer variable to AND i64 with.
3091 * @param i64 The value to AND *pi64 with.
3092 */
3093DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3094{
3095 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3096}
3097
3098
3099/**
3100 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3101 *
3102 * @param pu32 Pointer to the pointer variable to OR u32 with.
3103 * @param u32 The value to OR *pu32 with.
3104 */
3105#if RT_INLINE_ASM_EXTERNAL
3106DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3107#else
3108DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3109{
3110# if RT_INLINE_ASM_GNU_STYLE
3111 __asm__ __volatile__("orl %1, %0\n\t"
3112 : "=m" (*pu32)
3113 : "ir" (u32),
3114 "m" (*pu32));
3115# else
3116 __asm
3117 {
3118 mov eax, [u32]
3119# ifdef RT_ARCH_AMD64
3120 mov rdx, [pu32]
3121 or [rdx], eax
3122# else
3123 mov edx, [pu32]
3124 or [edx], eax
3125# endif
3126 }
3127# endif
3128}
3129#endif
3130
3131
3132/**
3133 * Atomically OR a signed 32-bit value, unordered.
3134 *
 * @param   pi32    Pointer to the variable to OR i32 with.
 * @param   i32     The value to OR *pi32 with.
3137 */
3138DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3139{
3140 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3141}
3142
3143
3144/**
3145 * Atomically OR an unsigned 64-bit value, unordered.
3146 *
3147 * @param pu64 Pointer to the pointer variable to OR u64 with.
3148 * @param u64 The value to OR *pu64 with.
3149 */
3150#if RT_INLINE_ASM_EXTERNAL
3151DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3152#else
3153DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3154{
3155# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3156 __asm__ __volatile__("orq %1, %q0\n\t"
3157 : "=m" (*pu64)
3158 : "r" (u64),
3159 "m" (*pu64));
3160# else
3161 for (;;)
3162 {
3163 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3164 uint64_t u64New = u64Old | u64;
3165 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3166 break;
3167 ASMNopPause();
3168 }
3169# endif
3170}
3171#endif
3172
3173
3174/**
3175 * Atomically Or a signed 64-bit value, unordered.
3176 *
 * @param   pi64    Pointer to the variable to OR i64 with.
 * @param   i64     The value to OR *pi64 with.
3179 */
3180DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3181{
3182 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3183}
3184
3185
3186/**
3187 * Atomically And an unsigned 32-bit value, unordered.
3188 *
3189 * @param pu32 Pointer to the pointer variable to AND u32 with.
3190 * @param u32 The value to AND *pu32 with.
3191 */
3192#if RT_INLINE_ASM_EXTERNAL
3193DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3194#else
3195DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3196{
3197# if RT_INLINE_ASM_GNU_STYLE
3198 __asm__ __volatile__("andl %1, %0\n\t"
3199 : "=m" (*pu32)
3200 : "ir" (u32),
3201 "m" (*pu32));
3202# else
3203 __asm
3204 {
3205 mov eax, [u32]
3206# ifdef RT_ARCH_AMD64
3207 mov rdx, [pu32]
3208 and [rdx], eax
3209# else
3210 mov edx, [pu32]
3211 and [edx], eax
3212# endif
3213 }
3214# endif
3215}
3216#endif
3217
3218
3219/**
3220 * Atomically And a signed 32-bit value, unordered.
3221 *
3222 * @param pi32 Pointer to the pointer variable to AND i32 with.
3223 * @param i32 The value to AND *pi32 with.
3224 */
3225DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3226{
3227 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3228}
3229
3230
3231/**
3232 * Atomically And an unsigned 64-bit value, unordered.
3233 *
3234 * @param pu64 Pointer to the pointer variable to AND u64 with.
3235 * @param u64 The value to AND *pu64 with.
3236 */
3237#if RT_INLINE_ASM_EXTERNAL
3238DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3239#else
3240DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3241{
3242# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3243 __asm__ __volatile__("andq %1, %0\n\t"
3244 : "=m" (*pu64)
3245 : "r" (u64),
3246 "m" (*pu64));
3247# else
3248 for (;;)
3249 {
3250 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3251 uint64_t u64New = u64Old & u64;
3252 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3253 break;
3254 ASMNopPause();
3255 }
3256# endif
3257}
3258#endif
3259
3260
3261/**
3262 * Atomically And a signed 64-bit value, unordered.
3263 *
3264 * @param pi64 Pointer to the pointer variable to AND i64 with.
3265 * @param i64 The value to AND *pi64 with.
3266 */
3267DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3268{
3269 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3270}
3271
3272
3273/**
3274 * Atomically increment an unsigned 32-bit value, unordered.
3275 *
3276 * @returns the new value.
3277 * @param pu32 Pointer to the variable to increment.
3278 */
3279#if RT_INLINE_ASM_EXTERNAL
3280DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3281#else
3282DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3283{
3284 uint32_t u32;
3285# if RT_INLINE_ASM_GNU_STYLE
3286 __asm__ __volatile__("xaddl %0, %1\n\t"
3287 : "=r" (u32),
3288 "=m" (*pu32)
3289 : "0" (1),
3290 "m" (*pu32)
3291 : "memory");
3292 return u32 + 1;
3293# else
3294 __asm
3295 {
3296 mov eax, 1
3297# ifdef RT_ARCH_AMD64
3298 mov rdx, [pu32]
3299 xadd [rdx], eax
3300# else
3301 mov edx, [pu32]
3302 xadd [edx], eax
3303# endif
3304 mov u32, eax
3305 }
3306 return u32 + 1;
3307# endif
3308}
3309#endif
3310
3311
3312/**
3313 * Atomically decrement an unsigned 32-bit value, unordered.
3314 *
3315 * @returns the new value.
3316 * @param pu32 Pointer to the variable to decrement.
3317 */
3318#if RT_INLINE_ASM_EXTERNAL
3319DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3320#else
3321DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3322{
3323 uint32_t u32;
3324# if RT_INLINE_ASM_GNU_STYLE
3325 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3326 : "=r" (u32),
3327 "=m" (*pu32)
3328 : "0" (-1),
3329 "m" (*pu32)
3330 : "memory");
3331 return u32 - 1;
3332# else
3333 __asm
3334 {
3335 mov eax, -1
3336# ifdef RT_ARCH_AMD64
3337 mov rdx, [pu32]
3338 xadd [rdx], eax
3339# else
3340 mov edx, [pu32]
3341 xadd [edx], eax
3342# endif
3343 mov u32, eax
3344 }
3345 return u32 - 1;
3346# endif
3347}
3348#endif
3349
3350
/** @def RT_ASM_PAGE_SIZE
 * The page size assumed by this header: 0x2000 when RT_ARCH_SPARC64 is
 * defined, 0x1000 otherwise.  It is defined locally so that iprt/param.h
 * does not have to be dragged in.  If PAGE_SIZE is already defined (and
 * NT_INCLUDED is not), it must agree with the value picked here.
 * @internal
 */
#ifdef RT_ARCH_SPARC64
# define RT_ASM_PAGE_SIZE   0x2000
# ifdef PAGE_SIZE
#  ifndef NT_INCLUDED
#   if PAGE_SIZE != 0x2000
#    error "PAGE_SIZE is not 0x2000!"
#   endif
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# ifdef PAGE_SIZE
#  ifndef NT_INCLUDED
#   if PAGE_SIZE != 0x1000
#    error "PAGE_SIZE is not 0x1000!"
#   endif
#  endif
# endif
#endif
3370
3371/**
3372 * Zeros a 4K memory page.
3373 *
3374 * @param pv Pointer to the memory block. This must be page aligned.
3375 */
3376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3377DECLASM(void) ASMMemZeroPage(volatile void *pv);
3378# else
3379DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3380{
3381# if RT_INLINE_ASM_USES_INTRIN
3382# ifdef RT_ARCH_AMD64
3383 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3384# else
3385 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3386# endif
3387
3388# elif RT_INLINE_ASM_GNU_STYLE
3389 RTCCUINTREG uDummy;
3390# ifdef RT_ARCH_AMD64
3391 __asm__ __volatile__("rep stosq"
3392 : "=D" (pv),
3393 "=c" (uDummy)
3394 : "0" (pv),
3395 "c" (RT_ASM_PAGE_SIZE >> 3),
3396 "a" (0)
3397 : "memory");
3398# else
3399 __asm__ __volatile__("rep stosl"
3400 : "=D" (pv),
3401 "=c" (uDummy)
3402 : "0" (pv),
3403 "c" (RT_ASM_PAGE_SIZE >> 2),
3404 "a" (0)
3405 : "memory");
3406# endif
3407# else
3408 __asm
3409 {
3410# ifdef RT_ARCH_AMD64
3411 xor rax, rax
3412 mov ecx, 0200h
3413 mov rdi, [pv]
3414 rep stosq
3415# else
3416 xor eax, eax
3417 mov ecx, 0400h
3418 mov edi, [pv]
3419 rep stosd
3420# endif
3421 }
3422# endif
3423}
3424# endif
3425
3426
3427/**
3428 * Zeros a memory block with a 32-bit aligned size.
3429 *
3430 * @param pv Pointer to the memory block.
3431 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3432 */
3433#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3434DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3435#else
3436DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3437{
3438# if RT_INLINE_ASM_USES_INTRIN
3439# ifdef RT_ARCH_AMD64
3440 if (!(cb & 7))
3441 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3442 else
3443# endif
3444 __stosd((unsigned long *)pv, 0, cb / 4);
3445
3446# elif RT_INLINE_ASM_GNU_STYLE
3447 __asm__ __volatile__("rep stosl"
3448 : "=D" (pv),
3449 "=c" (cb)
3450 : "0" (pv),
3451 "1" (cb >> 2),
3452 "a" (0)
3453 : "memory");
3454# else
3455 __asm
3456 {
3457 xor eax, eax
3458# ifdef RT_ARCH_AMD64
3459 mov rcx, [cb]
3460 shr rcx, 2
3461 mov rdi, [pv]
3462# else
3463 mov ecx, [cb]
3464 shr ecx, 2
3465 mov edi, [pv]
3466# endif
3467 rep stosd
3468 }
3469# endif
3470}
3471#endif
3472
3473
3474/**
3475 * Fills a memory block with a 32-bit aligned size.
3476 *
3477 * @param pv Pointer to the memory block.
3478 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3479 * @param u32 The value to fill with.
3480 */
3481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3482DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3483#else
3484DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3485{
3486# if RT_INLINE_ASM_USES_INTRIN
3487# ifdef RT_ARCH_AMD64
3488 if (!(cb & 7))
3489 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3490 else
3491# endif
3492 __stosd((unsigned long *)pv, u32, cb / 4);
3493
3494# elif RT_INLINE_ASM_GNU_STYLE
3495 __asm__ __volatile__("rep stosl"
3496 : "=D" (pv),
3497 "=c" (cb)
3498 : "0" (pv),
3499 "1" (cb >> 2),
3500 "a" (u32)
3501 : "memory");
3502# else
3503 __asm
3504 {
3505# ifdef RT_ARCH_AMD64
3506 mov rcx, [cb]
3507 shr rcx, 2
3508 mov rdi, [pv]
3509# else
3510 mov ecx, [cb]
3511 shr ecx, 2
3512 mov edi, [pv]
3513# endif
3514 mov eax, [u32]
3515 rep stosd
3516 }
3517# endif
3518}
3519#endif
3520
3521
3522/**
3523 * Checks if a memory page is all zeros.
3524 *
3525 * @returns true / false.
3526 *
3527 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3528 * boundary
3529 */
3530DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3531{
3532# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3533 union { RTCCUINTREG r; bool f; } uAX;
3534 RTCCUINTREG xCX, xDI;
3535 Assert(!((uintptr_t)pvPage & 15));
3536 __asm__ __volatile__("repe; "
3537# ifdef RT_ARCH_AMD64
3538 "scasq\n\t"
3539# else
3540 "scasl\n\t"
3541# endif
3542 "setnc %%al\n\t"
3543 : "=&c" (xCX),
3544 "=&D" (xDI),
3545 "=&a" (uAX.r)
3546 : "mr" (pvPage),
3547# ifdef RT_ARCH_AMD64
3548 "0" (RT_ASM_PAGE_SIZE/8),
3549# else
3550 "0" (RT_ASM_PAGE_SIZE/4),
3551# endif
3552 "1" (pvPage),
3553 "2" (0));
3554 return uAX.f;
3555# else
3556 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3557 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3558 Assert(!((uintptr_t)pvPage & 15));
3559 for (;;)
3560 {
3561 if (puPtr[0]) return false;
3562 if (puPtr[4]) return false;
3563
3564 if (puPtr[2]) return false;
3565 if (puPtr[6]) return false;
3566
3567 if (puPtr[1]) return false;
3568 if (puPtr[5]) return false;
3569
3570 if (puPtr[3]) return false;
3571 if (puPtr[7]) return false;
3572
3573 if (!--cLeft)
3574 return true;
3575 puPtr += 8;
3576 }
3577 return true;
3578# endif
3579}
3580
3581
3582/**
3583 * Checks if a memory block is filled with the specified byte.
3584 *
3585 * This is a sort of inverted memchr.
3586 *
3587 * @returns Pointer to the byte which doesn't equal u8.
3588 * @returns NULL if all equal to u8.
3589 *
3590 * @param pv Pointer to the memory block.
3591 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3592 * @param u8 The value it's supposed to be filled with.
3593 *
3594 * @todo Fix name, it is a predicate function but it's not returning boolean!
3595 */
3596DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3597{
3598/** @todo rewrite this in inline assembly? */
3599 uint8_t const *pb = (uint8_t const *)pv;
3600 for (; cb; cb--, pb++)
3601 if (RT_LIKELY(*pb == u8))
3602 { /* likely */ }
3603 else
3604 return (void *)pb;
3605 return NULL;
3606}
3607
3608
3609/**
3610 * Checks if a memory block is filled with the specified 32-bit value.
3611 *
3612 * This is a sort of inverted memchr.
3613 *
3614 * @returns Pointer to the first value which doesn't equal u32.
3615 * @returns NULL if all equal to u32.
3616 *
3617 * @param pv Pointer to the memory block.
3618 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3619 * @param u32 The value it's supposed to be filled with.
3620 *
3621 * @todo Fix name, it is a predicate function but it's not returning boolean!
3622 */
3623DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3624{
3625/** @todo rewrite this in inline assembly? */
3626 uint32_t const *pu32 = (uint32_t const *)pv;
3627 for (; cb; cb -= 4, pu32++)
3628 if (RT_LIKELY(*pu32 == u32))
3629 { /* likely */ }
3630 else
3631 return (uint32_t *)pu32;
3632 return NULL;
3633}
3634
3635
3636/**
3637 * Probes a byte pointer for read access.
3638 *
3639 * While the function will not fault if the byte is not read accessible,
3640 * the idea is to do this in a safe place like before acquiring locks
3641 * and such like.
3642 *
3643 * Also, this functions guarantees that an eager compiler is not going
3644 * to optimize the probing away.
3645 *
3646 * @param pvByte Pointer to the byte.
3647 */
3648#if RT_INLINE_ASM_EXTERNAL
3649DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3650#else
3651DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3652{
3653 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3654 uint8_t u8;
3655# if RT_INLINE_ASM_GNU_STYLE
3656 __asm__ __volatile__("movb (%1), %0\n\t"
3657 : "=r" (u8)
3658 : "r" (pvByte));
3659# else
3660 __asm
3661 {
3662# ifdef RT_ARCH_AMD64
3663 mov rax, [pvByte]
3664 mov al, [rax]
3665# else
3666 mov eax, [pvByte]
3667 mov al, [eax]
3668# endif
3669 mov [u8], al
3670 }
3671# endif
3672 return u8;
3673}
3674#endif
3675
3676/**
3677 * Probes a buffer for read access page by page.
3678 *
3679 * While the function will fault if the buffer is not fully read
3680 * accessible, the idea is to do this in a safe place like before
3681 * acquiring locks and such like.
3682 *
3683 * Also, this functions guarantees that an eager compiler is not going
3684 * to optimize the probing away.
3685 *
3686 * @param pvBuf Pointer to the buffer.
3687 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3688 */
3689DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3690{
3691 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3692 /* the first byte */
3693 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3694 ASMProbeReadByte(pu8);
3695
3696 /* the pages in between pages. */
3697 while (cbBuf > RT_ASM_PAGE_SIZE)
3698 {
3699 ASMProbeReadByte(pu8);
3700 cbBuf -= RT_ASM_PAGE_SIZE;
3701 pu8 += RT_ASM_PAGE_SIZE;
3702 }
3703
3704 /* the last byte */
3705 ASMProbeReadByte(pu8 + cbBuf - 1);
3706}
3707
3708
3709
3710/** @defgroup grp_inline_bits Bit Operations
3711 * @{
3712 */
3713
3714
3715/**
3716 * Sets a bit in a bitmap.
3717 *
3718 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3719 * @param iBit The bit to set.
3720 *
3721 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3722 * However, doing so will yield better performance as well as avoiding
3723 * traps accessing the last bits in the bitmap.
3724 */
3725#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3726DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3727#else
3728DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3729{
3730# if RT_INLINE_ASM_USES_INTRIN
3731 _bittestandset((long *)pvBitmap, iBit);
3732
3733# elif RT_INLINE_ASM_GNU_STYLE
3734 __asm__ __volatile__("btsl %1, %0"
3735 : "=m" (*(volatile long *)pvBitmap)
3736 : "Ir" (iBit),
3737 "m" (*(volatile long *)pvBitmap)
3738 : "memory");
3739# else
3740 __asm
3741 {
3742# ifdef RT_ARCH_AMD64
3743 mov rax, [pvBitmap]
3744 mov edx, [iBit]
3745 bts [rax], edx
3746# else
3747 mov eax, [pvBitmap]
3748 mov edx, [iBit]
3749 bts [eax], edx
3750# endif
3751 }
3752# endif
3753}
3754#endif
3755
3756
3757/**
3758 * Atomically sets a bit in a bitmap, ordered.
3759 *
3760 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3761 * the memory access isn't atomic!
3762 * @param iBit The bit to set.
3763 */
3764#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3765DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3766#else
3767DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3768{
3769 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3770# if RT_INLINE_ASM_USES_INTRIN
3771 _interlockedbittestandset((long *)pvBitmap, iBit);
3772# elif RT_INLINE_ASM_GNU_STYLE
3773 __asm__ __volatile__("lock; btsl %1, %0"
3774 : "=m" (*(volatile long *)pvBitmap)
3775 : "Ir" (iBit),
3776 "m" (*(volatile long *)pvBitmap)
3777 : "memory");
3778# else
3779 __asm
3780 {
3781# ifdef RT_ARCH_AMD64
3782 mov rax, [pvBitmap]
3783 mov edx, [iBit]
3784 lock bts [rax], edx
3785# else
3786 mov eax, [pvBitmap]
3787 mov edx, [iBit]
3788 lock bts [eax], edx
3789# endif
3790 }
3791# endif
3792}
3793#endif
3794
3795
3796/**
3797 * Clears a bit in a bitmap.
3798 *
3799 * @param pvBitmap Pointer to the bitmap.
3800 * @param iBit The bit to clear.
3801 *
3802 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3803 * However, doing so will yield better performance as well as avoiding
3804 * traps accessing the last bits in the bitmap.
3805 */
3806#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3807DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3808#else
3809DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3810{
3811# if RT_INLINE_ASM_USES_INTRIN
3812 _bittestandreset((long *)pvBitmap, iBit);
3813
3814# elif RT_INLINE_ASM_GNU_STYLE
3815 __asm__ __volatile__("btrl %1, %0"
3816 : "=m" (*(volatile long *)pvBitmap)
3817 : "Ir" (iBit),
3818 "m" (*(volatile long *)pvBitmap)
3819 : "memory");
3820# else
3821 __asm
3822 {
3823# ifdef RT_ARCH_AMD64
3824 mov rax, [pvBitmap]
3825 mov edx, [iBit]
3826 btr [rax], edx
3827# else
3828 mov eax, [pvBitmap]
3829 mov edx, [iBit]
3830 btr [eax], edx
3831# endif
3832 }
3833# endif
3834}
3835#endif
3836
3837
3838/**
3839 * Atomically clears a bit in a bitmap, ordered.
3840 *
3841 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3842 * the memory access isn't atomic!
3843 * @param iBit The bit to toggle set.
3844 * @remarks No memory barrier, take care on smp.
3845 */
3846#if RT_INLINE_ASM_EXTERNAL
3847DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3848#else
3849DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3850{
3851 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3852# if RT_INLINE_ASM_GNU_STYLE
3853 __asm__ __volatile__("lock; btrl %1, %0"
3854 : "=m" (*(volatile long *)pvBitmap)
3855 : "Ir" (iBit),
3856 "m" (*(volatile long *)pvBitmap)
3857 : "memory");
3858# else
3859 __asm
3860 {
3861# ifdef RT_ARCH_AMD64
3862 mov rax, [pvBitmap]
3863 mov edx, [iBit]
3864 lock btr [rax], edx
3865# else
3866 mov eax, [pvBitmap]
3867 mov edx, [iBit]
3868 lock btr [eax], edx
3869# endif
3870 }
3871# endif
3872}
3873#endif
3874
3875
3876/**
3877 * Toggles a bit in a bitmap.
3878 *
3879 * @param pvBitmap Pointer to the bitmap.
3880 * @param iBit The bit to toggle.
3881 *
3882 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3883 * However, doing so will yield better performance as well as avoiding
3884 * traps accessing the last bits in the bitmap.
3885 */
3886#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3887DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3888#else
3889DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3890{
3891# if RT_INLINE_ASM_USES_INTRIN
3892 _bittestandcomplement((long *)pvBitmap, iBit);
3893# elif RT_INLINE_ASM_GNU_STYLE
3894 __asm__ __volatile__("btcl %1, %0"
3895 : "=m" (*(volatile long *)pvBitmap)
3896 : "Ir" (iBit),
3897 "m" (*(volatile long *)pvBitmap)
3898 : "memory");
3899# else
3900 __asm
3901 {
3902# ifdef RT_ARCH_AMD64
3903 mov rax, [pvBitmap]
3904 mov edx, [iBit]
3905 btc [rax], edx
3906# else
3907 mov eax, [pvBitmap]
3908 mov edx, [iBit]
3909 btc [eax], edx
3910# endif
3911 }
3912# endif
3913}
3914#endif
3915
3916
3917/**
3918 * Atomically toggles a bit in a bitmap, ordered.
3919 *
3920 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3921 * the memory access isn't atomic!
3922 * @param iBit The bit to test and set.
3923 */
3924#if RT_INLINE_ASM_EXTERNAL
3925DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3926#else
3927DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3928{
3929 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3930# if RT_INLINE_ASM_GNU_STYLE
3931 __asm__ __volatile__("lock; btcl %1, %0"
3932 : "=m" (*(volatile long *)pvBitmap)
3933 : "Ir" (iBit),
3934 "m" (*(volatile long *)pvBitmap)
3935 : "memory");
3936# else
3937 __asm
3938 {
3939# ifdef RT_ARCH_AMD64
3940 mov rax, [pvBitmap]
3941 mov edx, [iBit]
3942 lock btc [rax], edx
3943# else
3944 mov eax, [pvBitmap]
3945 mov edx, [iBit]
3946 lock btc [eax], edx
3947# endif
3948 }
3949# endif
3950}
3951#endif
3952
3953
3954/**
3955 * Tests and sets a bit in a bitmap.
3956 *
3957 * @returns true if the bit was set.
3958 * @returns false if the bit was clear.
3959 *
3960 * @param pvBitmap Pointer to the bitmap.
3961 * @param iBit The bit to test and set.
3962 *
3963 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3964 * However, doing so will yield better performance as well as avoiding
3965 * traps accessing the last bits in the bitmap.
3966 */
3967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3968DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3969#else
3970DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3971{
3972 union { bool f; uint32_t u32; uint8_t u8; } rc;
3973# if RT_INLINE_ASM_USES_INTRIN
3974 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3975
3976# elif RT_INLINE_ASM_GNU_STYLE
3977 __asm__ __volatile__("btsl %2, %1\n\t"
3978 "setc %b0\n\t"
3979 "andl $1, %0\n\t"
3980 : "=q" (rc.u32),
3981 "=m" (*(volatile long *)pvBitmap)
3982 : "Ir" (iBit),
3983 "m" (*(volatile long *)pvBitmap)
3984 : "memory");
3985# else
3986 __asm
3987 {
3988 mov edx, [iBit]
3989# ifdef RT_ARCH_AMD64
3990 mov rax, [pvBitmap]
3991 bts [rax], edx
3992# else
3993 mov eax, [pvBitmap]
3994 bts [eax], edx
3995# endif
3996 setc al
3997 and eax, 1
3998 mov [rc.u32], eax
3999 }
4000# endif
4001 return rc.f;
4002}
4003#endif
4004
4005
4006/**
4007 * Atomically tests and sets a bit in a bitmap, ordered.
4008 *
4009 * @returns true if the bit was set.
4010 * @returns false if the bit was clear.
4011 *
4012 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4013 * the memory access isn't atomic!
4014 * @param iBit The bit to set.
4015 */
4016#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4017DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4018#else
4019DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4020{
4021 union { bool f; uint32_t u32; uint8_t u8; } rc;
4022 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4023# if RT_INLINE_ASM_USES_INTRIN
4024 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4025# elif RT_INLINE_ASM_GNU_STYLE
4026 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4027 "setc %b0\n\t"
4028 "andl $1, %0\n\t"
4029 : "=q" (rc.u32),
4030 "=m" (*(volatile long *)pvBitmap)
4031 : "Ir" (iBit),
4032 "m" (*(volatile long *)pvBitmap)
4033 : "memory");
4034# else
4035 __asm
4036 {
4037 mov edx, [iBit]
4038# ifdef RT_ARCH_AMD64
4039 mov rax, [pvBitmap]
4040 lock bts [rax], edx
4041# else
4042 mov eax, [pvBitmap]
4043 lock bts [eax], edx
4044# endif
4045 setc al
4046 and eax, 1
4047 mov [rc.u32], eax
4048 }
4049# endif
4050 return rc.f;
4051}
4052#endif
4053
4054
4055/**
4056 * Tests and clears a bit in a bitmap.
4057 *
4058 * @returns true if the bit was set.
4059 * @returns false if the bit was clear.
4060 *
4061 * @param pvBitmap Pointer to the bitmap.
4062 * @param iBit The bit to test and clear.
4063 *
4064 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4065 * However, doing so will yield better performance as well as avoiding
4066 * traps accessing the last bits in the bitmap.
4067 */
4068#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4069DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4070#else
4071DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4072{
4073 union { bool f; uint32_t u32; uint8_t u8; } rc;
4074# if RT_INLINE_ASM_USES_INTRIN
4075 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4076
4077# elif RT_INLINE_ASM_GNU_STYLE
4078 __asm__ __volatile__("btrl %2, %1\n\t"
4079 "setc %b0\n\t"
4080 "andl $1, %0\n\t"
4081 : "=q" (rc.u32),
4082 "=m" (*(volatile long *)pvBitmap)
4083 : "Ir" (iBit),
4084 "m" (*(volatile long *)pvBitmap)
4085 : "memory");
4086# else
4087 __asm
4088 {
4089 mov edx, [iBit]
4090# ifdef RT_ARCH_AMD64
4091 mov rax, [pvBitmap]
4092 btr [rax], edx
4093# else
4094 mov eax, [pvBitmap]
4095 btr [eax], edx
4096# endif
4097 setc al
4098 and eax, 1
4099 mov [rc.u32], eax
4100 }
4101# endif
4102 return rc.f;
4103}
4104#endif
4105
4106
4107/**
4108 * Atomically tests and clears a bit in a bitmap, ordered.
4109 *
4110 * @returns true if the bit was set.
4111 * @returns false if the bit was clear.
4112 *
4113 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4114 * the memory access isn't atomic!
4115 * @param iBit The bit to test and clear.
4116 *
4117 * @remarks No memory barrier, take care on smp.
4118 */
4119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4120DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4121#else
4122DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4123{
4124 union { bool f; uint32_t u32; uint8_t u8; } rc;
4125 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4126# if RT_INLINE_ASM_USES_INTRIN
4127 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4128
4129# elif RT_INLINE_ASM_GNU_STYLE
4130 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4131 "setc %b0\n\t"
4132 "andl $1, %0\n\t"
4133 : "=q" (rc.u32),
4134 "=m" (*(volatile long *)pvBitmap)
4135 : "Ir" (iBit),
4136 "m" (*(volatile long *)pvBitmap)
4137 : "memory");
4138# else
4139 __asm
4140 {
4141 mov edx, [iBit]
4142# ifdef RT_ARCH_AMD64
4143 mov rax, [pvBitmap]
4144 lock btr [rax], edx
4145# else
4146 mov eax, [pvBitmap]
4147 lock btr [eax], edx
4148# endif
4149 setc al
4150 and eax, 1
4151 mov [rc.u32], eax
4152 }
4153# endif
4154 return rc.f;
4155}
4156#endif
4157
4158
4159/**
4160 * Tests and toggles a bit in a bitmap.
4161 *
4162 * @returns true if the bit was set.
4163 * @returns false if the bit was clear.
4164 *
4165 * @param pvBitmap Pointer to the bitmap.
4166 * @param iBit The bit to test and toggle.
4167 *
4168 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4169 * However, doing so will yield better performance as well as avoiding
4170 * traps accessing the last bits in the bitmap.
4171 */
4172#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4173DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4174#else
4175DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4176{
4177 union { bool f; uint32_t u32; uint8_t u8; } rc;
4178# if RT_INLINE_ASM_USES_INTRIN
4179 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4180
4181# elif RT_INLINE_ASM_GNU_STYLE
4182 __asm__ __volatile__("btcl %2, %1\n\t"
4183 "setc %b0\n\t"
4184 "andl $1, %0\n\t"
4185 : "=q" (rc.u32),
4186 "=m" (*(volatile long *)pvBitmap)
4187 : "Ir" (iBit),
4188 "m" (*(volatile long *)pvBitmap)
4189 : "memory");
4190# else
4191 __asm
4192 {
4193 mov edx, [iBit]
4194# ifdef RT_ARCH_AMD64
4195 mov rax, [pvBitmap]
4196 btc [rax], edx
4197# else
4198 mov eax, [pvBitmap]
4199 btc [eax], edx
4200# endif
4201 setc al
4202 and eax, 1
4203 mov [rc.u32], eax
4204 }
4205# endif
4206 return rc.f;
4207}
4208#endif
4209
4210
4211/**
4212 * Atomically tests and toggles a bit in a bitmap, ordered.
4213 *
4214 * @returns true if the bit was set.
4215 * @returns false if the bit was clear.
4216 *
4217 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4218 * the memory access isn't atomic!
4219 * @param iBit The bit to test and toggle.
4220 */
4221#if RT_INLINE_ASM_EXTERNAL
4222DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4223#else
4224DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4225{
4226 union { bool f; uint32_t u32; uint8_t u8; } rc;
4227 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4228# if RT_INLINE_ASM_GNU_STYLE
4229 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4230 "setc %b0\n\t"
4231 "andl $1, %0\n\t"
4232 : "=q" (rc.u32),
4233 "=m" (*(volatile long *)pvBitmap)
4234 : "Ir" (iBit),
4235 "m" (*(volatile long *)pvBitmap)
4236 : "memory");
4237# else
4238 __asm
4239 {
4240 mov edx, [iBit]
4241# ifdef RT_ARCH_AMD64
4242 mov rax, [pvBitmap]
4243 lock btc [rax], edx
4244# else
4245 mov eax, [pvBitmap]
4246 lock btc [eax], edx
4247# endif
4248 setc al
4249 and eax, 1
4250 mov [rc.u32], eax
4251 }
4252# endif
4253 return rc.f;
4254}
4255#endif
4256
4257
4258/**
4259 * Tests if a bit in a bitmap is set.
4260 *
4261 * @returns true if the bit is set.
4262 * @returns false if the bit is clear.
4263 *
4264 * @param pvBitmap Pointer to the bitmap.
4265 * @param iBit The bit to test.
4266 *
4267 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4268 * However, doing so will yield better performance as well as avoiding
4269 * traps accessing the last bits in the bitmap.
4270 */
4271#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4272DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4273#else
4274DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4275{
4276 union { bool f; uint32_t u32; uint8_t u8; } rc;
4277# if RT_INLINE_ASM_USES_INTRIN
4278 rc.u32 = _bittest((long *)pvBitmap, iBit);
4279# elif RT_INLINE_ASM_GNU_STYLE
4280
4281 __asm__ __volatile__("btl %2, %1\n\t"
4282 "setc %b0\n\t"
4283 "andl $1, %0\n\t"
4284 : "=q" (rc.u32)
4285 : "m" (*(const volatile long *)pvBitmap),
4286 "Ir" (iBit)
4287 : "memory");
4288# else
4289 __asm
4290 {
4291 mov edx, [iBit]
4292# ifdef RT_ARCH_AMD64
4293 mov rax, [pvBitmap]
4294 bt [rax], edx
4295# else
4296 mov eax, [pvBitmap]
4297 bt [eax], edx
4298# endif
4299 setc al
4300 and eax, 1
4301 mov [rc.u32], eax
4302 }
4303# endif
4304 return rc.f;
4305}
4306#endif
4307
4308
4309/**
4310 * Clears a bit range within a bitmap.
4311 *
4312 * @param pvBitmap Pointer to the bitmap.
4313 * @param iBitStart The First bit to clear.
4314 * @param iBitEnd The first bit not to clear.
4315 */
4316DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4317{
4318 if (iBitStart < iBitEnd)
4319 {
4320 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4321 int iStart = iBitStart & ~31;
4322 int iEnd = iBitEnd & ~31;
4323 if (iStart == iEnd)
4324 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4325 else
4326 {
4327 /* bits in first dword. */
4328 if (iBitStart & 31)
4329 {
4330 *pu32 &= (1 << (iBitStart & 31)) - 1;
4331 pu32++;
4332 iBitStart = iStart + 32;
4333 }
4334
4335 /* whole dword. */
4336 if (iBitStart != iEnd)
4337 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4338
4339 /* bits in last dword. */
4340 if (iBitEnd & 31)
4341 {
4342 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4343 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4344 }
4345 }
4346 }
4347}
4348
4349
4350/**
4351 * Sets a bit range within a bitmap.
4352 *
4353 * @param pvBitmap Pointer to the bitmap.
4354 * @param iBitStart The First bit to set.
4355 * @param iBitEnd The first bit not to set.
4356 */
4357DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4358{
4359 if (iBitStart < iBitEnd)
4360 {
4361 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4362 int iStart = iBitStart & ~31;
4363 int iEnd = iBitEnd & ~31;
4364 if (iStart == iEnd)
4365 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4366 else
4367 {
4368 /* bits in first dword. */
4369 if (iBitStart & 31)
4370 {
4371 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4372 pu32++;
4373 iBitStart = iStart + 32;
4374 }
4375
4376 /* whole dword. */
4377 if (iBitStart != iEnd)
4378 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4379
4380 /* bits in last dword. */
4381 if (iBitEnd & 31)
4382 {
4383 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4384 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4385 }
4386 }
4387 }
4388}
4389
4390
4391/**
4392 * Finds the first clear bit in a bitmap.
4393 *
4394 * @returns Index of the first zero bit.
4395 * @returns -1 if no clear bit was found.
4396 * @param pvBitmap Pointer to the bitmap.
4397 * @param cBits The number of bits in the bitmap. Multiple of 32.
4398 */
4399#if RT_INLINE_ASM_EXTERNAL
4400DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4401#else
4402DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4403{
4404 if (cBits)
4405 {
4406 int32_t iBit;
4407# if RT_INLINE_ASM_GNU_STYLE
4408 RTCCUINTREG uEAX, uECX, uEDI;
4409 cBits = RT_ALIGN_32(cBits, 32);
4410 __asm__ __volatile__("repe; scasl\n\t"
4411 "je 1f\n\t"
4412# ifdef RT_ARCH_AMD64
4413 "lea -4(%%rdi), %%rdi\n\t"
4414 "xorl (%%rdi), %%eax\n\t"
4415 "subq %5, %%rdi\n\t"
4416# else
4417 "lea -4(%%edi), %%edi\n\t"
4418 "xorl (%%edi), %%eax\n\t"
4419 "subl %5, %%edi\n\t"
4420# endif
4421 "shll $3, %%edi\n\t"
4422 "bsfl %%eax, %%edx\n\t"
4423 "addl %%edi, %%edx\n\t"
4424 "1:\t\n"
4425 : "=d" (iBit),
4426 "=&c" (uECX),
4427 "=&D" (uEDI),
4428 "=&a" (uEAX)
4429 : "0" (0xffffffff),
4430 "mr" (pvBitmap),
4431 "1" (cBits >> 5),
4432 "2" (pvBitmap),
4433 "3" (0xffffffff));
4434# else
4435 cBits = RT_ALIGN_32(cBits, 32);
4436 __asm
4437 {
4438# ifdef RT_ARCH_AMD64
4439 mov rdi, [pvBitmap]
4440 mov rbx, rdi
4441# else
4442 mov edi, [pvBitmap]
4443 mov ebx, edi
4444# endif
4445 mov edx, 0ffffffffh
4446 mov eax, edx
4447 mov ecx, [cBits]
4448 shr ecx, 5
4449 repe scasd
4450 je done
4451
4452# ifdef RT_ARCH_AMD64
4453 lea rdi, [rdi - 4]
4454 xor eax, [rdi]
4455 sub rdi, rbx
4456# else
4457 lea edi, [edi - 4]
4458 xor eax, [edi]
4459 sub edi, ebx
4460# endif
4461 shl edi, 3
4462 bsf edx, eax
4463 add edx, edi
4464 done:
4465 mov [iBit], edx
4466 }
4467# endif
4468 return iBit;
4469 }
4470 return -1;
4471}
4472#endif
4473
4474
4475/**
4476 * Finds the next clear bit in a bitmap.
4477 *
4478 * @returns Index of the first zero bit.
4479 * @returns -1 if no clear bit was found.
4480 * @param pvBitmap Pointer to the bitmap.
4481 * @param cBits The number of bits in the bitmap. Multiple of 32.
4482 * @param iBitPrev The bit returned from the last search.
4483 * The search will start at iBitPrev + 1.
4484 */
4485#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4486DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4487#else
4488DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4489{
4490 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4491 int iBit = ++iBitPrev & 31;
4492 if (iBit)
4493 {
4494 /*
4495 * Inspect the 32-bit word containing the unaligned bit.
4496 */
4497 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4498
4499# if RT_INLINE_ASM_USES_INTRIN
4500 unsigned long ulBit = 0;
4501 if (_BitScanForward(&ulBit, u32))
4502 return ulBit + iBitPrev;
4503# else
4504# if RT_INLINE_ASM_GNU_STYLE
4505 __asm__ __volatile__("bsf %1, %0\n\t"
4506 "jnz 1f\n\t"
4507 "movl $-1, %0\n\t"
4508 "1:\n\t"
4509 : "=r" (iBit)
4510 : "r" (u32));
4511# else
4512 __asm
4513 {
4514 mov edx, [u32]
4515 bsf eax, edx
4516 jnz done
4517 mov eax, 0ffffffffh
4518 done:
4519 mov [iBit], eax
4520 }
4521# endif
4522 if (iBit >= 0)
4523 return iBit + iBitPrev;
4524# endif
4525
4526 /*
4527 * Skip ahead and see if there is anything left to search.
4528 */
4529 iBitPrev |= 31;
4530 iBitPrev++;
4531 if (cBits <= (uint32_t)iBitPrev)
4532 return -1;
4533 }
4534
4535 /*
4536 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4537 */
4538 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4539 if (iBit >= 0)
4540 iBit += iBitPrev;
4541 return iBit;
4542}
4543#endif
4544
4545
4546/**
4547 * Finds the first set bit in a bitmap.
4548 *
4549 * @returns Index of the first set bit.
4550 * @returns -1 if no clear bit was found.
4551 * @param pvBitmap Pointer to the bitmap.
4552 * @param cBits The number of bits in the bitmap. Multiple of 32.
4553 */
4554#if RT_INLINE_ASM_EXTERNAL
4555DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4556#else
4557DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4558{
4559 if (cBits)
4560 {
4561 int32_t iBit;
4562# if RT_INLINE_ASM_GNU_STYLE
4563 RTCCUINTREG uEAX, uECX, uEDI;
4564 cBits = RT_ALIGN_32(cBits, 32);
4565 __asm__ __volatile__("repe; scasl\n\t"
4566 "je 1f\n\t"
4567# ifdef RT_ARCH_AMD64
4568 "lea -4(%%rdi), %%rdi\n\t"
4569 "movl (%%rdi), %%eax\n\t"
4570 "subq %5, %%rdi\n\t"
4571# else
4572 "lea -4(%%edi), %%edi\n\t"
4573 "movl (%%edi), %%eax\n\t"
4574 "subl %5, %%edi\n\t"
4575# endif
4576 "shll $3, %%edi\n\t"
4577 "bsfl %%eax, %%edx\n\t"
4578 "addl %%edi, %%edx\n\t"
4579 "1:\t\n"
4580 : "=d" (iBit),
4581 "=&c" (uECX),
4582 "=&D" (uEDI),
4583 "=&a" (uEAX)
4584 : "0" (0xffffffff),
4585 "mr" (pvBitmap),
4586 "1" (cBits >> 5),
4587 "2" (pvBitmap),
4588 "3" (0));
4589# else
4590 cBits = RT_ALIGN_32(cBits, 32);
4591 __asm
4592 {
4593# ifdef RT_ARCH_AMD64
4594 mov rdi, [pvBitmap]
4595 mov rbx, rdi
4596# else
4597 mov edi, [pvBitmap]
4598 mov ebx, edi
4599# endif
4600 mov edx, 0ffffffffh
4601 xor eax, eax
4602 mov ecx, [cBits]
4603 shr ecx, 5
4604 repe scasd
4605 je done
4606# ifdef RT_ARCH_AMD64
4607 lea rdi, [rdi - 4]
4608 mov eax, [rdi]
4609 sub rdi, rbx
4610# else
4611 lea edi, [edi - 4]
4612 mov eax, [edi]
4613 sub edi, ebx
4614# endif
4615 shl edi, 3
4616 bsf edx, eax
4617 add edx, edi
4618 done:
4619 mov [iBit], edx
4620 }
4621# endif
4622 return iBit;
4623 }
4624 return -1;
4625}
4626#endif
4627
4628
4629/**
4630 * Finds the next set bit in a bitmap.
4631 *
4632 * @returns Index of the next set bit.
4633 * @returns -1 if no set bit was found.
4634 * @param pvBitmap Pointer to the bitmap.
4635 * @param cBits The number of bits in the bitmap. Multiple of 32.
4636 * @param iBitPrev The bit returned from the last search.
4637 * The search will start at iBitPrev + 1.
4638 */
4639#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4640DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4641#else
4642DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4643{
4644 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4645 int iBit = ++iBitPrev & 31;
4646 if (iBit)
4647 {
4648 /*
4649 * Inspect the 32-bit word containing the unaligned bit.
4650 */
4651 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4652
4653# if RT_INLINE_ASM_USES_INTRIN
4654 unsigned long ulBit = 0;
4655 if (_BitScanForward(&ulBit, u32))
4656 return ulBit + iBitPrev;
4657# else
4658# if RT_INLINE_ASM_GNU_STYLE
4659 __asm__ __volatile__("bsf %1, %0\n\t"
4660 "jnz 1f\n\t"
4661 "movl $-1, %0\n\t"
4662 "1:\n\t"
4663 : "=r" (iBit)
4664 : "r" (u32));
4665# else
4666 __asm
4667 {
4668 mov edx, [u32]
4669 bsf eax, edx
4670 jnz done
4671 mov eax, 0ffffffffh
4672 done:
4673 mov [iBit], eax
4674 }
4675# endif
4676 if (iBit >= 0)
4677 return iBit + iBitPrev;
4678# endif
4679
4680 /*
4681 * Skip ahead and see if there is anything left to search.
4682 */
4683 iBitPrev |= 31;
4684 iBitPrev++;
4685 if (cBits <= (uint32_t)iBitPrev)
4686 return -1;
4687 }
4688
4689 /*
4690 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4691 */
4692 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4693 if (iBit >= 0)
4694 iBit += iBitPrev;
4695 return iBit;
4696}
4697#endif
4698
4699
4700/**
4701 * Finds the first bit which is set in the given 32-bit integer.
4702 * Bits are numbered from 1 (least significant) to 32.
4703 *
4704 * @returns index [1..32] of the first set bit.
4705 * @returns 0 if all bits are cleared.
4706 * @param u32 Integer to search for set bits.
4707 * @remark Similar to ffs() in BSD.
4708 */
4709#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4710DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4711#else
4712DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4713{
4714# if RT_INLINE_ASM_USES_INTRIN
4715 unsigned long iBit;
4716 if (_BitScanForward(&iBit, u32))
4717 iBit++;
4718 else
4719 iBit = 0;
4720# elif RT_INLINE_ASM_GNU_STYLE
4721 uint32_t iBit;
4722 __asm__ __volatile__("bsf %1, %0\n\t"
4723 "jnz 1f\n\t"
4724 "xorl %0, %0\n\t"
4725 "jmp 2f\n"
4726 "1:\n\t"
4727 "incl %0\n"
4728 "2:\n\t"
4729 : "=r" (iBit)
4730 : "rm" (u32));
4731# else
4732 uint32_t iBit;
4733 _asm
4734 {
4735 bsf eax, [u32]
4736 jnz found
4737 xor eax, eax
4738 jmp done
4739 found:
4740 inc eax
4741 done:
4742 mov [iBit], eax
4743 }
4744# endif
4745 return iBit;
4746}
4747#endif
4748
4749
4750/**
4751 * Finds the first bit which is set in the given 32-bit integer.
4752 * Bits are numbered from 1 (least significant) to 32.
4753 *
4754 * @returns index [1..32] of the first set bit.
4755 * @returns 0 if all bits are cleared.
4756 * @param i32 Integer to search for set bits.
4757 * @remark Similar to ffs() in BSD.
4758 */
4759DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4760{
4761 return ASMBitFirstSetU32((uint32_t)i32);
4762}
4763
4764
4765/**
4766 * Finds the last bit which is set in the given 32-bit integer.
4767 * Bits are numbered from 1 (least significant) to 32.
4768 *
4769 * @returns index [1..32] of the last set bit.
4770 * @returns 0 if all bits are cleared.
4771 * @param u32 Integer to search for set bits.
4772 * @remark Similar to fls() in BSD.
4773 */
4774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4775DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4776#else
4777DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4778{
4779# if RT_INLINE_ASM_USES_INTRIN
4780 unsigned long iBit;
4781 if (_BitScanReverse(&iBit, u32))
4782 iBit++;
4783 else
4784 iBit = 0;
4785# elif RT_INLINE_ASM_GNU_STYLE
4786 uint32_t iBit;
4787 __asm__ __volatile__("bsrl %1, %0\n\t"
4788 "jnz 1f\n\t"
4789 "xorl %0, %0\n\t"
4790 "jmp 2f\n"
4791 "1:\n\t"
4792 "incl %0\n"
4793 "2:\n\t"
4794 : "=r" (iBit)
4795 : "rm" (u32));
4796# else
4797 uint32_t iBit;
4798 _asm
4799 {
4800 bsr eax, [u32]
4801 jnz found
4802 xor eax, eax
4803 jmp done
4804 found:
4805 inc eax
4806 done:
4807 mov [iBit], eax
4808 }
4809# endif
4810 return iBit;
4811}
4812#endif
4813
4814
4815/**
4816 * Finds the last bit which is set in the given 32-bit integer.
4817 * Bits are numbered from 1 (least significant) to 32.
4818 *
4819 * @returns index [1..32] of the last set bit.
4820 * @returns 0 if all bits are cleared.
4821 * @param i32 Integer to search for set bits.
4822 * @remark Similar to fls() in BSD.
4823 */
4824DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4825{
4826 return ASMBitLastSetU32((uint32_t)i32);
4827}
4828
4829/**
4830 * Reverse the byte order of the given 16-bit integer.
4831 *
4832 * @returns Revert
4833 * @param u16 16-bit integer value.
4834 */
4835#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4836DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4837#else
4838DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4839{
4840# if RT_INLINE_ASM_USES_INTRIN
4841 u16 = _byteswap_ushort(u16);
4842# elif RT_INLINE_ASM_GNU_STYLE
4843 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4844# else
4845 _asm
4846 {
4847 mov ax, [u16]
4848 ror ax, 8
4849 mov [u16], ax
4850 }
4851# endif
4852 return u16;
4853}
4854#endif
4855
4856
4857/**
4858 * Reverse the byte order of the given 32-bit integer.
4859 *
4860 * @returns Revert
4861 * @param u32 32-bit integer value.
4862 */
4863#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4864DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4865#else
4866DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4867{
4868# if RT_INLINE_ASM_USES_INTRIN
4869 u32 = _byteswap_ulong(u32);
4870# elif RT_INLINE_ASM_GNU_STYLE
4871 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4872# else
4873 _asm
4874 {
4875 mov eax, [u32]
4876 bswap eax
4877 mov [u32], eax
4878 }
4879# endif
4880 return u32;
4881}
4882#endif
4883
4884
4885/**
4886 * Reverse the byte order of the given 64-bit integer.
4887 *
4888 * @returns Revert
4889 * @param u64 64-bit integer value.
4890 */
4891DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4892{
4893#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4894 u64 = _byteswap_uint64(u64);
4895#else
4896 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4897 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4898#endif
4899 return u64;
4900}
4901
4902
4903/**
4904 * Rotate 32-bit unsigned value to the left by @a cShift.
4905 *
4906 * @returns Rotated value.
4907 * @param u32 The value to rotate.
4908 * @param cShift How many bits to rotate by.
4909 */
4910DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
4911{
4912#if RT_INLINE_ASM_USES_INTRIN
4913 return _rotl(u32, cShift);
4914#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4915 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4916 return u32;
4917#else
4918 cShift &= 31;
4919 return (u32 << cShift) | (u32 >> (32 - cShift));
4920#endif
4921}
4922
4923
4924/**
4925 * Rotate 32-bit unsigned value to the right by @a cShift.
4926 *
4927 * @returns Rotated value.
4928 * @param u32 The value to rotate.
4929 * @param cShift How many bits to rotate by.
4930 */
4931DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
4932{
4933#if RT_INLINE_ASM_USES_INTRIN
4934 return _rotr(u32, cShift);
4935#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4936 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4937 return u32;
4938#else
4939 cShift &= 31;
4940 return (u32 >> cShift) | (u32 << (32 - cShift));
4941#endif
4942}
4943
4944
4945/**
4946 * Rotate 64-bit unsigned value to the left by @a cShift.
4947 *
4948 * @returns Rotated value.
4949 * @param u64 The value to rotate.
4950 * @param cShift How many bits to rotate by.
4951 */
4952DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
4953{
4954#if RT_INLINE_ASM_USES_INTRIN
4955 return _rotl64(u64, cShift);
4956#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4957 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4958 return u64;
4959#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4960 uint32_t uSpill;
4961 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
4962 "jz 1f\n\t"
4963 "xchgl %%eax, %%edx\n\t"
4964 "1:\n\t"
4965 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4966 "jz 2f\n\t"
4967 "movl %%edx, %2\n\t" /* save the hi value in %3. */
4968 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
4969 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
4970 "2:\n\t" /* } */
4971 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
4972 : "0" (u64),
4973 "1" (cShift));
4974 return u64;
4975#else
4976 cShift &= 63;
4977 return (u64 << cShift) | (u64 >> (64 - cShift));
4978#endif
4979}
4980
4981
4982/**
4983 * Rotate 64-bit unsigned value to the right by @a cShift.
4984 *
4985 * @returns Rotated value.
4986 * @param u64 The value to rotate.
4987 * @param cShift How many bits to rotate by.
4988 */
4989DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
4990{
4991#if RT_INLINE_ASM_USES_INTRIN
4992 return _rotr64(u64, cShift);
4993#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4994 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4995 return u64;
4996#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4997 uint32_t uSpill;
4998 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
4999 "jz 1f\n\t"
5000 "xchgl %%eax, %%edx\n\t"
5001 "1:\n\t"
5002 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5003 "jz 2f\n\t"
5004 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5005 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5006 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5007 "2:\n\t" /* } */
5008 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5009 : "0" (u64),
5010 "1" (cShift));
5011 return u64;
5012#else
5013 cShift &= 63;
5014 return (u64 >> cShift) | (u64 << (64 - cShift));
5015#endif
5016}
5017
5018/** @} */
5019
5020
5021/** @} */
5022
5023#endif
5024
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette