source: vbox/trunk/include/iprt/asm.h@ 53902

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER of at least 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84
85/** @defgroup grp_rt_asm ASM - Assembly Routines
86 * @ingroup grp_rt
87 *
88 * @remarks The difference between ordered and unordered atomic operations is that
89 * the former will complete outstanding reads and writes before continuing,
90 * while the latter makes no promises about the order. Ordered
91 * operations do not, it seems, make any 100% promise with respect to whether
92 * the operation will complete before any subsequent memory access.
93 * (Please correct this if it is wrong.)
94 *
95 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
96 * are unordered (note the Uo).
97 *
98 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
99 * or even optimize assembler instructions away. For instance, in the following code
100 * the second rdmsr instruction is optimized away because gcc treats that instruction
101 * as deterministic:
102 *
103 * @code
104 * static inline uint32_t rdmsr_low(int idx)
105 * {
106 * uint32_t low;
107 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
108 * }
109 * ...
110 * uint32_t msr1 = rdmsr_low(1);
111 * foo(msr1);
112 * msr1 = rdmsr_low(1);
113 * bar(msr1);
114 * @endcode
115 *
116 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
117 * use the result of the first call as input parameter for bar() as well. For rdmsr this
118 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
119 * machine status information in general.
120 *
121 * @{
122 */
123
124
125/** @def RT_INLINE_ASM_GCC_4_3_X_X86
126 * Used to work around some 4.3.x register allocation issues in this version of
127 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
128#ifdef __GNUC__
129# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
130#endif
131#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
132# define RT_INLINE_ASM_GCC_4_3_X_X86 0
133#endif
134
135/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
136 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
137 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds. PIC
138 * mode, x86.
139 *
140 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
141 * when in PIC mode on x86.
142 */
143#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
144# ifdef DOXYGEN_RUNNING
145# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
146# else
147# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
148 ( (defined(PIC) || defined(__PIC__)) \
149 && defined(RT_ARCH_X86) \
150 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
151 || defined(RT_OS_DARWIN)) )
152# endif
153#endif
154
155
156/** @def ASMReturnAddress
157 * Gets the return address of the current (or calling if you like) function or method.
158 */
159#ifdef _MSC_VER
160# ifdef __cplusplus
161extern "C"
162# endif
163void * _ReturnAddress(void);
164# pragma intrinsic(_ReturnAddress)
165# define ASMReturnAddress() _ReturnAddress()
166#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
167# define ASMReturnAddress() __builtin_return_address(0)
168#else
169# error "Unsupported compiler."
170#endif
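
/* Illustrative usage sketch (the lock scenario is hypothetical; RTPrintf from
 * iprt/stream.h is only used here as a convenient output routine): tag a trace
 * message with the caller's address, e.g. to see who acquired a lock.
 * @code
 *     RTPrintf("lock taken by code near %p\n", ASMReturnAddress());
 * @endcode
 */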
171
172
173/**
174 * Compiler memory barrier.
175 *
176 * Ensures that the compiler does not use any cached (register/temporary stack)
177 * memory values and that all outstanding writes are emitted before returning.
178 *
179 * This function must be used if non-volatile data is modified by a
180 * device or the VMM. Typical cases are port access, MMIO access,
181 * trapping instructions, etc.
182 */
183#if RT_INLINE_ASM_GNU_STYLE
184# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
185#elif RT_INLINE_ASM_USES_INTRIN
186# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
187#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
188DECLINLINE(void) ASMCompilerBarrier(void)
189{
190 __asm
191 {
192 }
193}
194#endif
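
/* Illustrative usage sketch (g_fDeviceDone and the polling loop are hypothetical):
 * force the compiler to re-read a flag that a device or interrupt handler may
 * change behind its back.
 * @code
 *     extern uint32_t g_fDeviceDone;   // updated by an ISR; not declared volatile
 *     while (!g_fDeviceDone)
 *     {
 *         ASMCompilerBarrier();        // throw away cached copies, re-read from memory
 *         ASMNopPause();               // spin politely (see below)
 *     }
 * @endcode
 */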
195
196
197/** @def ASMBreakpoint
198 * Debugger Breakpoint.
199 * @deprecated Use RT_BREAKPOINT instead.
200 * @internal
201 */
202#define ASMBreakpoint() RT_BREAKPOINT()
203
204
205/**
206 * Spinloop hint for platforms that have it; an empty function on the other
207 * platforms.
208 *
209 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
210 * spin locks.
211 */
212#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
213DECLASM(void) ASMNopPause(void);
214#else
215DECLINLINE(void) ASMNopPause(void)
216{
217# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
220# else
221 __asm {
222 _emit 0f3h
223 _emit 090h
224 }
225# endif
226# else
227 /* dummy */
228# endif
229}
230#endif
231
232
233/**
234 * Atomically Exchange an unsigned 8-bit value, ordered.
235 *
236 * @returns Current *pu8 value
237 * @param pu8 Pointer to the 8-bit variable to update.
238 * @param u8 The 8-bit value to assign to *pu8.
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
242#else
243DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__("xchgb %0, %1\n\t"
247 : "=m" (*pu8),
248 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
249 : "1" (u8),
250 "m" (*pu8));
251# else
252 __asm
253 {
254# ifdef RT_ARCH_AMD64
255 mov rdx, [pu8]
256 mov al, [u8]
257 xchg [rdx], al
258 mov [u8], al
259# else
260 mov edx, [pu8]
261 mov al, [u8]
262 xchg [edx], al
263 mov [u8], al
264# endif
265 }
266# endif
267 return u8;
268}
269#endif
270
271
272/**
273 * Atomically Exchange a signed 8-bit value, ordered.
274 *
275 * @returns Current *pi8 value
276 * @param pi8 Pointer to the 8-bit variable to update.
277 * @param i8 The 8-bit value to assign to *pi8.
278 */
279DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
280{
281 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
282}
283
284
285/**
286 * Atomically Exchange a bool value, ordered.
287 *
288 * @returns Current *pf value
289 * @param pf Pointer to the boolean variable to update.
290 * @param f The boolean value to assign to *pf.
291 */
292DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
293{
294#ifdef _MSC_VER
295 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
296#else
297 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
298#endif
299}
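
/* Illustrative usage sketch (g_fCleanedUp and doCleanupOnce are hypothetical):
 * make sure one-time cleanup runs exactly once even if several threads race here.
 * @code
 *     static volatile bool g_fCleanedUp = false;
 *     if (!ASMAtomicXchgBool(&g_fCleanedUp, true))
 *         doCleanupOnce();   // only the first caller sees the old 'false'
 * @endcode
 */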
300
301
302/**
303 * Atomically Exchange an unsigned 16-bit value, ordered.
304 *
305 * @returns Current *pu16 value
306 * @param pu16 Pointer to the 16-bit variable to update.
307 * @param u16 The 16-bit value to assign to *pu16.
308 */
309#if RT_INLINE_ASM_EXTERNAL
310DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
311#else
312DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
313{
314# if RT_INLINE_ASM_GNU_STYLE
315 __asm__ __volatile__("xchgw %0, %1\n\t"
316 : "=m" (*pu16),
317 "=r" (u16)
318 : "1" (u16),
319 "m" (*pu16));
320# else
321 __asm
322 {
323# ifdef RT_ARCH_AMD64
324 mov rdx, [pu16]
325 mov ax, [u16]
326 xchg [rdx], ax
327 mov [u16], ax
328# else
329 mov edx, [pu16]
330 mov ax, [u16]
331 xchg [edx], ax
332 mov [u16], ax
333# endif
334 }
335# endif
336 return u16;
337}
338#endif
339
340
341/**
342 * Atomically Exchange a signed 16-bit value, ordered.
343 *
344 * @returns Current *pi16 value
345 * @param pi16 Pointer to the 16-bit variable to update.
346 * @param i16 The 16-bit value to assign to *pi16.
347 */
348DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
349{
350 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
351}
352
353
354/**
355 * Atomically Exchange an unsigned 32-bit value, ordered.
356 *
357 * @returns Current *pu32 value
358 * @param pu32 Pointer to the 32-bit variable to update.
359 * @param u32 The 32-bit value to assign to *pu32.
360 */
361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
362DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
363#else
364DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
365{
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("xchgl %0, %1\n\t"
368 : "=m" (*pu32),
369 "=r" (u32)
370 : "1" (u32),
371 "m" (*pu32));
372
373# elif RT_INLINE_ASM_USES_INTRIN
374 u32 = _InterlockedExchange((long *)pu32, u32);
375
376# else
377 __asm
378 {
379# ifdef RT_ARCH_AMD64
380 mov rdx, [pu32]
381 mov eax, u32
382 xchg [rdx], eax
383 mov [u32], eax
384# else
385 mov edx, [pu32]
386 mov eax, u32
387 xchg [edx], eax
388 mov [u32], eax
389# endif
390 }
391# endif
392 return u32;
393}
394#endif
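
/* Illustrative usage sketch (g_fPendingEvents is hypothetical): fetch and clear
 * a 32-bit pending-event mask in a single ordered operation, so no concurrently
 * set bit is lost.
 * @code
 *     static volatile uint32_t g_fPendingEvents;
 *     uint32_t fToProcess = ASMAtomicXchgU32(&g_fPendingEvents, 0);
 *     // every bit that was set is now in fToProcess and cleared in the shared mask
 * @endcode
 */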
395
396
397/**
398 * Atomically Exchange a signed 32-bit value, ordered.
399 *
400 * @returns Current *pi32 value
401 * @param pi32 Pointer to the 32-bit variable to update.
402 * @param i32 The 32-bit value to assign to *pi32.
403 */
404DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
405{
406 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
407}
408
409
410/**
411 * Atomically Exchange an unsigned 64-bit value, ordered.
412 *
413 * @returns Current *pu64 value
414 * @param pu64 Pointer to the 64-bit variable to update.
415 * @param u64 The 64-bit value to assign to *pu64.
416 */
417#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
418 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
419DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
420#else
421DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
422{
423# if defined(RT_ARCH_AMD64)
424# if RT_INLINE_ASM_USES_INTRIN
425 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
426
427# elif RT_INLINE_ASM_GNU_STYLE
428 __asm__ __volatile__("xchgq %0, %1\n\t"
429 : "=m" (*pu64),
430 "=r" (u64)
431 : "1" (u64),
432 "m" (*pu64));
433# else
434 __asm
435 {
436 mov rdx, [pu64]
437 mov rax, [u64]
438 xchg [rdx], rax
439 mov [u64], rax
440 }
441# endif
442# else /* !RT_ARCH_AMD64 */
443# if RT_INLINE_ASM_GNU_STYLE
444# if defined(PIC) || defined(__PIC__)
445 uint32_t u32EBX = (uint32_t)u64;
446 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
447 "xchgl %%ebx, %3\n\t"
448 "1:\n\t"
449 "lock; cmpxchg8b (%5)\n\t"
450 "jnz 1b\n\t"
451 "movl %3, %%ebx\n\t"
452 /*"xchgl %%esi, %5\n\t"*/
453 : "=A" (u64),
454 "=m" (*pu64)
455 : "0" (*pu64),
456 "m" ( u32EBX ),
457 "c" ( (uint32_t)(u64 >> 32) ),
458 "S" (pu64));
459# else /* !PIC */
460 __asm__ __volatile__("1:\n\t"
461 "lock; cmpxchg8b %1\n\t"
462 "jnz 1b\n\t"
463 : "=A" (u64),
464 "=m" (*pu64)
465 : "0" (*pu64),
466 "b" ( (uint32_t)u64 ),
467 "c" ( (uint32_t)(u64 >> 32) ));
468# endif
469# else
470 __asm
471 {
472 mov ebx, dword ptr [u64]
473 mov ecx, dword ptr [u64 + 4]
474 mov edi, pu64
475 mov eax, dword ptr [edi]
476 mov edx, dword ptr [edi + 4]
477 retry:
478 lock cmpxchg8b [edi]
479 jnz retry
480 mov dword ptr [u64], eax
481 mov dword ptr [u64 + 4], edx
482 }
483# endif
484# endif /* !RT_ARCH_AMD64 */
485 return u64;
486}
487#endif
488
489
490/**
491 * Atomically Exchange a signed 64-bit value, ordered.
492 *
493 * @returns Current *pi64 value
494 * @param pi64 Pointer to the 64-bit variable to update.
495 * @param i64 The 64-bit value to assign to *pi64.
496 */
497DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
498{
499 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
500}
501
502
503/**
504 * Atomically Exchange a pointer value, ordered.
505 *
506 * @returns Current *ppv value
507 * @param ppv Pointer to the pointer variable to update.
508 * @param pv The pointer value to assign to *ppv.
509 */
510DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
511{
512#if ARCH_BITS == 32
513 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
514#elif ARCH_BITS == 64
515 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
516#else
517# error "ARCH_BITS is bogus"
518#endif
519}
520
521
522/**
523 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
524 *
525 * @returns Current *pv value
526 * @param ppv Pointer to the pointer variable to update.
527 * @param pv The pointer value to assign to *ppv.
528 * @param Type The type of *ppv, sans volatile.
529 */
530#ifdef __GNUC__
531# define ASMAtomicXchgPtrT(ppv, pv, Type) \
532 __extension__ \
533 ({\
534 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
535 Type const pvTypeChecked = (pv); \
536 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
537 pvTypeCheckedRet; \
538 })
539#else
540# define ASMAtomicXchgPtrT(ppv, pv, Type) \
541 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
542#endif
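
/* Illustrative usage sketch (MYCFG, g_pCfg and pNewCfg are hypothetical): swap in
 * a new configuration object and get the previous one back without manual casts.
 * @code
 *     typedef struct MYCFG { uint32_t cItems; } MYCFG;
 *     static MYCFG * volatile g_pCfg;
 *     MYCFG *pOldCfg = ASMAtomicXchgPtrT(&g_pCfg, pNewCfg, MYCFG *);
 *     // pOldCfg may be freed once no other thread can still be using it
 * @endcode
 */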
543
544
545/**
546 * Atomically Exchange a raw-mode context pointer value, ordered.
547 *
548 * @returns Current *ppv value
549 * @param ppvRC Pointer to the pointer variable to update.
550 * @param pvRC The pointer value to assign to *ppv.
551 */
552DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
553{
554 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
555}
556
557
558/**
559 * Atomically Exchange a ring-0 pointer value, ordered.
560 *
561 * @returns Current *ppv value
562 * @param ppvR0 Pointer to the pointer variable to update.
563 * @param pvR0 The pointer value to assign to *ppv.
564 */
565DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
566{
567#if R0_ARCH_BITS == 32
568 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
569#elif R0_ARCH_BITS == 64
570 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
571#else
572# error "R0_ARCH_BITS is bogus"
573#endif
574}
575
576
577/**
578 * Atomically Exchange a ring-3 pointer value, ordered.
579 *
580 * @returns Current *ppv value
581 * @param ppvR3 Pointer to the pointer variable to update.
582 * @param pvR3 The pointer value to assign to *ppv.
583 */
584DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
585{
586#if R3_ARCH_BITS == 32
587 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
588#elif R3_ARCH_BITS == 64
589 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
590#else
591# error "R3_ARCH_BITS is bogus"
592#endif
593}
594
595
596/** @def ASMAtomicXchgHandle
597 * Atomically Exchange a typical IPRT handle value, ordered.
598 *
599 * @param ph Pointer to the value to update.
600 * @param hNew The new value to assign to *ph.
601 * @param phRes Where to store the current *ph value.
602 *
603 * @remarks This doesn't currently work for all handles (like RTFILE).
604 */
605#if HC_ARCH_BITS == 32
606# define ASMAtomicXchgHandle(ph, hNew, phRes) \
607 do { \
608 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
609 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
610 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
611 } while (0)
612#elif HC_ARCH_BITS == 64
613# define ASMAtomicXchgHandle(ph, hNew, phRes) \
614 do { \
615 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
616 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
617 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
618 } while (0)
619#else
620# error HC_ARCH_BITS
621#endif
622
623
624/**
625 * Atomically Exchange a value which size might differ
626 * between platforms or compilers, ordered.
627 *
628 * @param pu Pointer to the variable to update.
629 * @param uNew The value to assign to *pu.
630 * @todo This is busted as it's missing the result argument.
631 */
632#define ASMAtomicXchgSize(pu, uNew) \
633 do { \
634 switch (sizeof(*(pu))) { \
635 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
636 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
637 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
638 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
639 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
640 } \
641 } while (0)
642
643/**
644 * Atomically Exchange a value which size might differ
645 * between platforms or compilers, ordered.
646 *
647 * @param pu Pointer to the variable to update.
648 * @param uNew The value to assign to *pu.
649 * @param puRes Where to store the current *pu value.
650 */
651#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
652 do { \
653 switch (sizeof(*(pu))) { \
654 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
655 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
656 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
657 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
658 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
659 } \
660 } while (0)
661
662
663
664/**
665 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
666 *
667 * @returns true if xchg was done.
668 * @returns false if xchg wasn't done.
669 *
670 * @param pu8 Pointer to the value to update.
671 * @param u8New The new value to assign to *pu8.
672 * @param u8Old The old value to compare *pu8 with.
673 */
674#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
675DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
676#else
677DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
678{
679 uint8_t u8Ret;
680 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
681 "setz %1\n\t"
682 : "=m" (*pu8),
683 "=qm" (u8Ret),
684 "=a" (u8Old)
685 : "q" (u8New),
686 "2" (u8Old),
687 "m" (*pu8));
688 return (bool)u8Ret;
689}
690#endif
691
692
693/**
694 * Atomically Compare and Exchange a signed 8-bit value, ordered.
695 *
696 * @returns true if xchg was done.
697 * @returns false if xchg wasn't done.
698 *
699 * @param pi8 Pointer to the value to update.
700 * @param i8New The new value to assign to *pi8.
701 * @param i8Old The old value to compare *pi8 with.
702 */
703DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
704{
705 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
706}
707
708
709/**
710 * Atomically Compare and Exchange a bool value, ordered.
711 *
712 * @returns true if xchg was done.
713 * @returns false if xchg wasn't done.
714 *
715 * @param pf Pointer to the value to update.
716 * @param fNew The new value to assign to *pf.
717 * @param fOld The old value to compare *pf with.
718 */
719DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
720{
721 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
722}
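
/* Illustrative usage sketch (g_fOwned is hypothetical): a simple try-lock; only
 * the thread that flips 'false' to 'true' enters, and it hands the flag back
 * with another atomic exchange.
 * @code
 *     static volatile bool g_fOwned = false;
 *     if (ASMAtomicCmpXchgBool(&g_fOwned, true, false))
 *     {
 *         // ... owner-only work ...
 *         ASMAtomicXchgBool(&g_fOwned, false);   // release
 *     }
 * @endcode
 */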
723
724
725/**
726 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
727 *
728 * @returns true if xchg was done.
729 * @returns false if xchg wasn't done.
730 *
731 * @param pu32 Pointer to the value to update.
732 * @param u32New The new value to assign to *pu32.
733 * @param u32Old The old value to compare *pu32 with.
734 */
735#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
736DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
737#else
738DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
739{
740# if RT_INLINE_ASM_GNU_STYLE
741 uint8_t u8Ret;
742 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
743 "setz %1\n\t"
744 : "=m" (*pu32),
745 "=qm" (u8Ret),
746 "=a" (u32Old)
747 : "r" (u32New),
748 "2" (u32Old),
749 "m" (*pu32));
750 return (bool)u8Ret;
751
752# elif RT_INLINE_ASM_USES_INTRIN
753 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
754
755# else
756 uint32_t u32Ret;
757 __asm
758 {
759# ifdef RT_ARCH_AMD64
760 mov rdx, [pu32]
761# else
762 mov edx, [pu32]
763# endif
764 mov eax, [u32Old]
765 mov ecx, [u32New]
766# ifdef RT_ARCH_AMD64
767 lock cmpxchg [rdx], ecx
768# else
769 lock cmpxchg [edx], ecx
770# endif
771 setz al
772 movzx eax, al
773 mov [u32Ret], eax
774 }
775 return !!u32Ret;
776# endif
777}
778#endif
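
/* Illustrative usage sketch (g_cUsers and MY_MAX_USERS are hypothetical): bump a
 * counter only while it is below a limit, retrying when another thread wins the race.
 * @code
 *     static volatile uint32_t g_cUsers;
 *     uint32_t cOld;
 *     do
 *     {
 *         cOld = ASMAtomicReadU32(&g_cUsers);
 *         if (cOld >= MY_MAX_USERS)
 *             return false;                        // limit reached
 *     } while (!ASMAtomicCmpXchgU32(&g_cUsers, cOld + 1, cOld));
 * @endcode
 */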
779
780
781/**
782 * Atomically Compare and Exchange a signed 32-bit value, ordered.
783 *
784 * @returns true if xchg was done.
785 * @returns false if xchg wasn't done.
786 *
787 * @param pi32 Pointer to the value to update.
788 * @param i32New The new value to assign to *pi32.
789 * @param i32Old The old value to compare *pi32 with.
790 */
791DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
792{
793 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
794}
795
796
797/**
798 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
799 *
800 * @returns true if xchg was done.
801 * @returns false if xchg wasn't done.
802 *
803 * @param pu64 Pointer to the 64-bit variable to update.
804 * @param u64New The 64-bit value to assign to *pu64.
805 * @param u64Old The value to compare with.
806 */
807#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
808 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
809DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
810#else
811DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
812{
813# if RT_INLINE_ASM_USES_INTRIN
814 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
815
816# elif defined(RT_ARCH_AMD64)
817# if RT_INLINE_ASM_GNU_STYLE
818 uint8_t u8Ret;
819 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
820 "setz %1\n\t"
821 : "=m" (*pu64),
822 "=qm" (u8Ret),
823 "=a" (u64Old)
824 : "r" (u64New),
825 "2" (u64Old),
826 "m" (*pu64));
827 return (bool)u8Ret;
828# else
829 bool fRet;
830 __asm
831 {
832 mov rdx, [pu64]
833 mov rax, [u64Old]
834 mov rcx, [u64New]
835 lock cmpxchg [rdx], rcx
836 setz al
837 mov [fRet], al
838 }
839 return fRet;
840# endif
841# else /* !RT_ARCH_AMD64 */
842 uint32_t u32Ret;
843# if RT_INLINE_ASM_GNU_STYLE
844# if defined(PIC) || defined(__PIC__)
845 uint32_t u32EBX = (uint32_t)u64New;
846 uint32_t u32Spill;
847 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
848 "lock; cmpxchg8b (%6)\n\t"
849 "setz %%al\n\t"
850 "movl %4, %%ebx\n\t"
851 "movzbl %%al, %%eax\n\t"
852 : "=a" (u32Ret),
853 "=d" (u32Spill),
854# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
855 "+m" (*pu64)
856# else
857 "=m" (*pu64)
858# endif
859 : "A" (u64Old),
860 "m" ( u32EBX ),
861 "c" ( (uint32_t)(u64New >> 32) ),
862 "S" (pu64));
863# else /* !PIC */
864 uint32_t u32Spill;
865 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
866 "setz %%al\n\t"
867 "movzbl %%al, %%eax\n\t"
868 : "=a" (u32Ret),
869 "=d" (u32Spill),
870 "+m" (*pu64)
871 : "A" (u64Old),
872 "b" ( (uint32_t)u64New ),
873 "c" ( (uint32_t)(u64New >> 32) ));
874# endif
875 return (bool)u32Ret;
876# else
877 __asm
878 {
879 mov ebx, dword ptr [u64New]
880 mov ecx, dword ptr [u64New + 4]
881 mov edi, [pu64]
882 mov eax, dword ptr [u64Old]
883 mov edx, dword ptr [u64Old + 4]
884 lock cmpxchg8b [edi]
885 setz al
886 movzx eax, al
887 mov dword ptr [u32Ret], eax
888 }
889 return !!u32Ret;
890# endif
891# endif /* !RT_ARCH_AMD64 */
892}
893#endif
894
895
896/**
897 * Atomically Compare and exchange a signed 64-bit value, ordered.
898 *
899 * @returns true if xchg was done.
900 * @returns false if xchg wasn't done.
901 *
902 * @param pi64 Pointer to the 64-bit variable to update.
903 * @param i64 The 64-bit value to assign to *pi64.
904 * @param i64Old The value to compare with.
905 */
906DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
907{
908 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
909}
910
911
912/**
913 * Atomically Compare and Exchange a pointer value, ordered.
914 *
915 * @returns true if xchg was done.
916 * @returns false if xchg wasn't done.
917 *
918 * @param ppv Pointer to the value to update.
919 * @param pvNew The new value to assign to *ppv.
920 * @param pvOld The old value to compare *ppv with.
921 */
922DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
923{
924#if ARCH_BITS == 32
925 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
926#elif ARCH_BITS == 64
927 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
928#else
929# error "ARCH_BITS is bogus"
930#endif
931}
932
933
934/**
935 * Atomically Compare and Exchange a pointer value, ordered.
936 *
937 * @returns true if xchg was done.
938 * @returns false if xchg wasn't done.
939 *
940 * @param ppv Pointer to the value to update.
941 * @param pvNew The new value to assign to *ppv.
942 * @param pvOld The old value to compare *ppv with.
943 *
944 * @remarks This is relatively type safe on GCC platforms.
945 */
946#ifdef __GNUC__
947# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
948 __extension__ \
949 ({\
950 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
951 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
952 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
953 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
954 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
955 fMacroRet; \
956 })
957#else
958# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
959 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
960#endif
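
/* Illustrative usage sketch (MYNODE and g_pHead are hypothetical): push a node
 * onto a lock-free singly linked list; retry when the head moved under us.
 * @code
 *     typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *     static MYNODE * volatile g_pHead;
 *
 *     void myListPush(MYNODE *pNode)
 *     {
 *         MYNODE *pOldHead;
 *         do
 *         {
 *             pOldHead = ASMAtomicReadPtrT(&g_pHead, MYNODE *);
 *             pNode->pNext = pOldHead;
 *         } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pOldHead));
 *     }
 * @endcode
 */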
961
962
963/** @def ASMAtomicCmpXchgHandle
964 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
965 *
966 * @param ph Pointer to the value to update.
967 * @param hNew The new value to assign to *ph.
968 * @param hOld The old value to compare *ph with.
969 * @param fRc Where to store the result.
970 *
971 * @remarks This doesn't currently work for all handles (like RTFILE).
972 */
973#if HC_ARCH_BITS == 32
974# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
975 do { \
976 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
977 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
978 } while (0)
979#elif HC_ARCH_BITS == 64
980# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
981 do { \
982 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
983 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
984 } while (0)
985#else
986# error HC_ARCH_BITS
987#endif
988
989
990/** @def ASMAtomicCmpXchgSize
991 * Atomically Compare and Exchange a value which size might differ
992 * between platforms or compilers, ordered.
993 *
994 * @param pu Pointer to the value to update.
995 * @param uNew The new value to assign to *pu.
996 * @param uOld The old value to compare *pu with.
997 * @param fRc Where to store the result.
998 */
999#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1000 do { \
1001 switch (sizeof(*(pu))) { \
1002 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1003 break; \
1004 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1005 break; \
1006 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1007 (fRc) = false; \
1008 break; \
1009 } \
1010 } while (0)
1011
1012
1013/**
1014 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1015 * passes back old value, ordered.
1016 *
1017 * @returns true if xchg was done.
1018 * @returns false if xchg wasn't done.
1019 *
1020 * @param pu32 Pointer to the value to update.
1021 * @param u32New The new value to assign to *pu32.
1022 * @param u32Old The old value to compare *pu32 with.
1023 * @param pu32Old Pointer to store the old value at.
1024 */
1025#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1026DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1027#else
1028DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1029{
1030# if RT_INLINE_ASM_GNU_STYLE
1031 uint8_t u8Ret;
1032 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1033 "setz %1\n\t"
1034 : "=m" (*pu32),
1035 "=qm" (u8Ret),
1036 "=a" (*pu32Old)
1037 : "r" (u32New),
1038 "a" (u32Old),
1039 "m" (*pu32));
1040 return (bool)u8Ret;
1041
1042# elif RT_INLINE_ASM_USES_INTRIN
1043 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1044
1045# else
1046 uint32_t u32Ret;
1047 __asm
1048 {
1049# ifdef RT_ARCH_AMD64
1050 mov rdx, [pu32]
1051# else
1052 mov edx, [pu32]
1053# endif
1054 mov eax, [u32Old]
1055 mov ecx, [u32New]
1056# ifdef RT_ARCH_AMD64
1057 lock cmpxchg [rdx], ecx
1058 mov rdx, [pu32Old]
1059 mov [rdx], eax
1060# else
1061 lock cmpxchg [edx], ecx
1062 mov edx, [pu32Old]
1063 mov [edx], eax
1064# endif
1065 setz al
1066 movzx eax, al
1067 mov [u32Ret], eax
1068 }
1069 return !!u32Ret;
1070# endif
1071}
1072#endif
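
/* Illustrative usage sketch (g_u32State and MY_FLAG are hypothetical): because the
 * Ex variant hands back the value it actually found, the retry loop needs no
 * separate re-read.
 * @code
 *     static volatile uint32_t g_u32State;
 *     uint32_t uOld = ASMAtomicReadU32(&g_u32State);
 *     for (;;)
 *     {
 *         uint32_t uSeen;
 *         if (ASMAtomicCmpXchgExU32(&g_u32State, uOld | MY_FLAG, uOld, &uSeen))
 *             break;              // flag is now set
 *         uOld = uSeen;           // lost the race; retry with the value we saw
 *     }
 * @endcode
 */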
1073
1074
1075/**
1076 * Atomically Compare and Exchange a signed 32-bit value, additionally
1077 * passes back old value, ordered.
1078 *
1079 * @returns true if xchg was done.
1080 * @returns false if xchg wasn't done.
1081 *
1082 * @param pi32 Pointer to the value to update.
1083 * @param i32New The new value to assign to *pi32.
1084 * @param i32Old The old value to compare *pi32 with.
1085 * @param pi32Old Pointer to store the old value at.
1086 */
1087DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1088{
1089 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1090}
1091
1092
1093/**
1094 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1095 * passing back old value, ordered.
1096 *
1097 * @returns true if xchg was done.
1098 * @returns false if xchg wasn't done.
1099 *
1100 * @param pu64 Pointer to the 64-bit variable to update.
1101 * @param u64New The 64-bit value to assign to *pu64.
1102 * @param u64Old The value to compare with.
1103 * @param pu64Old Pointer to store the old value at.
1104 */
1105#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1106 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1107DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1108#else
1109DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1110{
1111# if RT_INLINE_ASM_USES_INTRIN
1112 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1113
1114# elif defined(RT_ARCH_AMD64)
1115# if RT_INLINE_ASM_GNU_STYLE
1116 uint8_t u8Ret;
1117 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1118 "setz %1\n\t"
1119 : "=m" (*pu64),
1120 "=qm" (u8Ret),
1121 "=a" (*pu64Old)
1122 : "r" (u64New),
1123 "a" (u64Old),
1124 "m" (*pu64));
1125 return (bool)u8Ret;
1126# else
1127 bool fRet;
1128 __asm
1129 {
1130 mov rdx, [pu64]
1131 mov rax, [u64Old]
1132 mov rcx, [u64New]
1133 lock cmpxchg [rdx], rcx
1134 mov rdx, [pu64Old]
1135 mov [rdx], rax
1136 setz al
1137 mov [fRet], al
1138 }
1139 return fRet;
1140# endif
1141# else /* !RT_ARCH_AMD64 */
1142# if RT_INLINE_ASM_GNU_STYLE
1143 uint64_t u64Ret;
1144# if defined(PIC) || defined(__PIC__)
1145 /* NB: this code uses a memory clobber description, because the clean
1146 * solution with an output value for *pu64 makes gcc run out of registers.
1147 * This will cause suboptimal code, and anyone with a better solution is
1148 * welcome to improve this. */
1149 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1150 "lock; cmpxchg8b %3\n\t"
1151 "xchgl %%ebx, %1\n\t"
1152 : "=A" (u64Ret)
1153 : "DS" ((uint32_t)u64New),
1154 "c" ((uint32_t)(u64New >> 32)),
1155 "m" (*pu64),
1156 "0" (u64Old)
1157 : "memory" );
1158# else /* !PIC */
1159 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1160 : "=A" (u64Ret),
1161 "=m" (*pu64)
1162 : "b" ((uint32_t)u64New),
1163 "c" ((uint32_t)(u64New >> 32)),
1164 "m" (*pu64),
1165 "0" (u64Old));
1166# endif
1167 *pu64Old = u64Ret;
1168 return u64Ret == u64Old;
1169# else
1170 uint32_t u32Ret;
1171 __asm
1172 {
1173 mov ebx, dword ptr [u64New]
1174 mov ecx, dword ptr [u64New + 4]
1175 mov edi, [pu64]
1176 mov eax, dword ptr [u64Old]
1177 mov edx, dword ptr [u64Old + 4]
1178 lock cmpxchg8b [edi]
1179 mov ebx, [pu64Old]
1180 mov [ebx], eax
1181 setz al
1182 movzx eax, al
1183 add ebx, 4
1184 mov [ebx], edx
1185 mov dword ptr [u32Ret], eax
1186 }
1187 return !!u32Ret;
1188# endif
1189# endif /* !RT_ARCH_AMD64 */
1190}
1191#endif
1192
1193
1194/**
1195 * Atomically Compare and exchange a signed 64-bit value, additionally
1196 * passing back old value, ordered.
1197 *
1198 * @returns true if xchg was done.
1199 * @returns false if xchg wasn't done.
1200 *
1201 * @param pi64 Pointer to the 64-bit variable to update.
1202 * @param i64 The 64-bit value to assign to *pi64.
1203 * @param i64Old The value to compare with.
1204 * @param pi64Old Pointer to store the old value at.
1205 */
1206DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1207{
1208 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1209}
1210
1211/** @def ASMAtomicCmpXchgExHandle
1212 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1213 *
1214 * @param ph Pointer to the value to update.
1215 * @param hNew The new value to assign to *ph.
1216 * @param hOld The old value to compare *ph with.
1217 * @param fRc Where to store the result.
1218 * @param phOldVal Pointer to where to store the old value.
1219 *
1220 * @remarks This doesn't currently work for all handles (like RTFILE).
1221 */
1222#if HC_ARCH_BITS == 32
1223# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1224 do { \
1225 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1226 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1227 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1228 } while (0)
1229#elif HC_ARCH_BITS == 64
1230# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1231 do { \
1232 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1233 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1234 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1235 } while (0)
1236#else
1237# error HC_ARCH_BITS
1238#endif
1239
1240
1241/** @def ASMAtomicCmpXchgExSize
1242 * Atomically Compare and Exchange a value which size might differ
1243 * between platforms or compilers. Additionally passes back old value.
1244 *
1245 * @param pu Pointer to the value to update.
1246 * @param uNew The new value to assign to *pu.
1247 * @param uOld The old value to compare *pu with.
1248 * @param fRc Where to store the result.
1249 * @param puOldVal Pointer to where to store the old value.
1250 */
1251#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1252 do { \
1253 switch (sizeof(*(pu))) { \
1254 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1255 break; \
1256 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1257 break; \
1258 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1259 (fRc) = false; \
1260 *(puOldVal) = 0; \
1261 break; \
1262 } \
1263 } while (0)
1264
1265
1266/**
1267 * Atomically Compare and Exchange a pointer value, additionally
1268 * passing back old value, ordered.
1269 *
1270 * @returns true if xchg was done.
1271 * @returns false if xchg wasn't done.
1272 *
1273 * @param ppv Pointer to the value to update.
1274 * @param pvNew The new value to assign to *ppv.
1275 * @param pvOld The old value to compare *ppv with.
1276 * @param ppvOld Pointer to store the old value at.
1277 */
1278DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1279{
1280#if ARCH_BITS == 32
1281 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1282#elif ARCH_BITS == 64
1283 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1284#else
1285# error "ARCH_BITS is bogus"
1286#endif
1287}
1288
1289
1290/**
1291 * Atomically Compare and Exchange a pointer value, additionally
1292 * passing back old value, ordered.
1293 *
1294 * @returns true if xchg was done.
1295 * @returns false if xchg wasn't done.
1296 *
1297 * @param ppv Pointer to the value to update.
1298 * @param pvNew The new value to assign to *ppv.
1299 * @param pvOld The old value to compare *ppv with.
1300 * @param ppvOld Pointer to store the old value at.
1301 *
1302 * @remarks This is relatively type safe on GCC platforms.
1303 */
1304#ifdef __GNUC__
1305# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1306 __extension__ \
1307 ({\
1308 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1309 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1310 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1311 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1312 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1313 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1314 (void **)ppvOldTypeChecked); \
1315 fMacroRet; \
1316 })
1317#else
1318# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1319 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1320#endif
1321
1322
1323/**
1324 * Serialize Instruction.
1325 */
1326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1327DECLASM(void) ASMSerializeInstruction(void);
1328#else
1329DECLINLINE(void) ASMSerializeInstruction(void)
1330{
1331# if RT_INLINE_ASM_GNU_STYLE
1332 RTCCUINTREG xAX = 0;
1333# ifdef RT_ARCH_AMD64
1334 __asm__ ("cpuid"
1335 : "=a" (xAX)
1336 : "0" (xAX)
1337 : "rbx", "rcx", "rdx");
1338# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1339 __asm__ ("push %%ebx\n\t"
1340 "cpuid\n\t"
1341 "pop %%ebx\n\t"
1342 : "=a" (xAX)
1343 : "0" (xAX)
1344 : "ecx", "edx");
1345# else
1346 __asm__ ("cpuid"
1347 : "=a" (xAX)
1348 : "0" (xAX)
1349 : "ebx", "ecx", "edx");
1350# endif
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 int aInfo[4];
1354 __cpuid(aInfo, 0);
1355
1356# else
1357 __asm
1358 {
1359 push ebx
1360 xor eax, eax
1361 cpuid
1362 pop ebx
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Memory fence, waits for any pending writes and reads to complete.
1371 */
1372DECLINLINE(void) ASMMemoryFence(void)
1373{
1374 /** @todo use mfence? check if all cpus we care for support it. */
1375 uint32_t volatile u32;
1376 ASMAtomicXchgU32(&u32, 0);
1377}
1378
1379
1380/**
1381 * Write fence, waits for any pending writes to complete.
1382 */
1383DECLINLINE(void) ASMWriteFence(void)
1384{
1385 /** @todo use sfence? check if all cpus we care for support it. */
1386 ASMMemoryFence();
1387}
1388
1389
1390/**
1391 * Read fence, waits for any pending reads to complete.
1392 */
1393DECLINLINE(void) ASMReadFence(void)
1394{
1395 /** @todo use lfence? check if all cpus we care for support it. */
1396 ASMMemoryFence();
1397}
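
/* Illustrative usage sketch (g_uPayload, g_fReady and consume() are hypothetical):
 * publish data before raising a flag; the write fence keeps the two stores in
 * order and the read fence keeps the two loads in order.
 * @code
 *     static uint32_t g_uPayload;
 *     static volatile uint8_t g_fReady;
 *
 *     // producer
 *     g_uPayload = uValue;
 *     ASMWriteFence();
 *     ASMAtomicWriteU8(&g_fReady, 1);
 *
 *     // consumer
 *     if (ASMAtomicReadU8(&g_fReady))
 *     {
 *         ASMReadFence();
 *         consume(g_uPayload);
 *     }
 * @endcode
 */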
1398
1399
1400/**
1401 * Atomically reads an unsigned 8-bit value, ordered.
1402 *
1403 * @returns Current *pu8 value
1404 * @param pu8 Pointer to the 8-bit variable to read.
1405 */
1406DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1407{
1408 ASMMemoryFence();
1409 return *pu8; /* byte reads are atomic on x86 */
1410}
1411
1412
1413/**
1414 * Atomically reads an unsigned 8-bit value, unordered.
1415 *
1416 * @returns Current *pu8 value
1417 * @param pu8 Pointer to the 8-bit variable to read.
1418 */
1419DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1420{
1421 return *pu8; /* byte reads are atomic on x86 */
1422}
1423
1424
1425/**
1426 * Atomically reads a signed 8-bit value, ordered.
1427 *
1428 * @returns Current *pi8 value
1429 * @param pi8 Pointer to the 8-bit variable to read.
1430 */
1431DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1432{
1433 ASMMemoryFence();
1434 return *pi8; /* byte reads are atomic on x86 */
1435}
1436
1437
1438/**
1439 * Atomically reads a signed 8-bit value, unordered.
1440 *
1441 * @returns Current *pi8 value
1442 * @param pi8 Pointer to the 8-bit variable to read.
1443 */
1444DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1445{
1446 return *pi8; /* byte reads are atomic on x86 */
1447}
1448
1449
1450/**
1451 * Atomically reads an unsigned 16-bit value, ordered.
1452 *
1453 * @returns Current *pu16 value
1454 * @param pu16 Pointer to the 16-bit variable to read.
1455 */
1456DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1457{
1458 ASMMemoryFence();
1459 Assert(!((uintptr_t)pu16 & 1));
1460 return *pu16;
1461}
1462
1463
1464/**
1465 * Atomically reads an unsigned 16-bit value, unordered.
1466 *
1467 * @returns Current *pu16 value
1468 * @param pu16 Pointer to the 16-bit variable to read.
1469 */
1470DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1471{
1472 Assert(!((uintptr_t)pu16 & 1));
1473 return *pu16;
1474}
1475
1476
1477/**
1478 * Atomically reads a signed 16-bit value, ordered.
1479 *
1480 * @returns Current *pi16 value
1481 * @param pi16 Pointer to the 16-bit variable to read.
1482 */
1483DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1484{
1485 ASMMemoryFence();
1486 Assert(!((uintptr_t)pi16 & 1));
1487 return *pi16;
1488}
1489
1490
1491/**
1492 * Atomically reads a signed 16-bit value, unordered.
1493 *
1494 * @returns Current *pi16 value
1495 * @param pi16 Pointer to the 16-bit variable to read.
1496 */
1497DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1498{
1499 Assert(!((uintptr_t)pi16 & 1));
1500 return *pi16;
1501}
1502
1503
1504/**
1505 * Atomically reads an unsigned 32-bit value, ordered.
1506 *
1507 * @returns Current *pu32 value
1508 * @param pu32 Pointer to the 32-bit variable to read.
1509 */
1510DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1511{
1512 ASMMemoryFence();
1513 Assert(!((uintptr_t)pu32 & 3));
1514 return *pu32;
1515}
1516
1517
1518/**
1519 * Atomically reads an unsigned 32-bit value, unordered.
1520 *
1521 * @returns Current *pu32 value
1522 * @param pu32 Pointer to the 32-bit variable to read.
1523 */
1524DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1525{
1526 Assert(!((uintptr_t)pu32 & 3));
1527 return *pu32;
1528}
1529
1530
1531/**
1532 * Atomically reads a signed 32-bit value, ordered.
1533 *
1534 * @returns Current *pi32 value
1535 * @param pi32 Pointer to the 32-bit variable to read.
1536 */
1537DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1538{
1539 ASMMemoryFence();
1540 Assert(!((uintptr_t)pi32 & 3));
1541 return *pi32;
1542}
1543
1544
1545/**
1546 * Atomically reads a signed 32-bit value, unordered.
1547 *
1548 * @returns Current *pi32 value
1549 * @param pi32 Pointer to the 32-bit variable to read.
1550 */
1551DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1552{
1553 Assert(!((uintptr_t)pi32 & 3));
1554 return *pi32;
1555}
1556
1557
1558/**
1559 * Atomically reads an unsigned 64-bit value, ordered.
1560 *
1561 * @returns Current *pu64 value
1562 * @param pu64 Pointer to the 64-bit variable to read.
1563 * The memory pointed to must be writable.
1564 * @remark This will fault if the memory is read-only!
1565 */
1566#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1567 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1568DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1569#else
1570DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1571{
1572 uint64_t u64;
1573# ifdef RT_ARCH_AMD64
1574 Assert(!((uintptr_t)pu64 & 7));
1575/*# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__( "mfence\n\t"
1577 "movq %1, %0\n\t"
1578 : "=r" (u64)
1579 : "m" (*pu64));
1580# else
1581 __asm
1582 {
1583 mfence
1584 mov rdx, [pu64]
1585 mov rax, [rdx]
1586 mov [u64], rax
1587 }
1588# endif*/
1589 ASMMemoryFence();
1590 u64 = *pu64;
1591# else /* !RT_ARCH_AMD64 */
1592# if RT_INLINE_ASM_GNU_STYLE
1593# if defined(PIC) || defined(__PIC__)
1594 uint32_t u32EBX = 0;
1595 Assert(!((uintptr_t)pu64 & 7));
1596 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1597 "lock; cmpxchg8b (%5)\n\t"
1598 "movl %3, %%ebx\n\t"
1599 : "=A" (u64),
1600# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1601 "+m" (*pu64)
1602# else
1603 "=m" (*pu64)
1604# endif
1605 : "0" (0ULL),
1606 "m" (u32EBX),
1607 "c" (0),
1608 "S" (pu64));
1609# else /* !PIC */
1610 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1611 : "=A" (u64),
1612 "+m" (*pu64)
1613 : "0" (0ULL),
1614 "b" (0),
1615 "c" (0));
1616# endif
1617# else
1618 Assert(!((uintptr_t)pu64 & 7));
1619 __asm
1620 {
1621 xor eax, eax
1622 xor edx, edx
1623 mov edi, pu64
1624 xor ecx, ecx
1625 xor ebx, ebx
1626 lock cmpxchg8b [edi]
1627 mov dword ptr [u64], eax
1628 mov dword ptr [u64 + 4], edx
1629 }
1630# endif
1631# endif /* !RT_ARCH_AMD64 */
1632 return u64;
1633}
1634#endif
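
/* Illustrative usage sketch (g_cTotalBytes is hypothetical): sample a 64-bit
 * counter that another thread keeps updating; on 32-bit hosts a plain load could
 * tear, and note the remark above that the memory must be writable.
 * @code
 *     static volatile uint64_t g_cTotalBytes;
 *     uint64_t cSnapshot = ASMAtomicReadU64(&g_cTotalBytes);
 * @endcode
 */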
1635
1636
1637/**
1638 * Atomically reads an unsigned 64-bit value, unordered.
1639 *
1640 * @returns Current *pu64 value
1641 * @param pu64 Pointer to the 64-bit variable to read.
1642 * The memory pointed to must be writable.
1643 * @remark This will fault if the memory is read-only!
1644 */
1645#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1646 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1647DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1648#else
1649DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1650{
1651 uint64_t u64;
1652# ifdef RT_ARCH_AMD64
1653 Assert(!((uintptr_t)pu64 & 7));
1654/*# if RT_INLINE_ASM_GNU_STYLE
1655 Assert(!((uintptr_t)pu64 & 7));
1656 __asm__ __volatile__("movq %1, %0\n\t"
1657 : "=r" (u64)
1658 : "m" (*pu64));
1659# else
1660 __asm
1661 {
1662 mov rdx, [pu64]
1663 mov rax, [rdx]
1664 mov [u64], rax
1665 }
1666# endif */
1667 u64 = *pu64;
1668# else /* !RT_ARCH_AMD64 */
1669# if RT_INLINE_ASM_GNU_STYLE
1670# if defined(PIC) || defined(__PIC__)
1671 uint32_t u32EBX = 0;
1672 uint32_t u32Spill;
1673 Assert(!((uintptr_t)pu64 & 7));
1674 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1675 "xor %%ecx,%%ecx\n\t"
1676 "xor %%edx,%%edx\n\t"
1677 "xchgl %%ebx, %3\n\t"
1678 "lock; cmpxchg8b (%4)\n\t"
1679 "movl %3, %%ebx\n\t"
1680 : "=A" (u64),
1681# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1682 "+m" (*pu64),
1683# else
1684 "=m" (*pu64),
1685# endif
1686 "=c" (u32Spill)
1687 : "m" (u32EBX),
1688 "S" (pu64));
1689# else /* !PIC */
1690 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1691 : "=A" (u64),
1692 "+m" (*pu64)
1693 : "0" (0ULL),
1694 "b" (0),
1695 "c" (0));
1696# endif
1697# else
1698 Assert(!((uintptr_t)pu64 & 7));
1699 __asm
1700 {
1701 xor eax, eax
1702 xor edx, edx
1703 mov edi, pu64
1704 xor ecx, ecx
1705 xor ebx, ebx
1706 lock cmpxchg8b [edi]
1707 mov dword ptr [u64], eax
1708 mov dword ptr [u64 + 4], edx
1709 }
1710# endif
1711# endif /* !RT_ARCH_AMD64 */
1712 return u64;
1713}
1714#endif
1715
1716
1717/**
1718 * Atomically reads a signed 64-bit value, ordered.
1719 *
1720 * @returns Current *pi64 value
1721 * @param pi64 Pointer to the 64-bit variable to read.
1722 * The memory pointed to must be writable.
1723 * @remark This will fault if the memory is read-only!
1724 */
1725DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1726{
1727 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1728}
1729
1730
1731/**
1732 * Atomically reads a signed 64-bit value, unordered.
1733 *
1734 * @returns Current *pi64 value
1735 * @param pi64 Pointer to the 64-bit variable to read.
1736 * The memory pointed to must be writable.
1737 * @remark This will fault if the memory is read-only!
1738 */
1739DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1740{
1741 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1742}
1743
1744
1745/**
1746 * Atomically reads a size_t value, ordered.
1747 *
1748 * @returns Current *pcb value
1749 * @param pcb Pointer to the size_t variable to read.
1750 */
1751DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1752{
1753#if ARCH_BITS == 64
1754 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1755#elif ARCH_BITS == 32
1756 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1757#else
1758# error "Unsupported ARCH_BITS value"
1759#endif
1760}
1761
1762
1763/**
1764 * Atomically reads a size_t value, unordered.
1765 *
1766 * @returns Current *pcb value
1767 * @param pcb Pointer to the size_t variable to read.
1768 */
1769DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1770{
1771#if ARCH_BITS == 64
1772 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1773#elif ARCH_BITS == 32
1774 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1775#else
1776# error "Unsupported ARCH_BITS value"
1777#endif
1778}
1779
1780
1781/**
1782 * Atomically reads a pointer value, ordered.
1783 *
1784 * @returns Current *pv value
1785 * @param ppv Pointer to the pointer variable to read.
1786 *
1787 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1788 * requires less typing (no casts).
1789 */
1790DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1791{
1792#if ARCH_BITS == 32
1793 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1794#elif ARCH_BITS == 64
1795 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1796#else
1797# error "ARCH_BITS is bogus"
1798#endif
1799}
1800
1801/**
1802 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1803 *
1804 * @returns Current *pv value
1805 * @param ppv Pointer to the pointer variable to read.
1806 * @param Type The type of *ppv, sans volatile.
1807 */
1808#ifdef __GNUC__
1809# define ASMAtomicReadPtrT(ppv, Type) \
1810 __extension__ \
1811 ({\
1812 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1813 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1814 pvTypeChecked; \
1815 })
1816#else
1817# define ASMAtomicReadPtrT(ppv, Type) \
1818 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1819#endif
1820
1821
1822/**
1823 * Atomically reads a pointer value, unordered.
1824 *
1825 * @returns Current *pv value
1826 * @param ppv Pointer to the pointer variable to read.
1827 *
1828 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1829 * requires less typing (no casts).
1830 */
1831DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1832{
1833#if ARCH_BITS == 32
1834 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1835#elif ARCH_BITS == 64
1836 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1837#else
1838# error "ARCH_BITS is bogus"
1839#endif
1840}
1841
1842
1843/**
1844 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1845 *
1846 * @returns Current *pv value
1847 * @param ppv Pointer to the pointer variable to read.
1848 * @param Type The type of *ppv, sans volatile.
1849 */
1850#ifdef __GNUC__
1851# define ASMAtomicUoReadPtrT(ppv, Type) \
1852 __extension__ \
1853 ({\
1854 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1855 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
1856 pvTypeChecked; \
1857 })
1858#else
1859# define ASMAtomicUoReadPtrT(ppv, Type) \
1860 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
1861#endif
1862
1863
1864/**
1865 * Atomically reads a boolean value, ordered.
1866 *
1867 * @returns Current *pf value
1868 * @param pf Pointer to the boolean variable to read.
1869 */
1870DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1871{
1872 ASMMemoryFence();
1873 return *pf; /* byte reads are atomic on x86 */
1874}
1875
1876
1877/**
1878 * Atomically reads a boolean value, unordered.
1879 *
1880 * @returns Current *pf value
1881 * @param pf Pointer to the boolean variable to read.
1882 */
1883DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1884{
1885 return *pf; /* byte reads are atomic on x86 */
1886}
1887
1888
1889/**
1890 * Atomically read a typical IPRT handle value, ordered.
1891 *
1892 * @param ph Pointer to the handle variable to read.
1893 * @param phRes Where to store the result.
1894 *
1895 * @remarks This doesn't currently work for all handles (like RTFILE).
1896 */
1897#if HC_ARCH_BITS == 32
1898# define ASMAtomicReadHandle(ph, phRes) \
1899 do { \
1900 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1901 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1902 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1903 } while (0)
1904#elif HC_ARCH_BITS == 64
1905# define ASMAtomicReadHandle(ph, phRes) \
1906 do { \
1907 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1908 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1909 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1910 } while (0)
1911#else
1912# error HC_ARCH_BITS
1913#endif
1914
1915
1916/**
1917 * Atomically read a typical IPRT handle value, unordered.
1918 *
1919 * @param ph Pointer to the handle variable to read.
1920 * @param phRes Where to store the result.
1921 *
1922 * @remarks This doesn't currently work for all handles (like RTFILE).
1923 */
1924#if HC_ARCH_BITS == 32
1925# define ASMAtomicUoReadHandle(ph, phRes) \
1926 do { \
1927 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1928 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1929 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1930 } while (0)
1931#elif HC_ARCH_BITS == 64
1932# define ASMAtomicUoReadHandle(ph, phRes) \
1933 do { \
1934 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1935 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1936 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1937 } while (0)
1938#else
1939# error HC_ARCH_BITS
1940#endif
1941
1942
1943/**
1944 * Atomically read a value whose size might differ
1945 * between platforms or compilers, ordered.
1946 *
1947 * @param pu Pointer to the variable to read.
1948 * @param puRes Where to store the result.
1949 */
1950#define ASMAtomicReadSize(pu, puRes) \
1951 do { \
1952 switch (sizeof(*(pu))) { \
1953 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1954 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
1955 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
1956 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
1957 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1958 } \
1959 } while (0)
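
/*
 * Usage sketch (illustrative only; asmExampleReadSizeT is a hypothetical helper):
 * reading a field whose width is 4 or 8 bytes depending on the host, e.g. a
 * size_t counter, without spelling out the per-size function by hand.
 */
DECLINLINE(size_t) asmExampleReadSizeT(size_t volatile *pcb)
{
    size_t cbResult;
    ASMAtomicReadSize(pcb, &cbResult); /* expands to the 32-bit or 64-bit read */
    return cbResult;
}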
1960
1961
1962/**
1963 * Atomically read a value whose size might differ
1964 * between platforms or compilers, unordered.
1965 *
1966 * @param pu Pointer to the variable to read.
1967 * @param puRes Where to store the result.
1968 */
1969#define ASMAtomicUoReadSize(pu, puRes) \
1970 do { \
1971 switch (sizeof(*(pu))) { \
1972 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1973 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
1974 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
1975 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
1976            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1977 } \
1978 } while (0)
1979
1980
1981/**
1982 * Atomically writes an unsigned 8-bit value, ordered.
1983 *
1984 * @param pu8 Pointer to the 8-bit variable.
1985 * @param u8 The 8-bit value to assign to *pu8.
1986 */
1987DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1988{
1989 ASMAtomicXchgU8(pu8, u8);
1990}
1991
1992
1993/**
1994 * Atomically writes an unsigned 8-bit value, unordered.
1995 *
1996 * @param pu8 Pointer to the 8-bit variable.
1997 * @param u8 The 8-bit value to assign to *pu8.
1998 */
1999DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2000{
2001 *pu8 = u8; /* byte writes are atomic on x86 */
2002}
2003
2004
2005/**
2006 * Atomically writes a signed 8-bit value, ordered.
2007 *
2008 * @param pi8 The 8-bit variable to write.
2009 * @param i8 The 8-bit value to assign to *pi8.
2010 */
2011DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2012{
2013 ASMAtomicXchgS8(pi8, i8);
2014}
2015
2016
2017/**
2018 * Atomically writes a signed 8-bit value, unordered.
2019 *
2020 * @param pi8 Pointer to the 8-bit variable to write.
2021 * @param i8 The 8-bit value to assign to *pi8.
2022 */
2023DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2024{
2025 *pi8 = i8; /* byte writes are atomic on x86 */
2026}
2027
2028
2029/**
2030 * Atomically writes an unsigned 16-bit value, ordered.
2031 *
2032 * @param pu16 Pointer to the 16-bit variable to write.
2033 * @param u16 The 16-bit value to assign to *pu16.
2034 */
2035DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2036{
2037 ASMAtomicXchgU16(pu16, u16);
2038}
2039
2040
2041/**
2042 * Atomically writes an unsigned 16-bit value, unordered.
2043 *
2044 * @param pu16 Pointer to the 16-bit variable to write.
2045 * @param u16 The 16-bit value to assign to *pu16.
2046 */
2047DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2048{
2049 Assert(!((uintptr_t)pu16 & 1));
2050 *pu16 = u16;
2051}
2052
2053
2054/**
2055 * Atomically writes a signed 16-bit value, ordered.
2056 *
2057 * @param pi16 Pointer to the 16-bit variable to write.
2058 * @param i16 The 16-bit value to assign to *pi16.
2059 */
2060DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2061{
2062 ASMAtomicXchgS16(pi16, i16);
2063}
2064
2065
2066/**
2067 * Atomically writes a signed 16-bit value, unordered.
2068 *
2069 * @param pi16 Pointer to the 16-bit variable to write.
2070 * @param i16 The 16-bit value to assign to *pi16.
2071 */
2072DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2073{
2074 Assert(!((uintptr_t)pi16 & 1));
2075 *pi16 = i16;
2076}
2077
2078
2079/**
2080 * Atomically writes an unsigned 32-bit value, ordered.
2081 *
2082 * @param pu32 Pointer to the 32-bit variable to write.
2083 * @param u32 The 32-bit value to assign to *pu32.
2084 */
2085DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2086{
2087 ASMAtomicXchgU32(pu32, u32);
2088}
2089
2090
2091/**
2092 * Atomically writes an unsigned 32-bit value, unordered.
2093 *
2094 * @param pu32 Pointer to the 32-bit variable to write.
2095 * @param u32 The 32-bit value to assign to *pu32.
2096 */
2097DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2098{
2099 Assert(!((uintptr_t)pu32 & 3));
2100 *pu32 = u32;
2101}
2102
2103
2104/**
2105 * Atomically writes a signed 32-bit value, ordered.
2106 *
2107 * @param pi32 Pointer to the 32-bit variable to write.
2108 * @param i32 The 32-bit value to assign to *pi32.
2109 */
2110DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2111{
2112 ASMAtomicXchgS32(pi32, i32);
2113}
2114
2115
2116/**
2117 * Atomically writes a signed 32-bit value, unordered.
2118 *
2119 * @param pi32 Pointer to the 32-bit variable to write.
2120 * @param i32 The 32-bit value to assign to *pi32.
2121 */
2122DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2123{
2124 Assert(!((uintptr_t)pi32 & 3));
2125 *pi32 = i32;
2126}
2127
2128
2129/**
2130 * Atomically writes an unsigned 64-bit value, ordered.
2131 *
2132 * @param pu64 Pointer to the 64-bit variable to write.
2133 * @param u64 The 64-bit value to assign to *pu64.
2134 */
2135DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2136{
2137 ASMAtomicXchgU64(pu64, u64);
2138}
2139
2140
2141/**
2142 * Atomically writes an unsigned 64-bit value, unordered.
2143 *
2144 * @param pu64 Pointer to the 64-bit variable to write.
2145 * @param u64 The 64-bit value to assign to *pu64.
2146 */
2147DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2148{
2149 Assert(!((uintptr_t)pu64 & 7));
2150#if ARCH_BITS == 64
2151 *pu64 = u64;
2152#else
2153 ASMAtomicXchgU64(pu64, u64);
2154#endif
2155}
2156
2157
2158/**
2159 * Atomically writes a signed 64-bit value, ordered.
2160 *
2161 * @param pi64 Pointer to the 64-bit variable to write.
2162 * @param i64 The 64-bit value to assign to *pi64.
2163 */
2164DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2165{
2166 ASMAtomicXchgS64(pi64, i64);
2167}
2168
2169
2170/**
2171 * Atomically writes a signed 64-bit value, unordered.
2172 *
2173 * @param pi64 Pointer to the 64-bit variable to write.
2174 * @param i64 The 64-bit value to assign to *pi64.
2175 */
2176DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2177{
2178 Assert(!((uintptr_t)pi64 & 7));
2179#if ARCH_BITS == 64
2180 *pi64 = i64;
2181#else
2182 ASMAtomicXchgS64(pi64, i64);
2183#endif
2184}
2185
2186
2187/**
2188 * Atomically writes a boolean value, ordered.
2189 *
2190 * @param pf Pointer to the boolean variable to write.
2191 * @param f The boolean value to assign to *pf.
2192 */
2193DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2194{
2195 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2196}
2197
2198
2199/**
2200 * Atomically writes a boolean value, unordered.
2201 *
2202 * @param pf Pointer to the boolean variable to write.
2203 * @param f The boolean value to assign to *pf.
2204 */
2205DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2206{
2207 *pf = f; /* byte writes are atomic on x86 */
2208}
2209
2210
2211/**
2212 * Atomically writes a pointer value, ordered.
2213 *
2214 * @param ppv Pointer to the pointer variable to write.
2215 * @param pv The pointer value to assign to *ppv.
2216 */
2217DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2218{
2219#if ARCH_BITS == 32
2220 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2221#elif ARCH_BITS == 64
2222 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2223#else
2224# error "ARCH_BITS is bogus"
2225#endif
2226}
2227
2228
2229/**
2230 * Atomically writes a pointer value, ordered.
2231 *
2232 * @param ppv Pointer to the pointer variable to write.
2233 * @param pv The pointer value to assign to *ppv. If NULL use
2234 * ASMAtomicWriteNullPtr or you'll land in trouble.
2235 *
2236 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2237 * NULL.
2238 */
2239#ifdef __GNUC__
2240# define ASMAtomicWritePtr(ppv, pv) \
2241 do \
2242 { \
2243 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2244 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2245 \
2246 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2247 AssertCompile(sizeof(pv) == sizeof(void *)); \
2248 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2249 \
2250 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2251 } while (0)
2252#else
2253# define ASMAtomicWritePtr(ppv, pv) \
2254 do \
2255 { \
2256 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2257 AssertCompile(sizeof(pv) == sizeof(void *)); \
2258 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2259 \
2260 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2261 } while (0)
2262#endif
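
/*
 * Usage sketch (illustrative only; DEMOCFG and asmExamplePublishCfg are
 * hypothetical): initialize the object first and publish the pointer last with
 * the ordered write, so a reader using ASMAtomicReadPtrT sees either NULL or a
 * fully initialized object.
 */
typedef struct DEMOCFG { uint32_t cItems; uint32_t fFlags; } DEMOCFG;

DECLINLINE(void) asmExamplePublishCfg(DEMOCFG * volatile *ppCfg, DEMOCFG *pNewCfg)
{
    pNewCfg->cItems = 0;                /* fill in the structure first... */
    pNewCfg->fFlags = 0;
    ASMAtomicWritePtr(ppCfg, pNewCfg);  /* ...then publish it with an ordered write */
}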
2263
2264
2265/**
2266 * Atomically sets a pointer to NULL, ordered.
2267 *
2268 * @param ppv Pointer to the pointer variable that should be set to NULL.
2269 *
2270 * @remarks This is relatively type safe on GCC platforms.
2271 */
2272#ifdef __GNUC__
2273# define ASMAtomicWriteNullPtr(ppv) \
2274 do \
2275 { \
2276 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2277 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2278 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2279 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2280 } while (0)
2281#else
2282# define ASMAtomicWriteNullPtr(ppv) \
2283 do \
2284 { \
2285 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2286 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2287 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2288 } while (0)
2289#endif
2290
2291
2292/**
2293 * Atomically writes a pointer value, unordered.
2294 *
2295 *
2296 * @param ppv Pointer to the pointer variable.
2297 * @param pv The pointer value to assign to *ppv. If NULL use
2298 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2299 *
2300 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2301 * NULL.
2302 */
2303#ifdef __GNUC__
2304# define ASMAtomicUoWritePtr(ppv, pv) \
2305 do \
2306 { \
2307 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2308 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2309 \
2310 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2311 AssertCompile(sizeof(pv) == sizeof(void *)); \
2312 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2313 \
2314 *(ppvTypeChecked) = pvTypeChecked; \
2315 } while (0)
2316#else
2317# define ASMAtomicUoWritePtr(ppv, pv) \
2318 do \
2319 { \
2320 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2321 AssertCompile(sizeof(pv) == sizeof(void *)); \
2322 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2323 *(ppv) = pv; \
2324 } while (0)
2325#endif
2326
2327
2328/**
2329 * Atomically sets a pointer to NULL, unordered.
2330 *
2331 * @param ppv Pointer to the pointer variable that should be set to NULL.
2332 *
2333 * @remarks This is relatively type safe on GCC platforms.
2334 */
2335#ifdef __GNUC__
2336# define ASMAtomicUoWriteNullPtr(ppv) \
2337 do \
2338 { \
2339 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2340 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2341 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2342 *(ppvTypeChecked) = NULL; \
2343 } while (0)
2344#else
2345# define ASMAtomicUoWriteNullPtr(ppv) \
2346 do \
2347 { \
2348 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2349 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2350 *(ppv) = NULL; \
2351 } while (0)
2352#endif
2353
2354
2355/**
2356 * Atomically write a typical IPRT handle value, ordered.
2357 *
2358 * @param ph Pointer to the variable to update.
2359 * @param hNew The value to assign to *ph.
2360 *
2361 * @remarks This doesn't currently work for all handles (like RTFILE).
2362 */
2363#if HC_ARCH_BITS == 32
2364# define ASMAtomicWriteHandle(ph, hNew) \
2365 do { \
2366 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2367 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2368 } while (0)
2369#elif HC_ARCH_BITS == 64
2370# define ASMAtomicWriteHandle(ph, hNew) \
2371 do { \
2372 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2373 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2374 } while (0)
2375#else
2376# error HC_ARCH_BITS
2377#endif
2378
2379
2380/**
2381 * Atomically write a typical IPRT handle value, unordered.
2382 *
2383 * @param ph Pointer to the variable to update.
2384 * @param hNew The value to assign to *ph.
2385 *
2386 * @remarks This doesn't currently work for all handles (like RTFILE).
2387 */
2388#if HC_ARCH_BITS == 32
2389# define ASMAtomicUoWriteHandle(ph, hNew) \
2390 do { \
2391 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2392 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2393 } while (0)
2394#elif HC_ARCH_BITS == 64
2395# define ASMAtomicUoWriteHandle(ph, hNew) \
2396 do { \
2397 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2398 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2399 } while (0)
2400#else
2401# error HC_ARCH_BITS
2402#endif
2403
2404
2405/**
2406 * Atomically write a value whose size might differ
2407 * between platforms or compilers, ordered.
2408 *
2409 * @param pu Pointer to the variable to update.
2410 * @param uNew The value to assign to *pu.
2411 */
2412#define ASMAtomicWriteSize(pu, uNew) \
2413 do { \
2414 switch (sizeof(*(pu))) { \
2415 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2416 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2417 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2418 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2419 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2420 } \
2421 } while (0)
2422
2423/**
2424 * Atomically write a value whose size might differ
2425 * between platforms or compilers, unordered.
2426 *
2427 * @param pu Pointer to the variable to update.
2428 * @param uNew The value to assign to *pu.
2429 */
2430#define ASMAtomicUoWriteSize(pu, uNew) \
2431 do { \
2432 switch (sizeof(*(pu))) { \
2433 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2434 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2435 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2436 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2437            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2438 } \
2439 } while (0)
2440
2441
2442
2443/**
2444 * Atomically exchanges and adds to a 32-bit value, ordered.
2445 *
2446 * @returns The old value.
2447 * @param pu32 Pointer to the value.
2448 * @param u32 Number to add.
2449 */
2450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2451DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2452#else
2453DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2454{
2455# if RT_INLINE_ASM_USES_INTRIN
2456 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2457 return u32;
2458
2459# elif RT_INLINE_ASM_GNU_STYLE
2460 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2461 : "=r" (u32),
2462 "=m" (*pu32)
2463 : "0" (u32),
2464 "m" (*pu32)
2465 : "memory");
2466 return u32;
2467# else
2468 __asm
2469 {
2470 mov eax, [u32]
2471# ifdef RT_ARCH_AMD64
2472 mov rdx, [pu32]
2473 lock xadd [rdx], eax
2474# else
2475 mov edx, [pu32]
2476 lock xadd [edx], eax
2477# endif
2478 mov [u32], eax
2479 }
2480 return u32;
2481# endif
2482}
2483#endif
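
/*
 * Usage sketch (illustrative only; asmExampleGrabIndexRange is hypothetical):
 * because ASMAtomicAddU32 returns the value *before* the addition, it can hand
 * out consecutive index ranges to concurrent callers.
 */
DECLINLINE(uint32_t) asmExampleGrabIndexRange(uint32_t volatile *pidxNext, uint32_t cIndexes)
{
    /* Returns the first index of the reserved range; *pidxNext ends up cIndexes higher. */
    return ASMAtomicAddU32(pidxNext, cIndexes);
}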
2484
2485
2486/**
2487 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2488 *
2489 * @returns The old value.
2490 * @param pi32 Pointer to the value.
2491 * @param i32 Number to add.
2492 */
2493DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2494{
2495 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2496}
2497
2498
2499/**
2500 * Atomically exchanges and adds to a 64-bit value, ordered.
2501 *
2502 * @returns The old value.
2503 * @param pu64 Pointer to the value.
2504 * @param u64 Number to add.
2505 */
2506#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2507DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2508#else
2509DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2510{
2511# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2512 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2513 return u64;
2514
2515# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2516 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2517 : "=r" (u64),
2518 "=m" (*pu64)
2519 : "0" (u64),
2520 "m" (*pu64)
2521 : "memory");
2522 return u64;
2523# else
2524 uint64_t u64Old;
2525 for (;;)
2526 {
2527 uint64_t u64New;
2528 u64Old = ASMAtomicUoReadU64(pu64);
2529 u64New = u64Old + u64;
2530 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2531 break;
2532 ASMNopPause();
2533 }
2534 return u64Old;
2535# endif
2536}
2537#endif
2538
2539
2540/**
2541 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2542 *
2543 * @returns The old value.
2544 * @param pi64 Pointer to the value.
2545 * @param i64 Number to add.
2546 */
2547DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2548{
2549 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2550}
2551
2552
2553/**
2554 * Atomically exchanges and adds to a size_t value, ordered.
2555 *
2556 * @returns The old value.
2557 * @param pcb Pointer to the size_t value.
2558 * @param cb Number to add.
2559 */
2560DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2561{
2562#if ARCH_BITS == 64
2563 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2564#elif ARCH_BITS == 32
2565 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2566#else
2567# error "Unsupported ARCH_BITS value"
2568#endif
2569}
2570
2571
2572/**
2573 * Atomically exchanges and adds a value whose size might differ between
2574 * platforms or compilers, ordered.
2575 *
2576 * @param pu Pointer to the variable to update.
2577 * @param uNew The value to add to *pu.
2578 * @param puOld Where to store the old value.
2579 */
2580#define ASMAtomicAddSize(pu, uNew, puOld) \
2581 do { \
2582 switch (sizeof(*(pu))) { \
2583 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2584 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2585 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2586 } \
2587 } while (0)
2588
2589
2590/**
2591 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2592 *
2593 * @returns The old value.
2594 * @param pu32 Pointer to the value.
2595 * @param u32 Number to subtract.
2596 */
2597DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2598{
2599 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2600}
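
/*
 * Note (informational): the cast in ASMAtomicSubU32 relies on two's complement
 * wrap-around, i.e. adding (uint32_t)-(int32_t)1 == 0xffffffff has the same
 * effect as subtracting 1. asmExampleSubOne below is a hypothetical sketch.
 */
DECLINLINE(uint32_t) asmExampleSubOne(uint32_t volatile *pu32)
{
    /* Equivalent to ASMAtomicSubU32(pu32, 1): returns the old value, leaves *pu32 one lower. */
    return ASMAtomicAddU32(pu32, UINT32_C(0xffffffff));
}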
2601
2602
2603/**
2604 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2605 *
2606 * @returns The old value.
2607 * @param pi32 Pointer to the value.
2608 * @param i32 Number to subtract.
2609 */
2610DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2611{
2612 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2613}
2614
2615
2616/**
2617 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2618 *
2619 * @returns The old value.
2620 * @param pu64 Pointer to the value.
2621 * @param u64 Number to subtract.
2622 */
2623DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2624{
2625 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2626}
2627
2628
2629/**
2630 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2631 *
2632 * @returns The old value.
2633 * @param pi64 Pointer to the value.
2634 * @param i64 Number to subtract.
2635 */
2636DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2637{
2638 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2639}
2640
2641
2642/**
2643 * Atomically exchanges and subtracts from a size_t value, ordered.
2644 *
2645 * @returns The old value.
2646 * @param pcb Pointer to the size_t value.
2647 * @param cb Number to subtract.
2648 */
2649DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2650{
2651#if ARCH_BITS == 64
2652 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2653#elif ARCH_BITS == 32
2654 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2655#else
2656# error "Unsupported ARCH_BITS value"
2657#endif
2658}
2659
2660
2661/**
2662 * Atomically exchanges and subtracts a value whose size might differ between
2663 * platforms or compilers, ordered.
2664 *
2665 * @param pu Pointer to the variable to update.
2666 * @param uNew The value to subtract from *pu.
2667 * @param puOld Where to store the old value.
2668 */
2669#define ASMAtomicSubSize(pu, uNew, puOld) \
2670 do { \
2671 switch (sizeof(*(pu))) { \
2672 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2673 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2674 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2675 } \
2676 } while (0)
2677
2678
2679/**
2680 * Atomically increment a 32-bit value, ordered.
2681 *
2682 * @returns The new value.
2683 * @param pu32 Pointer to the value to increment.
2684 */
2685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2686DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2687#else
2688DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2689{
2690 uint32_t u32;
2691# if RT_INLINE_ASM_USES_INTRIN
2692 u32 = _InterlockedIncrement((long *)pu32);
2693 return u32;
2694
2695# elif RT_INLINE_ASM_GNU_STYLE
2696 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2697 : "=r" (u32),
2698 "=m" (*pu32)
2699 : "0" (1),
2700 "m" (*pu32)
2701 : "memory");
2702 return u32+1;
2703# else
2704 __asm
2705 {
2706 mov eax, 1
2707# ifdef RT_ARCH_AMD64
2708 mov rdx, [pu32]
2709 lock xadd [rdx], eax
2710# else
2711 mov edx, [pu32]
2712 lock xadd [edx], eax
2713# endif
2714 mov u32, eax
2715 }
2716 return u32+1;
2717# endif
2718}
2719#endif
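
/*
 * Usage sketch (illustrative only; asmExampleRetain is hypothetical): unlike
 * the Add functions, the Inc/Dec functions return the *new* value, which is
 * what reference counting usually wants.
 */
DECLINLINE(uint32_t) asmExampleRetain(uint32_t volatile *pcRefs)
{
    uint32_t cRefs = ASMAtomicIncU32(pcRefs);   /* new count after the increment */
    Assert(cRefs > 1);                          /* caller must already hold a reference */
    return cRefs;
}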
2720
2721
2722/**
2723 * Atomically increment a signed 32-bit value, ordered.
2724 *
2725 * @returns The new value.
2726 * @param pi32 Pointer to the value to increment.
2727 */
2728DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2729{
2730 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2731}
2732
2733
2734/**
2735 * Atomically increment a 64-bit value, ordered.
2736 *
2737 * @returns The new value.
2738 * @param pu64 Pointer to the value to increment.
2739 */
2740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2741DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2742#else
2743DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2744{
2745# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2746 uint64_t u64;
2747 u64 = _InterlockedIncrement64((__int64 *)pu64);
2748 return u64;
2749
2750# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2751 uint64_t u64;
2752 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2753 : "=r" (u64),
2754 "=m" (*pu64)
2755 : "0" (1),
2756 "m" (*pu64)
2757 : "memory");
2758 return u64 + 1;
2759# else
2760 return ASMAtomicAddU64(pu64, 1) + 1;
2761# endif
2762}
2763#endif
2764
2765
2766/**
2767 * Atomically increment a signed 64-bit value, ordered.
2768 *
2769 * @returns The new value.
2770 * @param pi64 Pointer to the value to increment.
2771 */
2772DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2773{
2774 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2775}
2776
2777
2778/**
2779 * Atomically increment a size_t value, ordered.
2780 *
2781 * @returns The new value.
2782 * @param pcb Pointer to the value to increment.
2783 */
2784DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
2785{
2786#if ARCH_BITS == 64
2787 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2788#elif ARCH_BITS == 32
2789 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2790#else
2791# error "Unsupported ARCH_BITS value"
2792#endif
2793}
2794
2795
2796/**
2797 * Atomically decrement an unsigned 32-bit value, ordered.
2798 *
2799 * @returns The new value.
2800 * @param pu32 Pointer to the value to decrement.
2801 */
2802#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2803DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2804#else
2805DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2806{
2807 uint32_t u32;
2808# if RT_INLINE_ASM_USES_INTRIN
2809 u32 = _InterlockedDecrement((long *)pu32);
2810 return u32;
2811
2812# elif RT_INLINE_ASM_GNU_STYLE
2813 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2814 : "=r" (u32),
2815 "=m" (*pu32)
2816 : "0" (-1),
2817 "m" (*pu32)
2818 : "memory");
2819 return u32-1;
2820# else
2821 __asm
2822 {
2823 mov eax, -1
2824# ifdef RT_ARCH_AMD64
2825 mov rdx, [pu32]
2826 lock xadd [rdx], eax
2827# else
2828 mov edx, [pu32]
2829 lock xadd [edx], eax
2830# endif
2831 mov u32, eax
2832 }
2833 return u32-1;
2834# endif
2835}
2836#endif
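
/*
 * Usage sketch (illustrative only; asmExampleRelease is hypothetical): the
 * release side of a reference count; a new count of zero means the caller
 * dropped the last reference and should destroy the object.
 */
DECLINLINE(bool) asmExampleRelease(uint32_t volatile *pcRefs)
{
    return ASMAtomicDecU32(pcRefs) == 0;    /* true: last reference gone, destroy the object */
}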
2837
2838
2839/**
2840 * Atomically decrement a signed 32-bit value, ordered.
2841 *
2842 * @returns The new value.
2843 * @param pi32 Pointer to the value to decrement.
2844 */
2845DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2846{
2847 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2848}
2849
2850
2851/**
2852 * Atomically decrement an unsigned 64-bit value, ordered.
2853 *
2854 * @returns The new value.
2855 * @param pu64 Pointer to the value to decrement.
2856 */
2857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2858DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2859#else
2860DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2861{
2862# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2863 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2864 return u64;
2865
2866# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2867 uint64_t u64;
2868 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2869 : "=r" (u64),
2870 "=m" (*pu64)
2871 : "0" (~(uint64_t)0),
2872 "m" (*pu64)
2873 : "memory");
2874 return u64-1;
2875# else
2876 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2877# endif
2878}
2879#endif
2880
2881
2882/**
2883 * Atomically decrement a signed 64-bit value, ordered.
2884 *
2885 * @returns The new value.
2886 * @param pi64 Pointer to the value to decrement.
2887 */
2888DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2889{
2890 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2891}
2892
2893
2894/**
2895 * Atomically decrement a size_t value, ordered.
2896 *
2897 * @returns The new value.
2898 * @param pcb Pointer to the value to decrement.
2899 */
2900DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
2901{
2902#if ARCH_BITS == 64
2903 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2904#elif ARCH_BITS == 32
2905 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2906#else
2907# error "Unsupported ARCH_BITS value"
2908#endif
2909}
2910
2911
2912/**
2913 * Atomically Or an unsigned 32-bit value, ordered.
2914 *
2915 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
2916 * @param u32 The value to OR *pu32 with.
2917 */
2918#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2919DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2920#else
2921DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2922{
2923# if RT_INLINE_ASM_USES_INTRIN
2924 _InterlockedOr((long volatile *)pu32, (long)u32);
2925
2926# elif RT_INLINE_ASM_GNU_STYLE
2927 __asm__ __volatile__("lock; orl %1, %0\n\t"
2928 : "=m" (*pu32)
2929 : "ir" (u32),
2930 "m" (*pu32));
2931# else
2932 __asm
2933 {
2934 mov eax, [u32]
2935# ifdef RT_ARCH_AMD64
2936 mov rdx, [pu32]
2937 lock or [rdx], eax
2938# else
2939 mov edx, [pu32]
2940 lock or [edx], eax
2941# endif
2942 }
2943# endif
2944}
2945#endif
2946
2947
2948/**
2949 * Atomically Or a signed 32-bit value, ordered.
2950 *
2951 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
2952 * @param i32 The value to OR *pi32 with.
2953 */
2954DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2955{
2956 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2957}
2958
2959
2960/**
2961 * Atomically Or an unsigned 64-bit value, ordered.
2962 *
2963 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
2964 * @param u64 The value to OR *pu64 with.
2965 */
2966#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2967DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2968#else
2969DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2970{
2971# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2972 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2973
2974# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2975 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2976 : "=m" (*pu64)
2977 : "r" (u64),
2978 "m" (*pu64));
2979# else
2980 for (;;)
2981 {
2982 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2983 uint64_t u64New = u64Old | u64;
2984 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2985 break;
2986 ASMNopPause();
2987 }
2988# endif
2989}
2990#endif
2991
2992
2993/**
2994 * Atomically Or a signed 64-bit value, ordered.
2995 *
2996 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
2997 * @param i64 The value to OR *pi64 with.
2998 */
2999DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3000{
3001 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3002}
3003
3004
3005/**
3006 * Atomically And an unsigned 32-bit value, ordered.
3007 *
3008 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3009 * @param u32 The value to AND *pu32 with.
3010 */
3011#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3012DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3013#else
3014DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3015{
3016# if RT_INLINE_ASM_USES_INTRIN
3017 _InterlockedAnd((long volatile *)pu32, u32);
3018
3019# elif RT_INLINE_ASM_GNU_STYLE
3020 __asm__ __volatile__("lock; andl %1, %0\n\t"
3021 : "=m" (*pu32)
3022 : "ir" (u32),
3023 "m" (*pu32));
3024# else
3025 __asm
3026 {
3027 mov eax, [u32]
3028# ifdef RT_ARCH_AMD64
3029 mov rdx, [pu32]
3030 lock and [rdx], eax
3031# else
3032 mov edx, [pu32]
3033 lock and [edx], eax
3034# endif
3035 }
3036# endif
3037}
3038#endif
3039
3040
3041/**
3042 * Atomically And a signed 32-bit value, ordered.
3043 *
3044 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3045 * @param i32 The value to AND *pi32 with.
3046 */
3047DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3048{
3049 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3050}
3051
3052
3053/**
3054 * Atomically And an unsigned 64-bit value, ordered.
3055 *
3056 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3057 * @param u64 The value to AND *pu64 with.
3058 */
3059#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3060DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3061#else
3062DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3063{
3064# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3065 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3066
3067# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3068 __asm__ __volatile__("lock; andq %1, %0\n\t"
3069 : "=m" (*pu64)
3070 : "r" (u64),
3071 "m" (*pu64));
3072# else
3073 for (;;)
3074 {
3075 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3076 uint64_t u64New = u64Old & u64;
3077 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3078 break;
3079 ASMNopPause();
3080 }
3081# endif
3082}
3083#endif
3084
3085
3086/**
3087 * Atomically And a signed 64-bit value, ordered.
3088 *
3089 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3090 * @param i64 The value to AND *pi64 with.
3091 */
3092DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3093{
3094 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3095}
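
/*
 * Usage sketch (illustrative only; ASM_EXAMPLE_F_BUSY and the helpers below are
 * hypothetical): setting and clearing a flag in a shared 32-bit status word
 * with the ordered OR/AND operations.
 */
#define ASM_EXAMPLE_F_BUSY  UINT32_C(0x00000001)

DECLINLINE(void) asmExampleSetBusy(uint32_t volatile *pfStatus)
{
    ASMAtomicOrU32(pfStatus, ASM_EXAMPLE_F_BUSY);       /* atomically set the bit */
}

DECLINLINE(void) asmExampleClearBusy(uint32_t volatile *pfStatus)
{
    ASMAtomicAndU32(pfStatus, ~ASM_EXAMPLE_F_BUSY);     /* atomically clear the bit */
}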
3096
3097
3098/**
3099 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3100 *
3101 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3102 * @param u32 The value to OR *pu32 with.
3103 */
3104#if RT_INLINE_ASM_EXTERNAL
3105DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3106#else
3107DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3108{
3109# if RT_INLINE_ASM_GNU_STYLE
3110 __asm__ __volatile__("orl %1, %0\n\t"
3111 : "=m" (*pu32)
3112 : "ir" (u32),
3113 "m" (*pu32));
3114# else
3115 __asm
3116 {
3117 mov eax, [u32]
3118# ifdef RT_ARCH_AMD64
3119 mov rdx, [pu32]
3120 or [rdx], eax
3121# else
3122 mov edx, [pu32]
3123 or [edx], eax
3124# endif
3125 }
3126# endif
3127}
3128#endif
3129
3130
3131/**
3132 * Atomically OR a signed 32-bit value, unordered.
3133 *
3134 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3135 * @param i32 The value to OR *pi32 with.
3136 */
3137DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3138{
3139 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3140}
3141
3142
3143/**
3144 * Atomically OR an unsigned 64-bit value, unordered.
3145 *
3146 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
3147 * @param u64 The value to OR *pu64 with.
3148 */
3149#if RT_INLINE_ASM_EXTERNAL
3150DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3151#else
3152DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3153{
3154# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3155 __asm__ __volatile__("orq %1, %q0\n\t"
3156 : "=m" (*pu64)
3157 : "r" (u64),
3158 "m" (*pu64));
3159# else
3160 for (;;)
3161 {
3162 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3163 uint64_t u64New = u64Old | u64;
3164 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3165 break;
3166 ASMNopPause();
3167 }
3168# endif
3169}
3170#endif
3171
3172
3173/**
3174 * Atomically Or a signed 64-bit value, unordered.
3175 *
3176 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
3177 * @param i64 The value to OR *pi64 with.
3178 */
3179DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3180{
3181 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3182}
3183
3184
3185/**
3186 * Atomically And an unsigned 32-bit value, unordered.
3187 *
3188 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3189 * @param u32 The value to AND *pu32 with.
3190 */
3191#if RT_INLINE_ASM_EXTERNAL
3192DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3193#else
3194DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3195{
3196# if RT_INLINE_ASM_GNU_STYLE
3197 __asm__ __volatile__("andl %1, %0\n\t"
3198 : "=m" (*pu32)
3199 : "ir" (u32),
3200 "m" (*pu32));
3201# else
3202 __asm
3203 {
3204 mov eax, [u32]
3205# ifdef RT_ARCH_AMD64
3206 mov rdx, [pu32]
3207 and [rdx], eax
3208# else
3209 mov edx, [pu32]
3210 and [edx], eax
3211# endif
3212 }
3213# endif
3214}
3215#endif
3216
3217
3218/**
3219 * Atomically And a signed 32-bit value, unordered.
3220 *
3221 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3222 * @param i32 The value to AND *pi32 with.
3223 */
3224DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3225{
3226 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3227}
3228
3229
3230/**
3231 * Atomically And an unsigned 64-bit value, unordered.
3232 *
3233 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3234 * @param u64 The value to AND *pu64 with.
3235 */
3236#if RT_INLINE_ASM_EXTERNAL
3237DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3238#else
3239DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3240{
3241# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3242 __asm__ __volatile__("andq %1, %0\n\t"
3243 : "=m" (*pu64)
3244 : "r" (u64),
3245 "m" (*pu64));
3246# else
3247 for (;;)
3248 {
3249 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3250 uint64_t u64New = u64Old & u64;
3251 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3252 break;
3253 ASMNopPause();
3254 }
3255# endif
3256}
3257#endif
3258
3259
3260/**
3261 * Atomically And a signed 64-bit value, unordered.
3262 *
3263 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3264 * @param i64 The value to AND *pi64 with.
3265 */
3266DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3267{
3268 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3269}
3270
3271
3272/**
3273 * Atomically increment an unsigned 32-bit value, unordered.
3274 *
3275 * @returns the new value.
3276 * @param pu32 Pointer to the variable to increment.
3277 */
3278#if RT_INLINE_ASM_EXTERNAL
3279DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3280#else
3281DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3282{
3283 uint32_t u32;
3284# if RT_INLINE_ASM_GNU_STYLE
3285 __asm__ __volatile__("xaddl %0, %1\n\t"
3286 : "=r" (u32),
3287 "=m" (*pu32)
3288 : "0" (1),
3289 "m" (*pu32)
3290 : "memory");
3291 return u32 + 1;
3292# else
3293 __asm
3294 {
3295 mov eax, 1
3296# ifdef RT_ARCH_AMD64
3297 mov rdx, [pu32]
3298 xadd [rdx], eax
3299# else
3300 mov edx, [pu32]
3301 xadd [edx], eax
3302# endif
3303 mov u32, eax
3304 }
3305 return u32 + 1;
3306# endif
3307}
3308#endif
3309
3310
3311/**
3312 * Atomically decrement an unsigned 32-bit value, unordered.
3313 *
3314 * @returns the new value.
3315 * @param pu32 Pointer to the variable to decrement.
3316 */
3317#if RT_INLINE_ASM_EXTERNAL
3318DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3319#else
3320DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3321{
3322 uint32_t u32;
3323# if RT_INLINE_ASM_GNU_STYLE
3324 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3325 : "=r" (u32),
3326 "=m" (*pu32)
3327 : "0" (-1),
3328 "m" (*pu32)
3329 : "memory");
3330 return u32 - 1;
3331# else
3332 __asm
3333 {
3334 mov eax, -1
3335# ifdef RT_ARCH_AMD64
3336 mov rdx, [pu32]
3337 xadd [rdx], eax
3338# else
3339 mov edx, [pu32]
3340 xadd [edx], eax
3341# endif
3342 mov u32, eax
3343 }
3344 return u32 - 1;
3345# endif
3346}
3347#endif
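
/*
 * Usage sketch (illustrative only; asmExampleCountPacket is hypothetical): the
 * unordered Inc/Dec variants skip the lock prefix and are therefore cheaper,
 * which fits statistics counters that are updated from a single context or
 * where a rare lost update is acceptable.
 */
DECLINLINE(void) asmExampleCountPacket(uint32_t volatile *pcPackets)
{
    ASMAtomicUoIncU32(pcPackets);
}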
3348
3349
3350/** @def RT_ASM_PAGE_SIZE
3351 * We try to avoid dragging in iprt/param.h here.
3352 * @internal
3353 */
3354#if defined(RT_ARCH_SPARC64)
3355# define RT_ASM_PAGE_SIZE 0x2000
3356# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3357# if PAGE_SIZE != 0x2000
3358# error "PAGE_SIZE is not 0x2000!"
3359# endif
3360# endif
3361#else
3362# define RT_ASM_PAGE_SIZE 0x1000
3363# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3364# if PAGE_SIZE != 0x1000
3365# error "PAGE_SIZE is not 0x1000!"
3366# endif
3367# endif
3368#endif
3369
3370/**
3371 * Zeros a 4K memory page.
3372 *
3373 * @param pv Pointer to the memory block. This must be page aligned.
3374 */
3375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3376DECLASM(void) ASMMemZeroPage(volatile void *pv);
3377# else
3378DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3379{
3380# if RT_INLINE_ASM_USES_INTRIN
3381# ifdef RT_ARCH_AMD64
3382 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3383# else
3384 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3385# endif
3386
3387# elif RT_INLINE_ASM_GNU_STYLE
3388 RTCCUINTREG uDummy;
3389# ifdef RT_ARCH_AMD64
3390 __asm__ __volatile__("rep stosq"
3391 : "=D" (pv),
3392 "=c" (uDummy)
3393 : "0" (pv),
3394 "c" (RT_ASM_PAGE_SIZE >> 3),
3395 "a" (0)
3396 : "memory");
3397# else
3398 __asm__ __volatile__("rep stosl"
3399 : "=D" (pv),
3400 "=c" (uDummy)
3401 : "0" (pv),
3402 "c" (RT_ASM_PAGE_SIZE >> 2),
3403 "a" (0)
3404 : "memory");
3405# endif
3406# else
3407 __asm
3408 {
3409# ifdef RT_ARCH_AMD64
3410 xor rax, rax
3411 mov ecx, 0200h
3412 mov rdi, [pv]
3413 rep stosq
3414# else
3415 xor eax, eax
3416 mov ecx, 0400h
3417 mov edi, [pv]
3418 rep stosd
3419# endif
3420 }
3421# endif
3422}
3423# endif
3424
3425
3426/**
3427 * Zeros a memory block with a 32-bit aligned size.
3428 *
3429 * @param pv Pointer to the memory block.
3430 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3431 */
3432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3433DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3434#else
3435DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3436{
3437# if RT_INLINE_ASM_USES_INTRIN
3438# ifdef RT_ARCH_AMD64
3439 if (!(cb & 7))
3440 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3441 else
3442# endif
3443 __stosd((unsigned long *)pv, 0, cb / 4);
3444
3445# elif RT_INLINE_ASM_GNU_STYLE
3446 __asm__ __volatile__("rep stosl"
3447 : "=D" (pv),
3448 "=c" (cb)
3449 : "0" (pv),
3450 "1" (cb >> 2),
3451 "a" (0)
3452 : "memory");
3453# else
3454 __asm
3455 {
3456 xor eax, eax
3457# ifdef RT_ARCH_AMD64
3458 mov rcx, [cb]
3459 shr rcx, 2
3460 mov rdi, [pv]
3461# else
3462 mov ecx, [cb]
3463 shr ecx, 2
3464 mov edi, [pv]
3465# endif
3466 rep stosd
3467 }
3468# endif
3469}
3470#endif
3471
3472
3473/**
3474 * Fills a memory block with a 32-bit aligned size.
3475 *
3476 * @param pv Pointer to the memory block.
3477 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3478 * @param u32 The value to fill with.
3479 */
3480#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3481DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3482#else
3483DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3484{
3485# if RT_INLINE_ASM_USES_INTRIN
3486# ifdef RT_ARCH_AMD64
3487 if (!(cb & 7))
3488 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3489 else
3490# endif
3491 __stosd((unsigned long *)pv, u32, cb / 4);
3492
3493# elif RT_INLINE_ASM_GNU_STYLE
3494 __asm__ __volatile__("rep stosl"
3495 : "=D" (pv),
3496 "=c" (cb)
3497 : "0" (pv),
3498 "1" (cb >> 2),
3499 "a" (u32)
3500 : "memory");
3501# else
3502 __asm
3503 {
3504# ifdef RT_ARCH_AMD64
3505 mov rcx, [cb]
3506 shr rcx, 2
3507 mov rdi, [pv]
3508# else
3509 mov ecx, [cb]
3510 shr ecx, 2
3511 mov edi, [pv]
3512# endif
3513 mov eax, [u32]
3514 rep stosd
3515 }
3516# endif
3517}
3518#endif
3519
3520
3521/**
3522 * Checks if a memory page is all zeros.
3523 *
3524 * @returns true / false.
3525 *
3526 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
3527 * boundary.
3528 */
3529DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3530{
3531# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3532 union { RTCCUINTREG r; bool f; } uAX;
3533 RTCCUINTREG xCX, xDI;
3534 Assert(!((uintptr_t)pvPage & 15));
3535 __asm__ __volatile__("repe; "
3536# ifdef RT_ARCH_AMD64
3537 "scasq\n\t"
3538# else
3539 "scasl\n\t"
3540# endif
3541 "setnc %%al\n\t"
3542 : "=&c" (xCX),
3543 "=&D" (xDI),
3544 "=&a" (uAX.r)
3545 : "mr" (pvPage),
3546# ifdef RT_ARCH_AMD64
3547 "0" (RT_ASM_PAGE_SIZE/8),
3548# else
3549 "0" (RT_ASM_PAGE_SIZE/4),
3550# endif
3551 "1" (pvPage),
3552 "2" (0));
3553 return uAX.f;
3554# else
3555 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3556 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3557 Assert(!((uintptr_t)pvPage & 15));
3558 for (;;)
3559 {
3560 if (puPtr[0]) return false;
3561 if (puPtr[4]) return false;
3562
3563 if (puPtr[2]) return false;
3564 if (puPtr[6]) return false;
3565
3566 if (puPtr[1]) return false;
3567 if (puPtr[5]) return false;
3568
3569 if (puPtr[3]) return false;
3570 if (puPtr[7]) return false;
3571
3572 if (!--cLeft)
3573 return true;
3574 puPtr += 8;
3575 }
3576 return true;
3577# endif
3578}
3579
3580
3581/**
3582 * Checks if a memory block is filled with the specified byte.
3583 *
3584 * This is a sort of inverted memchr.
3585 *
3586 * @returns Pointer to the byte which doesn't equal u8.
3587 * @returns NULL if all equal to u8.
3588 *
3589 * @param pv Pointer to the memory block.
3590 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3591 * @param u8 The value it's supposed to be filled with.
3592 *
3593 * @todo Fix name, it is a predicate function but it's not returning boolean!
3594 */
3595DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3596{
3597/** @todo rewrite this in inline assembly? */
3598 uint8_t const *pb = (uint8_t const *)pv;
3599 for (; cb; cb--, pb++)
3600 if (RT_UNLIKELY(*pb != u8))
3601 return (void *)pb;
3602 return NULL;
3603}
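
/*
 * Usage sketch (illustrative only; asmExampleIsZeroed is hypothetical): since
 * ASMMemIsAll8 returns NULL when every byte matches and a pointer to the first
 * mismatch otherwise, an "is this buffer zero filled" check inverts the result.
 */
DECLINLINE(bool) asmExampleIsZeroed(void const *pv, size_t cb)
{
    return ASMMemIsAll8(pv, cb, 0) == NULL;
}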
3604
3605
3606/**
3607 * Checks if a memory block is filled with the specified 32-bit value.
3608 *
3609 * This is a sort of inverted memchr.
3610 *
3611 * @returns Pointer to the first value which doesn't equal u32.
3612 * @returns NULL if all equal to u32.
3613 *
3614 * @param pv Pointer to the memory block.
3615 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3616 * @param u32 The value it's supposed to be filled with.
3617 *
3618 * @todo Fix name, it is a predicate function but it's not returning boolean!
3619 */
3620DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3621{
3622/** @todo rewrite this in inline assembly? */
3623 uint32_t const *pu32 = (uint32_t const *)pv;
3624 for (; cb; cb -= 4, pu32++)
3625 if (RT_UNLIKELY(*pu32 != u32))
3626 return (uint32_t *)pu32;
3627 return NULL;
3628}
3629
3630
3631/**
3632 * Probes a byte pointer for read access.
3633 *
3634 * While the function will fault if the byte is not read accessible,
3635 * the idea is to do this in a safe place like before acquiring locks
3636 * and such like.
3637 *
3638 * Also, this function guarantees that an eager compiler is not going
3639 * to optimize the probing away.
3640 *
3641 * @param pvByte Pointer to the byte.
3642 */
3643#if RT_INLINE_ASM_EXTERNAL
3644DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3645#else
3646DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3647{
3648 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3649 uint8_t u8;
3650# if RT_INLINE_ASM_GNU_STYLE
3651 __asm__ __volatile__("movb (%1), %0\n\t"
3652 : "=r" (u8)
3653 : "r" (pvByte));
3654# else
3655 __asm
3656 {
3657# ifdef RT_ARCH_AMD64
3658 mov rax, [pvByte]
3659 mov al, [rax]
3660# else
3661 mov eax, [pvByte]
3662 mov al, [eax]
3663# endif
3664 mov [u8], al
3665 }
3666# endif
3667 return u8;
3668}
3669#endif
3670
3671/**
3672 * Probes a buffer for read access page by page.
3673 *
3674 * While the function will fault if the buffer is not fully read
3675 * accessible, the idea is to do this in a safe place like before
3676 * acquiring locks and such like.
3677 *
3678 * Also, this function guarantees that an eager compiler is not going
3679 * to optimize the probing away.
3680 *
3681 * @param pvBuf Pointer to the buffer.
3682 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3683 */
3684DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3685{
3686 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3687 /* the first byte */
3688 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3689 ASMProbeReadByte(pu8);
3690
3691 /* the pages in between. */
3692 while (cbBuf > RT_ASM_PAGE_SIZE)
3693 {
3694 ASMProbeReadByte(pu8);
3695 cbBuf -= RT_ASM_PAGE_SIZE;
3696 pu8 += RT_ASM_PAGE_SIZE;
3697 }
3698
3699 /* the last byte */
3700 ASMProbeReadByte(pu8 + cbBuf - 1);
3701}
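
/*
 * Usage sketch (illustrative only; asmExampleProbeRequest is hypothetical):
 * probe a caller supplied buffer up front so that any page fault happens here,
 * before locks are taken or other state is modified.
 */
DECLINLINE(void) asmExampleProbeRequest(const void *pvReq, size_t cbReq)
{
    ASMProbeReadBuffer(pvReq, cbReq);   /* faults here, in a safe context, if inaccessible */
    /* ... safe to enter a critical section and consume the buffer afterwards ... */
}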
3702
3703
3704
3705/** @defgroup grp_inline_bits Bit Operations
3706 * @{
3707 */
3708
3709
3710/**
3711 * Sets a bit in a bitmap.
3712 *
3713 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3714 * @param iBit The bit to set.
3715 *
3716 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3717 * However, doing so will yield better performance as well as avoiding
3718 * traps accessing the last bits in the bitmap.
3719 */
3720#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3721DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3722#else
3723DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3724{
3725# if RT_INLINE_ASM_USES_INTRIN
3726 _bittestandset((long *)pvBitmap, iBit);
3727
3728# elif RT_INLINE_ASM_GNU_STYLE
3729 __asm__ __volatile__("btsl %1, %0"
3730 : "=m" (*(volatile long *)pvBitmap)
3731 : "Ir" (iBit),
3732 "m" (*(volatile long *)pvBitmap)
3733 : "memory");
3734# else
3735 __asm
3736 {
3737# ifdef RT_ARCH_AMD64
3738 mov rax, [pvBitmap]
3739 mov edx, [iBit]
3740 bts [rax], edx
3741# else
3742 mov eax, [pvBitmap]
3743 mov edx, [iBit]
3744 bts [eax], edx
3745# endif
3746 }
3747# endif
3748}
3749#endif
3750
3751
3752/**
3753 * Atomically sets a bit in a bitmap, ordered.
3754 *
3755 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3756 * the memory access isn't atomic!
3757 * @param iBit The bit to set.
3758 */
3759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3760DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3761#else
3762DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3763{
3764 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3765# if RT_INLINE_ASM_USES_INTRIN
3766 _interlockedbittestandset((long *)pvBitmap, iBit);
3767# elif RT_INLINE_ASM_GNU_STYLE
3768 __asm__ __volatile__("lock; btsl %1, %0"
3769 : "=m" (*(volatile long *)pvBitmap)
3770 : "Ir" (iBit),
3771 "m" (*(volatile long *)pvBitmap)
3772 : "memory");
3773# else
3774 __asm
3775 {
3776# ifdef RT_ARCH_AMD64
3777 mov rax, [pvBitmap]
3778 mov edx, [iBit]
3779 lock bts [rax], edx
3780# else
3781 mov eax, [pvBitmap]
3782 mov edx, [iBit]
3783 lock bts [eax], edx
3784# endif
3785 }
3786# endif
3787}
3788#endif
3789
3790
3791/**
3792 * Clears a bit in a bitmap.
3793 *
3794 * @param pvBitmap Pointer to the bitmap.
3795 * @param iBit The bit to clear.
3796 *
3797 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3798 * However, doing so will yield better performance as well as avoiding
3799 * traps accessing the last bits in the bitmap.
3800 */
3801#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3802DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3803#else
3804DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3805{
3806# if RT_INLINE_ASM_USES_INTRIN
3807 _bittestandreset((long *)pvBitmap, iBit);
3808
3809# elif RT_INLINE_ASM_GNU_STYLE
3810 __asm__ __volatile__("btrl %1, %0"
3811 : "=m" (*(volatile long *)pvBitmap)
3812 : "Ir" (iBit),
3813 "m" (*(volatile long *)pvBitmap)
3814 : "memory");
3815# else
3816 __asm
3817 {
3818# ifdef RT_ARCH_AMD64
3819 mov rax, [pvBitmap]
3820 mov edx, [iBit]
3821 btr [rax], edx
3822# else
3823 mov eax, [pvBitmap]
3824 mov edx, [iBit]
3825 btr [eax], edx
3826# endif
3827 }
3828# endif
3829}
3830#endif
3831
3832
3833/**
3834 * Atomically clears a bit in a bitmap, ordered.
3835 *
3836 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3837 * the memory access isn't atomic!
3838 * @param iBit The bit to clear.
3839 * @remarks No memory barrier, take care on smp.
3840 */
3841#if RT_INLINE_ASM_EXTERNAL
3842DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3843#else
3844DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3845{
3846 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3847# if RT_INLINE_ASM_GNU_STYLE
3848 __asm__ __volatile__("lock; btrl %1, %0"
3849 : "=m" (*(volatile long *)pvBitmap)
3850 : "Ir" (iBit),
3851 "m" (*(volatile long *)pvBitmap)
3852 : "memory");
3853# else
3854 __asm
3855 {
3856# ifdef RT_ARCH_AMD64
3857 mov rax, [pvBitmap]
3858 mov edx, [iBit]
3859 lock btr [rax], edx
3860# else
3861 mov eax, [pvBitmap]
3862 mov edx, [iBit]
3863 lock btr [eax], edx
3864# endif
3865 }
3866# endif
3867}
3868#endif
3869
3870
3871/**
3872 * Toggles a bit in a bitmap.
3873 *
3874 * @param pvBitmap Pointer to the bitmap.
3875 * @param iBit The bit to toggle.
3876 *
3877 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3878 * However, doing so will yield better performance as well as avoiding
3879 * traps accessing the last bits in the bitmap.
3880 */
3881#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3882DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3883#else
3884DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3885{
3886# if RT_INLINE_ASM_USES_INTRIN
3887 _bittestandcomplement((long *)pvBitmap, iBit);
3888# elif RT_INLINE_ASM_GNU_STYLE
3889 __asm__ __volatile__("btcl %1, %0"
3890 : "=m" (*(volatile long *)pvBitmap)
3891 : "Ir" (iBit),
3892 "m" (*(volatile long *)pvBitmap)
3893 : "memory");
3894# else
3895 __asm
3896 {
3897# ifdef RT_ARCH_AMD64
3898 mov rax, [pvBitmap]
3899 mov edx, [iBit]
3900 btc [rax], edx
3901# else
3902 mov eax, [pvBitmap]
3903 mov edx, [iBit]
3904 btc [eax], edx
3905# endif
3906 }
3907# endif
3908}
3909#endif
3910
3911
3912/**
3913 * Atomically toggles a bit in a bitmap, ordered.
3914 *
3915 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3916 * the memory access isn't atomic!
3917 * @param iBit The bit to toggle.
3918 */
3919#if RT_INLINE_ASM_EXTERNAL
3920DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3921#else
3922DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3923{
3924 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3925# if RT_INLINE_ASM_GNU_STYLE
3926 __asm__ __volatile__("lock; btcl %1, %0"
3927 : "=m" (*(volatile long *)pvBitmap)
3928 : "Ir" (iBit),
3929 "m" (*(volatile long *)pvBitmap)
3930 : "memory");
3931# else
3932 __asm
3933 {
3934# ifdef RT_ARCH_AMD64
3935 mov rax, [pvBitmap]
3936 mov edx, [iBit]
3937 lock btc [rax], edx
3938# else
3939 mov eax, [pvBitmap]
3940 mov edx, [iBit]
3941 lock btc [eax], edx
3942# endif
3943 }
3944# endif
3945}
3946#endif
3947
3948
3949/**
3950 * Tests and sets a bit in a bitmap.
3951 *
3952 * @returns true if the bit was set.
3953 * @returns false if the bit was clear.
3954 *
3955 * @param pvBitmap Pointer to the bitmap.
3956 * @param iBit The bit to test and set.
3957 *
3958 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
3959 * However, aligning it will yield better performance and avoid
3960 * traps when accessing the last bits in the bitmap.
3961 */
3962#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3963DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3964#else
3965DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3966{
3967 union { bool f; uint32_t u32; uint8_t u8; } rc;
3968# if RT_INLINE_ASM_USES_INTRIN
3969 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3970
3971# elif RT_INLINE_ASM_GNU_STYLE
3972 __asm__ __volatile__("btsl %2, %1\n\t"
3973 "setc %b0\n\t"
3974 "andl $1, %0\n\t"
3975 : "=q" (rc.u32),
3976 "=m" (*(volatile long *)pvBitmap)
3977 : "Ir" (iBit),
3978 "m" (*(volatile long *)pvBitmap)
3979 : "memory");
3980# else
3981 __asm
3982 {
3983 mov edx, [iBit]
3984# ifdef RT_ARCH_AMD64
3985 mov rax, [pvBitmap]
3986 bts [rax], edx
3987# else
3988 mov eax, [pvBitmap]
3989 bts [eax], edx
3990# endif
3991 setc al
3992 and eax, 1
3993 mov [rc.u32], eax
3994 }
3995# endif
3996 return rc.f;
3997}
3998#endif
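
/* Usage sketch (illustrative only; pbmSeen and exampleMarkSeen are
 * hypothetical): the return value is the previous state of the bit, so a
 * single call both marks an entry and reports whether it was already
 * marked.  This variant is not atomic; use ASMAtomicBitTestAndSet below
 * for bitmaps shared between threads.
 *
 *     static bool exampleMarkSeen(uint32_t *pbmSeen, int32_t iEntry)
 *     {
 *         return ASMBitTestAndSet(pbmSeen, iEntry); // true if already marked
 *     }
 */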
3999
4000
4001/**
4002 * Atomically tests and sets a bit in a bitmap, ordered.
4003 *
4004 * @returns true if the bit was set.
4005 * @returns false if the bit was clear.
4006 *
4007 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4008 * the memory access isn't atomic!
4009 * @param iBit The bit to test and set.
4010 */
4011#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4012DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4013#else
4014DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4015{
4016 union { bool f; uint32_t u32; uint8_t u8; } rc;
4017 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4018# if RT_INLINE_ASM_USES_INTRIN
4019 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4020# elif RT_INLINE_ASM_GNU_STYLE
4021 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4022 "setc %b0\n\t"
4023 "andl $1, %0\n\t"
4024 : "=q" (rc.u32),
4025 "=m" (*(volatile long *)pvBitmap)
4026 : "Ir" (iBit),
4027 "m" (*(volatile long *)pvBitmap)
4028 : "memory");
4029# else
4030 __asm
4031 {
4032 mov edx, [iBit]
4033# ifdef RT_ARCH_AMD64
4034 mov rax, [pvBitmap]
4035 lock bts [rax], edx
4036# else
4037 mov eax, [pvBitmap]
4038 lock bts [eax], edx
4039# endif
4040 setc al
4041 and eax, 1
4042 mov [rc.u32], eax
4043 }
4044# endif
4045 return rc.f;
4046}
4047#endif
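
/* Usage sketch (illustrative only; pbmInUse and exampleTryClaimSlot are
 * hypothetical): claiming a slot in a shared allocation bitmap.  Because
 * the test and the set are one locked operation, two threads can never
 * both see the bit as clear, so at most one caller wins a given slot.
 * The bitmap must be 32-bit aligned.
 *
 *     static bool exampleTryClaimSlot(uint32_t volatile *pbmInUse, int32_t iSlot)
 *     {
 *         return !ASMAtomicBitTestAndSet(pbmInUse, iSlot); // true if we claimed it
 *     }
 */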
4048
4049
4050/**
4051 * Tests and clears a bit in a bitmap.
4052 *
4053 * @returns true if the bit was set.
4054 * @returns false if the bit was clear.
4055 *
4056 * @param pvBitmap Pointer to the bitmap.
4057 * @param iBit The bit to test and clear.
4058 *
4059 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4060 * However, aligning it will yield better performance and avoid
4061 * traps when accessing the last bits in the bitmap.
4062 */
4063#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4064DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4065#else
4066DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4067{
4068 union { bool f; uint32_t u32; uint8_t u8; } rc;
4069# if RT_INLINE_ASM_USES_INTRIN
4070 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4071
4072# elif RT_INLINE_ASM_GNU_STYLE
4073 __asm__ __volatile__("btrl %2, %1\n\t"
4074 "setc %b0\n\t"
4075 "andl $1, %0\n\t"
4076 : "=q" (rc.u32),
4077 "=m" (*(volatile long *)pvBitmap)
4078 : "Ir" (iBit),
4079 "m" (*(volatile long *)pvBitmap)
4080 : "memory");
4081# else
4082 __asm
4083 {
4084 mov edx, [iBit]
4085# ifdef RT_ARCH_AMD64
4086 mov rax, [pvBitmap]
4087 btr [rax], edx
4088# else
4089 mov eax, [pvBitmap]
4090 btr [eax], edx
4091# endif
4092 setc al
4093 and eax, 1
4094 mov [rc.u32], eax
4095 }
4096# endif
4097 return rc.f;
4098}
4099#endif
4100
4101
4102/**
4103 * Atomically tests and clears a bit in a bitmap, ordered.
4104 *
4105 * @returns true if the bit was set.
4106 * @returns false if the bit was clear.
4107 *
4108 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4109 * the memory access isn't atomic!
4110 * @param iBit The bit to test and clear.
4111 *
4112 * @remarks No memory barrier, take care on SMP.
4113 */
4114#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4115DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4116#else
4117DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4118{
4119 union { bool f; uint32_t u32; uint8_t u8; } rc;
4120 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4121# if RT_INLINE_ASM_USES_INTRIN
4122 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4123
4124# elif RT_INLINE_ASM_GNU_STYLE
4125 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4126 "setc %b0\n\t"
4127 "andl $1, %0\n\t"
4128 : "=q" (rc.u32),
4129 "=m" (*(volatile long *)pvBitmap)
4130 : "Ir" (iBit),
4131 "m" (*(volatile long *)pvBitmap)
4132 : "memory");
4133# else
4134 __asm
4135 {
4136 mov edx, [iBit]
4137# ifdef RT_ARCH_AMD64
4138 mov rax, [pvBitmap]
4139 lock btr [rax], edx
4140# else
4141 mov eax, [pvBitmap]
4142 lock btr [eax], edx
4143# endif
4144 setc al
4145 and eax, 1
4146 mov [rc.u32], eax
4147 }
4148# endif
4149 return rc.f;
4150}
4151#endif
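
/* Usage sketch (illustrative only; pbmPending and exampleConsumePending are
 * hypothetical): consuming a "work pending" flag exactly once.  Only the
 * caller that observes the set-to-clear transition gets a true return and
 * should process the queue.
 *
 *     static bool exampleConsumePending(uint32_t volatile *pbmPending, int32_t iQueue)
 *     {
 *         return ASMAtomicBitTestAndClear(pbmPending, iQueue); // true if work was pending
 *     }
 */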
4152
4153
4154/**
4155 * Tests and toggles a bit in a bitmap.
4156 *
4157 * @returns true if the bit was set.
4158 * @returns false if the bit was clear.
4159 *
4160 * @param pvBitmap Pointer to the bitmap.
4161 * @param iBit The bit to test and toggle.
4162 *
4163 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4164 * However, aligning it will yield better performance and avoid
4165 * traps when accessing the last bits in the bitmap.
4166 */
4167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4168DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4169#else
4170DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4171{
4172 union { bool f; uint32_t u32; uint8_t u8; } rc;
4173# if RT_INLINE_ASM_USES_INTRIN
4174 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4175
4176# elif RT_INLINE_ASM_GNU_STYLE
4177 __asm__ __volatile__("btcl %2, %1\n\t"
4178 "setc %b0\n\t"
4179 "andl $1, %0\n\t"
4180 : "=q" (rc.u32),
4181 "=m" (*(volatile long *)pvBitmap)
4182 : "Ir" (iBit),
4183 "m" (*(volatile long *)pvBitmap)
4184 : "memory");
4185# else
4186 __asm
4187 {
4188 mov edx, [iBit]
4189# ifdef RT_ARCH_AMD64
4190 mov rax, [pvBitmap]
4191 btc [rax], edx
4192# else
4193 mov eax, [pvBitmap]
4194 btc [eax], edx
4195# endif
4196 setc al
4197 and eax, 1
4198 mov [rc.u32], eax
4199 }
4200# endif
4201 return rc.f;
4202}
4203#endif
4204
4205
4206/**
4207 * Atomically tests and toggles a bit in a bitmap, ordered.
4208 *
4209 * @returns true if the bit was set.
4210 * @returns false if the bit was clear.
4211 *
4212 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4213 * the memory access isn't atomic!
4214 * @param iBit The bit to test and toggle.
4215 */
4216#if RT_INLINE_ASM_EXTERNAL
4217DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4218#else
4219DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4220{
4221 union { bool f; uint32_t u32; uint8_t u8; } rc;
4222 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4223# if RT_INLINE_ASM_GNU_STYLE
4224 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4225 "setc %b0\n\t"
4226 "andl $1, %0\n\t"
4227 : "=q" (rc.u32),
4228 "=m" (*(volatile long *)pvBitmap)
4229 : "Ir" (iBit),
4230 "m" (*(volatile long *)pvBitmap)
4231 : "memory");
4232# else
4233 __asm
4234 {
4235 mov edx, [iBit]
4236# ifdef RT_ARCH_AMD64
4237 mov rax, [pvBitmap]
4238 lock btc [rax], edx
4239# else
4240 mov eax, [pvBitmap]
4241 lock btc [eax], edx
4242# endif
4243 setc al
4244 and eax, 1
4245 mov [rc.u32], eax
4246 }
4247# endif
4248 return rc.f;
4249}
4250#endif
4251
4252
4253/**
4254 * Tests if a bit in a bitmap is set.
4255 *
4256 * @returns true if the bit is set.
4257 * @returns false if the bit is clear.
4258 *
4259 * @param pvBitmap Pointer to the bitmap.
4260 * @param iBit The bit to test.
4261 *
4262 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
4263 * However, aligning it will yield better performance and avoid
4264 * traps when accessing the last bits in the bitmap.
4265 */
4266#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4267DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4268#else
4269DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4270{
4271 union { bool f; uint32_t u32; uint8_t u8; } rc;
4272# if RT_INLINE_ASM_USES_INTRIN
4273 rc.u32 = _bittest((long *)pvBitmap, iBit);
4274# elif RT_INLINE_ASM_GNU_STYLE
4275
4276 __asm__ __volatile__("btl %2, %1\n\t"
4277 "setc %b0\n\t"
4278 "andl $1, %0\n\t"
4279 : "=q" (rc.u32)
4280 : "m" (*(const volatile long *)pvBitmap),
4281 "Ir" (iBit)
4282 : "memory");
4283# else
4284 __asm
4285 {
4286 mov edx, [iBit]
4287# ifdef RT_ARCH_AMD64
4288 mov rax, [pvBitmap]
4289 bt [rax], edx
4290# else
4291 mov eax, [pvBitmap]
4292 bt [eax], edx
4293# endif
4294 setc al
4295 and eax, 1
4296 mov [rc.u32], eax
4297 }
4298# endif
4299 return rc.f;
4300}
4301#endif
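
/* Usage sketch (illustrative only; pbmFeatures and exampleHasFeature are
 * hypothetical): a plain, non-atomic query of a bit in a read-only bitmap.
 *
 *     static bool exampleHasFeature(const uint32_t *pbmFeatures, int32_t iFeature)
 *     {
 *         return ASMBitTest(pbmFeatures, iFeature);
 *     }
 */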
4302
4303
4304/**
4305 * Clears a bit range within a bitmap.
4306 *
4307 * @param pvBitmap Pointer to the bitmap.
4308 * @param iBitStart The first bit to clear.
4309 * @param iBitEnd The first bit not to clear.
4310 */
4311DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4312{
4313 if (iBitStart < iBitEnd)
4314 {
4315 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4316 int iStart = iBitStart & ~31;
4317 int iEnd = iBitEnd & ~31;
4318 if (iStart == iEnd)
4319 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4320 else
4321 {
4322 /* bits in first dword. */
4323 if (iBitStart & 31)
4324 {
4325 *pu32 &= (1 << (iBitStart & 31)) - 1;
4326 pu32++;
4327 iBitStart = iStart + 32;
4328 }
4329
4330 /* whole dword. */
4331 if (iBitStart != iEnd)
4332 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4333
4334 /* bits in last dword. */
4335 if (iBitEnd & 31)
4336 {
4337 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4338 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4339 }
4340 }
4341 }
4342}
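
/* Usage sketch (illustrative only): the range is half open, [iBitStart,
 * iBitEnd), so clearing bits 8 through 15 of a fully set bitmap looks like
 * this and leaves the second dword untouched:
 *
 *     uint32_t bm[2] = { UINT32_C(0xffffffff), UINT32_C(0xffffffff) };
 *     ASMBitClearRange(bm, 8, 16); // bm[0] == 0xffff00ff, bm[1] == 0xffffffff
 */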
4343
4344
4345/**
4346 * Sets a bit range within a bitmap.
4347 *
4348 * @param pvBitmap Pointer to the bitmap.
4349 * @param iBitStart The first bit to set.
4350 * @param iBitEnd The first bit not to set.
4351 */
4352DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4353{
4354 if (iBitStart < iBitEnd)
4355 {
4356 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4357 int iStart = iBitStart & ~31;
4358 int iEnd = iBitEnd & ~31;
4359 if (iStart == iEnd)
4360 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4361 else
4362 {
4363 /* bits in first dword. */
4364 if (iBitStart & 31)
4365 {
4366 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4367 pu32++;
4368 iBitStart = iStart + 32;
4369 }
4370
4371 /* whole dword. */
4372 if (iBitStart != iEnd)
4373 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4374
4375 /* bits in last dword. */
4376 if (iBitEnd & 31)
4377 {
4378 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4379 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4380 }
4381 }
4382 }
4383}
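
/* Usage sketch (illustrative only): like ASMBitClearRange the range is half
 * open, and it may straddle a dword boundary:
 *
 *     uint32_t bm[2] = { 0, 0 };
 *     ASMBitSetRange(bm, 30, 34); // bm[0] == 0xc0000000, bm[1] == 0x00000003
 */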
4384
4385
4386/**
4387 * Finds the first clear bit in a bitmap.
4388 *
4389 * @returns Index of the first zero bit.
4390 * @returns -1 if no clear bit was found.
4391 * @param pvBitmap Pointer to the bitmap.
4392 * @param cBits The number of bits in the bitmap. Multiple of 32.
4393 */
4394#if RT_INLINE_ASM_EXTERNAL
4395DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4396#else
4397DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4398{
4399 if (cBits)
4400 {
4401 int32_t iBit;
4402# if RT_INLINE_ASM_GNU_STYLE
4403 RTCCUINTREG uEAX, uECX, uEDI;
4404 cBits = RT_ALIGN_32(cBits, 32);
4405 __asm__ __volatile__("repe; scasl\n\t"
4406 "je 1f\n\t"
4407# ifdef RT_ARCH_AMD64
4408 "lea -4(%%rdi), %%rdi\n\t"
4409 "xorl (%%rdi), %%eax\n\t"
4410 "subq %5, %%rdi\n\t"
4411# else
4412 "lea -4(%%edi), %%edi\n\t"
4413 "xorl (%%edi), %%eax\n\t"
4414 "subl %5, %%edi\n\t"
4415# endif
4416 "shll $3, %%edi\n\t"
4417 "bsfl %%eax, %%edx\n\t"
4418 "addl %%edi, %%edx\n\t"
4419 "1:\t\n"
4420 : "=d" (iBit),
4421 "=&c" (uECX),
4422 "=&D" (uEDI),
4423 "=&a" (uEAX)
4424 : "0" (0xffffffff),
4425 "mr" (pvBitmap),
4426 "1" (cBits >> 5),
4427 "2" (pvBitmap),
4428 "3" (0xffffffff));
4429# else
4430 cBits = RT_ALIGN_32(cBits, 32);
4431 __asm
4432 {
4433# ifdef RT_ARCH_AMD64
4434 mov rdi, [pvBitmap]
4435 mov rbx, rdi
4436# else
4437 mov edi, [pvBitmap]
4438 mov ebx, edi
4439# endif
4440 mov edx, 0ffffffffh
4441 mov eax, edx
4442 mov ecx, [cBits]
4443 shr ecx, 5
4444 repe scasd
4445 je done
4446
4447# ifdef RT_ARCH_AMD64
4448 lea rdi, [rdi - 4]
4449 xor eax, [rdi]
4450 sub rdi, rbx
4451# else
4452 lea edi, [edi - 4]
4453 xor eax, [edi]
4454 sub edi, ebx
4455# endif
4456 shl edi, 3
4457 bsf edx, eax
4458 add edx, edi
4459 done:
4460 mov [iBit], edx
4461 }
4462# endif
4463 return iBit;
4464 }
4465 return -1;
4466}
4467#endif
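
/* Usage sketch (illustrative only; pbmInUse and cSlots are hypothetical):
 * finding a free entry in an allocation bitmap.  cSlots must be a multiple
 * of 32, and -1 means the bitmap is completely full.
 *
 *     int iFree = ASMBitFirstClear(pbmInUse, cSlots);
 *     if (iFree >= 0)
 *     {
 *         // ... take entry iFree, typically with ASMAtomicBitTestAndSet ...
 *     }
 */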
4468
4469
4470/**
4471 * Finds the next clear bit in a bitmap.
4472 *
4473 * @returns Index of the next clear bit.
4474 * @returns -1 if no clear bit was found.
4475 * @param pvBitmap Pointer to the bitmap.
4476 * @param cBits The number of bits in the bitmap. Multiple of 32.
4477 * @param iBitPrev The bit returned from the last search.
4478 * The search will start at iBitPrev + 1.
4479 */
4480#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4481DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4482#else
4483DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4484{
4485 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4486 int iBit = ++iBitPrev & 31;
4487 if (iBit)
4488 {
4489 /*
4490 * Inspect the 32-bit word containing the unaligned bit.
4491 */
4492 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4493
4494# if RT_INLINE_ASM_USES_INTRIN
4495 unsigned long ulBit = 0;
4496 if (_BitScanForward(&ulBit, u32))
4497 return ulBit + iBitPrev;
4498# else
4499# if RT_INLINE_ASM_GNU_STYLE
4500 __asm__ __volatile__("bsf %1, %0\n\t"
4501 "jnz 1f\n\t"
4502 "movl $-1, %0\n\t"
4503 "1:\n\t"
4504 : "=r" (iBit)
4505 : "r" (u32));
4506# else
4507 __asm
4508 {
4509 mov edx, [u32]
4510 bsf eax, edx
4511 jnz done
4512 mov eax, 0ffffffffh
4513 done:
4514 mov [iBit], eax
4515 }
4516# endif
4517 if (iBit >= 0)
4518 return iBit + iBitPrev;
4519# endif
4520
4521 /*
4522 * Skip ahead and see if there is anything left to search.
4523 */
4524 iBitPrev |= 31;
4525 iBitPrev++;
4526 if (cBits <= (uint32_t)iBitPrev)
4527 return -1;
4528 }
4529
4530 /*
4531 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4532 */
4533 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4534 if (iBit >= 0)
4535 iBit += iBitPrev;
4536 return iBit;
4537}
4538#endif
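
/* Usage sketch (illustrative only; pbm and cBits are hypothetical): walking
 * every clear bit by feeding each hit back in as iBitPrev:
 *
 *     int iBit = ASMBitFirstClear(pbm, cBits);
 *     while (iBit >= 0)
 *     {
 *         // ... use clear bit iBit ...
 *         iBit = ASMBitNextClear(pbm, cBits, (uint32_t)iBit);
 *     }
 */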
4539
4540
4541/**
4542 * Finds the first set bit in a bitmap.
4543 *
4544 * @returns Index of the first set bit.
4545 * @returns -1 if no set bit was found.
4546 * @param pvBitmap Pointer to the bitmap.
4547 * @param cBits The number of bits in the bitmap. Multiple of 32.
4548 */
4549#if RT_INLINE_ASM_EXTERNAL
4550DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4551#else
4552DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4553{
4554 if (cBits)
4555 {
4556 int32_t iBit;
4557# if RT_INLINE_ASM_GNU_STYLE
4558 RTCCUINTREG uEAX, uECX, uEDI;
4559 cBits = RT_ALIGN_32(cBits, 32);
4560 __asm__ __volatile__("repe; scasl\n\t"
4561 "je 1f\n\t"
4562# ifdef RT_ARCH_AMD64
4563 "lea -4(%%rdi), %%rdi\n\t"
4564 "movl (%%rdi), %%eax\n\t"
4565 "subq %5, %%rdi\n\t"
4566# else
4567 "lea -4(%%edi), %%edi\n\t"
4568 "movl (%%edi), %%eax\n\t"
4569 "subl %5, %%edi\n\t"
4570# endif
4571 "shll $3, %%edi\n\t"
4572 "bsfl %%eax, %%edx\n\t"
4573 "addl %%edi, %%edx\n\t"
4574 "1:\t\n"
4575 : "=d" (iBit),
4576 "=&c" (uECX),
4577 "=&D" (uEDI),
4578 "=&a" (uEAX)
4579 : "0" (0xffffffff),
4580 "mr" (pvBitmap),
4581 "1" (cBits >> 5),
4582 "2" (pvBitmap),
4583 "3" (0));
4584# else
4585 cBits = RT_ALIGN_32(cBits, 32);
4586 __asm
4587 {
4588# ifdef RT_ARCH_AMD64
4589 mov rdi, [pvBitmap]
4590 mov rbx, rdi
4591# else
4592 mov edi, [pvBitmap]
4593 mov ebx, edi
4594# endif
4595 mov edx, 0ffffffffh
4596 xor eax, eax
4597 mov ecx, [cBits]
4598 shr ecx, 5
4599 repe scasd
4600 je done
4601# ifdef RT_ARCH_AMD64
4602 lea rdi, [rdi - 4]
4603 mov eax, [rdi]
4604 sub rdi, rbx
4605# else
4606 lea edi, [edi - 4]
4607 mov eax, [edi]
4608 sub edi, ebx
4609# endif
4610 shl edi, 3
4611 bsf edx, eax
4612 add edx, edi
4613 done:
4614 mov [iBit], edx
4615 }
4616# endif
4617 return iBit;
4618 }
4619 return -1;
4620}
4621#endif
4622
4623
4624/**
4625 * Finds the next set bit in a bitmap.
4626 *
4627 * @returns Index of the next set bit.
4628 * @returns -1 if no set bit was found.
4629 * @param pvBitmap Pointer to the bitmap.
4630 * @param cBits The number of bits in the bitmap. Multiple of 32.
4631 * @param iBitPrev The bit returned from the last search.
4632 * The search will start at iBitPrev + 1.
4633 */
4634#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4635DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4636#else
4637DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4638{
4639 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4640 int iBit = ++iBitPrev & 31;
4641 if (iBit)
4642 {
4643 /*
4644 * Inspect the 32-bit word containing the unaligned bit.
4645 */
4646 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4647
4648# if RT_INLINE_ASM_USES_INTRIN
4649 unsigned long ulBit = 0;
4650 if (_BitScanForward(&ulBit, u32))
4651 return ulBit + iBitPrev;
4652# else
4653# if RT_INLINE_ASM_GNU_STYLE
4654 __asm__ __volatile__("bsf %1, %0\n\t"
4655 "jnz 1f\n\t"
4656 "movl $-1, %0\n\t"
4657 "1:\n\t"
4658 : "=r" (iBit)
4659 : "r" (u32));
4660# else
4661 __asm
4662 {
4663 mov edx, [u32]
4664 bsf eax, edx
4665 jnz done
4666 mov eax, 0ffffffffh
4667 done:
4668 mov [iBit], eax
4669 }
4670# endif
4671 if (iBit >= 0)
4672 return iBit + iBitPrev;
4673# endif
4674
4675 /*
4676 * Skip ahead and see if there is anything left to search.
4677 */
4678 iBitPrev |= 31;
4679 iBitPrev++;
4680 if (cBits <= (uint32_t)iBitPrev)
4681 return -1;
4682 }
4683
4684 /*
4685 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
4686 */
4687 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4688 if (iBit >= 0)
4689 iBit += iBitPrev;
4690 return iBit;
4691}
4692#endif
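
/* Usage sketch (illustrative only; pbmPending and cBits are hypothetical):
 * the usual scan loop over all set bits, e.g. when dispatching pending work:
 *
 *     int iBit = ASMBitFirstSet(pbmPending, cBits);
 *     while (iBit >= 0)
 *     {
 *         // ... process item iBit ...
 *         iBit = ASMBitNextSet(pbmPending, cBits, (uint32_t)iBit);
 *     }
 */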
4693
4694
4695/**
4696 * Finds the first bit which is set in the given 32-bit integer.
4697 * Bits are numbered from 1 (least significant) to 32.
4698 *
4699 * @returns index [1..32] of the first set bit.
4700 * @returns 0 if all bits are cleared.
4701 * @param u32 Integer to search for set bits.
4702 * @remark Similar to ffs() in BSD.
4703 */
4704#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4705DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4706#else
4707DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4708{
4709# if RT_INLINE_ASM_USES_INTRIN
4710 unsigned long iBit;
4711 if (_BitScanForward(&iBit, u32))
4712 iBit++;
4713 else
4714 iBit = 0;
4715# elif RT_INLINE_ASM_GNU_STYLE
4716 uint32_t iBit;
4717 __asm__ __volatile__("bsf %1, %0\n\t"
4718 "jnz 1f\n\t"
4719 "xorl %0, %0\n\t"
4720 "jmp 2f\n"
4721 "1:\n\t"
4722 "incl %0\n"
4723 "2:\n\t"
4724 : "=r" (iBit)
4725 : "rm" (u32));
4726# else
4727 uint32_t iBit;
4728 _asm
4729 {
4730 bsf eax, [u32]
4731 jnz found
4732 xor eax, eax
4733 jmp done
4734 found:
4735 inc eax
4736 done:
4737 mov [iBit], eax
4738 }
4739# endif
4740 return iBit;
4741}
4742#endif
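
/* Usage sketch (illustrative only): note the 1-based result, matching BSD
 * ffs():
 *
 *     ASMBitFirstSetU32(UINT32_C(0x00000001)) == 1
 *     ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32
 *     ASMBitFirstSetU32(UINT32_C(0x00000000)) == 0
 */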
4743
4744
4745/**
4746 * Finds the first bit which is set in the given 32-bit integer.
4747 * Bits are numbered from 1 (least significant) to 32.
4748 *
4749 * @returns index [1..32] of the first set bit.
4750 * @returns 0 if all bits are cleared.
4751 * @param i32 Integer to search for set bits.
4752 * @remark Similar to ffs() in BSD.
4753 */
4754DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4755{
4756 return ASMBitFirstSetU32((uint32_t)i32);
4757}
4758
4759
4760/**
4761 * Finds the last bit which is set in the given 32-bit integer.
4762 * Bits are numbered from 1 (least significant) to 32.
4763 *
4764 * @returns index [1..32] of the last set bit.
4765 * @returns 0 if all bits are cleared.
4766 * @param u32 Integer to search for set bits.
4767 * @remark Similar to fls() in BSD.
4768 */
4769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4770DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4771#else
4772DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4773{
4774# if RT_INLINE_ASM_USES_INTRIN
4775 unsigned long iBit;
4776 if (_BitScanReverse(&iBit, u32))
4777 iBit++;
4778 else
4779 iBit = 0;
4780# elif RT_INLINE_ASM_GNU_STYLE
4781 uint32_t iBit;
4782 __asm__ __volatile__("bsrl %1, %0\n\t"
4783 "jnz 1f\n\t"
4784 "xorl %0, %0\n\t"
4785 "jmp 2f\n"
4786 "1:\n\t"
4787 "incl %0\n"
4788 "2:\n\t"
4789 : "=r" (iBit)
4790 : "rm" (u32));
4791# else
4792 uint32_t iBit;
4793 _asm
4794 {
4795 bsr eax, [u32]
4796 jnz found
4797 xor eax, eax
4798 jmp done
4799 found:
4800 inc eax
4801 done:
4802 mov [iBit], eax
4803 }
4804# endif
4805 return iBit;
4806}
4807#endif
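
/* Usage sketch (illustrative only; exampleBitWidth is hypothetical): because
 * the result is 1-based, the return value is the number of significant bits
 * in u32, and subtracting 1 from a non-zero result gives floor(log2(u32)).
 *
 *     static unsigned exampleBitWidth(uint32_t u32)
 *     {
 *         return ASMBitLastSetU32(u32); // 0 for 0, 32 for 0x80000000
 *     }
 */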
4808
4809
4810/**
4811 * Finds the last bit which is set in the given 32-bit integer.
4812 * Bits are numbered from 1 (least significant) to 32.
4813 *
4814 * @returns index [1..32] of the last set bit.
4815 * @returns 0 if all bits are cleared.
4816 * @param i32 Integer to search for set bits.
4817 * @remark Similar to fls() in BSD.
4818 */
4819DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4820{
4821 return ASMBitLastSetU32((uint32_t)i32);
4822}
4823
4824/**
4825 * Reverse the byte order of the given 16-bit integer.
4826 *
4827 * @returns The 16-bit value with the byte order reversed.
4828 * @param u16 16-bit integer value.
4829 */
4830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4831DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4832#else
4833DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4834{
4835# if RT_INLINE_ASM_USES_INTRIN
4836 u16 = _byteswap_ushort(u16);
4837# elif RT_INLINE_ASM_GNU_STYLE
4838 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4839# else
4840 _asm
4841 {
4842 mov ax, [u16]
4843 ror ax, 8
4844 mov [u16], ax
4845 }
4846# endif
4847 return u16;
4848}
4849#endif
4850
4851
4852/**
4853 * Reverse the byte order of the given 32-bit integer.
4854 *
4855 * @returns The 32-bit value with the byte order reversed.
4856 * @param u32 32-bit integer value.
4857 */
4858#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4859DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4860#else
4861DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4862{
4863# if RT_INLINE_ASM_USES_INTRIN
4864 u32 = _byteswap_ulong(u32);
4865# elif RT_INLINE_ASM_GNU_STYLE
4866 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4867# else
4868 _asm
4869 {
4870 mov eax, [u32]
4871 bswap eax
4872 mov [u32], eax
4873 }
4874# endif
4875 return u32;
4876}
4877#endif
4878
4879
4880/**
4881 * Reverse the byte order of the given 64-bit integer.
4882 *
4883 * @returns The 64-bit value with the byte order reversed.
4884 * @param u64 64-bit integer value.
4885 */
4886DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4887{
4888#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4889 u64 = _byteswap_uint64(u64);
4890#else
4891 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4892 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4893#endif
4894 return u64;
4895}
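
/* Usage sketch (illustrative only): byte swapping is its own inverse, which
 * is all that is needed to convert a fixed-endian on-disk value to host
 * order on an opposite-endian host and back:
 *
 *     ASMByteSwapU16(UINT16_C(0x1234))     == UINT16_C(0x3412)
 *     ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412)
 *     ASMByteSwapU64(ASMByteSwapU64(u64))  == u64
 */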
4896
4897
4898/**
4899 * Rotate 32-bit unsigned value to the left by @a cShift.
4900 *
4901 * @returns Rotated value.
4902 * @param u32 The value to rotate.
4903 * @param cShift How many bits to rotate by.
4904 */
4905DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
4906{
4907#if RT_INLINE_ASM_USES_INTRIN
4908 return _rotl(u32, cShift);
4909#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4910 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4911 return u32;
4912#else
4913 cShift &= 31;
4914 return (u32 << cShift) | (u32 >> (32 - cShift));
4915#endif
4916}
4917
4918
4919/**
4920 * Rotate 32-bit unsigned value to the right by @a cShift.
4921 *
4922 * @returns Rotated value.
4923 * @param u32 The value to rotate.
4924 * @param cShift How many bits to rotate by.
4925 */
4926DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
4927{
4928#if RT_INLINE_ASM_USES_INTRIN
4929 return _rotr(u32, cShift);
4930#elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
4931 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
4932 return u32;
4933#else
4934 cShift &= 31;
4935 return (u32 >> cShift) | (u32 << (32 - cShift));
4936#endif
4937}
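
/* Usage sketch (illustrative only): a left rotate by cShift is the same as a
 * right rotate by 32 - cShift, and no bits are lost either way:
 *
 *     ASMRotateLeftU32(UINT32_C(0x80000001), 1)  == UINT32_C(0x00000003)
 *     ASMRotateRightU32(UINT32_C(0x00000003), 1) == UINT32_C(0x80000001)
 */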
4938
4939
4940/**
4941 * Rotate 64-bit unsigned value to the left by @a cShift.
4942 *
4943 * @returns Rotated value.
4944 * @param u64 The value to rotate.
4945 * @param cShift How many bits to rotate by.
4946 */
4947DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
4948{
4949#if RT_INLINE_ASM_USES_INTRIN
4950 return _rotl64(u64, cShift);
4951#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4952 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4953 return u64;
4954#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4955 uint32_t uSpill;
4956 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4957 "jz 1f\n\t"
4958 "xchgl %%eax, %%edx\n\t"
4959 "1:\n\t"
4960 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4961 "jz 2f\n\t"
4962 "movl %%edx, %2\n\t" /* save the hi value in %2. */
4963 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
4964 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
4965 "2:\n\t" /* } */
4966 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
4967 : "0" (u64),
4968 "1" (cShift));
4969 return u64;
4970#else
4971 cShift &= 63;
4972 return (u64 << cShift) | (u64 >> (64 - cShift));
4973#endif
4974}
4975
4976
4977/**
4978 * Rotate 64-bit unsigned value to the right by @a cShift.
4979 *
4980 * @returns Rotated value.
4981 * @param u64 The value to rotate.
4982 * @param cShift How many bits to rotate by.
4983 */
4984DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
4985{
4986#if RT_INLINE_ASM_USES_INTRIN
4987 return _rotr64(u64, cShift);
4988#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4989 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
4990 return u64;
4991#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
4992 uint32_t uSpill;
4993 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
4994 "jz 1f\n\t"
4995 "xchgl %%eax, %%edx\n\t"
4996 "1:\n\t"
4997 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
4998 "jz 2f\n\t"
4999 "movl %%edx, %2\n\t" /* save the hi value in %2. */
5000 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5001 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5002 "2:\n\t" /* } */
5003 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5004 : "0" (u64),
5005 "1" (cShift));
5006 return u64;
5007#else
5008 cShift &= 63;
5009 return (u64 >> cShift) | (u64 << (64 - cShift));
5010#endif
5011}
5012
5013/** @} */
5014
5015
5016/** @} */
5017
5018#endif
5019