VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 49921

Last change on this file since 49921 was 49724, checked in by vboxsync, 11 years ago

Added ASMAtomicUo[And|Or]U[32|64].

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 136.2 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# ifdef RT_ARCH_AMD64
69# pragma intrinsic(__stosq)
70# pragma intrinsic(_byteswap_uint64)
71# pragma intrinsic(_InterlockedExchange64)
72# pragma intrinsic(_InterlockedExchangeAdd64)
73# pragma intrinsic(_InterlockedAnd64)
74# pragma intrinsic(_InterlockedOr64)
75# pragma intrinsic(_InterlockedIncrement64)
76# pragma intrinsic(_InterlockedDecrement64)
77# endif
78#endif
79
80
81/** @defgroup grp_rt_asm ASM - Assembly Routines
82 * @ingroup grp_rt
83 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct this if it is wrong.)
90 *
91 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
92 * are unordered (note the Uo).
93 *
94 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
95 * or even optimize assembler instructions away. For instance, in the following code
96 * the second rdmsr instruction is optimized away because gcc treats that instruction
97 * as deterministic:
98 *
99 * @code
 *     static inline uint64_t rdmsr_low(int idx)
 *     {
 *         uint32_t low;
 *         __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *         return low;
 *     }
105 * ...
106 * uint32_t msr1 = rdmsr_low(1);
107 * foo(msr1);
108 * msr1 = rdmsr_low(1);
109 * bar(msr1);
110 * @endcode
111 *
112 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
113 * use the result of the first call as input parameter for bar() as well. For rdmsr this
114 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
115 * machine status information in general.
116 *
117 * @{
118 */
119
120
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.  So far this workaround is still required for 4.4 and 4.5.
 *
 * Always expands to a plain 0 or 1.  The compiler/target test is evaluated
 * right here instead of being stored in the macro body, because a macro whose
 * expansion contains the 'defined' operator has undefined behaviour when used
 * in an \#if expression and cannot be used in ordinary C expressions at all.
 */
#ifdef __GNUC__
# if __GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__)
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# else
#  define RT_INLINE_ASM_GCC_4_3_X_X86 0
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
130
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * Expands to a plain 0 or 1 unless it was already defined by the includer.
 * The condition is evaluated here rather than kept in the macro body since
 * 'defined' produced by macro expansion inside \#if is undefined behaviour.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if    (defined(PIC) || defined(__PIC__)) \
     && defined(RT_ARCH_X86) \
     && (   RT_INLINE_ASM_GCC_4_3_X_X86 \
         || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
146
147
/** @def ASMReturnAddress
 * Yields the return address of the function the macro is used in, i.e. the
 * address in the caller where execution continues once that function returns.
 *
 * MSC builds map this onto the _ReturnAddress intrinsic, GNU C builds (and
 * doxygen runs) onto __builtin_return_address(0).  Any other compiler is
 * rejected at compile time.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C" void * _ReturnAddress(void);
# else
void * _ReturnAddress(void);
# endif
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
163
164
/**
 * Compiler memory barrier.
 *
 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 * values or any outstanding writes when returning from this function.
 *
 * This function must be used if non-volatile data is modified by a
 * device or the VMM. Typical cases are port access, MMIO access,
 * trapping instruction, etc.
 *
 * Three variants exist:
 *   - GNU style inline assembly: an empty asm statement with a "memory" clobber.
 *   - MSC with intrinsics: _ReadWriteBarrier().
 *   - Otherwise: an inline function containing an empty __asm block.
 */
#if RT_INLINE_ASM_GNU_STYLE
# define ASMCompilerBarrier()   do { __asm__ __volatile__("" : : : "memory"); } while (0)
#elif RT_INLINE_ASM_USES_INTRIN
# define ASMCompilerBarrier()   do { _ReadWriteBarrier(); } while (0)
#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
DECLINLINE(void) ASMCompilerBarrier(void)
{
    /* Deliberately empty inline assembly block. */
    __asm
    {
    }
}
#endif
187
188
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Plain alias that expands to RT_BREAKPOINT().
 *
 * @deprecated Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint() RT_BREAKPOINT()
195
196
197/**
198 * Spinloop hint for platforms that have these, empty function on the other
199 * platforms.
200 *
201 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
202 * spin locks.
203 */
204#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
205DECLASM(void) ASMNopPause(void);
206#else
207DECLINLINE(void) ASMNopPause(void)
208{
209# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
210# if RT_INLINE_ASM_GNU_STYLE
211 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
212# else
213 __asm {
214 _emit 0f3h
215 _emit 090h
216 }
217# endif
218# else
219 /* dummy */
220# endif
221}
222#endif
223
224
225/**
226 * Atomically Exchange an unsigned 8-bit value, ordered.
227 *
228 * @returns Current *pu8 value
229 * @param pu8 Pointer to the 8-bit variable to update.
230 * @param u8 The 8-bit value to assign to *pu8.
231 */
232#if RT_INLINE_ASM_EXTERNAL
233DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
234#else
235DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
236{
237# if RT_INLINE_ASM_GNU_STYLE
238 __asm__ __volatile__("xchgb %0, %1\n\t"
239 : "=m" (*pu8),
240 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
241 : "1" (u8),
242 "m" (*pu8));
243# else
244 __asm
245 {
246# ifdef RT_ARCH_AMD64
247 mov rdx, [pu8]
248 mov al, [u8]
249 xchg [rdx], al
250 mov [u8], al
251# else
252 mov edx, [pu8]
253 mov al, [u8]
254 xchg [edx], al
255 mov [u8], al
256# endif
257 }
258# endif
259 return u8;
260}
261#endif
262
263
264/**
265 * Atomically Exchange a signed 8-bit value, ordered.
266 *
267 * @returns Current *pu8 value
268 * @param pi8 Pointer to the 8-bit variable to update.
269 * @param i8 The 8-bit value to assign to *pi8.
270 */
271DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
272{
273 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
274}
275
276
277/**
278 * Atomically Exchange a bool value, ordered.
279 *
280 * @returns Current *pf value
281 * @param pf Pointer to the 8-bit variable to update.
282 * @param f The 8-bit value to assign to *pi8.
283 */
284DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
285{
286#ifdef _MSC_VER
287 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
288#else
289 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
290#endif
291}
292
293
294/**
295 * Atomically Exchange an unsigned 16-bit value, ordered.
296 *
297 * @returns Current *pu16 value
298 * @param pu16 Pointer to the 16-bit variable to update.
299 * @param u16 The 16-bit value to assign to *pu16.
300 */
301#if RT_INLINE_ASM_EXTERNAL
302DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
303#else
304DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
305{
306# if RT_INLINE_ASM_GNU_STYLE
307 __asm__ __volatile__("xchgw %0, %1\n\t"
308 : "=m" (*pu16),
309 "=r" (u16)
310 : "1" (u16),
311 "m" (*pu16));
312# else
313 __asm
314 {
315# ifdef RT_ARCH_AMD64
316 mov rdx, [pu16]
317 mov ax, [u16]
318 xchg [rdx], ax
319 mov [u16], ax
320# else
321 mov edx, [pu16]
322 mov ax, [u16]
323 xchg [edx], ax
324 mov [u16], ax
325# endif
326 }
327# endif
328 return u16;
329}
330#endif
331
332
333/**
334 * Atomically Exchange a signed 16-bit value, ordered.
335 *
336 * @returns Current *pu16 value
337 * @param pi16 Pointer to the 16-bit variable to update.
338 * @param i16 The 16-bit value to assign to *pi16.
339 */
340DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
341{
342 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
343}
344
345
346/**
347 * Atomically Exchange an unsigned 32-bit value, ordered.
348 *
349 * @returns Current *pu32 value
350 * @param pu32 Pointer to the 32-bit variable to update.
351 * @param u32 The 32-bit value to assign to *pu32.
352 */
353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
354DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
355#else
356DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
357{
358# if RT_INLINE_ASM_GNU_STYLE
359 __asm__ __volatile__("xchgl %0, %1\n\t"
360 : "=m" (*pu32),
361 "=r" (u32)
362 : "1" (u32),
363 "m" (*pu32));
364
365# elif RT_INLINE_ASM_USES_INTRIN
366 u32 = _InterlockedExchange((long *)pu32, u32);
367
368# else
369 __asm
370 {
371# ifdef RT_ARCH_AMD64
372 mov rdx, [pu32]
373 mov eax, u32
374 xchg [rdx], eax
375 mov [u32], eax
376# else
377 mov edx, [pu32]
378 mov eax, u32
379 xchg [edx], eax
380 mov [u32], eax
381# endif
382 }
383# endif
384 return u32;
385}
386#endif
387
388
389/**
390 * Atomically Exchange a signed 32-bit value, ordered.
391 *
392 * @returns Current *pu32 value
393 * @param pi32 Pointer to the 32-bit variable to update.
394 * @param i32 The 32-bit value to assign to *pi32.
395 */
396DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
397{
398 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
399}
400
401
/**
 * Atomically Exchange an unsigned 64-bit value, ordered.
 *
 * On AMD64 a single 64-bit exchange is used (intrinsic, GNU inline assembly or
 * MSC style inline assembly).  On 32-bit targets the exchange is built from a
 * 'lock cmpxchg8b' retry loop.
 *
 * Only an external assembly prototype is declared when inline assembly is
 * external (and intrinsics are not in use) or when
 * RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC is non-zero.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to update.
 * @param   u64     The 64-bit value to assign to *pu64.
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
#else
DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
{
# if defined(RT_ARCH_AMD64)
#  if RT_INLINE_ASM_USES_INTRIN
    u64 = _InterlockedExchange64((__int64 *)pu64, u64);

#  elif RT_INLINE_ASM_GNU_STYLE
    /* The register operand carries the new value in and the old value out. */
    __asm__ __volatile__("xchgq %0, %1\n\t"
                         : "=m" (*pu64),
                           "=r" (u64)
                         : "1" (u64),
                           "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [u64]
        xchg    [rdx], rax
        mov     [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* The low half of the new value is kept in a memory temporary and swapped
       into ebx only around the cmpxchg8b loop; ebx is restored afterwards.
       NOTE(review): presumably because ebx is reserved in PIC code - confirm. */
    uint32_t u32EBX = (uint32_t)u64;
    __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
                         "xchgl %%ebx, %3\n\t"
                         "1:\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "jnz 1b\n\t"
                         "movl %3, %%ebx\n\t"
                         /*"xchgl %%esi, %5\n\t"*/
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (*pu64),
                           "m" ( u32EBX ),
                           "c" ( (uint32_t)(u64 >> 32) ),
                           "S" (pu64));
#   else /* !PIC */
    /* edx:eax starts out as *pu64, ecx:ebx holds the new value; retry until
       the compare-and-exchange succeeds. */
    __asm__ __volatile__("1:\n\t"
                         "lock; cmpxchg8b %1\n\t"
                         "jnz 1b\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (*pu64),
                           "b" ( (uint32_t)u64 ),
                           "c" ( (uint32_t)(u64 >> 32) ));
#   endif
#  else
    __asm
    {
        /* ecx:ebx = new value, edx:eax = current value of *pu64. */
        mov     ebx, dword ptr [u64]
        mov     ecx, dword ptr [u64 + 4]
        mov     edi, pu64
        mov     eax, dword ptr [edi]
        mov     edx, dword ptr [edi + 4]
    retry:
        lock cmpxchg8b [edi]
        jnz retry
        /* Store edx:eax back into u64 as the return value. */
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
480
481
482/**
483 * Atomically Exchange an signed 64-bit value, ordered.
484 *
485 * @returns Current *pi64 value
486 * @param pi64 Pointer to the 64-bit variable to update.
487 * @param i64 The 64-bit value to assign to *pi64.
488 */
489DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
490{
491 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
492}
493
494
495/**
496 * Atomically Exchange a pointer value, ordered.
497 *
498 * @returns Current *ppv value
499 * @param ppv Pointer to the pointer variable to update.
500 * @param pv The pointer value to assign to *ppv.
501 */
502DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
503{
504#if ARCH_BITS == 32
505 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
506#elif ARCH_BITS == 64
507 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
508#else
509# error "ARCH_BITS is bogus"
510#endif
511}
512
513
/**
 * Typed convenience wrapper around ASMAtomicXchgPtr which takes care of the
 * casting.
 *
 * With GNU C the arguments are first assigned to typed temporaries so that
 * mismatching pointer types are diagnosed; other compilers just get the casts.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to store in *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvXchgDst = (ppv); \
        Type const                          pvXchgNew  = (pv); \
        Type                                pvXchgPrev = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvXchgDst, (void *)pvXchgNew); \
        pvXchgPrev; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#endif
535
536
537/**
538 * Atomically Exchange a raw-mode context pointer value, ordered.
539 *
540 * @returns Current *ppv value
541 * @param ppvRC Pointer to the pointer variable to update.
542 * @param pvRC The pointer value to assign to *ppv.
543 */
544DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
545{
546 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
547}
548
549
550/**
551 * Atomically Exchange a ring-0 pointer value, ordered.
552 *
553 * @returns Current *ppv value
554 * @param ppvR0 Pointer to the pointer variable to update.
555 * @param pvR0 The pointer value to assign to *ppv.
556 */
557DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
558{
559#if R0_ARCH_BITS == 32
560 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
561#elif R0_ARCH_BITS == 64
562 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
563#else
564# error "R0_ARCH_BITS is bogus"
565#endif
566}
567
568
569/**
570 * Atomically Exchange a ring-3 pointer value, ordered.
571 *
572 * @returns Current *ppv value
573 * @param ppvR3 Pointer to the pointer variable to update.
574 * @param pvR3 The pointer value to assign to *ppv.
575 */
576DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
577{
578#if R3_ARCH_BITS == 32
579 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
580#elif R3_ARCH_BITS == 64
581 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
582#else
583# error "R3_ARCH_BITS is bogus"
584#endif
585}
586
587
/** @def ASMAtomicXchgHandle
 * Atomically Exchange a typical IPRT handle value, ordered.
 *
 * The handle is exchanged as a 32-bit or 64-bit unsigned integer depending on
 * HC_ARCH_BITS.  Compile time assertions check that both *ph and *phRes have
 * exactly that size.
 *
 * @param   ph      Pointer to the value to update.
 * @param   hNew    The new value to assign to *ph.
 * @param   phRes   Where to store the current *ph value.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
   do { \
       AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
       AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
       *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
   } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicXchgHandle(ph, hNew, phRes) \
   do { \
       AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
       AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
       *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
   } while (0)
#else
# error HC_ARCH_BITS
#endif
614
615
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function and
 * discards the previous value.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
634
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function and
 * stores the previous value through @a puRes using the same width.  Any other
 * size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
653
654
655
656/**
657 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
658 *
659 * @returns true if xchg was done.
660 * @returns false if xchg wasn't done.
661 *
662 * @param pu8 Pointer to the value to update.
663 * @param u8New The new value to assigned to *pu8.
664 * @param u8Old The old value to *pu8 compare with.
665 */
666#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
667DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
668#else
669DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
670{
671 uint8_t u8Ret;
672 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
673 "setz %1\n\t"
674 : "=m" (*pu8),
675 "=qm" (u8Ret),
676 "=a" (u8Old)
677 : "q" (u8New),
678 "2" (u8Old),
679 "m" (*pu8));
680 return (bool)u8Ret;
681}
682#endif
683
684
685/**
686 * Atomically Compare and Exchange a signed 8-bit value, ordered.
687 *
688 * @returns true if xchg was done.
689 * @returns false if xchg wasn't done.
690 *
691 * @param pi8 Pointer to the value to update.
692 * @param i8New The new value to assigned to *pi8.
693 * @param i8Old The old value to *pi8 compare with.
694 */
695DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
696{
697 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
698}
699
700
701/**
702 * Atomically Compare and Exchange a bool value, ordered.
703 *
704 * @returns true if xchg was done.
705 * @returns false if xchg wasn't done.
706 *
707 * @param pf Pointer to the value to update.
708 * @param fNew The new value to assigned to *pf.
709 * @param fOld The old value to *pf compare with.
710 */
711DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
712{
713 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
714}
715
716
717/**
718 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
719 *
720 * @returns true if xchg was done.
721 * @returns false if xchg wasn't done.
722 *
723 * @param pu32 Pointer to the value to update.
724 * @param u32New The new value to assigned to *pu32.
725 * @param u32Old The old value to *pu32 compare with.
726 */
727#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
728DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
729#else
730DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
731{
732# if RT_INLINE_ASM_GNU_STYLE
733 uint8_t u8Ret;
734 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
735 "setz %1\n\t"
736 : "=m" (*pu32),
737 "=qm" (u8Ret),
738 "=a" (u32Old)
739 : "r" (u32New),
740 "2" (u32Old),
741 "m" (*pu32));
742 return (bool)u8Ret;
743
744# elif RT_INLINE_ASM_USES_INTRIN
745 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
746
747# else
748 uint32_t u32Ret;
749 __asm
750 {
751# ifdef RT_ARCH_AMD64
752 mov rdx, [pu32]
753# else
754 mov edx, [pu32]
755# endif
756 mov eax, [u32Old]
757 mov ecx, [u32New]
758# ifdef RT_ARCH_AMD64
759 lock cmpxchg [rdx], ecx
760# else
761 lock cmpxchg [edx], ecx
762# endif
763 setz al
764 movzx eax, al
765 mov [u32Ret], eax
766 }
767 return !!u32Ret;
768# endif
769}
770#endif
771
772
773/**
774 * Atomically Compare and Exchange a signed 32-bit value, ordered.
775 *
776 * @returns true if xchg was done.
777 * @returns false if xchg wasn't done.
778 *
779 * @param pi32 Pointer to the value to update.
780 * @param i32New The new value to assigned to *pi32.
781 * @param i32Old The old value to *pi32 compare with.
782 */
783DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
784{
785 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
786}
787
788
789/**
790 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
791 *
792 * @returns true if xchg was done.
793 * @returns false if xchg wasn't done.
794 *
795 * @param pu64 Pointer to the 64-bit variable to update.
796 * @param u64New The 64-bit value to assign to *pu64.
797 * @param u64Old The value to compare with.
798 */
799#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
800 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
801DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
802#else
803DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
804{
805# if RT_INLINE_ASM_USES_INTRIN
806 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
807
808# elif defined(RT_ARCH_AMD64)
809# if RT_INLINE_ASM_GNU_STYLE
810 uint8_t u8Ret;
811 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
812 "setz %1\n\t"
813 : "=m" (*pu64),
814 "=qm" (u8Ret),
815 "=a" (u64Old)
816 : "r" (u64New),
817 "2" (u64Old),
818 "m" (*pu64));
819 return (bool)u8Ret;
820# else
821 bool fRet;
822 __asm
823 {
824 mov rdx, [pu32]
825 mov rax, [u64Old]
826 mov rcx, [u64New]
827 lock cmpxchg [rdx], rcx
828 setz al
829 mov [fRet], al
830 }
831 return fRet;
832# endif
833# else /* !RT_ARCH_AMD64 */
834 uint32_t u32Ret;
835# if RT_INLINE_ASM_GNU_STYLE
836# if defined(PIC) || defined(__PIC__)
837 uint32_t u32EBX = (uint32_t)u64New;
838 uint32_t u32Spill;
839 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
840 "lock; cmpxchg8b (%6)\n\t"
841 "setz %%al\n\t"
842 "movl %4, %%ebx\n\t"
843 "movzbl %%al, %%eax\n\t"
844 : "=a" (u32Ret),
845 "=d" (u32Spill),
846# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
847 "+m" (*pu64)
848# else
849 "=m" (*pu64)
850# endif
851 : "A" (u64Old),
852 "m" ( u32EBX ),
853 "c" ( (uint32_t)(u64New >> 32) ),
854 "S" (pu64));
855# else /* !PIC */
856 uint32_t u32Spill;
857 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
858 "setz %%al\n\t"
859 "movzbl %%al, %%eax\n\t"
860 : "=a" (u32Ret),
861 "=d" (u32Spill),
862 "+m" (*pu64)
863 : "A" (u64Old),
864 "b" ( (uint32_t)u64New ),
865 "c" ( (uint32_t)(u64New >> 32) ));
866# endif
867 return (bool)u32Ret;
868# else
869 __asm
870 {
871 mov ebx, dword ptr [u64New]
872 mov ecx, dword ptr [u64New + 4]
873 mov edi, [pu64]
874 mov eax, dword ptr [u64Old]
875 mov edx, dword ptr [u64Old + 4]
876 lock cmpxchg8b [edi]
877 setz al
878 movzx eax, al
879 mov dword ptr [u32Ret], eax
880 }
881 return !!u32Ret;
882# endif
883# endif /* !RT_ARCH_AMD64 */
884}
885#endif
886
887
888/**
889 * Atomically Compare and exchange a signed 64-bit value, ordered.
890 *
891 * @returns true if xchg was done.
892 * @returns false if xchg wasn't done.
893 *
894 * @param pi64 Pointer to the 64-bit variable to update.
895 * @param i64 The 64-bit value to assign to *pu64.
896 * @param i64Old The value to compare with.
897 */
898DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
899{
900 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
901}
902
903
904/**
905 * Atomically Compare and Exchange a pointer value, ordered.
906 *
907 * @returns true if xchg was done.
908 * @returns false if xchg wasn't done.
909 *
910 * @param ppv Pointer to the value to update.
911 * @param pvNew The new value to assigned to *ppv.
912 * @param pvOld The old value to *ppv compare with.
913 */
914DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
915{
916#if ARCH_BITS == 32
917 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
918#elif ARCH_BITS == 64
919 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
920#else
921# error "ARCH_BITS is bogus"
922#endif
923}
924
925
/**
 * Atomically compares and exchanges a pointer value, ordered.
 *
 * Wrapper around ASMAtomicCmpXchgPtrVoid that takes care of the casting.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The value *ppv is compared with.
 *
 * @remarks Relatively type safe with GNU C, where the arguments are first
 *          assigned to temporaries typed after *ppv.  Other compilers only
 *          get the plain casts.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvCmpXchgDst = (ppv); \
        __typeof__(*(ppv)) const            pvCmpXchgNew  = (pvNew); \
        __typeof__(*(ppv)) const            pvCmpXchgOld  = (pvOld); \
        bool fCmpXchgDone = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvCmpXchgDst, \
                                                    (void *)pvCmpXchgNew, (void *)pvCmpXchgOld); \
        fCmpXchgDone; \
     })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#endif
953
954
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * The handle is compared and exchanged as a 32-bit or 64-bit unsigned integer
 * depending on HC_ARCH_BITS.  A compile time assertion checks that *ph has
 * exactly that size.
 *
 * @param   ph      Pointer to the value to update.
 * @param   hNew    The new value to assign to *ph.
 * @param   hOld    The old value to compare *ph with.
 * @param   fRc     Where to store the result (true if the exchange was done).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
       (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
   } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
       (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
   } while (0)
#else
# error HC_ARCH_BITS
#endif
980
981
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-exchange
 * function.  Any other size triggers an assertion and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1003
1004
1005/**
1006 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1007 * passes back old value, ordered.
1008 *
1009 * @returns true if xchg was done.
1010 * @returns false if xchg wasn't done.
1011 *
1012 * @param pu32 Pointer to the value to update.
1013 * @param u32New The new value to assigned to *pu32.
1014 * @param u32Old The old value to *pu32 compare with.
1015 * @param pu32Old Pointer store the old value at.
1016 */
1017#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1018DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1019#else
1020DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1021{
1022# if RT_INLINE_ASM_GNU_STYLE
1023 uint8_t u8Ret;
1024 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1025 "setz %1\n\t"
1026 : "=m" (*pu32),
1027 "=qm" (u8Ret),
1028 "=a" (*pu32Old)
1029 : "r" (u32New),
1030 "a" (u32Old),
1031 "m" (*pu32));
1032 return (bool)u8Ret;
1033
1034# elif RT_INLINE_ASM_USES_INTRIN
1035 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1036
1037# else
1038 uint32_t u32Ret;
1039 __asm
1040 {
1041# ifdef RT_ARCH_AMD64
1042 mov rdx, [pu32]
1043# else
1044 mov edx, [pu32]
1045# endif
1046 mov eax, [u32Old]
1047 mov ecx, [u32New]
1048# ifdef RT_ARCH_AMD64
1049 lock cmpxchg [rdx], ecx
1050 mov rdx, [pu32Old]
1051 mov [rdx], eax
1052# else
1053 lock cmpxchg [edx], ecx
1054 mov edx, [pu32Old]
1055 mov [edx], eax
1056# endif
1057 setz al
1058 movzx eax, al
1059 mov [u32Ret], eax
1060 }
1061 return !!u32Ret;
1062# endif
1063}
1064#endif
1065
1066
1067/**
1068 * Atomically Compare and Exchange a signed 32-bit value, additionally
1069 * passes back old value, ordered.
1070 *
1071 * @returns true if xchg was done.
1072 * @returns false if xchg wasn't done.
1073 *
1074 * @param pi32 Pointer to the value to update.
1075 * @param i32New The new value to assigned to *pi32.
1076 * @param i32Old The old value to *pi32 compare with.
1077 * @param pi32Old Pointer store the old value at.
1078 */
1079DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1080{
1081 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1082}
1083
1084
1085/**
1086 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1087 * passing back old value, ordered.
1088 *
1089 * @returns true if xchg was done.
1090 * @returns false if xchg wasn't done.
1091 *
1092 * @param pu64 Pointer to the 64-bit variable to update.
1093 * @param u64New The 64-bit value to assign to *pu64.
1094 * @param u64Old The value to compare with.
1095 * @param pu64Old Pointer store the old value at.
1096 */
1097#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1098 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1099DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1100#else
1101DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1102{
1103# if RT_INLINE_ASM_USES_INTRIN
1104 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1105
1106# elif defined(RT_ARCH_AMD64)
1107# if RT_INLINE_ASM_GNU_STYLE
1108 uint8_t u8Ret;
1109 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1110 "setz %1\n\t"
1111 : "=m" (*pu64),
1112 "=qm" (u8Ret),
1113 "=a" (*pu64Old)
1114 : "r" (u64New),
1115 "a" (u64Old),
1116 "m" (*pu64));
1117 return (bool)u8Ret;
1118# else
1119 bool fRet;
1120 __asm
1121 {
1122 mov rdx, [pu32]
1123 mov rax, [u64Old]
1124 mov rcx, [u64New]
1125 lock cmpxchg [rdx], rcx
1126 mov rdx, [pu64Old]
1127 mov [rdx], rax
1128 setz al
1129 mov [fRet], al
1130 }
1131 return fRet;
1132# endif
1133# else /* !RT_ARCH_AMD64 */
1134# if RT_INLINE_ASM_GNU_STYLE
1135 uint64_t u64Ret;
1136# if defined(PIC) || defined(__PIC__)
1137 /* NB: this code uses a memory clobber description, because the clean
1138 * solution with an output value for *pu64 makes gcc run out of registers.
1139 * This will cause suboptimal code, and anyone with a better solution is
1140 * welcome to improve this. */
1141 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1142 "lock; cmpxchg8b %3\n\t"
1143 "xchgl %%ebx, %1\n\t"
1144 : "=A" (u64Ret)
1145 : "DS" ((uint32_t)u64New),
1146 "c" ((uint32_t)(u64New >> 32)),
1147 "m" (*pu64),
1148 "0" (u64Old)
1149 : "memory" );
1150# else /* !PIC */
1151 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1152 : "=A" (u64Ret),
1153 "=m" (*pu64)
1154 : "b" ((uint32_t)u64New),
1155 "c" ((uint32_t)(u64New >> 32)),
1156 "m" (*pu64),
1157 "0" (u64Old));
1158# endif
1159 *pu64Old = u64Ret;
1160 return u64Ret == u64Old;
1161# else
1162 uint32_t u32Ret;
1163 __asm
1164 {
1165 mov ebx, dword ptr [u64New]
1166 mov ecx, dword ptr [u64New + 4]
1167 mov edi, [pu64]
1168 mov eax, dword ptr [u64Old]
1169 mov edx, dword ptr [u64Old + 4]
1170 lock cmpxchg8b [edi]
1171 mov ebx, [pu64Old]
1172 mov [ebx], eax
1173 setz al
1174 movzx eax, al
1175 add ebx, 4
1176 mov [ebx], edx
1177 mov dword ptr [u32Ret], eax
1178 }
1179 return !!u32Ret;
1180# endif
1181# endif /* !RT_ARCH_AMD64 */
1182}
1183#endif
1184
1185
1186/**
1187 * Atomically Compare and exchange a signed 64-bit value, additionally
1188 * passing back old value, ordered.
1189 *
1190 * @returns true if xchg was done.
1191 * @returns false if xchg wasn't done.
1192 *
1193 * @param pi64 Pointer to the 64-bit variable to update.
1194 * @param i64 The 64-bit value to assign to *pu64.
1195 * @param i64Old The value to compare with.
1196 * @param pi64Old Pointer store the old value at.
1197 */
1198DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1199{
1200 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1201}
1202
1203/** @def ASMAtomicCmpXchgExHandle
1204 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1205 *
1206 * @param ph Pointer to the value to update.
1207 * @param hNew The new value to assigned to *pu.
1208 * @param hOld The old value to *pu compare with.
1209 * @param fRc Where to store the result.
1210 * @param phOldVal Pointer to where to store the old value.
1211 *
1212 * @remarks This doesn't currently work for all handles (like RTFILE).
1213 */
1214#if HC_ARCH_BITS == 32
1215# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1216 do { \
1217 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1218 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1219 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1220 } while (0)
1221#elif HC_ARCH_BITS == 64
1222# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1223 do { \
1224 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1225 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1226 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1227 } while (0)
1228#else
1229# error HC_ARCH_BITS
1230#endif
1231
1232
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers.  Additionally passes back the old value.
 *
 * The operand width is selected from sizeof(*(pu)): 4 bytes is forwarded to
 * ASMAtomicCmpXchgExU32 and 8 bytes to ASMAtomicCmpXchgExU64.  Any other size
 * raises an assertion, stores false in fRc and zero in *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* sizeof yields a size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1256
1257
1258/**
1259 * Atomically Compare and Exchange a pointer value, additionally
1260 * passing back old value, ordered.
1261 *
1262 * @returns true if xchg was done.
1263 * @returns false if xchg wasn't done.
1264 *
1265 * @param ppv Pointer to the value to update.
1266 * @param pvNew The new value to assigned to *ppv.
1267 * @param pvOld The old value to *ppv compare with.
1268 * @param ppvOld Pointer store the old value at.
1269 */
1270DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1271{
1272#if ARCH_BITS == 32
1273 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1274#elif ARCH_BITS == 64
1275 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1276#else
1277# error "ARCH_BITS is bogus"
1278#endif
1279}
1280
1281
/**
 * Typed front end for ASMAtomicCmpXchgExPtrVoid: atomic compare-and-exchange
 * of a pointer value that also hands back the pointer found, ordered.
 *
 * @returns true if the exchange was performed.
 * @returns false if the exchange was not performed.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Where to store the value that was found in *ppv.
 *
 * @remarks This is relatively type safe on GCC platforms: every argument is
 *          first assigned to a local whose type is derived from *ppv, and only
 *          then cast to the void pointer types the worker takes.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        /* The call result is the value of the whole statement expression. */ \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                  (void *)pvNewTypeChecked, \
                                  (void *)pvOldTypeChecked, \
                                  (void **)ppvOldTypeChecked); \
    })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#endif
1313
1314
1315/**
1316 * Serialize Instruction.
1317 */
1318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1319DECLASM(void) ASMSerializeInstruction(void);
1320#else
1321DECLINLINE(void) ASMSerializeInstruction(void)
1322{
1323# if RT_INLINE_ASM_GNU_STYLE
1324 RTCCUINTREG xAX = 0;
1325# ifdef RT_ARCH_AMD64
1326 __asm__ ("cpuid"
1327 : "=a" (xAX)
1328 : "0" (xAX)
1329 : "rbx", "rcx", "rdx");
1330# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1331 __asm__ ("push %%ebx\n\t"
1332 "cpuid\n\t"
1333 "pop %%ebx\n\t"
1334 : "=a" (xAX)
1335 : "0" (xAX)
1336 : "ecx", "edx");
1337# else
1338 __asm__ ("cpuid"
1339 : "=a" (xAX)
1340 : "0" (xAX)
1341 : "ebx", "ecx", "edx");
1342# endif
1343
1344# elif RT_INLINE_ASM_USES_INTRIN
1345 int aInfo[4];
1346 __cpuid(aInfo, 0);
1347
1348# else
1349 __asm
1350 {
1351 push ebx
1352 xor eax, eax
1353 cpuid
1354 pop ebx
1355 }
1356# endif
1357}
1358#endif
1359
1360
1361/**
1362 * Memory fence, waits for any pending writes and reads to complete.
1363 */
1364DECLINLINE(void) ASMMemoryFence(void)
1365{
1366 /** @todo use mfence? check if all cpus we care for support it. */
1367 uint32_t volatile u32;
1368 ASMAtomicXchgU32(&u32, 0);
1369}
1370
1371
1372/**
1373 * Write fence, waits for any pending writes to complete.
1374 */
1375DECLINLINE(void) ASMWriteFence(void)
1376{
1377 /** @todo use sfence? check if all cpus we care for support it. */
1378 ASMMemoryFence();
1379}
1380
1381
1382/**
1383 * Read fence, waits for any pending reads to complete.
1384 */
1385DECLINLINE(void) ASMReadFence(void)
1386{
1387 /** @todo use lfence? check if all cpus we care for support it. */
1388 ASMMemoryFence();
1389}
1390
1391
1392/**
1393 * Atomically reads an unsigned 8-bit value, ordered.
1394 *
1395 * @returns Current *pu8 value
1396 * @param pu8 Pointer to the 8-bit variable to read.
1397 */
1398DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1399{
1400 ASMMemoryFence();
1401 return *pu8; /* byte reads are atomic on x86 */
1402}
1403
1404
1405/**
1406 * Atomically reads an unsigned 8-bit value, unordered.
1407 *
1408 * @returns Current *pu8 value
1409 * @param pu8 Pointer to the 8-bit variable to read.
1410 */
1411DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1412{
1413 return *pu8; /* byte reads are atomic on x86 */
1414}
1415
1416
1417/**
1418 * Atomically reads a signed 8-bit value, ordered.
1419 *
1420 * @returns Current *pi8 value
1421 * @param pi8 Pointer to the 8-bit variable to read.
1422 */
1423DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1424{
1425 ASMMemoryFence();
1426 return *pi8; /* byte reads are atomic on x86 */
1427}
1428
1429
1430/**
1431 * Atomically reads a signed 8-bit value, unordered.
1432 *
1433 * @returns Current *pi8 value
1434 * @param pi8 Pointer to the 8-bit variable to read.
1435 */
1436DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1437{
1438 return *pi8; /* byte reads are atomic on x86 */
1439}
1440
1441
1442/**
1443 * Atomically reads an unsigned 16-bit value, ordered.
1444 *
1445 * @returns Current *pu16 value
1446 * @param pu16 Pointer to the 16-bit variable to read.
1447 */
1448DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1449{
1450 ASMMemoryFence();
1451 Assert(!((uintptr_t)pu16 & 1));
1452 return *pu16;
1453}
1454
1455
1456/**
1457 * Atomically reads an unsigned 16-bit value, unordered.
1458 *
1459 * @returns Current *pu16 value
1460 * @param pu16 Pointer to the 16-bit variable to read.
1461 */
1462DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1463{
1464 Assert(!((uintptr_t)pu16 & 1));
1465 return *pu16;
1466}
1467
1468
1469/**
1470 * Atomically reads a signed 16-bit value, ordered.
1471 *
1472 * @returns Current *pi16 value
1473 * @param pi16 Pointer to the 16-bit variable to read.
1474 */
1475DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1476{
1477 ASMMemoryFence();
1478 Assert(!((uintptr_t)pi16 & 1));
1479 return *pi16;
1480}
1481
1482
1483/**
1484 * Atomically reads a signed 16-bit value, unordered.
1485 *
1486 * @returns Current *pi16 value
1487 * @param pi16 Pointer to the 16-bit variable to read.
1488 */
1489DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1490{
1491 Assert(!((uintptr_t)pi16 & 1));
1492 return *pi16;
1493}
1494
1495
1496/**
1497 * Atomically reads an unsigned 32-bit value, ordered.
1498 *
1499 * @returns Current *pu32 value
1500 * @param pu32 Pointer to the 32-bit variable to read.
1501 */
1502DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1503{
1504 ASMMemoryFence();
1505 Assert(!((uintptr_t)pu32 & 3));
1506 return *pu32;
1507}
1508
1509
1510/**
1511 * Atomically reads an unsigned 32-bit value, unordered.
1512 *
1513 * @returns Current *pu32 value
1514 * @param pu32 Pointer to the 32-bit variable to read.
1515 */
1516DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1517{
1518 Assert(!((uintptr_t)pu32 & 3));
1519 return *pu32;
1520}
1521
1522
1523/**
1524 * Atomically reads a signed 32-bit value, ordered.
1525 *
1526 * @returns Current *pi32 value
1527 * @param pi32 Pointer to the 32-bit variable to read.
1528 */
1529DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1530{
1531 ASMMemoryFence();
1532 Assert(!((uintptr_t)pi32 & 3));
1533 return *pi32;
1534}
1535
1536
1537/**
1538 * Atomically reads a signed 32-bit value, unordered.
1539 *
1540 * @returns Current *pi32 value
1541 * @param pi32 Pointer to the 32-bit variable to read.
1542 */
1543DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1544{
1545 Assert(!((uintptr_t)pi32 & 3));
1546 return *pi32;
1547}
1548
1549
1550/**
1551 * Atomically reads an unsigned 64-bit value, ordered.
1552 *
1553 * @returns Current *pu64 value
1554 * @param pu64 Pointer to the 64-bit variable to read.
1555 * The memory pointed to must be writable.
1556 * @remark This will fault if the memory is read-only!
1557 */
1558#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1559 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1560DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1561#else
1562DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1563{
1564 uint64_t u64;
1565# ifdef RT_ARCH_AMD64
1566 Assert(!((uintptr_t)pu64 & 7));
1567/*# if RT_INLINE_ASM_GNU_STYLE
1568 __asm__ __volatile__( "mfence\n\t"
1569 "movq %1, %0\n\t"
1570 : "=r" (u64)
1571 : "m" (*pu64));
1572# else
1573 __asm
1574 {
1575 mfence
1576 mov rdx, [pu64]
1577 mov rax, [rdx]
1578 mov [u64], rax
1579 }
1580# endif*/
1581 ASMMemoryFence();
1582 u64 = *pu64;
1583# else /* !RT_ARCH_AMD64 */
1584# if RT_INLINE_ASM_GNU_STYLE
1585# if defined(PIC) || defined(__PIC__)
1586 uint32_t u32EBX = 0;
1587 Assert(!((uintptr_t)pu64 & 7));
1588 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1589 "lock; cmpxchg8b (%5)\n\t"
1590 "movl %3, %%ebx\n\t"
1591 : "=A" (u64),
1592# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1593 "+m" (*pu64)
1594# else
1595 "=m" (*pu64)
1596# endif
1597 : "0" (0ULL),
1598 "m" (u32EBX),
1599 "c" (0),
1600 "S" (pu64));
1601# else /* !PIC */
1602 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1603 : "=A" (u64),
1604 "+m" (*pu64)
1605 : "0" (0ULL),
1606 "b" (0),
1607 "c" (0));
1608# endif
1609# else
1610 Assert(!((uintptr_t)pu64 & 7));
1611 __asm
1612 {
1613 xor eax, eax
1614 xor edx, edx
1615 mov edi, pu64
1616 xor ecx, ecx
1617 xor ebx, ebx
1618 lock cmpxchg8b [edi]
1619 mov dword ptr [u64], eax
1620 mov dword ptr [u64 + 4], edx
1621 }
1622# endif
1623# endif /* !RT_ARCH_AMD64 */
1624 return u64;
1625}
1626#endif
1627
1628
1629/**
1630 * Atomically reads an unsigned 64-bit value, unordered.
1631 *
1632 * @returns Current *pu64 value
1633 * @param pu64 Pointer to the 64-bit variable to read.
1634 * The memory pointed to must be writable.
1635 * @remark This will fault if the memory is read-only!
1636 */
1637#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1638 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1639DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1640#else
1641DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1642{
1643 uint64_t u64;
1644# ifdef RT_ARCH_AMD64
1645 Assert(!((uintptr_t)pu64 & 7));
1646/*# if RT_INLINE_ASM_GNU_STYLE
1647 Assert(!((uintptr_t)pu64 & 7));
1648 __asm__ __volatile__("movq %1, %0\n\t"
1649 : "=r" (u64)
1650 : "m" (*pu64));
1651# else
1652 __asm
1653 {
1654 mov rdx, [pu64]
1655 mov rax, [rdx]
1656 mov [u64], rax
1657 }
1658# endif */
1659 u64 = *pu64;
1660# else /* !RT_ARCH_AMD64 */
1661# if RT_INLINE_ASM_GNU_STYLE
1662# if defined(PIC) || defined(__PIC__)
1663 uint32_t u32EBX = 0;
1664 uint32_t u32Spill;
1665 Assert(!((uintptr_t)pu64 & 7));
1666 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1667 "xor %%ecx,%%ecx\n\t"
1668 "xor %%edx,%%edx\n\t"
1669 "xchgl %%ebx, %3\n\t"
1670 "lock; cmpxchg8b (%4)\n\t"
1671 "movl %3, %%ebx\n\t"
1672 : "=A" (u64),
1673# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1674 "+m" (*pu64),
1675# else
1676 "=m" (*pu64),
1677# endif
1678 "=c" (u32Spill)
1679 : "m" (u32EBX),
1680 "S" (pu64));
1681# else /* !PIC */
1682 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1683 : "=A" (u64),
1684 "+m" (*pu64)
1685 : "0" (0ULL),
1686 "b" (0),
1687 "c" (0));
1688# endif
1689# else
1690 Assert(!((uintptr_t)pu64 & 7));
1691 __asm
1692 {
1693 xor eax, eax
1694 xor edx, edx
1695 mov edi, pu64
1696 xor ecx, ecx
1697 xor ebx, ebx
1698 lock cmpxchg8b [edi]
1699 mov dword ptr [u64], eax
1700 mov dword ptr [u64 + 4], edx
1701 }
1702# endif
1703# endif /* !RT_ARCH_AMD64 */
1704 return u64;
1705}
1706#endif
1707
1708
1709/**
1710 * Atomically reads a signed 64-bit value, ordered.
1711 *
1712 * @returns Current *pi64 value
1713 * @param pi64 Pointer to the 64-bit variable to read.
1714 * The memory pointed to must be writable.
1715 * @remark This will fault if the memory is read-only!
1716 */
1717DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1718{
1719 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1720}
1721
1722
1723/**
1724 * Atomically reads a signed 64-bit value, unordered.
1725 *
1726 * @returns Current *pi64 value
1727 * @param pi64 Pointer to the 64-bit variable to read.
1728 * The memory pointed to must be writable.
1729 * @remark This will fault if the memory is read-only!
1730 */
1731DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1732{
1733 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1734}
1735
1736
1737/**
1738 * Atomically reads a size_t value, ordered.
1739 *
1740 * @returns Current *pcb value
1741 * @param pcb Pointer to the size_t variable to read.
1742 */
1743DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1744{
1745#if ARCH_BITS == 64
1746 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1747#elif ARCH_BITS == 32
1748 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1749#else
1750# error "Unsupported ARCH_BITS value"
1751#endif
1752}
1753
1754
1755/**
1756 * Atomically reads a size_t value, unordered.
1757 *
1758 * @returns Current *pcb value
1759 * @param pcb Pointer to the size_t variable to read.
1760 */
1761DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1762{
1763#if ARCH_BITS == 64
1764 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1765#elif ARCH_BITS == 32
1766 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1767#else
1768# error "Unsupported ARCH_BITS value"
1769#endif
1770}
1771
1772
1773/**
1774 * Atomically reads a pointer value, ordered.
1775 *
1776 * @returns Current *pv value
1777 * @param ppv Pointer to the pointer variable to read.
1778 *
1779 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1780 * requires less typing (no casts).
1781 */
1782DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1783{
1784#if ARCH_BITS == 32
1785 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1786#elif ARCH_BITS == 64
1787 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1788#else
1789# error "ARCH_BITS is bogus"
1790#endif
1791}
1792
/**
 * Typed front end for ASMAtomicReadPtr that spares the caller the casts.
 *
 * With GCC the argument is first assigned to a local whose type is derived
 * from *ppv and the result is assigned to a local of type Type, so the
 * compiler checks both against each other.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#endif
1812
1813
1814/**
1815 * Atomically reads a pointer value, unordered.
1816 *
1817 * @returns Current *pv value
1818 * @param ppv Pointer to the pointer variable to read.
1819 *
1820 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1821 * requires less typing (no casts).
1822 */
1823DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1824{
1825#if ARCH_BITS == 32
1826 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1827#elif ARCH_BITS == 64
1828 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1829#else
1830# error "ARCH_BITS is bogus"
1831#endif
1832}
1833
1834
/**
 * Typed front end for ASMAtomicUoReadPtr that spares the caller the casts.
 *
 * With GCC the argument is first assigned to a local whose type is derived
 * from *ppv and the result is assigned to a local of type Type, so the
 * compiler checks both against each other.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#endif
1854
1855
1856/**
1857 * Atomically reads a boolean value, ordered.
1858 *
1859 * @returns Current *pf value
1860 * @param pf Pointer to the boolean variable to read.
1861 */
1862DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1863{
1864 ASMMemoryFence();
1865 return *pf; /* byte reads are atomic on x86 */
1866}
1867
1868
1869/**
1870 * Atomically reads a boolean value, unordered.
1871 *
1872 * @returns Current *pf value
1873 * @param pf Pointer to the boolean variable to read.
1874 */
1875DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1876{
1877 return *pf; /* byte reads are atomic on x86 */
1878}
1879
1880
1881/**
1882 * Atomically read a typical IPRT handle value, ordered.
1883 *
1884 * @param ph Pointer to the handle variable to read.
1885 * @param phRes Where to store the result.
1886 *
1887 * @remarks This doesn't currently work for all handles (like RTFILE).
1888 */
1889#if HC_ARCH_BITS == 32
1890# define ASMAtomicReadHandle(ph, phRes) \
1891 do { \
1892 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1893 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1894 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1895 } while (0)
1896#elif HC_ARCH_BITS == 64
1897# define ASMAtomicReadHandle(ph, phRes) \
1898 do { \
1899 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1900 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1901 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1902 } while (0)
1903#else
1904# error HC_ARCH_BITS
1905#endif
1906
1907
1908/**
1909 * Atomically read a typical IPRT handle value, unordered.
1910 *
1911 * @param ph Pointer to the handle variable to read.
1912 * @param phRes Where to store the result.
1913 *
1914 * @remarks This doesn't currently work for all handles (like RTFILE).
1915 */
1916#if HC_ARCH_BITS == 32
1917# define ASMAtomicUoReadHandle(ph, phRes) \
1918 do { \
1919 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1920 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1921 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1922 } while (0)
1923#elif HC_ARCH_BITS == 64
1924# define ASMAtomicUoReadHandle(ph, phRes) \
1925 do { \
1926 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1927 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1928 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1929 } while (0)
1930#else
1931# error HC_ARCH_BITS
1932#endif
1933
1934
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * The operand width is selected from sizeof(*(pu)); 1, 2, 4 and 8 byte
 * operands are forwarded to the matching ASMAtomicReadUxx function.  Any
 * other size raises an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields a size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
1952
1953
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * The operand width is selected from sizeof(*(pu)); 1, 2, 4 and 8 byte
 * operands are forwarded to the matching ASMAtomicUoReadUxx function.  Any
 * other size raises an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* Name this macro in the message; sizeof yields a size_t, so cast it for %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
1971
1972
1973/**
1974 * Atomically writes an unsigned 8-bit value, ordered.
1975 *
1976 * @param pu8 Pointer to the 8-bit variable.
1977 * @param u8 The 8-bit value to assign to *pu8.
1978 */
1979DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1980{
1981 ASMAtomicXchgU8(pu8, u8);
1982}
1983
1984
1985/**
1986 * Atomically writes an unsigned 8-bit value, unordered.
1987 *
1988 * @param pu8 Pointer to the 8-bit variable.
1989 * @param u8 The 8-bit value to assign to *pu8.
1990 */
1991DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
1992{
1993 *pu8 = u8; /* byte writes are atomic on x86 */
1994}
1995
1996
1997/**
1998 * Atomically writes a signed 8-bit value, ordered.
1999 *
2000 * @param pi8 Pointer to the 8-bit variable to read.
2001 * @param i8 The 8-bit value to assign to *pi8.
2002 */
2003DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2004{
2005 ASMAtomicXchgS8(pi8, i8);
2006}
2007
2008
2009/**
2010 * Atomically writes a signed 8-bit value, unordered.
2011 *
2012 * @param pi8 Pointer to the 8-bit variable to write.
2013 * @param i8 The 8-bit value to assign to *pi8.
2014 */
2015DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2016{
2017 *pi8 = i8; /* byte writes are atomic on x86 */
2018}
2019
2020
2021/**
2022 * Atomically writes an unsigned 16-bit value, ordered.
2023 *
2024 * @param pu16 Pointer to the 16-bit variable to write.
2025 * @param u16 The 16-bit value to assign to *pu16.
2026 */
2027DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2028{
2029 ASMAtomicXchgU16(pu16, u16);
2030}
2031
2032
2033/**
2034 * Atomically writes an unsigned 16-bit value, unordered.
2035 *
2036 * @param pu16 Pointer to the 16-bit variable to write.
2037 * @param u16 The 16-bit value to assign to *pu16.
2038 */
2039DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2040{
2041 Assert(!((uintptr_t)pu16 & 1));
2042 *pu16 = u16;
2043}
2044
2045
2046/**
2047 * Atomically writes a signed 16-bit value, ordered.
2048 *
2049 * @param pi16 Pointer to the 16-bit variable to write.
2050 * @param i16 The 16-bit value to assign to *pi16.
2051 */
2052DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2053{
2054 ASMAtomicXchgS16(pi16, i16);
2055}
2056
2057
2058/**
2059 * Atomically writes a signed 16-bit value, unordered.
2060 *
2061 * @param pi16 Pointer to the 16-bit variable to write.
2062 * @param i16 The 16-bit value to assign to *pi16.
2063 */
2064DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2065{
2066 Assert(!((uintptr_t)pi16 & 1));
2067 *pi16 = i16;
2068}
2069
2070
2071/**
2072 * Atomically writes an unsigned 32-bit value, ordered.
2073 *
2074 * @param pu32 Pointer to the 32-bit variable to write.
2075 * @param u32 The 32-bit value to assign to *pu32.
2076 */
2077DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2078{
2079 ASMAtomicXchgU32(pu32, u32);
2080}
2081
2082
2083/**
2084 * Atomically writes an unsigned 32-bit value, unordered.
2085 *
2086 * @param pu32 Pointer to the 32-bit variable to write.
2087 * @param u32 The 32-bit value to assign to *pu32.
2088 */
2089DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2090{
2091 Assert(!((uintptr_t)pu32 & 3));
2092 *pu32 = u32;
2093}
2094
2095
2096/**
2097 * Atomically writes a signed 32-bit value, ordered.
2098 *
2099 * @param pi32 Pointer to the 32-bit variable to write.
2100 * @param i32 The 32-bit value to assign to *pi32.
2101 */
2102DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2103{
2104 ASMAtomicXchgS32(pi32, i32);
2105}
2106
2107
2108/**
2109 * Atomically writes a signed 32-bit value, unordered.
2110 *
2111 * @param pi32 Pointer to the 32-bit variable to write.
2112 * @param i32 The 32-bit value to assign to *pi32.
2113 */
2114DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2115{
2116 Assert(!((uintptr_t)pi32 & 3));
2117 *pi32 = i32;
2118}
2119
2120
2121/**
2122 * Atomically writes an unsigned 64-bit value, ordered.
2123 *
2124 * @param pu64 Pointer to the 64-bit variable to write.
2125 * @param u64 The 64-bit value to assign to *pu64.
2126 */
2127DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2128{
2129 ASMAtomicXchgU64(pu64, u64);
2130}
2131
2132
2133/**
2134 * Atomically writes an unsigned 64-bit value, unordered.
2135 *
2136 * @param pu64 Pointer to the 64-bit variable to write.
2137 * @param u64 The 64-bit value to assign to *pu64.
2138 */
2139DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2140{
2141 Assert(!((uintptr_t)pu64 & 7));
2142#if ARCH_BITS == 64
2143 *pu64 = u64;
2144#else
2145 ASMAtomicXchgU64(pu64, u64);
2146#endif
2147}
2148
2149
2150/**
2151 * Atomically writes a signed 64-bit value, ordered.
2152 *
2153 * @param pi64 Pointer to the 64-bit variable to write.
2154 * @param i64 The 64-bit value to assign to *pi64.
2155 */
2156DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2157{
2158 ASMAtomicXchgS64(pi64, i64);
2159}
2160
2161
2162/**
2163 * Atomically writes a signed 64-bit value, unordered.
2164 *
2165 * @param pi64 Pointer to the 64-bit variable to write.
2166 * @param i64 The 64-bit value to assign to *pi64.
2167 */
2168DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2169{
2170 Assert(!((uintptr_t)pi64 & 7));
2171#if ARCH_BITS == 64
2172 *pi64 = i64;
2173#else
2174 ASMAtomicXchgS64(pi64, i64);
2175#endif
2176}
2177
2178
2179/**
2180 * Atomically writes a boolean value, unordered.
2181 *
2182 * @param pf Pointer to the boolean variable to write.
2183 * @param f The boolean value to assign to *pf.
2184 */
2185DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2186{
2187 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2188}
2189
2190
2191/**
2192 * Atomically writes a boolean value, unordered.
2193 *
2194 * @param pf Pointer to the boolean variable to write.
2195 * @param f The boolean value to assign to *pf.
2196 */
2197DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2198{
2199 *pf = f; /* byte writes are atomic on x86 */
2200}
2201
2202
2203/**
2204 * Atomically writes a pointer value, ordered.
2205 *
2206 * @param ppv Pointer to the pointer variable to write.
2207 * @param pv The pointer value to assign to *ppv.
2208 */
2209DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2210{
2211#if ARCH_BITS == 32
2212 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2213#elif ARCH_BITS == 64
2214 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2215#else
2216# error "ARCH_BITS is bogus"
2217#endif
2218}
2219
2220
/**
 * Atomically writes a pointer value, ordered.
 *
 * Compile-time checks verify that both *ppv and pv are pointer sized, and an
 * assertion checks that ppv is aligned on the architecture's pointer width.
 * The macro arguments are fully parenthesised, and in the GCC variant they are
 * evaluated for their value only once (through the typed temporaries).
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2255
2256
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * A compile-time check verifies that *ppv is pointer sized, and an assertion
 * checks that ppv is aligned on the architecture's pointer width.  The macro
 * argument is fully parenthesised, and in the GCC variant it is evaluated for
 * its value only once (through the typed temporary).
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2282
2283
/**
 * Atomically writes a pointer value, unordered.
 *
 * This is a statement macro performing a plain store through @a ppv; it does
 * not yield a value.  Compile-time checks verify that both *ppv and pv are
 * pointer sized, and an assertion checks that ppv is aligned on the
 * architecture's pointer width.  The macro arguments are fully parenthesised,
 * and in the GCC variant they are evaluated for their value only once.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2318
2319
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * A compile-time check verifies that *ppv is pointer sized, and an assertion
 * checks that ppv is aligned on the architecture's pointer width.  The macro
 * argument is fully parenthesised, and in the GCC variant it is evaluated for
 * its value only once (through the typed temporary).
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppvTypeChecked) & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2345
2346
2347/**
2348 * Atomically write a typical IPRT handle value, ordered.
2349 *
2350 * @param ph Pointer to the variable to update.
2351 * @param hNew The value to assign to *ph.
2352 *
2353 * @remarks This doesn't currently work for all handles (like RTFILE).
2354 */
2355#if HC_ARCH_BITS == 32
2356# define ASMAtomicWriteHandle(ph, hNew) \
2357 do { \
2358 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2359 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2360 } while (0)
2361#elif HC_ARCH_BITS == 64
2362# define ASMAtomicWriteHandle(ph, hNew) \
2363 do { \
2364 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2365 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2366 } while (0)
2367#else
2368# error HC_ARCH_BITS
2369#endif
2370
2371
2372/**
2373 * Atomically write a typical IPRT handle value, unordered.
2374 *
2375 * @param ph Pointer to the variable to update.
2376 * @param hNew The value to assign to *ph.
2377 *
2378 * @remarks This doesn't currently work for all handles (like RTFILE).
2379 */
2380#if HC_ARCH_BITS == 32
2381# define ASMAtomicUoWriteHandle(ph, hNew) \
2382 do { \
2383 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2384 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2385 } while (0)
2386#elif HC_ARCH_BITS == 64
2387# define ASMAtomicUoWriteHandle(ph, hNew) \
2388 do { \
2389 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2390 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2391 } while (0)
2392#else
2393# error HC_ARCH_BITS
2394#endif
2395
2396
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit writer; any other size
 * triggers an assertion failure message.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2414
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered writer; any
 * other size triggers an assertion failure message.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2432
2433
2434
2435/**
2436 * Atomically exchanges and adds to a 32-bit value, ordered.
2437 *
2438 * @returns The old value.
2439 * @param pu32 Pointer to the value.
2440 * @param u32 Number to add.
2441 */
2442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2443DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2444#else
2445DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2446{
2447# if RT_INLINE_ASM_USES_INTRIN
2448 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2449 return u32;
2450
2451# elif RT_INLINE_ASM_GNU_STYLE
2452 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2453 : "=r" (u32),
2454 "=m" (*pu32)
2455 : "0" (u32),
2456 "m" (*pu32)
2457 : "memory");
2458 return u32;
2459# else
2460 __asm
2461 {
2462 mov eax, [u32]
2463# ifdef RT_ARCH_AMD64
2464 mov rdx, [pu32]
2465 lock xadd [rdx], eax
2466# else
2467 mov edx, [pu32]
2468 lock xadd [edx], eax
2469# endif
2470 mov [u32], eax
2471 }
2472 return u32;
2473# endif
2474}
2475#endif
2476
2477
2478/**
2479 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2480 *
2481 * @returns The old value.
2482 * @param pi32 Pointer to the value.
2483 * @param i32 Number to add.
2484 */
2485DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2486{
2487 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2488}
2489
2490
2491/**
2492 * Atomically exchanges and adds to a 64-bit value, ordered.
2493 *
2494 * @returns The old value.
2495 * @param pu64 Pointer to the value.
2496 * @param u64 Number to add.
2497 */
2498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2499DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2500#else
2501DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2502{
2503# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2504 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2505 return u64;
2506
2507# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2508 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2509 : "=r" (u64),
2510 "=m" (*pu64)
2511 : "0" (u64),
2512 "m" (*pu64)
2513 : "memory");
2514 return u64;
2515# else
2516 uint64_t u64Old;
2517 for (;;)
2518 {
2519 uint64_t u64New;
2520 u64Old = ASMAtomicUoReadU64(pu64);
2521 u64New = u64Old + u64;
2522 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2523 break;
2524 ASMNopPause();
2525 }
2526 return u64Old;
2527# endif
2528}
2529#endif
2530
2531
2532/**
2533 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2534 *
2535 * @returns The old value.
2536 * @param pi64 Pointer to the value.
2537 * @param i64 Number to add.
2538 */
2539DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2540{
2541 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2542}
2543
2544
2545/**
2546 * Atomically exchanges and adds to a size_t value, ordered.
2547 *
2548 * @returns The old value.
2549 * @param pcb Pointer to the size_t value.
2550 * @param cb Number to add.
2551 */
2552DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2553{
2554#if ARCH_BITS == 64
2555 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2556#elif ARCH_BITS == 32
2557 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2558#else
2559# error "Unsupported ARCH_BITS value"
2560#endif
2561}
2562
2563
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Only 4 and 8 byte variables are handled; any other size triggers an
 * assertion failure message and leaves *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2580
2581
2582/**
2583 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2584 *
2585 * @returns The old value.
2586 * @param pu32 Pointer to the value.
2587 * @param u32 Number to subtract.
2588 */
2589DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2590{
2591 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2592}
2593
2594
2595/**
2596 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2597 *
2598 * @returns The old value.
2599 * @param pi32 Pointer to the value.
2600 * @param i32 Number to subtract.
2601 */
2602DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2603{
2604 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2605}
2606
2607
2608/**
2609 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2610 *
2611 * @returns The old value.
2612 * @param pu64 Pointer to the value.
2613 * @param u64 Number to subtract.
2614 */
2615DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2616{
2617 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2618}
2619
2620
2621/**
2622 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2623 *
2624 * @returns The old value.
2625 * @param pi64 Pointer to the value.
2626 * @param i64 Number to subtract.
2627 */
2628DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2629{
2630 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2631}
2632
2633
2634/**
2635 * Atomically exchanges and subtracts to a size_t value, ordered.
2636 *
2637 * @returns The old value.
2638 * @param pcb Pointer to the size_t value.
2639 * @param cb Number to subtract.
2640 */
2641DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2642{
2643#if ARCH_BITS == 64
2644 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2645#elif ARCH_BITS == 32
2646 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2647#else
2648# error "Unsupported ARCH_BITS value"
2649#endif
2650}
2651
2652
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Only 4 and 8 byte variables are handled; any other size triggers an
 * assertion failure message and leaves *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2669
2670
2671/**
2672 * Atomically increment a 32-bit value, ordered.
2673 *
2674 * @returns The new value.
2675 * @param pu32 Pointer to the value to increment.
2676 */
2677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2678DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2679#else
2680DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2681{
2682 uint32_t u32;
2683# if RT_INLINE_ASM_USES_INTRIN
2684 u32 = _InterlockedIncrement((long *)pu32);
2685 return u32;
2686
2687# elif RT_INLINE_ASM_GNU_STYLE
2688 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2689 : "=r" (u32),
2690 "=m" (*pu32)
2691 : "0" (1),
2692 "m" (*pu32)
2693 : "memory");
2694 return u32+1;
2695# else
2696 __asm
2697 {
2698 mov eax, 1
2699# ifdef RT_ARCH_AMD64
2700 mov rdx, [pu32]
2701 lock xadd [rdx], eax
2702# else
2703 mov edx, [pu32]
2704 lock xadd [edx], eax
2705# endif
2706 mov u32, eax
2707 }
2708 return u32+1;
2709# endif
2710}
2711#endif
2712
2713
2714/**
2715 * Atomically increment a signed 32-bit value, ordered.
2716 *
2717 * @returns The new value.
2718 * @param pi32 Pointer to the value to increment.
2719 */
2720DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2721{
2722 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2723}
2724
2725
2726/**
2727 * Atomically increment a 64-bit value, ordered.
2728 *
2729 * @returns The new value.
2730 * @param pu64 Pointer to the value to increment.
2731 */
2732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2733DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2734#else
2735DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2736{
2737# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2738 uint64_t u64;
2739 u64 = _InterlockedIncrement64((__int64 *)pu64);
2740 return u64;
2741
2742# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2743 uint64_t u64;
2744 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2745 : "=r" (u64),
2746 "=m" (*pu64)
2747 : "0" (1),
2748 "m" (*pu64)
2749 : "memory");
2750 return u64 + 1;
2751# else
2752 return ASMAtomicAddU64(pu64, 1) + 1;
2753# endif
2754}
2755#endif
2756
2757
2758/**
2759 * Atomically increment a signed 64-bit value, ordered.
2760 *
2761 * @returns The new value.
2762 * @param pi64 Pointer to the value to increment.
2763 */
2764DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2765{
2766 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2767}
2768
2769
2770/**
2771 * Atomically increment a size_t value, ordered.
2772 *
2773 * @returns The new value.
2774 * @param pcb Pointer to the value to increment.
2775 */
2776DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
2777{
2778#if ARCH_BITS == 64
2779 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2780#elif ARCH_BITS == 32
2781 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2782#else
2783# error "Unsupported ARCH_BITS value"
2784#endif
2785}
2786
2787
2788/**
2789 * Atomically decrement an unsigned 32-bit value, ordered.
2790 *
2791 * @returns The new value.
2792 * @param pu32 Pointer to the value to decrement.
2793 */
2794#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2795DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2796#else
2797DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2798{
2799 uint32_t u32;
2800# if RT_INLINE_ASM_USES_INTRIN
2801 u32 = _InterlockedDecrement((long *)pu32);
2802 return u32;
2803
2804# elif RT_INLINE_ASM_GNU_STYLE
2805 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2806 : "=r" (u32),
2807 "=m" (*pu32)
2808 : "0" (-1),
2809 "m" (*pu32)
2810 : "memory");
2811 return u32-1;
2812# else
2813 __asm
2814 {
2815 mov eax, -1
2816# ifdef RT_ARCH_AMD64
2817 mov rdx, [pu32]
2818 lock xadd [rdx], eax
2819# else
2820 mov edx, [pu32]
2821 lock xadd [edx], eax
2822# endif
2823 mov u32, eax
2824 }
2825 return u32-1;
2826# endif
2827}
2828#endif
2829
2830
2831/**
2832 * Atomically decrement a signed 32-bit value, ordered.
2833 *
2834 * @returns The new value.
2835 * @param pi32 Pointer to the value to decrement.
2836 */
2837DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2838{
2839 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2840}
2841
2842
2843/**
2844 * Atomically decrement an unsigned 64-bit value, ordered.
2845 *
2846 * @returns The new value.
2847 * @param pu64 Pointer to the value to decrement.
2848 */
2849#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2850DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2851#else
2852DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2853{
2854# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2855 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2856 return u64;
2857
2858# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2859 uint64_t u64;
2860 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2861 : "=r" (u64),
2862 "=m" (*pu64)
2863 : "0" (~(uint64_t)0),
2864 "m" (*pu64)
2865 : "memory");
2866 return u64-1;
2867# else
2868 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2869# endif
2870}
2871#endif
2872
2873
2874/**
2875 * Atomically decrement a signed 64-bit value, ordered.
2876 *
2877 * @returns The new value.
2878 * @param pi64 Pointer to the value to decrement.
2879 */
2880DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2881{
2882 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2883}
2884
2885
2886/**
2887 * Atomically decrement a size_t value, ordered.
2888 *
2889 * @returns The new value.
2890 * @param pcb Pointer to the value to decrement.
2891 */
2892DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
2893{
2894#if ARCH_BITS == 64
2895 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2896#elif ARCH_BITS == 32
2897 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2898#else
2899# error "Unsupported ARCH_BITS value"
2900#endif
2901}
2902
2903
2904/**
2905 * Atomically Or an unsigned 32-bit value, ordered.
2906 *
2907 * @param pu32 Pointer to the pointer variable to OR u32 with.
2908 * @param u32 The value to OR *pu32 with.
2909 */
2910#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2911DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2912#else
2913DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2914{
2915# if RT_INLINE_ASM_USES_INTRIN
2916 _InterlockedOr((long volatile *)pu32, (long)u32);
2917
2918# elif RT_INLINE_ASM_GNU_STYLE
2919 __asm__ __volatile__("lock; orl %1, %0\n\t"
2920 : "=m" (*pu32)
2921 : "ir" (u32),
2922 "m" (*pu32));
2923# else
2924 __asm
2925 {
2926 mov eax, [u32]
2927# ifdef RT_ARCH_AMD64
2928 mov rdx, [pu32]
2929 lock or [rdx], eax
2930# else
2931 mov edx, [pu32]
2932 lock or [edx], eax
2933# endif
2934 }
2935# endif
2936}
2937#endif
2938
2939
2940/**
2941 * Atomically Or a signed 32-bit value, ordered.
2942 *
2943 * @param pi32 Pointer to the pointer variable to OR u32 with.
2944 * @param i32 The value to OR *pu32 with.
2945 */
2946DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2947{
2948 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2949}
2950
2951
2952/**
2953 * Atomically Or an unsigned 64-bit value, ordered.
2954 *
2955 * @param pu64 Pointer to the pointer variable to OR u64 with.
2956 * @param u64 The value to OR *pu64 with.
2957 */
2958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2959DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2960#else
2961DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2962{
2963# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2964 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2965
2966# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2967 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2968 : "=m" (*pu64)
2969 : "r" (u64),
2970 "m" (*pu64));
2971# else
2972 for (;;)
2973 {
2974 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2975 uint64_t u64New = u64Old | u64;
2976 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2977 break;
2978 ASMNopPause();
2979 }
2980# endif
2981}
2982#endif
2983
2984
2985/**
2986 * Atomically Or a signed 64-bit value, ordered.
2987 *
2988 * @param pi64 Pointer to the pointer variable to OR u64 with.
2989 * @param i64 The value to OR *pu64 with.
2990 */
2991DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
2992{
2993 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
2994}
2995
2996
2997/**
2998 * Atomically And an unsigned 32-bit value, ordered.
2999 *
3000 * @param pu32 Pointer to the pointer variable to AND u32 with.
3001 * @param u32 The value to AND *pu32 with.
3002 */
3003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3004DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3005#else
3006DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3007{
3008# if RT_INLINE_ASM_USES_INTRIN
3009 _InterlockedAnd((long volatile *)pu32, u32);
3010
3011# elif RT_INLINE_ASM_GNU_STYLE
3012 __asm__ __volatile__("lock; andl %1, %0\n\t"
3013 : "=m" (*pu32)
3014 : "ir" (u32),
3015 "m" (*pu32));
3016# else
3017 __asm
3018 {
3019 mov eax, [u32]
3020# ifdef RT_ARCH_AMD64
3021 mov rdx, [pu32]
3022 lock and [rdx], eax
3023# else
3024 mov edx, [pu32]
3025 lock and [edx], eax
3026# endif
3027 }
3028# endif
3029}
3030#endif
3031
3032
3033/**
3034 * Atomically And a signed 32-bit value, ordered.
3035 *
3036 * @param pi32 Pointer to the pointer variable to AND i32 with.
3037 * @param i32 The value to AND *pi32 with.
3038 */
3039DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3040{
3041 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3042}
3043
3044
3045/**
3046 * Atomically And an unsigned 64-bit value, ordered.
3047 *
3048 * @param pu64 Pointer to the pointer variable to AND u64 with.
3049 * @param u64 The value to AND *pu64 with.
3050 */
3051#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3052DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3053#else
3054DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3055{
3056# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3057 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3058
3059# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3060 __asm__ __volatile__("lock; andq %1, %0\n\t"
3061 : "=m" (*pu64)
3062 : "r" (u64),
3063 "m" (*pu64));
3064# else
3065 for (;;)
3066 {
3067 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3068 uint64_t u64New = u64Old & u64;
3069 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3070 break;
3071 ASMNopPause();
3072 }
3073# endif
3074}
3075#endif
3076
3077
3078/**
3079 * Atomically And a signed 64-bit value, ordered.
3080 *
3081 * @param pi64 Pointer to the pointer variable to AND i64 with.
3082 * @param i64 The value to AND *pi64 with.
3083 */
3084DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3085{
3086 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3087}
3088
3089
3090/**
3091 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3092 *
3093 * @param pu32 Pointer to the pointer variable to OR u32 with.
3094 * @param u32 The value to OR *pu32 with.
3095 */
3096#if RT_INLINE_ASM_EXTERNAL
3097DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3098#else
3099DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3100{
3101# if RT_INLINE_ASM_GNU_STYLE
3102 __asm__ __volatile__("orl %1, %0\n\t"
3103 : "=m" (*pu32)
3104 : "ir" (u32),
3105 "m" (*pu32));
3106# else
3107 __asm
3108 {
3109 mov eax, [u32]
3110# ifdef RT_ARCH_AMD64
3111 mov rdx, [pu32]
3112 or [rdx], eax
3113# else
3114 mov edx, [pu32]
3115 or [edx], eax
3116# endif
3117 }
3118# endif
3119}
3120#endif
3121
3122
3123/**
3124 * Atomically OR a signed 32-bit value, unordered.
3125 *
3126 * @param pi32 Pointer to the pointer variable to OR u32 with.
3127 * @param i32 The value to OR *pu32 with.
3128 */
3129DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3130{
3131 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3132}
3133
3134
3135/**
3136 * Atomically OR an unsigned 64-bit value, unordered.
3137 *
3138 * @param pu64 Pointer to the pointer variable to OR u64 with.
3139 * @param u64 The value to OR *pu64 with.
3140 */
3141#if RT_INLINE_ASM_EXTERNAL
3142DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3143#else
3144DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3145{
3146# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3147 __asm__ __volatile__("orq %1, %q0\n\t"
3148 : "=m" (*pu64)
3149 : "r" (u64),
3150 "m" (*pu64));
3151# else
3152 for (;;)
3153 {
3154 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3155 uint64_t u64New = u64Old | u64;
3156 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3157 break;
3158 ASMNopPause();
3159 }
3160# endif
3161}
3162#endif
3163
3164
3165/**
3166 * Atomically Or a signed 64-bit value, unordered.
3167 *
3168 * @param pi64 Pointer to the pointer variable to OR u64 with.
3169 * @param i64 The value to OR *pu64 with.
3170 */
3171DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3172{
3173 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3174}
3175
3176
3177/**
3178 * Atomically And an unsigned 32-bit value, unordered.
3179 *
3180 * @param pu32 Pointer to the pointer variable to AND u32 with.
3181 * @param u32 The value to AND *pu32 with.
3182 */
3183#if RT_INLINE_ASM_EXTERNAL
3184DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3185#else
3186DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3187{
3188# if RT_INLINE_ASM_GNU_STYLE
3189 __asm__ __volatile__("andl %1, %0\n\t"
3190 : "=m" (*pu32)
3191 : "ir" (u32),
3192 "m" (*pu32));
3193# else
3194 __asm
3195 {
3196 mov eax, [u32]
3197# ifdef RT_ARCH_AMD64
3198 mov rdx, [pu32]
3199 and [rdx], eax
3200# else
3201 mov edx, [pu32]
3202 and [edx], eax
3203# endif
3204 }
3205# endif
3206}
3207#endif
3208
3209
3210/**
3211 * Atomically And a signed 32-bit value, unordered.
3212 *
3213 * @param pi32 Pointer to the pointer variable to AND i32 with.
3214 * @param i32 The value to AND *pi32 with.
3215 */
3216DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3217{
3218 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3219}
3220
3221
3222/**
3223 * Atomically And an unsigned 64-bit value, unordered.
3224 *
3225 * @param pu64 Pointer to the pointer variable to AND u64 with.
3226 * @param u64 The value to AND *pu64 with.
3227 */
3228#if RT_INLINE_ASM_EXTERNAL
3229DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3230#else
3231DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3232{
3233# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3234 __asm__ __volatile__("andq %1, %0\n\t"
3235 : "=m" (*pu64)
3236 : "r" (u64),
3237 "m" (*pu64));
3238# else
3239 for (;;)
3240 {
3241 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3242 uint64_t u64New = u64Old & u64;
3243 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3244 break;
3245 ASMNopPause();
3246 }
3247# endif
3248}
3249#endif
3250
3251
3252/**
3253 * Atomically And a signed 64-bit value, unordered.
3254 *
3255 * @param pi64 Pointer to the pointer variable to AND i64 with.
3256 * @param i64 The value to AND *pi64 with.
3257 */
3258DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3259{
3260 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3261}
3262
3263
3264
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the page helpers in this header: 0x2000 when
 * RT_ARCH_SPARC64 is defined, 0x1000 otherwise.  Defined locally to avoid
 * dragging in iprt/param.h.
 *
 * If the environment already defines PAGE_SIZE (and NT_INCLUDED is not
 * defined), the two values are required to agree at compile time.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
#else
# define RT_ASM_PAGE_SIZE   0x1000
#endif
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != RT_ASM_PAGE_SIZE
#  if defined(RT_ARCH_SPARC64)
#   error "PAGE_SIZE is not 0x2000!"
#  else
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3284
3285/**
3286 * Zeros a 4K memory page.
3287 *
3288 * @param pv Pointer to the memory block. This must be page aligned.
3289 */
3290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3291DECLASM(void) ASMMemZeroPage(volatile void *pv);
3292# else
3293DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3294{
3295# if RT_INLINE_ASM_USES_INTRIN
3296# ifdef RT_ARCH_AMD64
3297 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3298# else
3299 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3300# endif
3301
3302# elif RT_INLINE_ASM_GNU_STYLE
3303 RTCCUINTREG uDummy;
3304# ifdef RT_ARCH_AMD64
3305 __asm__ __volatile__("rep stosq"
3306 : "=D" (pv),
3307 "=c" (uDummy)
3308 : "0" (pv),
3309 "c" (RT_ASM_PAGE_SIZE >> 3),
3310 "a" (0)
3311 : "memory");
3312# else
3313 __asm__ __volatile__("rep stosl"
3314 : "=D" (pv),
3315 "=c" (uDummy)
3316 : "0" (pv),
3317 "c" (RT_ASM_PAGE_SIZE >> 2),
3318 "a" (0)
3319 : "memory");
3320# endif
3321# else
3322 __asm
3323 {
3324# ifdef RT_ARCH_AMD64
3325 xor rax, rax
3326 mov ecx, 0200h
3327 mov rdi, [pv]
3328 rep stosq
3329# else
3330 xor eax, eax
3331 mov ecx, 0400h
3332 mov edi, [pv]
3333 rep stosd
3334# endif
3335 }
3336# endif
3337}
3338# endif
3339
3340
3341/**
3342 * Zeros a memory block with a 32-bit aligned size.
3343 *
3344 * @param pv Pointer to the memory block.
3345 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3346 */
3347#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3348DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3349#else
3350DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3351{
3352# if RT_INLINE_ASM_USES_INTRIN
3353# ifdef RT_ARCH_AMD64
3354 if (!(cb & 7))
3355 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3356 else
3357# endif
3358 __stosd((unsigned long *)pv, 0, cb / 4);
3359
3360# elif RT_INLINE_ASM_GNU_STYLE
3361 __asm__ __volatile__("rep stosl"
3362 : "=D" (pv),
3363 "=c" (cb)
3364 : "0" (pv),
3365 "1" (cb >> 2),
3366 "a" (0)
3367 : "memory");
3368# else
3369 __asm
3370 {
3371 xor eax, eax
3372# ifdef RT_ARCH_AMD64
3373 mov rcx, [cb]
3374 shr rcx, 2
3375 mov rdi, [pv]
3376# else
3377 mov ecx, [cb]
3378 shr ecx, 2
3379 mov edi, [pv]
3380# endif
3381 rep stosd
3382 }
3383# endif
3384}
3385#endif
3386
3387
3388/**
3389 * Fills a memory block with a 32-bit aligned size.
3390 *
3391 * @param pv Pointer to the memory block.
3392 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3393 * @param u32 The value to fill with.
3394 */
3395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3396DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3397#else
3398DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3399{
3400# if RT_INLINE_ASM_USES_INTRIN
3401# ifdef RT_ARCH_AMD64
3402 if (!(cb & 7))
3403 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3404 else
3405# endif
3406 __stosd((unsigned long *)pv, u32, cb / 4);
3407
3408# elif RT_INLINE_ASM_GNU_STYLE
3409 __asm__ __volatile__("rep stosl"
3410 : "=D" (pv),
3411 "=c" (cb)
3412 : "0" (pv),
3413 "1" (cb >> 2),
3414 "a" (u32)
3415 : "memory");
3416# else
3417 __asm
3418 {
3419# ifdef RT_ARCH_AMD64
3420 mov rcx, [cb]
3421 shr rcx, 2
3422 mov rdi, [pv]
3423# else
3424 mov ecx, [cb]
3425 shr ecx, 2
3426 mov edi, [pv]
3427# endif
3428 mov eax, [u32]
3429 rep stosd
3430 }
3431# endif
3432}
3433#endif
3434
3435
3436/**
3437 * Checks if a memory page is all zeros.
3438 *
3439 * @returns true / false.
3440 *
3441 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3442 * boundary
3443 */
3444DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3445{
3446# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3447 union { RTCCUINTREG r; bool f; } uAX;
3448 RTCCUINTREG xCX, xDI;
3449 Assert(!((uintptr_t)pvPage & 15));
3450 __asm__ __volatile__("repe; "
3451# ifdef RT_ARCH_AMD64
3452 "scasq\n\t"
3453# else
3454 "scasl\n\t"
3455# endif
3456 "setnc %%al\n\t"
3457 : "=&c" (xCX),
3458 "=&D" (xDI),
3459 "=&a" (uAX.r)
3460 : "mr" (pvPage),
3461# ifdef RT_ARCH_AMD64
3462 "0" (RT_ASM_PAGE_SIZE/8),
3463# else
3464 "0" (RT_ASM_PAGE_SIZE/4),
3465# endif
3466 "1" (pvPage),
3467 "2" (0));
3468 return uAX.f;
3469# else
3470 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3471 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3472 Assert(!((uintptr_t)pvPage & 15));
3473 for (;;)
3474 {
3475 if (puPtr[0]) return false;
3476 if (puPtr[4]) return false;
3477
3478 if (puPtr[2]) return false;
3479 if (puPtr[6]) return false;
3480
3481 if (puPtr[1]) return false;
3482 if (puPtr[5]) return false;
3483
3484 if (puPtr[3]) return false;
3485 if (puPtr[7]) return false;
3486
3487 if (!--cLeft)
3488 return true;
3489 puPtr += 8;
3490 }
3491 return true;
3492# endif
3493}
3494
3495
3496/**
3497 * Checks if a memory block is filled with the specified byte.
3498 *
3499 * This is a sort of inverted memchr.
3500 *
3501 * @returns Pointer to the byte which doesn't equal u8.
3502 * @returns NULL if all equal to u8.
3503 *
3504 * @param pv Pointer to the memory block.
3505 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3506 * @param u8 The value it's supposed to be filled with.
3507 *
3508 * @todo Fix name, it is a predicate function but it's not returning boolean!
3509 */
3510DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3511{
3512/** @todo rewrite this in inline assembly? */
3513 uint8_t const *pb = (uint8_t const *)pv;
3514 for (; cb; cb--, pb++)
3515 if (RT_UNLIKELY(*pb != u8))
3516 return (void *)pb;
3517 return NULL;
3518}
3519
3520
3521/**
3522 * Checks if a memory block is filled with the specified 32-bit value.
3523 *
3524 * This is a sort of inverted memchr.
3525 *
3526 * @returns Pointer to the first value which doesn't equal u32.
3527 * @returns NULL if all equal to u32.
3528 *
3529 * @param pv Pointer to the memory block.
3530 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3531 * @param u32 The value it's supposed to be filled with.
3532 *
3533 * @todo Fix name, it is a predicate function but it's not returning boolean!
3534 */
3535DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3536{
3537/** @todo rewrite this in inline assembly? */
3538 uint32_t const *pu32 = (uint32_t const *)pv;
3539 for (; cb; cb -= 4, pu32++)
3540 if (RT_UNLIKELY(*pu32 != u32))
3541 return (uint32_t *)pu32;
3542 return NULL;
3543}
3544
3545
3546/**
3547 * Probes a byte pointer for read access.
3548 *
3549 * While the function will not fault if the byte is not read accessible,
3550 * the idea is to do this in a safe place like before acquiring locks
3551 * and such like.
3552 *
3553 * Also, this functions guarantees that an eager compiler is not going
3554 * to optimize the probing away.
3555 *
3556 * @param pvByte Pointer to the byte.
3557 */
3558#if RT_INLINE_ASM_EXTERNAL
3559DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3560#else
3561DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3562{
3563 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3564 uint8_t u8;
3565# if RT_INLINE_ASM_GNU_STYLE
3566 __asm__ __volatile__("movb (%1), %0\n\t"
3567 : "=r" (u8)
3568 : "r" (pvByte));
3569# else
3570 __asm
3571 {
3572# ifdef RT_ARCH_AMD64
3573 mov rax, [pvByte]
3574 mov al, [rax]
3575# else
3576 mov eax, [pvByte]
3577 mov al, [eax]
3578# endif
3579 mov [u8], al
3580 }
3581# endif
3582 return u8;
3583}
3584#endif
3585
3586/**
3587 * Probes a buffer for read access page by page.
3588 *
3589 * While the function will fault if the buffer is not fully read
3590 * accessible, the idea is to do this in a safe place like before
3591 * acquiring locks and such like.
3592 *
3593 * Also, this functions guarantees that an eager compiler is not going
3594 * to optimize the probing away.
3595 *
3596 * @param pvBuf Pointer to the buffer.
3597 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3598 */
3599DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3600{
3601 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3602 /* the first byte */
3603 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3604 ASMProbeReadByte(pu8);
3605
3606 /* the pages in between pages. */
3607 while (cbBuf > RT_ASM_PAGE_SIZE)
3608 {
3609 ASMProbeReadByte(pu8);
3610 cbBuf -= RT_ASM_PAGE_SIZE;
3611 pu8 += RT_ASM_PAGE_SIZE;
3612 }
3613
3614 /* the last byte */
3615 ASMProbeReadByte(pu8 + cbBuf - 1);
3616}
3617
3618
3619
3620/** @defgroup grp_inline_bits Bit Operations
3621 * @{
3622 */
3623
3624
3625/**
3626 * Sets a bit in a bitmap.
3627 *
3628 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3629 * @param iBit The bit to set.
3630 *
3631 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3632 * However, doing so will yield better performance as well as avoiding
3633 * traps accessing the last bits in the bitmap.
3634 */
3635#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3636DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3637#else
3638DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3639{
3640# if RT_INLINE_ASM_USES_INTRIN
3641 _bittestandset((long *)pvBitmap, iBit);
3642
3643# elif RT_INLINE_ASM_GNU_STYLE
3644 __asm__ __volatile__("btsl %1, %0"
3645 : "=m" (*(volatile long *)pvBitmap)
3646 : "Ir" (iBit),
3647 "m" (*(volatile long *)pvBitmap)
3648 : "memory");
3649# else
3650 __asm
3651 {
3652# ifdef RT_ARCH_AMD64
3653 mov rax, [pvBitmap]
3654 mov edx, [iBit]
3655 bts [rax], edx
3656# else
3657 mov eax, [pvBitmap]
3658 mov edx, [iBit]
3659 bts [eax], edx
3660# endif
3661 }
3662# endif
3663}
3664#endif
3665
3666
3667/**
3668 * Atomically sets a bit in a bitmap, ordered.
3669 *
3670 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3671 * the memory access isn't atomic!
3672 * @param iBit The bit to set.
3673 */
3674#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3675DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3676#else
3677DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3678{
3679 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3680# if RT_INLINE_ASM_USES_INTRIN
3681 _interlockedbittestandset((long *)pvBitmap, iBit);
3682# elif RT_INLINE_ASM_GNU_STYLE
3683 __asm__ __volatile__("lock; btsl %1, %0"
3684 : "=m" (*(volatile long *)pvBitmap)
3685 : "Ir" (iBit),
3686 "m" (*(volatile long *)pvBitmap)
3687 : "memory");
3688# else
3689 __asm
3690 {
3691# ifdef RT_ARCH_AMD64
3692 mov rax, [pvBitmap]
3693 mov edx, [iBit]
3694 lock bts [rax], edx
3695# else
3696 mov eax, [pvBitmap]
3697 mov edx, [iBit]
3698 lock bts [eax], edx
3699# endif
3700 }
3701# endif
3702}
3703#endif
3704
3705
3706/**
3707 * Clears a bit in a bitmap.
3708 *
3709 * @param pvBitmap Pointer to the bitmap.
3710 * @param iBit The bit to clear.
3711 *
3712 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3713 * However, doing so will yield better performance as well as avoiding
3714 * traps accessing the last bits in the bitmap.
3715 */
3716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3717DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3718#else
3719DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3720{
3721# if RT_INLINE_ASM_USES_INTRIN
3722 _bittestandreset((long *)pvBitmap, iBit);
3723
3724# elif RT_INLINE_ASM_GNU_STYLE
3725 __asm__ __volatile__("btrl %1, %0"
3726 : "=m" (*(volatile long *)pvBitmap)
3727 : "Ir" (iBit),
3728 "m" (*(volatile long *)pvBitmap)
3729 : "memory");
3730# else
3731 __asm
3732 {
3733# ifdef RT_ARCH_AMD64
3734 mov rax, [pvBitmap]
3735 mov edx, [iBit]
3736 btr [rax], edx
3737# else
3738 mov eax, [pvBitmap]
3739 mov edx, [iBit]
3740 btr [eax], edx
3741# endif
3742 }
3743# endif
3744}
3745#endif
3746
3747
3748/**
3749 * Atomically clears a bit in a bitmap, ordered.
3750 *
3751 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3752 * the memory access isn't atomic!
3753 * @param iBit The bit to toggle set.
3754 * @remarks No memory barrier, take care on smp.
3755 */
3756#if RT_INLINE_ASM_EXTERNAL
3757DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3758#else
3759DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3760{
3761 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3762# if RT_INLINE_ASM_GNU_STYLE
3763 __asm__ __volatile__("lock; btrl %1, %0"
3764 : "=m" (*(volatile long *)pvBitmap)
3765 : "Ir" (iBit),
3766 "m" (*(volatile long *)pvBitmap)
3767 : "memory");
3768# else
3769 __asm
3770 {
3771# ifdef RT_ARCH_AMD64
3772 mov rax, [pvBitmap]
3773 mov edx, [iBit]
3774 lock btr [rax], edx
3775# else
3776 mov eax, [pvBitmap]
3777 mov edx, [iBit]
3778 lock btr [eax], edx
3779# endif
3780 }
3781# endif
3782}
3783#endif
3784
3785
3786/**
3787 * Toggles a bit in a bitmap.
3788 *
3789 * @param pvBitmap Pointer to the bitmap.
3790 * @param iBit The bit to toggle.
3791 *
3792 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3793 * However, doing so will yield better performance as well as avoiding
3794 * traps accessing the last bits in the bitmap.
3795 */
3796#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3797DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3798#else
3799DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3800{
3801# if RT_INLINE_ASM_USES_INTRIN
3802 _bittestandcomplement((long *)pvBitmap, iBit);
3803# elif RT_INLINE_ASM_GNU_STYLE
3804 __asm__ __volatile__("btcl %1, %0"
3805 : "=m" (*(volatile long *)pvBitmap)
3806 : "Ir" (iBit),
3807 "m" (*(volatile long *)pvBitmap)
3808 : "memory");
3809# else
3810 __asm
3811 {
3812# ifdef RT_ARCH_AMD64
3813 mov rax, [pvBitmap]
3814 mov edx, [iBit]
3815 btc [rax], edx
3816# else
3817 mov eax, [pvBitmap]
3818 mov edx, [iBit]
3819 btc [eax], edx
3820# endif
3821 }
3822# endif
3823}
3824#endif
3825
3826
3827/**
3828 * Atomically toggles a bit in a bitmap, ordered.
3829 *
3830 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3831 * the memory access isn't atomic!
3832 * @param iBit The bit to test and set.
3833 */
3834#if RT_INLINE_ASM_EXTERNAL
3835DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3836#else
3837DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3838{
3839 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3840# if RT_INLINE_ASM_GNU_STYLE
3841 __asm__ __volatile__("lock; btcl %1, %0"
3842 : "=m" (*(volatile long *)pvBitmap)
3843 : "Ir" (iBit),
3844 "m" (*(volatile long *)pvBitmap)
3845 : "memory");
3846# else
3847 __asm
3848 {
3849# ifdef RT_ARCH_AMD64
3850 mov rax, [pvBitmap]
3851 mov edx, [iBit]
3852 lock btc [rax], edx
3853# else
3854 mov eax, [pvBitmap]
3855 mov edx, [iBit]
3856 lock btc [eax], edx
3857# endif
3858 }
3859# endif
3860}
3861#endif
3862
3863
3864/**
3865 * Tests and sets a bit in a bitmap.
3866 *
3867 * @returns true if the bit was set.
3868 * @returns false if the bit was clear.
3869 *
3870 * @param pvBitmap Pointer to the bitmap.
3871 * @param iBit The bit to test and set.
3872 *
3873 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3874 * However, doing so will yield better performance as well as avoiding
3875 * traps accessing the last bits in the bitmap.
3876 */
3877#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3878DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3879#else
3880DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3881{
3882 union { bool f; uint32_t u32; uint8_t u8; } rc;
3883# if RT_INLINE_ASM_USES_INTRIN
3884 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3885
3886# elif RT_INLINE_ASM_GNU_STYLE
3887 __asm__ __volatile__("btsl %2, %1\n\t"
3888 "setc %b0\n\t"
3889 "andl $1, %0\n\t"
3890 : "=q" (rc.u32),
3891 "=m" (*(volatile long *)pvBitmap)
3892 : "Ir" (iBit),
3893 "m" (*(volatile long *)pvBitmap)
3894 : "memory");
3895# else
3896 __asm
3897 {
3898 mov edx, [iBit]
3899# ifdef RT_ARCH_AMD64
3900 mov rax, [pvBitmap]
3901 bts [rax], edx
3902# else
3903 mov eax, [pvBitmap]
3904 bts [eax], edx
3905# endif
3906 setc al
3907 and eax, 1
3908 mov [rc.u32], eax
3909 }
3910# endif
3911 return rc.f;
3912}
3913#endif
3914
3915
3916/**
3917 * Atomically tests and sets a bit in a bitmap, ordered.
3918 *
3919 * @returns true if the bit was set.
3920 * @returns false if the bit was clear.
3921 *
3922 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3923 * the memory access isn't atomic!
3924 * @param iBit The bit to set.
3925 */
3926#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3927DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3928#else
3929DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3930{
3931 union { bool f; uint32_t u32; uint8_t u8; } rc;
3932 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3933# if RT_INLINE_ASM_USES_INTRIN
3934 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3935# elif RT_INLINE_ASM_GNU_STYLE
3936 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3937 "setc %b0\n\t"
3938 "andl $1, %0\n\t"
3939 : "=q" (rc.u32),
3940 "=m" (*(volatile long *)pvBitmap)
3941 : "Ir" (iBit),
3942 "m" (*(volatile long *)pvBitmap)
3943 : "memory");
3944# else
3945 __asm
3946 {
3947 mov edx, [iBit]
3948# ifdef RT_ARCH_AMD64
3949 mov rax, [pvBitmap]
3950 lock bts [rax], edx
3951# else
3952 mov eax, [pvBitmap]
3953 lock bts [eax], edx
3954# endif
3955 setc al
3956 and eax, 1
3957 mov [rc.u32], eax
3958 }
3959# endif
3960 return rc.f;
3961}
3962#endif
3963
3964
3965/**
3966 * Tests and clears a bit in a bitmap.
3967 *
3968 * @returns true if the bit was set.
3969 * @returns false if the bit was clear.
3970 *
3971 * @param pvBitmap Pointer to the bitmap.
3972 * @param iBit The bit to test and clear.
3973 *
3974 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3975 * However, doing so will yield better performance as well as avoiding
3976 * traps accessing the last bits in the bitmap.
3977 */
3978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3979DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3980#else
3981DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3982{
3983 union { bool f; uint32_t u32; uint8_t u8; } rc;
3984# if RT_INLINE_ASM_USES_INTRIN
3985 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3986
3987# elif RT_INLINE_ASM_GNU_STYLE
3988 __asm__ __volatile__("btrl %2, %1\n\t"
3989 "setc %b0\n\t"
3990 "andl $1, %0\n\t"
3991 : "=q" (rc.u32),
3992 "=m" (*(volatile long *)pvBitmap)
3993 : "Ir" (iBit),
3994 "m" (*(volatile long *)pvBitmap)
3995 : "memory");
3996# else
3997 __asm
3998 {
3999 mov edx, [iBit]
4000# ifdef RT_ARCH_AMD64
4001 mov rax, [pvBitmap]
4002 btr [rax], edx
4003# else
4004 mov eax, [pvBitmap]
4005 btr [eax], edx
4006# endif
4007 setc al
4008 and eax, 1
4009 mov [rc.u32], eax
4010 }
4011# endif
4012 return rc.f;
4013}
4014#endif
4015
4016
4017/**
4018 * Atomically tests and clears a bit in a bitmap, ordered.
4019 *
4020 * @returns true if the bit was set.
4021 * @returns false if the bit was clear.
4022 *
4023 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4024 * the memory access isn't atomic!
4025 * @param iBit The bit to test and clear.
4026 *
4027 * @remarks No memory barrier, take care on smp.
4028 */
4029#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4030DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4031#else
4032DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4033{
4034 union { bool f; uint32_t u32; uint8_t u8; } rc;
4035 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4036# if RT_INLINE_ASM_USES_INTRIN
4037 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4038
4039# elif RT_INLINE_ASM_GNU_STYLE
4040 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4041 "setc %b0\n\t"
4042 "andl $1, %0\n\t"
4043 : "=q" (rc.u32),
4044 "=m" (*(volatile long *)pvBitmap)
4045 : "Ir" (iBit),
4046 "m" (*(volatile long *)pvBitmap)
4047 : "memory");
4048# else
4049 __asm
4050 {
4051 mov edx, [iBit]
4052# ifdef RT_ARCH_AMD64
4053 mov rax, [pvBitmap]
4054 lock btr [rax], edx
4055# else
4056 mov eax, [pvBitmap]
4057 lock btr [eax], edx
4058# endif
4059 setc al
4060 and eax, 1
4061 mov [rc.u32], eax
4062 }
4063# endif
4064 return rc.f;
4065}
4066#endif
4067
4068
4069/**
4070 * Tests and toggles a bit in a bitmap.
4071 *
4072 * @returns true if the bit was set.
4073 * @returns false if the bit was clear.
4074 *
4075 * @param pvBitmap Pointer to the bitmap.
4076 * @param iBit The bit to test and toggle.
4077 *
4078 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4079 * However, doing so will yield better performance as well as avoiding
4080 * traps accessing the last bits in the bitmap.
4081 */
4082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4083DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4084#else
4085DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4086{
4087 union { bool f; uint32_t u32; uint8_t u8; } rc;
4088# if RT_INLINE_ASM_USES_INTRIN
4089 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4090
4091# elif RT_INLINE_ASM_GNU_STYLE
4092 __asm__ __volatile__("btcl %2, %1\n\t"
4093 "setc %b0\n\t"
4094 "andl $1, %0\n\t"
4095 : "=q" (rc.u32),
4096 "=m" (*(volatile long *)pvBitmap)
4097 : "Ir" (iBit),
4098 "m" (*(volatile long *)pvBitmap)
4099 : "memory");
4100# else
4101 __asm
4102 {
4103 mov edx, [iBit]
4104# ifdef RT_ARCH_AMD64
4105 mov rax, [pvBitmap]
4106 btc [rax], edx
4107# else
4108 mov eax, [pvBitmap]
4109 btc [eax], edx
4110# endif
4111 setc al
4112 and eax, 1
4113 mov [rc.u32], eax
4114 }
4115# endif
4116 return rc.f;
4117}
4118#endif
4119
4120
4121/**
4122 * Atomically tests and toggles a bit in a bitmap, ordered.
4123 *
4124 * @returns true if the bit was set.
4125 * @returns false if the bit was clear.
4126 *
4127 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4128 * the memory access isn't atomic!
4129 * @param iBit The bit to test and toggle.
4130 */
4131#if RT_INLINE_ASM_EXTERNAL
4132DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4133#else
4134DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4135{
4136 union { bool f; uint32_t u32; uint8_t u8; } rc;
4137 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4138# if RT_INLINE_ASM_GNU_STYLE
4139 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4140 "setc %b0\n\t"
4141 "andl $1, %0\n\t"
4142 : "=q" (rc.u32),
4143 "=m" (*(volatile long *)pvBitmap)
4144 : "Ir" (iBit),
4145 "m" (*(volatile long *)pvBitmap)
4146 : "memory");
4147# else
4148 __asm
4149 {
4150 mov edx, [iBit]
4151# ifdef RT_ARCH_AMD64
4152 mov rax, [pvBitmap]
4153 lock btc [rax], edx
4154# else
4155 mov eax, [pvBitmap]
4156 lock btc [eax], edx
4157# endif
4158 setc al
4159 and eax, 1
4160 mov [rc.u32], eax
4161 }
4162# endif
4163 return rc.f;
4164}
4165#endif
4166
4167
4168/**
4169 * Tests if a bit in a bitmap is set.
4170 *
4171 * @returns true if the bit is set.
4172 * @returns false if the bit is clear.
4173 *
4174 * @param pvBitmap Pointer to the bitmap.
4175 * @param iBit The bit to test.
4176 *
4177 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4178 * However, doing so will yield better performance as well as avoiding
4179 * traps accessing the last bits in the bitmap.
4180 */
4181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4182DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4183#else
4184DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4185{
4186 union { bool f; uint32_t u32; uint8_t u8; } rc;
4187# if RT_INLINE_ASM_USES_INTRIN
4188 rc.u32 = _bittest((long *)pvBitmap, iBit);
4189# elif RT_INLINE_ASM_GNU_STYLE
4190
4191 __asm__ __volatile__("btl %2, %1\n\t"
4192 "setc %b0\n\t"
4193 "andl $1, %0\n\t"
4194 : "=q" (rc.u32)
4195 : "m" (*(const volatile long *)pvBitmap),
4196 "Ir" (iBit)
4197 : "memory");
4198# else
4199 __asm
4200 {
4201 mov edx, [iBit]
4202# ifdef RT_ARCH_AMD64
4203 mov rax, [pvBitmap]
4204 bt [rax], edx
4205# else
4206 mov eax, [pvBitmap]
4207 bt [eax], edx
4208# endif
4209 setc al
4210 and eax, 1
4211 mov [rc.u32], eax
4212 }
4213# endif
4214 return rc.f;
4215}
4216#endif
4217
4218
4219/**
4220 * Clears a bit range within a bitmap.
4221 *
4222 * @param pvBitmap Pointer to the bitmap.
4223 * @param iBitStart The First bit to clear.
4224 * @param iBitEnd The first bit not to clear.
4225 */
4226DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4227{
4228 if (iBitStart < iBitEnd)
4229 {
4230 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4231 int iStart = iBitStart & ~31;
4232 int iEnd = iBitEnd & ~31;
4233 if (iStart == iEnd)
4234 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4235 else
4236 {
4237 /* bits in first dword. */
4238 if (iBitStart & 31)
4239 {
4240 *pu32 &= (1 << (iBitStart & 31)) - 1;
4241 pu32++;
4242 iBitStart = iStart + 32;
4243 }
4244
4245 /* whole dword. */
4246 if (iBitStart != iEnd)
4247 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4248
4249 /* bits in last dword. */
4250 if (iBitEnd & 31)
4251 {
4252 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4253 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4254 }
4255 }
4256 }
4257}
4258
4259
4260/**
4261 * Sets a bit range within a bitmap.
4262 *
4263 * @param pvBitmap Pointer to the bitmap.
4264 * @param iBitStart The First bit to set.
4265 * @param iBitEnd The first bit not to set.
4266 */
4267DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4268{
4269 if (iBitStart < iBitEnd)
4270 {
4271 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4272 int iStart = iBitStart & ~31;
4273 int iEnd = iBitEnd & ~31;
4274 if (iStart == iEnd)
4275 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4276 else
4277 {
4278 /* bits in first dword. */
4279 if (iBitStart & 31)
4280 {
4281 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4282 pu32++;
4283 iBitStart = iStart + 32;
4284 }
4285
4286 /* whole dword. */
4287 if (iBitStart != iEnd)
4288 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4289
4290 /* bits in last dword. */
4291 if (iBitEnd & 31)
4292 {
4293 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4294 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4295 }
4296 }
4297 }
4298}
4299
4300
4301/**
4302 * Finds the first clear bit in a bitmap.
4303 *
4304 * @returns Index of the first zero bit.
4305 * @returns -1 if no clear bit was found.
4306 * @param pvBitmap Pointer to the bitmap.
4307 * @param cBits The number of bits in the bitmap. Multiple of 32.
4308 */
4309#if RT_INLINE_ASM_EXTERNAL
4310DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4311#else
4312DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4313{
4314 if (cBits)
4315 {
4316 int32_t iBit;
4317# if RT_INLINE_ASM_GNU_STYLE
4318 RTCCUINTREG uEAX, uECX, uEDI;
4319 cBits = RT_ALIGN_32(cBits, 32);
4320 __asm__ __volatile__("repe; scasl\n\t"
4321 "je 1f\n\t"
4322# ifdef RT_ARCH_AMD64
4323 "lea -4(%%rdi), %%rdi\n\t"
4324 "xorl (%%rdi), %%eax\n\t"
4325 "subq %5, %%rdi\n\t"
4326# else
4327 "lea -4(%%edi), %%edi\n\t"
4328 "xorl (%%edi), %%eax\n\t"
4329 "subl %5, %%edi\n\t"
4330# endif
4331 "shll $3, %%edi\n\t"
4332 "bsfl %%eax, %%edx\n\t"
4333 "addl %%edi, %%edx\n\t"
4334 "1:\t\n"
4335 : "=d" (iBit),
4336 "=&c" (uECX),
4337 "=&D" (uEDI),
4338 "=&a" (uEAX)
4339 : "0" (0xffffffff),
4340 "mr" (pvBitmap),
4341 "1" (cBits >> 5),
4342 "2" (pvBitmap),
4343 "3" (0xffffffff));
4344# else
4345 cBits = RT_ALIGN_32(cBits, 32);
4346 __asm
4347 {
4348# ifdef RT_ARCH_AMD64
4349 mov rdi, [pvBitmap]
4350 mov rbx, rdi
4351# else
4352 mov edi, [pvBitmap]
4353 mov ebx, edi
4354# endif
4355 mov edx, 0ffffffffh
4356 mov eax, edx
4357 mov ecx, [cBits]
4358 shr ecx, 5
4359 repe scasd
4360 je done
4361
4362# ifdef RT_ARCH_AMD64
4363 lea rdi, [rdi - 4]
4364 xor eax, [rdi]
4365 sub rdi, rbx
4366# else
4367 lea edi, [edi - 4]
4368 xor eax, [edi]
4369 sub edi, ebx
4370# endif
4371 shl edi, 3
4372 bsf edx, eax
4373 add edx, edi
4374 done:
4375 mov [iBit], edx
4376 }
4377# endif
4378 return iBit;
4379 }
4380 return -1;
4381}
4382#endif
4383
4384
4385/**
4386 * Finds the next clear bit in a bitmap.
4387 *
4388 * @returns Index of the first zero bit.
4389 * @returns -1 if no clear bit was found.
4390 * @param pvBitmap Pointer to the bitmap.
4391 * @param cBits The number of bits in the bitmap. Multiple of 32.
4392 * @param iBitPrev The bit returned from the last search.
4393 * The search will start at iBitPrev + 1.
4394 */
4395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4396DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4397#else
4398DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4399{
4400 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4401 int iBit = ++iBitPrev & 31;
4402 if (iBit)
4403 {
4404 /*
4405 * Inspect the 32-bit word containing the unaligned bit.
4406 */
4407 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4408
4409# if RT_INLINE_ASM_USES_INTRIN
4410 unsigned long ulBit = 0;
4411 if (_BitScanForward(&ulBit, u32))
4412 return ulBit + iBitPrev;
4413# else
4414# if RT_INLINE_ASM_GNU_STYLE
4415 __asm__ __volatile__("bsf %1, %0\n\t"
4416 "jnz 1f\n\t"
4417 "movl $-1, %0\n\t"
4418 "1:\n\t"
4419 : "=r" (iBit)
4420 : "r" (u32));
4421# else
4422 __asm
4423 {
4424 mov edx, [u32]
4425 bsf eax, edx
4426 jnz done
4427 mov eax, 0ffffffffh
4428 done:
4429 mov [iBit], eax
4430 }
4431# endif
4432 if (iBit >= 0)
4433 return iBit + iBitPrev;
4434# endif
4435
4436 /*
4437 * Skip ahead and see if there is anything left to search.
4438 */
4439 iBitPrev |= 31;
4440 iBitPrev++;
4441 if (cBits <= (uint32_t)iBitPrev)
4442 return -1;
4443 }
4444
4445 /*
4446 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4447 */
4448 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4449 if (iBit >= 0)
4450 iBit += iBitPrev;
4451 return iBit;
4452}
4453#endif
4454
4455
4456/**
4457 * Finds the first set bit in a bitmap.
4458 *
4459 * @returns Index of the first set bit.
4460 * @returns -1 if no clear bit was found.
4461 * @param pvBitmap Pointer to the bitmap.
4462 * @param cBits The number of bits in the bitmap. Multiple of 32.
4463 */
4464#if RT_INLINE_ASM_EXTERNAL
4465DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4466#else
4467DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4468{
4469 if (cBits)
4470 {
4471 int32_t iBit;
4472# if RT_INLINE_ASM_GNU_STYLE
4473 RTCCUINTREG uEAX, uECX, uEDI;
4474 cBits = RT_ALIGN_32(cBits, 32);
4475 __asm__ __volatile__("repe; scasl\n\t"
4476 "je 1f\n\t"
4477# ifdef RT_ARCH_AMD64
4478 "lea -4(%%rdi), %%rdi\n\t"
4479 "movl (%%rdi), %%eax\n\t"
4480 "subq %5, %%rdi\n\t"
4481# else
4482 "lea -4(%%edi), %%edi\n\t"
4483 "movl (%%edi), %%eax\n\t"
4484 "subl %5, %%edi\n\t"
4485# endif
4486 "shll $3, %%edi\n\t"
4487 "bsfl %%eax, %%edx\n\t"
4488 "addl %%edi, %%edx\n\t"
4489 "1:\t\n"
4490 : "=d" (iBit),
4491 "=&c" (uECX),
4492 "=&D" (uEDI),
4493 "=&a" (uEAX)
4494 : "0" (0xffffffff),
4495 "mr" (pvBitmap),
4496 "1" (cBits >> 5),
4497 "2" (pvBitmap),
4498 "3" (0));
4499# else
4500 cBits = RT_ALIGN_32(cBits, 32);
4501 __asm
4502 {
4503# ifdef RT_ARCH_AMD64
4504 mov rdi, [pvBitmap]
4505 mov rbx, rdi
4506# else
4507 mov edi, [pvBitmap]
4508 mov ebx, edi
4509# endif
4510 mov edx, 0ffffffffh
4511 xor eax, eax
4512 mov ecx, [cBits]
4513 shr ecx, 5
4514 repe scasd
4515 je done
4516# ifdef RT_ARCH_AMD64
4517 lea rdi, [rdi - 4]
4518 mov eax, [rdi]
4519 sub rdi, rbx
4520# else
4521 lea edi, [edi - 4]
4522 mov eax, [edi]
4523 sub edi, ebx
4524# endif
4525 shl edi, 3
4526 bsf edx, eax
4527 add edx, edi
4528 done:
4529 mov [iBit], edx
4530 }
4531# endif
4532 return iBit;
4533 }
4534 return -1;
4535}
4536#endif
4537
4538
4539/**
4540 * Finds the next set bit in a bitmap.
4541 *
4542 * @returns Index of the next set bit.
4543 * @returns -1 if no set bit was found.
4544 * @param pvBitmap Pointer to the bitmap.
4545 * @param cBits The number of bits in the bitmap. Multiple of 32.
4546 * @param iBitPrev The bit returned from the last search.
4547 * The search will start at iBitPrev + 1.
4548 */
4549#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4550DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4551#else
4552DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4553{
4554 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4555 int iBit = ++iBitPrev & 31;
4556 if (iBit)
4557 {
4558 /*
4559 * Inspect the 32-bit word containing the unaligned bit.
4560 */
4561 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4562
4563# if RT_INLINE_ASM_USES_INTRIN
4564 unsigned long ulBit = 0;
4565 if (_BitScanForward(&ulBit, u32))
4566 return ulBit + iBitPrev;
4567# else
4568# if RT_INLINE_ASM_GNU_STYLE
4569 __asm__ __volatile__("bsf %1, %0\n\t"
4570 "jnz 1f\n\t"
4571 "movl $-1, %0\n\t"
4572 "1:\n\t"
4573 : "=r" (iBit)
4574 : "r" (u32));
4575# else
4576 __asm
4577 {
4578 mov edx, [u32]
4579 bsf eax, edx
4580 jnz done
4581 mov eax, 0ffffffffh
4582 done:
4583 mov [iBit], eax
4584 }
4585# endif
4586 if (iBit >= 0)
4587 return iBit + iBitPrev;
4588# endif
4589
4590 /*
4591 * Skip ahead and see if there is anything left to search.
4592 */
4593 iBitPrev |= 31;
4594 iBitPrev++;
4595 if (cBits <= (uint32_t)iBitPrev)
4596 return -1;
4597 }
4598
4599 /*
4600 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4601 */
4602 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4603 if (iBit >= 0)
4604 iBit += iBitPrev;
4605 return iBit;
4606}
4607#endif
4608
4609
4610/**
4611 * Finds the first bit which is set in the given 32-bit integer.
4612 * Bits are numbered from 1 (least significant) to 32.
4613 *
4614 * @returns index [1..32] of the first set bit.
4615 * @returns 0 if all bits are cleared.
4616 * @param u32 Integer to search for set bits.
4617 * @remark Similar to ffs() in BSD.
4618 */
4619#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4620DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4621#else
4622DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4623{
4624# if RT_INLINE_ASM_USES_INTRIN
4625 unsigned long iBit;
4626 if (_BitScanForward(&iBit, u32))
4627 iBit++;
4628 else
4629 iBit = 0;
4630# elif RT_INLINE_ASM_GNU_STYLE
4631 uint32_t iBit;
4632 __asm__ __volatile__("bsf %1, %0\n\t"
4633 "jnz 1f\n\t"
4634 "xorl %0, %0\n\t"
4635 "jmp 2f\n"
4636 "1:\n\t"
4637 "incl %0\n"
4638 "2:\n\t"
4639 : "=r" (iBit)
4640 : "rm" (u32));
4641# else
4642 uint32_t iBit;
4643 _asm
4644 {
4645 bsf eax, [u32]
4646 jnz found
4647 xor eax, eax
4648 jmp done
4649 found:
4650 inc eax
4651 done:
4652 mov [iBit], eax
4653 }
4654# endif
4655 return iBit;
4656}
4657#endif
4658
4659
4660/**
4661 * Finds the first bit which is set in the given 32-bit integer.
4662 * Bits are numbered from 1 (least significant) to 32.
4663 *
4664 * @returns index [1..32] of the first set bit.
4665 * @returns 0 if all bits are cleared.
4666 * @param i32 Integer to search for set bits.
4667 * @remark Similar to ffs() in BSD.
4668 */
4669DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4670{
4671 return ASMBitFirstSetU32((uint32_t)i32);
4672}
4673
4674
4675/**
4676 * Finds the last bit which is set in the given 32-bit integer.
4677 * Bits are numbered from 1 (least significant) to 32.
4678 *
4679 * @returns index [1..32] of the last set bit.
4680 * @returns 0 if all bits are cleared.
4681 * @param u32 Integer to search for set bits.
4682 * @remark Similar to fls() in BSD.
4683 */
4684#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4685DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4686#else
4687DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4688{
4689# if RT_INLINE_ASM_USES_INTRIN
4690 unsigned long iBit;
4691 if (_BitScanReverse(&iBit, u32))
4692 iBit++;
4693 else
4694 iBit = 0;
4695# elif RT_INLINE_ASM_GNU_STYLE
4696 uint32_t iBit;
4697 __asm__ __volatile__("bsrl %1, %0\n\t"
4698 "jnz 1f\n\t"
4699 "xorl %0, %0\n\t"
4700 "jmp 2f\n"
4701 "1:\n\t"
4702 "incl %0\n"
4703 "2:\n\t"
4704 : "=r" (iBit)
4705 : "rm" (u32));
4706# else
4707 uint32_t iBit;
4708 _asm
4709 {
4710 bsr eax, [u32]
4711 jnz found
4712 xor eax, eax
4713 jmp done
4714 found:
4715 inc eax
4716 done:
4717 mov [iBit], eax
4718 }
4719# endif
4720 return iBit;
4721}
4722#endif
4723
4724
4725/**
4726 * Finds the last bit which is set in the given 32-bit integer.
4727 * Bits are numbered from 1 (least significant) to 32.
4728 *
4729 * @returns index [1..32] of the last set bit.
4730 * @returns 0 if all bits are cleared.
4731 * @param i32 Integer to search for set bits.
4732 * @remark Similar to fls() in BSD.
4733 */
4734DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4735{
4736 return ASMBitLastSetU32((uint32_t)i32);
4737}
4738
4739/**
4740 * Reverse the byte order of the given 16-bit integer.
4741 *
4742 * @returns Revert
4743 * @param u16 16-bit integer value.
4744 */
4745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4746DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4747#else
4748DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4749{
4750# if RT_INLINE_ASM_USES_INTRIN
4751 u16 = _byteswap_ushort(u16);
4752# elif RT_INLINE_ASM_GNU_STYLE
4753 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4754# else
4755 _asm
4756 {
4757 mov ax, [u16]
4758 ror ax, 8
4759 mov [u16], ax
4760 }
4761# endif
4762 return u16;
4763}
4764#endif
4765
4766
4767/**
4768 * Reverse the byte order of the given 32-bit integer.
4769 *
4770 * @returns Revert
4771 * @param u32 32-bit integer value.
4772 */
4773#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4774DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4775#else
4776DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4777{
4778# if RT_INLINE_ASM_USES_INTRIN
4779 u32 = _byteswap_ulong(u32);
4780# elif RT_INLINE_ASM_GNU_STYLE
4781 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4782# else
4783 _asm
4784 {
4785 mov eax, [u32]
4786 bswap eax
4787 mov [u32], eax
4788 }
4789# endif
4790 return u32;
4791}
4792#endif
4793
4794
4795/**
4796 * Reverse the byte order of the given 64-bit integer.
4797 *
4798 * @returns Revert
4799 * @param u64 64-bit integer value.
4800 */
4801DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4802{
4803#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4804 u64 = _byteswap_uint64(u64);
4805#else
4806 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4807 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4808#endif
4809 return u64;
4810}
4811
4812
4813/** @} */
4814
4815
4816/** @} */
4817
4818#endif
4819
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette