VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 38761

Last change on this file since 38761 was 37209, checked in by vboxsync, 14 years ago

circbuf: Use ASMAtomic*Z instead of ASMAtomic*Size because the latter is causing trouble on recent microsoft compilers.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 132.6 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2010 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# ifdef RT_ARCH_AMD64
69# pragma intrinsic(__stosq)
70# pragma intrinsic(_byteswap_uint64)
71# pragma intrinsic(_InterlockedExchange64)
72# pragma intrinsic(_InterlockedExchangeAdd64)
73# pragma intrinsic(_InterlockedAnd64)
74# pragma intrinsic(_InterlockedOr64)
75# pragma intrinsic(_InterlockedIncrement64)
76# pragma intrinsic(_InterlockedDecrement64)
77# endif
78#endif
79
80
81/** @defgroup grp_rt_asm ASM - Assembly Routines
82 * @ingroup grp_rt
83 *
84 * @remarks The difference between ordered and unordered atomic operations is that
85 * the former will complete outstanding reads and writes before continuing
86 * while the latter doesn't make any promises about the order. Ordered
87 * operations don't, it seems, make any 100% promise with regard to whether
88 * the operation will complete before any subsequent memory access.
89 * (please, correct if wrong.)
90 *
91 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
92 * are unordered (note the Uo).
93 *
94 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
95 * or even optimize assembler instructions away. For instance, in the following code
96 * the second rdmsr instruction is optimized away because gcc treats that instruction
97 * as deterministic:
98 *
99 * @code
100 * static inline uint64_t rdmsr_low(int idx)
101 * {
102 * uint32_t low;
103 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
104 * return low; }
105 * ...
106 * uint32_t msr1 = rdmsr_low(1);
107 * foo(msr1);
108 * msr1 = rdmsr_low(1);
109 * bar(msr1);
110 * @endcode
111 *
112 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
113 * use the result of the first call as input parameter for bar() as well. For rdmsr this
114 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
115 * machine status information in general.
116 *
117 * @{
118 */
119
120
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler. So far this workaround is still required for 4.4 and 4.5.
 *
 * Always defined as a plain 1 or 0, so it can be used both in \#if
 * expressions and in ordinary C expressions.  (Letting a macro expand to
 * something containing 'defined(...)' inside \#if is undefined behaviour.)
 */
#if defined(__GNUC__) && defined(__i386__)
# if __GNUC__ == 4 && __GNUC_MINOR__ >= 3
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
130
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * Defined as 1 when the inline cmpxchg8b code must be avoided, otherwise 0.
 * The condition is evaluated here, once, so that the macro never expands to
 * a 'defined(...)' operator inside somebody else's \#if.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if    (defined(PIC) || defined(__PIC__)) \
     && defined(RT_ARCH_X86) \
     && (RT_INLINE_ASM_GCC_4_3_X_X86 || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
146
147
/** @def ASMReturnAddress
 * Yields the address the current function will return to, i.e. a location
 * inside its caller.
 *
 * Maps to the _ReturnAddress() intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GCC compatible ones (and doxygen).
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
163
164
165/**
166 * Compiler memory barrier.
167 *
168 * Ensure that the compiler does not use any cached (register/tmp stack) memory
169 * values or any outstanding writes when returning from this function.
170 *
171 * This function must be used if non-volatile data is modified by a
172 * device or the VMM. Typical cases are port access, MMIO access,
173 * trapping instruction, etc.
174 */
175#if RT_INLINE_ASM_GNU_STYLE
176# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
177#elif RT_INLINE_ASM_USES_INTRIN
178# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
179#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
180DECLINLINE(void) ASMCompilerBarrier(void)
181{
182 __asm
183 {
184 }
185}
186#endif
187
188
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @remark  Every variant expands to a complete statement body, so the macro
 *          is safe to use as 'ASMBreakpoint();' anywhere a statement is.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  ifndef __L4ENV__
#   define ASMBreakpoint()      do { __asm__ __volatile__("int3\n\tnop"); } while (0)
#  else
#   define ASMBreakpoint()      do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
#  endif
# elif defined(RT_ARCH_SPARC64)
   /* The terminating ';' after the asm statement was missing here, which
      made the macro a syntax error on this target. */
#  define ASMBreakpoint()       do { __asm__ __volatile__("illtrap 0\n\t"); } while (0) /** @todo Sparc64: this is just a wild guess. */
# elif defined(RT_ARCH_SPARC)
#  define ASMBreakpoint()       do { __asm__ __volatile__("unimp 0\n\t"); } while (0) /** @todo Sparc: this is just a wild guess (same as Sparc64, just different name). */
# else
#  error "PORTME"
# endif
#else
# define ASMBreakpoint()        __debugbreak()
#endif
213
214
215/**
216 * Spinloop hint for platforms that have these, empty function on the other
217 * platforms.
218 *
219 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
220 * spin locks.
221 */
222#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
223DECLASM(void) ASMNopPause(void);
224#else
225DECLINLINE(void) ASMNopPause(void)
226{
227# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
228# if RT_INLINE_ASM_GNU_STYLE
229 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
230# else
231 __asm {
232 _emit 0f3h
233 _emit 090h
234 }
235# endif
236# else
237 /* dummy */
238# endif
239}
240#endif
241
242
243/**
244 * Atomically Exchange an unsigned 8-bit value, ordered.
245 *
246 * @returns Current *pu8 value
247 * @param pu8 Pointer to the 8-bit variable to update.
248 * @param u8 The 8-bit value to assign to *pu8.
249 */
250#if RT_INLINE_ASM_EXTERNAL
251DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
252#else
253DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
254{
255# if RT_INLINE_ASM_GNU_STYLE
256 __asm__ __volatile__("xchgb %0, %1\n\t"
257 : "=m" (*pu8),
258 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
259 : "1" (u8),
260 "m" (*pu8));
261# else
262 __asm
263 {
264# ifdef RT_ARCH_AMD64
265 mov rdx, [pu8]
266 mov al, [u8]
267 xchg [rdx], al
268 mov [u8], al
269# else
270 mov edx, [pu8]
271 mov al, [u8]
272 xchg [edx], al
273 mov [u8], al
274# endif
275 }
276# endif
277 return u8;
278}
279#endif
280
281
282/**
283 * Atomically Exchange a signed 8-bit value, ordered.
284 *
285 * @returns Current *pu8 value
286 * @param pi8 Pointer to the 8-bit variable to update.
287 * @param i8 The 8-bit value to assign to *pi8.
288 */
289DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
290{
291 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
292}
293
294
295/**
296 * Atomically Exchange a bool value, ordered.
297 *
298 * @returns Current *pf value
299 * @param pf Pointer to the 8-bit variable to update.
300 * @param f The 8-bit value to assign to *pi8.
301 */
302DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
303{
304#ifdef _MSC_VER
305 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
306#else
307 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
308#endif
309}
310
311
312/**
313 * Atomically Exchange an unsigned 16-bit value, ordered.
314 *
315 * @returns Current *pu16 value
316 * @param pu16 Pointer to the 16-bit variable to update.
317 * @param u16 The 16-bit value to assign to *pu16.
318 */
319#if RT_INLINE_ASM_EXTERNAL
320DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
321#else
322DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
323{
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("xchgw %0, %1\n\t"
326 : "=m" (*pu16),
327 "=r" (u16)
328 : "1" (u16),
329 "m" (*pu16));
330# else
331 __asm
332 {
333# ifdef RT_ARCH_AMD64
334 mov rdx, [pu16]
335 mov ax, [u16]
336 xchg [rdx], ax
337 mov [u16], ax
338# else
339 mov edx, [pu16]
340 mov ax, [u16]
341 xchg [edx], ax
342 mov [u16], ax
343# endif
344 }
345# endif
346 return u16;
347}
348#endif
349
350
351/**
352 * Atomically Exchange a signed 16-bit value, ordered.
353 *
354 * @returns Current *pu16 value
355 * @param pi16 Pointer to the 16-bit variable to update.
356 * @param i16 The 16-bit value to assign to *pi16.
357 */
358DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
359{
360 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
361}
362
363
364/**
365 * Atomically Exchange an unsigned 32-bit value, ordered.
366 *
367 * @returns Current *pu32 value
368 * @param pu32 Pointer to the 32-bit variable to update.
369 * @param u32 The 32-bit value to assign to *pu32.
370 */
371#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
372DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
373#else
374DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
375{
376# if RT_INLINE_ASM_GNU_STYLE
377 __asm__ __volatile__("xchgl %0, %1\n\t"
378 : "=m" (*pu32),
379 "=r" (u32)
380 : "1" (u32),
381 "m" (*pu32));
382
383# elif RT_INLINE_ASM_USES_INTRIN
384 u32 = _InterlockedExchange((long *)pu32, u32);
385
386# else
387 __asm
388 {
389# ifdef RT_ARCH_AMD64
390 mov rdx, [pu32]
391 mov eax, u32
392 xchg [rdx], eax
393 mov [u32], eax
394# else
395 mov edx, [pu32]
396 mov eax, u32
397 xchg [edx], eax
398 mov [u32], eax
399# endif
400 }
401# endif
402 return u32;
403}
404#endif
405
406
407/**
408 * Atomically Exchange a signed 32-bit value, ordered.
409 *
410 * @returns Current *pu32 value
411 * @param pi32 Pointer to the 32-bit variable to update.
412 * @param i32 The 32-bit value to assign to *pi32.
413 */
414DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
415{
416 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
417}
418
419
420/**
421 * Atomically Exchange an unsigned 64-bit value, ordered.
422 *
423 * @returns Current *pu64 value
424 * @param pu64 Pointer to the 64-bit variable to update.
425 * @param u64 The 64-bit value to assign to *pu64.
426 */
427#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
428 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
429DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
430#else
431DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
432{
433# if defined(RT_ARCH_AMD64)
434# if RT_INLINE_ASM_USES_INTRIN
435 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
436
437# elif RT_INLINE_ASM_GNU_STYLE
438 __asm__ __volatile__("xchgq %0, %1\n\t"
439 : "=m" (*pu64),
440 "=r" (u64)
441 : "1" (u64),
442 "m" (*pu64));
443# else
444 __asm
445 {
446 mov rdx, [pu64]
447 mov rax, [u64]
448 xchg [rdx], rax
449 mov [u64], rax
450 }
451# endif
452# else /* !RT_ARCH_AMD64 */
453# if RT_INLINE_ASM_GNU_STYLE
454# if defined(PIC) || defined(__PIC__)
455 uint32_t u32EBX = (uint32_t)u64;
456 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
457 "xchgl %%ebx, %3\n\t"
458 "1:\n\t"
459 "lock; cmpxchg8b (%5)\n\t"
460 "jnz 1b\n\t"
461 "movl %3, %%ebx\n\t"
462 /*"xchgl %%esi, %5\n\t"*/
463 : "=A" (u64),
464 "=m" (*pu64)
465 : "0" (*pu64),
466 "m" ( u32EBX ),
467 "c" ( (uint32_t)(u64 >> 32) ),
468 "S" (pu64));
469# else /* !PIC */
470 __asm__ __volatile__("1:\n\t"
471 "lock; cmpxchg8b %1\n\t"
472 "jnz 1b\n\t"
473 : "=A" (u64),
474 "=m" (*pu64)
475 : "0" (*pu64),
476 "b" ( (uint32_t)u64 ),
477 "c" ( (uint32_t)(u64 >> 32) ));
478# endif
479# else
480 __asm
481 {
482 mov ebx, dword ptr [u64]
483 mov ecx, dword ptr [u64 + 4]
484 mov edi, pu64
485 mov eax, dword ptr [edi]
486 mov edx, dword ptr [edi + 4]
487 retry:
488 lock cmpxchg8b [edi]
489 jnz retry
490 mov dword ptr [u64], eax
491 mov dword ptr [u64 + 4], edx
492 }
493# endif
494# endif /* !RT_ARCH_AMD64 */
495 return u64;
496}
497#endif
498
499
500/**
501 * Atomically Exchange an signed 64-bit value, ordered.
502 *
503 * @returns Current *pi64 value
504 * @param pi64 Pointer to the 64-bit variable to update.
505 * @param i64 The 64-bit value to assign to *pi64.
506 */
507DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
508{
509 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
510}
511
512
513/**
514 * Atomically Exchange a pointer value, ordered.
515 *
516 * @returns Current *ppv value
517 * @param ppv Pointer to the pointer variable to update.
518 * @param pv The pointer value to assign to *ppv.
519 */
520DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
521{
522#if ARCH_BITS == 32
523 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
524#elif ARCH_BITS == 64
525 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
526#else
527# error "ARCH_BITS is bogus"
528#endif
529}
530
531
/**
 * Typed convenience wrapper around ASMAtomicXchgPtr that spares the caller
 * the casting.
 *
 * With GCC the arguments are first assigned to correctly typed temporaries,
 * so mismatching pointer types get diagnosed; other compilers just cast.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to store in *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type                        const pvTypeChecked  = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
    })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
#endif
553
554
555/**
556 * Atomically Exchange a raw-mode context pointer value, ordered.
557 *
558 * @returns Current *ppv value
559 * @param ppvRC Pointer to the pointer variable to update.
560 * @param pvRC The pointer value to assign to *ppv.
561 */
562DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
563{
564 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
565}
566
567
568/**
569 * Atomically Exchange a ring-0 pointer value, ordered.
570 *
571 * @returns Current *ppv value
572 * @param ppvR0 Pointer to the pointer variable to update.
573 * @param pvR0 The pointer value to assign to *ppv.
574 */
575DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
576{
577#if R0_ARCH_BITS == 32
578 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
579#elif R0_ARCH_BITS == 64
580 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
581#else
582# error "R0_ARCH_BITS is bogus"
583#endif
584}
585
586
587/**
588 * Atomically Exchange a ring-3 pointer value, ordered.
589 *
590 * @returns Current *ppv value
591 * @param ppvR3 Pointer to the pointer variable to update.
592 * @param pvR3 The pointer value to assign to *ppv.
593 */
594DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
595{
596#if R3_ARCH_BITS == 32
597 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
598#elif R3_ARCH_BITS == 64
599 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
600#else
601# error "R3_ARCH_BITS is bogus"
602#endif
603}
604
605
606/** @def ASMAtomicXchgHandle
607 * Atomically Exchange a typical IPRT handle value, ordered.
608 *
609 * @param ph Pointer to the value to update.
610 * @param hNew The new value to assigned to *pu.
611 * @param phRes Where to store the current *ph value.
612 *
613 * @remarks This doesn't currently work for all handles (like RTFILE).
614 */
615#if HC_ARCH_BITS == 32
616# define ASMAtomicXchgHandle(ph, hNew, phRes) \
617 do { \
618 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
619 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
620 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
621 } while (0)
622#elif HC_ARCH_BITS == 64
623# define ASMAtomicXchgHandle(ph, hNew, phRes) \
624 do { \
625 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
626 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
627 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
628 } while (0)
629#else
630# error HC_ARCH_BITS
631#endif
632
633
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * The exchange function is picked by sizeof(*pu); sizes other than 1, 2, 4
 * and 8 trigger an assertion.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so it matches the %d it is formatted with. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
652
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * The exchange function is picked by sizeof(*pu); sizes other than 1, 2, 4
 * and 8 trigger an assertion and leave *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so it matches the %d it is formatted with. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
671
672
673
674/**
675 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
676 *
677 * @returns true if xchg was done.
678 * @returns false if xchg wasn't done.
679 *
680 * @param pu8 Pointer to the value to update.
681 * @param u8New The new value to assigned to *pu8.
682 * @param u8Old The old value to *pu8 compare with.
683 */
684#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
685DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
686#else
687DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
688{
689 uint8_t u8Ret;
690 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
691 "setz %1\n\t"
692 : "=m" (*pu8),
693 "=qm" (u8Ret),
694 "=a" (u8Old)
695 : "q" (u8New),
696 "2" (u8Old),
697 "m" (*pu8));
698 return (bool)u8Ret;
699}
700#endif
701
702
703/**
704 * Atomically Compare and Exchange a signed 8-bit value, ordered.
705 *
706 * @returns true if xchg was done.
707 * @returns false if xchg wasn't done.
708 *
709 * @param pi8 Pointer to the value to update.
710 * @param i8New The new value to assigned to *pi8.
711 * @param i8Old The old value to *pi8 compare with.
712 */
713DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
714{
715 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
716}
717
718
719/**
720 * Atomically Compare and Exchange a bool value, ordered.
721 *
722 * @returns true if xchg was done.
723 * @returns false if xchg wasn't done.
724 *
725 * @param pf Pointer to the value to update.
726 * @param fNew The new value to assigned to *pf.
727 * @param fOld The old value to *pf compare with.
728 */
729DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
730{
731 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
732}
733
734
735/**
736 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
737 *
738 * @returns true if xchg was done.
739 * @returns false if xchg wasn't done.
740 *
741 * @param pu32 Pointer to the value to update.
742 * @param u32New The new value to assigned to *pu32.
743 * @param u32Old The old value to *pu32 compare with.
744 */
745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
746DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
747#else
748DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
749{
750# if RT_INLINE_ASM_GNU_STYLE
751 uint8_t u8Ret;
752 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
753 "setz %1\n\t"
754 : "=m" (*pu32),
755 "=qm" (u8Ret),
756 "=a" (u32Old)
757 : "r" (u32New),
758 "2" (u32Old),
759 "m" (*pu32));
760 return (bool)u8Ret;
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
764
765# else
766 uint32_t u32Ret;
767 __asm
768 {
769# ifdef RT_ARCH_AMD64
770 mov rdx, [pu32]
771# else
772 mov edx, [pu32]
773# endif
774 mov eax, [u32Old]
775 mov ecx, [u32New]
776# ifdef RT_ARCH_AMD64
777 lock cmpxchg [rdx], ecx
778# else
779 lock cmpxchg [edx], ecx
780# endif
781 setz al
782 movzx eax, al
783 mov [u32Ret], eax
784 }
785 return !!u32Ret;
786# endif
787}
788#endif
789
790
791/**
792 * Atomically Compare and Exchange a signed 32-bit value, ordered.
793 *
794 * @returns true if xchg was done.
795 * @returns false if xchg wasn't done.
796 *
797 * @param pi32 Pointer to the value to update.
798 * @param i32New The new value to assigned to *pi32.
799 * @param i32Old The old value to *pi32 compare with.
800 */
801DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
802{
803 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
804}
805
806
807/**
808 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pu64 Pointer to the 64-bit variable to update.
814 * @param u64New The 64-bit value to assign to *pu64.
815 * @param u64Old The value to compare with.
816 */
817#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
818 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
819DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
820#else
821DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
822{
823# if RT_INLINE_ASM_USES_INTRIN
824 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
825
826# elif defined(RT_ARCH_AMD64)
827# if RT_INLINE_ASM_GNU_STYLE
828 uint8_t u8Ret;
829 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
830 "setz %1\n\t"
831 : "=m" (*pu64),
832 "=qm" (u8Ret),
833 "=a" (u64Old)
834 : "r" (u64New),
835 "2" (u64Old),
836 "m" (*pu64));
837 return (bool)u8Ret;
838# else
839 bool fRet;
840 __asm
841 {
842 mov rdx, [pu32]
843 mov rax, [u64Old]
844 mov rcx, [u64New]
845 lock cmpxchg [rdx], rcx
846 setz al
847 mov [fRet], al
848 }
849 return fRet;
850# endif
851# else /* !RT_ARCH_AMD64 */
852 uint32_t u32Ret;
853# if RT_INLINE_ASM_GNU_STYLE
854# if defined(PIC) || defined(__PIC__)
855 uint32_t u32EBX = (uint32_t)u64New;
856 uint32_t u32Spill;
857 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
858 "lock; cmpxchg8b (%6)\n\t"
859 "setz %%al\n\t"
860 "movl %4, %%ebx\n\t"
861 "movzbl %%al, %%eax\n\t"
862 : "=a" (u32Ret),
863 "=d" (u32Spill),
864# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
865 "+m" (*pu64)
866# else
867 "=m" (*pu64)
868# endif
869 : "A" (u64Old),
870 "m" ( u32EBX ),
871 "c" ( (uint32_t)(u64New >> 32) ),
872 "S" (pu64));
873# else /* !PIC */
874 uint32_t u32Spill;
875 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
876 "setz %%al\n\t"
877 "movzbl %%al, %%eax\n\t"
878 : "=a" (u32Ret),
879 "=d" (u32Spill),
880 "+m" (*pu64)
881 : "A" (u64Old),
882 "b" ( (uint32_t)u64New ),
883 "c" ( (uint32_t)(u64New >> 32) ));
884# endif
885 return (bool)u32Ret;
886# else
887 __asm
888 {
889 mov ebx, dword ptr [u64New]
890 mov ecx, dword ptr [u64New + 4]
891 mov edi, [pu64]
892 mov eax, dword ptr [u64Old]
893 mov edx, dword ptr [u64Old + 4]
894 lock cmpxchg8b [edi]
895 setz al
896 movzx eax, al
897 mov dword ptr [u32Ret], eax
898 }
899 return !!u32Ret;
900# endif
901# endif /* !RT_ARCH_AMD64 */
902}
903#endif
904
905
906/**
907 * Atomically Compare and exchange a signed 64-bit value, ordered.
908 *
909 * @returns true if xchg was done.
910 * @returns false if xchg wasn't done.
911 *
912 * @param pi64 Pointer to the 64-bit variable to update.
913 * @param i64 The 64-bit value to assign to *pu64.
914 * @param i64Old The value to compare with.
915 */
916DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
917{
918 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
919}
920
921
922/**
923 * Atomically Compare and Exchange a pointer value, ordered.
924 *
925 * @returns true if xchg was done.
926 * @returns false if xchg wasn't done.
927 *
928 * @param ppv Pointer to the value to update.
929 * @param pvNew The new value to assigned to *ppv.
930 * @param pvOld The old value to *ppv compare with.
931 */
932DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
933{
934#if ARCH_BITS == 32
935 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
936#elif ARCH_BITS == 64
937 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
938#else
939# error "ARCH_BITS is bogus"
940#endif
941}
942
943
/**
 * Typed front end for ASMAtomicCmpXchgPtrVoid: atomically compares and, on
 * match, exchanges a pointer value, ordered.
 *
 * @returns true if the exchange was done, false if it wasn't.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The value *ppv is compared with.
 *
 * @remarks This is relatively type safe on GCC platforms: all three
 *          arguments are first assigned to temporaries typed after *ppv.
 *          Other compilers simply cast.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
    })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#endif
971
972
973/** @def ASMAtomicCmpXchgHandle
974 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
975 *
976 * @param ph Pointer to the value to update.
977 * @param hNew The new value to assigned to *pu.
978 * @param hOld The old value to *pu compare with.
979 * @param fRc Where to store the result.
980 *
981 * @remarks This doesn't currently work for all handles (like RTFILE).
982 */
983#if HC_ARCH_BITS == 32
984# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
985 do { \
986 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
987 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
988 } while (0)
989#elif HC_ARCH_BITS == 64
990# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
991 do { \
992 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
993 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
994 } while (0)
995#else
996# error HC_ARCH_BITS
997#endif
998
999
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 4 and 8 byte values are handled; any other size triggers an
 * assertion and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so it matches the %d it is formatted with. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1021
1022
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The value *pu32 is compared with.
 * @param   pu32Old     Where to store the value found in *pu32.  Written
 *                      unconditionally by every code path below.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX carries u32Old in and the previous *pu32 out; SETZ turns ZF into the result. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; the exchange happened iff it equals u32Old. */
    return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        /* MOV leaves the flags alone, so ZF still reflects the cmpxchg outcome here. */
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
1083
1084
1085/**
1086 * Atomically Compare and Exchange a signed 32-bit value, additionally
1087 * passes back old value, ordered.
1088 *
1089 * @returns true if xchg was done.
1090 * @returns false if xchg wasn't done.
1091 *
1092 * @param pi32 Pointer to the value to update.
1093 * @param i32New The new value to assigned to *pi32.
1094 * @param i32Old The old value to *pi32 compare with.
1095 * @param pi32Old Pointer store the old value at.
1096 */
1097DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1098{
1099 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1100}
1101
1102
1103/**
1104 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1105 * passing back old value, ordered.
1106 *
1107 * @returns true if xchg was done.
1108 * @returns false if xchg wasn't done.
1109 *
1110 * @param pu64 Pointer to the 64-bit variable to update.
1111 * @param u64New The 64-bit value to assign to *pu64.
1112 * @param u64Old The value to compare with.
1113 * @param pu64Old Pointer store the old value at.
1114 */
1115#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1116 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1117DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1118#else
1119DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1120{
1121# if RT_INLINE_ASM_USES_INTRIN
1122 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1123
1124# elif defined(RT_ARCH_AMD64)
1125# if RT_INLINE_ASM_GNU_STYLE
1126 uint8_t u8Ret;
1127 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1128 "setz %1\n\t"
1129 : "=m" (*pu64),
1130 "=qm" (u8Ret),
1131 "=a" (*pu64Old)
1132 : "r" (u64New),
1133 "a" (u64Old),
1134 "m" (*pu64));
1135 return (bool)u8Ret;
1136# else
1137 bool fRet;
1138 __asm
1139 {
1140 mov rdx, [pu32]
1141 mov rax, [u64Old]
1142 mov rcx, [u64New]
1143 lock cmpxchg [rdx], rcx
1144 mov rdx, [pu64Old]
1145 mov [rdx], rax
1146 setz al
1147 mov [fRet], al
1148 }
1149 return fRet;
1150# endif
1151# else /* !RT_ARCH_AMD64 */
1152# if RT_INLINE_ASM_GNU_STYLE
1153 uint64_t u64Ret;
1154# if defined(PIC) || defined(__PIC__)
1155 /* NB: this code uses a memory clobber description, because the clean
1156 * solution with an output value for *pu64 makes gcc run out of registers.
1157 * This will cause suboptimal code, and anyone with a better solution is
1158 * welcome to improve this. */
1159 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1160 "lock; cmpxchg8b %3\n\t"
1161 "xchgl %%ebx, %1\n\t"
1162 : "=A" (u64Ret)
1163 : "DS" ((uint32_t)u64New),
1164 "c" ((uint32_t)(u64New >> 32)),
1165 "m" (*pu64),
1166 "0" (u64Old)
1167 : "memory" );
1168# else /* !PIC */
1169 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1170 : "=A" (u64Ret),
1171 "=m" (*pu64)
1172 : "b" ((uint32_t)u64New),
1173 "c" ((uint32_t)(u64New >> 32)),
1174 "m" (*pu64),
1175 "0" (u64Old));
1176# endif
1177 *pu64Old = u64Ret;
1178 return u64Ret == u64Old;
1179# else
1180 uint32_t u32Ret;
1181 __asm
1182 {
1183 mov ebx, dword ptr [u64New]
1184 mov ecx, dword ptr [u64New + 4]
1185 mov edi, [pu64]
1186 mov eax, dword ptr [u64Old]
1187 mov edx, dword ptr [u64Old + 4]
1188 lock cmpxchg8b [edi]
1189 mov ebx, [pu64Old]
1190 mov [ebx], eax
1191 setz al
1192 movzx eax, al
1193 add ebx, 4
1194 mov [ebx], edx
1195 mov dword ptr [u32Ret], eax
1196 }
1197 return !!u32Ret;
1198# endif
1199# endif /* !RT_ARCH_AMD64 */
1200}
1201#endif
1202
1203
1204/**
1205 * Atomically Compare and exchange a signed 64-bit value, additionally
1206 * passing back old value, ordered.
1207 *
1208 * @returns true if xchg was done.
1209 * @returns false if xchg wasn't done.
1210 *
1211 * @param pi64 Pointer to the 64-bit variable to update.
1212 * @param i64 The 64-bit value to assign to *pu64.
1213 * @param i64Old The value to compare with.
1214 * @param pi64Old Pointer store the old value at.
1215 */
1216DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1217{
1218 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1219}
1220
1221/** @def ASMAtomicCmpXchgExHandle
1222 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1223 *
1224 * @param ph Pointer to the value to update.
1225 * @param hNew The new value to assigned to *pu.
1226 * @param hOld The old value to *pu compare with.
1227 * @param fRc Where to store the result.
1228 * @param phOldVal Pointer to where to store the old value.
1229 *
1230 * @remarks This doesn't currently work for all handles (like RTFILE).
1231 */
1232#if HC_ARCH_BITS == 32
1233# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1234 do { \
1235 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1236 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1237 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1238 } while (0)
1239#elif HC_ARCH_BITS == 64
1240# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1241 do { \
1242 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1243 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1244 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1245 } while (0)
1246#else
1247# error HC_ARCH_BITS
1248#endif
1249
1250
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit worker.  Any other
 * size triggers an assertion, sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* %d expects an int; sizeof yields size_t, hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1274
1275
1276/**
1277 * Atomically Compare and Exchange a pointer value, additionally
1278 * passing back old value, ordered.
1279 *
1280 * @returns true if xchg was done.
1281 * @returns false if xchg wasn't done.
1282 *
1283 * @param ppv Pointer to the value to update.
1284 * @param pvNew The new value to assigned to *ppv.
1285 * @param pvOld The old value to *ppv compare with.
1286 * @param ppvOld Pointer store the old value at.
1287 */
1288DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1289{
1290#if ARCH_BITS == 32
1291 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1292#elif ARCH_BITS == 64
1293 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1294#else
1295# error "ARCH_BITS is bogus"
1296#endif
1297}
1298
1299
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * Type-checking front end for ASMAtomicCmpXchgExPtrVoid.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms: each argument is
 *          first assigned to a temporary whose type is derived from *ppv.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldChecked = (ppvOld); \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvChecked, \
                                  (void *)pvNewChecked, \
                                  (void *)pvOldChecked, \
                                  (void **)ppvOldChecked); \
     })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#endif
1331
1332
1333/**
1334 * Serialize Instruction.
1335 */
1336#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1337DECLASM(void) ASMSerializeInstruction(void);
1338#else
1339DECLINLINE(void) ASMSerializeInstruction(void)
1340{
1341# if RT_INLINE_ASM_GNU_STYLE
1342 RTCCUINTREG xAX = 0;
1343# ifdef RT_ARCH_AMD64
1344 __asm__ ("cpuid"
1345 : "=a" (xAX)
1346 : "0" (xAX)
1347 : "rbx", "rcx", "rdx");
1348# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1349 __asm__ ("push %%ebx\n\t"
1350 "cpuid\n\t"
1351 "pop %%ebx\n\t"
1352 : "=a" (xAX)
1353 : "0" (xAX)
1354 : "ecx", "edx");
1355# else
1356 __asm__ ("cpuid"
1357 : "=a" (xAX)
1358 : "0" (xAX)
1359 : "ebx", "ecx", "edx");
1360# endif
1361
1362# elif RT_INLINE_ASM_USES_INTRIN
1363 int aInfo[4];
1364 __cpuid(aInfo, 0);
1365
1366# else
1367 __asm
1368 {
1369 push ebx
1370 xor eax, eax
1371 cpuid
1372 pop ebx
1373 }
1374# endif
1375}
1376#endif
1377
1378
1379/**
1380 * Memory fence, waits for any pending writes and reads to complete.
1381 */
1382DECLINLINE(void) ASMMemoryFence(void)
1383{
1384 /** @todo use mfence? check if all cpus we care for support it. */
1385 uint32_t volatile u32;
1386 ASMAtomicXchgU32(&u32, 0);
1387}
1388
1389
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently just a full ASMMemoryFence().
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1398
1399
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently just a full ASMMemoryFence().
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1408
1409
1410/**
1411 * Atomically reads an unsigned 8-bit value, ordered.
1412 *
1413 * @returns Current *pu8 value
1414 * @param pu8 Pointer to the 8-bit variable to read.
1415 */
1416DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1417{
1418 ASMMemoryFence();
1419 return *pu8; /* byte reads are atomic on x86 */
1420}
1421
1422
1423/**
1424 * Atomically reads an unsigned 8-bit value, unordered.
1425 *
1426 * @returns Current *pu8 value
1427 * @param pu8 Pointer to the 8-bit variable to read.
1428 */
1429DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1430{
1431 return *pu8; /* byte reads are atomic on x86 */
1432}
1433
1434
1435/**
1436 * Atomically reads a signed 8-bit value, ordered.
1437 *
1438 * @returns Current *pi8 value
1439 * @param pi8 Pointer to the 8-bit variable to read.
1440 */
1441DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1442{
1443 ASMMemoryFence();
1444 return *pi8; /* byte reads are atomic on x86 */
1445}
1446
1447
1448/**
1449 * Atomically reads a signed 8-bit value, unordered.
1450 *
1451 * @returns Current *pi8 value
1452 * @param pi8 Pointer to the 8-bit variable to read.
1453 */
1454DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1455{
1456 return *pi8; /* byte reads are atomic on x86 */
1457}
1458
1459
1460/**
1461 * Atomically reads an unsigned 16-bit value, ordered.
1462 *
1463 * @returns Current *pu16 value
1464 * @param pu16 Pointer to the 16-bit variable to read.
1465 */
1466DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1467{
1468 ASMMemoryFence();
1469 Assert(!((uintptr_t)pu16 & 1));
1470 return *pu16;
1471}
1472
1473
1474/**
1475 * Atomically reads an unsigned 16-bit value, unordered.
1476 *
1477 * @returns Current *pu16 value
1478 * @param pu16 Pointer to the 16-bit variable to read.
1479 */
1480DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1481{
1482 Assert(!((uintptr_t)pu16 & 1));
1483 return *pu16;
1484}
1485
1486
1487/**
1488 * Atomically reads a signed 16-bit value, ordered.
1489 *
1490 * @returns Current *pi16 value
1491 * @param pi16 Pointer to the 16-bit variable to read.
1492 */
1493DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1494{
1495 ASMMemoryFence();
1496 Assert(!((uintptr_t)pi16 & 1));
1497 return *pi16;
1498}
1499
1500
1501/**
1502 * Atomically reads a signed 16-bit value, unordered.
1503 *
1504 * @returns Current *pi16 value
1505 * @param pi16 Pointer to the 16-bit variable to read.
1506 */
1507DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1508{
1509 Assert(!((uintptr_t)pi16 & 1));
1510 return *pi16;
1511}
1512
1513
1514/**
1515 * Atomically reads an unsigned 32-bit value, ordered.
1516 *
1517 * @returns Current *pu32 value
1518 * @param pu32 Pointer to the 32-bit variable to read.
1519 */
1520DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1521{
1522 ASMMemoryFence();
1523 Assert(!((uintptr_t)pu32 & 3));
1524 return *pu32;
1525}
1526
1527
1528/**
1529 * Atomically reads an unsigned 32-bit value, unordered.
1530 *
1531 * @returns Current *pu32 value
1532 * @param pu32 Pointer to the 32-bit variable to read.
1533 */
1534DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1535{
1536 Assert(!((uintptr_t)pu32 & 3));
1537 return *pu32;
1538}
1539
1540
1541/**
1542 * Atomically reads a signed 32-bit value, ordered.
1543 *
1544 * @returns Current *pi32 value
1545 * @param pi32 Pointer to the 32-bit variable to read.
1546 */
1547DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1548{
1549 ASMMemoryFence();
1550 Assert(!((uintptr_t)pi32 & 3));
1551 return *pi32;
1552}
1553
1554
1555/**
1556 * Atomically reads a signed 32-bit value, unordered.
1557 *
1558 * @returns Current *pi32 value
1559 * @param pi32 Pointer to the 32-bit variable to read.
1560 */
1561DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1562{
1563 Assert(!((uintptr_t)pi32 & 3));
1564 return *pi32;
1565}
1566
1567
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain 64-bit load.  On
 * 32-bit hosts the value is obtained with a locked CMPXCHG8B that compares
 * against zero and would store zero, so the memory content never changes
 * but the location is accessed as a write target.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* Disabled alternative (mfence followed by a 64-bit mov), kept for reference: */
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    /* EBX is swapped with the zeroed u32EBX for the duration of the cmpxchg8b
       and reloaded from it afterwards.  With EDX:EAX = ECX:EBX = 0 the
       instruction leaves *pu64 unchanged and EDX:EAX ends up holding its value. */
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* Same compare-with-zero / store-zero trick, with EBX passed in directly. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1645
1646
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain 64-bit load.  On 32-bit hosts the value is
 * obtained with a locked CMPXCHG8B that compares against zero and would
 * store zero, so the memory content never changes but the location is
 * accessed as a write target.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* Disabled alternative (explicit 64-bit mov), kept for reference: */
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    /* EAX, ECX and EDX are zeroed inside the asm; EBX is swapped with the
       zeroed u32EBX for the duration of the cmpxchg8b and reloaded from it
       afterwards.  u32Spill only exists to declare ECX as written. */
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    /* Compare-with-zero / store-zero: *pu64 is unchanged, EDX:EAX receives its value. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1725
1726
1727/**
1728 * Atomically reads a signed 64-bit value, ordered.
1729 *
1730 * @returns Current *pi64 value
1731 * @param pi64 Pointer to the 64-bit variable to read.
1732 * The memory pointed to must be writable.
1733 * @remark This will fault if the memory is read-only!
1734 */
1735DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1736{
1737 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1738}
1739
1740
1741/**
1742 * Atomically reads a signed 64-bit value, unordered.
1743 *
1744 * @returns Current *pi64 value
1745 * @param pi64 Pointer to the 64-bit variable to read.
1746 * The memory pointed to must be writable.
1747 * @remark This will fault if the memory is read-only!
1748 */
1749DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1750{
1751 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1752}
1753
1754
1755/**
1756 * Atomically reads a size_t value, ordered.
1757 *
1758 * @returns Current *pcb value
1759 * @param pcb Pointer to the size_t variable to read.
1760 */
1761DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1762{
1763#if ARCH_BITS == 64
1764 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1765#elif ARCH_BITS == 32
1766 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1767#else
1768# error "Unsupported ARCH_BITS value"
1769#endif
1770}
1771
1772
1773/**
1774 * Atomically reads a size_t value, unordered.
1775 *
1776 * @returns Current *pcb value
1777 * @param pcb Pointer to the size_t variable to read.
1778 */
1779DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1780{
1781#if ARCH_BITS == 64
1782 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1783#elif ARCH_BITS == 32
1784 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1785#else
1786# error "Unsupported ARCH_BITS value"
1787#endif
1788}
1789
1790
1791/**
1792 * Atomically reads a pointer value, ordered.
1793 *
1794 * @returns Current *pv value
1795 * @param ppv Pointer to the pointer variable to read.
1796 *
1797 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1798 * requires less typing (no casts).
1799 */
1800DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1801{
1802#if ARCH_BITS == 32
1803 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1804#elif ARCH_BITS == 64
1805 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1806#else
1807# error "ARCH_BITS is bogus"
1808#endif
1809}
1810
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * On GCC the argument is first assigned to a temporary whose type is derived
 * from *ppv, so a mismatching pointer type is diagnosed by the compiler.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvChecked = (ppv); \
        Type pvResult = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvChecked); \
        pvResult; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#endif
1830
1831
1832/**
1833 * Atomically reads a pointer value, unordered.
1834 *
1835 * @returns Current *pv value
1836 * @param ppv Pointer to the pointer variable to read.
1837 *
1838 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1839 * requires less typing (no casts).
1840 */
1841DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1842{
1843#if ARCH_BITS == 32
1844 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1845#elif ARCH_BITS == 64
1846 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1847#else
1848# error "ARCH_BITS is bogus"
1849#endif
1850}
1851
1852
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * On GCC the argument is first assigned to a temporary whose type is derived
 * from *ppv, so a mismatching pointer type is diagnosed by the compiler.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvChecked = (ppv); \
        Type pvResult = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvChecked); \
        pvResult; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#endif
1872
1873
1874/**
1875 * Atomically reads a boolean value, ordered.
1876 *
1877 * @returns Current *pf value
1878 * @param pf Pointer to the boolean variable to read.
1879 */
1880DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1881{
1882 ASMMemoryFence();
1883 return *pf; /* byte reads are atomic on x86 */
1884}
1885
1886
1887/**
1888 * Atomically reads a boolean value, unordered.
1889 *
1890 * @returns Current *pf value
1891 * @param pf Pointer to the boolean variable to read.
1892 */
1893DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1894{
1895 return *pf; /* byte reads are atomic on x86 */
1896}
1897
1898
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * Both the handle and the result storage must be exactly HC_ARCH_BITS wide;
 * this is checked at compile time.  The value is read with
 * ASMAtomicReadU32 or ASMAtomicReadU64 accordingly.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1924
1925
/**
 * Atomically read a typical IPRT handle value, unordered.
 *
 * Both the handle and the result storage must be exactly HC_ARCH_BITS wide;
 * this is checked at compile time.  The value is read with
 * ASMAtomicUoReadU32 or ASMAtomicUoReadU64 accordingly.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1951
1952
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit reader.  Any other
 * size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* %d expects an int; sizeof yields size_t, hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
1970
1971
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered reader.
 * Any other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* %d expects an int; sizeof yields size_t, hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
1989
1990
1991/**
1992 * Atomically writes an unsigned 8-bit value, ordered.
1993 *
1994 * @param pu8 Pointer to the 8-bit variable.
1995 * @param u8 The 8-bit value to assign to *pu8.
1996 */
1997DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1998{
1999 ASMAtomicXchgU8(pu8, u8);
2000}
2001
2002
/**
 * Atomically writes an unsigned 8-bit value, unordered.
 *
 * Plain store without any fence.
 *
 * @param   pu8     Pointer to the 8-bit variable.
 * @param   u8      The 8-bit value to assign to *pu8.
 */
DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
{
    *pu8 = u8;      /* byte writes are atomic on x86 */
}
2013
2014
2015/**
2016 * Atomically writes a signed 8-bit value, ordered.
2017 *
2018 * @param pi8 Pointer to the 8-bit variable to read.
2019 * @param i8 The 8-bit value to assign to *pi8.
2020 */
2021DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2022{
2023 ASMAtomicXchgS8(pi8, i8);
2024}
2025
2026
/**
 * Atomically writes a signed 8-bit value, unordered.
 *
 * Plain store without any fence.
 *
 * @param   pi8     Pointer to the 8-bit variable to write.
 * @param   i8      The 8-bit value to assign to *pi8.
 */
DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
{
    *pi8 = i8;      /* byte writes are atomic on x86 */
}
2037
2038
2039/**
2040 * Atomically writes an unsigned 16-bit value, ordered.
2041 *
2042 * @param pu16 Pointer to the 16-bit variable.
2043 * @param u16 The 16-bit value to assign to *pu16.
2044 */
2045DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2046{
2047 ASMAtomicXchgU16(pu16, u16);
2048}
2049
2050
/**
 * Atomically writes an unsigned 16-bit value, unordered.
 *
 * Plain store without any fence.  The pointer is asserted to be 2-byte
 * aligned.
 *
 * @param   pu16    Pointer to the 16-bit variable.
 * @param   u16     The 16-bit value to assign to *pu16.
 */
DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
{
    Assert(!((uintptr_t)pu16 & 1));
    *pu16 = u16;
}
2062
2063
2064/**
2065 * Atomically writes a signed 16-bit value, ordered.
2066 *
2067 * @param pi16 Pointer to the 16-bit variable to read.
2068 * @param i16 The 16-bit value to assign to *pi16.
2069 */
2070DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2071{
2072 ASMAtomicXchgS16(pi16, i16);
2073}
2074
2075
/**
 * Atomically writes a signed 16-bit value, unordered.
 *
 * Plain store without any fence.  The pointer is asserted to be 2-byte
 * aligned.
 *
 * @param   pi16    Pointer to the 16-bit variable to write.
 * @param   i16     The 16-bit value to assign to *pi16.
 */
DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
{
    Assert(!((uintptr_t)pi16 & 1));
    *pi16 = i16;
}
2087
2088
2089/**
2090 * Atomically writes an unsigned 32-bit value, ordered.
2091 *
2092 * @param pu32 Pointer to the 32-bit variable.
2093 * @param u32 The 32-bit value to assign to *pu32.
2094 */
2095DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2096{
2097 ASMAtomicXchgU32(pu32, u32);
2098}
2099
2100
/**
 * Atomically writes an unsigned 32-bit value, unordered.
 *
 * Plain store without any fence.  The pointer is asserted to be 4-byte
 * aligned.
 *
 * @param   pu32    Pointer to the 32-bit variable.
 * @param   u32     The 32-bit value to assign to *pu32.
 */
DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
{
    Assert(!((uintptr_t)pu32 & 3));
    *pu32 = u32;
}
2112
2113
2114/**
2115 * Atomically writes a signed 32-bit value, ordered.
2116 *
2117 * @param pi32 Pointer to the 32-bit variable to read.
2118 * @param i32 The 32-bit value to assign to *pi32.
2119 */
2120DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2121{
2122 ASMAtomicXchgS32(pi32, i32);
2123}
2124
2125
2126/**
2127 * Atomically writes a signed 32-bit value, unordered.
2128 *
2129 * @param pi32 Pointer to the 32-bit variable to read.
2130 * @param i32 The 32-bit value to assign to *pi32.
2131 */
2132DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2133{
2134 Assert(!((uintptr_t)pi32 & 3));
2135 *pi32 = i32;
2136}
2137
2138
2139/**
2140 * Atomically writes an unsigned 64-bit value, ordered.
2141 *
2142 * @param pu64 Pointer to the 64-bit variable.
2143 * @param u64 The 64-bit value to assign to *pu64.
2144 */
2145DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2146{
2147 ASMAtomicXchgU64(pu64, u64);
2148}
2149
2150
2151/**
2152 * Atomically writes an unsigned 64-bit value, unordered.
2153 *
2154 * @param pu64 Pointer to the 64-bit variable.
2155 * @param u64 The 64-bit value to assign to *pu64.
2156 */
2157DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2158{
2159 Assert(!((uintptr_t)pu64 & 7));
2160#if ARCH_BITS == 64
2161 *pu64 = u64;
2162#else
2163 ASMAtomicXchgU64(pu64, u64);
2164#endif
2165}
2166
2167
2168/**
2169 * Atomically writes a signed 64-bit value, ordered.
2170 *
2171 * @param pi64 Pointer to the 64-bit variable.
2172 * @param i64 The 64-bit value to assign to *pi64.
2173 */
2174DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2175{
2176 ASMAtomicXchgS64(pi64, i64);
2177}
2178
2179
2180/**
2181 * Atomically writes a signed 64-bit value, unordered.
2182 *
2183 * @param pi64 Pointer to the 64-bit variable.
2184 * @param i64 The 64-bit value to assign to *pi64.
2185 */
2186DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2187{
2188 Assert(!((uintptr_t)pi64 & 7));
2189#if ARCH_BITS == 64
2190 *pi64 = i64;
2191#else
2192 ASMAtomicXchgS64(pi64, i64);
2193#endif
2194}
2195
2196
2197/**
2198 * Atomically writes a boolean value, unordered.
2199 *
2200 * @param pf Pointer to the boolean variable.
2201 * @param f The boolean value to assign to *pf.
2202 */
2203DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2204{
2205 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2206}
2207
2208
2209/**
2210 * Atomically writes a boolean value, unordered.
2211 *
2212 * @param pf Pointer to the boolean variable.
2213 * @param f The boolean value to assign to *pf.
2214 */
2215DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2216{
2217 *pf = f; /* byte writes are atomic on x86 */
2218}
2219
2220
2221/**
2222 * Atomically writes a pointer value, ordered.
2223 *
2224 * @param ppv Pointer to the pointer variable.
2225 * @param pv The pointer value to assign to *ppv.
2226 */
2227DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2228{
2229#if ARCH_BITS == 32
2230 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2231#elif ARCH_BITS == 64
2232 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2233#else
2234# error "ARCH_BITS is bogus"
2235#endif
2236}
2237
2238
/**
 * Atomically writes a pointer value, ordered.
 *
 * Compile-time checks that both arguments are pointer sized, asserts that the
 * destination is aligned on a pointer-size boundary, then stores the value
 * via ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The GCC variant evaluates @a ppv and @a pv exactly once (sizeof and
 *          typeof operands are not evaluated).  The generic variant also
 *          evaluates @a ppv inside Assert, so avoid side effects there.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2273
2274
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Compile-time checks that the target is pointer sized, asserts that it is
 * aligned on a pointer-size boundary, then stores NULL via
 * ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The GCC variant evaluates @a ppv exactly once.  The generic
 *          variant also evaluates it inside Assert, so avoid side effects.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2300
2301
/**
 * Atomically writes a pointer value, unordered.
 *
 * Compile-time checks that both arguments are pointer sized, asserts that the
 * destination is aligned on a pointer-size boundary, then performs a plain
 * assignment.  This is a statement macro; it yields no value.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The GCC variant evaluates @a ppv and @a pv exactly once.  The
 *          generic variant also evaluates @a ppv inside Assert, so avoid side
 *          effects there.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2336
2337
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Compile-time checks that the target is pointer sized, asserts that it is
 * aligned on a pointer-size boundary, then performs a plain assignment of
 * NULL.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The GCC variant evaluates @a ppv exactly once.  The generic
 *          variant also evaluates it inside Assert, so avoid side effects.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2363
2364
2365/**
2366 * Atomically write a typical IPRT handle value, ordered.
2367 *
2368 * @param ph Pointer to the variable to update.
2369 * @param hNew The value to assign to *ph.
2370 *
2371 * @remarks This doesn't currently work for all handles (like RTFILE).
2372 */
2373#if HC_ARCH_BITS == 32
2374# define ASMAtomicWriteHandle(ph, hNew) \
2375 do { \
2376 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2377 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2378 } while (0)
2379#elif HC_ARCH_BITS == 64
2380# define ASMAtomicWriteHandle(ph, hNew) \
2381 do { \
2382 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2383 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2384 } while (0)
2385#else
2386# error HC_ARCH_BITS
2387#endif
2388
2389
2390/**
2391 * Atomically write a typical IPRT handle value, unordered.
2392 *
2393 * @param ph Pointer to the variable to update.
2394 * @param hNew The value to assign to *ph.
2395 *
2396 * @remarks This doesn't currently work for all handles (like RTFILE).
2397 */
2398#if HC_ARCH_BITS == 32
2399# define ASMAtomicUoWriteHandle(ph, hNew) \
2400 do { \
2401 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2402 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2403 } while (0)
2404#elif HC_ARCH_BITS == 64
2405# define ASMAtomicUoWriteHandle(ph, hNew) \
2406 do { \
2407 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2408 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2409 } while (0)
2410#else
2411# error HC_ARCH_BITS
2412#endif
2413
2414
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the ASMAtomicWriteU8/U16/U32/U64 function of
 * matching width.  Any other size triggers an assertion and nothing is
 * written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2432
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the ASMAtomicUoWriteU8/U16/U32/U64 function
 * of matching width.  Any other size triggers an assertion and nothing is
 * written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2450
2451
2452
2453/**
2454 * Atomically exchanges and adds to a 32-bit value, ordered.
2455 *
2456 * @returns The old value.
2457 * @param pu32 Pointer to the value.
2458 * @param u32 Number to add.
2459 */
2460#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2461DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2462#else
2463DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2464{
2465# if RT_INLINE_ASM_USES_INTRIN
2466 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2467 return u32;
2468
2469# elif RT_INLINE_ASM_GNU_STYLE
2470 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2471 : "=r" (u32),
2472 "=m" (*pu32)
2473 : "0" (u32),
2474 "m" (*pu32)
2475 : "memory");
2476 return u32;
2477# else
2478 __asm
2479 {
2480 mov eax, [u32]
2481# ifdef RT_ARCH_AMD64
2482 mov rdx, [pu32]
2483 lock xadd [rdx], eax
2484# else
2485 mov edx, [pu32]
2486 lock xadd [edx], eax
2487# endif
2488 mov [u32], eax
2489 }
2490 return u32;
2491# endif
2492}
2493#endif
2494
2495
2496/**
2497 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2498 *
2499 * @returns The old value.
2500 * @param pi32 Pointer to the value.
2501 * @param i32 Number to add.
2502 */
2503DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2504{
2505 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2506}
2507
2508
2509/**
2510 * Atomically exchanges and adds to a 64-bit value, ordered.
2511 *
2512 * @returns The old value.
2513 * @param pu64 Pointer to the value.
2514 * @param u64 Number to add.
2515 */
2516#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2517DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2518#else
2519DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2520{
2521# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2522 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2523 return u64;
2524
2525# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2526 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2527 : "=r" (u64),
2528 "=m" (*pu64)
2529 : "0" (u64),
2530 "m" (*pu64)
2531 : "memory");
2532 return u64;
2533# else
2534 uint64_t u64Old;
2535 for (;;)
2536 {
2537 uint64_t u64New;
2538 u64Old = ASMAtomicUoReadU64(pu64);
2539 u64New = u64Old + u64;
2540 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2541 break;
2542 ASMNopPause();
2543 }
2544 return u64Old;
2545# endif
2546}
2547#endif
2548
2549
2550/**
2551 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2552 *
2553 * @returns The old value.
2554 * @param pi64 Pointer to the value.
2555 * @param i64 Number to add.
2556 */
2557DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2558{
2559 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2560}
2561
2562
2563/**
2564 * Atomically exchanges and adds to a size_t value, ordered.
2565 *
2566 * @returns The old value.
2567 * @param pcb Pointer to the size_t value.
2568 * @param cb Number to add.
2569 */
2570DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2571{
2572#if ARCH_BITS == 64
2573 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2574#elif ARCH_BITS == 32
2575 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2576#else
2577# error "Unsupported ARCH_BITS value"
2578#endif
2579}
2580
2581
/**
 * Atomically adds to a value whose size might differ between platforms or
 * compilers and fetches the previous value, ordered.
 *
 * Dispatches on sizeof(*(pu)); only 4 and 8 byte variables are supported.
 * Any other size triggers an assertion and leaves *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.  It is written through a
 *                  pointer of the same width as *pu.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2598
2599
2600/**
2601 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2602 *
2603 * @returns The old value.
2604 * @param pu32 Pointer to the value.
2605 * @param u32 Number to subtract.
2606 */
2607DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2608{
2609 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2610}
2611
2612
2613/**
2614 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2615 *
2616 * @returns The old value.
2617 * @param pi32 Pointer to the value.
2618 * @param i32 Number to subtract.
2619 */
2620DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2621{
2622 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2623}
2624
2625
2626/**
2627 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2628 *
2629 * @returns The old value.
2630 * @param pu64 Pointer to the value.
2631 * @param u64 Number to subtract.
2632 */
2633DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2634{
2635 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2636}
2637
2638
2639/**
2640 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2641 *
2642 * @returns The old value.
2643 * @param pi64 Pointer to the value.
2644 * @param i64 Number to subtract.
2645 */
2646DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2647{
2648 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2649}
2650
2651
2652/**
2653 * Atomically exchanges and subtracts to a size_t value, ordered.
2654 *
2655 * @returns The old value.
2656 * @param pcb Pointer to the size_t value.
2657 * @param cb Number to subtract.
2658 */
2659DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2660{
2661#if ARCH_BITS == 64
2662 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2663#elif ARCH_BITS == 32
2664 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2665#else
2666# error "Unsupported ARCH_BITS value"
2667#endif
2668}
2669
2670
/**
 * Atomically subtracts from a value whose size might differ between platforms
 * or compilers and fetches the previous value, ordered.
 *
 * Dispatches on sizeof(*(pu)); only 4 and 8 byte variables are supported.
 * Any other size triggers an assertion and leaves *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.  It is written through a
 *                  pointer of the same width as *pu.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2687
2688
2689/**
2690 * Atomically increment a 32-bit value, ordered.
2691 *
2692 * @returns The new value.
2693 * @param pu32 Pointer to the value to increment.
2694 */
2695#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2696DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2697#else
2698DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2699{
2700 uint32_t u32;
2701# if RT_INLINE_ASM_USES_INTRIN
2702 u32 = _InterlockedIncrement((long *)pu32);
2703 return u32;
2704
2705# elif RT_INLINE_ASM_GNU_STYLE
2706 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2707 : "=r" (u32),
2708 "=m" (*pu32)
2709 : "0" (1),
2710 "m" (*pu32)
2711 : "memory");
2712 return u32+1;
2713# else
2714 __asm
2715 {
2716 mov eax, 1
2717# ifdef RT_ARCH_AMD64
2718 mov rdx, [pu32]
2719 lock xadd [rdx], eax
2720# else
2721 mov edx, [pu32]
2722 lock xadd [edx], eax
2723# endif
2724 mov u32, eax
2725 }
2726 return u32+1;
2727# endif
2728}
2729#endif
2730
2731
2732/**
2733 * Atomically increment a signed 32-bit value, ordered.
2734 *
2735 * @returns The new value.
2736 * @param pi32 Pointer to the value to increment.
2737 */
2738DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2739{
2740 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2741}
2742
2743
2744/**
2745 * Atomically increment a 64-bit value, ordered.
2746 *
2747 * @returns The new value.
2748 * @param pu64 Pointer to the value to increment.
2749 */
2750#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2751DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2752#else
2753DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2754{
2755# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2756 uint64_t u64;
2757 u64 = _InterlockedIncrement64((__int64 *)pu64);
2758 return u64;
2759
2760# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2761 uint64_t u64;
2762 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2763 : "=r" (u64),
2764 "=m" (*pu64)
2765 : "0" (1),
2766 "m" (*pu64)
2767 : "memory");
2768 return u64 + 1;
2769# else
2770 return ASMAtomicAddU64(pu64, 1) + 1;
2771# endif
2772}
2773#endif
2774
2775
2776/**
2777 * Atomically increment a signed 64-bit value, ordered.
2778 *
2779 * @returns The new value.
2780 * @param pi64 Pointer to the value to increment.
2781 */
2782DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2783{
2784 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2785}
2786
2787
2788/**
2789 * Atomically increment a size_t value, ordered.
2790 *
2791 * @returns The new value.
2792 * @param pcb Pointer to the value to increment.
2793 */
2794DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
2795{
2796#if ARCH_BITS == 64
2797 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2798#elif ARCH_BITS == 32
2799 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2800#else
2801# error "Unsupported ARCH_BITS value"
2802#endif
2803}
2804
2805
2806/**
2807 * Atomically decrement an unsigned 32-bit value, ordered.
2808 *
2809 * @returns The new value.
2810 * @param pu32 Pointer to the value to decrement.
2811 */
2812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2813DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2814#else
2815DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2816{
2817 uint32_t u32;
2818# if RT_INLINE_ASM_USES_INTRIN
2819 u32 = _InterlockedDecrement((long *)pu32);
2820 return u32;
2821
2822# elif RT_INLINE_ASM_GNU_STYLE
2823 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2824 : "=r" (u32),
2825 "=m" (*pu32)
2826 : "0" (-1),
2827 "m" (*pu32)
2828 : "memory");
2829 return u32-1;
2830# else
2831 __asm
2832 {
2833 mov eax, -1
2834# ifdef RT_ARCH_AMD64
2835 mov rdx, [pu32]
2836 lock xadd [rdx], eax
2837# else
2838 mov edx, [pu32]
2839 lock xadd [edx], eax
2840# endif
2841 mov u32, eax
2842 }
2843 return u32-1;
2844# endif
2845}
2846#endif
2847
2848
2849/**
2850 * Atomically decrement a signed 32-bit value, ordered.
2851 *
2852 * @returns The new value.
2853 * @param pi32 Pointer to the value to decrement.
2854 */
2855DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2856{
2857 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2858}
2859
2860
2861/**
2862 * Atomically decrement an unsigned 64-bit value, ordered.
2863 *
2864 * @returns The new value.
2865 * @param pu64 Pointer to the value to decrement.
2866 */
2867#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2868DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2869#else
2870DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2871{
2872# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2873 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2874 return u64;
2875
2876# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2877 uint64_t u64;
2878 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2879 : "=r" (u64),
2880 "=m" (*pu64)
2881 : "0" (~(uint64_t)0),
2882 "m" (*pu64)
2883 : "memory");
2884 return u64-1;
2885# else
2886 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2887# endif
2888}
2889#endif
2890
2891
2892/**
2893 * Atomically decrement a signed 64-bit value, ordered.
2894 *
2895 * @returns The new value.
2896 * @param pi64 Pointer to the value to decrement.
2897 */
2898DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2899{
2900 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2901}
2902
2903
2904/**
2905 * Atomically decrement a size_t value, ordered.
2906 *
2907 * @returns The new value.
2908 * @param pcb Pointer to the value to decrement.
2909 */
2910DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
2911{
2912#if ARCH_BITS == 64
2913 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2914#elif ARCH_BITS == 32
2915 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2916#else
2917# error "Unsupported ARCH_BITS value"
2918#endif
2919}
2920
2921
2922/**
2923 * Atomically Or an unsigned 32-bit value, ordered.
2924 *
2925 * @param pu32 Pointer to the pointer variable to OR u32 with.
2926 * @param u32 The value to OR *pu32 with.
2927 */
2928#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2929DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2930#else
2931DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2932{
2933# if RT_INLINE_ASM_USES_INTRIN
2934 _InterlockedOr((long volatile *)pu32, (long)u32);
2935
2936# elif RT_INLINE_ASM_GNU_STYLE
2937 __asm__ __volatile__("lock; orl %1, %0\n\t"
2938 : "=m" (*pu32)
2939 : "ir" (u32),
2940 "m" (*pu32));
2941# else
2942 __asm
2943 {
2944 mov eax, [u32]
2945# ifdef RT_ARCH_AMD64
2946 mov rdx, [pu32]
2947 lock or [rdx], eax
2948# else
2949 mov edx, [pu32]
2950 lock or [edx], eax
2951# endif
2952 }
2953# endif
2954}
2955#endif
2956
2957
2958/**
2959 * Atomically Or a signed 32-bit value, ordered.
2960 *
2961 * @param pi32 Pointer to the pointer variable to OR u32 with.
2962 * @param i32 The value to OR *pu32 with.
2963 */
2964DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2965{
2966 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2967}
2968
2969
2970/**
2971 * Atomically Or an unsigned 64-bit value, ordered.
2972 *
2973 * @param pu64 Pointer to the pointer variable to OR u64 with.
2974 * @param u64 The value to OR *pu64 with.
2975 */
2976#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2977DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2978#else
2979DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2980{
2981# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2982 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2983
2984# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2985 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2986 : "=m" (*pu64)
2987 : "r" (u64),
2988 "m" (*pu64));
2989# else
2990 for (;;)
2991 {
2992 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2993 uint64_t u64New = u64Old | u64;
2994 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2995 break;
2996 ASMNopPause();
2997 }
2998# endif
2999}
3000#endif
3001
3002
3003/**
3004 * Atomically Or a signed 64-bit value, ordered.
3005 *
3006 * @param pi64 Pointer to the pointer variable to OR u64 with.
3007 * @param i64 The value to OR *pu64 with.
3008 */
3009DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3010{
3011 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3012}
3013/**
3014 * Atomically And an unsigned 32-bit value, ordered.
3015 *
3016 * @param pu32 Pointer to the pointer variable to AND u32 with.
3017 * @param u32 The value to AND *pu32 with.
3018 */
3019#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3020DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3021#else
3022DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3023{
3024# if RT_INLINE_ASM_USES_INTRIN
3025 _InterlockedAnd((long volatile *)pu32, u32);
3026
3027# elif RT_INLINE_ASM_GNU_STYLE
3028 __asm__ __volatile__("lock; andl %1, %0\n\t"
3029 : "=m" (*pu32)
3030 : "ir" (u32),
3031 "m" (*pu32));
3032# else
3033 __asm
3034 {
3035 mov eax, [u32]
3036# ifdef RT_ARCH_AMD64
3037 mov rdx, [pu32]
3038 lock and [rdx], eax
3039# else
3040 mov edx, [pu32]
3041 lock and [edx], eax
3042# endif
3043 }
3044# endif
3045}
3046#endif
3047
3048
3049/**
3050 * Atomically And a signed 32-bit value, ordered.
3051 *
3052 * @param pi32 Pointer to the pointer variable to AND i32 with.
3053 * @param i32 The value to AND *pi32 with.
3054 */
3055DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3056{
3057 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3058}
3059
3060
3061/**
3062 * Atomically And an unsigned 64-bit value, ordered.
3063 *
3064 * @param pu64 Pointer to the pointer variable to AND u64 with.
3065 * @param u64 The value to AND *pu64 with.
3066 */
3067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3068DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3069#else
3070DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3071{
3072# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3073 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3074
3075# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3076 __asm__ __volatile__("lock; andq %1, %0\n\t"
3077 : "=m" (*pu64)
3078 : "r" (u64),
3079 "m" (*pu64));
3080# else
3081 for (;;)
3082 {
3083 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3084 uint64_t u64New = u64Old & u64;
3085 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3086 break;
3087 ASMNopPause();
3088 }
3089# endif
3090}
3091#endif
3092
3093
3094/**
3095 * Atomically And a signed 64-bit value, ordered.
3096 *
3097 * @param pi64 Pointer to the pointer variable to AND i64 with.
3098 * @param i64 The value to AND *pi64 with.
3099 */
3100DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3101{
3102 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3103}
3104
3105
3106
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the page helpers in this file: 0x2000 on SPARC64 and
 * 0x1000 everywhere else.  It is defined locally so that iprt/param.h need
 * not be dragged in.  If a PAGE_SIZE macro is already visible (and
 * NT_INCLUDED is not defined) it is cross-checked against this value.
 * @internal
 */
#if !defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#endif
3126
3127/**
3128 * Zeros a 4K memory page.
3129 *
3130 * @param pv Pointer to the memory block. This must be page aligned.
3131 */
3132#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3133DECLASM(void) ASMMemZeroPage(volatile void *pv);
3134# else
3135DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3136{
3137# if RT_INLINE_ASM_USES_INTRIN
3138# ifdef RT_ARCH_AMD64
3139 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3140# else
3141 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3142# endif
3143
3144# elif RT_INLINE_ASM_GNU_STYLE
3145 RTCCUINTREG uDummy;
3146# ifdef RT_ARCH_AMD64
3147 __asm__ __volatile__("rep stosq"
3148 : "=D" (pv),
3149 "=c" (uDummy)
3150 : "0" (pv),
3151 "c" (RT_ASM_PAGE_SIZE >> 3),
3152 "a" (0)
3153 : "memory");
3154# else
3155 __asm__ __volatile__("rep stosl"
3156 : "=D" (pv),
3157 "=c" (uDummy)
3158 : "0" (pv),
3159 "c" (RT_ASM_PAGE_SIZE >> 2),
3160 "a" (0)
3161 : "memory");
3162# endif
3163# else
3164 __asm
3165 {
3166# ifdef RT_ARCH_AMD64
3167 xor rax, rax
3168 mov ecx, 0200h
3169 mov rdi, [pv]
3170 rep stosq
3171# else
3172 xor eax, eax
3173 mov ecx, 0400h
3174 mov edi, [pv]
3175 rep stosd
3176# endif
3177 }
3178# endif
3179}
3180# endif
3181
3182
3183/**
3184 * Zeros a memory block with a 32-bit aligned size.
3185 *
3186 * @param pv Pointer to the memory block.
3187 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3188 */
3189#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3190DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3191#else
3192DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3193{
3194# if RT_INLINE_ASM_USES_INTRIN
3195# ifdef RT_ARCH_AMD64
3196 if (!(cb & 7))
3197 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3198 else
3199# endif
3200 __stosd((unsigned long *)pv, 0, cb / 4);
3201
3202# elif RT_INLINE_ASM_GNU_STYLE
3203 __asm__ __volatile__("rep stosl"
3204 : "=D" (pv),
3205 "=c" (cb)
3206 : "0" (pv),
3207 "1" (cb >> 2),
3208 "a" (0)
3209 : "memory");
3210# else
3211 __asm
3212 {
3213 xor eax, eax
3214# ifdef RT_ARCH_AMD64
3215 mov rcx, [cb]
3216 shr rcx, 2
3217 mov rdi, [pv]
3218# else
3219 mov ecx, [cb]
3220 shr ecx, 2
3221 mov edi, [pv]
3222# endif
3223 rep stosd
3224 }
3225# endif
3226}
3227#endif
3228
3229
3230/**
3231 * Fills a memory block with a 32-bit aligned size.
3232 *
3233 * @param pv Pointer to the memory block.
3234 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3235 * @param u32 The value to fill with.
3236 */
3237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3238DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3239#else
3240DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3241{
3242# if RT_INLINE_ASM_USES_INTRIN
3243# ifdef RT_ARCH_AMD64
3244 if (!(cb & 7))
3245 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3246 else
3247# endif
3248 __stosd((unsigned long *)pv, u32, cb / 4);
3249
3250# elif RT_INLINE_ASM_GNU_STYLE
3251 __asm__ __volatile__("rep stosl"
3252 : "=D" (pv),
3253 "=c" (cb)
3254 : "0" (pv),
3255 "1" (cb >> 2),
3256 "a" (u32)
3257 : "memory");
3258# else
3259 __asm
3260 {
3261# ifdef RT_ARCH_AMD64
3262 mov rcx, [cb]
3263 shr rcx, 2
3264 mov rdi, [pv]
3265# else
3266 mov ecx, [cb]
3267 shr ecx, 2
3268 mov edi, [pv]
3269# endif
3270 mov eax, [u32]
3271 rep stosd
3272 }
3273# endif
3274}
3275#endif
3276
3277
3278/**
3279 * Checks if a memory page is all zeros.
3280 *
3281 * @returns true / false.
3282 *
3283 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3284 * boundary
3285 */
3286DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3287{
3288# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3289 union { RTCCUINTREG r; bool f; } uAX;
3290 RTCCUINTREG xCX, xDI;
3291 Assert(!((uintptr_t)pvPage & 15));
3292 __asm__ __volatile__("repe; "
3293# ifdef RT_ARCH_AMD64
3294 "scasq\n\t"
3295# else
3296 "scasl\n\t"
3297# endif
3298 "setnc %%al\n\t"
3299 : "=&c" (xCX),
3300 "=&D" (xDI),
3301 "=&a" (uAX.r)
3302 : "mr" (pvPage),
3303# ifdef RT_ARCH_AMD64
3304 "0" (RT_ASM_PAGE_SIZE/8),
3305# else
3306 "0" (RT_ASM_PAGE_SIZE/4),
3307# endif
3308 "1" (pvPage),
3309 "2" (0));
3310 return uAX.f;
3311# else
3312 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3313 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3314 Assert(!((uintptr_t)pvPage & 15));
3315 for (;;)
3316 {
3317 if (puPtr[0]) return false;
3318 if (puPtr[4]) return false;
3319
3320 if (puPtr[2]) return false;
3321 if (puPtr[6]) return false;
3322
3323 if (puPtr[1]) return false;
3324 if (puPtr[5]) return false;
3325
3326 if (puPtr[3]) return false;
3327 if (puPtr[7]) return false;
3328
3329 if (!--cLeft)
3330 return true;
3331 puPtr += 8;
3332 }
3333 return true;
3334# endif
3335}
3336
3337
3338/**
3339 * Checks if a memory block is filled with the specified byte.
3340 *
3341 * This is a sort of inverted memchr.
3342 *
3343 * @returns Pointer to the byte which doesn't equal u8.
3344 * @returns NULL if all equal to u8.
3345 *
3346 * @param pv Pointer to the memory block.
3347 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3348 * @param u8 The value it's supposed to be filled with.
3349 *
3350 * @todo Fix name, it is a predicate function but it's not returning boolean!
3351 */
3352DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3353{
3354/** @todo rewrite this in inline assembly? */
3355 uint8_t const *pb = (uint8_t const *)pv;
3356 for (; cb; cb--, pb++)
3357 if (RT_UNLIKELY(*pb != u8))
3358 return (void *)pb;
3359 return NULL;
3360}
3361
3362
3363/**
3364 * Checks if a memory block is filled with the specified 32-bit value.
3365 *
3366 * This is a sort of inverted memchr.
3367 *
3368 * @returns Pointer to the first value which doesn't equal u32.
3369 * @returns NULL if all equal to u32.
3370 *
3371 * @param pv Pointer to the memory block.
3372 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3373 * @param u32 The value it's supposed to be filled with.
3374 *
3375 * @todo Fix name, it is a predicate function but it's not returning boolean!
3376 */
3377DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3378{
3379/** @todo rewrite this in inline assembly? */
3380 uint32_t const *pu32 = (uint32_t const *)pv;
3381 for (; cb; cb -= 4, pu32++)
3382 if (RT_UNLIKELY(*pu32 != u32))
3383 return (uint32_t *)pu32;
3384 return NULL;
3385}
3386
3387
3388/**
3389 * Probes a byte pointer for read access.
3390 *
3391 * While the function will not fault if the byte is not read accessible,
3392 * the idea is to do this in a safe place like before acquiring locks
3393 * and such like.
3394 *
3395 * Also, this functions guarantees that an eager compiler is not going
3396 * to optimize the probing away.
3397 *
3398 * @param pvByte Pointer to the byte.
3399 */
3400#if RT_INLINE_ASM_EXTERNAL
3401DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3402#else
3403DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3404{
3405 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3406 uint8_t u8;
3407# if RT_INLINE_ASM_GNU_STYLE
3408 __asm__ __volatile__("movb (%1), %0\n\t"
3409 : "=r" (u8)
3410 : "r" (pvByte));
3411# else
3412 __asm
3413 {
3414# ifdef RT_ARCH_AMD64
3415 mov rax, [pvByte]
3416 mov al, [rax]
3417# else
3418 mov eax, [pvByte]
3419 mov al, [eax]
3420# endif
3421 mov [u8], al
3422 }
3423# endif
3424 return u8;
3425}
3426#endif
3427
3428/**
3429 * Probes a buffer for read access page by page.
3430 *
3431 * While the function will fault if the buffer is not fully read
3432 * accessible, the idea is to do this in a safe place like before
3433 * acquiring locks and such like.
3434 *
3435 * Also, this functions guarantees that an eager compiler is not going
3436 * to optimize the probing away.
3437 *
3438 * @param pvBuf Pointer to the buffer.
3439 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3440 */
3441DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3442{
3443 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3444 /* the first byte */
3445 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3446 ASMProbeReadByte(pu8);
3447
3448 /* the pages in between pages. */
3449 while (cbBuf > RT_ASM_PAGE_SIZE)
3450 {
3451 ASMProbeReadByte(pu8);
3452 cbBuf -= RT_ASM_PAGE_SIZE;
3453 pu8 += RT_ASM_PAGE_SIZE;
3454 }
3455
3456 /* the last byte */
3457 ASMProbeReadByte(pu8 + cbBuf - 1);
3458}
3459
3460
3461
3462/** @defgroup grp_inline_bits Bit Operations
3463 * @{
3464 */
3465
3466
3467/**
3468 * Sets a bit in a bitmap.
3469 *
3470 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3471 * @param iBit The bit to set.
3472 *
3473 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3474 * However, doing so will yield better performance as well as avoiding
3475 * traps accessing the last bits in the bitmap.
3476 */
3477#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3478DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3479#else
3480DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3481{
3482# if RT_INLINE_ASM_USES_INTRIN
3483 _bittestandset((long *)pvBitmap, iBit);
3484
3485# elif RT_INLINE_ASM_GNU_STYLE
3486 __asm__ __volatile__("btsl %1, %0"
3487 : "=m" (*(volatile long *)pvBitmap)
3488 : "Ir" (iBit),
3489 "m" (*(volatile long *)pvBitmap)
3490 : "memory");
3491# else
3492 __asm
3493 {
3494# ifdef RT_ARCH_AMD64
3495 mov rax, [pvBitmap]
3496 mov edx, [iBit]
3497 bts [rax], edx
3498# else
3499 mov eax, [pvBitmap]
3500 mov edx, [iBit]
3501 bts [eax], edx
3502# endif
3503 }
3504# endif
3505}
3506#endif
3507
3508
3509/**
3510 * Atomically sets a bit in a bitmap, ordered.
3511 *
3512 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3513 * the memory access isn't atomic!
3514 * @param iBit The bit to set.
3515 */
3516#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3517DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3518#else
3519DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3520{
3521 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3522# if RT_INLINE_ASM_USES_INTRIN
3523 _interlockedbittestandset((long *)pvBitmap, iBit);
3524# elif RT_INLINE_ASM_GNU_STYLE
3525 __asm__ __volatile__("lock; btsl %1, %0"
3526 : "=m" (*(volatile long *)pvBitmap)
3527 : "Ir" (iBit),
3528 "m" (*(volatile long *)pvBitmap)
3529 : "memory");
3530# else
3531 __asm
3532 {
3533# ifdef RT_ARCH_AMD64
3534 mov rax, [pvBitmap]
3535 mov edx, [iBit]
3536 lock bts [rax], edx
3537# else
3538 mov eax, [pvBitmap]
3539 mov edx, [iBit]
3540 lock bts [eax], edx
3541# endif
3542 }
3543# endif
3544}
3545#endif
3546
3547
3548/**
3549 * Clears a bit in a bitmap.
3550 *
3551 * @param pvBitmap Pointer to the bitmap.
3552 * @param iBit The bit to clear.
3553 *
3554 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3555 * However, doing so will yield better performance as well as avoiding
3556 * traps accessing the last bits in the bitmap.
3557 */
3558#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3559DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3560#else
3561DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3562{
3563# if RT_INLINE_ASM_USES_INTRIN
3564 _bittestandreset((long *)pvBitmap, iBit);
3565
3566# elif RT_INLINE_ASM_GNU_STYLE
3567 __asm__ __volatile__("btrl %1, %0"
3568 : "=m" (*(volatile long *)pvBitmap)
3569 : "Ir" (iBit),
3570 "m" (*(volatile long *)pvBitmap)
3571 : "memory");
3572# else
3573 __asm
3574 {
3575# ifdef RT_ARCH_AMD64
3576 mov rax, [pvBitmap]
3577 mov edx, [iBit]
3578 btr [rax], edx
3579# else
3580 mov eax, [pvBitmap]
3581 mov edx, [iBit]
3582 btr [eax], edx
3583# endif
3584 }
3585# endif
3586}
3587#endif
3588
3589
3590/**
3591 * Atomically clears a bit in a bitmap, ordered.
3592 *
3593 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3594 * the memory access isn't atomic!
3595 * @param iBit The bit to toggle set.
3596 * @remarks No memory barrier, take care on smp.
3597 */
3598#if RT_INLINE_ASM_EXTERNAL
3599DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3600#else
3601DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3602{
3603 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3604# if RT_INLINE_ASM_GNU_STYLE
3605 __asm__ __volatile__("lock; btrl %1, %0"
3606 : "=m" (*(volatile long *)pvBitmap)
3607 : "Ir" (iBit),
3608 "m" (*(volatile long *)pvBitmap)
3609 : "memory");
3610# else
3611 __asm
3612 {
3613# ifdef RT_ARCH_AMD64
3614 mov rax, [pvBitmap]
3615 mov edx, [iBit]
3616 lock btr [rax], edx
3617# else
3618 mov eax, [pvBitmap]
3619 mov edx, [iBit]
3620 lock btr [eax], edx
3621# endif
3622 }
3623# endif
3624}
3625#endif
3626
3627
3628/**
3629 * Toggles a bit in a bitmap.
3630 *
3631 * @param pvBitmap Pointer to the bitmap.
3632 * @param iBit The bit to toggle.
3633 *
3634 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3635 * However, doing so will yield better performance as well as avoiding
3636 * traps accessing the last bits in the bitmap.
3637 */
3638#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3639DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3640#else
3641DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3642{
3643# if RT_INLINE_ASM_USES_INTRIN
3644 _bittestandcomplement((long *)pvBitmap, iBit);
3645# elif RT_INLINE_ASM_GNU_STYLE
3646 __asm__ __volatile__("btcl %1, %0"
3647 : "=m" (*(volatile long *)pvBitmap)
3648 : "Ir" (iBit),
3649 "m" (*(volatile long *)pvBitmap)
3650 : "memory");
3651# else
3652 __asm
3653 {
3654# ifdef RT_ARCH_AMD64
3655 mov rax, [pvBitmap]
3656 mov edx, [iBit]
3657 btc [rax], edx
3658# else
3659 mov eax, [pvBitmap]
3660 mov edx, [iBit]
3661 btc [eax], edx
3662# endif
3663 }
3664# endif
3665}
3666#endif
3667
3668
3669/**
3670 * Atomically toggles a bit in a bitmap, ordered.
3671 *
3672 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3673 * the memory access isn't atomic!
3674 * @param iBit The bit to test and set.
3675 */
3676#if RT_INLINE_ASM_EXTERNAL
3677DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3678#else
3679DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3680{
3681 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3682# if RT_INLINE_ASM_GNU_STYLE
3683 __asm__ __volatile__("lock; btcl %1, %0"
3684 : "=m" (*(volatile long *)pvBitmap)
3685 : "Ir" (iBit),
3686 "m" (*(volatile long *)pvBitmap)
3687 : "memory");
3688# else
3689 __asm
3690 {
3691# ifdef RT_ARCH_AMD64
3692 mov rax, [pvBitmap]
3693 mov edx, [iBit]
3694 lock btc [rax], edx
3695# else
3696 mov eax, [pvBitmap]
3697 mov edx, [iBit]
3698 lock btc [eax], edx
3699# endif
3700 }
3701# endif
3702}
3703#endif
3704
3705
3706/**
3707 * Tests and sets a bit in a bitmap.
3708 *
3709 * @returns true if the bit was set.
3710 * @returns false if the bit was clear.
3711 *
3712 * @param pvBitmap Pointer to the bitmap.
3713 * @param iBit The bit to test and set.
3714 *
3715 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3716 * However, doing so will yield better performance as well as avoiding
3717 * traps accessing the last bits in the bitmap.
3718 */
3719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3720DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3721#else
3722DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3723{
3724 union { bool f; uint32_t u32; uint8_t u8; } rc;
3725# if RT_INLINE_ASM_USES_INTRIN
3726 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3727
3728# elif RT_INLINE_ASM_GNU_STYLE
3729 __asm__ __volatile__("btsl %2, %1\n\t"
3730 "setc %b0\n\t"
3731 "andl $1, %0\n\t"
3732 : "=q" (rc.u32),
3733 "=m" (*(volatile long *)pvBitmap)
3734 : "Ir" (iBit),
3735 "m" (*(volatile long *)pvBitmap)
3736 : "memory");
3737# else
3738 __asm
3739 {
3740 mov edx, [iBit]
3741# ifdef RT_ARCH_AMD64
3742 mov rax, [pvBitmap]
3743 bts [rax], edx
3744# else
3745 mov eax, [pvBitmap]
3746 bts [eax], edx
3747# endif
3748 setc al
3749 and eax, 1
3750 mov [rc.u32], eax
3751 }
3752# endif
3753 return rc.f;
3754}
3755#endif
3756
3757
3758/**
3759 * Atomically tests and sets a bit in a bitmap, ordered.
3760 *
3761 * @returns true if the bit was set.
3762 * @returns false if the bit was clear.
3763 *
3764 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3765 * the memory access isn't atomic!
3766 * @param iBit The bit to set.
3767 */
3768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3769DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3770#else
3771DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3772{
3773 union { bool f; uint32_t u32; uint8_t u8; } rc;
3774 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3775# if RT_INLINE_ASM_USES_INTRIN
3776 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3777# elif RT_INLINE_ASM_GNU_STYLE
3778 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3779 "setc %b0\n\t"
3780 "andl $1, %0\n\t"
3781 : "=q" (rc.u32),
3782 "=m" (*(volatile long *)pvBitmap)
3783 : "Ir" (iBit),
3784 "m" (*(volatile long *)pvBitmap)
3785 : "memory");
3786# else
3787 __asm
3788 {
3789 mov edx, [iBit]
3790# ifdef RT_ARCH_AMD64
3791 mov rax, [pvBitmap]
3792 lock bts [rax], edx
3793# else
3794 mov eax, [pvBitmap]
3795 lock bts [eax], edx
3796# endif
3797 setc al
3798 and eax, 1
3799 mov [rc.u32], eax
3800 }
3801# endif
3802 return rc.f;
3803}
3804#endif
3805
3806
3807/**
3808 * Tests and clears a bit in a bitmap.
3809 *
3810 * @returns true if the bit was set.
3811 * @returns false if the bit was clear.
3812 *
3813 * @param pvBitmap Pointer to the bitmap.
3814 * @param iBit The bit to test and clear.
3815 *
3816 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3817 * However, doing so will yield better performance as well as avoiding
3818 * traps accessing the last bits in the bitmap.
3819 */
3820#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3821DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3822#else
3823DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3824{
3825 union { bool f; uint32_t u32; uint8_t u8; } rc;
3826# if RT_INLINE_ASM_USES_INTRIN
3827 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3828
3829# elif RT_INLINE_ASM_GNU_STYLE
3830 __asm__ __volatile__("btrl %2, %1\n\t"
3831 "setc %b0\n\t"
3832 "andl $1, %0\n\t"
3833 : "=q" (rc.u32),
3834 "=m" (*(volatile long *)pvBitmap)
3835 : "Ir" (iBit),
3836 "m" (*(volatile long *)pvBitmap)
3837 : "memory");
3838# else
3839 __asm
3840 {
3841 mov edx, [iBit]
3842# ifdef RT_ARCH_AMD64
3843 mov rax, [pvBitmap]
3844 btr [rax], edx
3845# else
3846 mov eax, [pvBitmap]
3847 btr [eax], edx
3848# endif
3849 setc al
3850 and eax, 1
3851 mov [rc.u32], eax
3852 }
3853# endif
3854 return rc.f;
3855}
3856#endif
3857
3858
3859/**
3860 * Atomically tests and clears a bit in a bitmap, ordered.
3861 *
3862 * @returns true if the bit was set.
3863 * @returns false if the bit was clear.
3864 *
3865 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3866 * the memory access isn't atomic!
3867 * @param iBit The bit to test and clear.
3868 *
3869 * @remarks No memory barrier, take care on smp.
3870 */
3871#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3872DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3873#else
3874DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3875{
3876 union { bool f; uint32_t u32; uint8_t u8; } rc;
3877 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3878# if RT_INLINE_ASM_USES_INTRIN
3879 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3880
3881# elif RT_INLINE_ASM_GNU_STYLE
3882 __asm__ __volatile__("lock; btrl %2, %1\n\t"
3883 "setc %b0\n\t"
3884 "andl $1, %0\n\t"
3885 : "=q" (rc.u32),
3886 "=m" (*(volatile long *)pvBitmap)
3887 : "Ir" (iBit),
3888 "m" (*(volatile long *)pvBitmap)
3889 : "memory");
3890# else
3891 __asm
3892 {
3893 mov edx, [iBit]
3894# ifdef RT_ARCH_AMD64
3895 mov rax, [pvBitmap]
3896 lock btr [rax], edx
3897# else
3898 mov eax, [pvBitmap]
3899 lock btr [eax], edx
3900# endif
3901 setc al
3902 and eax, 1
3903 mov [rc.u32], eax
3904 }
3905# endif
3906 return rc.f;
3907}
3908#endif
3909
3910
3911/**
3912 * Tests and toggles a bit in a bitmap.
3913 *
3914 * @returns true if the bit was set.
3915 * @returns false if the bit was clear.
3916 *
3917 * @param pvBitmap Pointer to the bitmap.
3918 * @param iBit The bit to test and toggle.
3919 *
3920 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3921 * However, doing so will yield better performance as well as avoiding
3922 * traps accessing the last bits in the bitmap.
3923 */
3924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3925DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3926#else
3927DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3928{
3929 union { bool f; uint32_t u32; uint8_t u8; } rc;
3930# if RT_INLINE_ASM_USES_INTRIN
3931 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3932
3933# elif RT_INLINE_ASM_GNU_STYLE
3934 __asm__ __volatile__("btcl %2, %1\n\t"
3935 "setc %b0\n\t"
3936 "andl $1, %0\n\t"
3937 : "=q" (rc.u32),
3938 "=m" (*(volatile long *)pvBitmap)
3939 : "Ir" (iBit),
3940 "m" (*(volatile long *)pvBitmap)
3941 : "memory");
3942# else
3943 __asm
3944 {
3945 mov edx, [iBit]
3946# ifdef RT_ARCH_AMD64
3947 mov rax, [pvBitmap]
3948 btc [rax], edx
3949# else
3950 mov eax, [pvBitmap]
3951 btc [eax], edx
3952# endif
3953 setc al
3954 and eax, 1
3955 mov [rc.u32], eax
3956 }
3957# endif
3958 return rc.f;
3959}
3960#endif
3961
3962
3963/**
3964 * Atomically tests and toggles a bit in a bitmap, ordered.
3965 *
3966 * @returns true if the bit was set.
3967 * @returns false if the bit was clear.
3968 *
3969 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3970 * the memory access isn't atomic!
3971 * @param iBit The bit to test and toggle.
3972 */
3973#if RT_INLINE_ASM_EXTERNAL
3974DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3975#else
3976DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3977{
3978 union { bool f; uint32_t u32; uint8_t u8; } rc;
3979 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3980# if RT_INLINE_ASM_GNU_STYLE
3981 __asm__ __volatile__("lock; btcl %2, %1\n\t"
3982 "setc %b0\n\t"
3983 "andl $1, %0\n\t"
3984 : "=q" (rc.u32),
3985 "=m" (*(volatile long *)pvBitmap)
3986 : "Ir" (iBit),
3987 "m" (*(volatile long *)pvBitmap)
3988 : "memory");
3989# else
3990 __asm
3991 {
3992 mov edx, [iBit]
3993# ifdef RT_ARCH_AMD64
3994 mov rax, [pvBitmap]
3995 lock btc [rax], edx
3996# else
3997 mov eax, [pvBitmap]
3998 lock btc [eax], edx
3999# endif
4000 setc al
4001 and eax, 1
4002 mov [rc.u32], eax
4003 }
4004# endif
4005 return rc.f;
4006}
4007#endif
4008
4009
4010/**
4011 * Tests if a bit in a bitmap is set.
4012 *
4013 * @returns true if the bit is set.
4014 * @returns false if the bit is clear.
4015 *
4016 * @param pvBitmap Pointer to the bitmap.
4017 * @param iBit The bit to test.
4018 *
4019 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4020 * However, doing so will yield better performance as well as avoiding
4021 * traps accessing the last bits in the bitmap.
4022 */
4023#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4024DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4025#else
4026DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4027{
4028 union { bool f; uint32_t u32; uint8_t u8; } rc;
4029# if RT_INLINE_ASM_USES_INTRIN
4030 rc.u32 = _bittest((long *)pvBitmap, iBit);
4031# elif RT_INLINE_ASM_GNU_STYLE
4032
4033 __asm__ __volatile__("btl %2, %1\n\t"
4034 "setc %b0\n\t"
4035 "andl $1, %0\n\t"
4036 : "=q" (rc.u32)
4037 : "m" (*(const volatile long *)pvBitmap),
4038 "Ir" (iBit)
4039 : "memory");
4040# else
4041 __asm
4042 {
4043 mov edx, [iBit]
4044# ifdef RT_ARCH_AMD64
4045 mov rax, [pvBitmap]
4046 bt [rax], edx
4047# else
4048 mov eax, [pvBitmap]
4049 bt [eax], edx
4050# endif
4051 setc al
4052 and eax, 1
4053 mov [rc.u32], eax
4054 }
4055# endif
4056 return rc.f;
4057}
4058#endif
4059
4060
4061/**
4062 * Clears a bit range within a bitmap.
4063 *
4064 * @param pvBitmap Pointer to the bitmap.
4065 * @param iBitStart The First bit to clear.
4066 * @param iBitEnd The first bit not to clear.
4067 */
4068DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4069{
4070 if (iBitStart < iBitEnd)
4071 {
4072 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4073 int iStart = iBitStart & ~31;
4074 int iEnd = iBitEnd & ~31;
4075 if (iStart == iEnd)
4076 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4077 else
4078 {
4079 /* bits in first dword. */
4080 if (iBitStart & 31)
4081 {
4082 *pu32 &= (1 << (iBitStart & 31)) - 1;
4083 pu32++;
4084 iBitStart = iStart + 32;
4085 }
4086
4087 /* whole dword. */
4088 if (iBitStart != iEnd)
4089 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4090
4091 /* bits in last dword. */
4092 if (iBitEnd & 31)
4093 {
4094 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4095 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4096 }
4097 }
4098 }
4099}
4100
4101
4102/**
4103 * Sets a bit range within a bitmap.
4104 *
4105 * @param pvBitmap Pointer to the bitmap.
4106 * @param iBitStart The First bit to set.
4107 * @param iBitEnd The first bit not to set.
4108 */
4109DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4110{
4111 if (iBitStart < iBitEnd)
4112 {
4113 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4114 int iStart = iBitStart & ~31;
4115 int iEnd = iBitEnd & ~31;
4116 if (iStart == iEnd)
4117 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4118 else
4119 {
4120 /* bits in first dword. */
4121 if (iBitStart & 31)
4122 {
4123 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4124 pu32++;
4125 iBitStart = iStart + 32;
4126 }
4127
4128 /* whole dword. */
4129 if (iBitStart != iEnd)
4130 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
4131
4132 /* bits in last dword. */
4133 if (iBitEnd & 31)
4134 {
4135 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4136 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4137 }
4138 }
4139 }
4140}
4141
4142
4143/**
4144 * Finds the first clear bit in a bitmap.
4145 *
4146 * @returns Index of the first zero bit.
4147 * @returns -1 if no clear bit was found.
4148 * @param pvBitmap Pointer to the bitmap.
4149 * @param cBits The number of bits in the bitmap. Multiple of 32.
4150 */
4151#if RT_INLINE_ASM_EXTERNAL
4152DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4153#else
4154DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4155{
4156 if (cBits)
4157 {
4158 int32_t iBit;
4159# if RT_INLINE_ASM_GNU_STYLE
4160 RTCCUINTREG uEAX, uECX, uEDI;
4161 cBits = RT_ALIGN_32(cBits, 32);
4162 __asm__ __volatile__("repe; scasl\n\t"
4163 "je 1f\n\t"
4164# ifdef RT_ARCH_AMD64
4165 "lea -4(%%rdi), %%rdi\n\t"
4166 "xorl (%%rdi), %%eax\n\t"
4167 "subq %5, %%rdi\n\t"
4168# else
4169 "lea -4(%%edi), %%edi\n\t"
4170 "xorl (%%edi), %%eax\n\t"
4171 "subl %5, %%edi\n\t"
4172# endif
4173 "shll $3, %%edi\n\t"
4174 "bsfl %%eax, %%edx\n\t"
4175 "addl %%edi, %%edx\n\t"
4176 "1:\t\n"
4177 : "=d" (iBit),
4178 "=&c" (uECX),
4179 "=&D" (uEDI),
4180 "=&a" (uEAX)
4181 : "0" (0xffffffff),
4182 "mr" (pvBitmap),
4183 "1" (cBits >> 5),
4184 "2" (pvBitmap),
4185 "3" (0xffffffff));
4186# else
4187 cBits = RT_ALIGN_32(cBits, 32);
4188 __asm
4189 {
4190# ifdef RT_ARCH_AMD64
4191 mov rdi, [pvBitmap]
4192 mov rbx, rdi
4193# else
4194 mov edi, [pvBitmap]
4195 mov ebx, edi
4196# endif
4197 mov edx, 0ffffffffh
4198 mov eax, edx
4199 mov ecx, [cBits]
4200 shr ecx, 5
4201 repe scasd
4202 je done
4203
4204# ifdef RT_ARCH_AMD64
4205 lea rdi, [rdi - 4]
4206 xor eax, [rdi]
4207 sub rdi, rbx
4208# else
4209 lea edi, [edi - 4]
4210 xor eax, [edi]
4211 sub edi, ebx
4212# endif
4213 shl edi, 3
4214 bsf edx, eax
4215 add edx, edi
4216 done:
4217 mov [iBit], edx
4218 }
4219# endif
4220 return iBit;
4221 }
4222 return -1;
4223}
4224#endif
4225
4226
4227/**
4228 * Finds the next clear bit in a bitmap.
4229 *
4230 * @returns Index of the first zero bit.
4231 * @returns -1 if no clear bit was found.
4232 * @param pvBitmap Pointer to the bitmap.
4233 * @param cBits The number of bits in the bitmap. Multiple of 32.
4234 * @param iBitPrev The bit returned from the last search.
4235 * The search will start at iBitPrev + 1.
4236 */
4237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4238DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4239#else
4240DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4241{
4242 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4243 int iBit = ++iBitPrev & 31;
4244 if (iBit)
4245 {
4246 /*
4247 * Inspect the 32-bit word containing the unaligned bit.
4248 */
4249 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4250
4251# if RT_INLINE_ASM_USES_INTRIN
4252 unsigned long ulBit = 0;
4253 if (_BitScanForward(&ulBit, u32))
4254 return ulBit + iBitPrev;
4255# else
4256# if RT_INLINE_ASM_GNU_STYLE
4257 __asm__ __volatile__("bsf %1, %0\n\t"
4258 "jnz 1f\n\t"
4259 "movl $-1, %0\n\t"
4260 "1:\n\t"
4261 : "=r" (iBit)
4262 : "r" (u32));
4263# else
4264 __asm
4265 {
4266 mov edx, [u32]
4267 bsf eax, edx
4268 jnz done
4269 mov eax, 0ffffffffh
4270 done:
4271 mov [iBit], eax
4272 }
4273# endif
4274 if (iBit >= 0)
4275 return iBit + iBitPrev;
4276# endif
4277
4278 /*
4279 * Skip ahead and see if there is anything left to search.
4280 */
4281 iBitPrev |= 31;
4282 iBitPrev++;
4283 if (cBits <= (uint32_t)iBitPrev)
4284 return -1;
4285 }
4286
4287 /*
4288 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4289 */
4290 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4291 if (iBit >= 0)
4292 iBit += iBitPrev;
4293 return iBit;
4294}
4295#endif
4296
4297
4298/**
4299 * Finds the first set bit in a bitmap.
4300 *
4301 * @returns Index of the first set bit.
4302 * @returns -1 if no clear bit was found.
4303 * @param pvBitmap Pointer to the bitmap.
4304 * @param cBits The number of bits in the bitmap. Multiple of 32.
4305 */
4306#if RT_INLINE_ASM_EXTERNAL
4307DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4308#else
4309DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4310{
4311 if (cBits)
4312 {
4313 int32_t iBit;
4314# if RT_INLINE_ASM_GNU_STYLE
4315 RTCCUINTREG uEAX, uECX, uEDI;
4316 cBits = RT_ALIGN_32(cBits, 32);
4317 __asm__ __volatile__("repe; scasl\n\t"
4318 "je 1f\n\t"
4319# ifdef RT_ARCH_AMD64
4320 "lea -4(%%rdi), %%rdi\n\t"
4321 "movl (%%rdi), %%eax\n\t"
4322 "subq %5, %%rdi\n\t"
4323# else
4324 "lea -4(%%edi), %%edi\n\t"
4325 "movl (%%edi), %%eax\n\t"
4326 "subl %5, %%edi\n\t"
4327# endif
4328 "shll $3, %%edi\n\t"
4329 "bsfl %%eax, %%edx\n\t"
4330 "addl %%edi, %%edx\n\t"
4331 "1:\t\n"
4332 : "=d" (iBit),
4333 "=&c" (uECX),
4334 "=&D" (uEDI),
4335 "=&a" (uEAX)
4336 : "0" (0xffffffff),
4337 "mr" (pvBitmap),
4338 "1" (cBits >> 5),
4339 "2" (pvBitmap),
4340 "3" (0));
4341# else
4342 cBits = RT_ALIGN_32(cBits, 32);
4343 __asm
4344 {
4345# ifdef RT_ARCH_AMD64
4346 mov rdi, [pvBitmap]
4347 mov rbx, rdi
4348# else
4349 mov edi, [pvBitmap]
4350 mov ebx, edi
4351# endif
4352 mov edx, 0ffffffffh
4353 xor eax, eax
4354 mov ecx, [cBits]
4355 shr ecx, 5
4356 repe scasd
4357 je done
4358# ifdef RT_ARCH_AMD64
4359 lea rdi, [rdi - 4]
4360 mov eax, [rdi]
4361 sub rdi, rbx
4362# else
4363 lea edi, [edi - 4]
4364 mov eax, [edi]
4365 sub edi, ebx
4366# endif
4367 shl edi, 3
4368 bsf edx, eax
4369 add edx, edi
4370 done:
4371 mov [iBit], edx
4372 }
4373# endif
4374 return iBit;
4375 }
4376 return -1;
4377}
4378#endif
4379
4380
4381/**
4382 * Finds the next set bit in a bitmap.
4383 *
4384 * @returns Index of the next set bit.
4385 * @returns -1 if no set bit was found.
4386 * @param pvBitmap Pointer to the bitmap.
4387 * @param cBits The number of bits in the bitmap. Multiple of 32.
4388 * @param iBitPrev The bit returned from the last search.
4389 * The search will start at iBitPrev + 1.
4390 */
4391#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4392DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4393#else
4394DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4395{
4396 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4397 int iBit = ++iBitPrev & 31;
4398 if (iBit)
4399 {
4400 /*
4401 * Inspect the 32-bit word containing the unaligned bit.
4402 */
4403 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4404
4405# if RT_INLINE_ASM_USES_INTRIN
4406 unsigned long ulBit = 0;
4407 if (_BitScanForward(&ulBit, u32))
4408 return ulBit + iBitPrev;
4409# else
4410# if RT_INLINE_ASM_GNU_STYLE
4411 __asm__ __volatile__("bsf %1, %0\n\t"
4412 "jnz 1f\n\t"
4413 "movl $-1, %0\n\t"
4414 "1:\n\t"
4415 : "=r" (iBit)
4416 : "r" (u32));
4417# else
4418 __asm
4419 {
4420 mov edx, [u32]
4421 bsf eax, edx
4422 jnz done
4423 mov eax, 0ffffffffh
4424 done:
4425 mov [iBit], eax
4426 }
4427# endif
4428 if (iBit >= 0)
4429 return iBit + iBitPrev;
4430# endif
4431
4432 /*
4433 * Skip ahead and see if there is anything left to search.
4434 */
4435 iBitPrev |= 31;
4436 iBitPrev++;
4437 if (cBits <= (uint32_t)iBitPrev)
4438 return -1;
4439 }
4440
4441 /*
4442 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4443 */
4444 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4445 if (iBit >= 0)
4446 iBit += iBitPrev;
4447 return iBit;
4448}
4449#endif
4450
4451
4452/**
4453 * Finds the first bit which is set in the given 32-bit integer.
4454 * Bits are numbered from 1 (least significant) to 32.
4455 *
4456 * @returns index [1..32] of the first set bit.
4457 * @returns 0 if all bits are cleared.
4458 * @param u32 Integer to search for set bits.
4459 * @remark Similar to ffs() in BSD.
4460 */
4461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4462DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4463#else
4464DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4465{
4466# if RT_INLINE_ASM_USES_INTRIN
4467 unsigned long iBit;
4468 if (_BitScanForward(&iBit, u32))
4469 iBit++;
4470 else
4471 iBit = 0;
4472# elif RT_INLINE_ASM_GNU_STYLE
4473 uint32_t iBit;
4474 __asm__ __volatile__("bsf %1, %0\n\t"
4475 "jnz 1f\n\t"
4476 "xorl %0, %0\n\t"
4477 "jmp 2f\n"
4478 "1:\n\t"
4479 "incl %0\n"
4480 "2:\n\t"
4481 : "=r" (iBit)
4482 : "rm" (u32));
4483# else
4484 uint32_t iBit;
4485 _asm
4486 {
4487 bsf eax, [u32]
4488 jnz found
4489 xor eax, eax
4490 jmp done
4491 found:
4492 inc eax
4493 done:
4494 mov [iBit], eax
4495 }
4496# endif
4497 return iBit;
4498}
4499#endif
4500
4501
4502/**
4503 * Finds the first bit which is set in the given 32-bit integer.
4504 * Bits are numbered from 1 (least significant) to 32.
4505 *
4506 * @returns index [1..32] of the first set bit.
4507 * @returns 0 if all bits are cleared.
4508 * @param i32 Integer to search for set bits.
4509 * @remark Similar to ffs() in BSD.
4510 */
4511DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4512{
4513 return ASMBitFirstSetU32((uint32_t)i32);
4514}
4515
4516
4517/**
4518 * Finds the last bit which is set in the given 32-bit integer.
4519 * Bits are numbered from 1 (least significant) to 32.
4520 *
4521 * @returns index [1..32] of the last set bit.
4522 * @returns 0 if all bits are cleared.
4523 * @param u32 Integer to search for set bits.
4524 * @remark Similar to fls() in BSD.
4525 */
4526#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4527DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4528#else
4529DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4530{
4531# if RT_INLINE_ASM_USES_INTRIN
4532 unsigned long iBit;
4533 if (_BitScanReverse(&iBit, u32))
4534 iBit++;
4535 else
4536 iBit = 0;
4537# elif RT_INLINE_ASM_GNU_STYLE
4538 uint32_t iBit;
4539 __asm__ __volatile__("bsrl %1, %0\n\t"
4540 "jnz 1f\n\t"
4541 "xorl %0, %0\n\t"
4542 "jmp 2f\n"
4543 "1:\n\t"
4544 "incl %0\n"
4545 "2:\n\t"
4546 : "=r" (iBit)
4547 : "rm" (u32));
4548# else
4549 uint32_t iBit;
4550 _asm
4551 {
4552 bsr eax, [u32]
4553 jnz found
4554 xor eax, eax
4555 jmp done
4556 found:
4557 inc eax
4558 done:
4559 mov [iBit], eax
4560 }
4561# endif
4562 return iBit;
4563}
4564#endif
4565
4566
4567/**
4568 * Finds the last bit which is set in the given 32-bit integer.
4569 * Bits are numbered from 1 (least significant) to 32.
4570 *
4571 * @returns index [1..32] of the last set bit.
4572 * @returns 0 if all bits are cleared.
4573 * @param i32 Integer to search for set bits.
4574 * @remark Similar to fls() in BSD.
4575 */
4576DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4577{
4578 return ASMBitLastSetU32((uint32_t)i32);
4579}
4580
4581/**
4582 * Reverse the byte order of the given 16-bit integer.
4583 *
4584 * @returns Revert
4585 * @param u16 16-bit integer value.
4586 */
4587#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4588DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4589#else
4590DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4591{
4592# if RT_INLINE_ASM_USES_INTRIN
4593 u16 = _byteswap_ushort(u16);
4594# elif RT_INLINE_ASM_GNU_STYLE
4595 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4596# else
4597 _asm
4598 {
4599 mov ax, [u16]
4600 ror ax, 8
4601 mov [u16], ax
4602 }
4603# endif
4604 return u16;
4605}
4606#endif
4607
4608
4609/**
4610 * Reverse the byte order of the given 32-bit integer.
4611 *
4612 * @returns Revert
4613 * @param u32 32-bit integer value.
4614 */
4615#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4616DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4617#else
4618DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4619{
4620# if RT_INLINE_ASM_USES_INTRIN
4621 u32 = _byteswap_ulong(u32);
4622# elif RT_INLINE_ASM_GNU_STYLE
4623 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4624# else
4625 _asm
4626 {
4627 mov eax, [u32]
4628 bswap eax
4629 mov [u32], eax
4630 }
4631# endif
4632 return u32;
4633}
4634#endif
4635
4636
4637/**
4638 * Reverse the byte order of the given 64-bit integer.
4639 *
4640 * @returns Revert
4641 * @param u64 64-bit integer value.
4642 */
4643DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4644{
4645#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4646 u64 = _byteswap_uint64(u64);
4647#else
4648 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4649 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4650#endif
4651 return u64;
4652}
4653
4654
4655/** @} */
4656
4657
4658/** @} */
4659
4660#endif
4661
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette