VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 59470

Last change on this file since 59470 was 58791, checked in by vboxsync, 9 years ago

iprt/asm.h: 16-bit fixes to ASMBitSetRange and ASMBitClearRange.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Microsoft compiler intrinsics (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
98 * @remarks The difference between ordered and unordered atomic operations is that
99 * the former will complete outstanding reads and writes before continuing
100 * while the latter doesn't make any promises about the order. Ordered
101 * operations don't, it seems, make any 100% promise with regard to whether
102 * the operation will complete before any subsequent memory access.
103 * (please, correct if wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
114 * static inline uint32_t rdmsr_low(int idx)
115 * {
116 * uint32_t low;
117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
118 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
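
/* For instance (an illustrative sketch; g_u32SharedFlag is an example name, not
 * an IPRT symbol): when polling a flag that another thread updates, the ordered
 * read is the safe default, while the unordered variant is fine where the
 * surrounding code already provides the necessary ordering.
 * @code
 * static volatile uint32_t g_u32SharedFlag;
 * ...
 * uint32_t u32A = ASMAtomicReadU32(&g_u32SharedFlag);    // ordered: fenced read
 * uint32_t u32B = ASMAtomicUoReadU32(&g_u32SharedFlag);  // unordered: plain atomic read
 * @endcode
 */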
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some register allocation issues in gcc 4.3.x. So far this
137 * workaround is still required for 4.4 and 4.5. */
138#ifdef __GNUC__
139# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
140#endif
141#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
142# define RT_INLINE_ASM_GCC_4_3_X_X86 0
143#endif
144
145/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
146 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
147 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds (PIC
148 * mode, x86).
149 *
150 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
151 * when in PIC mode on x86.
152 */
153#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
155# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
156# else
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
158 ( (defined(PIC) || defined(__PIC__)) \
159 && defined(RT_ARCH_X86) \
160 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
161 || defined(RT_OS_DARWIN)) )
162# endif
163#endif
164
165
166/** @def ASMReturnAddress
167 * Gets the return address of the current (or calling if you like) function or method.
168 */
169#ifdef _MSC_VER
170# ifdef __cplusplus
171extern "C"
172# endif
173void * _ReturnAddress(void);
174# pragma intrinsic(_ReturnAddress)
175# define ASMReturnAddress() _ReturnAddress()
176#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
177# define ASMReturnAddress() __builtin_return_address(0)
178#elif defined(__WATCOMC__)
179# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
180#else
181# error "Unsupported compiler."
182#endif
183
184
185/**
186 * Compiler memory barrier.
187 *
188 * Ensure that the compiler does not use any cached (register/tmp stack) memory
189 * values and does not delay any outstanding writes past this point.
190 *
191 * This function must be used if non-volatile data is modified by a
192 * device or the VMM. Typical cases are port access, MMIO access,
193 * trapping instructions, etc.
194 */
195#if RT_INLINE_ASM_GNU_STYLE
196# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
197#elif RT_INLINE_ASM_USES_INTRIN
198# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
199#elif defined(__WATCOMC__)
200void ASMCompilerBarrier(void);
201#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
202DECLINLINE(void) ASMCompilerBarrier(void)
203{
204 __asm
205 {
206 }
207}
208#endif
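
/* Illustrative usage sketch: when busy-waiting on a plain (non-volatile) field
 * that a device or another context updates behind the compiler's back, a
 * compiler barrier forces the value to be re-read on every iteration.  The
 * MYDEVSTATE type and field are example names only.
 * @code
 * typedef struct MYDEVSTATE { bool fCompleted; } MYDEVSTATE;
 *
 * void myWaitForDevice(MYDEVSTATE *pDevState)
 * {
 *     while (!pDevState->fCompleted)
 *         ASMCompilerBarrier();   // discard cached copies; re-read fCompleted
 * }
 * @endcode
 */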
209
210
211/** @def ASMBreakpoint
212 * Debugger Breakpoint.
213 * @deprecated Use RT_BREAKPOINT instead.
214 * @internal
215 */
216#define ASMBreakpoint() RT_BREAKPOINT()
217
218
219/**
220 * Spinloop hint for platforms that have these, empty function on the other
221 * platforms.
222 *
223 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
224 * spin locks.
225 */
226#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
227DECLASM(void) ASMNopPause(void);
228#else
229DECLINLINE(void) ASMNopPause(void)
230{
231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
232# if RT_INLINE_ASM_GNU_STYLE
233 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
234# else
235 __asm {
236 _emit 0f3h
237 _emit 090h
238 }
239# endif
240# else
241 /* dummy */
242# endif
243}
244#endif
245
246
247/**
248 * Atomically Exchange an unsigned 8-bit value, ordered.
249 *
250 * @returns Current *pu8 value
251 * @param pu8 Pointer to the 8-bit variable to update.
252 * @param u8 The 8-bit value to assign to *pu8.
253 */
254#if RT_INLINE_ASM_EXTERNAL
255DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
256#else
257DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
258{
259# if RT_INLINE_ASM_GNU_STYLE
260 __asm__ __volatile__("xchgb %0, %1\n\t"
261 : "=m" (*pu8),
262 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
263 : "1" (u8),
264 "m" (*pu8));
265# else
266 __asm
267 {
268# ifdef RT_ARCH_AMD64
269 mov rdx, [pu8]
270 mov al, [u8]
271 xchg [rdx], al
272 mov [u8], al
273# else
274 mov edx, [pu8]
275 mov al, [u8]
276 xchg [edx], al
277 mov [u8], al
278# endif
279 }
280# endif
281 return u8;
282}
283#endif
284
285
286/**
287 * Atomically Exchange a signed 8-bit value, ordered.
288 *
289 * @returns Current *pi8 value
290 * @param pi8 Pointer to the 8-bit variable to update.
291 * @param i8 The 8-bit value to assign to *pi8.
292 */
293DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
294{
295 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
296}
297
298
299/**
300 * Atomically Exchange a bool value, ordered.
301 *
302 * @returns Current *pf value
303 * @param pf Pointer to the boolean variable to update.
304 * @param f The boolean value to assign to *pf.
305 */
306DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
307{
308#ifdef _MSC_VER
309 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
310#else
311 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
312#endif
313}
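
/* Illustrative sketch: ASMAtomicXchgBool is all that is needed for a crude
 * test-and-set spin lock, with ASMNopPause as the polite way of spinning on
 * x86.  g_fLockTaken and the function names are examples only.
 * @code
 * static volatile bool g_fLockTaken = false;
 *
 * void mySpinLockAcquire(void)
 * {
 *     while (ASMAtomicXchgBool(&g_fLockTaken, true))  // true returned -> somebody else holds it
 *         ASMNopPause();
 * }
 *
 * void mySpinLockRelease(void)
 * {
 *     ASMAtomicXchgBool(&g_fLockTaken, false);
 * }
 * @endcode
 */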
314
315
316/**
317 * Atomically Exchange an unsigned 16-bit value, ordered.
318 *
319 * @returns Current *pu16 value
320 * @param pu16 Pointer to the 16-bit variable to update.
321 * @param u16 The 16-bit value to assign to *pu16.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
325#else
326DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
327{
328# if RT_INLINE_ASM_GNU_STYLE
329 __asm__ __volatile__("xchgw %0, %1\n\t"
330 : "=m" (*pu16),
331 "=r" (u16)
332 : "1" (u16),
333 "m" (*pu16));
334# else
335 __asm
336 {
337# ifdef RT_ARCH_AMD64
338 mov rdx, [pu16]
339 mov ax, [u16]
340 xchg [rdx], ax
341 mov [u16], ax
342# else
343 mov edx, [pu16]
344 mov ax, [u16]
345 xchg [edx], ax
346 mov [u16], ax
347# endif
348 }
349# endif
350 return u16;
351}
352#endif
353
354
355/**
356 * Atomically Exchange a signed 16-bit value, ordered.
357 *
358 * @returns Current *pi16 value
359 * @param pi16 Pointer to the 16-bit variable to update.
360 * @param i16 The 16-bit value to assign to *pi16.
361 */
362DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
363{
364 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
365}
366
367
368/**
369 * Atomically Exchange an unsigned 32-bit value, ordered.
370 *
371 * @returns Current *pu32 value
372 * @param pu32 Pointer to the 32-bit variable to update.
373 * @param u32 The 32-bit value to assign to *pu32.
374 */
375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
376DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
377#else
378DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
379{
380# if RT_INLINE_ASM_GNU_STYLE
381 __asm__ __volatile__("xchgl %0, %1\n\t"
382 : "=m" (*pu32),
383 "=r" (u32)
384 : "1" (u32),
385 "m" (*pu32));
386
387# elif RT_INLINE_ASM_USES_INTRIN
388 u32 = _InterlockedExchange((long *)pu32, u32);
389
390# else
391 __asm
392 {
393# ifdef RT_ARCH_AMD64
394 mov rdx, [pu32]
395 mov eax, u32
396 xchg [rdx], eax
397 mov [u32], eax
398# else
399 mov edx, [pu32]
400 mov eax, u32
401 xchg [edx], eax
402 mov [u32], eax
403# endif
404 }
405# endif
406 return u32;
407}
408#endif
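
/* Illustrative sketch: the exchange returns the previous value, which makes it
 * handy for one-shot state transitions.  MYSTATE_* and g_u32State are example
 * names.
 * @code
 * #define MYSTATE_RUNNING   UINT32_C(0)
 * #define MYSTATE_SHUTDOWN  UINT32_C(1)
 * static volatile uint32_t g_u32State = MYSTATE_RUNNING;
 *
 * // Request shutdown; only the first caller (the one that saw RUNNING) returns true.
 * bool myRequestShutdown(void)
 * {
 *     return ASMAtomicXchgU32(&g_u32State, MYSTATE_SHUTDOWN) == MYSTATE_RUNNING;
 * }
 * @endcode
 */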
409
410
411/**
412 * Atomically Exchange a signed 32-bit value, ordered.
413 *
414 * @returns Current *pi32 value
415 * @param pi32 Pointer to the 32-bit variable to update.
416 * @param i32 The 32-bit value to assign to *pi32.
417 */
418DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
419{
420 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
421}
422
423
424/**
425 * Atomically Exchange an unsigned 64-bit value, ordered.
426 *
427 * @returns Current *pu64 value
428 * @param pu64 Pointer to the 64-bit variable to update.
429 * @param u64 The 64-bit value to assign to *pu64.
430 */
431#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
432 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
433DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
434#else
435DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
436{
437# if defined(RT_ARCH_AMD64)
438# if RT_INLINE_ASM_USES_INTRIN
439 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
440
441# elif RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("xchgq %0, %1\n\t"
443 : "=m" (*pu64),
444 "=r" (u64)
445 : "1" (u64),
446 "m" (*pu64));
447# else
448 __asm
449 {
450 mov rdx, [pu64]
451 mov rax, [u64]
452 xchg [rdx], rax
453 mov [u64], rax
454 }
455# endif
456# else /* !RT_ARCH_AMD64 */
457# if RT_INLINE_ASM_GNU_STYLE
458# if defined(PIC) || defined(__PIC__)
459 uint32_t u32EBX = (uint32_t)u64;
460 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
461 "xchgl %%ebx, %3\n\t"
462 "1:\n\t"
463 "lock; cmpxchg8b (%5)\n\t"
464 "jnz 1b\n\t"
465 "movl %3, %%ebx\n\t"
466 /*"xchgl %%esi, %5\n\t"*/
467 : "=A" (u64),
468 "=m" (*pu64)
469 : "0" (*pu64),
470 "m" ( u32EBX ),
471 "c" ( (uint32_t)(u64 >> 32) ),
472 "S" (pu64));
473# else /* !PIC */
474 __asm__ __volatile__("1:\n\t"
475 "lock; cmpxchg8b %1\n\t"
476 "jnz 1b\n\t"
477 : "=A" (u64),
478 "=m" (*pu64)
479 : "0" (*pu64),
480 "b" ( (uint32_t)u64 ),
481 "c" ( (uint32_t)(u64 >> 32) ));
482# endif
483# else
484 __asm
485 {
486 mov ebx, dword ptr [u64]
487 mov ecx, dword ptr [u64 + 4]
488 mov edi, pu64
489 mov eax, dword ptr [edi]
490 mov edx, dword ptr [edi + 4]
491 retry:
492 lock cmpxchg8b [edi]
493 jnz retry
494 mov dword ptr [u64], eax
495 mov dword ptr [u64 + 4], edx
496 }
497# endif
498# endif /* !RT_ARCH_AMD64 */
499 return u64;
500}
501#endif
502
503
504/**
505 * Atomically Exchange a signed 64-bit value, ordered.
506 *
507 * @returns Current *pi64 value
508 * @param pi64 Pointer to the 64-bit variable to update.
509 * @param i64 The 64-bit value to assign to *pi64.
510 */
511DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
512{
513 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
514}
515
516
517/**
518 * Atomically Exchange a pointer value, ordered.
519 *
520 * @returns Current *ppv value
521 * @param ppv Pointer to the pointer variable to update.
522 * @param pv The pointer value to assign to *ppv.
523 */
524DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
525{
526#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
527 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
528#elif ARCH_BITS == 64
529 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
530#else
531# error "ARCH_BITS is bogus"
532#endif
533}
534
535
536/**
537 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
538 *
539 * @returns Current *pv value
540 * @param ppv Pointer to the pointer variable to update.
541 * @param pv The pointer value to assign to *ppv.
542 * @param Type The type of *ppv, sans volatile.
543 */
544#ifdef __GNUC__
545# define ASMAtomicXchgPtrT(ppv, pv, Type) \
546 __extension__ \
547 ({\
548 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
549 Type const pvTypeChecked = (pv); \
550 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
551 pvTypeCheckedRet; \
552 })
553#else
554# define ASMAtomicXchgPtrT(ppv, pv, Type) \
555 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
556#endif
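
/* Illustrative sketch: the typed variant avoids the void pointer casts and, on
 * GCC, checks that the new value is assignment compatible with *ppv.  MYNODE
 * and g_pCurrent are example names.
 * @code
 * typedef struct MYNODE { int iValue; } MYNODE;
 * static MYNODE * volatile g_pCurrent;
 *
 * MYNODE *mySwapCurrent(MYNODE *pNew)
 * {
 *     return ASMAtomicXchgPtrT(&g_pCurrent, pNew, MYNODE *);
 * }
 * @endcode
 */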
557
558
559/**
560 * Atomically Exchange a raw-mode context pointer value, ordered.
561 *
562 * @returns Current *ppv value
563 * @param ppvRC Pointer to the pointer variable to update.
564 * @param pvRC The pointer value to assign to *ppv.
565 */
566DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
567{
568 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
569}
570
571
572/**
573 * Atomically Exchange a ring-0 pointer value, ordered.
574 *
575 * @returns Current *ppv value
576 * @param ppvR0 Pointer to the pointer variable to update.
577 * @param pvR0 The pointer value to assign to *ppv.
578 */
579DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
580{
581#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
582 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
583#elif R0_ARCH_BITS == 64
584 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
585#else
586# error "R0_ARCH_BITS is bogus"
587#endif
588}
589
590
591/**
592 * Atomically Exchange a ring-3 pointer value, ordered.
593 *
594 * @returns Current *ppv value
595 * @param ppvR3 Pointer to the pointer variable to update.
596 * @param pvR3 The pointer value to assign to *ppv.
597 */
598DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
599{
600#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
601 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
602#elif R3_ARCH_BITS == 64
603 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
604#else
605# error "R3_ARCH_BITS is bogus"
606#endif
607}
608
609
610/** @def ASMAtomicXchgHandle
611 * Atomically Exchange a typical IPRT handle value, ordered.
612 *
613 * @param ph Pointer to the value to update.
614 * @param hNew The new value to assign to *ph.
615 * @param phRes Where to store the current *ph value.
616 *
617 * @remarks This doesn't currently work for all handles (like RTFILE).
618 */
619#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
620# define ASMAtomicXchgHandle(ph, hNew, phRes) \
621 do { \
622 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
623 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
624 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
625 } while (0)
626#elif HC_ARCH_BITS == 64
627# define ASMAtomicXchgHandle(ph, hNew, phRes) \
628 do { \
629 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
630 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
631 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
632 } while (0)
633#else
634# error HC_ARCH_BITS
635#endif
636
637
638/**
639 * Atomically Exchange a value whose size might differ
640 * between platforms or compilers, ordered.
641 *
642 * @param pu Pointer to the variable to update.
643 * @param uNew The value to assign to *pu.
644 * @todo This is busted as it's missing the result argument.
645 */
646#define ASMAtomicXchgSize(pu, uNew) \
647 do { \
648 switch (sizeof(*(pu))) { \
649 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
650 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
651 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
652 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
653 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
654 } \
655 } while (0)
656
657/**
658 * Atomically Exchange a value whose size might differ
659 * between platforms or compilers, ordered.
660 *
661 * @param pu Pointer to the variable to update.
662 * @param uNew The value to assign to *pu.
663 * @param puRes Where to store the current *pu value.
664 */
665#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
666 do { \
667 switch (sizeof(*(pu))) { \
668 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
669 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
670 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
671 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
672 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
673 } \
674 } while (0)
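
/* Illustrative sketch: the Size variants pick the right width from sizeof(*pu),
 * which is convenient for types such as uintptr_t whose size differs between
 * 32-bit and 64-bit builds.  s_uLastToken is an example name.
 * @code
 * static volatile uintptr_t s_uLastToken;
 *
 * uintptr_t mySwapToken(uintptr_t uNew)
 * {
 *     uintptr_t uOld;
 *     ASMAtomicXchgSizeCorrect(&s_uLastToken, uNew, &uOld);
 *     return uOld;
 * }
 * @endcode
 */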
675
676
677
678/**
679 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
680 *
681 * @returns true if xchg was done.
682 * @returns false if xchg wasn't done.
683 *
684 * @param pu8 Pointer to the value to update.
685 * @param u8New The new value to assign to *pu8.
686 * @param u8Old The old value to compare *pu8 with.
687 */
688#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
689DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
690#else
691DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
692{
693 uint8_t u8Ret;
694 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
695 "setz %1\n\t"
696 : "=m" (*pu8),
697 "=qm" (u8Ret),
698 "=a" (u8Old)
699 : "q" (u8New),
700 "2" (u8Old),
701 "m" (*pu8));
702 return (bool)u8Ret;
703}
704#endif
705
706
707/**
708 * Atomically Compare and Exchange a signed 8-bit value, ordered.
709 *
710 * @returns true if xchg was done.
711 * @returns false if xchg wasn't done.
712 *
713 * @param pi8 Pointer to the value to update.
714 * @param i8New The new value to assign to *pi8.
715 * @param i8Old The old value to compare *pi8 with.
716 */
717DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
718{
719 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
720}
721
722
723/**
724 * Atomically Compare and Exchange a bool value, ordered.
725 *
726 * @returns true if xchg was done.
727 * @returns false if xchg wasn't done.
728 *
729 * @param pf Pointer to the value to update.
730 * @param fNew The new value to assign to *pf.
731 * @param fOld The old value to compare *pf with.
732 */
733DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
734{
735 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
736}
737
738
739/**
740 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
741 *
742 * @returns true if xchg was done.
743 * @returns false if xchg wasn't done.
744 *
745 * @param pu32 Pointer to the value to update.
746 * @param u32New The new value to assign to *pu32.
747 * @param u32Old The old value to compare *pu32 with.
748 */
749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
750DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
751#else
752DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
753{
754# if RT_INLINE_ASM_GNU_STYLE
755 uint8_t u8Ret;
756 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
757 "setz %1\n\t"
758 : "=m" (*pu32),
759 "=qm" (u8Ret),
760 "=a" (u32Old)
761 : "r" (u32New),
762 "2" (u32Old),
763 "m" (*pu32));
764 return (bool)u8Ret;
765
766# elif RT_INLINE_ASM_USES_INTRIN
767 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
768
769# else
770 uint32_t u32Ret;
771 __asm
772 {
773# ifdef RT_ARCH_AMD64
774 mov rdx, [pu32]
775# else
776 mov edx, [pu32]
777# endif
778 mov eax, [u32Old]
779 mov ecx, [u32New]
780# ifdef RT_ARCH_AMD64
781 lock cmpxchg [rdx], ecx
782# else
783 lock cmpxchg [edx], ecx
784# endif
785 setz al
786 movzx eax, al
787 mov [u32Ret], eax
788 }
789 return !!u32Ret;
790# endif
791}
792#endif
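
/* Illustrative sketch: the usual compare-and-exchange retry loop, here keeping
 * a running maximum.  g_u32Max is an example name.
 * @code
 * static volatile uint32_t g_u32Max;
 *
 * void myUpdateMax(uint32_t u32New)
 * {
 *     uint32_t u32Cur;
 *     do
 *     {
 *         u32Cur = ASMAtomicReadU32(&g_u32Max);
 *         if (u32New <= u32Cur)
 *             return;   // the current maximum is already at least as large
 *     } while (!ASMAtomicCmpXchgU32(&g_u32Max, u32New, u32Cur));
 * }
 * @endcode
 */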
793
794
795/**
796 * Atomically Compare and Exchange a signed 32-bit value, ordered.
797 *
798 * @returns true if xchg was done.
799 * @returns false if xchg wasn't done.
800 *
801 * @param pi32 Pointer to the value to update.
802 * @param i32New The new value to assign to *pi32.
803 * @param i32Old The old value to compare *pi32 with.
804 */
805DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
806{
807 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
808}
809
810
811/**
812 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
813 *
814 * @returns true if xchg was done.
815 * @returns false if xchg wasn't done.
816 *
817 * @param pu64 Pointer to the 64-bit variable to update.
818 * @param u64New The 64-bit value to assign to *pu64.
819 * @param u64Old The value to compare with.
820 */
821#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
822 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
823DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
824#else
825DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
826{
827# if RT_INLINE_ASM_USES_INTRIN
828 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
829
830# elif defined(RT_ARCH_AMD64)
831# if RT_INLINE_ASM_GNU_STYLE
832 uint8_t u8Ret;
833 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
834 "setz %1\n\t"
835 : "=m" (*pu64),
836 "=qm" (u8Ret),
837 "=a" (u64Old)
838 : "r" (u64New),
839 "2" (u64Old),
840 "m" (*pu64));
841 return (bool)u8Ret;
842# else
843 bool fRet;
844 __asm
845 {
846 mov rdx, [pu64]
847 mov rax, [u64Old]
848 mov rcx, [u64New]
849 lock cmpxchg [rdx], rcx
850 setz al
851 mov [fRet], al
852 }
853 return fRet;
854# endif
855# else /* !RT_ARCH_AMD64 */
856 uint32_t u32Ret;
857# if RT_INLINE_ASM_GNU_STYLE
858# if defined(PIC) || defined(__PIC__)
859 uint32_t u32EBX = (uint32_t)u64New;
860 uint32_t u32Spill;
861 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
862 "lock; cmpxchg8b (%6)\n\t"
863 "setz %%al\n\t"
864 "movl %4, %%ebx\n\t"
865 "movzbl %%al, %%eax\n\t"
866 : "=a" (u32Ret),
867 "=d" (u32Spill),
868# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
869 "+m" (*pu64)
870# else
871 "=m" (*pu64)
872# endif
873 : "A" (u64Old),
874 "m" ( u32EBX ),
875 "c" ( (uint32_t)(u64New >> 32) ),
876 "S" (pu64));
877# else /* !PIC */
878 uint32_t u32Spill;
879 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
880 "setz %%al\n\t"
881 "movzbl %%al, %%eax\n\t"
882 : "=a" (u32Ret),
883 "=d" (u32Spill),
884 "+m" (*pu64)
885 : "A" (u64Old),
886 "b" ( (uint32_t)u64New ),
887 "c" ( (uint32_t)(u64New >> 32) ));
888# endif
889 return (bool)u32Ret;
890# else
891 __asm
892 {
893 mov ebx, dword ptr [u64New]
894 mov ecx, dword ptr [u64New + 4]
895 mov edi, [pu64]
896 mov eax, dword ptr [u64Old]
897 mov edx, dword ptr [u64Old + 4]
898 lock cmpxchg8b [edi]
899 setz al
900 movzx eax, al
901 mov dword ptr [u32Ret], eax
902 }
903 return !!u32Ret;
904# endif
905# endif /* !RT_ARCH_AMD64 */
906}
907#endif
908
909
910/**
911 * Atomically Compare and exchange a signed 64-bit value, ordered.
912 *
913 * @returns true if xchg was done.
914 * @returns false if xchg wasn't done.
915 *
916 * @param pi64 Pointer to the 64-bit variable to update.
917 * @param i64 The 64-bit value to assign to *pi64.
918 * @param i64Old The value to compare with.
919 */
920DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
921{
922 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
923}
924
925
926/**
927 * Atomically Compare and Exchange a pointer value, ordered.
928 *
929 * @returns true if xchg was done.
930 * @returns false if xchg wasn't done.
931 *
932 * @param ppv Pointer to the value to update.
933 * @param pvNew The new value to assign to *ppv.
934 * @param pvOld The old value to compare *ppv with.
935 */
936DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
937{
938#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
939 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
940#elif ARCH_BITS == 64
941 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
942#else
943# error "ARCH_BITS is bogus"
944#endif
945}
946
947
948/**
949 * Atomically Compare and Exchange a pointer value, ordered.
950 *
951 * @returns true if xchg was done.
952 * @returns false if xchg wasn't done.
953 *
954 * @param ppv Pointer to the value to update.
955 * @param pvNew The new value to assign to *ppv.
956 * @param pvOld The old value to compare *ppv with.
957 *
958 * @remarks This is relatively type safe on GCC platforms.
959 */
960#ifdef __GNUC__
961# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
962 __extension__ \
963 ({\
964 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
965 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
966 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
967 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
968 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
969 fMacroRet; \
970 })
971#else
972# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
973 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
974#endif
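
/* Illustrative sketch: compare-and-exchange on a pointer gives race-free lazy
 * initialization where only the first writer publishes.  MYOBJ, g_pSingleton,
 * myCreateObj and myDestroyObj are hypothetical names.
 * @code
 * typedef struct MYOBJ { int iDummy; } MYOBJ;
 * static MYOBJ * volatile g_pSingleton;
 *
 * MYOBJ *myGetSingleton(void)
 * {
 *     MYOBJ *pObj = ASMAtomicReadPtrT(&g_pSingleton, MYOBJ *);
 *     if (!pObj)
 *     {
 *         MYOBJ *pNew = myCreateObj();
 *         if (ASMAtomicCmpXchgPtr(&g_pSingleton, pNew, NULL))
 *             pObj = pNew;                // we won the race
 *         else
 *         {
 *             myDestroyObj(pNew);         // somebody else published first
 *             pObj = ASMAtomicReadPtrT(&g_pSingleton, MYOBJ *);
 *         }
 *     }
 *     return pObj;
 * }
 * @endcode
 */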
975
976
977/** @def ASMAtomicCmpXchgHandle
978 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
979 *
980 * @param ph Pointer to the value to update.
981 * @param hNew The new value to assign to *ph.
982 * @param hOld The old value to compare *ph with.
983 * @param fRc Where to store the result.
984 *
985 * @remarks This doesn't currently work for all handles (like RTFILE).
986 */
987#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
988# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
989 do { \
990 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
991 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
992 } while (0)
993#elif HC_ARCH_BITS == 64
994# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
995 do { \
996 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
997 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
998 } while (0)
999#else
1000# error HC_ARCH_BITS
1001#endif
1002
1003
1004/** @def ASMAtomicCmpXchgSize
1005 * Atomically Compare and Exchange a value whose size might differ
1006 * between platforms or compilers, ordered.
1007 *
1008 * @param pu Pointer to the value to update.
1009 * @param uNew The new value to assign to *pu.
1010 * @param uOld The old value to compare *pu with.
1011 * @param fRc Where to store the result.
1012 */
1013#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1014 do { \
1015 switch (sizeof(*(pu))) { \
1016 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1017 break; \
1018 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1019 break; \
1020 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1021 (fRc) = false; \
1022 break; \
1023 } \
1024 } while (0)
1025
1026
1027/**
1028 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1029 * passes back old value, ordered.
1030 *
1031 * @returns true if xchg was done.
1032 * @returns false if xchg wasn't done.
1033 *
1034 * @param pu32 Pointer to the value to update.
1035 * @param u32New The new value to assign to *pu32.
1036 * @param u32Old The old value to compare *pu32 with.
1037 * @param pu32Old Pointer to store the old value at.
1038 */
1039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1040DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1041#else
1042DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1043{
1044# if RT_INLINE_ASM_GNU_STYLE
1045 uint8_t u8Ret;
1046 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1047 "setz %1\n\t"
1048 : "=m" (*pu32),
1049 "=qm" (u8Ret),
1050 "=a" (*pu32Old)
1051 : "r" (u32New),
1052 "a" (u32Old),
1053 "m" (*pu32));
1054 return (bool)u8Ret;
1055
1056# elif RT_INLINE_ASM_USES_INTRIN
1057 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1058
1059# else
1060 uint32_t u32Ret;
1061 __asm
1062 {
1063# ifdef RT_ARCH_AMD64
1064 mov rdx, [pu32]
1065# else
1066 mov edx, [pu32]
1067# endif
1068 mov eax, [u32Old]
1069 mov ecx, [u32New]
1070# ifdef RT_ARCH_AMD64
1071 lock cmpxchg [rdx], ecx
1072 mov rdx, [pu32Old]
1073 mov [rdx], eax
1074# else
1075 lock cmpxchg [edx], ecx
1076 mov edx, [pu32Old]
1077 mov [edx], eax
1078# endif
1079 setz al
1080 movzx eax, al
1081 mov [u32Ret], eax
1082 }
1083 return !!u32Ret;
1084# endif
1085}
1086#endif
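
/* Illustrative sketch: the Ex variant passes back the value actually found, so
 * a retry loop does not need to re-read the variable on failure.  Here bit 0 is
 * set atomically; g_u32Flags is an example name.
 * @code
 * static volatile uint32_t g_u32Flags;
 *
 * uint32_t mySetBusyFlag(void)
 * {
 *     uint32_t u32Old = ASMAtomicReadU32(&g_u32Flags);
 *     while (!ASMAtomicCmpXchgExU32(&g_u32Flags, u32Old | UINT32_C(1), u32Old, &u32Old))
 *     {
 *         // u32Old now holds the value actually found; just retry.
 *     }
 *     return u32Old;   // flags as they were before bit 0 was set
 * }
 * @endcode
 */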
1087
1088
1089/**
1090 * Atomically Compare and Exchange a signed 32-bit value, additionally
1091 * passes back old value, ordered.
1092 *
1093 * @returns true if xchg was done.
1094 * @returns false if xchg wasn't done.
1095 *
1096 * @param pi32 Pointer to the value to update.
1097 * @param i32New The new value to assign to *pi32.
1098 * @param i32Old The old value to compare *pi32 with.
1099 * @param pi32Old Pointer to store the old value at.
1100 */
1101DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1102{
1103 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1104}
1105
1106
1107/**
1108 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1109 * passing back old value, ordered.
1110 *
1111 * @returns true if xchg was done.
1112 * @returns false if xchg wasn't done.
1113 *
1114 * @param pu64 Pointer to the 64-bit variable to update.
1115 * @param u64New The 64-bit value to assign to *pu64.
1116 * @param u64Old The value to compare with.
1117 * @param pu64Old Pointer to store the old value at.
1118 */
1119#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1120 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1121DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1122#else
1123DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1124{
1125# if RT_INLINE_ASM_USES_INTRIN
1126 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1127
1128# elif defined(RT_ARCH_AMD64)
1129# if RT_INLINE_ASM_GNU_STYLE
1130 uint8_t u8Ret;
1131 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1132 "setz %1\n\t"
1133 : "=m" (*pu64),
1134 "=qm" (u8Ret),
1135 "=a" (*pu64Old)
1136 : "r" (u64New),
1137 "a" (u64Old),
1138 "m" (*pu64));
1139 return (bool)u8Ret;
1140# else
1141 bool fRet;
1142 __asm
1143 {
1144 mov rdx, [pu64]
1145 mov rax, [u64Old]
1146 mov rcx, [u64New]
1147 lock cmpxchg [rdx], rcx
1148 mov rdx, [pu64Old]
1149 mov [rdx], rax
1150 setz al
1151 mov [fRet], al
1152 }
1153 return fRet;
1154# endif
1155# else /* !RT_ARCH_AMD64 */
1156# if RT_INLINE_ASM_GNU_STYLE
1157 uint64_t u64Ret;
1158# if defined(PIC) || defined(__PIC__)
1159 /* NB: this code uses a memory clobber description, because the clean
1160 * solution with an output value for *pu64 makes gcc run out of registers.
1161 * This will cause suboptimal code, and anyone with a better solution is
1162 * welcome to improve this. */
1163 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1164 "lock; cmpxchg8b %3\n\t"
1165 "xchgl %%ebx, %1\n\t"
1166 : "=A" (u64Ret)
1167 : "DS" ((uint32_t)u64New),
1168 "c" ((uint32_t)(u64New >> 32)),
1169 "m" (*pu64),
1170 "0" (u64Old)
1171 : "memory" );
1172# else /* !PIC */
1173 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1174 : "=A" (u64Ret),
1175 "=m" (*pu64)
1176 : "b" ((uint32_t)u64New),
1177 "c" ((uint32_t)(u64New >> 32)),
1178 "m" (*pu64),
1179 "0" (u64Old));
1180# endif
1181 *pu64Old = u64Ret;
1182 return u64Ret == u64Old;
1183# else
1184 uint32_t u32Ret;
1185 __asm
1186 {
1187 mov ebx, dword ptr [u64New]
1188 mov ecx, dword ptr [u64New + 4]
1189 mov edi, [pu64]
1190 mov eax, dword ptr [u64Old]
1191 mov edx, dword ptr [u64Old + 4]
1192 lock cmpxchg8b [edi]
1193 mov ebx, [pu64Old]
1194 mov [ebx], eax
1195 setz al
1196 movzx eax, al
1197 add ebx, 4
1198 mov [ebx], edx
1199 mov dword ptr [u32Ret], eax
1200 }
1201 return !!u32Ret;
1202# endif
1203# endif /* !RT_ARCH_AMD64 */
1204}
1205#endif
1206
1207
1208/**
1209 * Atomically Compare and exchange a signed 64-bit value, additionally
1210 * passing back old value, ordered.
1211 *
1212 * @returns true if xchg was done.
1213 * @returns false if xchg wasn't done.
1214 *
1215 * @param pi64 Pointer to the 64-bit variable to update.
1216 * @param i64 The 64-bit value to assign to *pi64.
1217 * @param i64Old The value to compare with.
1218 * @param pi64Old Pointer to store the old value at.
1219 */
1220DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1221{
1222 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1223}
1224
1225/** @def ASMAtomicCmpXchgExHandle
1226 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1227 *
1228 * @param ph Pointer to the value to update.
1229 * @param hNew The new value to assign to *ph.
1230 * @param hOld The old value to compare *ph with.
1231 * @param fRc Where to store the result.
1232 * @param phOldVal Pointer to where to store the old value.
1233 *
1234 * @remarks This doesn't currently work for all handles (like RTFILE).
1235 */
1236#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1237# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1238 do { \
1239 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1240 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1241 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1242 } while (0)
1243#elif HC_ARCH_BITS == 64
1244# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1245 do { \
1246 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1247 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1248 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1249 } while (0)
1250#else
1251# error HC_ARCH_BITS
1252#endif
1253
1254
1255/** @def ASMAtomicCmpXchgExSize
1256 * Atomically Compare and Exchange a value whose size might differ
1257 * between platforms or compilers. Additionally passes back old value.
1258 *
1259 * @param pu Pointer to the value to update.
1260 * @param uNew The new value to assign to *pu.
1261 * @param uOld The old value to compare *pu with.
1262 * @param fRc Where to store the result.
1263 * @param puOldVal Pointer to where to store the old value.
1264 */
1265#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1266 do { \
1267 switch (sizeof(*(pu))) { \
1268 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1269 break; \
1270 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1271 break; \
1272 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1273 (fRc) = false; \
1274 *(puOldVal) = 0; \
1275 break; \
1276 } \
1277 } while (0)
1278
1279
1280/**
1281 * Atomically Compare and Exchange a pointer value, additionally
1282 * passing back old value, ordered.
1283 *
1284 * @returns true if xchg was done.
1285 * @returns false if xchg wasn't done.
1286 *
1287 * @param ppv Pointer to the value to update.
1288 * @param pvNew The new value to assign to *ppv.
1289 * @param pvOld The old value to compare *ppv with.
1290 * @param ppvOld Pointer to store the old value at.
1291 */
1292DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1293{
1294#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1295 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1296#elif ARCH_BITS == 64
1297 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1298#else
1299# error "ARCH_BITS is bogus"
1300#endif
1301}
1302
1303
1304/**
1305 * Atomically Compare and Exchange a pointer value, additionally
1306 * passing back old value, ordered.
1307 *
1308 * @returns true if xchg was done.
1309 * @returns false if xchg wasn't done.
1310 *
1311 * @param ppv Pointer to the value to update.
1312 * @param pvNew The new value to assign to *ppv.
1313 * @param pvOld The old value to compare *ppv with.
1314 * @param ppvOld Pointer to store the old value at.
1315 *
1316 * @remarks This is relatively type safe on GCC platforms.
1317 */
1318#ifdef __GNUC__
1319# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1320 __extension__ \
1321 ({\
1322 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1323 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1324 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1325 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1326 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1327 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1328 (void **)ppvOldTypeChecked); \
1329 fMacroRet; \
1330 })
1331#else
1332# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1333 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1334#endif
1335
1336
1337/**
1338 * Serialize Instruction.
1339 */
1340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1341DECLASM(void) ASMSerializeInstruction(void);
1342#else
1343DECLINLINE(void) ASMSerializeInstruction(void)
1344{
1345# if RT_INLINE_ASM_GNU_STYLE
1346 RTCCUINTREG xAX = 0;
1347# ifdef RT_ARCH_AMD64
1348 __asm__ ("cpuid"
1349 : "=a" (xAX)
1350 : "0" (xAX)
1351 : "rbx", "rcx", "rdx");
1352# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1353 __asm__ ("push %%ebx\n\t"
1354 "cpuid\n\t"
1355 "pop %%ebx\n\t"
1356 : "=a" (xAX)
1357 : "0" (xAX)
1358 : "ecx", "edx");
1359# else
1360 __asm__ ("cpuid"
1361 : "=a" (xAX)
1362 : "0" (xAX)
1363 : "ebx", "ecx", "edx");
1364# endif
1365
1366# elif RT_INLINE_ASM_USES_INTRIN
1367 int aInfo[4];
1368 __cpuid(aInfo, 0);
1369
1370# else
1371 __asm
1372 {
1373 push ebx
1374 xor eax, eax
1375 cpuid
1376 pop ebx
1377 }
1378# endif
1379}
1380#endif
1381
1382
1383/**
1384 * Memory fence, waits for any pending writes and reads to complete.
1385 */
1386DECLINLINE(void) ASMMemoryFence(void)
1387{
1388 /** @todo use mfence? check if all cpus we care for support it. */
1389 uint32_t volatile u32;
1390 ASMAtomicXchgU32(&u32, 0);
1391}
1392
1393
1394/**
1395 * Write fence, waits for any pending writes to complete.
1396 */
1397DECLINLINE(void) ASMWriteFence(void)
1398{
1399 /** @todo use sfence? check if all cpus we care for support it. */
1400 ASMMemoryFence();
1401}
1402
1403
1404/**
1405 * Read fence, waits for any pending reads to complete.
1406 */
1407DECLINLINE(void) ASMReadFence(void)
1408{
1409 /** @todo use lfence? check if all cpus we care for support it. */
1410 ASMMemoryFence();
1411}
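
/* Illustrative sketch: pairing the write and read fences in a flag/payload
 * hand-off between two contexts.  MYSHARED and its fields are example names.
 * @code
 * typedef struct MYSHARED
 * {
 *     uint32_t volatile u32Payload;
 *     bool     volatile fReady;
 * } MYSHARED;
 *
 * // Producer: make the payload globally visible before raising the flag.
 * void myProduce(MYSHARED *pShared, uint32_t u32Value)
 * {
 *     pShared->u32Payload = u32Value;
 *     ASMWriteFence();
 *     pShared->fReady = true;
 * }
 *
 * // Consumer: check the flag before looking at the payload.
 * bool myTryConsume(MYSHARED *pShared, uint32_t *pu32Value)
 * {
 *     if (!pShared->fReady)
 *         return false;
 *     ASMReadFence();
 *     *pu32Value = pShared->u32Payload;
 *     return true;
 * }
 * @endcode
 */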
1412
1413
1414/**
1415 * Atomically reads an unsigned 8-bit value, ordered.
1416 *
1417 * @returns Current *pu8 value
1418 * @param pu8 Pointer to the 8-bit variable to read.
1419 */
1420DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1421{
1422 ASMMemoryFence();
1423 return *pu8; /* byte reads are atomic on x86 */
1424}
1425
1426
1427/**
1428 * Atomically reads an unsigned 8-bit value, unordered.
1429 *
1430 * @returns Current *pu8 value
1431 * @param pu8 Pointer to the 8-bit variable to read.
1432 */
1433DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1434{
1435 return *pu8; /* byte reads are atomic on x86 */
1436}
1437
1438
1439/**
1440 * Atomically reads a signed 8-bit value, ordered.
1441 *
1442 * @returns Current *pi8 value
1443 * @param pi8 Pointer to the 8-bit variable to read.
1444 */
1445DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1446{
1447 ASMMemoryFence();
1448 return *pi8; /* byte reads are atomic on x86 */
1449}
1450
1451
1452/**
1453 * Atomically reads a signed 8-bit value, unordered.
1454 *
1455 * @returns Current *pi8 value
1456 * @param pi8 Pointer to the 8-bit variable to read.
1457 */
1458DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1459{
1460 return *pi8; /* byte reads are atomic on x86 */
1461}
1462
1463
1464/**
1465 * Atomically reads an unsigned 16-bit value, ordered.
1466 *
1467 * @returns Current *pu16 value
1468 * @param pu16 Pointer to the 16-bit variable to read.
1469 */
1470DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1471{
1472 ASMMemoryFence();
1473 Assert(!((uintptr_t)pu16 & 1));
1474 return *pu16;
1475}
1476
1477
1478/**
1479 * Atomically reads an unsigned 16-bit value, unordered.
1480 *
1481 * @returns Current *pu16 value
1482 * @param pu16 Pointer to the 16-bit variable to read.
1483 */
1484DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1485{
1486 Assert(!((uintptr_t)pu16 & 1));
1487 return *pu16;
1488}
1489
1490
1491/**
1492 * Atomically reads a signed 16-bit value, ordered.
1493 *
1494 * @returns Current *pi16 value
1495 * @param pi16 Pointer to the 16-bit variable to read.
1496 */
1497DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1498{
1499 ASMMemoryFence();
1500 Assert(!((uintptr_t)pi16 & 1));
1501 return *pi16;
1502}
1503
1504
1505/**
1506 * Atomically reads a signed 16-bit value, unordered.
1507 *
1508 * @returns Current *pi16 value
1509 * @param pi16 Pointer to the 16-bit variable to read.
1510 */
1511DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1512{
1513 Assert(!((uintptr_t)pi16 & 1));
1514 return *pi16;
1515}
1516
1517
1518/**
1519 * Atomically reads an unsigned 32-bit value, ordered.
1520 *
1521 * @returns Current *pu32 value
1522 * @param pu32 Pointer to the 32-bit variable to read.
1523 */
1524DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1525{
1526 ASMMemoryFence();
1527 Assert(!((uintptr_t)pu32 & 3));
1528 return *pu32;
1529}
1530
1531
1532/**
1533 * Atomically reads an unsigned 32-bit value, unordered.
1534 *
1535 * @returns Current *pu32 value
1536 * @param pu32 Pointer to the 32-bit variable to read.
1537 */
1538DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1539{
1540 Assert(!((uintptr_t)pu32 & 3));
1541 return *pu32;
1542}
1543
1544
1545/**
1546 * Atomically reads a signed 32-bit value, ordered.
1547 *
1548 * @returns Current *pi32 value
1549 * @param pi32 Pointer to the 32-bit variable to read.
1550 */
1551DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1552{
1553 ASMMemoryFence();
1554 Assert(!((uintptr_t)pi32 & 3));
1555 return *pi32;
1556}
1557
1558
1559/**
1560 * Atomically reads a signed 32-bit value, unordered.
1561 *
1562 * @returns Current *pi32 value
1563 * @param pi32 Pointer to the 32-bit variable to read.
1564 */
1565DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1566{
1567 Assert(!((uintptr_t)pi32 & 3));
1568 return *pi32;
1569}
1570
1571
1572/**
1573 * Atomically reads an unsigned 64-bit value, ordered.
1574 *
1575 * @returns Current *pu64 value
1576 * @param pu64 Pointer to the 64-bit variable to read.
1577 * The memory pointed to must be writable.
1578 * @remark This will fault if the memory is read-only!
1579 */
1580#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1581 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1582DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1583#else
1584DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1585{
1586 uint64_t u64;
1587# ifdef RT_ARCH_AMD64
1588 Assert(!((uintptr_t)pu64 & 7));
1589/*# if RT_INLINE_ASM_GNU_STYLE
1590 __asm__ __volatile__( "mfence\n\t"
1591 "movq %1, %0\n\t"
1592 : "=r" (u64)
1593 : "m" (*pu64));
1594# else
1595 __asm
1596 {
1597 mfence
1598 mov rdx, [pu64]
1599 mov rax, [rdx]
1600 mov [u64], rax
1601 }
1602# endif*/
1603 ASMMemoryFence();
1604 u64 = *pu64;
1605# else /* !RT_ARCH_AMD64 */
1606# if RT_INLINE_ASM_GNU_STYLE
1607# if defined(PIC) || defined(__PIC__)
1608 uint32_t u32EBX = 0;
1609 Assert(!((uintptr_t)pu64 & 7));
1610 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1611 "lock; cmpxchg8b (%5)\n\t"
1612 "movl %3, %%ebx\n\t"
1613 : "=A" (u64),
1614# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1615 "+m" (*pu64)
1616# else
1617 "=m" (*pu64)
1618# endif
1619 : "0" (0ULL),
1620 "m" (u32EBX),
1621 "c" (0),
1622 "S" (pu64));
1623# else /* !PIC */
1624 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1625 : "=A" (u64),
1626 "+m" (*pu64)
1627 : "0" (0ULL),
1628 "b" (0),
1629 "c" (0));
1630# endif
1631# else
1632 Assert(!((uintptr_t)pu64 & 7));
1633 __asm
1634 {
1635 xor eax, eax
1636 xor edx, edx
1637 mov edi, pu64
1638 xor ecx, ecx
1639 xor ebx, ebx
1640 lock cmpxchg8b [edi]
1641 mov dword ptr [u64], eax
1642 mov dword ptr [u64 + 4], edx
1643 }
1644# endif
1645# endif /* !RT_ARCH_AMD64 */
1646 return u64;
1647}
1648#endif
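
/* Illustrative sketch: on 32-bit hosts the ordered 64-bit read is implemented
 * with cmpxchg8b (hence the writable-memory requirement), making it the safe
 * way to sample a 64-bit counter updated by other CPUs.  g_u64ByteCounter is an
 * example name.
 * @code
 * static volatile uint64_t g_u64ByteCounter;
 *
 * uint64_t myGetByteCount(void)
 * {
 *     return ASMAtomicReadU64(&g_u64ByteCounter);   // never returns a torn value
 * }
 * @endcode
 */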
1649
1650
1651/**
1652 * Atomically reads an unsigned 64-bit value, unordered.
1653 *
1654 * @returns Current *pu64 value
1655 * @param pu64 Pointer to the 64-bit variable to read.
1656 * The memory pointed to must be writable.
1657 * @remark This will fault if the memory is read-only!
1658 */
1659#if !defined(RT_ARCH_AMD64) \
1660 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1661 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1662DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1663#else
1664DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1665{
1666 uint64_t u64;
1667# ifdef RT_ARCH_AMD64
1668 Assert(!((uintptr_t)pu64 & 7));
1669/*# if RT_INLINE_ASM_GNU_STYLE
1670 Assert(!((uintptr_t)pu64 & 7));
1671 __asm__ __volatile__("movq %1, %0\n\t"
1672 : "=r" (u64)
1673 : "m" (*pu64));
1674# else
1675 __asm
1676 {
1677 mov rdx, [pu64]
1678 mov rax, [rdx]
1679 mov [u64], rax
1680 }
1681# endif */
1682 u64 = *pu64;
1683# else /* !RT_ARCH_AMD64 */
1684# if RT_INLINE_ASM_GNU_STYLE
1685# if defined(PIC) || defined(__PIC__)
1686 uint32_t u32EBX = 0;
1687 uint32_t u32Spill;
1688 Assert(!((uintptr_t)pu64 & 7));
1689 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1690 "xor %%ecx,%%ecx\n\t"
1691 "xor %%edx,%%edx\n\t"
1692 "xchgl %%ebx, %3\n\t"
1693 "lock; cmpxchg8b (%4)\n\t"
1694 "movl %3, %%ebx\n\t"
1695 : "=A" (u64),
1696# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1697 "+m" (*pu64),
1698# else
1699 "=m" (*pu64),
1700# endif
1701 "=c" (u32Spill)
1702 : "m" (u32EBX),
1703 "S" (pu64));
1704# else /* !PIC */
1705 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1706 : "=A" (u64),
1707 "+m" (*pu64)
1708 : "0" (0ULL),
1709 "b" (0),
1710 "c" (0));
1711# endif
1712# else
1713 Assert(!((uintptr_t)pu64 & 7));
1714 __asm
1715 {
1716 xor eax, eax
1717 xor edx, edx
1718 mov edi, pu64
1719 xor ecx, ecx
1720 xor ebx, ebx
1721 lock cmpxchg8b [edi]
1722 mov dword ptr [u64], eax
1723 mov dword ptr [u64 + 4], edx
1724 }
1725# endif
1726# endif /* !RT_ARCH_AMD64 */
1727 return u64;
1728}
1729#endif
1730
1731
1732/**
1733 * Atomically reads a signed 64-bit value, ordered.
1734 *
1735 * @returns Current *pi64 value
1736 * @param pi64 Pointer to the 64-bit variable to read.
1737 * The memory pointed to must be writable.
1738 * @remark This will fault if the memory is read-only!
1739 */
1740DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1741{
1742 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1743}
1744
1745
1746/**
1747 * Atomically reads a signed 64-bit value, unordered.
1748 *
1749 * @returns Current *pi64 value
1750 * @param pi64 Pointer to the 64-bit variable to read.
1751 * The memory pointed to must be writable.
1752 * @remark This will fault if the memory is read-only!
1753 */
1754DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1755{
1756 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1757}
1758
1759
1760/**
1761 * Atomically reads a size_t value, ordered.
1762 *
1763 * @returns Current *pcb value
1764 * @param pcb Pointer to the size_t variable to read.
1765 */
1766DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1767{
1768#if ARCH_BITS == 64
1769 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1770#elif ARCH_BITS == 32
1771 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1772#elif ARCH_BITS == 16
1773 AssertCompileSize(size_t, 2);
1774 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1775#else
1776# error "Unsupported ARCH_BITS value"
1777#endif
1778}
1779
1780
1781/**
1782 * Atomically reads a size_t value, unordered.
1783 *
1784 * @returns Current *pcb value
1785 * @param pcb Pointer to the size_t variable to read.
1786 */
1787DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1788{
1789#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1790 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1791#elif ARCH_BITS == 32
1792 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1793#elif ARCH_BITS == 16
1794 AssertCompileSize(size_t, 2);
1795 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1796#else
1797# error "Unsupported ARCH_BITS value"
1798#endif
1799}
1800
1801
1802/**
1803 * Atomically reads a pointer value, ordered.
1804 *
1805 * @returns Current *pv value
1806 * @param ppv Pointer to the pointer variable to read.
1807 *
1808 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1809 * requires less typing (no casts).
1810 */
1811DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1812{
1813#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1814 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1815#elif ARCH_BITS == 64
1816 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1817#else
1818# error "ARCH_BITS is bogus"
1819#endif
1820}
1821
1822/**
1823 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1824 *
1825 * @returns Current *pv value
1826 * @param ppv Pointer to the pointer variable to read.
1827 * @param Type The type of *ppv, sans volatile.
1828 */
1829#ifdef __GNUC__
1830# define ASMAtomicReadPtrT(ppv, Type) \
1831 __extension__ \
1832 ({\
1833 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1834 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1835 pvTypeChecked; \
1836 })
1837#else
1838# define ASMAtomicReadPtrT(ppv, Type) \
1839 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1840#endif
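
/* Illustrative sketch: read a shared pointer once into a local and work with
 * that snapshot, so the NULL check and the use cannot observe two different
 * values.  MYCFG and g_pCfg are example names.
 * @code
 * typedef struct MYCFG { uint32_t cMaxUsers; } MYCFG;
 * static MYCFG * volatile g_pCfg;
 *
 * uint32_t myGetMaxUsers(void)
 * {
 *     MYCFG *pCfg = ASMAtomicReadPtrT(&g_pCfg, MYCFG *);
 *     return pCfg ? pCfg->cMaxUsers : 0;
 * }
 * @endcode
 */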
1841
1842
1843/**
1844 * Atomically reads a pointer value, unordered.
1845 *
1846 * @returns Current *pv value
1847 * @param ppv Pointer to the pointer variable to read.
1848 *
1849 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1850 * requires less typing (no casts).
1851 */
1852DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1853{
1854#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1855 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1856#elif ARCH_BITS == 64
1857 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1858#else
1859# error "ARCH_BITS is bogus"
1860#endif
1861}
1862
1863
1864/**
1865 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1866 *
1867 * @returns Current *pv value
1868 * @param ppv Pointer to the pointer variable to read.
1869 * @param Type The type of *ppv, sans volatile.
1870 */
1871#ifdef __GNUC__
1872# define ASMAtomicUoReadPtrT(ppv, Type) \
1873 __extension__ \
1874 ({\
1875 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1876 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
1877 pvTypeChecked; \
1878 })
1879#else
1880# define ASMAtomicUoReadPtrT(ppv, Type) \
1881 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
1882#endif
1883
1884
1885/**
1886 * Atomically reads a boolean value, ordered.
1887 *
1888 * @returns Current *pf value
1889 * @param pf Pointer to the boolean variable to read.
1890 */
1891DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1892{
1893 ASMMemoryFence();
1894 return *pf; /* byte reads are atomic on x86 */
1895}
1896
1897
1898/**
1899 * Atomically reads a boolean value, unordered.
1900 *
1901 * @returns Current *pf value
1902 * @param pf Pointer to the boolean variable to read.
1903 */
1904DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1905{
1906 return *pf; /* byte reads are atomic on x86 */
1907}
1908
1909
1910/**
1911 * Atomically read a typical IPRT handle value, ordered.
1912 *
1913 * @param ph Pointer to the handle variable to read.
1914 * @param phRes Where to store the result.
1915 *
1916 * @remarks This doesn't currently work for all handles (like RTFILE).
1917 */
1918#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1919# define ASMAtomicReadHandle(ph, phRes) \
1920 do { \
1921 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1922 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1923 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
1924 } while (0)
1925#elif HC_ARCH_BITS == 64
1926# define ASMAtomicReadHandle(ph, phRes) \
1927 do { \
1928 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1929 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1930 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
1931 } while (0)
1932#else
1933# error HC_ARCH_BITS
1934#endif
1935
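/*
 * Editorial usage sketch (not part of the upstream header): the handle read macros
 * copy the handle via the matching fixed-size atomic read.  RTSEMEVENT is assumed to
 * be available from iprt/types.h; the variable names are made up for illustration.
 *
 *      RTSEMEVENT volatile hEvtShared;
 *      RTSEMEVENT hEvt;
 *      ASMAtomicReadHandle(&hEvtShared, &hEvt);
 *      (hEvt now holds a consistent snapshot of hEvtShared)
 */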
1936
1937/**
1938 * Atomically read a typical IPRT handle value, unordered.
1939 *
1940 * @param ph Pointer to the handle variable to read.
1941 * @param phRes Where to store the result.
1942 *
1943 * @remarks This doesn't currently work for all handles (like RTFILE).
1944 */
1945#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1946# define ASMAtomicUoReadHandle(ph, phRes) \
1947 do { \
1948 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1949 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1950 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
1951 } while (0)
1952#elif HC_ARCH_BITS == 64
1953# define ASMAtomicUoReadHandle(ph, phRes) \
1954 do { \
1955 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1956 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1957 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
1958 } while (0)
1959#else
1960# error HC_ARCH_BITS
1961#endif
1962
1963
1964/**
1965 * Atomically read a value whose size might differ
1966 * between platforms or compilers, ordered.
1967 *
1968 * @param pu Pointer to the variable to read.
1969 * @param puRes Where to store the result.
1970 */
1971#define ASMAtomicReadSize(pu, puRes) \
1972 do { \
1973 switch (sizeof(*(pu))) { \
1974 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1975 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
1976 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
1977 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
1978 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1979 } \
1980 } while (0)
1981
1982
1983/**
1984 * Atomically read a value whose size might differ
1985 * between platforms or compilers, unordered.
1986 *
1987 * @param pu Pointer to the variable to read.
1988 * @param puRes Where to store the result.
1989 */
1990#define ASMAtomicUoReadSize(pu, puRes) \
1991 do { \
1992 switch (sizeof(*(pu))) { \
1993 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
1994 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
1995 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
1996 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
1997 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
1998 } \
1999 } while (0)
2000
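/*
 * Editorial usage sketch (not part of the upstream header): ASMAtomicReadSize picks
 * the right fixed-size atomic read from sizeof(*(pu)), which is handy for types such
 * as uintptr_t whose width differs between 32-bit and 64-bit builds.  The variable
 * names are made up for illustration.
 *
 *      uintptr_t volatile uShared;
 *      uintptr_t uSnapshot;
 *      ASMAtomicReadSize(&uShared, &uSnapshot);
 */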
2001
2002/**
2003 * Atomically writes an unsigned 8-bit value, ordered.
2004 *
2005 * @param pu8 Pointer to the 8-bit variable.
2006 * @param u8 The 8-bit value to assign to *pu8.
2007 */
2008DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2009{
2010 ASMAtomicXchgU8(pu8, u8);
2011}
2012
2013
2014/**
2015 * Atomically writes an unsigned 8-bit value, unordered.
2016 *
2017 * @param pu8 Pointer to the 8-bit variable.
2018 * @param u8 The 8-bit value to assign to *pu8.
2019 */
2020DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2021{
2022 *pu8 = u8; /* byte writes are atomic on x86 */
2023}
2024
2025
2026/**
2027 * Atomically writes a signed 8-bit value, ordered.
2028 *
2029 * @param pi8 Pointer to the 8-bit variable to write.
2030 * @param i8 The 8-bit value to assign to *pi8.
2031 */
2032DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2033{
2034 ASMAtomicXchgS8(pi8, i8);
2035}
2036
2037
2038/**
2039 * Atomically writes a signed 8-bit value, unordered.
2040 *
2041 * @param pi8 Pointer to the 8-bit variable to write.
2042 * @param i8 The 8-bit value to assign to *pi8.
2043 */
2044DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2045{
2046 *pi8 = i8; /* byte writes are atomic on x86 */
2047}
2048
2049
2050/**
2051 * Atomically writes an unsigned 16-bit value, ordered.
2052 *
2053 * @param pu16 Pointer to the 16-bit variable to write.
2054 * @param u16 The 16-bit value to assign to *pu16.
2055 */
2056DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2057{
2058 ASMAtomicXchgU16(pu16, u16);
2059}
2060
2061
2062/**
2063 * Atomically writes an unsigned 16-bit value, unordered.
2064 *
2065 * @param pu16 Pointer to the 16-bit variable to write.
2066 * @param u16 The 16-bit value to assign to *pu16.
2067 */
2068DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2069{
2070 Assert(!((uintptr_t)pu16 & 1));
2071 *pu16 = u16;
2072}
2073
2074
2075/**
2076 * Atomically writes a signed 16-bit value, ordered.
2077 *
2078 * @param pi16 Pointer to the 16-bit variable to write.
2079 * @param i16 The 16-bit value to assign to *pi16.
2080 */
2081DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2082{
2083 ASMAtomicXchgS16(pi16, i16);
2084}
2085
2086
2087/**
2088 * Atomically writes a signed 16-bit value, unordered.
2089 *
2090 * @param pi16 Pointer to the 16-bit variable to write.
2091 * @param i16 The 16-bit value to assign to *pi16.
2092 */
2093DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2094{
2095 Assert(!((uintptr_t)pi16 & 1));
2096 *pi16 = i16;
2097}
2098
2099
2100/**
2101 * Atomically writes an unsigned 32-bit value, ordered.
2102 *
2103 * @param pu32 Pointer to the 32-bit variable to write.
2104 * @param u32 The 32-bit value to assign to *pu32.
2105 */
2106DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2107{
2108 ASMAtomicXchgU32(pu32, u32);
2109}
2110
2111
2112/**
2113 * Atomically writes an unsigned 32-bit value, unordered.
2114 *
2115 * @param pu32 Pointer to the 32-bit variable to write.
2116 * @param u32 The 32-bit value to assign to *pu32.
2117 */
2118DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2119{
2120 Assert(!((uintptr_t)pu32 & 3));
2121 *pu32 = u32;
2122}
2123
2124
2125/**
2126 * Atomically writes a signed 32-bit value, ordered.
2127 *
2128 * @param pi32 Pointer to the 32-bit variable to write.
2129 * @param i32 The 32-bit value to assign to *pi32.
2130 */
2131DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2132{
2133 ASMAtomicXchgS32(pi32, i32);
2134}
2135
2136
2137/**
2138 * Atomically writes a signed 32-bit value, unordered.
2139 *
2140 * @param pi32 Pointer to the 32-bit variable to write.
2141 * @param i32 The 32-bit value to assign to *pi32.
2142 */
2143DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2144{
2145 Assert(!((uintptr_t)pi32 & 3));
2146 *pi32 = i32;
2147}
2148
2149
2150/**
2151 * Atomically writes an unsigned 64-bit value, ordered.
2152 *
2153 * @param pu64 Pointer to the 64-bit variable to write.
2154 * @param u64 The 64-bit value to assign to *pu64.
2155 */
2156DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2157{
2158 ASMAtomicXchgU64(pu64, u64);
2159}
2160
2161
2162/**
2163 * Atomically writes an unsigned 64-bit value, unordered.
2164 *
2165 * @param pu64 Pointer to the 64-bit variable to write.
2166 * @param u64 The 64-bit value to assign to *pu64.
2167 */
2168DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2169{
2170 Assert(!((uintptr_t)pu64 & 7));
2171#if ARCH_BITS == 64
2172 *pu64 = u64;
2173#else
2174 ASMAtomicXchgU64(pu64, u64);
2175#endif
2176}
2177
2178
2179/**
2180 * Atomically writes a signed 64-bit value, ordered.
2181 *
2182 * @param pi64 Pointer to the 64-bit variable to write.
2183 * @param i64 The 64-bit value to assign to *pi64.
2184 */
2185DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2186{
2187 ASMAtomicXchgS64(pi64, i64);
2188}
2189
2190
2191/**
2192 * Atomically writes a signed 64-bit value, unordered.
2193 *
2194 * @param pi64 Pointer to the 64-bit variable to write.
2195 * @param i64 The 64-bit value to assign to *pi64.
2196 */
2197DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2198{
2199 Assert(!((uintptr_t)pi64 & 7));
2200#if ARCH_BITS == 64
2201 *pi64 = i64;
2202#else
2203 ASMAtomicXchgS64(pi64, i64);
2204#endif
2205}
2206
2207
2208/**
2209 * Atomically writes a boolean value, ordered.
2210 *
2211 * @param pf Pointer to the boolean variable to write.
2212 * @param f The boolean value to assign to *pf.
2213 */
2214DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2215{
2216 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2217}
2218
2219
2220/**
2221 * Atomically writes a boolean value, unordered.
2222 *
2223 * @param pf Pointer to the boolean variable to write.
2224 * @param f The boolean value to assign to *pf.
2225 */
2226DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2227{
2228 *pf = f; /* byte writes are atomic on x86 */
2229}
2230
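/*
 * Editorial usage sketch (not part of the upstream header): a simple shutdown flag,
 * written by one thread and polled by another.  The flag, and the ExampleDoWork
 * routine it gates, are made up for illustration.
 *
 *      static bool volatile s_fExampleShutdown = false;
 *
 *      (requesting thread)
 *      ASMAtomicWriteBool(&s_fExampleShutdown, true);
 *
 *      (worker thread)
 *      while (!ASMAtomicReadBool(&s_fExampleShutdown))
 *          ExampleDoWork();
 */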
2231
2232/**
2233 * Atomically writes a pointer value, ordered.
2234 *
2235 * @param ppv Pointer to the pointer variable to write.
2236 * @param pv The pointer value to assign to *ppv.
2237 */
2238DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2239{
2240#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2241 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2242#elif ARCH_BITS == 64
2243 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2244#else
2245# error "ARCH_BITS is bogus"
2246#endif
2247}
2248
2249
2250/**
2251 * Atomically writes a pointer value, ordered.
2252 *
2253 * @param ppv Pointer to the pointer variable to write.
2254 * @param pv The pointer value to assign to *ppv. If NULL use
2255 * ASMAtomicWriteNullPtr or you'll land in trouble.
2256 *
2257 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2258 * NULL.
2259 */
2260#ifdef __GNUC__
2261# define ASMAtomicWritePtr(ppv, pv) \
2262 do \
2263 { \
2264 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2265 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2266 \
2267 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2268 AssertCompile(sizeof(pv) == sizeof(void *)); \
2269 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2270 \
2271 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2272 } while (0)
2273#else
2274# define ASMAtomicWritePtr(ppv, pv) \
2275 do \
2276 { \
2277 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2278 AssertCompile(sizeof(pv) == sizeof(void *)); \
2279 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2280 \
2281 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2282 } while (0)
2283#endif
2284
2285
2286/**
2287 * Atomically sets a pointer to NULL, ordered.
2288 *
2289 * @param ppv Pointer to the pointer variable that should be set to NULL.
2290 *
2291 * @remarks This is relatively type safe on GCC platforms.
2292 */
2293#ifdef __GNUC__
2294# define ASMAtomicWriteNullPtr(ppv) \
2295 do \
2296 { \
2297 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2298 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2299 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2300 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2301 } while (0)
2302#else
2303# define ASMAtomicWriteNullPtr(ppv) \
2304 do \
2305 { \
2306 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2307 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2308 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2309 } while (0)
2310#endif
2311
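/*
 * Editorial usage sketch (not part of the upstream header): publishing and retiring a
 * shared object pointer.  NULL must go through ASMAtomicWriteNullPtr because the
 * type-checking variant of ASMAtomicWritePtr cannot deduce a type from NULL.  The
 * EXAMPLEOBJ type, g_pExampleObj variable and helper names are made up for
 * illustration.
 *
 *      typedef struct EXAMPLEOBJ { uint32_t uMagic; } EXAMPLEOBJ;
 *      extern EXAMPLEOBJ * volatile g_pExampleObj;
 *
 *      DECLINLINE(void) ExamplePublish(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicWritePtr(&g_pExampleObj, pObj);
 *      }
 *
 *      DECLINLINE(void) ExampleRetire(void)
 *      {
 *          ASMAtomicWriteNullPtr(&g_pExampleObj);
 *      }
 */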
2312
2313/**
2314 * Atomically writes a pointer value, unordered.
2315 *
2317 * @param ppv Pointer to the pointer variable.
2318 * @param pv The pointer value to assign to *ppv. If NULL use
2319 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2320 *
2321 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2322 * NULL.
2323 */
2324#ifdef __GNUC__
2325# define ASMAtomicUoWritePtr(ppv, pv) \
2326 do \
2327 { \
2328 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2329 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2330 \
2331 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2332 AssertCompile(sizeof(pv) == sizeof(void *)); \
2333 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2334 \
2335 *(ppvTypeChecked) = pvTypeChecked; \
2336 } while (0)
2337#else
2338# define ASMAtomicUoWritePtr(ppv, pv) \
2339 do \
2340 { \
2341 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2342 AssertCompile(sizeof(pv) == sizeof(void *)); \
2343 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2344 *(ppv) = pv; \
2345 } while (0)
2346#endif
2347
2348
2349/**
2350 * Atomically sets a pointer to NULL, unordered.
2351 *
2352 * @param ppv Pointer to the pointer variable that should be set to NULL.
2353 *
2354 * @remarks This is relatively type safe on GCC platforms.
2355 */
2356#ifdef __GNUC__
2357# define ASMAtomicUoWriteNullPtr(ppv) \
2358 do \
2359 { \
2360 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2361 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2362 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2363 *(ppvTypeChecked) = NULL; \
2364 } while (0)
2365#else
2366# define ASMAtomicUoWriteNullPtr(ppv) \
2367 do \
2368 { \
2369 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2370 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2371 *(ppv) = NULL; \
2372 } while (0)
2373#endif
2374
2375
2376/**
2377 * Atomically write a typical IPRT handle value, ordered.
2378 *
2379 * @param ph Pointer to the variable to update.
2380 * @param hNew The value to assign to *ph.
2381 *
2382 * @remarks This doesn't currently work for all handles (like RTFILE).
2383 */
2384#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2385# define ASMAtomicWriteHandle(ph, hNew) \
2386 do { \
2387 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2388 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2389 } while (0)
2390#elif HC_ARCH_BITS == 64
2391# define ASMAtomicWriteHandle(ph, hNew) \
2392 do { \
2393 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2394 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2395 } while (0)
2396#else
2397# error HC_ARCH_BITS
2398#endif
2399
2400
2401/**
2402 * Atomically write a typical IPRT handle value, unordered.
2403 *
2404 * @param ph Pointer to the variable to update.
2405 * @param hNew The value to assign to *ph.
2406 *
2407 * @remarks This doesn't currently work for all handles (like RTFILE).
2408 */
2409#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2410# define ASMAtomicUoWriteHandle(ph, hNew) \
2411 do { \
2412 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2413 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2414 } while (0)
2415#elif HC_ARCH_BITS == 64
2416# define ASMAtomicUoWriteHandle(ph, hNew) \
2417 do { \
2418 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2419 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2420 } while (0)
2421#else
2422# error HC_ARCH_BITS
2423#endif
2424
2425
2426/**
2427 * Atomically write a value whose size might differ
2428 * between platforms or compilers, ordered.
2429 *
2430 * @param pu Pointer to the variable to update.
2431 * @param uNew The value to assign to *pu.
2432 */
2433#define ASMAtomicWriteSize(pu, uNew) \
2434 do { \
2435 switch (sizeof(*(pu))) { \
2436 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2437 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2438 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2439 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2440 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2441 } \
2442 } while (0)
2443
2444/**
2445 * Atomically write a value whose size might differ
2446 * between platforms or compilers, unordered.
2447 *
2448 * @param pu Pointer to the variable to update.
2449 * @param uNew The value to assign to *pu.
2450 */
2451#define ASMAtomicUoWriteSize(pu, uNew) \
2452 do { \
2453 switch (sizeof(*(pu))) { \
2454 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2455 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2456 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2457 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2458 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2459 } \
2460 } while (0)
2461
2462
2463
2464/**
2465 * Atomically exchanges and adds to a 16-bit value, ordered.
2466 *
2467 * @returns The old value.
2468 * @param pu16 Pointer to the value.
2469 * @param u16 Number to add.
2470 * @remarks Currently not implemented, just to make 16-bit code happy.
2471 */
2472DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2473
2474
2475/**
2476 * Atomically exchanges and adds to a 32-bit value, ordered.
2477 *
2478 * @returns The old value.
2479 * @param pu32 Pointer to the value.
2480 * @param u32 Number to add.
2481 */
2482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2483DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2484#else
2485DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2486{
2487# if RT_INLINE_ASM_USES_INTRIN
2488 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2489 return u32;
2490
2491# elif RT_INLINE_ASM_GNU_STYLE
2492 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2493 : "=r" (u32),
2494 "=m" (*pu32)
2495 : "0" (u32),
2496 "m" (*pu32)
2497 : "memory");
2498 return u32;
2499# else
2500 __asm
2501 {
2502 mov eax, [u32]
2503# ifdef RT_ARCH_AMD64
2504 mov rdx, [pu32]
2505 lock xadd [rdx], eax
2506# else
2507 mov edx, [pu32]
2508 lock xadd [edx], eax
2509# endif
2510 mov [u32], eax
2511 }
2512 return u32;
2513# endif
2514}
2515#endif
2516
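/*
 * Editorial usage sketch (not part of the upstream header): because ASMAtomicAddU32
 * returns the value *before* the addition, it can hand out unique, monotonically
 * increasing sequence numbers.  The variable and helper names are made up for
 * illustration.
 *
 *      static uint32_t volatile s_uExampleNextSeqNo = 0;
 *
 *      DECLINLINE(uint32_t) ExampleGrabSeqNo(void)
 *      {
 *          return ASMAtomicAddU32(&s_uExampleNextSeqNo, 1);
 *      }
 */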
2517
2518/**
2519 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2520 *
2521 * @returns The old value.
2522 * @param pi32 Pointer to the value.
2523 * @param i32 Number to add.
2524 */
2525DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2526{
2527 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2528}
2529
2530
2531/**
2532 * Atomically exchanges and adds to a 64-bit value, ordered.
2533 *
2534 * @returns The old value.
2535 * @param pu64 Pointer to the value.
2536 * @param u64 Number to add.
2537 */
2538#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2539DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2540#else
2541DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2542{
2543# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2544 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2545 return u64;
2546
2547# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2548 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2549 : "=r" (u64),
2550 "=m" (*pu64)
2551 : "0" (u64),
2552 "m" (*pu64)
2553 : "memory");
2554 return u64;
2555# else
2556 uint64_t u64Old;
2557 for (;;)
2558 {
2559 uint64_t u64New;
2560 u64Old = ASMAtomicUoReadU64(pu64);
2561 u64New = u64Old + u64;
2562 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2563 break;
2564 ASMNopPause();
2565 }
2566 return u64Old;
2567# endif
2568}
2569#endif
2570
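/*
 * Editorial sketch (not part of the upstream header): the compare-exchange retry loop
 * used by the fallback above is a general pattern for building further read-modify-write
 * operations.  As an illustration, an atomic unsigned maximum could be written like
 * this (the function name is made up):
 *
 *      DECLINLINE(uint64_t) ExampleAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64New)
 *      {
 *          for (;;)
 *          {
 *              uint64_t u64Old = ASMAtomicUoReadU64(pu64);
 *              if (u64Old >= u64New)
 *                  return u64Old;                          (already large enough)
 *              if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                  return u64Old;                          (our update won)
 *              ASMNopPause();                              (lost the race, try again)
 *          }
 *      }
 */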
2571
2572/**
2573 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2574 *
2575 * @returns The old value.
2576 * @param pi64 Pointer to the value.
2577 * @param i64 Number to add.
2578 */
2579DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2580{
2581 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2582}
2583
2584
2585/**
2586 * Atomically exchanges and adds to a size_t value, ordered.
2587 *
2588 * @returns The old value.
2589 * @param pcb Pointer to the size_t value.
2590 * @param cb Number to add.
2591 */
2592DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2593{
2594#if ARCH_BITS == 64
2595 AssertCompileSize(size_t, 8);
2596 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2597#elif ARCH_BITS == 32
2598 AssertCompileSize(size_t, 4);
2599 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2600#elif ARCH_BITS == 16
2601 AssertCompileSize(size_t, 2);
2602 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2603#else
2604# error "Unsupported ARCH_BITS value"
2605#endif
2606}
2607
2608
2609/**
2610 * Atomically exchanges and adds a value whose size might differ between
2611 * platforms or compilers, ordered.
2612 *
2613 * @param pu Pointer to the variable to update.
2614 * @param uNew The value to add to *pu.
2615 * @param puOld Where to store the old value.
2616 */
2617#define ASMAtomicAddSize(pu, uNew, puOld) \
2618 do { \
2619 switch (sizeof(*(pu))) { \
2620 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2621 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2622 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2623 } \
2624 } while (0)
2625
2626
2627
2628/**
2629 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2630 *
2631 * @returns The old value.
2632 * @param pu16 Pointer to the value.
2633 * @param u16 Number to subtract.
2634 */
2635DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2636{
2637 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2638}
2639
2640
2641/**
2642 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2643 *
2644 * @returns The old value.
2645 * @param pi16 Pointer to the value.
2646 * @param i16 Number to subtract.
2647 */
2648DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2649{
2650 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2651}
2652
2653
2654/**
2655 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2656 *
2657 * @returns The old value.
2658 * @param pu32 Pointer to the value.
2659 * @param u32 Number to subtract.
2660 */
2661DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2662{
2663 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2664}
2665
2666
2667/**
2668 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2669 *
2670 * @returns The old value.
2671 * @param pi32 Pointer to the value.
2672 * @param i32 Number to subtract.
2673 */
2674DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2675{
2676 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2677}
2678
2679
2680/**
2681 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2682 *
2683 * @returns The old value.
2684 * @param pu64 Pointer to the value.
2685 * @param u64 Number to subtract.
2686 */
2687DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2688{
2689 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2690}
2691
2692
2693/**
2694 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2695 *
2696 * @returns The old value.
2697 * @param pi64 Pointer to the value.
2698 * @param i64 Number to subtract.
2699 */
2700DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2701{
2702 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2703}
2704
2705
2706/**
2707 * Atomically exchanges and subtracts from a size_t value, ordered.
2708 *
2709 * @returns The old value.
2710 * @param pcb Pointer to the size_t value.
2711 * @param cb Number to subtract.
2712 */
2713DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2714{
2715#if ARCH_BITS == 64
2716 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2717#elif ARCH_BITS == 32
2718 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2719#elif ARCH_BITS == 16
2720 AssertCompileSize(size_t, 2);
2721 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2722#else
2723# error "Unsupported ARCH_BITS value"
2724#endif
2725}
2726
2727
2728/**
2729 * Atomically exchanges and subtracts a value whose size might differ between
2730 * platforms or compilers, ordered.
2731 *
2732 * @param pu Pointer to the variable to update.
2733 * @param uNew The value to subtract from *pu.
2734 * @param puOld Where to store the old value.
2735 */
2736#define ASMAtomicSubSize(pu, uNew, puOld) \
2737 do { \
2738 switch (sizeof(*(pu))) { \
2739 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2740 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2741 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2742 } \
2743 } while (0)
2744
2745
2746
2747/**
2748 * Atomically increment a 16-bit value, ordered.
2749 *
2750 * @returns The new value.
2751 * @param pu16 Pointer to the value to increment.
2752 * @remarks Not implemented. Just to make 16-bit code happy.
2753 */
2754DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2755
2756
2757/**
2758 * Atomically increment a 32-bit value, ordered.
2759 *
2760 * @returns The new value.
2761 * @param pu32 Pointer to the value to increment.
2762 */
2763#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2764DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2765#else
2766DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2767{
2768 uint32_t u32;
2769# if RT_INLINE_ASM_USES_INTRIN
2770 u32 = _InterlockedIncrement((long *)pu32);
2771 return u32;
2772
2773# elif RT_INLINE_ASM_GNU_STYLE
2774 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2775 : "=r" (u32),
2776 "=m" (*pu32)
2777 : "0" (1),
2778 "m" (*pu32)
2779 : "memory");
2780 return u32+1;
2781# else
2782 __asm
2783 {
2784 mov eax, 1
2785# ifdef RT_ARCH_AMD64
2786 mov rdx, [pu32]
2787 lock xadd [rdx], eax
2788# else
2789 mov edx, [pu32]
2790 lock xadd [edx], eax
2791# endif
2792 mov u32, eax
2793 }
2794 return u32+1;
2795# endif
2796}
2797#endif
2798
2799
2800/**
2801 * Atomically increment a signed 32-bit value, ordered.
2802 *
2803 * @returns The new value.
2804 * @param pi32 Pointer to the value to increment.
2805 */
2806DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2807{
2808 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2809}
2810
2811
2812/**
2813 * Atomically increment a 64-bit value, ordered.
2814 *
2815 * @returns The new value.
2816 * @param pu64 Pointer to the value to increment.
2817 */
2818#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2819DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2820#else
2821DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2822{
2823# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2824 uint64_t u64;
2825 u64 = _InterlockedIncrement64((__int64 *)pu64);
2826 return u64;
2827
2828# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2829 uint64_t u64;
2830 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2831 : "=r" (u64),
2832 "=m" (*pu64)
2833 : "0" (1),
2834 "m" (*pu64)
2835 : "memory");
2836 return u64 + 1;
2837# else
2838 return ASMAtomicAddU64(pu64, 1) + 1;
2839# endif
2840}
2841#endif
2842
2843
2844/**
2845 * Atomically increment a signed 64-bit value, ordered.
2846 *
2847 * @returns The new value.
2848 * @param pi64 Pointer to the value to increment.
2849 */
2850DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2851{
2852 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2853}
2854
2855
2856/**
2857 * Atomically increment a size_t value, ordered.
2858 *
2859 * @returns The new value.
2860 * @param pcb Pointer to the value to increment.
2861 */
2862DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
2863{
2864#if ARCH_BITS == 64
2865 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2866#elif ARCH_BITS == 32
2867 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2868#elif ARCH_BITS == 16
2869 return ASMAtomicIncU16((uint16_t volatile *)pcb);
2870#else
2871# error "Unsupported ARCH_BITS value"
2872#endif
2873}
2874
2875
2876
2877/**
2878 * Atomically decrement an unsigned 16-bit value, ordered.
2879 *
2880 * @returns The new value.
2881 * @param pu16 Pointer to the value to decrement.
2882 * @remarks Not implemented. Just to make 16-bit code happy.
2883 */
2884DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
2885
2886
2887/**
2888 * Atomically decrement an unsigned 32-bit value, ordered.
2889 *
2890 * @returns The new value.
2891 * @param pu32 Pointer to the value to decrement.
2892 */
2893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2894DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2895#else
2896DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2897{
2898 uint32_t u32;
2899# if RT_INLINE_ASM_USES_INTRIN
2900 u32 = _InterlockedDecrement((long *)pu32);
2901 return u32;
2902
2903# elif RT_INLINE_ASM_GNU_STYLE
2904 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2905 : "=r" (u32),
2906 "=m" (*pu32)
2907 : "0" (-1),
2908 "m" (*pu32)
2909 : "memory");
2910 return u32-1;
2911# else
2912 __asm
2913 {
2914 mov eax, -1
2915# ifdef RT_ARCH_AMD64
2916 mov rdx, [pu32]
2917 lock xadd [rdx], eax
2918# else
2919 mov edx, [pu32]
2920 lock xadd [edx], eax
2921# endif
2922 mov u32, eax
2923 }
2924 return u32-1;
2925# endif
2926}
2927#endif
2928
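/*
 * Editorial usage sketch (not part of the upstream header): ASMAtomicIncU32 and
 * ASMAtomicDecU32 return the *new* value, which is exactly what reference counting
 * needs.  The EXAMPLEOBJ type and helper names are made up for illustration.
 *
 *      typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;
 *
 *      DECLINLINE(uint32_t) ExampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          return ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      DECLINLINE(bool) ExampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          return ASMAtomicDecU32(&pObj->cRefs) == 0;      (true when the caller should free pObj)
 *      }
 */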
2929
2930/**
2931 * Atomically decrement a signed 32-bit value, ordered.
2932 *
2933 * @returns The new value.
2934 * @param pi32 Pointer to the value to decrement.
2935 */
2936DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2937{
2938 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2939}
2940
2941
2942/**
2943 * Atomically decrement an unsigned 64-bit value, ordered.
2944 *
2945 * @returns The new value.
2946 * @param pu64 Pointer to the value to decrement.
2947 */
2948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2949DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2950#else
2951DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2952{
2953# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2954 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2955 return u64;
2956
2957# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2958 uint64_t u64;
2959 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2960 : "=r" (u64),
2961 "=m" (*pu64)
2962 : "0" (~(uint64_t)0),
2963 "m" (*pu64)
2964 : "memory");
2965 return u64-1;
2966# else
2967 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2968# endif
2969}
2970#endif
2971
2972
2973/**
2974 * Atomically decrement a signed 64-bit value, ordered.
2975 *
2976 * @returns The new value.
2977 * @param pi64 Pointer to the value to decrement.
2978 */
2979DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2980{
2981 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2982}
2983
2984
2985/**
2986 * Atomically decrement a size_t value, ordered.
2987 *
2988 * @returns The new value.
2989 * @param pcb Pointer to the value to decrement.
2990 */
2991DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
2992{
2993#if ARCH_BITS == 64
2994 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2995#elif ARCH_BITS == 32
2996 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2997#elif ARCH_BITS == 16
2998 return ASMAtomicDecU16((uint16_t volatile *)pcb);
2999#else
3000# error "Unsupported ARCH_BITS value"
3001#endif
3002}
3003
3004
3005/**
3006 * Atomically Or an unsigned 32-bit value, ordered.
3007 *
3008 * @param pu32 Pointer to the variable to OR u32 with.
3009 * @param u32 The value to OR *pu32 with.
3010 */
3011#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3012DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3013#else
3014DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3015{
3016# if RT_INLINE_ASM_USES_INTRIN
3017 _InterlockedOr((long volatile *)pu32, (long)u32);
3018
3019# elif RT_INLINE_ASM_GNU_STYLE
3020 __asm__ __volatile__("lock; orl %1, %0\n\t"
3021 : "=m" (*pu32)
3022 : "ir" (u32),
3023 "m" (*pu32));
3024# else
3025 __asm
3026 {
3027 mov eax, [u32]
3028# ifdef RT_ARCH_AMD64
3029 mov rdx, [pu32]
3030 lock or [rdx], eax
3031# else
3032 mov edx, [pu32]
3033 lock or [edx], eax
3034# endif
3035 }
3036# endif
3037}
3038#endif
3039
3040
3041/**
3042 * Atomically Or a signed 32-bit value, ordered.
3043 *
3044 * @param pi32 Pointer to the variable to OR i32 with.
3045 * @param i32 The value to OR *pi32 with.
3046 */
3047DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3048{
3049 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3050}
3051
3052
3053/**
3054 * Atomically Or an unsigned 64-bit value, ordered.
3055 *
3056 * @param pu64 Pointer to the variable to OR u64 with.
3057 * @param u64 The value to OR *pu64 with.
3058 */
3059#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3060DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3061#else
3062DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3063{
3064# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3065 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3066
3067# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3068 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3069 : "=m" (*pu64)
3070 : "r" (u64),
3071 "m" (*pu64));
3072# else
3073 for (;;)
3074 {
3075 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3076 uint64_t u64New = u64Old | u64;
3077 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3078 break;
3079 ASMNopPause();
3080 }
3081# endif
3082}
3083#endif
3084
3085
3086/**
3087 * Atomically Or a signed 64-bit value, ordered.
3088 *
3089 * @param pi64 Pointer to the variable to OR i64 with.
3090 * @param i64 The value to OR *pi64 with.
3091 */
3092DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3093{
3094 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3095}
3096
3097
3098/**
3099 * Atomically And an unsigned 32-bit value, ordered.
3100 *
3101 * @param pu32 Pointer to the variable to AND u32 with.
3102 * @param u32 The value to AND *pu32 with.
3103 */
3104#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3105DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3106#else
3107DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3108{
3109# if RT_INLINE_ASM_USES_INTRIN
3110 _InterlockedAnd((long volatile *)pu32, u32);
3111
3112# elif RT_INLINE_ASM_GNU_STYLE
3113 __asm__ __volatile__("lock; andl %1, %0\n\t"
3114 : "=m" (*pu32)
3115 : "ir" (u32),
3116 "m" (*pu32));
3117# else
3118 __asm
3119 {
3120 mov eax, [u32]
3121# ifdef RT_ARCH_AMD64
3122 mov rdx, [pu32]
3123 lock and [rdx], eax
3124# else
3125 mov edx, [pu32]
3126 lock and [edx], eax
3127# endif
3128 }
3129# endif
3130}
3131#endif
3132
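/*
 * Editorial usage sketch (not part of the upstream header): OR sets bits and AND with
 * the complement clears them, so the two make a lock-free flag field.  The flag value
 * and variable name are made up for illustration.
 *
 *      #define EXAMPLE_F_BUSY  UINT32_C(0x00000001)
 *      static uint32_t volatile s_fExampleFlags = 0;
 *
 *      ASMAtomicOrU32(&s_fExampleFlags, EXAMPLE_F_BUSY);     (set the busy flag)
 *      ASMAtomicAndU32(&s_fExampleFlags, ~EXAMPLE_F_BUSY);   (clear it again)
 */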
3133
3134/**
3135 * Atomically And a signed 32-bit value, ordered.
3136 *
3137 * @param pi32 Pointer to the variable to AND i32 with.
3138 * @param i32 The value to AND *pi32 with.
3139 */
3140DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3141{
3142 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3143}
3144
3145
3146/**
3147 * Atomically And an unsigned 64-bit value, ordered.
3148 *
3149 * @param pu64 Pointer to the variable to AND u64 with.
3150 * @param u64 The value to AND *pu64 with.
3151 */
3152#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3153DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3154#else
3155DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3156{
3157# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3158 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3159
3160# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3161 __asm__ __volatile__("lock; andq %1, %0\n\t"
3162 : "=m" (*pu64)
3163 : "r" (u64),
3164 "m" (*pu64));
3165# else
3166 for (;;)
3167 {
3168 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3169 uint64_t u64New = u64Old & u64;
3170 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3171 break;
3172 ASMNopPause();
3173 }
3174# endif
3175}
3176#endif
3177
3178
3179/**
3180 * Atomically And a signed 64-bit value, ordered.
3181 *
3182 * @param pi64 Pointer to the variable to AND i64 with.
3183 * @param i64 The value to AND *pi64 with.
3184 */
3185DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3186{
3187 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3188}
3189
3190
3191/**
3192 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3193 *
3194 * @param pu32 Pointer to the variable to OR u32 with.
3195 * @param u32 The value to OR *pu32 with.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL
3198DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3199#else
3200DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3201{
3202# if RT_INLINE_ASM_GNU_STYLE
3203 __asm__ __volatile__("orl %1, %0\n\t"
3204 : "=m" (*pu32)
3205 : "ir" (u32),
3206 "m" (*pu32));
3207# else
3208 __asm
3209 {
3210 mov eax, [u32]
3211# ifdef RT_ARCH_AMD64
3212 mov rdx, [pu32]
3213 or [rdx], eax
3214# else
3215 mov edx, [pu32]
3216 or [edx], eax
3217# endif
3218 }
3219# endif
3220}
3221#endif
3222
3223
3224/**
3225 * Atomically OR a signed 32-bit value, unordered.
3226 *
3227 * @param pi32 Pointer to the variable to OR i32 with.
3228 * @param i32 The value to OR *pi32 with.
3229 */
3230DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3231{
3232 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3233}
3234
3235
3236/**
3237 * Atomically OR an unsigned 64-bit value, unordered.
3238 *
3239 * @param pu64 Pointer to the variable to OR u64 with.
3240 * @param u64 The value to OR *pu64 with.
3241 */
3242#if RT_INLINE_ASM_EXTERNAL
3243DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3244#else
3245DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3246{
3247# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3248 __asm__ __volatile__("orq %1, %q0\n\t"
3249 : "=m" (*pu64)
3250 : "r" (u64),
3251 "m" (*pu64));
3252# else
3253 for (;;)
3254 {
3255 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3256 uint64_t u64New = u64Old | u64;
3257 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3258 break;
3259 ASMNopPause();
3260 }
3261# endif
3262}
3263#endif
3264
3265
3266/**
3267 * Atomically Or a signed 64-bit value, unordered.
3268 *
3269 * @param pi64 Pointer to the variable to OR i64 with.
3270 * @param i64 The value to OR *pi64 with.
3271 */
3272DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3273{
3274 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3275}
3276
3277
3278/**
3279 * Atomically And an unsigned 32-bit value, unordered.
3280 *
3281 * @param pu32 Pointer to the variable to AND u32 with.
3282 * @param u32 The value to AND *pu32 with.
3283 */
3284#if RT_INLINE_ASM_EXTERNAL
3285DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3286#else
3287DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3288{
3289# if RT_INLINE_ASM_GNU_STYLE
3290 __asm__ __volatile__("andl %1, %0\n\t"
3291 : "=m" (*pu32)
3292 : "ir" (u32),
3293 "m" (*pu32));
3294# else
3295 __asm
3296 {
3297 mov eax, [u32]
3298# ifdef RT_ARCH_AMD64
3299 mov rdx, [pu32]
3300 and [rdx], eax
3301# else
3302 mov edx, [pu32]
3303 and [edx], eax
3304# endif
3305 }
3306# endif
3307}
3308#endif
3309
3310
3311/**
3312 * Atomically And a signed 32-bit value, unordered.
3313 *
3314 * @param pi32 Pointer to the variable to AND i32 with.
3315 * @param i32 The value to AND *pi32 with.
3316 */
3317DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3318{
3319 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3320}
3321
3322
3323/**
3324 * Atomically And an unsigned 64-bit value, unordered.
3325 *
3326 * @param pu64 Pointer to the variable to AND u64 with.
3327 * @param u64 The value to AND *pu64 with.
3328 */
3329#if RT_INLINE_ASM_EXTERNAL
3330DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3331#else
3332DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3333{
3334# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3335 __asm__ __volatile__("andq %1, %0\n\t"
3336 : "=m" (*pu64)
3337 : "r" (u64),
3338 "m" (*pu64));
3339# else
3340 for (;;)
3341 {
3342 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3343 uint64_t u64New = u64Old & u64;
3344 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3345 break;
3346 ASMNopPause();
3347 }
3348# endif
3349}
3350#endif
3351
3352
3353/**
3354 * Atomically And a signed 64-bit value, unordered.
3355 *
3356 * @param pi64 Pointer to the variable to AND i64 with.
3357 * @param i64 The value to AND *pi64 with.
3358 */
3359DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3360{
3361 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3362}
3363
3364
3365/**
3366 * Atomically increment an unsigned 32-bit value, unordered.
3367 *
3368 * @returns the new value.
3369 * @param pu32 Pointer to the variable to increment.
3370 */
3371#if RT_INLINE_ASM_EXTERNAL
3372DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3373#else
3374DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3375{
3376 uint32_t u32;
3377# if RT_INLINE_ASM_GNU_STYLE
3378 __asm__ __volatile__("xaddl %0, %1\n\t"
3379 : "=r" (u32),
3380 "=m" (*pu32)
3381 : "0" (1),
3382 "m" (*pu32)
3383 : "memory");
3384 return u32 + 1;
3385# else
3386 __asm
3387 {
3388 mov eax, 1
3389# ifdef RT_ARCH_AMD64
3390 mov rdx, [pu32]
3391 xadd [rdx], eax
3392# else
3393 mov edx, [pu32]
3394 xadd [edx], eax
3395# endif
3396 mov u32, eax
3397 }
3398 return u32 + 1;
3399# endif
3400}
3401#endif
3402
3403
3404/**
3405 * Atomically decrement an unsigned 32-bit value, unordered.
3406 *
3407 * @returns the new value.
3408 * @param pu32 Pointer to the variable to decrement.
3409 */
3410#if RT_INLINE_ASM_EXTERNAL
3411DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3412#else
3413DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3414{
3415 uint32_t u32;
3416# if RT_INLINE_ASM_GNU_STYLE
3417 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3418 : "=r" (u32),
3419 "=m" (*pu32)
3420 : "0" (-1),
3421 "m" (*pu32)
3422 : "memory");
3423 return u32 - 1;
3424# else
3425 __asm
3426 {
3427 mov eax, -1
3428# ifdef RT_ARCH_AMD64
3429 mov rdx, [pu32]
3430 xadd [rdx], eax
3431# else
3432 mov edx, [pu32]
3433 xadd [edx], eax
3434# endif
3435 mov u32, eax
3436 }
3437 return u32 - 1;
3438# endif
3439}
3440#endif
3441
3442
3443/** @def RT_ASM_PAGE_SIZE
3444 * We try to avoid dragging in iprt/param.h here.
3445 * @internal
3446 */
3447#if defined(RT_ARCH_SPARC64)
3448# define RT_ASM_PAGE_SIZE 0x2000
3449# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3450# if PAGE_SIZE != 0x2000
3451# error "PAGE_SIZE is not 0x2000!"
3452# endif
3453# endif
3454#else
3455# define RT_ASM_PAGE_SIZE 0x1000
3456# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3457# if PAGE_SIZE != 0x1000
3458# error "PAGE_SIZE is not 0x1000!"
3459# endif
3460# endif
3461#endif
3462
3463/**
3464 * Zeros a 4K memory page.
3465 *
3466 * @param pv Pointer to the memory block. This must be page aligned.
3467 */
3468#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3469DECLASM(void) ASMMemZeroPage(volatile void *pv);
3470# else
3471DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3472{
3473# if RT_INLINE_ASM_USES_INTRIN
3474# ifdef RT_ARCH_AMD64
3475 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3476# else
3477 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3478# endif
3479
3480# elif RT_INLINE_ASM_GNU_STYLE
3481 RTCCUINTREG uDummy;
3482# ifdef RT_ARCH_AMD64
3483 __asm__ __volatile__("rep stosq"
3484 : "=D" (pv),
3485 "=c" (uDummy)
3486 : "0" (pv),
3487 "c" (RT_ASM_PAGE_SIZE >> 3),
3488 "a" (0)
3489 : "memory");
3490# else
3491 __asm__ __volatile__("rep stosl"
3492 : "=D" (pv),
3493 "=c" (uDummy)
3494 : "0" (pv),
3495 "c" (RT_ASM_PAGE_SIZE >> 2),
3496 "a" (0)
3497 : "memory");
3498# endif
3499# else
3500 __asm
3501 {
3502# ifdef RT_ARCH_AMD64
3503 xor rax, rax
3504 mov ecx, 0200h
3505 mov rdi, [pv]
3506 rep stosq
3507# else
3508 xor eax, eax
3509 mov ecx, 0400h
3510 mov edi, [pv]
3511 rep stosd
3512# endif
3513 }
3514# endif
3515}
3516# endif
3517
3518
3519/**
3520 * Zeros a memory block with a 32-bit aligned size.
3521 *
3522 * @param pv Pointer to the memory block.
3523 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3524 */
3525#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3526DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3527#else
3528DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3529{
3530# if RT_INLINE_ASM_USES_INTRIN
3531# ifdef RT_ARCH_AMD64
3532 if (!(cb & 7))
3533 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3534 else
3535# endif
3536 __stosd((unsigned long *)pv, 0, cb / 4);
3537
3538# elif RT_INLINE_ASM_GNU_STYLE
3539 __asm__ __volatile__("rep stosl"
3540 : "=D" (pv),
3541 "=c" (cb)
3542 : "0" (pv),
3543 "1" (cb >> 2),
3544 "a" (0)
3545 : "memory");
3546# else
3547 __asm
3548 {
3549 xor eax, eax
3550# ifdef RT_ARCH_AMD64
3551 mov rcx, [cb]
3552 shr rcx, 2
3553 mov rdi, [pv]
3554# else
3555 mov ecx, [cb]
3556 shr ecx, 2
3557 mov edi, [pv]
3558# endif
3559 rep stosd
3560 }
3561# endif
3562}
3563#endif
3564
3565
3566/**
3567 * Fills a memory block with a 32-bit aligned size.
3568 *
3569 * @param pv Pointer to the memory block.
3570 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3571 * @param u32 The value to fill with.
3572 */
3573#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3574DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3575#else
3576DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3577{
3578# if RT_INLINE_ASM_USES_INTRIN
3579# ifdef RT_ARCH_AMD64
3580 if (!(cb & 7))
3581 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3582 else
3583# endif
3584 __stosd((unsigned long *)pv, u32, cb / 4);
3585
3586# elif RT_INLINE_ASM_GNU_STYLE
3587 __asm__ __volatile__("rep stosl"
3588 : "=D" (pv),
3589 "=c" (cb)
3590 : "0" (pv),
3591 "1" (cb >> 2),
3592 "a" (u32)
3593 : "memory");
3594# else
3595 __asm
3596 {
3597# ifdef RT_ARCH_AMD64
3598 mov rcx, [cb]
3599 shr rcx, 2
3600 mov rdi, [pv]
3601# else
3602 mov ecx, [cb]
3603 shr ecx, 2
3604 mov edi, [pv]
3605# endif
3606 mov eax, [u32]
3607 rep stosd
3608 }
3609# endif
3610}
3611#endif
3612
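/*
 * Editorial usage sketch (not part of the upstream header): both helpers require the
 * byte count to be a multiple of four, so sizing the buffer in uint32_t units keeps
 * that requirement visible.  The buffer name is made up for illustration.
 *
 *      uint32_t au32ExampleBuf[64];
 *      ASMMemZero32(au32ExampleBuf, sizeof(au32ExampleBuf));
 *      ASMMemFill32(au32ExampleBuf, sizeof(au32ExampleBuf), UINT32_C(0xfeedface));
 */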
3613
3614/**
3615 * Checks if a memory page is all zeros.
3616 *
3617 * @returns true / false.
3618 *
3619 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3620 * boundary
3621 */
3622DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3623{
3624# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3625 union { RTCCUINTREG r; bool f; } uAX;
3626 RTCCUINTREG xCX, xDI;
3627 Assert(!((uintptr_t)pvPage & 15));
3628 __asm__ __volatile__("repe; "
3629# ifdef RT_ARCH_AMD64
3630 "scasq\n\t"
3631# else
3632 "scasl\n\t"
3633# endif
3634 "setnc %%al\n\t"
3635 : "=&c" (xCX),
3636 "=&D" (xDI),
3637 "=&a" (uAX.r)
3638 : "mr" (pvPage),
3639# ifdef RT_ARCH_AMD64
3640 "0" (RT_ASM_PAGE_SIZE/8),
3641# else
3642 "0" (RT_ASM_PAGE_SIZE/4),
3643# endif
3644 "1" (pvPage),
3645 "2" (0));
3646 return uAX.f;
3647# else
3648 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3649 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3650 Assert(!((uintptr_t)pvPage & 15));
3651 for (;;)
3652 {
3653 if (puPtr[0]) return false;
3654 if (puPtr[4]) return false;
3655
3656 if (puPtr[2]) return false;
3657 if (puPtr[6]) return false;
3658
3659 if (puPtr[1]) return false;
3660 if (puPtr[5]) return false;
3661
3662 if (puPtr[3]) return false;
3663 if (puPtr[7]) return false;
3664
3665 if (!--cLeft)
3666 return true;
3667 puPtr += 8;
3668 }
3669 return true;
3670# endif
3671}
3672
3673
3674/**
3675 * Checks if a memory block is filled with the specified byte.
3676 *
3677 * This is a sort of inverted memchr.
3678 *
3679 * @returns Pointer to the byte which doesn't equal u8.
3680 * @returns NULL if all equal to u8.
3681 *
3682 * @param pv Pointer to the memory block.
3683 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3684 * @param u8 The value it's supposed to be filled with.
3685 *
3686 * @todo Fix name, it is a predicate function but it's not returning boolean!
3687 */
3688DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3689{
3690/** @todo rewrite this in inline assembly? */
3691 uint8_t const *pb = (uint8_t const *)pv;
3692 for (; cb; cb--, pb++)
3693 if (RT_LIKELY(*pb == u8))
3694 { /* likely */ }
3695 else
3696 return (void *)pb;
3697 return NULL;
3698}
3699
3700
3701/**
3702 * Checks if a memory block is filled with the specified 32-bit value.
3703 *
3704 * This is a sort of inverted memchr.
3705 *
3706 * @returns Pointer to the first value which doesn't equal u32.
3707 * @returns NULL if all equal to u32.
3708 *
3709 * @param pv Pointer to the memory block.
3710 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3711 * @param u32 The value it's supposed to be filled with.
3712 *
3713 * @todo Fix name, it is a predicate function but it's not returning boolean!
3714 */
3715DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3716{
3717/** @todo rewrite this in inline assembly? */
3718 uint32_t const *pu32 = (uint32_t const *)pv;
3719 for (; cb; cb -= 4, pu32++)
3720 if (RT_LIKELY(*pu32 == u32))
3721 { /* likely */ }
3722 else
3723 return (uint32_t *)pu32;
3724 return NULL;
3725}
3726
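/*
 * Editorial usage sketch (not part of the upstream header): despite the name,
 * ASMMemIsAll8 returns a pointer (NULL when every byte matches), so a boolean wrapper
 * reads more naturally.  The helper name is made up; cb is expected to be 32-bit
 * aligned as noted above.
 *
 *      DECLINLINE(bool) ExampleIsZeroed(void const *pv, size_t cb)
 *      {
 *          return ASMMemIsAll8(pv, cb, 0) == NULL;
 *      }
 */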
3727
3728/**
3729 * Probes a byte pointer for read access.
3730 *
3731 * While the function will fault if the byte is not read accessible,
3732 * the idea is to do this in a safe place like before acquiring locks
3733 * and such like.
3734 *
3735 * Also, this function guarantees that an eager compiler is not going
3736 * to optimize the probing away.
3737 *
3738 * @param pvByte Pointer to the byte.
3739 */
3740#if RT_INLINE_ASM_EXTERNAL
3741DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3742#else
3743DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3744{
3745 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3746 uint8_t u8;
3747# if RT_INLINE_ASM_GNU_STYLE
3748 __asm__ __volatile__("movb (%1), %0\n\t"
3749 : "=r" (u8)
3750 : "r" (pvByte));
3751# else
3752 __asm
3753 {
3754# ifdef RT_ARCH_AMD64
3755 mov rax, [pvByte]
3756 mov al, [rax]
3757# else
3758 mov eax, [pvByte]
3759 mov al, [eax]
3760# endif
3761 mov [u8], al
3762 }
3763# endif
3764 return u8;
3765}
3766#endif
3767
3768/**
3769 * Probes a buffer for read access page by page.
3770 *
3771 * While the function will fault if the buffer is not fully read
3772 * accessible, the idea is to do this in a safe place like before
3773 * acquiring locks and such like.
3774 *
3775 * Also, this function guarantees that an eager compiler is not going
3776 * to optimize the probing away.
3777 *
3778 * @param pvBuf Pointer to the buffer.
3779 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3780 */
3781DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3782{
3783 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3784 /* the first byte */
3785 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3786 ASMProbeReadByte(pu8);
3787
3788 /* the pages in between. */
3789 while (cbBuf > RT_ASM_PAGE_SIZE)
3790 {
3791 ASMProbeReadByte(pu8);
3792 cbBuf -= RT_ASM_PAGE_SIZE;
3793 pu8 += RT_ASM_PAGE_SIZE;
3794 }
3795
3796 /* the last byte */
3797 ASMProbeReadByte(pu8 + cbBuf - 1);
3798}
3799
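/*
 * Editorial usage sketch (not part of the upstream header): probe a caller supplied
 * buffer up front so that any page fault happens here, in a context where faulting is
 * harmless, rather than later while locks are held or preemption is disabled.  The
 * variable names are made up for illustration.
 *
 *      ASMProbeReadBuffer(pvReq, cbReq);
 *      (safe to enter the critical section and read pvReq afterwards)
 */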
3800
3801
3802/** @defgroup grp_inline_bits Bit Operations
3803 * @{
3804 */
3805
3806
3807/**
3808 * Sets a bit in a bitmap.
3809 *
3810 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3811 * @param iBit The bit to set.
3812 *
3813 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3814 * However, doing so will yield better performance as well as avoiding
3815 * traps accessing the last bits in the bitmap.
3816 */
3817#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3818DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3819#else
3820DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3821{
3822# if RT_INLINE_ASM_USES_INTRIN
3823 _bittestandset((long *)pvBitmap, iBit);
3824
3825# elif RT_INLINE_ASM_GNU_STYLE
3826 __asm__ __volatile__("btsl %1, %0"
3827 : "=m" (*(volatile long *)pvBitmap)
3828 : "Ir" (iBit),
3829 "m" (*(volatile long *)pvBitmap)
3830 : "memory");
3831# else
3832 __asm
3833 {
3834# ifdef RT_ARCH_AMD64
3835 mov rax, [pvBitmap]
3836 mov edx, [iBit]
3837 bts [rax], edx
3838# else
3839 mov eax, [pvBitmap]
3840 mov edx, [iBit]
3841 bts [eax], edx
3842# endif
3843 }
3844# endif
3845}
3846#endif
3847
3848
3849/**
3850 * Atomically sets a bit in a bitmap, ordered.
3851 *
3852 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3853 * the memory access isn't atomic!
3854 * @param iBit The bit to set.
3855 */
3856#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3857DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3858#else
3859DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3860{
3861 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3862# if RT_INLINE_ASM_USES_INTRIN
3863 _interlockedbittestandset((long *)pvBitmap, iBit);
3864# elif RT_INLINE_ASM_GNU_STYLE
3865 __asm__ __volatile__("lock; btsl %1, %0"
3866 : "=m" (*(volatile long *)pvBitmap)
3867 : "Ir" (iBit),
3868 "m" (*(volatile long *)pvBitmap)
3869 : "memory");
3870# else
3871 __asm
3872 {
3873# ifdef RT_ARCH_AMD64
3874 mov rax, [pvBitmap]
3875 mov edx, [iBit]
3876 lock bts [rax], edx
3877# else
3878 mov eax, [pvBitmap]
3879 mov edx, [iBit]
3880 lock bts [eax], edx
3881# endif
3882 }
3883# endif
3884}
3885#endif
3886
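/*
 * Editorial usage sketch (not part of the upstream header): a small bitmap kept in an
 * array of 32-bit words, which also satisfies the alignment requirement of the atomic
 * variant.  The array name is made up for illustration.
 *
 *      static uint32_t volatile s_au32ExampleBitmap[256 / 32];   (256 bits)
 *
 *      ASMBitSet(s_au32ExampleBitmap, 42);         (non-atomic)
 *      ASMAtomicBitSet(s_au32ExampleBitmap, 42);   (atomic, ordered)
 */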
3887
3888/**
3889 * Clears a bit in a bitmap.
3890 *
3891 * @param pvBitmap Pointer to the bitmap.
3892 * @param iBit The bit to clear.
3893 *
3894 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3895 * However, doing so will yield better performance as well as avoiding
3896 * traps accessing the last bits in the bitmap.
3897 */
3898#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3899DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3900#else
3901DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3902{
3903# if RT_INLINE_ASM_USES_INTRIN
3904 _bittestandreset((long *)pvBitmap, iBit);
3905
3906# elif RT_INLINE_ASM_GNU_STYLE
3907 __asm__ __volatile__("btrl %1, %0"
3908 : "=m" (*(volatile long *)pvBitmap)
3909 : "Ir" (iBit),
3910 "m" (*(volatile long *)pvBitmap)
3911 : "memory");
3912# else
3913 __asm
3914 {
3915# ifdef RT_ARCH_AMD64
3916 mov rax, [pvBitmap]
3917 mov edx, [iBit]
3918 btr [rax], edx
3919# else
3920 mov eax, [pvBitmap]
3921 mov edx, [iBit]
3922 btr [eax], edx
3923# endif
3924 }
3925# endif
3926}
3927#endif
3928
3929
3930/**
3931 * Atomically clears a bit in a bitmap, ordered.
3932 *
3933 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3934 * the memory access isn't atomic!
3935 * @param iBit The bit to clear.
3936 * @remarks No memory barrier, take care on SMP.
3937 */
3938#if RT_INLINE_ASM_EXTERNAL
3939DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3940#else
3941DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3942{
3943 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3944# if RT_INLINE_ASM_GNU_STYLE
3945 __asm__ __volatile__("lock; btrl %1, %0"
3946 : "=m" (*(volatile long *)pvBitmap)
3947 : "Ir" (iBit),
3948 "m" (*(volatile long *)pvBitmap)
3949 : "memory");
3950# else
3951 __asm
3952 {
3953# ifdef RT_ARCH_AMD64
3954 mov rax, [pvBitmap]
3955 mov edx, [iBit]
3956 lock btr [rax], edx
3957# else
3958 mov eax, [pvBitmap]
3959 mov edx, [iBit]
3960 lock btr [eax], edx
3961# endif
3962 }
3963# endif
3964}
3965#endif
3966
3967
3968/**
3969 * Toggles a bit in a bitmap.
3970 *
3971 * @param pvBitmap Pointer to the bitmap.
3972 * @param iBit The bit to toggle.
3973 *
3974 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
3975 * it yields better performance and avoids traps when accessing
3976 * the last bits in the bitmap.
3977 */
3978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3979DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3980#else
3981DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3982{
3983# if RT_INLINE_ASM_USES_INTRIN
3984 _bittestandcomplement((long *)pvBitmap, iBit);
3985# elif RT_INLINE_ASM_GNU_STYLE
3986 __asm__ __volatile__("btcl %1, %0"
3987 : "=m" (*(volatile long *)pvBitmap)
3988 : "Ir" (iBit),
3989 "m" (*(volatile long *)pvBitmap)
3990 : "memory");
3991# else
3992 __asm
3993 {
3994# ifdef RT_ARCH_AMD64
3995 mov rax, [pvBitmap]
3996 mov edx, [iBit]
3997 btc [rax], edx
3998# else
3999 mov eax, [pvBitmap]
4000 mov edx, [iBit]
4001 btc [eax], edx
4002# endif
4003 }
4004# endif
4005}
4006#endif
4007
4008
4009/**
4010 * Atomically toggles a bit in a bitmap, ordered.
4011 *
4012 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4013 * the memory access isn't atomic!
4014 * @param iBit The bit to toggle.
4015 */
4016#if RT_INLINE_ASM_EXTERNAL
4017DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4018#else
4019DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4020{
4021 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4022# if RT_INLINE_ASM_GNU_STYLE
4023 __asm__ __volatile__("lock; btcl %1, %0"
4024 : "=m" (*(volatile long *)pvBitmap)
4025 : "Ir" (iBit),
4026 "m" (*(volatile long *)pvBitmap)
4027 : "memory");
4028# else
4029 __asm
4030 {
4031# ifdef RT_ARCH_AMD64
4032 mov rax, [pvBitmap]
4033 mov edx, [iBit]
4034 lock btc [rax], edx
4035# else
4036 mov eax, [pvBitmap]
4037 mov edx, [iBit]
4038 lock btc [eax], edx
4039# endif
4040 }
4041# endif
4042}
4043#endif
4044
4045
4046/**
4047 * Tests and sets a bit in a bitmap.
4048 *
4049 * @returns true if the bit was set.
4050 * @returns false if the bit was clear.
4051 *
4052 * @param pvBitmap Pointer to the bitmap.
4053 * @param iBit The bit to test and set.
4054 *
4055 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
4056 * it yields better performance and avoids traps when accessing
4057 * the last bits in the bitmap.
4058 */
4059#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4060DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4061#else
4062DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4063{
4064 union { bool f; uint32_t u32; uint8_t u8; } rc;
4065# if RT_INLINE_ASM_USES_INTRIN
4066 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4067
4068# elif RT_INLINE_ASM_GNU_STYLE
4069 __asm__ __volatile__("btsl %2, %1\n\t"
4070 "setc %b0\n\t"
4071 "andl $1, %0\n\t"
4072 : "=q" (rc.u32),
4073 "=m" (*(volatile long *)pvBitmap)
4074 : "Ir" (iBit),
4075 "m" (*(volatile long *)pvBitmap)
4076 : "memory");
4077# else
4078 __asm
4079 {
4080 mov edx, [iBit]
4081# ifdef RT_ARCH_AMD64
4082 mov rax, [pvBitmap]
4083 bts [rax], edx
4084# else
4085 mov eax, [pvBitmap]
4086 bts [eax], edx
4087# endif
4088 setc al
4089 and eax, 1
4090 mov [rc.u32], eax
4091 }
4092# endif
4093 return rc.f;
4094}
4095#endif
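/*
 * Usage sketch (non-atomic; names are illustrative): set a bit and learn in
 * one call whether it was already set, e.g. for one-time initialization on a
 * single thread.
 *
 *      uint32_t au32Init[1] = { 0 };
 *      if (!ASMBitTestAndSet(au32Init, 0))      // returns the previous state of bit 0
 *      {
 *          // first time through: do the one-off initialization here
 *      }
 */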
4096
4097
4098/**
4099 * Atomically tests and sets a bit in a bitmap, ordered.
4100 *
4101 * @returns true if the bit was set.
4102 * @returns false if the bit was clear.
4103 *
4104 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4105 * the memory access isn't atomic!
4106 * @param iBit The bit to test and set.
4107 */
4108#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4109DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4110#else
4111DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4112{
4113 union { bool f; uint32_t u32; uint8_t u8; } rc;
4114 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4115# if RT_INLINE_ASM_USES_INTRIN
4116 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4117# elif RT_INLINE_ASM_GNU_STYLE
4118 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4119 "setc %b0\n\t"
4120 "andl $1, %0\n\t"
4121 : "=q" (rc.u32),
4122 "=m" (*(volatile long *)pvBitmap)
4123 : "Ir" (iBit),
4124 "m" (*(volatile long *)pvBitmap)
4125 : "memory");
4126# else
4127 __asm
4128 {
4129 mov edx, [iBit]
4130# ifdef RT_ARCH_AMD64
4131 mov rax, [pvBitmap]
4132 lock bts [rax], edx
4133# else
4134 mov eax, [pvBitmap]
4135 lock bts [eax], edx
4136# endif
4137 setc al
4138 and eax, 1
4139 mov [rc.u32], eax
4140 }
4141# endif
4142 return rc.f;
4143}
4144#endif
4145
4146
4147/**
4148 * Tests and clears a bit in a bitmap.
4149 *
4150 * @returns true if the bit was set.
4151 * @returns false if the bit was clear.
4152 *
4153 * @param pvBitmap Pointer to the bitmap.
4154 * @param iBit The bit to test and clear.
4155 *
4156 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
4157 * it yields better performance and avoids traps when accessing
4158 * the last bits in the bitmap.
4159 */
4160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4161DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4162#else
4163DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4164{
4165 union { bool f; uint32_t u32; uint8_t u8; } rc;
4166# if RT_INLINE_ASM_USES_INTRIN
4167 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4168
4169# elif RT_INLINE_ASM_GNU_STYLE
4170 __asm__ __volatile__("btrl %2, %1\n\t"
4171 "setc %b0\n\t"
4172 "andl $1, %0\n\t"
4173 : "=q" (rc.u32),
4174 "=m" (*(volatile long *)pvBitmap)
4175 : "Ir" (iBit),
4176 "m" (*(volatile long *)pvBitmap)
4177 : "memory");
4178# else
4179 __asm
4180 {
4181 mov edx, [iBit]
4182# ifdef RT_ARCH_AMD64
4183 mov rax, [pvBitmap]
4184 btr [rax], edx
4185# else
4186 mov eax, [pvBitmap]
4187 btr [eax], edx
4188# endif
4189 setc al
4190 and eax, 1
4191 mov [rc.u32], eax
4192 }
4193# endif
4194 return rc.f;
4195}
4196#endif
4197
4198
4199/**
4200 * Atomically tests and clears a bit in a bitmap, ordered.
4201 *
4202 * @returns true if the bit was set.
4203 * @returns false if the bit was clear.
4204 *
4205 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4206 * the memory access isn't atomic!
4207 * @param iBit The bit to test and clear.
4208 *
4209 * @remarks No memory barrier, take care on SMP.
4210 */
4211#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4212DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4213#else
4214DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4215{
4216 union { bool f; uint32_t u32; uint8_t u8; } rc;
4217 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4218# if RT_INLINE_ASM_USES_INTRIN
4219 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4220
4221# elif RT_INLINE_ASM_GNU_STYLE
4222 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4223 "setc %b0\n\t"
4224 "andl $1, %0\n\t"
4225 : "=q" (rc.u32),
4226 "=m" (*(volatile long *)pvBitmap)
4227 : "Ir" (iBit),
4228 "m" (*(volatile long *)pvBitmap)
4229 : "memory");
4230# else
4231 __asm
4232 {
4233 mov edx, [iBit]
4234# ifdef RT_ARCH_AMD64
4235 mov rax, [pvBitmap]
4236 lock btr [rax], edx
4237# else
4238 mov eax, [pvBitmap]
4239 lock btr [eax], edx
4240# endif
4241 setc al
4242 and eax, 1
4243 mov [rc.u32], eax
4244 }
4245# endif
4246 return rc.f;
4247}
4248#endif
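/*
 * Usage sketch (names are illustrative, reusing the shared bitmap from the
 * sketch above): consume a pending-work bit exactly once; only the caller
 * that observes the 1 -> 0 transition processes it.
 *
 *      if (ASMAtomicBitTestAndClear(s_au32Pending, 17))
 *      {
 *          // we won the bit; handle source 17 here
 *      }
 */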
4249
4250
4251/**
4252 * Tests and toggles a bit in a bitmap.
4253 *
4254 * @returns true if the bit was set.
4255 * @returns false if the bit was clear.
4256 *
4257 * @param pvBitmap Pointer to the bitmap.
4258 * @param iBit The bit to test and toggle.
4259 *
4260 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
4261 * it yields better performance and avoids traps when accessing
4262 * the last bits in the bitmap.
4263 */
4264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4265DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4266#else
4267DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4268{
4269 union { bool f; uint32_t u32; uint8_t u8; } rc;
4270# if RT_INLINE_ASM_USES_INTRIN
4271 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4272
4273# elif RT_INLINE_ASM_GNU_STYLE
4274 __asm__ __volatile__("btcl %2, %1\n\t"
4275 "setc %b0\n\t"
4276 "andl $1, %0\n\t"
4277 : "=q" (rc.u32),
4278 "=m" (*(volatile long *)pvBitmap)
4279 : "Ir" (iBit),
4280 "m" (*(volatile long *)pvBitmap)
4281 : "memory");
4282# else
4283 __asm
4284 {
4285 mov edx, [iBit]
4286# ifdef RT_ARCH_AMD64
4287 mov rax, [pvBitmap]
4288 btc [rax], edx
4289# else
4290 mov eax, [pvBitmap]
4291 btc [eax], edx
4292# endif
4293 setc al
4294 and eax, 1
4295 mov [rc.u32], eax
4296 }
4297# endif
4298 return rc.f;
4299}
4300#endif
4301
4302
4303/**
4304 * Atomically tests and toggles a bit in a bitmap, ordered.
4305 *
4306 * @returns true if the bit was set.
4307 * @returns false if the bit was clear.
4308 *
4309 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4310 * the memory access isn't atomic!
4311 * @param iBit The bit to test and toggle.
4312 */
4313#if RT_INLINE_ASM_EXTERNAL
4314DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4315#else
4316DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4317{
4318 union { bool f; uint32_t u32; uint8_t u8; } rc;
4319 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4320# if RT_INLINE_ASM_GNU_STYLE
4321 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4322 "setc %b0\n\t"
4323 "andl $1, %0\n\t"
4324 : "=q" (rc.u32),
4325 "=m" (*(volatile long *)pvBitmap)
4326 : "Ir" (iBit),
4327 "m" (*(volatile long *)pvBitmap)
4328 : "memory");
4329# else
4330 __asm
4331 {
4332 mov edx, [iBit]
4333# ifdef RT_ARCH_AMD64
4334 mov rax, [pvBitmap]
4335 lock btc [rax], edx
4336# else
4337 mov eax, [pvBitmap]
4338 lock btc [eax], edx
4339# endif
4340 setc al
4341 and eax, 1
4342 mov [rc.u32], eax
4343 }
4344# endif
4345 return rc.f;
4346}
4347#endif
4348
4349
4350/**
4351 * Tests if a bit in a bitmap is set.
4352 *
4353 * @returns true if the bit is set.
4354 * @returns false if the bit is clear.
4355 *
4356 * @param pvBitmap Pointer to the bitmap.
4357 * @param iBit The bit to test.
4358 *
4359 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
4360 * it yields better performance and avoids traps when accessing
4361 * the last bits in the bitmap.
4362 */
4363#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4364DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4365#else
4366DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4367{
4368 union { bool f; uint32_t u32; uint8_t u8; } rc;
4369# if RT_INLINE_ASM_USES_INTRIN
4370 rc.u32 = _bittest((long *)pvBitmap, iBit);
4371# elif RT_INLINE_ASM_GNU_STYLE
4372
4373 __asm__ __volatile__("btl %2, %1\n\t"
4374 "setc %b0\n\t"
4375 "andl $1, %0\n\t"
4376 : "=q" (rc.u32)
4377 : "m" (*(const volatile long *)pvBitmap),
4378 "Ir" (iBit)
4379 : "memory");
4380# else
4381 __asm
4382 {
4383 mov edx, [iBit]
4384# ifdef RT_ARCH_AMD64
4385 mov rax, [pvBitmap]
4386 bt [rax], edx
4387# else
4388 mov eax, [pvBitmap]
4389 bt [eax], edx
4390# endif
4391 setc al
4392 and eax, 1
4393 mov [rc.u32], eax
4394 }
4395# endif
4396 return rc.f;
4397}
4398#endif
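/*
 * Usage sketch (read-only query; names are illustrative):
 *
 *      if (ASMBitTest(au32Bitmap, 63))          // bt; does not modify the bitmap
 *      {
 *          // bit 63 is currently set
 *      }
 */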
4399
4400
4401/**
4402 * Clears a bit range within a bitmap.
4403 *
4404 * @param pvBitmap Pointer to the bitmap.
4405 * @param iBitStart The first bit to clear.
4406 * @param iBitEnd The first bit not to clear.
4407 */
4408DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4409{
4410 if (iBitStart < iBitEnd)
4411 {
4412 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4413 int32_t iStart = iBitStart & ~31;
4414 int32_t iEnd = iBitEnd & ~31;
4415 if (iStart == iEnd)
4416 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4417 else
4418 {
4419 /* bits in first dword. */
4420 if (iBitStart & 31)
4421 {
4422 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4423 pu32++;
4424 iBitStart = iStart + 32;
4425 }
4426
4427 /* whole dword. */
4428 if (iBitStart != iEnd)
4429 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4430
4431 /* bits in last dword. */
4432 if (iBitEnd & 31)
4433 {
4434 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4435 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4436 }
4437 }
4438 }
4439}
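/*
 * Usage sketch (names are illustrative): the range is half open, so the call
 * below clears bits 8 through 39 and leaves bit 40 untouched.
 *
 *      ASMBitClearRange(au32Bitmap, 8, 40);
 */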
4440
4441
4442/**
4443 * Sets a bit range within a bitmap.
4444 *
4445 * @param pvBitmap Pointer to the bitmap.
4446 * @param iBitStart The first bit to set.
4447 * @param iBitEnd The first bit not to set.
4448 */
4449DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4450{
4451 if (iBitStart < iBitEnd)
4452 {
4453 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4454 int32_t iStart = iBitStart & ~31;
4455 int32_t iEnd = iBitEnd & ~31;
4456 if (iStart == iEnd)
4457 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4458 else
4459 {
4460 /* bits in first dword. */
4461 if (iBitStart & 31)
4462 {
4463 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4464 pu32++;
4465 iBitStart = iStart + 32;
4466 }
4467
4468 /* whole dword. */
4469 if (iBitStart != iEnd)
4470 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4471
4472 /* bits in last dword. */
4473 if (iBitEnd & 31)
4474 {
4475 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4476 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4477 }
4478 }
4479 }
4480}
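/*
 * Usage sketch (names are illustrative): mark the first 24 slots of a bitmap
 * as allocated; like the clear variant, iBitEnd itself is excluded.
 *
 *      ASMBitSetRange(au32Bitmap, 0, 24);       // sets bits 0..23
 */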
4481
4482
4483/**
4484 * Finds the first clear bit in a bitmap.
4485 *
4486 * @returns Index of the first zero bit.
4487 * @returns -1 if no clear bit was found.
4488 * @param pvBitmap Pointer to the bitmap.
4489 * @param cBits The number of bits in the bitmap. Multiple of 32.
4490 */
4491#if RT_INLINE_ASM_EXTERNAL
4492DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4493#else
4494DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4495{
4496 if (cBits)
4497 {
4498 int32_t iBit;
4499# if RT_INLINE_ASM_GNU_STYLE
4500 RTCCUINTREG uEAX, uECX, uEDI;
4501 cBits = RT_ALIGN_32(cBits, 32);
4502 __asm__ __volatile__("repe; scasl\n\t"
4503 "je 1f\n\t"
4504# ifdef RT_ARCH_AMD64
4505 "lea -4(%%rdi), %%rdi\n\t"
4506 "xorl (%%rdi), %%eax\n\t"
4507 "subq %5, %%rdi\n\t"
4508# else
4509 "lea -4(%%edi), %%edi\n\t"
4510 "xorl (%%edi), %%eax\n\t"
4511 "subl %5, %%edi\n\t"
4512# endif
4513 "shll $3, %%edi\n\t"
4514 "bsfl %%eax, %%edx\n\t"
4515 "addl %%edi, %%edx\n\t"
4516 "1:\t\n"
4517 : "=d" (iBit),
4518 "=&c" (uECX),
4519 "=&D" (uEDI),
4520 "=&a" (uEAX)
4521 : "0" (0xffffffff),
4522 "mr" (pvBitmap),
4523 "1" (cBits >> 5),
4524 "2" (pvBitmap),
4525 "3" (0xffffffff));
4526# else
4527 cBits = RT_ALIGN_32(cBits, 32);
4528 __asm
4529 {
4530# ifdef RT_ARCH_AMD64
4531 mov rdi, [pvBitmap]
4532 mov rbx, rdi
4533# else
4534 mov edi, [pvBitmap]
4535 mov ebx, edi
4536# endif
4537 mov edx, 0ffffffffh
4538 mov eax, edx
4539 mov ecx, [cBits]
4540 shr ecx, 5
4541 repe scasd
4542 je done
4543
4544# ifdef RT_ARCH_AMD64
4545 lea rdi, [rdi - 4]
4546 xor eax, [rdi]
4547 sub rdi, rbx
4548# else
4549 lea edi, [edi - 4]
4550 xor eax, [edi]
4551 sub edi, ebx
4552# endif
4553 shl edi, 3
4554 bsf edx, eax
4555 add edx, edi
4556 done:
4557 mov [iBit], edx
4558 }
4559# endif
4560 return iBit;
4561 }
4562 return -1;
4563}
4564#endif
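/*
 * Usage sketch (names are illustrative): a trivial slot allocator; cBits must
 * be a multiple of 32, and the claim below is not atomic.
 *
 *      int32_t iFree = ASMBitFirstClear(au32Bitmap, 128);
 *      if (iFree >= 0)
 *          ASMBitSet(au32Bitmap, iFree);        // claim the free slot
 */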
4565
4566
4567/**
4568 * Finds the next clear bit in a bitmap.
4569 *
4570 * @returns Index of the next clear bit.
4571 * @returns -1 if no clear bit was found.
4572 * @param pvBitmap Pointer to the bitmap.
4573 * @param cBits The number of bits in the bitmap. Multiple of 32.
4574 * @param iBitPrev The bit returned from the last search.
4575 * The search will start at iBitPrev + 1.
4576 */
4577#if RT_INLINE_ASM_EXTERNAL
4578DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4579#else
4580DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4581{
4582 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4583 int iBit = ++iBitPrev & 31;
4584 if (iBit)
4585 {
4586 /*
4587 * Inspect the 32-bit word containing the unaligned bit.
4588 */
4589 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4590
4591# if RT_INLINE_ASM_USES_INTRIN
4592 unsigned long ulBit = 0;
4593 if (_BitScanForward(&ulBit, u32))
4594 return ulBit + iBitPrev;
4595# else
4596# if RT_INLINE_ASM_GNU_STYLE
4597 __asm__ __volatile__("bsf %1, %0\n\t"
4598 "jnz 1f\n\t"
4599 "movl $-1, %0\n\t"
4600 "1:\n\t"
4601 : "=r" (iBit)
4602 : "r" (u32));
4603# else
4604 __asm
4605 {
4606 mov edx, [u32]
4607 bsf eax, edx
4608 jnz done
4609 mov eax, 0ffffffffh
4610 done:
4611 mov [iBit], eax
4612 }
4613# endif
4614 if (iBit >= 0)
4615 return iBit + iBitPrev;
4616# endif
4617
4618 /*
4619 * Skip ahead and see if there is anything left to search.
4620 */
4621 iBitPrev |= 31;
4622 iBitPrev++;
4623 if (cBits <= (uint32_t)iBitPrev)
4624 return -1;
4625 }
4626
4627 /*
4628 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4629 */
4630 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4631 if (iBit >= 0)
4632 iBit += iBitPrev;
4633 return iBit;
4634}
4635#endif
4636
4637
4638/**
4639 * Finds the first set bit in a bitmap.
4640 *
4641 * @returns Index of the first set bit.
4642 * @returns -1 if no set bit was found.
4643 * @param pvBitmap Pointer to the bitmap.
4644 * @param cBits The number of bits in the bitmap. Multiple of 32.
4645 */
4646#if RT_INLINE_ASM_EXTERNAL
4647DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4648#else
4649DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4650{
4651 if (cBits)
4652 {
4653 int32_t iBit;
4654# if RT_INLINE_ASM_GNU_STYLE
4655 RTCCUINTREG uEAX, uECX, uEDI;
4656 cBits = RT_ALIGN_32(cBits, 32);
4657 __asm__ __volatile__("repe; scasl\n\t"
4658 "je 1f\n\t"
4659# ifdef RT_ARCH_AMD64
4660 "lea -4(%%rdi), %%rdi\n\t"
4661 "movl (%%rdi), %%eax\n\t"
4662 "subq %5, %%rdi\n\t"
4663# else
4664 "lea -4(%%edi), %%edi\n\t"
4665 "movl (%%edi), %%eax\n\t"
4666 "subl %5, %%edi\n\t"
4667# endif
4668 "shll $3, %%edi\n\t"
4669 "bsfl %%eax, %%edx\n\t"
4670 "addl %%edi, %%edx\n\t"
4671 "1:\t\n"
4672 : "=d" (iBit),
4673 "=&c" (uECX),
4674 "=&D" (uEDI),
4675 "=&a" (uEAX)
4676 : "0" (0xffffffff),
4677 "mr" (pvBitmap),
4678 "1" (cBits >> 5),
4679 "2" (pvBitmap),
4680 "3" (0));
4681# else
4682 cBits = RT_ALIGN_32(cBits, 32);
4683 __asm
4684 {
4685# ifdef RT_ARCH_AMD64
4686 mov rdi, [pvBitmap]
4687 mov rbx, rdi
4688# else
4689 mov edi, [pvBitmap]
4690 mov ebx, edi
4691# endif
4692 mov edx, 0ffffffffh
4693 xor eax, eax
4694 mov ecx, [cBits]
4695 shr ecx, 5
4696 repe scasd
4697 je done
4698# ifdef RT_ARCH_AMD64
4699 lea rdi, [rdi - 4]
4700 mov eax, [rdi]
4701 sub rdi, rbx
4702# else
4703 lea edi, [edi - 4]
4704 mov eax, [edi]
4705 sub edi, ebx
4706# endif
4707 shl edi, 3
4708 bsf edx, eax
4709 add edx, edi
4710 done:
4711 mov [iBit], edx
4712 }
4713# endif
4714 return iBit;
4715 }
4716 return -1;
4717}
4718#endif
4719
4720
4721/**
4722 * Finds the next set bit in a bitmap.
4723 *
4724 * @returns Index of the next set bit.
4725 * @returns -1 if no set bit was found.
4726 * @param pvBitmap Pointer to the bitmap.
4727 * @param cBits The number of bits in the bitmap. Multiple of 32.
4728 * @param iBitPrev The bit returned from the last search.
4729 * The search will start at iBitPrev + 1.
4730 */
4731#if RT_INLINE_ASM_EXTERNAL
4732DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4733#else
4734DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4735{
4736 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4737 int iBit = ++iBitPrev & 31;
4738 if (iBit)
4739 {
4740 /*
4741 * Inspect the 32-bit word containing the unaligned bit.
4742 */
4743 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4744
4745# if RT_INLINE_ASM_USES_INTRIN
4746 unsigned long ulBit = 0;
4747 if (_BitScanForward(&ulBit, u32))
4748 return ulBit + iBitPrev;
4749# else
4750# if RT_INLINE_ASM_GNU_STYLE
4751 __asm__ __volatile__("bsf %1, %0\n\t"
4752 "jnz 1f\n\t"
4753 "movl $-1, %0\n\t"
4754 "1:\n\t"
4755 : "=r" (iBit)
4756 : "r" (u32));
4757# else
4758 __asm
4759 {
4760 mov edx, [u32]
4761 bsf eax, edx
4762 jnz done
4763 mov eax, 0ffffffffh
4764 done:
4765 mov [iBit], eax
4766 }
4767# endif
4768 if (iBit >= 0)
4769 return iBit + iBitPrev;
4770# endif
4771
4772 /*
4773 * Skip ahead and see if there is anything left to search.
4774 */
4775 iBitPrev |= 31;
4776 iBitPrev++;
4777 if (cBits <= (uint32_t)iBitPrev)
4778 return -1;
4779 }
4780
4781 /*
4782 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
4783 */
4784 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4785 if (iBit >= 0)
4786 iBit += iBitPrev;
4787 return iBit;
4788}
4789#endif
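/*
 * Usage sketch (names are illustrative): iterate all set bits of a 128-bit
 * bitmap; the next-search starts at the previously returned index + 1.
 *
 *      int32_t iBit = ASMBitFirstSet(au32Bitmap, 128);
 *      while (iBit >= 0)
 *      {
 *          // handle bit number iBit here
 *          iBit = ASMBitNextSet(au32Bitmap, 128, iBit);
 *      }
 */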
4790
4791
4792/**
4793 * Finds the first bit which is set in the given 32-bit integer.
4794 * Bits are numbered from 1 (least significant) to 32.
4795 *
4796 * @returns index [1..32] of the first set bit.
4797 * @returns 0 if all bits are cleared.
4798 * @param u32 Integer to search for set bits.
4799 * @remarks Similar to ffs() in BSD.
4800 */
4801#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4802DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4803#else
4804DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4805{
4806# if RT_INLINE_ASM_USES_INTRIN
4807 unsigned long iBit;
4808 if (_BitScanForward(&iBit, u32))
4809 iBit++;
4810 else
4811 iBit = 0;
4812# elif RT_INLINE_ASM_GNU_STYLE
4813 uint32_t iBit;
4814 __asm__ __volatile__("bsf %1, %0\n\t"
4815 "jnz 1f\n\t"
4816 "xorl %0, %0\n\t"
4817 "jmp 2f\n"
4818 "1:\n\t"
4819 "incl %0\n"
4820 "2:\n\t"
4821 : "=r" (iBit)
4822 : "rm" (u32));
4823# else
4824 uint32_t iBit;
4825 _asm
4826 {
4827 bsf eax, [u32]
4828 jnz found
4829 xor eax, eax
4830 jmp done
4831 found:
4832 inc eax
4833 done:
4834 mov [iBit], eax
4835 }
4836# endif
4837 return iBit;
4838}
4839#endif
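/*
 * Usage sketch: the return value is 1-based, so a return of 0 means "no bit
 * set" and can be tested directly.
 *
 *      Assert(ASMBitFirstSetU32(UINT32_C(0x00000090)) == 5);   // lowest set bit is bit 4
 *      Assert(ASMBitFirstSetU32(0) == 0);                      // no bits set
 */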
4840
4841
4842/**
4843 * Finds the first bit which is set in the given 32-bit integer.
4844 * Bits are numbered from 1 (least significant) to 32.
4845 *
4846 * @returns index [1..32] of the first set bit.
4847 * @returns 0 if all bits are cleared.
4848 * @param i32 Integer to search for set bits.
4849 * @remark Similar to ffs() in BSD.
4850 */
4851DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4852{
4853 return ASMBitFirstSetU32((uint32_t)i32);
4854}
4855
4856
4857/**
4858 * Finds the first bit which is set in the given 64-bit integer.
4859 *
4860 * Bits are numbered from 1 (least significant) to 64.
4861 *
4862 * @returns index [1..64] of the first set bit.
4863 * @returns 0 if all bits are cleared.
4864 * @param u64 Integer to search for set bits.
4865 * @remarks Similar to ffs() in BSD.
4866 */
4867#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4868DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
4869#else
4870DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
4871{
4872# if RT_INLINE_ASM_USES_INTRIN
4873 unsigned long iBit;
4874# if ARCH_BITS == 64
4875 if (_BitScanForward64(&iBit, u64))
4876 iBit++;
4877 else
4878 iBit = 0;
4879# else
4880 if (_BitScanForward(&iBit, (uint32_t)u64))
4881 iBit++;
4882 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
4883 iBit += 33;
4884 else
4885 iBit = 0;
4886# endif
4887# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
4888 uint64_t iBit;
4889 __asm__ __volatile__("bsfq %1, %0\n\t"
4890 "jnz 1f\n\t"
4891 "xorl %0, %0\n\t"
4892 "jmp 2f\n"
4893 "1:\n\t"
4894 "incl %0\n"
4895 "2:\n\t"
4896 : "=r" (iBit)
4897 : "rm" (u64));
4898# else
4899 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
4900 if (!iBit)
4901 {
4902 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
4903 if (iBit)
4904 iBit += 32;
4905 }
4906# endif
4907 return (unsigned)iBit;
4908}
4909#endif
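/*
 * Usage sketch: same 1-based convention as the 32-bit variant, extended to
 * 64 bits.
 *
 *      Assert(ASMBitFirstSetU64(UINT64_C(0x0000000100000000)) == 33);  // only bit 32 is set
 *      Assert(ASMBitFirstSetU64(0) == 0);
 */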
4910
4911
4912/**
4913 * Finds the first bit which is set in the given 16-bit integer.
4914 *
4915 * Bits are numbered from 1 (least significant) to 16.
4916 *
4917 * @returns index [1..16] of the first set bit.
4918 * @returns 0 if all bits are cleared.
4919 * @param u16 Integer to search for set bits.
4920 * @remarks For 16-bit bs3kit code.
4921 */
4922#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4923DECLASM(unsigned) ASMBitFirstSetU16(uint32_t u16);
4924#else
4925DECLINLINE(unsigned) ASMBitFirstSetU16(uint32_t u16)
4926{
4927 return ASMBitFirstSetU32((uint32_t)u16);
4928}
4929#endif
4930
4931
4932/**
4933 * Finds the last bit which is set in the given 32-bit integer.
4934 * Bits are numbered from 1 (least significant) to 32.
4935 *
4936 * @returns index [1..32] of the last set bit.
4937 * @returns 0 if all bits are cleared.
4938 * @param u32 Integer to search for set bits.
4939 * @remark Similar to fls() in BSD.
4940 */
4941#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4942DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4943#else
4944DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4945{
4946# if RT_INLINE_ASM_USES_INTRIN
4947 unsigned long iBit;
4948 if (_BitScanReverse(&iBit, u32))
4949 iBit++;
4950 else
4951 iBit = 0;
4952# elif RT_INLINE_ASM_GNU_STYLE
4953 uint32_t iBit;
4954 __asm__ __volatile__("bsrl %1, %0\n\t"
4955 "jnz 1f\n\t"
4956 "xorl %0, %0\n\t"
4957 "jmp 2f\n"
4958 "1:\n\t"
4959 "incl %0\n"
4960 "2:\n\t"
4961 : "=r" (iBit)
4962 : "rm" (u32));
4963# else
4964 uint32_t iBit;
4965 _asm
4966 {
4967 bsr eax, [u32]
4968 jnz found
4969 xor eax, eax
4970 jmp done
4971 found:
4972 inc eax
4973 done:
4974 mov [iBit], eax
4975 }
4976# endif
4977 return iBit;
4978}
4979#endif
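/*
 * Usage sketch (u32Value is illustrative): because the result is 1-based, it
 * doubles as the bit width of the value, and result - 1 is floor(log2) for
 * non-zero input.
 *
 *      unsigned cUsedBits = ASMBitLastSetU32(u32Value);         // 0 if u32Value is 0
 *      unsigned iLog2     = cUsedBits ? cUsedBits - 1 : 0;      // illustrative use
 */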
4980
4981
4982/**
4983 * Finds the last bit which is set in the given 32-bit integer.
4984 * Bits are numbered from 1 (least significant) to 32.
4985 *
4986 * @returns index [1..32] of the last set bit.
4987 * @returns 0 if all bits are cleared.
4988 * @param i32 Integer to search for set bits.
4989 * @remark Similar to fls() in BSD.
4990 */
4991DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4992{
4993 return ASMBitLastSetU32((uint32_t)i32);
4994}
4995
4996
4997/**
4998 * Finds the last bit which is set in the given 64-bit integer.
4999 *
5000 * Bits are numbered from 1 (least significant) to 64.
5001 *
5002 * @returns index [1..64] of the last set bit.
5003 * @returns 0 if all bits are cleared.
5004 * @param u64 Integer to search for set bits.
5005 * @remark Similar to fls() in BSD.
5006 */
5007#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5008DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5009#else
5010DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5011{
5012# if RT_INLINE_ASM_USES_INTRIN
5013 unsigned long iBit;
5014# if ARCH_BITS == 64
5015 if (_BitScanReverse64(&iBit, u64))
5016 iBit++;
5017 else
5018 iBit = 0;
5019# else
5020 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5021 iBit += 33;
5022 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5023 iBit++;
5024 else
5025 iBit = 0;
5026# endif
5027# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5028 uint64_t iBit;
5029 __asm__ __volatile__("bsrq %1, %0\n\t"
5030 "jnz 1f\n\t"
5031 "xorl %0, %0\n\t"
5032 "jmp 2f\n"
5033 "1:\n\t"
5034 "incl %0\n"
5035 "2:\n\t"
5036 : "=r" (iBit)
5037 : "rm" (u64));
5038# else
5039 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5040 if (iBit)
5041 iBit += 32;
5042 else
5043 iBit = ASMBitLastSetU32((uint32_t)u64);
5044# endif
5045 return (unsigned)iBit;
5046}
5047#endif
5048
5049
5050/**
5051 * Finds the last bit which is set in the given 16-bit integer.
5052 *
5053 * Bits are numbered from 1 (least significant) to 16.
5054 *
5055 * @returns index [1..16] of the last set bit.
5056 * @returns 0 if all bits are cleared.
5057 * @param u16 Integer to search for set bits.
5058 * @remarks For 16-bit bs3kit code.
5059 */
5060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5061DECLASM(unsigned) ASMBitLastSetU16(uint32_t u16);
5062#else
5063DECLINLINE(unsigned) ASMBitLastSetU16(uint32_t u16)
5064{
5065 return ASMBitLastSetU32((uint32_t)u16);
5066}
5067#endif
5068
5069
5070/**
5071 * Reverse the byte order of the given 16-bit integer.
5072 *
5073 * @returns The byte-swapped value.
5074 * @param u16 16-bit integer value.
5075 */
5076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5077DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5078#else
5079DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5080{
5081# if RT_INLINE_ASM_USES_INTRIN
5082 u16 = _byteswap_ushort(u16);
5083# elif RT_INLINE_ASM_GNU_STYLE
5084 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5085# else
5086 _asm
5087 {
5088 mov ax, [u16]
5089 ror ax, 8
5090 mov [u16], ax
5091 }
5092# endif
5093 return u16;
5094}
5095#endif
5096
5097
5098/**
5099 * Reverse the byte order of the given 32-bit integer.
5100 *
5101 * @returns The byte-swapped value.
5102 * @param u32 32-bit integer value.
5103 */
5104#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5105DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5106#else
5107DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5108{
5109# if RT_INLINE_ASM_USES_INTRIN
5110 u32 = _byteswap_ulong(u32);
5111# elif RT_INLINE_ASM_GNU_STYLE
5112 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5113# else
5114 _asm
5115 {
5116 mov eax, [u32]
5117 bswap eax
5118 mov [u32], eax
5119 }
5120# endif
5121 return u32;
5122}
5123#endif
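/*
 * Usage sketch: byte order reversal, e.g. when converting a big-endian
 * on-disk value to host (little-endian x86) order.
 *
 *      Assert(ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
 */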
5124
5125
5126/**
5127 * Reverse the byte order of the given 64-bit integer.
5128 *
5129 * @returns The byte-swapped value.
5130 * @param u64 64-bit integer value.
5131 */
5132DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5133{
5134#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5135 u64 = _byteswap_uint64(u64);
5136#else
5137 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5138 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5139#endif
5140 return u64;
5141}
5142
5143
5144/**
5145 * Rotate 32-bit unsigned value to the left by @a cShift.
5146 *
5147 * @returns Rotated value.
5148 * @param u32 The value to rotate.
5149 * @param cShift How many bits to rotate by.
5150 */
5151#ifdef __WATCOMC__
5152DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5153#else
5154DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5155{
5156# if RT_INLINE_ASM_USES_INTRIN
5157 return _rotl(u32, cShift);
5158# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5159 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5160 return u32;
5161# else
5162 cShift &= 31;
5163 return (u32 << cShift) | (u32 >> (32 - cShift));
5164# endif
5165}
5166#endif
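/*
 * Usage sketch: bits shifted out at the top re-enter at the bottom.
 *
 *      Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 1) == UINT32_C(0x00000003));
 */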
5167
5168
5169/**
5170 * Rotate 32-bit unsigned value to the right by @a cShift.
5171 *
5172 * @returns Rotated value.
5173 * @param u32 The value to rotate.
5174 * @param cShift How many bits to rotate by.
5175 */
5176#ifdef __WATCOMC__
5177DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5178#else
5179DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5180{
5181# if RT_INLINE_ASM_USES_INTRIN
5182 return _rotr(u32, cShift);
5183# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5184 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5185 return u32;
5186# else
5187 cShift &= 31;
5188 return (u32 >> cShift) | (u32 << (32 - cShift));
5189# endif
5190}
5191#endif
5192
5193
5194/**
5195 * Rotate 64-bit unsigned value to the left by @a cShift.
5196 *
5197 * @returns Rotated value.
5198 * @param u64 The value to rotate.
5199 * @param cShift How many bits to rotate by.
5200 */
5201DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5202{
5203#if RT_INLINE_ASM_USES_INTRIN
5204 return _rotl64(u64, cShift);
5205#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5206 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5207 return u64;
5208#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5209 uint32_t uSpill;
5210 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5211 "jz 1f\n\t"
5212 "xchgl %%eax, %%edx\n\t"
5213 "1:\n\t"
5214 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5215 "jz 2f\n\t"
5216 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5217 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5218 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5219 "2:\n\t" /* } */
5220 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5221 : "0" (u64),
5222 "1" (cShift));
5223 return u64;
5224#else
5225 cShift &= 63;
5226 return (u64 << cShift) | (u64 >> (64 - cShift));
5227#endif
5228}
5229
5230
5231/**
5232 * Rotate 64-bit unsigned value to the right by @a cShift.
5233 *
5234 * @returns Rotated value.
5235 * @param u64 The value to rotate.
5236 * @param cShift How many bits to rotate by.
5237 */
5238DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5239{
5240#if RT_INLINE_ASM_USES_INTRIN
5241 return _rotr64(u64, cShift);
5242#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5243 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5244 return u64;
5245#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5246 uint32_t uSpill;
5247 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5248 "jz 1f\n\t"
5249 "xchgl %%eax, %%edx\n\t"
5250 "1:\n\t"
5251 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5252 "jz 2f\n\t"
5253 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5254 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5255 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5256 "2:\n\t" /* } */
5257 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5258 : "0" (u64),
5259 "1" (cShift));
5260 return u64;
5261#else
5262 cShift &= 63;
5263 return (u64 >> cShift) | (u64 << (64 - cShift));
5264#endif
5265}
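/*
 * Usage sketch (u64Value is illustrative): the 64-bit right rotate is the
 * inverse of the left rotate for the same shift count.
 *
 *      Assert(ASMRotateRightU64(UINT64_C(1), 1) == UINT64_C(0x8000000000000000));
 *      Assert(ASMRotateLeftU64(ASMRotateRightU64(u64Value, 13), 13) == u64Value);
 */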
5266
5267/** @} */
5268
5269
5270/** @} */
5271
5272#endif
5273