VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 45621

Last change on this file since 45621 was 44528, checked in by vboxsync, 12 years ago

header (C) fixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.9 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2012 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
/* Solaris 10 header ugliness: get rid of any macro named 'u'.
   (#undef of a name that is not a macro is a no-op.) */
#undef u
41
#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
# include <intrin.h>
/* Emit the intrinsics at all optimization levels. */
/* Compiler barrier, CPUID and string stores. */
# pragma intrinsic(_ReadWriteBarrier, __cpuid)
# pragma intrinsic(__stosd, __stosw, __stosb)
/* Bit scanning and bit testing. */
# pragma intrinsic(_BitScanForward, _BitScanReverse)
# pragma intrinsic(_bittest, _bittestandset, _bittestandreset, _bittestandcomplement)
/* Byte swapping. */
# pragma intrinsic(_byteswap_ushort, _byteswap_ulong)
/* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset, _interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd, _InterlockedOr)
# pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange, _InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange, _InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
/* 64-bit variants, only requested for AMD64 builds. */
#  pragma intrinsic(__stosq, _byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64, _InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedAnd64, _InterlockedOr64)
#  pragma intrinsic(_InterlockedIncrement64, _InterlockedDecrement64)
# endif
#endif
79
80
81/** @defgroup grp_rt_asm ASM - Assembly Routines
82 * @ingroup grp_rt
83 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct this if it is wrong.)
90 *
91 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
92 * are unordered (note the Uo).
93 *
94 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
95 * or even optimize assembler instructions away. For instance, in the following code
96 * the second rdmsr instruction is optimized away because gcc treats that instruction
97 * as deterministic:
98 *
99 * @code
 *          static inline uint32_t rdmsr_low(int idx)
 *          {
 *              uint32_t low;
 *              __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *              return low;
 *          }
105 * ...
106 * uint32_t msr1 = rdmsr_low(1);
107 * foo(msr1);
108 * msr1 = rdmsr_low(1);
109 * bar(msr1);
110 * @endcode
111 *
112 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
113 * use the result of the first call as input parameter for bar() as well. For rdmsr this
114 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
115 * machine status information in general.
116 *
117 * @{
118 */
119
120
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler. So far this workaround is still required for 4.4 and 4.5.
 *
 * Defined as 1 when compiling for 32-bit x86 (__i386__) with GCC 4.3 or a
 * later 4.x release, otherwise as 0.
 *
 * The compiler test is evaluated here and the macro is given a plain integer
 * value: a 'defined' operator that only appears as the result of macro
 * expansion inside an \#if has undefined behaviour, and it would not compile
 * at all if the macro were used in an ordinary expression.
 */
#if defined(__GNUC__) && defined(__i386__)
# if __GNUC__ == 4 && __GNUC_MINOR__ >= 3
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
130
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 * mode, x86.
 *
 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 * when in PIC mode on x86.
 *
 * Defined as 1 for PIC builds targeting x86 when either
 * RT_INLINE_ASM_GCC_4_3_X_X86 is non-zero or the target OS is Darwin,
 * otherwise as 0.  A value supplied before this header is included is left
 * alone.  The condition is evaluated here so that no 'defined' operator ends
 * up inside the macro body.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if    (defined(PIC) || defined(__PIC__)) \
     && defined(RT_ARCH_X86) \
     && (RT_INLINE_ASM_GCC_4_3_X_X86 || defined(RT_OS_DARWIN))
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
146
147
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or
 * method.
 *
 * Maps to the _ReturnAddress intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GCC (and for Doxygen runs); any other
 * compiler is rejected at preprocessing time.
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C" void * _ReturnAddress(void);
# else
void * _ReturnAddress(void);
# endif
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
163
164
165/**
166 * Compiler memory barrier.
167 *
168 * Ensure that the compiler does not use any cached (register/tmp stack) memory
169 * values or any outstanding writes when returning from this function.
170 *
171 * This function must be used if non-volatile data is modified by a
172 * device or the VMM. Typical cases are port access, MMIO access,
173 * trapping instruction, etc.
174 */
175#if RT_INLINE_ASM_GNU_STYLE
176# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
177#elif RT_INLINE_ASM_USES_INTRIN
178# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
179#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
180DECLINLINE(void) ASMCompilerBarrier(void)
181{
182 __asm
183 {
184 }
185}
186#endif
187
188
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Plain alias that expands to RT_BREAKPOINT().
 *
 * @deprecated Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint() RT_BREAKPOINT()
195
196
197/**
198 * Spinloop hint for platforms that have these, empty function on the other
199 * platforms.
200 *
201 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
202 * spin locks.
203 */
204#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
205DECLASM(void) ASMNopPause(void);
206#else
207DECLINLINE(void) ASMNopPause(void)
208{
209# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
210# if RT_INLINE_ASM_GNU_STYLE
211 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
212# else
213 __asm {
214 _emit 0f3h
215 _emit 090h
216 }
217# endif
218# else
219 /* dummy */
220# endif
221}
222#endif
223
224
225/**
226 * Atomically Exchange an unsigned 8-bit value, ordered.
227 *
228 * @returns Current *pu8 value
229 * @param pu8 Pointer to the 8-bit variable to update.
230 * @param u8 The 8-bit value to assign to *pu8.
231 */
232#if RT_INLINE_ASM_EXTERNAL
233DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
234#else
235DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
236{
237# if RT_INLINE_ASM_GNU_STYLE
238 __asm__ __volatile__("xchgb %0, %1\n\t"
239 : "=m" (*pu8),
240 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
241 : "1" (u8),
242 "m" (*pu8));
243# else
244 __asm
245 {
246# ifdef RT_ARCH_AMD64
247 mov rdx, [pu8]
248 mov al, [u8]
249 xchg [rdx], al
250 mov [u8], al
251# else
252 mov edx, [pu8]
253 mov al, [u8]
254 xchg [edx], al
255 mov [u8], al
256# endif
257 }
258# endif
259 return u8;
260}
261#endif
262
263
264/**
265 * Atomically Exchange a signed 8-bit value, ordered.
266 *
267 * @returns Current *pu8 value
268 * @param pi8 Pointer to the 8-bit variable to update.
269 * @param i8 The 8-bit value to assign to *pi8.
270 */
271DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
272{
273 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
274}
275
276
277/**
278 * Atomically Exchange a bool value, ordered.
279 *
280 * @returns Current *pf value
281 * @param pf Pointer to the 8-bit variable to update.
282 * @param f The 8-bit value to assign to *pi8.
283 */
284DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
285{
286#ifdef _MSC_VER
287 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
288#else
289 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
290#endif
291}
292
293
294/**
295 * Atomically Exchange an unsigned 16-bit value, ordered.
296 *
297 * @returns Current *pu16 value
298 * @param pu16 Pointer to the 16-bit variable to update.
299 * @param u16 The 16-bit value to assign to *pu16.
300 */
301#if RT_INLINE_ASM_EXTERNAL
302DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
303#else
304DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
305{
306# if RT_INLINE_ASM_GNU_STYLE
307 __asm__ __volatile__("xchgw %0, %1\n\t"
308 : "=m" (*pu16),
309 "=r" (u16)
310 : "1" (u16),
311 "m" (*pu16));
312# else
313 __asm
314 {
315# ifdef RT_ARCH_AMD64
316 mov rdx, [pu16]
317 mov ax, [u16]
318 xchg [rdx], ax
319 mov [u16], ax
320# else
321 mov edx, [pu16]
322 mov ax, [u16]
323 xchg [edx], ax
324 mov [u16], ax
325# endif
326 }
327# endif
328 return u16;
329}
330#endif
331
332
333/**
334 * Atomically Exchange a signed 16-bit value, ordered.
335 *
336 * @returns Current *pu16 value
337 * @param pi16 Pointer to the 16-bit variable to update.
338 * @param i16 The 16-bit value to assign to *pi16.
339 */
340DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
341{
342 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
343}
344
345
346/**
347 * Atomically Exchange an unsigned 32-bit value, ordered.
348 *
349 * @returns Current *pu32 value
350 * @param pu32 Pointer to the 32-bit variable to update.
351 * @param u32 The 32-bit value to assign to *pu32.
352 */
353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
354DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
355#else
356DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
357{
358# if RT_INLINE_ASM_GNU_STYLE
359 __asm__ __volatile__("xchgl %0, %1\n\t"
360 : "=m" (*pu32),
361 "=r" (u32)
362 : "1" (u32),
363 "m" (*pu32));
364
365# elif RT_INLINE_ASM_USES_INTRIN
366 u32 = _InterlockedExchange((long *)pu32, u32);
367
368# else
369 __asm
370 {
371# ifdef RT_ARCH_AMD64
372 mov rdx, [pu32]
373 mov eax, u32
374 xchg [rdx], eax
375 mov [u32], eax
376# else
377 mov edx, [pu32]
378 mov eax, u32
379 xchg [edx], eax
380 mov [u32], eax
381# endif
382 }
383# endif
384 return u32;
385}
386#endif
387
388
389/**
390 * Atomically Exchange a signed 32-bit value, ordered.
391 *
392 * @returns Current *pu32 value
393 * @param pi32 Pointer to the 32-bit variable to update.
394 * @param i32 The 32-bit value to assign to *pi32.
395 */
396DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
397{
398 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
399}
400
401
402/**
403 * Atomically Exchange an unsigned 64-bit value, ordered.
404 *
405 * @returns Current *pu64 value
406 * @param pu64 Pointer to the 64-bit variable to update.
407 * @param u64 The 64-bit value to assign to *pu64.
408 */
409#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
410 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
411DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
412#else
413DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
414{
415# if defined(RT_ARCH_AMD64)
416# if RT_INLINE_ASM_USES_INTRIN
417 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
418
419# elif RT_INLINE_ASM_GNU_STYLE
420 __asm__ __volatile__("xchgq %0, %1\n\t"
421 : "=m" (*pu64),
422 "=r" (u64)
423 : "1" (u64),
424 "m" (*pu64));
425# else
426 __asm
427 {
428 mov rdx, [pu64]
429 mov rax, [u64]
430 xchg [rdx], rax
431 mov [u64], rax
432 }
433# endif
434# else /* !RT_ARCH_AMD64 */
435# if RT_INLINE_ASM_GNU_STYLE
436# if defined(PIC) || defined(__PIC__)
437 uint32_t u32EBX = (uint32_t)u64;
438 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
439 "xchgl %%ebx, %3\n\t"
440 "1:\n\t"
441 "lock; cmpxchg8b (%5)\n\t"
442 "jnz 1b\n\t"
443 "movl %3, %%ebx\n\t"
444 /*"xchgl %%esi, %5\n\t"*/
445 : "=A" (u64),
446 "=m" (*pu64)
447 : "0" (*pu64),
448 "m" ( u32EBX ),
449 "c" ( (uint32_t)(u64 >> 32) ),
450 "S" (pu64));
451# else /* !PIC */
452 __asm__ __volatile__("1:\n\t"
453 "lock; cmpxchg8b %1\n\t"
454 "jnz 1b\n\t"
455 : "=A" (u64),
456 "=m" (*pu64)
457 : "0" (*pu64),
458 "b" ( (uint32_t)u64 ),
459 "c" ( (uint32_t)(u64 >> 32) ));
460# endif
461# else
462 __asm
463 {
464 mov ebx, dword ptr [u64]
465 mov ecx, dword ptr [u64 + 4]
466 mov edi, pu64
467 mov eax, dword ptr [edi]
468 mov edx, dword ptr [edi + 4]
469 retry:
470 lock cmpxchg8b [edi]
471 jnz retry
472 mov dword ptr [u64], eax
473 mov dword ptr [u64 + 4], edx
474 }
475# endif
476# endif /* !RT_ARCH_AMD64 */
477 return u64;
478}
479#endif
480
481
482/**
483 * Atomically Exchange an signed 64-bit value, ordered.
484 *
485 * @returns Current *pi64 value
486 * @param pi64 Pointer to the 64-bit variable to update.
487 * @param i64 The 64-bit value to assign to *pi64.
488 */
489DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
490{
491 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
492}
493
494
495/**
496 * Atomically Exchange a pointer value, ordered.
497 *
498 * @returns Current *ppv value
499 * @param ppv Pointer to the pointer variable to update.
500 * @param pv The pointer value to assign to *ppv.
501 */
502DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
503{
504#if ARCH_BITS == 32
505 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
506#elif ARCH_BITS == 64
507 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
508#else
509# error "ARCH_BITS is bogus"
510#endif
511}
512
513
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 *
 * With GCC the arguments are first assigned to typed temporaries, so a
 * mismatch between @a ppv, @a pv and @a Type is diagnosed by the compiler.
 * Other compilers get a plain cast; that expansion is fully parenthesised so
 * the macro behaves as a single expression when followed by postfix operators
 * such as '->'.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to assign to *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type                         const pvTypeChecked  = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
     })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    ((Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv)))
#endif
535
536
537/**
538 * Atomically Exchange a raw-mode context pointer value, ordered.
539 *
540 * @returns Current *ppv value
541 * @param ppvRC Pointer to the pointer variable to update.
542 * @param pvRC The pointer value to assign to *ppv.
543 */
544DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
545{
546 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
547}
548
549
550/**
551 * Atomically Exchange a ring-0 pointer value, ordered.
552 *
553 * @returns Current *ppv value
554 * @param ppvR0 Pointer to the pointer variable to update.
555 * @param pvR0 The pointer value to assign to *ppv.
556 */
557DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
558{
559#if R0_ARCH_BITS == 32
560 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
561#elif R0_ARCH_BITS == 64
562 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
563#else
564# error "R0_ARCH_BITS is bogus"
565#endif
566}
567
568
569/**
570 * Atomically Exchange a ring-3 pointer value, ordered.
571 *
572 * @returns Current *ppv value
573 * @param ppvR3 Pointer to the pointer variable to update.
574 * @param pvR3 The pointer value to assign to *ppv.
575 */
576DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
577{
578#if R3_ARCH_BITS == 32
579 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
580#elif R3_ARCH_BITS == 64
581 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
582#else
583# error "R3_ARCH_BITS is bogus"
584#endif
585}
586
587
588/** @def ASMAtomicXchgHandle
589 * Atomically Exchange a typical IPRT handle value, ordered.
590 *
591 * @param ph Pointer to the value to update.
592 * @param hNew The new value to assigned to *pu.
593 * @param phRes Where to store the current *ph value.
594 *
595 * @remarks This doesn't currently work for all handles (like RTFILE).
596 */
597#if HC_ARCH_BITS == 32
598# define ASMAtomicXchgHandle(ph, hNew, phRes) \
599 do { \
600 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
601 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
602 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
603 } while (0)
604#elif HC_ARCH_BITS == 64
605# define ASMAtomicXchgHandle(ph, hNew, phRes) \
606 do { \
607 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
608 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
609 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
610 } while (0)
611#else
612# error HC_ARCH_BITS
613#endif
614
615
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit exchange function and
 * raises an assertion for any other size.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
634
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit exchange function and
 * raises an assertion for any other size.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
653
654
655
656/**
657 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
658 *
659 * @returns true if xchg was done.
660 * @returns false if xchg wasn't done.
661 *
662 * @param pu8 Pointer to the value to update.
663 * @param u8New The new value to assigned to *pu8.
664 * @param u8Old The old value to *pu8 compare with.
665 */
666#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
667DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
668#else
669DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
670{
671 uint8_t u8Ret;
672 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
673 "setz %1\n\t"
674 : "=m" (*pu8),
675 "=qm" (u8Ret),
676 "=a" (u8Old)
677 : "q" (u8New),
678 "2" (u8Old),
679 "m" (*pu8));
680 return (bool)u8Ret;
681}
682#endif
683
684
685/**
686 * Atomically Compare and Exchange a signed 8-bit value, ordered.
687 *
688 * @returns true if xchg was done.
689 * @returns false if xchg wasn't done.
690 *
691 * @param pi8 Pointer to the value to update.
692 * @param i8New The new value to assigned to *pi8.
693 * @param i8Old The old value to *pi8 compare with.
694 */
695DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
696{
697 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
698}
699
700
701/**
702 * Atomically Compare and Exchange a bool value, ordered.
703 *
704 * @returns true if xchg was done.
705 * @returns false if xchg wasn't done.
706 *
707 * @param pf Pointer to the value to update.
708 * @param fNew The new value to assigned to *pf.
709 * @param fOld The old value to *pf compare with.
710 */
711DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
712{
713 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
714}
715
716
717/**
718 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
719 *
720 * @returns true if xchg was done.
721 * @returns false if xchg wasn't done.
722 *
723 * @param pu32 Pointer to the value to update.
724 * @param u32New The new value to assigned to *pu32.
725 * @param u32Old The old value to *pu32 compare with.
726 */
727#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
728DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
729#else
730DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
731{
732# if RT_INLINE_ASM_GNU_STYLE
733 uint8_t u8Ret;
734 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
735 "setz %1\n\t"
736 : "=m" (*pu32),
737 "=qm" (u8Ret),
738 "=a" (u32Old)
739 : "r" (u32New),
740 "2" (u32Old),
741 "m" (*pu32));
742 return (bool)u8Ret;
743
744# elif RT_INLINE_ASM_USES_INTRIN
745 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
746
747# else
748 uint32_t u32Ret;
749 __asm
750 {
751# ifdef RT_ARCH_AMD64
752 mov rdx, [pu32]
753# else
754 mov edx, [pu32]
755# endif
756 mov eax, [u32Old]
757 mov ecx, [u32New]
758# ifdef RT_ARCH_AMD64
759 lock cmpxchg [rdx], ecx
760# else
761 lock cmpxchg [edx], ecx
762# endif
763 setz al
764 movzx eax, al
765 mov [u32Ret], eax
766 }
767 return !!u32Ret;
768# endif
769}
770#endif
771
772
773/**
774 * Atomically Compare and Exchange a signed 32-bit value, ordered.
775 *
776 * @returns true if xchg was done.
777 * @returns false if xchg wasn't done.
778 *
779 * @param pi32 Pointer to the value to update.
780 * @param i32New The new value to assigned to *pi32.
781 * @param i32Old The old value to *pi32 compare with.
782 */
783DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
784{
785 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
786}
787
788
789/**
790 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
791 *
792 * @returns true if xchg was done.
793 * @returns false if xchg wasn't done.
794 *
795 * @param pu64 Pointer to the 64-bit variable to update.
796 * @param u64New The 64-bit value to assign to *pu64.
797 * @param u64Old The value to compare with.
798 */
799#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
800 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
801DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
802#else
803DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
804{
805# if RT_INLINE_ASM_USES_INTRIN
806 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
807
808# elif defined(RT_ARCH_AMD64)
809# if RT_INLINE_ASM_GNU_STYLE
810 uint8_t u8Ret;
811 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
812 "setz %1\n\t"
813 : "=m" (*pu64),
814 "=qm" (u8Ret),
815 "=a" (u64Old)
816 : "r" (u64New),
817 "2" (u64Old),
818 "m" (*pu64));
819 return (bool)u8Ret;
820# else
821 bool fRet;
822 __asm
823 {
824 mov rdx, [pu32]
825 mov rax, [u64Old]
826 mov rcx, [u64New]
827 lock cmpxchg [rdx], rcx
828 setz al
829 mov [fRet], al
830 }
831 return fRet;
832# endif
833# else /* !RT_ARCH_AMD64 */
834 uint32_t u32Ret;
835# if RT_INLINE_ASM_GNU_STYLE
836# if defined(PIC) || defined(__PIC__)
837 uint32_t u32EBX = (uint32_t)u64New;
838 uint32_t u32Spill;
839 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
840 "lock; cmpxchg8b (%6)\n\t"
841 "setz %%al\n\t"
842 "movl %4, %%ebx\n\t"
843 "movzbl %%al, %%eax\n\t"
844 : "=a" (u32Ret),
845 "=d" (u32Spill),
846# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
847 "+m" (*pu64)
848# else
849 "=m" (*pu64)
850# endif
851 : "A" (u64Old),
852 "m" ( u32EBX ),
853 "c" ( (uint32_t)(u64New >> 32) ),
854 "S" (pu64));
855# else /* !PIC */
856 uint32_t u32Spill;
857 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
858 "setz %%al\n\t"
859 "movzbl %%al, %%eax\n\t"
860 : "=a" (u32Ret),
861 "=d" (u32Spill),
862 "+m" (*pu64)
863 : "A" (u64Old),
864 "b" ( (uint32_t)u64New ),
865 "c" ( (uint32_t)(u64New >> 32) ));
866# endif
867 return (bool)u32Ret;
868# else
869 __asm
870 {
871 mov ebx, dword ptr [u64New]
872 mov ecx, dword ptr [u64New + 4]
873 mov edi, [pu64]
874 mov eax, dword ptr [u64Old]
875 mov edx, dword ptr [u64Old + 4]
876 lock cmpxchg8b [edi]
877 setz al
878 movzx eax, al
879 mov dword ptr [u32Ret], eax
880 }
881 return !!u32Ret;
882# endif
883# endif /* !RT_ARCH_AMD64 */
884}
885#endif
886
887
888/**
889 * Atomically Compare and exchange a signed 64-bit value, ordered.
890 *
891 * @returns true if xchg was done.
892 * @returns false if xchg wasn't done.
893 *
894 * @param pi64 Pointer to the 64-bit variable to update.
895 * @param i64 The 64-bit value to assign to *pu64.
896 * @param i64Old The value to compare with.
897 */
898DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
899{
900 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
901}
902
903
904/**
905 * Atomically Compare and Exchange a pointer value, ordered.
906 *
907 * @returns true if xchg was done.
908 * @returns false if xchg wasn't done.
909 *
910 * @param ppv Pointer to the value to update.
911 * @param pvNew The new value to assigned to *ppv.
912 * @param pvOld The old value to *ppv compare with.
913 */
914DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
915{
916#if ARCH_BITS == 32
917 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
918#elif ARCH_BITS == 64
919 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
920#else
921# error "ARCH_BITS is bogus"
922#endif
923}
924
925
/**
 * Atomically compares and exchanges a pointer value, ordered.
 *
 * Wrapper around ASMAtomicCmpXchgPtrVoid that takes care of the casting.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms: the arguments are
 *          first assigned to temporaries typed after *ppv.  Other compilers
 *          get plain casts.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
     })
#endif
953
954
955/** @def ASMAtomicCmpXchgHandle
956 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
957 *
958 * @param ph Pointer to the value to update.
959 * @param hNew The new value to assigned to *pu.
960 * @param hOld The old value to *pu compare with.
961 * @param fRc Where to store the result.
962 *
963 * @remarks This doesn't currently work for all handles (like RTFILE).
964 */
965#if HC_ARCH_BITS == 32
966# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
967 do { \
968 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
969 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
970 } while (0)
971#elif HC_ARCH_BITS == 64
972# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
973 do { \
974 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
975 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
976 } while (0)
977#else
978# error HC_ARCH_BITS
979#endif
980
981
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 4 and 8 byte values are handled; any other size raises an assertion
 * and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1003
1004
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * Three implementations are selected at compile time: GNU-style inline
 * assembly, the MSC _InterlockedCompareExchange intrinsic, or MSC-style
 * inline assembly.  When RT_INLINE_ASM_EXTERNAL is set and intrinsics are not
 * used, only a prototype for an external assembly routine is declared.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 * @param   pu32Old     Where to store the value *pu32 held before the operation.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX carries u32Old in and the previous *pu32 out; setz captures ZF,
       which cmpxchg sets when the exchange took place. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; success means it equals u32Old. */
    return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    /* Normalise the 0/1 integer result to a bool. */
    return !!u32Ret;
# endif
}
#endif
1065
1066
1067/**
1068 * Atomically Compare and Exchange a signed 32-bit value, additionally
1069 * passes back old value, ordered.
1070 *
1071 * @returns true if xchg was done.
1072 * @returns false if xchg wasn't done.
1073 *
1074 * @param pi32 Pointer to the value to update.
1075 * @param i32New The new value to assigned to *pi32.
1076 * @param i32Old The old value to *pi32 compare with.
1077 * @param pi32Old Pointer store the old value at.
1078 */
1079DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1080{
1081 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1082}
1083
1084
1085/**
1086 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1087 * passing back old value, ordered.
1088 *
1089 * @returns true if xchg was done.
1090 * @returns false if xchg wasn't done.
1091 *
1092 * @param pu64 Pointer to the 64-bit variable to update.
1093 * @param u64New The 64-bit value to assign to *pu64.
1094 * @param u64Old The value to compare with.
1095 * @param pu64Old Pointer store the old value at.
1096 */
1097#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1098 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1099DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1100#else
1101DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1102{
1103# if RT_INLINE_ASM_USES_INTRIN
1104 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1105
1106# elif defined(RT_ARCH_AMD64)
1107# if RT_INLINE_ASM_GNU_STYLE
1108 uint8_t u8Ret;
1109 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1110 "setz %1\n\t"
1111 : "=m" (*pu64),
1112 "=qm" (u8Ret),
1113 "=a" (*pu64Old)
1114 : "r" (u64New),
1115 "a" (u64Old),
1116 "m" (*pu64));
1117 return (bool)u8Ret;
1118# else
1119 bool fRet;
1120 __asm
1121 {
1122 mov rdx, [pu32]
1123 mov rax, [u64Old]
1124 mov rcx, [u64New]
1125 lock cmpxchg [rdx], rcx
1126 mov rdx, [pu64Old]
1127 mov [rdx], rax
1128 setz al
1129 mov [fRet], al
1130 }
1131 return fRet;
1132# endif
1133# else /* !RT_ARCH_AMD64 */
1134# if RT_INLINE_ASM_GNU_STYLE
1135 uint64_t u64Ret;
1136# if defined(PIC) || defined(__PIC__)
1137 /* NB: this code uses a memory clobber description, because the clean
1138 * solution with an output value for *pu64 makes gcc run out of registers.
1139 * This will cause suboptimal code, and anyone with a better solution is
1140 * welcome to improve this. */
1141 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1142 "lock; cmpxchg8b %3\n\t"
1143 "xchgl %%ebx, %1\n\t"
1144 : "=A" (u64Ret)
1145 : "DS" ((uint32_t)u64New),
1146 "c" ((uint32_t)(u64New >> 32)),
1147 "m" (*pu64),
1148 "0" (u64Old)
1149 : "memory" );
1150# else /* !PIC */
1151 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1152 : "=A" (u64Ret),
1153 "=m" (*pu64)
1154 : "b" ((uint32_t)u64New),
1155 "c" ((uint32_t)(u64New >> 32)),
1156 "m" (*pu64),
1157 "0" (u64Old));
1158# endif
1159 *pu64Old = u64Ret;
1160 return u64Ret == u64Old;
1161# else
1162 uint32_t u32Ret;
1163 __asm
1164 {
1165 mov ebx, dword ptr [u64New]
1166 mov ecx, dword ptr [u64New + 4]
1167 mov edi, [pu64]
1168 mov eax, dword ptr [u64Old]
1169 mov edx, dword ptr [u64Old + 4]
1170 lock cmpxchg8b [edi]
1171 mov ebx, [pu64Old]
1172 mov [ebx], eax
1173 setz al
1174 movzx eax, al
1175 add ebx, 4
1176 mov [ebx], edx
1177 mov dword ptr [u32Ret], eax
1178 }
1179 return !!u32Ret;
1180# endif
1181# endif /* !RT_ARCH_AMD64 */
1182}
1183#endif
1184
1185
1186/**
1187 * Atomically Compare and exchange a signed 64-bit value, additionally
1188 * passing back old value, ordered.
1189 *
1190 * @returns true if xchg was done.
1191 * @returns false if xchg wasn't done.
1192 *
1193 * @param pi64 Pointer to the 64-bit variable to update.
1194 * @param i64 The 64-bit value to assign to *pu64.
1195 * @param i64Old The value to compare with.
1196 * @param pi64Old Pointer store the old value at.
1197 */
1198DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1199{
1200 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1201}
1202
1203/** @def ASMAtomicCmpXchgExHandle
1204 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1205 *
1206 * @param ph Pointer to the value to update.
1207 * @param hNew The new value to assigned to *pu.
1208 * @param hOld The old value to *pu compare with.
1209 * @param fRc Where to store the result.
1210 * @param phOldVal Pointer to where to store the old value.
1211 *
1212 * @remarks This doesn't currently work for all handles (like RTFILE).
1213 */
1214#if HC_ARCH_BITS == 32
1215# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1216 do { \
1217 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1218 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1219 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1220 } while (0)
1221#elif HC_ARCH_BITS == 64
1222# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1223 do { \
1224 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1225 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1226 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1227 } while (0)
1228#else
1229# error HC_ARCH_BITS
1230#endif
1231
1232
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value whose size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)): 4 bytes goes to ASMAtomicCmpXchgExU32 and
 * 8 bytes to ASMAtomicCmpXchgExU64.  Any other size triggers
 * AssertMsgFailed, sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1256
1257
1258/**
1259 * Atomically Compare and Exchange a pointer value, additionally
1260 * passing back old value, ordered.
1261 *
1262 * @returns true if xchg was done.
1263 * @returns false if xchg wasn't done.
1264 *
1265 * @param ppv Pointer to the value to update.
1266 * @param pvNew The new value to assigned to *ppv.
1267 * @param pvOld The old value to *ppv compare with.
1268 * @param ppvOld Pointer store the old value at.
1269 */
1270DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1271{
1272#if ARCH_BITS == 32
1273 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1274#elif ARCH_BITS == 64
1275 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1276#else
1277# error "ARCH_BITS is bogus"
1278#endif
1279}
1280
1281
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * Convenience wrapper around ASMAtomicCmpXchgExPtrVoid that supplies the
 * casts.  With __GNUC__ the arguments are first assigned to locals typed
 * after *(ppv), so mismatched pointer types are diagnosed by the compiler
 * before the casts to void pointers are applied.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                                   (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                                   (void **)ppvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
#endif
1313
1314
1315/**
1316 * Serialize Instruction.
1317 */
1318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1319DECLASM(void) ASMSerializeInstruction(void);
1320#else
1321DECLINLINE(void) ASMSerializeInstruction(void)
1322{
1323# if RT_INLINE_ASM_GNU_STYLE
1324 RTCCUINTREG xAX = 0;
1325# ifdef RT_ARCH_AMD64
1326 __asm__ ("cpuid"
1327 : "=a" (xAX)
1328 : "0" (xAX)
1329 : "rbx", "rcx", "rdx");
1330# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1331 __asm__ ("push %%ebx\n\t"
1332 "cpuid\n\t"
1333 "pop %%ebx\n\t"
1334 : "=a" (xAX)
1335 : "0" (xAX)
1336 : "ecx", "edx");
1337# else
1338 __asm__ ("cpuid"
1339 : "=a" (xAX)
1340 : "0" (xAX)
1341 : "ebx", "ecx", "edx");
1342# endif
1343
1344# elif RT_INLINE_ASM_USES_INTRIN
1345 int aInfo[4];
1346 __cpuid(aInfo, 0);
1347
1348# else
1349 __asm
1350 {
1351 push ebx
1352 xor eax, eax
1353 cpuid
1354 pop ebx
1355 }
1356# endif
1357}
1358#endif
1359
1360
1361/**
1362 * Memory fence, waits for any pending writes and reads to complete.
1363 */
1364DECLINLINE(void) ASMMemoryFence(void)
1365{
1366 /** @todo use mfence? check if all cpus we care for support it. */
1367 uint32_t volatile u32;
1368 ASMAtomicXchgU32(&u32, 0);
1369}
1370
1371
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently implemented as a full ASMMemoryFence().
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1380
1381
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently implemented as a full ASMMemoryFence().
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1390
1391
1392/**
1393 * Atomically reads an unsigned 8-bit value, ordered.
1394 *
1395 * @returns Current *pu8 value
1396 * @param pu8 Pointer to the 8-bit variable to read.
1397 */
1398DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1399{
1400 ASMMemoryFence();
1401 return *pu8; /* byte reads are atomic on x86 */
1402}
1403
1404
1405/**
1406 * Atomically reads an unsigned 8-bit value, unordered.
1407 *
1408 * @returns Current *pu8 value
1409 * @param pu8 Pointer to the 8-bit variable to read.
1410 */
1411DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1412{
1413 return *pu8; /* byte reads are atomic on x86 */
1414}
1415
1416
1417/**
1418 * Atomically reads a signed 8-bit value, ordered.
1419 *
1420 * @returns Current *pi8 value
1421 * @param pi8 Pointer to the 8-bit variable to read.
1422 */
1423DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1424{
1425 ASMMemoryFence();
1426 return *pi8; /* byte reads are atomic on x86 */
1427}
1428
1429
1430/**
1431 * Atomically reads a signed 8-bit value, unordered.
1432 *
1433 * @returns Current *pi8 value
1434 * @param pi8 Pointer to the 8-bit variable to read.
1435 */
1436DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1437{
1438 return *pi8; /* byte reads are atomic on x86 */
1439}
1440
1441
1442/**
1443 * Atomically reads an unsigned 16-bit value, ordered.
1444 *
1445 * @returns Current *pu16 value
1446 * @param pu16 Pointer to the 16-bit variable to read.
1447 */
1448DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1449{
1450 ASMMemoryFence();
1451 Assert(!((uintptr_t)pu16 & 1));
1452 return *pu16;
1453}
1454
1455
1456/**
1457 * Atomically reads an unsigned 16-bit value, unordered.
1458 *
1459 * @returns Current *pu16 value
1460 * @param pu16 Pointer to the 16-bit variable to read.
1461 */
1462DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1463{
1464 Assert(!((uintptr_t)pu16 & 1));
1465 return *pu16;
1466}
1467
1468
1469/**
1470 * Atomically reads a signed 16-bit value, ordered.
1471 *
1472 * @returns Current *pi16 value
1473 * @param pi16 Pointer to the 16-bit variable to read.
1474 */
1475DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1476{
1477 ASMMemoryFence();
1478 Assert(!((uintptr_t)pi16 & 1));
1479 return *pi16;
1480}
1481
1482
1483/**
1484 * Atomically reads a signed 16-bit value, unordered.
1485 *
1486 * @returns Current *pi16 value
1487 * @param pi16 Pointer to the 16-bit variable to read.
1488 */
1489DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1490{
1491 Assert(!((uintptr_t)pi16 & 1));
1492 return *pi16;
1493}
1494
1495
1496/**
1497 * Atomically reads an unsigned 32-bit value, ordered.
1498 *
1499 * @returns Current *pu32 value
1500 * @param pu32 Pointer to the 32-bit variable to read.
1501 */
1502DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1503{
1504 ASMMemoryFence();
1505 Assert(!((uintptr_t)pu32 & 3));
1506 return *pu32;
1507}
1508
1509
1510/**
1511 * Atomically reads an unsigned 32-bit value, unordered.
1512 *
1513 * @returns Current *pu32 value
1514 * @param pu32 Pointer to the 32-bit variable to read.
1515 */
1516DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1517{
1518 Assert(!((uintptr_t)pu32 & 3));
1519 return *pu32;
1520}
1521
1522
1523/**
1524 * Atomically reads a signed 32-bit value, ordered.
1525 *
1526 * @returns Current *pi32 value
1527 * @param pi32 Pointer to the 32-bit variable to read.
1528 */
1529DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1530{
1531 ASMMemoryFence();
1532 Assert(!((uintptr_t)pi32 & 3));
1533 return *pi32;
1534}
1535
1536
1537/**
1538 * Atomically reads a signed 32-bit value, unordered.
1539 *
1540 * @returns Current *pi32 value
1541 * @param pi32 Pointer to the 32-bit variable to read.
1542 */
1543DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1544{
1545 Assert(!((uintptr_t)pi32 & 3));
1546 return *pi32;
1547}
1548
1549
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is ASMMemoryFence() followed by a plain 64-bit load.  On
 * 32-bit hosts the value is fetched with "lock cmpxchg8b", using zero for
 * both the comparand and the replacement value; that instruction performs a
 * write cycle, which is why the memory must be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__( "mfence\n\t"
                          "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is not handed to the asm as an operand here: it is swapped with the
       zeroed u32EBX before cmpxchg8b and restored from it afterwards. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX = 0 (comparand), ECX:EBX = 0 (replacement); the current value
       comes back in EDX:EAX. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1627
1628
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain 64-bit load.  On 32-bit hosts the value is
 * fetched with "lock cmpxchg8b", using zero for both the comparand and the
 * replacement value; that instruction performs a write cycle, which is why
 * the memory must be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EAX, ECX and EDX are zeroed inside the asm itself; EBX is swapped with
       the zeroed u32EBX before cmpxchg8b and restored from it afterwards.
       u32Spill only exists to tell the compiler that ECX gets overwritten. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX = 0 (comparand), ECX:EBX = 0 (replacement); the current value
       comes back in EDX:EAX. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1707
1708
1709/**
1710 * Atomically reads a signed 64-bit value, ordered.
1711 *
1712 * @returns Current *pi64 value
1713 * @param pi64 Pointer to the 64-bit variable to read.
1714 * The memory pointed to must be writable.
1715 * @remark This will fault if the memory is read-only!
1716 */
1717DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1718{
1719 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1720}
1721
1722
1723/**
1724 * Atomically reads a signed 64-bit value, unordered.
1725 *
1726 * @returns Current *pi64 value
1727 * @param pi64 Pointer to the 64-bit variable to read.
1728 * The memory pointed to must be writable.
1729 * @remark This will fault if the memory is read-only!
1730 */
1731DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1732{
1733 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1734}
1735
1736
1737/**
1738 * Atomically reads a size_t value, ordered.
1739 *
1740 * @returns Current *pcb value
1741 * @param pcb Pointer to the size_t variable to read.
1742 */
1743DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1744{
1745#if ARCH_BITS == 64
1746 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1747#elif ARCH_BITS == 32
1748 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1749#else
1750# error "Unsupported ARCH_BITS value"
1751#endif
1752}
1753
1754
1755/**
1756 * Atomically reads a size_t value, unordered.
1757 *
1758 * @returns Current *pcb value
1759 * @param pcb Pointer to the size_t variable to read.
1760 */
1761DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1762{
1763#if ARCH_BITS == 64
1764 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1765#elif ARCH_BITS == 32
1766 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1767#else
1768# error "Unsupported ARCH_BITS value"
1769#endif
1770}
1771
1772
1773/**
1774 * Atomically reads a pointer value, ordered.
1775 *
1776 * @returns Current *pv value
1777 * @param ppv Pointer to the pointer variable to read.
1778 *
1779 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1780 * requires less typing (no casts).
1781 */
1782DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1783{
1784#if ARCH_BITS == 32
1785 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1786#elif ARCH_BITS == 64
1787 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1788#else
1789# error "ARCH_BITS is bogus"
1790#endif
1791}
1792
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * With __GNUC__ the value read is cast to the type of *(ppv) and then
 * assigned to a local of type @a Type, so the compiler can diagnose a
 * mismatch between the two.  Other compilers just get a plain cast to
 * @a Type.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
#endif
1812
1813
1814/**
1815 * Atomically reads a pointer value, unordered.
1816 *
1817 * @returns Current *pv value
1818 * @param ppv Pointer to the pointer variable to read.
1819 *
1820 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1821 * requires less typing (no casts).
1822 */
1823DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1824{
1825#if ARCH_BITS == 32
1826 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1827#elif ARCH_BITS == 64
1828 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1829#else
1830# error "ARCH_BITS is bogus"
1831#endif
1832}
1833
1834
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * With __GNUC__ the value read is cast to the type of *(ppv) and then
 * assigned to a local of type @a Type, so the compiler can diagnose a
 * mismatch between the two.  Other compilers just get a plain cast to
 * @a Type.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
#endif
1854
1855
1856/**
1857 * Atomically reads a boolean value, ordered.
1858 *
1859 * @returns Current *pf value
1860 * @param pf Pointer to the boolean variable to read.
1861 */
1862DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
1863{
1864 ASMMemoryFence();
1865 return *pf; /* byte reads are atomic on x86 */
1866}
1867
1868
1869/**
1870 * Atomically reads a boolean value, unordered.
1871 *
1872 * @returns Current *pf value
1873 * @param pf Pointer to the boolean variable to read.
1874 */
1875DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
1876{
1877 return *pf; /* byte reads are atomic on x86 */
1878}
1879
1880
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * The handle must be exactly as wide as HC_ARCH_BITS; this is checked at
 * compile time with AssertCompile for both @a ph and @a phRes.  The read is
 * forwarded to ASMAtomicReadU32 or ASMAtomicReadU64 accordingly.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1906
1907
/**
 * Atomically read a typical IPRT handle value, unordered.
 *
 * The handle must be exactly as wide as HC_ARCH_BITS; this is checked at
 * compile time with AssertCompile for both @a ph and @a phRes.  The read is
 * forwarded to ASMAtomicUoReadU32 or ASMAtomicUoReadU64 accordingly.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1933
1934
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read.
 * Any other size triggers AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
1952
1953
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read.
 * Any other size triggers AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
1971
1972
1973/**
1974 * Atomically writes an unsigned 8-bit value, ordered.
1975 *
1976 * @param pu8 Pointer to the 8-bit variable.
1977 * @param u8 The 8-bit value to assign to *pu8.
1978 */
1979DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
1980{
1981 ASMAtomicXchgU8(pu8, u8);
1982}
1983
1984
/**
 * Atomically writes an unsigned 8-bit value, unordered.
 *
 * A plain store through the volatile pointer; no fence is issued.
 *
 * @param   pu8     Pointer to the 8-bit variable.
 * @param   u8      The 8-bit value to assign to *pu8.
 */
DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
{
    *pu8 = u8;      /* byte writes are atomic on x86 */
}
1995
1996
1997/**
1998 * Atomically writes a signed 8-bit value, ordered.
1999 *
2000 * @param pi8 Pointer to the 8-bit variable to read.
2001 * @param i8 The 8-bit value to assign to *pi8.
2002 */
2003DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2004{
2005 ASMAtomicXchgS8(pi8, i8);
2006}
2007
2008
/**
 * Atomically writes a signed 8-bit value, unordered.
 *
 * A plain store through the volatile pointer; no fence is issued.
 *
 * @param   pi8     Pointer to the 8-bit variable to write.
 * @param   i8      The 8-bit value to assign to *pi8.
 */
DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
{
    *pi8 = i8;      /* byte writes are atomic on x86 */
}
2019
2020
2021/**
2022 * Atomically writes an unsigned 16-bit value, ordered.
2023 *
2024 * @param pu16 Pointer to the 16-bit variable to write.
2025 * @param u16 The 16-bit value to assign to *pu16.
2026 */
2027DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2028{
2029 ASMAtomicXchgU16(pu16, u16);
2030}
2031
2032
/**
 * Atomically writes an unsigned 16-bit value, unordered.
 *
 * Asserts that the pointer is 2-byte aligned and then does a plain store;
 * no fence is issued.
 *
 * @param   pu16    Pointer to the 16-bit variable to write.
 * @param   u16     The 16-bit value to assign to *pu16.
 */
DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
{
    Assert(!((uintptr_t)pu16 & 1));
    *pu16 = u16;
}
2044
2045
2046/**
2047 * Atomically writes a signed 16-bit value, ordered.
2048 *
2049 * @param pi16 Pointer to the 16-bit variable to write.
2050 * @param i16 The 16-bit value to assign to *pi16.
2051 */
2052DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2053{
2054 ASMAtomicXchgS16(pi16, i16);
2055}
2056
2057
/**
 * Atomically writes a signed 16-bit value, unordered.
 *
 * Asserts that the pointer is 2-byte aligned and then does a plain store;
 * no fence is issued.
 *
 * @param   pi16    Pointer to the 16-bit variable to write.
 * @param   i16     The 16-bit value to assign to *pi16.
 */
DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
{
    Assert(!((uintptr_t)pi16 & 1));
    *pi16 = i16;
}
2069
2070
2071/**
2072 * Atomically writes an unsigned 32-bit value, ordered.
2073 *
2074 * @param pu32 Pointer to the 32-bit variable to write.
2075 * @param u32 The 32-bit value to assign to *pu32.
2076 */
2077DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2078{
2079 ASMAtomicXchgU32(pu32, u32);
2080}
2081
2082
/**
 * Atomically writes an unsigned 32-bit value, unordered.
 *
 * Asserts that the pointer is 4-byte aligned and then does a plain store;
 * no fence is issued.
 *
 * @param   pu32    Pointer to the 32-bit variable to write.
 * @param   u32     The 32-bit value to assign to *pu32.
 */
DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
{
    Assert(!((uintptr_t)pu32 & 3));
    *pu32 = u32;
}
2094
2095
2096/**
2097 * Atomically writes a signed 32-bit value, ordered.
2098 *
2099 * @param pi32 Pointer to the 32-bit variable to write.
2100 * @param i32 The 32-bit value to assign to *pi32.
2101 */
2102DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2103{
2104 ASMAtomicXchgS32(pi32, i32);
2105}
2106
2107
/**
 * Atomically writes a signed 32-bit value, unordered.
 *
 * Asserts that the pointer is 4-byte aligned and then does a plain store;
 * no fence is issued.
 *
 * @param   pi32    Pointer to the 32-bit variable to write.
 * @param   i32     The 32-bit value to assign to *pi32.
 */
DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
{
    Assert(!((uintptr_t)pi32 & 3));
    *pi32 = i32;
}
2119
2120
2121/**
2122 * Atomically writes an unsigned 64-bit value, ordered.
2123 *
2124 * @param pu64 Pointer to the 64-bit variable to write.
2125 * @param u64 The 64-bit value to assign to *pu64.
2126 */
2127DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2128{
2129 ASMAtomicXchgU64(pu64, u64);
2130}
2131
2132
2133/**
2134 * Atomically writes an unsigned 64-bit value, unordered.
2135 *
2136 * @param pu64 Pointer to the 64-bit variable to write.
2137 * @param u64 The 64-bit value to assign to *pu64.
2138 */
2139DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2140{
2141 Assert(!((uintptr_t)pu64 & 7));
2142#if ARCH_BITS == 64
2143 *pu64 = u64;
2144#else
2145 ASMAtomicXchgU64(pu64, u64);
2146#endif
2147}
2148
2149
2150/**
2151 * Atomically writes a signed 64-bit value, ordered.
2152 *
2153 * @param pi64 Pointer to the 64-bit variable to write.
2154 * @param i64 The 64-bit value to assign to *pi64.
2155 */
2156DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2157{
2158 ASMAtomicXchgS64(pi64, i64);
2159}
2160
2161
2162/**
2163 * Atomically writes a signed 64-bit value, unordered.
2164 *
2165 * @param pi64 Pointer to the 64-bit variable to write.
2166 * @param i64 The 64-bit value to assign to *pi64.
2167 */
2168DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2169{
2170 Assert(!((uintptr_t)pi64 & 7));
2171#if ARCH_BITS == 64
2172 *pi64 = i64;
2173#else
2174 ASMAtomicXchgS64(pi64, i64);
2175#endif
2176}
2177
2178
2179/**
2180 * Atomically writes a boolean value, unordered.
2181 *
2182 * @param pf Pointer to the boolean variable to write.
2183 * @param f The boolean value to assign to *pf.
2184 */
2185DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2186{
2187 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2188}
2189
2190
2191/**
2192 * Atomically writes a boolean value, unordered.
2193 *
2194 * @param pf Pointer to the boolean variable to write.
2195 * @param f The boolean value to assign to *pf.
2196 */
2197DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2198{
2199 *pf = f; /* byte writes are atomic on x86 */
2200}
2201
2202
2203/**
2204 * Atomically writes a pointer value, ordered.
2205 *
2206 * @param ppv Pointer to the pointer variable to write.
2207 * @param pv The pointer value to assign to *ppv.
2208 */
2209DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2210{
2211#if ARCH_BITS == 32
2212 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2213#elif ARCH_BITS == 64
2214 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2215#else
2216# error "ARCH_BITS is bogus"
2217#endif
2218}
2219
2220
/**
 * Atomically writes a pointer value, ordered.
 *
 * Both variants check at compile time that *ppv and pv are pointer sized and
 * assert that ppv is aligned on a pointer-size boundary before handing the
 * write to ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The macro arguments are fully parenthesized, so expressions such as
 *          @c ppBase+1 are handled correctly.  In the GCC variant @a ppv and
 *          @a pv are evaluated exactly once; the non-GCC variant evaluates
 *          @a ppv a second time inside the alignment assertion.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
    } while (0)
#endif
2255
2256
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Checks at compile time that *ppv is pointer sized and asserts that ppv is
 * aligned on a pointer-size boundary before handing the write to
 * ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The macro argument is fully parenthesized.  In the GCC variant it
 *          is evaluated exactly once; the non-GCC variant evaluates it a
 *          second time inside the alignment assertion.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
    } while (0)
#endif
2282
2283
/**
 * Atomically writes a pointer value, unordered.
 *
 * The macro expands to a statement and yields no value.  After the compile
 * time size checks and the alignment assertion, the pointer is written with a
 * plain assignment.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.  If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The macro arguments are fully parenthesized.  In the GCC variant
 *          @a ppv and @a pv are evaluated exactly once; the non-GCC variant
 *          evaluates @a ppv a second time inside the alignment assertion.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const            pvTypeChecked  = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2318
2319
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * After the compile time size check and the alignment assertion, NULL is
 * written with a plain assignment.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The macro argument is fully parenthesized.  In the GCC variant it
 *          is evaluated exactly once; the non-GCC variant evaluates it a
 *          second time inside the alignment assertion.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2345
2346
2347/**
2348 * Atomically write a typical IPRT handle value, ordered.
2349 *
2350 * @param ph Pointer to the variable to update.
2351 * @param hNew The value to assign to *ph.
2352 *
2353 * @remarks This doesn't currently work for all handles (like RTFILE).
2354 */
2355#if HC_ARCH_BITS == 32
2356# define ASMAtomicWriteHandle(ph, hNew) \
2357 do { \
2358 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2359 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2360 } while (0)
2361#elif HC_ARCH_BITS == 64
2362# define ASMAtomicWriteHandle(ph, hNew) \
2363 do { \
2364 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2365 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2366 } while (0)
2367#else
2368# error HC_ARCH_BITS
2369#endif
2370
2371
2372/**
2373 * Atomically write a typical IPRT handle value, unordered.
2374 *
2375 * @param ph Pointer to the variable to update.
2376 * @param hNew The value to assign to *ph.
2377 *
2378 * @remarks This doesn't currently work for all handles (like RTFILE).
2379 */
2380#if HC_ARCH_BITS == 32
2381# define ASMAtomicUoWriteHandle(ph, hNew) \
2382 do { \
2383 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2384 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2385 } while (0)
2386#elif HC_ARCH_BITS == 64
2387# define ASMAtomicUoWriteHandle(ph, hNew) \
2388 do { \
2389 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2390 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2391 } while (0)
2392#else
2393# error HC_ARCH_BITS
2394#endif
2395
2396
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered write function.
 * Any other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2414
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered write
 * function.  Any other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2432
2433
2434
2435/**
2436 * Atomically exchanges and adds to a 32-bit value, ordered.
2437 *
2438 * @returns The old value.
2439 * @param pu32 Pointer to the value.
2440 * @param u32 Number to add.
2441 */
2442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2443DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2444#else
2445DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2446{
2447# if RT_INLINE_ASM_USES_INTRIN
2448 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2449 return u32;
2450
2451# elif RT_INLINE_ASM_GNU_STYLE
2452 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2453 : "=r" (u32),
2454 "=m" (*pu32)
2455 : "0" (u32),
2456 "m" (*pu32)
2457 : "memory");
2458 return u32;
2459# else
2460 __asm
2461 {
2462 mov eax, [u32]
2463# ifdef RT_ARCH_AMD64
2464 mov rdx, [pu32]
2465 lock xadd [rdx], eax
2466# else
2467 mov edx, [pu32]
2468 lock xadd [edx], eax
2469# endif
2470 mov [u32], eax
2471 }
2472 return u32;
2473# endif
2474}
2475#endif
2476
2477
2478/**
2479 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2480 *
2481 * @returns The old value.
2482 * @param pi32 Pointer to the value.
2483 * @param i32 Number to add.
2484 */
2485DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2486{
2487 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2488}
2489
2490
2491/**
2492 * Atomically exchanges and adds to a 64-bit value, ordered.
2493 *
2494 * @returns The old value.
2495 * @param pu64 Pointer to the value.
2496 * @param u64 Number to add.
2497 */
2498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2499DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2500#else
2501DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2502{
2503# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2504 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2505 return u64;
2506
2507# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2508 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2509 : "=r" (u64),
2510 "=m" (*pu64)
2511 : "0" (u64),
2512 "m" (*pu64)
2513 : "memory");
2514 return u64;
2515# else
2516 uint64_t u64Old;
2517 for (;;)
2518 {
2519 uint64_t u64New;
2520 u64Old = ASMAtomicUoReadU64(pu64);
2521 u64New = u64Old + u64;
2522 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2523 break;
2524 ASMNopPause();
2525 }
2526 return u64Old;
2527# endif
2528}
2529#endif
2530
2531
2532/**
2533 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2534 *
2535 * @returns The old value.
2536 * @param pi64 Pointer to the value.
2537 * @param i64 Number to add.
2538 */
2539DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2540{
2541 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2542}
2543
2544
2545/**
2546 * Atomically exchanges and adds to a size_t value, ordered.
2547 *
2548 * @returns The old value.
2549 * @param pcb Pointer to the size_t value.
2550 * @param cb Number to add.
2551 */
2552DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2553{
2554#if ARCH_BITS == 64
2555 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2556#elif ARCH_BITS == 32
2557 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2558#else
2559# error "Unsupported ARCH_BITS value"
2560#endif
2561}
2562
2563
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu); only 4 and 8 byte variables are supported.  Any
 * other size triggers an assertion, and neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.  It is written through a
 *                  pointer of the same width as *pu.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2580
2581
2582/**
2583 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2584 *
2585 * @returns The old value.
2586 * @param pu32 Pointer to the value.
2587 * @param u32 Number to subtract.
2588 */
2589DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2590{
2591 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2592}
2593
2594
2595/**
2596 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2597 *
2598 * @returns The old value.
2599 * @param pi32 Pointer to the value.
2600 * @param i32 Number to subtract.
2601 */
2602DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2603{
2604 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2605}
2606
2607
2608/**
2609 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2610 *
2611 * @returns The old value.
2612 * @param pu64 Pointer to the value.
2613 * @param u64 Number to subtract.
2614 */
2615DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2616{
2617 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2618}
2619
2620
2621/**
2622 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2623 *
2624 * @returns The old value.
2625 * @param pi64 Pointer to the value.
2626 * @param i64 Number to subtract.
2627 */
2628DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2629{
2630 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2631}
2632
2633
2634/**
2635 * Atomically exchanges and subtracts to a size_t value, ordered.
2636 *
2637 * @returns The old value.
2638 * @param pcb Pointer to the size_t value.
2639 * @param cb Number to subtract.
2640 */
2641DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2642{
2643#if ARCH_BITS == 64
2644 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2645#elif ARCH_BITS == 32
2646 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2647#else
2648# error "Unsupported ARCH_BITS value"
2649#endif
2650}
2651
2652
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu); only 4 and 8 byte variables are supported.  Any
 * other size triggers an assertion, and neither *pu nor *puOld is touched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.  It is written through a
 *                  pointer of the same width as *pu.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2669
2670
2671/**
2672 * Atomically increment a 32-bit value, ordered.
2673 *
2674 * @returns The new value.
2675 * @param pu32 Pointer to the value to increment.
2676 */
2677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2678DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2679#else
2680DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2681{
2682 uint32_t u32;
2683# if RT_INLINE_ASM_USES_INTRIN
2684 u32 = _InterlockedIncrement((long *)pu32);
2685 return u32;
2686
2687# elif RT_INLINE_ASM_GNU_STYLE
2688 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2689 : "=r" (u32),
2690 "=m" (*pu32)
2691 : "0" (1),
2692 "m" (*pu32)
2693 : "memory");
2694 return u32+1;
2695# else
2696 __asm
2697 {
2698 mov eax, 1
2699# ifdef RT_ARCH_AMD64
2700 mov rdx, [pu32]
2701 lock xadd [rdx], eax
2702# else
2703 mov edx, [pu32]
2704 lock xadd [edx], eax
2705# endif
2706 mov u32, eax
2707 }
2708 return u32+1;
2709# endif
2710}
2711#endif
2712
2713
2714/**
2715 * Atomically increment a signed 32-bit value, ordered.
2716 *
2717 * @returns The new value.
2718 * @param pi32 Pointer to the value to increment.
2719 */
2720DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2721{
2722 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2723}
2724
2725
2726/**
2727 * Atomically increment a 64-bit value, ordered.
2728 *
2729 * @returns The new value.
2730 * @param pu64 Pointer to the value to increment.
2731 */
2732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2733DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2734#else
2735DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2736{
2737# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2738 uint64_t u64;
2739 u64 = _InterlockedIncrement64((__int64 *)pu64);
2740 return u64;
2741
2742# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2743 uint64_t u64;
2744 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2745 : "=r" (u64),
2746 "=m" (*pu64)
2747 : "0" (1),
2748 "m" (*pu64)
2749 : "memory");
2750 return u64 + 1;
2751# else
2752 return ASMAtomicAddU64(pu64, 1) + 1;
2753# endif
2754}
2755#endif
2756
2757
2758/**
2759 * Atomically increment a signed 64-bit value, ordered.
2760 *
2761 * @returns The new value.
2762 * @param pi64 Pointer to the value to increment.
2763 */
2764DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
2765{
2766 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
2767}
2768
2769
2770/**
2771 * Atomically increment a size_t value, ordered.
2772 *
2773 * @returns The new value.
2774 * @param pcb Pointer to the value to increment.
2775 */
2776DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
2777{
2778#if ARCH_BITS == 64
2779 return ASMAtomicIncU64((uint64_t volatile *)pcb);
2780#elif ARCH_BITS == 32
2781 return ASMAtomicIncU32((uint32_t volatile *)pcb);
2782#else
2783# error "Unsupported ARCH_BITS value"
2784#endif
2785}
2786
2787
2788/**
2789 * Atomically decrement an unsigned 32-bit value, ordered.
2790 *
2791 * @returns The new value.
2792 * @param pu32 Pointer to the value to decrement.
2793 */
2794#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2795DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2796#else
2797DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2798{
2799 uint32_t u32;
2800# if RT_INLINE_ASM_USES_INTRIN
2801 u32 = _InterlockedDecrement((long *)pu32);
2802 return u32;
2803
2804# elif RT_INLINE_ASM_GNU_STYLE
2805 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2806 : "=r" (u32),
2807 "=m" (*pu32)
2808 : "0" (-1),
2809 "m" (*pu32)
2810 : "memory");
2811 return u32-1;
2812# else
2813 __asm
2814 {
2815 mov eax, -1
2816# ifdef RT_ARCH_AMD64
2817 mov rdx, [pu32]
2818 lock xadd [rdx], eax
2819# else
2820 mov edx, [pu32]
2821 lock xadd [edx], eax
2822# endif
2823 mov u32, eax
2824 }
2825 return u32-1;
2826# endif
2827}
2828#endif
2829
2830
2831/**
2832 * Atomically decrement a signed 32-bit value, ordered.
2833 *
2834 * @returns The new value.
2835 * @param pi32 Pointer to the value to decrement.
2836 */
2837DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2838{
2839 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2840}
2841
2842
2843/**
2844 * Atomically decrement an unsigned 64-bit value, ordered.
2845 *
2846 * @returns The new value.
2847 * @param pu64 Pointer to the value to decrement.
2848 */
2849#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2850DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
2851#else
2852DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
2853{
2854# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2855 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
2856 return u64;
2857
2858# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2859 uint64_t u64;
2860 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
2861 : "=r" (u64),
2862 "=m" (*pu64)
2863 : "0" (~(uint64_t)0),
2864 "m" (*pu64)
2865 : "memory");
2866 return u64-1;
2867# else
2868 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
2869# endif
2870}
2871#endif
2872
2873
2874/**
2875 * Atomically decrement a signed 64-bit value, ordered.
2876 *
2877 * @returns The new value.
2878 * @param pi64 Pointer to the value to decrement.
2879 */
2880DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
2881{
2882 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
2883}
2884
2885
2886/**
2887 * Atomically decrement a size_t value, ordered.
2888 *
2889 * @returns The new value.
2890 * @param pcb Pointer to the value to decrement.
2891 */
2892DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
2893{
2894#if ARCH_BITS == 64
2895 return ASMAtomicDecU64((uint64_t volatile *)pcb);
2896#elif ARCH_BITS == 32
2897 return ASMAtomicDecU32((uint32_t volatile *)pcb);
2898#else
2899# error "Unsupported ARCH_BITS value"
2900#endif
2901}
2902
2903
2904/**
2905 * Atomically Or an unsigned 32-bit value, ordered.
2906 *
2907 * @param pu32 Pointer to the pointer variable to OR u32 with.
2908 * @param u32 The value to OR *pu32 with.
2909 */
2910#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2911DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2912#else
2913DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2914{
2915# if RT_INLINE_ASM_USES_INTRIN
2916 _InterlockedOr((long volatile *)pu32, (long)u32);
2917
2918# elif RT_INLINE_ASM_GNU_STYLE
2919 __asm__ __volatile__("lock; orl %1, %0\n\t"
2920 : "=m" (*pu32)
2921 : "ir" (u32),
2922 "m" (*pu32));
2923# else
2924 __asm
2925 {
2926 mov eax, [u32]
2927# ifdef RT_ARCH_AMD64
2928 mov rdx, [pu32]
2929 lock or [rdx], eax
2930# else
2931 mov edx, [pu32]
2932 lock or [edx], eax
2933# endif
2934 }
2935# endif
2936}
2937#endif
2938
2939
2940/**
2941 * Atomically Or a signed 32-bit value, ordered.
2942 *
2943 * @param pi32 Pointer to the pointer variable to OR u32 with.
2944 * @param i32 The value to OR *pu32 with.
2945 */
2946DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2947{
2948 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2949}
2950
2951
2952/**
2953 * Atomically Or an unsigned 64-bit value, ordered.
2954 *
2955 * @param pu64 Pointer to the pointer variable to OR u64 with.
2956 * @param u64 The value to OR *pu64 with.
2957 */
2958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2959DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
2960#else
2961DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
2962{
2963# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2964 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
2965
2966# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2967 __asm__ __volatile__("lock; orq %1, %q0\n\t"
2968 : "=m" (*pu64)
2969 : "r" (u64),
2970 "m" (*pu64));
2971# else
2972 for (;;)
2973 {
2974 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
2975 uint64_t u64New = u64Old | u64;
2976 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2977 break;
2978 ASMNopPause();
2979 }
2980# endif
2981}
2982#endif
2983
2984
2985/**
2986 * Atomically Or a signed 64-bit value, ordered.
2987 *
2988 * @param pi64 Pointer to the pointer variable to OR u64 with.
2989 * @param i64 The value to OR *pu64 with.
2990 */
2991DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
2992{
2993 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
2994}
2995/**
2996 * Atomically And an unsigned 32-bit value, ordered.
2997 *
2998 * @param pu32 Pointer to the pointer variable to AND u32 with.
2999 * @param u32 The value to AND *pu32 with.
3000 */
3001#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3002DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3003#else
3004DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3005{
3006# if RT_INLINE_ASM_USES_INTRIN
3007 _InterlockedAnd((long volatile *)pu32, u32);
3008
3009# elif RT_INLINE_ASM_GNU_STYLE
3010 __asm__ __volatile__("lock; andl %1, %0\n\t"
3011 : "=m" (*pu32)
3012 : "ir" (u32),
3013 "m" (*pu32));
3014# else
3015 __asm
3016 {
3017 mov eax, [u32]
3018# ifdef RT_ARCH_AMD64
3019 mov rdx, [pu32]
3020 lock and [rdx], eax
3021# else
3022 mov edx, [pu32]
3023 lock and [edx], eax
3024# endif
3025 }
3026# endif
3027}
3028#endif
3029
3030
3031/**
3032 * Atomically And a signed 32-bit value, ordered.
3033 *
3034 * @param pi32 Pointer to the pointer variable to AND i32 with.
3035 * @param i32 The value to AND *pi32 with.
3036 */
3037DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3038{
3039 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3040}
3041
3042
3043/**
3044 * Atomically And an unsigned 64-bit value, ordered.
3045 *
3046 * @param pu64 Pointer to the pointer variable to AND u64 with.
3047 * @param u64 The value to AND *pu64 with.
3048 */
3049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3050DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3051#else
3052DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3053{
3054# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3055 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3056
3057# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3058 __asm__ __volatile__("lock; andq %1, %0\n\t"
3059 : "=m" (*pu64)
3060 : "r" (u64),
3061 "m" (*pu64));
3062# else
3063 for (;;)
3064 {
3065 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3066 uint64_t u64New = u64Old & u64;
3067 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3068 break;
3069 ASMNopPause();
3070 }
3071# endif
3072}
3073#endif
3074
3075
3076/**
3077 * Atomically And a signed 64-bit value, ordered.
3078 *
3079 * @param pi64 Pointer to the pointer variable to AND i64 with.
3080 * @param i64 The value to AND *pi64 with.
3081 */
3082DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3083{
3084 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3085}
3086
3087
3088
/** @def RT_ASM_PAGE_SIZE
 * The page size assumed by the page helpers in this header: 0x2000 on SPARC64
 * and 0x1000 everywhere else.  It is defined locally so that iprt/param.h does
 * not have to be dragged in.  When PAGE_SIZE is already defined (and
 * NT_INCLUDED is not), the build fails if the two values disagree.
 * @internal
 */
#ifndef RT_ARCH_SPARC64
# define RT_ASM_PAGE_SIZE   0x1000
  /* The PAGE_SIZE comparison stays in a nested #if so that it is skipped
     entirely whenever the outer condition is false. */
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#endif
3108
3109/**
3110 * Zeros a 4K memory page.
3111 *
3112 * @param pv Pointer to the memory block. This must be page aligned.
3113 */
3114#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3115DECLASM(void) ASMMemZeroPage(volatile void *pv);
3116# else
3117DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3118{
3119# if RT_INLINE_ASM_USES_INTRIN
3120# ifdef RT_ARCH_AMD64
3121 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3122# else
3123 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3124# endif
3125
3126# elif RT_INLINE_ASM_GNU_STYLE
3127 RTCCUINTREG uDummy;
3128# ifdef RT_ARCH_AMD64
3129 __asm__ __volatile__("rep stosq"
3130 : "=D" (pv),
3131 "=c" (uDummy)
3132 : "0" (pv),
3133 "c" (RT_ASM_PAGE_SIZE >> 3),
3134 "a" (0)
3135 : "memory");
3136# else
3137 __asm__ __volatile__("rep stosl"
3138 : "=D" (pv),
3139 "=c" (uDummy)
3140 : "0" (pv),
3141 "c" (RT_ASM_PAGE_SIZE >> 2),
3142 "a" (0)
3143 : "memory");
3144# endif
3145# else
3146 __asm
3147 {
3148# ifdef RT_ARCH_AMD64
3149 xor rax, rax
3150 mov ecx, 0200h
3151 mov rdi, [pv]
3152 rep stosq
3153# else
3154 xor eax, eax
3155 mov ecx, 0400h
3156 mov edi, [pv]
3157 rep stosd
3158# endif
3159 }
3160# endif
3161}
3162# endif
3163
3164
3165/**
3166 * Zeros a memory block with a 32-bit aligned size.
3167 *
3168 * @param pv Pointer to the memory block.
3169 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3170 */
3171#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3172DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3173#else
3174DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3175{
3176# if RT_INLINE_ASM_USES_INTRIN
3177# ifdef RT_ARCH_AMD64
3178 if (!(cb & 7))
3179 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3180 else
3181# endif
3182 __stosd((unsigned long *)pv, 0, cb / 4);
3183
3184# elif RT_INLINE_ASM_GNU_STYLE
3185 __asm__ __volatile__("rep stosl"
3186 : "=D" (pv),
3187 "=c" (cb)
3188 : "0" (pv),
3189 "1" (cb >> 2),
3190 "a" (0)
3191 : "memory");
3192# else
3193 __asm
3194 {
3195 xor eax, eax
3196# ifdef RT_ARCH_AMD64
3197 mov rcx, [cb]
3198 shr rcx, 2
3199 mov rdi, [pv]
3200# else
3201 mov ecx, [cb]
3202 shr ecx, 2
3203 mov edi, [pv]
3204# endif
3205 rep stosd
3206 }
3207# endif
3208}
3209#endif
3210
3211
3212/**
3213 * Fills a memory block with a 32-bit aligned size.
3214 *
3215 * @param pv Pointer to the memory block.
3216 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3217 * @param u32 The value to fill with.
3218 */
3219#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3220DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3221#else
3222DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3223{
3224# if RT_INLINE_ASM_USES_INTRIN
3225# ifdef RT_ARCH_AMD64
3226 if (!(cb & 7))
3227 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3228 else
3229# endif
3230 __stosd((unsigned long *)pv, u32, cb / 4);
3231
3232# elif RT_INLINE_ASM_GNU_STYLE
3233 __asm__ __volatile__("rep stosl"
3234 : "=D" (pv),
3235 "=c" (cb)
3236 : "0" (pv),
3237 "1" (cb >> 2),
3238 "a" (u32)
3239 : "memory");
3240# else
3241 __asm
3242 {
3243# ifdef RT_ARCH_AMD64
3244 mov rcx, [cb]
3245 shr rcx, 2
3246 mov rdi, [pv]
3247# else
3248 mov ecx, [cb]
3249 shr ecx, 2
3250 mov edi, [pv]
3251# endif
3252 mov eax, [u32]
3253 rep stosd
3254 }
3255# endif
3256}
3257#endif
3258
3259
3260/**
3261 * Checks if a memory page is all zeros.
3262 *
3263 * @returns true / false.
3264 *
3265 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3266 * boundary
3267 */
3268DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3269{
3270# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3271 union { RTCCUINTREG r; bool f; } uAX;
3272 RTCCUINTREG xCX, xDI;
3273 Assert(!((uintptr_t)pvPage & 15));
3274 __asm__ __volatile__("repe; "
3275# ifdef RT_ARCH_AMD64
3276 "scasq\n\t"
3277# else
3278 "scasl\n\t"
3279# endif
3280 "setnc %%al\n\t"
3281 : "=&c" (xCX),
3282 "=&D" (xDI),
3283 "=&a" (uAX.r)
3284 : "mr" (pvPage),
3285# ifdef RT_ARCH_AMD64
3286 "0" (RT_ASM_PAGE_SIZE/8),
3287# else
3288 "0" (RT_ASM_PAGE_SIZE/4),
3289# endif
3290 "1" (pvPage),
3291 "2" (0));
3292 return uAX.f;
3293# else
3294 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3295 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3296 Assert(!((uintptr_t)pvPage & 15));
3297 for (;;)
3298 {
3299 if (puPtr[0]) return false;
3300 if (puPtr[4]) return false;
3301
3302 if (puPtr[2]) return false;
3303 if (puPtr[6]) return false;
3304
3305 if (puPtr[1]) return false;
3306 if (puPtr[5]) return false;
3307
3308 if (puPtr[3]) return false;
3309 if (puPtr[7]) return false;
3310
3311 if (!--cLeft)
3312 return true;
3313 puPtr += 8;
3314 }
3315 return true;
3316# endif
3317}
3318
3319
3320/**
3321 * Checks if a memory block is filled with the specified byte.
3322 *
3323 * This is a sort of inverted memchr.
3324 *
3325 * @returns Pointer to the byte which doesn't equal u8.
3326 * @returns NULL if all equal to u8.
3327 *
3328 * @param pv Pointer to the memory block.
3329 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3330 * @param u8 The value it's supposed to be filled with.
3331 *
3332 * @todo Fix name, it is a predicate function but it's not returning boolean!
3333 */
3334DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3335{
3336/** @todo rewrite this in inline assembly? */
3337 uint8_t const *pb = (uint8_t const *)pv;
3338 for (; cb; cb--, pb++)
3339 if (RT_UNLIKELY(*pb != u8))
3340 return (void *)pb;
3341 return NULL;
3342}
3343
3344
3345/**
3346 * Checks if a memory block is filled with the specified 32-bit value.
3347 *
3348 * This is a sort of inverted memchr.
3349 *
3350 * @returns Pointer to the first value which doesn't equal u32.
3351 * @returns NULL if all equal to u32.
3352 *
3353 * @param pv Pointer to the memory block.
3354 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3355 * @param u32 The value it's supposed to be filled with.
3356 *
3357 * @todo Fix name, it is a predicate function but it's not returning boolean!
3358 */
3359DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3360{
3361/** @todo rewrite this in inline assembly? */
3362 uint32_t const *pu32 = (uint32_t const *)pv;
3363 for (; cb; cb -= 4, pu32++)
3364 if (RT_UNLIKELY(*pu32 != u32))
3365 return (uint32_t *)pu32;
3366 return NULL;
3367}
3368
3369
3370/**
3371 * Probes a byte pointer for read access.
3372 *
3373 * While the function will not fault if the byte is not read accessible,
3374 * the idea is to do this in a safe place like before acquiring locks
3375 * and such like.
3376 *
3377 * Also, this functions guarantees that an eager compiler is not going
3378 * to optimize the probing away.
3379 *
3380 * @param pvByte Pointer to the byte.
3381 */
3382#if RT_INLINE_ASM_EXTERNAL
3383DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3384#else
3385DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3386{
3387 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3388 uint8_t u8;
3389# if RT_INLINE_ASM_GNU_STYLE
3390 __asm__ __volatile__("movb (%1), %0\n\t"
3391 : "=r" (u8)
3392 : "r" (pvByte));
3393# else
3394 __asm
3395 {
3396# ifdef RT_ARCH_AMD64
3397 mov rax, [pvByte]
3398 mov al, [rax]
3399# else
3400 mov eax, [pvByte]
3401 mov al, [eax]
3402# endif
3403 mov [u8], al
3404 }
3405# endif
3406 return u8;
3407}
3408#endif
3409
3410/**
3411 * Probes a buffer for read access page by page.
3412 *
3413 * While the function will fault if the buffer is not fully read
3414 * accessible, the idea is to do this in a safe place like before
3415 * acquiring locks and such like.
3416 *
3417 * Also, this functions guarantees that an eager compiler is not going
3418 * to optimize the probing away.
3419 *
3420 * @param pvBuf Pointer to the buffer.
3421 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3422 */
3423DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3424{
3425 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3426 /* the first byte */
3427 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3428 ASMProbeReadByte(pu8);
3429
3430 /* the pages in between pages. */
3431 while (cbBuf > RT_ASM_PAGE_SIZE)
3432 {
3433 ASMProbeReadByte(pu8);
3434 cbBuf -= RT_ASM_PAGE_SIZE;
3435 pu8 += RT_ASM_PAGE_SIZE;
3436 }
3437
3438 /* the last byte */
3439 ASMProbeReadByte(pu8 + cbBuf - 1);
3440}
3441
3442
3443
3444/** @defgroup grp_inline_bits Bit Operations
3445 * @{
3446 */
3447
3448
3449/**
3450 * Sets a bit in a bitmap.
3451 *
3452 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
3453 * @param iBit The bit to set.
3454 *
3455 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3456 * However, doing so will yield better performance as well as avoiding
3457 * traps accessing the last bits in the bitmap.
3458 */
3459#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3460DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3461#else
3462DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3463{
3464# if RT_INLINE_ASM_USES_INTRIN
3465 _bittestandset((long *)pvBitmap, iBit);
3466
3467# elif RT_INLINE_ASM_GNU_STYLE
3468 __asm__ __volatile__("btsl %1, %0"
3469 : "=m" (*(volatile long *)pvBitmap)
3470 : "Ir" (iBit),
3471 "m" (*(volatile long *)pvBitmap)
3472 : "memory");
3473# else
3474 __asm
3475 {
3476# ifdef RT_ARCH_AMD64
3477 mov rax, [pvBitmap]
3478 mov edx, [iBit]
3479 bts [rax], edx
3480# else
3481 mov eax, [pvBitmap]
3482 mov edx, [iBit]
3483 bts [eax], edx
3484# endif
3485 }
3486# endif
3487}
3488#endif
3489
3490
3491/**
3492 * Atomically sets a bit in a bitmap, ordered.
3493 *
3494 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3495 * the memory access isn't atomic!
3496 * @param iBit The bit to set.
3497 */
3498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3499DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3500#else
3501DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3502{
3503 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3504# if RT_INLINE_ASM_USES_INTRIN
3505 _interlockedbittestandset((long *)pvBitmap, iBit);
3506# elif RT_INLINE_ASM_GNU_STYLE
3507 __asm__ __volatile__("lock; btsl %1, %0"
3508 : "=m" (*(volatile long *)pvBitmap)
3509 : "Ir" (iBit),
3510 "m" (*(volatile long *)pvBitmap)
3511 : "memory");
3512# else
3513 __asm
3514 {
3515# ifdef RT_ARCH_AMD64
3516 mov rax, [pvBitmap]
3517 mov edx, [iBit]
3518 lock bts [rax], edx
3519# else
3520 mov eax, [pvBitmap]
3521 mov edx, [iBit]
3522 lock bts [eax], edx
3523# endif
3524 }
3525# endif
3526}
3527#endif
3528
3529
3530/**
3531 * Clears a bit in a bitmap.
3532 *
3533 * @param pvBitmap Pointer to the bitmap.
3534 * @param iBit The bit to clear.
3535 *
3536 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3537 * However, doing so will yield better performance as well as avoiding
3538 * traps accessing the last bits in the bitmap.
3539 */
3540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3541DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3542#else
3543DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3544{
3545# if RT_INLINE_ASM_USES_INTRIN
3546 _bittestandreset((long *)pvBitmap, iBit);
3547
3548# elif RT_INLINE_ASM_GNU_STYLE
3549 __asm__ __volatile__("btrl %1, %0"
3550 : "=m" (*(volatile long *)pvBitmap)
3551 : "Ir" (iBit),
3552 "m" (*(volatile long *)pvBitmap)
3553 : "memory");
3554# else
3555 __asm
3556 {
3557# ifdef RT_ARCH_AMD64
3558 mov rax, [pvBitmap]
3559 mov edx, [iBit]
3560 btr [rax], edx
3561# else
3562 mov eax, [pvBitmap]
3563 mov edx, [iBit]
3564 btr [eax], edx
3565# endif
3566 }
3567# endif
3568}
3569#endif
3570
3571
3572/**
3573 * Atomically clears a bit in a bitmap, ordered.
3574 *
3575 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3576 * the memory access isn't atomic!
3577 * @param iBit The bit to toggle set.
3578 * @remarks No memory barrier, take care on smp.
3579 */
3580#if RT_INLINE_ASM_EXTERNAL
3581DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3582#else
3583DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3584{
3585 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3586# if RT_INLINE_ASM_GNU_STYLE
3587 __asm__ __volatile__("lock; btrl %1, %0"
3588 : "=m" (*(volatile long *)pvBitmap)
3589 : "Ir" (iBit),
3590 "m" (*(volatile long *)pvBitmap)
3591 : "memory");
3592# else
3593 __asm
3594 {
3595# ifdef RT_ARCH_AMD64
3596 mov rax, [pvBitmap]
3597 mov edx, [iBit]
3598 lock btr [rax], edx
3599# else
3600 mov eax, [pvBitmap]
3601 mov edx, [iBit]
3602 lock btr [eax], edx
3603# endif
3604 }
3605# endif
3606}
3607#endif
3608
3609
3610/**
3611 * Toggles a bit in a bitmap.
3612 *
3613 * @param pvBitmap Pointer to the bitmap.
3614 * @param iBit The bit to toggle.
3615 *
3616 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3617 * However, doing so will yield better performance as well as avoiding
3618 * traps accessing the last bits in the bitmap.
3619 */
3620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3621DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3622#else
3623DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3624{
3625# if RT_INLINE_ASM_USES_INTRIN
3626 _bittestandcomplement((long *)pvBitmap, iBit);
3627# elif RT_INLINE_ASM_GNU_STYLE
3628 __asm__ __volatile__("btcl %1, %0"
3629 : "=m" (*(volatile long *)pvBitmap)
3630 : "Ir" (iBit),
3631 "m" (*(volatile long *)pvBitmap)
3632 : "memory");
3633# else
3634 __asm
3635 {
3636# ifdef RT_ARCH_AMD64
3637 mov rax, [pvBitmap]
3638 mov edx, [iBit]
3639 btc [rax], edx
3640# else
3641 mov eax, [pvBitmap]
3642 mov edx, [iBit]
3643 btc [eax], edx
3644# endif
3645 }
3646# endif
3647}
3648#endif
3649
3650
3651/**
3652 * Atomically toggles a bit in a bitmap, ordered.
3653 *
3654 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3655 * the memory access isn't atomic!
3656 * @param iBit The bit to test and set.
3657 */
3658#if RT_INLINE_ASM_EXTERNAL
3659DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3660#else
3661DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3662{
3663 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3664# if RT_INLINE_ASM_GNU_STYLE
3665 __asm__ __volatile__("lock; btcl %1, %0"
3666 : "=m" (*(volatile long *)pvBitmap)
3667 : "Ir" (iBit),
3668 "m" (*(volatile long *)pvBitmap)
3669 : "memory");
3670# else
3671 __asm
3672 {
3673# ifdef RT_ARCH_AMD64
3674 mov rax, [pvBitmap]
3675 mov edx, [iBit]
3676 lock btc [rax], edx
3677# else
3678 mov eax, [pvBitmap]
3679 mov edx, [iBit]
3680 lock btc [eax], edx
3681# endif
3682 }
3683# endif
3684}
3685#endif
3686
3687
3688/**
3689 * Tests and sets a bit in a bitmap.
3690 *
3691 * @returns true if the bit was set.
3692 * @returns false if the bit was clear.
3693 *
3694 * @param pvBitmap Pointer to the bitmap.
3695 * @param iBit The bit to test and set.
3696 *
3697 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3698 * However, doing so will yield better performance as well as avoiding
3699 * traps accessing the last bits in the bitmap.
3700 */
3701#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3702DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3703#else
3704DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3705{
3706 union { bool f; uint32_t u32; uint8_t u8; } rc;
3707# if RT_INLINE_ASM_USES_INTRIN
3708 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3709
3710# elif RT_INLINE_ASM_GNU_STYLE
3711 __asm__ __volatile__("btsl %2, %1\n\t"
3712 "setc %b0\n\t"
3713 "andl $1, %0\n\t"
3714 : "=q" (rc.u32),
3715 "=m" (*(volatile long *)pvBitmap)
3716 : "Ir" (iBit),
3717 "m" (*(volatile long *)pvBitmap)
3718 : "memory");
3719# else
3720 __asm
3721 {
3722 mov edx, [iBit]
3723# ifdef RT_ARCH_AMD64
3724 mov rax, [pvBitmap]
3725 bts [rax], edx
3726# else
3727 mov eax, [pvBitmap]
3728 bts [eax], edx
3729# endif
3730 setc al
3731 and eax, 1
3732 mov [rc.u32], eax
3733 }
3734# endif
3735 return rc.f;
3736}
3737#endif
3738
3739
3740/**
3741 * Atomically tests and sets a bit in a bitmap, ordered.
3742 *
3743 * @returns true if the bit was set.
3744 * @returns false if the bit was clear.
3745 *
3746 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3747 * the memory access isn't atomic!
3748 * @param iBit The bit to set.
3749 */
3750#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3751DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3752#else
3753DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3754{
3755 union { bool f; uint32_t u32; uint8_t u8; } rc;
3756 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3757# if RT_INLINE_ASM_USES_INTRIN
3758 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3759# elif RT_INLINE_ASM_GNU_STYLE
3760 __asm__ __volatile__("lock; btsl %2, %1\n\t"
3761 "setc %b0\n\t"
3762 "andl $1, %0\n\t"
3763 : "=q" (rc.u32),
3764 "=m" (*(volatile long *)pvBitmap)
3765 : "Ir" (iBit),
3766 "m" (*(volatile long *)pvBitmap)
3767 : "memory");
3768# else
3769 __asm
3770 {
3771 mov edx, [iBit]
3772# ifdef RT_ARCH_AMD64
3773 mov rax, [pvBitmap]
3774 lock bts [rax], edx
3775# else
3776 mov eax, [pvBitmap]
3777 lock bts [eax], edx
3778# endif
3779 setc al
3780 and eax, 1
3781 mov [rc.u32], eax
3782 }
3783# endif
3784 return rc.f;
3785}
3786#endif
3787
3788
3789/**
3790 * Tests and clears a bit in a bitmap.
3791 *
3792 * @returns true if the bit was set.
3793 * @returns false if the bit was clear.
3794 *
3795 * @param pvBitmap Pointer to the bitmap.
3796 * @param iBit The bit to test and clear.
3797 *
3798 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3799 * However, doing so will yield better performance as well as avoiding
3800 * traps accessing the last bits in the bitmap.
3801 */
3802#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3803DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3804#else
3805DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3806{
3807 union { bool f; uint32_t u32; uint8_t u8; } rc;
3808# if RT_INLINE_ASM_USES_INTRIN
3809 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3810
3811# elif RT_INLINE_ASM_GNU_STYLE
3812 __asm__ __volatile__("btrl %2, %1\n\t"
3813 "setc %b0\n\t"
3814 "andl $1, %0\n\t"
3815 : "=q" (rc.u32),
3816 "=m" (*(volatile long *)pvBitmap)
3817 : "Ir" (iBit),
3818 "m" (*(volatile long *)pvBitmap)
3819 : "memory");
3820# else
3821 __asm
3822 {
3823 mov edx, [iBit]
3824# ifdef RT_ARCH_AMD64
3825 mov rax, [pvBitmap]
3826 btr [rax], edx
3827# else
3828 mov eax, [pvBitmap]
3829 btr [eax], edx
3830# endif
3831 setc al
3832 and eax, 1
3833 mov [rc.u32], eax
3834 }
3835# endif
3836 return rc.f;
3837}
3838#endif
3839
3840
3841/**
3842 * Atomically tests and clears a bit in a bitmap, ordered.
3843 *
3844 * @returns true if the bit was set.
3845 * @returns false if the bit was clear.
3846 *
3847 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3848 * the memory access isn't atomic!
3849 * @param iBit The bit to test and clear.
3850 *
3851 * @remarks No memory barrier, take care on smp.
3852 */
3853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3854DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3855#else
3856DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3857{
3858 union { bool f; uint32_t u32; uint8_t u8; } rc;
3859 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3860# if RT_INLINE_ASM_USES_INTRIN
3861 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3862
3863# elif RT_INLINE_ASM_GNU_STYLE
3864 __asm__ __volatile__("lock; btrl %2, %1\n\t"
3865 "setc %b0\n\t"
3866 "andl $1, %0\n\t"
3867 : "=q" (rc.u32),
3868 "=m" (*(volatile long *)pvBitmap)
3869 : "Ir" (iBit),
3870 "m" (*(volatile long *)pvBitmap)
3871 : "memory");
3872# else
3873 __asm
3874 {
3875 mov edx, [iBit]
3876# ifdef RT_ARCH_AMD64
3877 mov rax, [pvBitmap]
3878 lock btr [rax], edx
3879# else
3880 mov eax, [pvBitmap]
3881 lock btr [eax], edx
3882# endif
3883 setc al
3884 and eax, 1
3885 mov [rc.u32], eax
3886 }
3887# endif
3888 return rc.f;
3889}
3890#endif
3891
3892
3893/**
3894 * Tests and toggles a bit in a bitmap.
3895 *
3896 * @returns true if the bit was set.
3897 * @returns false if the bit was clear.
3898 *
3899 * @param pvBitmap Pointer to the bitmap.
3900 * @param iBit The bit to test and toggle.
3901 *
3902 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
3903 * However, doing so will yield better performance as well as avoiding
3904 * traps accessing the last bits in the bitmap.
3905 */
3906#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3907DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3908#else
3909DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3910{
3911 union { bool f; uint32_t u32; uint8_t u8; } rc;
3912# if RT_INLINE_ASM_USES_INTRIN
3913 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3914
3915# elif RT_INLINE_ASM_GNU_STYLE
3916 __asm__ __volatile__("btcl %2, %1\n\t"
3917 "setc %b0\n\t"
3918 "andl $1, %0\n\t"
3919 : "=q" (rc.u32),
3920 "=m" (*(volatile long *)pvBitmap)
3921 : "Ir" (iBit),
3922 "m" (*(volatile long *)pvBitmap)
3923 : "memory");
3924# else
3925 __asm
3926 {
3927 mov edx, [iBit]
3928# ifdef RT_ARCH_AMD64
3929 mov rax, [pvBitmap]
3930 btc [rax], edx
3931# else
3932 mov eax, [pvBitmap]
3933 btc [eax], edx
3934# endif
3935 setc al
3936 and eax, 1
3937 mov [rc.u32], eax
3938 }
3939# endif
3940 return rc.f;
3941}
3942#endif
3943
3944
3945/**
3946 * Atomically tests and toggles a bit in a bitmap, ordered.
3947 *
3948 * @returns true if the bit was set.
3949 * @returns false if the bit was clear.
3950 *
3951 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
3952 * the memory access isn't atomic!
3953 * @param iBit The bit to test and toggle.
3954 */
3955#if RT_INLINE_ASM_EXTERNAL
3956DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3957#else
3958DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3959{
3960 union { bool f; uint32_t u32; uint8_t u8; } rc;
3961 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
3962# if RT_INLINE_ASM_GNU_STYLE
3963 __asm__ __volatile__("lock; btcl %2, %1\n\t"
3964 "setc %b0\n\t"
3965 "andl $1, %0\n\t"
3966 : "=q" (rc.u32),
3967 "=m" (*(volatile long *)pvBitmap)
3968 : "Ir" (iBit),
3969 "m" (*(volatile long *)pvBitmap)
3970 : "memory");
3971# else
3972 __asm
3973 {
3974 mov edx, [iBit]
3975# ifdef RT_ARCH_AMD64
3976 mov rax, [pvBitmap]
3977 lock btc [rax], edx
3978# else
3979 mov eax, [pvBitmap]
3980 lock btc [eax], edx
3981# endif
3982 setc al
3983 and eax, 1
3984 mov [rc.u32], eax
3985 }
3986# endif
3987 return rc.f;
3988}
3989#endif
3990
3991
3992/**
3993 * Tests if a bit in a bitmap is set.
3994 *
3995 * @returns true if the bit is set.
3996 * @returns false if the bit is clear.
3997 *
3998 * @param pvBitmap Pointer to the bitmap.
3999 * @param iBit The bit to test.
4000 *
4001 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4002 * However, doing so will yield better performance as well as avoiding
4003 * traps accessing the last bits in the bitmap.
4004 */
4005#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4006DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4007#else
4008DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4009{
4010 union { bool f; uint32_t u32; uint8_t u8; } rc;
4011# if RT_INLINE_ASM_USES_INTRIN
4012 rc.u32 = _bittest((long *)pvBitmap, iBit);
4013# elif RT_INLINE_ASM_GNU_STYLE
4014
4015 __asm__ __volatile__("btl %2, %1\n\t"
4016 "setc %b0\n\t"
4017 "andl $1, %0\n\t"
4018 : "=q" (rc.u32)
4019 : "m" (*(const volatile long *)pvBitmap),
4020 "Ir" (iBit)
4021 : "memory");
4022# else
4023 __asm
4024 {
4025 mov edx, [iBit]
4026# ifdef RT_ARCH_AMD64
4027 mov rax, [pvBitmap]
4028 bt [rax], edx
4029# else
4030 mov eax, [pvBitmap]
4031 bt [eax], edx
4032# endif
4033 setc al
4034 and eax, 1
4035 mov [rc.u32], eax
4036 }
4037# endif
4038 return rc.f;
4039}
4040#endif
4041
4042
4043/**
4044 * Clears a bit range within a bitmap.
4045 *
4046 * @param pvBitmap Pointer to the bitmap.
4047 * @param iBitStart The First bit to clear.
4048 * @param iBitEnd The first bit not to clear.
4049 */
4050DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4051{
4052 if (iBitStart < iBitEnd)
4053 {
4054 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4055 int iStart = iBitStart & ~31;
4056 int iEnd = iBitEnd & ~31;
4057 if (iStart == iEnd)
4058 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4059 else
4060 {
4061 /* bits in first dword. */
4062 if (iBitStart & 31)
4063 {
4064 *pu32 &= (1 << (iBitStart & 31)) - 1;
4065 pu32++;
4066 iBitStart = iStart + 32;
4067 }
4068
4069 /* whole dword. */
4070 if (iBitStart != iEnd)
4071 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4072
4073 /* bits in last dword. */
4074 if (iBitEnd & 31)
4075 {
4076 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4077 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4078 }
4079 }
4080 }
4081}
4082
4083
4084/**
4085 * Sets a bit range within a bitmap.
4086 *
4087 * @param pvBitmap Pointer to the bitmap.
4088 * @param iBitStart The First bit to set.
4089 * @param iBitEnd The first bit not to set.
4090 */
4091DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4092{
4093 if (iBitStart < iBitEnd)
4094 {
4095 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4096 int iStart = iBitStart & ~31;
4097 int iEnd = iBitEnd & ~31;
4098 if (iStart == iEnd)
4099 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
4100 else
4101 {
4102 /* bits in first dword. */
4103 if (iBitStart & 31)
4104 {
4105 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
4106 pu32++;
4107 iBitStart = iStart + 32;
4108 }
4109
4110 /* whole dword. */
4111 if (iBitStart != iEnd)
4112 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4113
4114 /* bits in last dword. */
4115 if (iBitEnd & 31)
4116 {
4117 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4118 *pu32 |= (1 << (iBitEnd & 31)) - 1;
4119 }
4120 }
4121 }
4122}
4123
4124
4125/**
4126 * Finds the first clear bit in a bitmap.
4127 *
4128 * @returns Index of the first zero bit.
4129 * @returns -1 if no clear bit was found.
4130 * @param pvBitmap Pointer to the bitmap.
4131 * @param cBits The number of bits in the bitmap. Multiple of 32.
4132 */
4133#if RT_INLINE_ASM_EXTERNAL
4134DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4135#else
4136DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4137{
4138 if (cBits)
4139 {
4140 int32_t iBit;
4141# if RT_INLINE_ASM_GNU_STYLE
4142 RTCCUINTREG uEAX, uECX, uEDI;
4143 cBits = RT_ALIGN_32(cBits, 32);
4144 __asm__ __volatile__("repe; scasl\n\t"
4145 "je 1f\n\t"
4146# ifdef RT_ARCH_AMD64
4147 "lea -4(%%rdi), %%rdi\n\t"
4148 "xorl (%%rdi), %%eax\n\t"
4149 "subq %5, %%rdi\n\t"
4150# else
4151 "lea -4(%%edi), %%edi\n\t"
4152 "xorl (%%edi), %%eax\n\t"
4153 "subl %5, %%edi\n\t"
4154# endif
4155 "shll $3, %%edi\n\t"
4156 "bsfl %%eax, %%edx\n\t"
4157 "addl %%edi, %%edx\n\t"
4158 "1:\t\n"
4159 : "=d" (iBit),
4160 "=&c" (uECX),
4161 "=&D" (uEDI),
4162 "=&a" (uEAX)
4163 : "0" (0xffffffff),
4164 "mr" (pvBitmap),
4165 "1" (cBits >> 5),
4166 "2" (pvBitmap),
4167 "3" (0xffffffff));
4168# else
4169 cBits = RT_ALIGN_32(cBits, 32);
4170 __asm
4171 {
4172# ifdef RT_ARCH_AMD64
4173 mov rdi, [pvBitmap]
4174 mov rbx, rdi
4175# else
4176 mov edi, [pvBitmap]
4177 mov ebx, edi
4178# endif
4179 mov edx, 0ffffffffh
4180 mov eax, edx
4181 mov ecx, [cBits]
4182 shr ecx, 5
4183 repe scasd
4184 je done
4185
4186# ifdef RT_ARCH_AMD64
4187 lea rdi, [rdi - 4]
4188 xor eax, [rdi]
4189 sub rdi, rbx
4190# else
4191 lea edi, [edi - 4]
4192 xor eax, [edi]
4193 sub edi, ebx
4194# endif
4195 shl edi, 3
4196 bsf edx, eax
4197 add edx, edi
4198 done:
4199 mov [iBit], edx
4200 }
4201# endif
4202 return iBit;
4203 }
4204 return -1;
4205}
4206#endif
4207
4208
4209/**
4210 * Finds the next clear bit in a bitmap.
4211 *
4212 * @returns Index of the first zero bit.
4213 * @returns -1 if no clear bit was found.
4214 * @param pvBitmap Pointer to the bitmap.
4215 * @param cBits The number of bits in the bitmap. Multiple of 32.
4216 * @param iBitPrev The bit returned from the last search.
4217 * The search will start at iBitPrev + 1.
4218 */
4219#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4220DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4221#else
4222DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4223{
4224 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4225 int iBit = ++iBitPrev & 31;
4226 if (iBit)
4227 {
4228 /*
4229 * Inspect the 32-bit word containing the unaligned bit.
4230 */
4231 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4232
4233# if RT_INLINE_ASM_USES_INTRIN
4234 unsigned long ulBit = 0;
4235 if (_BitScanForward(&ulBit, u32))
4236 return ulBit + iBitPrev;
4237# else
4238# if RT_INLINE_ASM_GNU_STYLE
4239 __asm__ __volatile__("bsf %1, %0\n\t"
4240 "jnz 1f\n\t"
4241 "movl $-1, %0\n\t"
4242 "1:\n\t"
4243 : "=r" (iBit)
4244 : "r" (u32));
4245# else
4246 __asm
4247 {
4248 mov edx, [u32]
4249 bsf eax, edx
4250 jnz done
4251 mov eax, 0ffffffffh
4252 done:
4253 mov [iBit], eax
4254 }
4255# endif
4256 if (iBit >= 0)
4257 return iBit + iBitPrev;
4258# endif
4259
4260 /*
4261 * Skip ahead and see if there is anything left to search.
4262 */
4263 iBitPrev |= 31;
4264 iBitPrev++;
4265 if (cBits <= (uint32_t)iBitPrev)
4266 return -1;
4267 }
4268
4269 /*
4270 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4271 */
4272 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4273 if (iBit >= 0)
4274 iBit += iBitPrev;
4275 return iBit;
4276}
4277#endif
4278
4279
4280/**
4281 * Finds the first set bit in a bitmap.
4282 *
4283 * @returns Index of the first set bit.
4284 * @returns -1 if no clear bit was found.
4285 * @param pvBitmap Pointer to the bitmap.
4286 * @param cBits The number of bits in the bitmap. Multiple of 32.
4287 */
4288#if RT_INLINE_ASM_EXTERNAL
4289DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4290#else
4291DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4292{
4293 if (cBits)
4294 {
4295 int32_t iBit;
4296# if RT_INLINE_ASM_GNU_STYLE
4297 RTCCUINTREG uEAX, uECX, uEDI;
4298 cBits = RT_ALIGN_32(cBits, 32);
4299 __asm__ __volatile__("repe; scasl\n\t"
4300 "je 1f\n\t"
4301# ifdef RT_ARCH_AMD64
4302 "lea -4(%%rdi), %%rdi\n\t"
4303 "movl (%%rdi), %%eax\n\t"
4304 "subq %5, %%rdi\n\t"
4305# else
4306 "lea -4(%%edi), %%edi\n\t"
4307 "movl (%%edi), %%eax\n\t"
4308 "subl %5, %%edi\n\t"
4309# endif
4310 "shll $3, %%edi\n\t"
4311 "bsfl %%eax, %%edx\n\t"
4312 "addl %%edi, %%edx\n\t"
4313 "1:\t\n"
4314 : "=d" (iBit),
4315 "=&c" (uECX),
4316 "=&D" (uEDI),
4317 "=&a" (uEAX)
4318 : "0" (0xffffffff),
4319 "mr" (pvBitmap),
4320 "1" (cBits >> 5),
4321 "2" (pvBitmap),
4322 "3" (0));
4323# else
4324 cBits = RT_ALIGN_32(cBits, 32);
4325 __asm
4326 {
4327# ifdef RT_ARCH_AMD64
4328 mov rdi, [pvBitmap]
4329 mov rbx, rdi
4330# else
4331 mov edi, [pvBitmap]
4332 mov ebx, edi
4333# endif
4334 mov edx, 0ffffffffh
4335 xor eax, eax
4336 mov ecx, [cBits]
4337 shr ecx, 5
4338 repe scasd
4339 je done
4340# ifdef RT_ARCH_AMD64
4341 lea rdi, [rdi - 4]
4342 mov eax, [rdi]
4343 sub rdi, rbx
4344# else
4345 lea edi, [edi - 4]
4346 mov eax, [edi]
4347 sub edi, ebx
4348# endif
4349 shl edi, 3
4350 bsf edx, eax
4351 add edx, edi
4352 done:
4353 mov [iBit], edx
4354 }
4355# endif
4356 return iBit;
4357 }
4358 return -1;
4359}
4360#endif
4361
4362
4363/**
4364 * Finds the next set bit in a bitmap.
4365 *
4366 * @returns Index of the next set bit.
4367 * @returns -1 if no set bit was found.
4368 * @param pvBitmap Pointer to the bitmap.
4369 * @param cBits The number of bits in the bitmap. Multiple of 32.
4370 * @param iBitPrev The bit returned from the last search.
4371 * The search will start at iBitPrev + 1.
4372 */
4373#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4374DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4375#else
4376DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4377{
4378 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4379 int iBit = ++iBitPrev & 31;
4380 if (iBit)
4381 {
4382 /*
4383 * Inspect the 32-bit word containing the unaligned bit.
4384 */
4385 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4386
4387# if RT_INLINE_ASM_USES_INTRIN
4388 unsigned long ulBit = 0;
4389 if (_BitScanForward(&ulBit, u32))
4390 return ulBit + iBitPrev;
4391# else
4392# if RT_INLINE_ASM_GNU_STYLE
4393 __asm__ __volatile__("bsf %1, %0\n\t"
4394 "jnz 1f\n\t"
4395 "movl $-1, %0\n\t"
4396 "1:\n\t"
4397 : "=r" (iBit)
4398 : "r" (u32));
4399# else
4400 __asm
4401 {
4402 mov edx, [u32]
4403 bsf eax, edx
4404 jnz done
4405 mov eax, 0ffffffffh
4406 done:
4407 mov [iBit], eax
4408 }
4409# endif
4410 if (iBit >= 0)
4411 return iBit + iBitPrev;
4412# endif
4413
4414 /*
4415 * Skip ahead and see if there is anything left to search.
4416 */
4417 iBitPrev |= 31;
4418 iBitPrev++;
4419 if (cBits <= (uint32_t)iBitPrev)
4420 return -1;
4421 }
4422
4423 /*
4424 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4425 */
4426 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4427 if (iBit >= 0)
4428 iBit += iBitPrev;
4429 return iBit;
4430}
4431#endif
4432
4433
4434/**
4435 * Finds the first bit which is set in the given 32-bit integer.
4436 * Bits are numbered from 1 (least significant) to 32.
4437 *
4438 * @returns index [1..32] of the first set bit.
4439 * @returns 0 if all bits are cleared.
4440 * @param u32 Integer to search for set bits.
4441 * @remark Similar to ffs() in BSD.
4442 */
4443#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4444DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
4445#else
4446DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4447{
4448# if RT_INLINE_ASM_USES_INTRIN
4449 unsigned long iBit;
4450 if (_BitScanForward(&iBit, u32))
4451 iBit++;
4452 else
4453 iBit = 0;
4454# elif RT_INLINE_ASM_GNU_STYLE
4455 uint32_t iBit;
4456 __asm__ __volatile__("bsf %1, %0\n\t"
4457 "jnz 1f\n\t"
4458 "xorl %0, %0\n\t"
4459 "jmp 2f\n"
4460 "1:\n\t"
4461 "incl %0\n"
4462 "2:\n\t"
4463 : "=r" (iBit)
4464 : "rm" (u32));
4465# else
4466 uint32_t iBit;
4467 _asm
4468 {
4469 bsf eax, [u32]
4470 jnz found
4471 xor eax, eax
4472 jmp done
4473 found:
4474 inc eax
4475 done:
4476 mov [iBit], eax
4477 }
4478# endif
4479 return iBit;
4480}
4481#endif
4482
4483
4484/**
4485 * Finds the first bit which is set in the given 32-bit integer.
4486 * Bits are numbered from 1 (least significant) to 32.
4487 *
4488 * @returns index [1..32] of the first set bit.
4489 * @returns 0 if all bits are cleared.
4490 * @param i32 Integer to search for set bits.
4491 * @remark Similar to ffs() in BSD.
4492 */
4493DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4494{
4495 return ASMBitFirstSetU32((uint32_t)i32);
4496}
4497
4498
4499/**
4500 * Finds the last bit which is set in the given 32-bit integer.
4501 * Bits are numbered from 1 (least significant) to 32.
4502 *
4503 * @returns index [1..32] of the last set bit.
4504 * @returns 0 if all bits are cleared.
4505 * @param u32 Integer to search for set bits.
4506 * @remark Similar to fls() in BSD.
4507 */
4508#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4509DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
4510#else
4511DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4512{
4513# if RT_INLINE_ASM_USES_INTRIN
4514 unsigned long iBit;
4515 if (_BitScanReverse(&iBit, u32))
4516 iBit++;
4517 else
4518 iBit = 0;
4519# elif RT_INLINE_ASM_GNU_STYLE
4520 uint32_t iBit;
4521 __asm__ __volatile__("bsrl %1, %0\n\t"
4522 "jnz 1f\n\t"
4523 "xorl %0, %0\n\t"
4524 "jmp 2f\n"
4525 "1:\n\t"
4526 "incl %0\n"
4527 "2:\n\t"
4528 : "=r" (iBit)
4529 : "rm" (u32));
4530# else
4531 uint32_t iBit;
4532 _asm
4533 {
4534 bsr eax, [u32]
4535 jnz found
4536 xor eax, eax
4537 jmp done
4538 found:
4539 inc eax
4540 done:
4541 mov [iBit], eax
4542 }
4543# endif
4544 return iBit;
4545}
4546#endif
4547
4548
4549/**
4550 * Finds the last bit which is set in the given 32-bit integer.
4551 * Bits are numbered from 1 (least significant) to 32.
4552 *
4553 * @returns index [1..32] of the last set bit.
4554 * @returns 0 if all bits are cleared.
4555 * @param i32 Integer to search for set bits.
4556 * @remark Similar to fls() in BSD.
4557 */
4558DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4559{
4560 return ASMBitLastSetU32((uint32_t)i32);
4561}
4562
4563/**
4564 * Reverse the byte order of the given 16-bit integer.
4565 *
4566 * @returns Revert
4567 * @param u16 16-bit integer value.
4568 */
4569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4570DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
4571#else
4572DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
4573{
4574# if RT_INLINE_ASM_USES_INTRIN
4575 u16 = _byteswap_ushort(u16);
4576# elif RT_INLINE_ASM_GNU_STYLE
4577 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
4578# else
4579 _asm
4580 {
4581 mov ax, [u16]
4582 ror ax, 8
4583 mov [u16], ax
4584 }
4585# endif
4586 return u16;
4587}
4588#endif
4589
4590
4591/**
4592 * Reverse the byte order of the given 32-bit integer.
4593 *
4594 * @returns Revert
4595 * @param u32 32-bit integer value.
4596 */
4597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4598DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
4599#else
4600DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4601{
4602# if RT_INLINE_ASM_USES_INTRIN
4603 u32 = _byteswap_ulong(u32);
4604# elif RT_INLINE_ASM_GNU_STYLE
4605 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4606# else
4607 _asm
4608 {
4609 mov eax, [u32]
4610 bswap eax
4611 mov [u32], eax
4612 }
4613# endif
4614 return u32;
4615}
4616#endif
4617
4618
4619/**
4620 * Reverse the byte order of the given 64-bit integer.
4621 *
4622 * @returns Revert
4623 * @param u64 64-bit integer value.
4624 */
4625DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
4626{
4627#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
4628 u64 = _byteswap_uint64(u64);
4629#else
4630 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
4631 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
4632#endif
4633 return u64;
4634}
4635
4636
4637/** @} */
4638
4639
4640/** @} */
4641
4642#endif
4643
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette