VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 60149

Last change on this file since 60149 was 60077, checked in by vboxsync, 9 years ago

introduced RT_GNUC_PREREQ

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the _MSC_VER >= 1400 (Visual C++ 2005) intrinsics.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
98 * @remarks The difference between ordered and unordered atomic operations is that
99 * the former will complete outstanding reads and writes before continuing
100 * while the latter doesn't make any promises about the order. Ordered
101 * operations don't, it seems, make any 100% promise with respect to whether
102 * the operation will complete before any subsequent memory access.
103 * (please, correct if wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
114 * static inline uint32_t rdmsr_low(int idx)
115 * {
116 * uint32_t low;
117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
118 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as the input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
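
/* Example (minimal sketch): an ordered exchange publishes a new state and acts
 * as a full barrier, while the unordered read is just a plain atomic load with
 * no ordering promise.  The variable g_u32State and the two helpers are
 * hypothetical.
 *
 * @code
 * static volatile uint32_t g_u32State = 0;
 *
 * DECLINLINE(uint32_t) myPublishState(uint32_t uNew)
 * {
 *     return ASMAtomicXchgU32(&g_u32State, uNew);   // ordered, returns the old state
 * }
 *
 * DECLINLINE(uint32_t) myPeekState(void)
 * {
 *     return ASMAtomicUoReadU32(&g_u32State);       // unordered (note the Uo)
 * }
 * @endcode
 */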
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some 4.3.x register allocation issues in this version of
137 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
138 * definitely not for 5.x */
139#define RT_INLINE_ASM_GCC_4_3_X_X86 (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
140#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
141# define RT_INLINE_ASM_GCC_4_3_X_X86 0
142#endif
143
144/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
145 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
146 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds. PIC
147 * mode, x86.
148 *
149 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
150 * when in PIC mode on x86.
151 */
152#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
153# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
154# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
155# else
156# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
157 ( (defined(PIC) || defined(__PIC__)) \
158 && defined(RT_ARCH_X86) \
159 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
160 || defined(RT_OS_DARWIN)) )
161# endif
162#endif
163
164
165/** @def ASMReturnAddress
166 * Gets the return address of the current (or calling if you like) function or method.
167 */
168#ifdef _MSC_VER
169# ifdef __cplusplus
170extern "C"
171# endif
172void * _ReturnAddress(void);
173# pragma intrinsic(_ReturnAddress)
174# define ASMReturnAddress() _ReturnAddress()
175#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
176# define ASMReturnAddress() __builtin_return_address(0)
177#elif defined(__WATCOMC__)
178# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
179#else
180# error "Unsupported compiler."
181#endif
182
183
184/**
185 * Compiler memory barrier.
186 *
187 * Ensure that the compiler does not use any cached (register/tmp stack) memory
188 * values or any outstanding writes when returning from this function.
189 *
190 * This function must be used if non-volatile data is modified by a
191 * device or the VMM. Typical cases are port access, MMIO access,
192 * trapping instructions, etc.
193 */
194#if RT_INLINE_ASM_GNU_STYLE
195# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
196#elif RT_INLINE_ASM_USES_INTRIN
197# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
198#elif defined(__WATCOMC__)
199void ASMCompilerBarrier(void);
200#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
201DECLINLINE(void) ASMCompilerBarrier(void)
202{
203 __asm
204 {
205 }
206}
207#endif
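
/* Example (minimal sketch): polling a status word that a device updates behind
 * the compiler's back.  Without the barrier the compiler could keep
 * *pu32Status cached in a register and spin forever.  The ready-bit value and
 * the helper are hypothetical.
 *
 * @code
 * DECLINLINE(void) myWaitForDeviceReady(uint32_t const *pu32Status)
 * {
 *     while (!(*pu32Status & UINT32_C(0x00000001)))  // hypothetical ready bit
 *         ASMCompilerBarrier();                      // forces a fresh read each iteration
 * }
 * @endcode
 */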
208
209
210/** @def ASMBreakpoint
211 * Debugger Breakpoint.
212 * @deprecated Use RT_BREAKPOINT instead.
213 * @internal
214 */
215#define ASMBreakpoint() RT_BREAKPOINT()
216
217
218/**
219 * Spinloop hint for platforms that have these, empty function on the other
220 * platforms.
221 *
222 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
223 * spin locks.
224 */
225#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
226DECLASM(void) ASMNopPause(void);
227#else
228DECLINLINE(void) ASMNopPause(void)
229{
230# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
231# if RT_INLINE_ASM_GNU_STYLE
232 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
233# else
234 __asm {
235 _emit 0f3h
236 _emit 090h
237 }
238# endif
239# else
240 /* dummy */
241# endif
242}
243#endif
244
245
246/**
247 * Atomically Exchange an unsigned 8-bit value, ordered.
248 *
249 * @returns Current *pu8 value
250 * @param pu8 Pointer to the 8-bit variable to update.
251 * @param u8 The 8-bit value to assign to *pu8.
252 */
253#if RT_INLINE_ASM_EXTERNAL
254DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
255#else
256DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
257{
258# if RT_INLINE_ASM_GNU_STYLE
259 __asm__ __volatile__("xchgb %0, %1\n\t"
260 : "=m" (*pu8),
261 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
262 : "1" (u8),
263 "m" (*pu8));
264# else
265 __asm
266 {
267# ifdef RT_ARCH_AMD64
268 mov rdx, [pu8]
269 mov al, [u8]
270 xchg [rdx], al
271 mov [u8], al
272# else
273 mov edx, [pu8]
274 mov al, [u8]
275 xchg [edx], al
276 mov [u8], al
277# endif
278 }
279# endif
280 return u8;
281}
282#endif
283
284
285/**
286 * Atomically Exchange a signed 8-bit value, ordered.
287 *
288 * @returns Current *pi8 value
289 * @param pi8 Pointer to the 8-bit variable to update.
290 * @param i8 The 8-bit value to assign to *pi8.
291 */
292DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
293{
294 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
295}
296
297
298/**
299 * Atomically Exchange a bool value, ordered.
300 *
301 * @returns Current *pf value
302 * @param pf Pointer to the 8-bit variable to update.
303 * @param f The boolean value to assign to *pf.
304 */
305DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
306{
307#ifdef _MSC_VER
308 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
309#else
310 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
311#endif
312}
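
/* Example (minimal sketch): a one-shot gate built on the ordered bool
 * exchange; exactly one caller sees the old value as false and wins.
 * g_fInitStarted and the helper are hypothetical.
 *
 * @code
 * static volatile bool g_fInitStarted = false;
 *
 * DECLINLINE(bool) myClaimInit(void)
 * {
 *     return !ASMAtomicXchgBool(&g_fInitStarted, true);  // true for the first caller only
 * }
 * @endcode
 */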
313
314
315/**
316 * Atomically Exchange an unsigned 16-bit value, ordered.
317 *
318 * @returns Current *pu16 value
319 * @param pu16 Pointer to the 16-bit variable to update.
320 * @param u16 The 16-bit value to assign to *pu16.
321 */
322#if RT_INLINE_ASM_EXTERNAL
323DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
324#else
325DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
326{
327# if RT_INLINE_ASM_GNU_STYLE
328 __asm__ __volatile__("xchgw %0, %1\n\t"
329 : "=m" (*pu16),
330 "=r" (u16)
331 : "1" (u16),
332 "m" (*pu16));
333# else
334 __asm
335 {
336# ifdef RT_ARCH_AMD64
337 mov rdx, [pu16]
338 mov ax, [u16]
339 xchg [rdx], ax
340 mov [u16], ax
341# else
342 mov edx, [pu16]
343 mov ax, [u16]
344 xchg [edx], ax
345 mov [u16], ax
346# endif
347 }
348# endif
349 return u16;
350}
351#endif
352
353
354/**
355 * Atomically Exchange a signed 16-bit value, ordered.
356 *
357 * @returns Current *pi16 value
358 * @param pi16 Pointer to the 16-bit variable to update.
359 * @param i16 The 16-bit value to assign to *pi16.
360 */
361DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
362{
363 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
364}
365
366
367/**
368 * Atomically Exchange an unsigned 32-bit value, ordered.
369 *
370 * @returns Current *pu32 value
371 * @param pu32 Pointer to the 32-bit variable to update.
372 * @param u32 The 32-bit value to assign to *pu32.
373 *
374 * @remarks Does not work on 286 and earlier.
375 */
376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
377DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
378#else
379DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
380{
381# if RT_INLINE_ASM_GNU_STYLE
382 __asm__ __volatile__("xchgl %0, %1\n\t"
383 : "=m" (*pu32),
384 "=r" (u32)
385 : "1" (u32),
386 "m" (*pu32));
387
388# elif RT_INLINE_ASM_USES_INTRIN
389 u32 = _InterlockedExchange((long *)pu32, u32);
390
391# else
392 __asm
393 {
394# ifdef RT_ARCH_AMD64
395 mov rdx, [pu32]
396 mov eax, u32
397 xchg [rdx], eax
398 mov [u32], eax
399# else
400 mov edx, [pu32]
401 mov eax, u32
402 xchg [edx], eax
403 mov [u32], eax
404# endif
405 }
406# endif
407 return u32;
408}
409#endif
410
411
412/**
413 * Atomically Exchange a signed 32-bit value, ordered.
414 *
415 * @returns Current *pi32 value
416 * @param pi32 Pointer to the 32-bit variable to update.
417 * @param i32 The 32-bit value to assign to *pi32.
418 */
419DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
420{
421 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
422}
423
424
425/**
426 * Atomically Exchange an unsigned 64-bit value, ordered.
427 *
428 * @returns Current *pu64 value
429 * @param pu64 Pointer to the 64-bit variable to update.
430 * @param u64 The 64-bit value to assign to *pu64.
431 *
432 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
433 */
434#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
435 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
436DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
437#else
438DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
439{
440# if defined(RT_ARCH_AMD64)
441# if RT_INLINE_ASM_USES_INTRIN
442 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
443
444# elif RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("xchgq %0, %1\n\t"
446 : "=m" (*pu64),
447 "=r" (u64)
448 : "1" (u64),
449 "m" (*pu64));
450# else
451 __asm
452 {
453 mov rdx, [pu64]
454 mov rax, [u64]
455 xchg [rdx], rax
456 mov [u64], rax
457 }
458# endif
459# else /* !RT_ARCH_AMD64 */
460# if RT_INLINE_ASM_GNU_STYLE
461# if defined(PIC) || defined(__PIC__)
462 uint32_t u32EBX = (uint32_t)u64;
463 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
464 "xchgl %%ebx, %3\n\t"
465 "1:\n\t"
466 "lock; cmpxchg8b (%5)\n\t"
467 "jnz 1b\n\t"
468 "movl %3, %%ebx\n\t"
469 /*"xchgl %%esi, %5\n\t"*/
470 : "=A" (u64),
471 "=m" (*pu64)
472 : "0" (*pu64),
473 "m" ( u32EBX ),
474 "c" ( (uint32_t)(u64 >> 32) ),
475 "S" (pu64));
476# else /* !PIC */
477 __asm__ __volatile__("1:\n\t"
478 "lock; cmpxchg8b %1\n\t"
479 "jnz 1b\n\t"
480 : "=A" (u64),
481 "=m" (*pu64)
482 : "0" (*pu64),
483 "b" ( (uint32_t)u64 ),
484 "c" ( (uint32_t)(u64 >> 32) ));
485# endif
486# else
487 __asm
488 {
489 mov ebx, dword ptr [u64]
490 mov ecx, dword ptr [u64 + 4]
491 mov edi, pu64
492 mov eax, dword ptr [edi]
493 mov edx, dword ptr [edi + 4]
494 retry:
495 lock cmpxchg8b [edi]
496 jnz retry
497 mov dword ptr [u64], eax
498 mov dword ptr [u64 + 4], edx
499 }
500# endif
501# endif /* !RT_ARCH_AMD64 */
502 return u64;
503}
504#endif
505
506
507/**
508 * Atomically Exchange a signed 64-bit value, ordered.
509 *
510 * @returns Current *pi64 value
511 * @param pi64 Pointer to the 64-bit variable to update.
512 * @param i64 The 64-bit value to assign to *pi64.
513 */
514DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
515{
516 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
517}
518
519
520/**
521 * Atomically Exchange a pointer value, ordered.
522 *
523 * @returns Current *ppv value
524 * @param ppv Pointer to the pointer variable to update.
525 * @param pv The pointer value to assign to *ppv.
526 */
527DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
528{
529#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
530 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
531#elif ARCH_BITS == 64
532 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
533#else
534# error "ARCH_BITS is bogus"
535#endif
536}
537
538
539/**
540 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
541 *
542 * @returns Current *pv value
543 * @param ppv Pointer to the pointer variable to update.
544 * @param pv The pointer value to assign to *ppv.
545 * @param Type The type of *ppv, sans volatile.
546 */
547#ifdef __GNUC__
548# define ASMAtomicXchgPtrT(ppv, pv, Type) \
549 __extension__ \
550 ({\
551 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
552 Type const pvTypeChecked = (pv); \
553 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
554 pvTypeCheckedRet; \
555 })
556#else
557# define ASMAtomicXchgPtrT(ppv, pv, Type) \
558 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
559#endif
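
/* Example (minimal sketch): swapping in a new buffer and getting the old one
 * back without casts.  MYBUF and g_pCurBuf are hypothetical.
 *
 * @code
 * typedef struct MYBUF { size_t cb; uint8_t ab[512]; } MYBUF;
 * static MYBUF * volatile g_pCurBuf = NULL;
 *
 * DECLINLINE(MYBUF *) mySwapBuffer(MYBUF *pNew)
 * {
 *     return ASMAtomicXchgPtrT(&g_pCurBuf, pNew, MYBUF *);
 * }
 * @endcode
 */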
560
561
562/**
563 * Atomically Exchange a raw-mode context pointer value, ordered.
564 *
565 * @returns Current *ppvRC value
566 * @param ppvRC Pointer to the pointer variable to update.
567 * @param pvRC The pointer value to assign to *ppvRC.
568 */
569DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
570{
571 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
572}
573
574
575/**
576 * Atomically Exchange a ring-0 pointer value, ordered.
577 *
578 * @returns Current *ppvR0 value
579 * @param ppvR0 Pointer to the pointer variable to update.
580 * @param pvR0 The pointer value to assign to *ppvR0.
581 */
582DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
583{
584#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
585 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
586#elif R0_ARCH_BITS == 64
587 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
588#else
589# error "R0_ARCH_BITS is bogus"
590#endif
591}
592
593
594/**
595 * Atomically Exchange a ring-3 pointer value, ordered.
596 *
597 * @returns Current *ppvR3 value
598 * @param ppvR3 Pointer to the pointer variable to update.
599 * @param pvR3 The pointer value to assign to *ppvR3.
600 */
601DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
602{
603#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
604 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
605#elif R3_ARCH_BITS == 64
606 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
607#else
608# error "R3_ARCH_BITS is bogus"
609#endif
610}
611
612
613/** @def ASMAtomicXchgHandle
614 * Atomically Exchange a typical IPRT handle value, ordered.
615 *
616 * @param ph Pointer to the value to update.
617 * @param hNew The new value to assign to *ph.
618 * @param phRes Where to store the current *ph value.
619 *
620 * @remarks This doesn't currently work for all handles (like RTFILE).
621 */
622#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
623# define ASMAtomicXchgHandle(ph, hNew, phRes) \
624 do { \
625 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
626 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
627 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
628 } while (0)
629#elif HC_ARCH_BITS == 64
630# define ASMAtomicXchgHandle(ph, hNew, phRes) \
631 do { \
632 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
633 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
634 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
635 } while (0)
636#else
637# error HC_ARCH_BITS
638#endif
639
640
641/**
642 * Atomically Exchange a value whose size might differ
643 * between platforms or compilers, ordered.
644 *
645 * @param pu Pointer to the variable to update.
646 * @param uNew The value to assign to *pu.
647 * @todo This is busted as it's missing the result argument.
648 */
649#define ASMAtomicXchgSize(pu, uNew) \
650 do { \
651 switch (sizeof(*(pu))) { \
652 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
653 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
654 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
655 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
656 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
657 } \
658 } while (0)
659
660/**
661 * Atomically Exchange a value whose size might differ
662 * between platforms or compilers, ordered.
663 *
664 * @param pu Pointer to the variable to update.
665 * @param uNew The value to assign to *pu.
666 * @param puRes Where to store the current *pu value.
667 */
668#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
669 do { \
670 switch (sizeof(*(pu))) { \
671 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
672 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
673 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
674 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
675 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
676 } \
677 } while (0)
678
679
680
681/**
682 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
683 *
684 * @returns true if xchg was done.
685 * @returns false if xchg wasn't done.
686 *
687 * @param pu8 Pointer to the value to update.
688 * @param u8New The new value to assign to *pu8.
689 * @param u8Old The old value to compare *pu8 with.
690 *
691 * @remarks x86: Requires a 486 or later.
692 */
693#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
694DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
695#else
696DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
697{
698 uint8_t u8Ret;
699 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
700 "setz %1\n\t"
701 : "=m" (*pu8),
702 "=qm" (u8Ret),
703 "=a" (u8Old)
704 : "q" (u8New),
705 "2" (u8Old),
706 "m" (*pu8));
707 return (bool)u8Ret;
708}
709#endif
710
711
712/**
713 * Atomically Compare and Exchange a signed 8-bit value, ordered.
714 *
715 * @returns true if xchg was done.
716 * @returns false if xchg wasn't done.
717 *
718 * @param pi8 Pointer to the value to update.
719 * @param i8New The new value to assign to *pi8.
720 * @param i8Old The old value to compare *pi8 with.
721 *
722 * @remarks x86: Requires a 486 or later.
723 */
724DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
725{
726 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
727}
728
729
730/**
731 * Atomically Compare and Exchange a bool value, ordered.
732 *
733 * @returns true if xchg was done.
734 * @returns false if xchg wasn't done.
735 *
736 * @param pf Pointer to the value to update.
737 * @param fNew The new value to assign to *pf.
738 * @param fOld The old value to compare *pf with.
739 *
740 * @remarks x86: Requires a 486 or later.
741 */
742DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
743{
744 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
745}
746
747
748/**
749 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
750 *
751 * @returns true if xchg was done.
752 * @returns false if xchg wasn't done.
753 *
754 * @param pu32 Pointer to the value to update.
755 * @param u32New The new value to assign to *pu32.
756 * @param u32Old The old value to compare *pu32 with.
757 *
758 * @remarks x86: Requires a 486 or later.
759 */
760#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
761DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
762#else
763DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
764{
765# if RT_INLINE_ASM_GNU_STYLE
766 uint8_t u8Ret;
767 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
768 "setz %1\n\t"
769 : "=m" (*pu32),
770 "=qm" (u8Ret),
771 "=a" (u32Old)
772 : "r" (u32New),
773 "2" (u32Old),
774 "m" (*pu32));
775 return (bool)u8Ret;
776
777# elif RT_INLINE_ASM_USES_INTRIN
778 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
779
780# else
781 uint32_t u32Ret;
782 __asm
783 {
784# ifdef RT_ARCH_AMD64
785 mov rdx, [pu32]
786# else
787 mov edx, [pu32]
788# endif
789 mov eax, [u32Old]
790 mov ecx, [u32New]
791# ifdef RT_ARCH_AMD64
792 lock cmpxchg [rdx], ecx
793# else
794 lock cmpxchg [edx], ecx
795# endif
796 setz al
797 movzx eax, al
798 mov [u32Ret], eax
799 }
800 return !!u32Ret;
801# endif
802}
803#endif
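
/* Example (minimal sketch): a bounded increment that only bumps the counter
 * while it is below a cap, retrying when another thread raced us.
 * MY_MAX_USERS and g_cUsers are hypothetical.
 *
 * @code
 * #define MY_MAX_USERS 64
 * static volatile uint32_t g_cUsers = 0;
 *
 * DECLINLINE(bool) myTryAddUser(void)
 * {
 *     for (;;)
 *     {
 *         uint32_t cOld = ASMAtomicReadU32(&g_cUsers);
 *         if (cOld >= MY_MAX_USERS)
 *             return false;                              // full
 *         if (ASMAtomicCmpXchgU32(&g_cUsers, cOld + 1, cOld))
 *             return true;                               // we won the race
 *         // lost the race, re-read and try again
 *     }
 * }
 * @endcode
 */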
804
805
806/**
807 * Atomically Compare and Exchange a signed 32-bit value, ordered.
808 *
809 * @returns true if xchg was done.
810 * @returns false if xchg wasn't done.
811 *
812 * @param pi32 Pointer to the value to update.
813 * @param i32New The new value to assign to *pi32.
814 * @param i32Old The old value to compare *pi32 with.
815 *
816 * @remarks x86: Requires a 486 or later.
817 */
818DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
819{
820 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
821}
822
823
824/**
825 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
826 *
827 * @returns true if xchg was done.
828 * @returns false if xchg wasn't done.
829 *
830 * @param pu64 Pointer to the 64-bit variable to update.
831 * @param u64New The 64-bit value to assign to *pu64.
832 * @param u64Old The value to compare with.
833 *
834 * @remarks x86: Requires a Pentium or later.
835 */
836#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
837 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
838DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
839#else
840DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
841{
842# if RT_INLINE_ASM_USES_INTRIN
843 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
844
845# elif defined(RT_ARCH_AMD64)
846# if RT_INLINE_ASM_GNU_STYLE
847 uint8_t u8Ret;
848 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
849 "setz %1\n\t"
850 : "=m" (*pu64),
851 "=qm" (u8Ret),
852 "=a" (u64Old)
853 : "r" (u64New),
854 "2" (u64Old),
855 "m" (*pu64));
856 return (bool)u8Ret;
857# else
858 bool fRet;
859 __asm
860 {
861 mov rdx, [pu64]
862 mov rax, [u64Old]
863 mov rcx, [u64New]
864 lock cmpxchg [rdx], rcx
865 setz al
866 mov [fRet], al
867 }
868 return fRet;
869# endif
870# else /* !RT_ARCH_AMD64 */
871 uint32_t u32Ret;
872# if RT_INLINE_ASM_GNU_STYLE
873# if defined(PIC) || defined(__PIC__)
874 uint32_t u32EBX = (uint32_t)u64New;
875 uint32_t u32Spill;
876 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
877 "lock; cmpxchg8b (%6)\n\t"
878 "setz %%al\n\t"
879 "movl %4, %%ebx\n\t"
880 "movzbl %%al, %%eax\n\t"
881 : "=a" (u32Ret),
882 "=d" (u32Spill),
883# if RT_GNUC_PREREQ(4, 3)
884 "+m" (*pu64)
885# else
886 "=m" (*pu64)
887# endif
888 : "A" (u64Old),
889 "m" ( u32EBX ),
890 "c" ( (uint32_t)(u64New >> 32) ),
891 "S" (pu64));
892# else /* !PIC */
893 uint32_t u32Spill;
894 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
895 "setz %%al\n\t"
896 "movzbl %%al, %%eax\n\t"
897 : "=a" (u32Ret),
898 "=d" (u32Spill),
899 "+m" (*pu64)
900 : "A" (u64Old),
901 "b" ( (uint32_t)u64New ),
902 "c" ( (uint32_t)(u64New >> 32) ));
903# endif
904 return (bool)u32Ret;
905# else
906 __asm
907 {
908 mov ebx, dword ptr [u64New]
909 mov ecx, dword ptr [u64New + 4]
910 mov edi, [pu64]
911 mov eax, dword ptr [u64Old]
912 mov edx, dword ptr [u64Old + 4]
913 lock cmpxchg8b [edi]
914 setz al
915 movzx eax, al
916 mov dword ptr [u32Ret], eax
917 }
918 return !!u32Ret;
919# endif
920# endif /* !RT_ARCH_AMD64 */
921}
922#endif
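
/* Example (minimal sketch): atomically OR bits into a shared 64-bit feature
 * mask with a cmpxchg retry loop.  g_fFeatures is hypothetical.
 *
 * @code
 * static volatile uint64_t g_fFeatures = 0;
 *
 * DECLINLINE(void) myOrFeatureBits(uint64_t fBits)
 * {
 *     uint64_t fOld;
 *     do
 *         fOld = ASMAtomicReadU64(&g_fFeatures);
 *     while (!ASMAtomicCmpXchgU64(&g_fFeatures, fOld | fBits, fOld));
 * }
 * @endcode
 */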
923
924
925/**
926 * Atomically Compare and exchange a signed 64-bit value, ordered.
927 *
928 * @returns true if xchg was done.
929 * @returns false if xchg wasn't done.
930 *
931 * @param pi64 Pointer to the 64-bit variable to update.
932 * @param i64 The 64-bit value to assign to *pi64.
933 * @param i64Old The value to compare with.
934 *
935 * @remarks x86: Requires a Pentium or later.
936 */
937DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
938{
939 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
940}
941
942
943/**
944 * Atomically Compare and Exchange a pointer value, ordered.
945 *
946 * @returns true if xchg was done.
947 * @returns false if xchg wasn't done.
948 *
949 * @param ppv Pointer to the value to update.
950 * @param pvNew The new value to assign to *ppv.
951 * @param pvOld The old value to compare *ppv with.
952 *
953 * @remarks x86: Requires a 486 or later.
954 */
955DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
956{
957#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
958 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
959#elif ARCH_BITS == 64
960 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
961#else
962# error "ARCH_BITS is bogus"
963#endif
964}
965
966
967/**
968 * Atomically Compare and Exchange a pointer value, ordered.
969 *
970 * @returns true if xchg was done.
971 * @returns false if xchg wasn't done.
972 *
973 * @param ppv Pointer to the value to update.
974 * @param pvNew The new value to assign to *ppv.
975 * @param pvOld The old value to compare *ppv with.
976 *
977 * @remarks This is relatively type safe on GCC platforms.
978 * @remarks x86: Requires a 486 or later.
979 */
980#ifdef __GNUC__
981# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
982 __extension__ \
983 ({\
984 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
985 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
986 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
987 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
988 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
989 fMacroRet; \
990 })
991#else
992# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
993 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
994#endif
995
996
997/** @def ASMAtomicCmpXchgHandle
998 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
999 *
1000 * @param ph Pointer to the value to update.
1001 * @param hNew The new value to assign to *ph.
1002 * @param hOld The old value to compare *ph with.
1003 * @param fRc Where to store the result.
1004 *
1005 * @remarks This doesn't currently work for all handles (like RTFILE).
1006 * @remarks x86: Requires a 486 or later.
1007 */
1008#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1009# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1010 do { \
1011 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1012 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1013 } while (0)
1014#elif HC_ARCH_BITS == 64
1015# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1016 do { \
1017 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1018 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1019 } while (0)
1020#else
1021# error HC_ARCH_BITS
1022#endif
1023
1024
1025/** @def ASMAtomicCmpXchgSize
1026 * Atomically Compare and Exchange a value whose size might differ
1027 * between platforms or compilers, ordered.
1028 *
1029 * @param pu Pointer to the value to update.
1030 * @param uNew The new value to assign to *pu.
1031 * @param uOld The old value to compare *pu with.
1032 * @param fRc Where to store the result.
1033 *
1034 * @remarks x86: Requires a 486 or later.
1035 */
1036#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1037 do { \
1038 switch (sizeof(*(pu))) { \
1039 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1040 break; \
1041 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1042 break; \
1043 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1044 (fRc) = false; \
1045 break; \
1046 } \
1047 } while (0)
1048
1049
1050/**
1051 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1052 * passing back old value, ordered.
1053 *
1054 * @returns true if xchg was done.
1055 * @returns false if xchg wasn't done.
1056 *
1057 * @param pu32 Pointer to the value to update.
1058 * @param u32New The new value to assign to *pu32.
1059 * @param u32Old The old value to compare *pu32 with.
1060 * @param pu32Old Pointer to store the old value at.
1061 *
1062 * @remarks x86: Requires a 486 or later.
1063 */
1064#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1065DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1066#else
1067DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1068{
1069# if RT_INLINE_ASM_GNU_STYLE
1070 uint8_t u8Ret;
1071 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1072 "setz %1\n\t"
1073 : "=m" (*pu32),
1074 "=qm" (u8Ret),
1075 "=a" (*pu32Old)
1076 : "r" (u32New),
1077 "a" (u32Old),
1078 "m" (*pu32));
1079 return (bool)u8Ret;
1080
1081# elif RT_INLINE_ASM_USES_INTRIN
1082 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1083
1084# else
1085 uint32_t u32Ret;
1086 __asm
1087 {
1088# ifdef RT_ARCH_AMD64
1089 mov rdx, [pu32]
1090# else
1091 mov edx, [pu32]
1092# endif
1093 mov eax, [u32Old]
1094 mov ecx, [u32New]
1095# ifdef RT_ARCH_AMD64
1096 lock cmpxchg [rdx], ecx
1097 mov rdx, [pu32Old]
1098 mov [rdx], eax
1099# else
1100 lock cmpxchg [edx], ecx
1101 mov edx, [pu32Old]
1102 mov [edx], eax
1103# endif
1104 setz al
1105 movzx eax, al
1106 mov [u32Ret], eax
1107 }
1108 return !!u32Ret;
1109# endif
1110}
1111#endif
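
/* Example (minimal sketch): because the Ex variant passes back the value it
 * actually saw, a retry loop needs no separate re-read.  g_u32Mode and the
 * helper are hypothetical; the mode is only ever raised.
 *
 * @code
 * static volatile uint32_t g_u32Mode = 0;
 *
 * DECLINLINE(uint32_t) myRaiseMode(uint32_t uNewMode)
 * {
 *     uint32_t uSeen = ASMAtomicReadU32(&g_u32Mode);
 *     while (uNewMode > uSeen)
 *     {
 *         if (ASMAtomicCmpXchgExU32(&g_u32Mode, uNewMode, uSeen, &uSeen))
 *             return uNewMode;    // we installed it
 *         // uSeen now holds the value that beat us; the loop re-checks it
 *     }
 *     return uSeen;               // an equal or higher mode was already set
 * }
 * @endcode
 */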
1112
1113
1114/**
1115 * Atomically Compare and Exchange a signed 32-bit value, additionally
1116 * passing back old value, ordered.
1117 *
1118 * @returns true if xchg was done.
1119 * @returns false if xchg wasn't done.
1120 *
1121 * @param pi32 Pointer to the value to update.
1122 * @param i32New The new value to assign to *pi32.
1123 * @param i32Old The old value to compare *pi32 with.
1124 * @param pi32Old Pointer to store the old value at.
1125 *
1126 * @remarks x86: Requires a 486 or later.
1127 */
1128DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1129{
1130 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1131}
1132
1133
1134/**
1135 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1136 * passing back old value, ordered.
1137 *
1138 * @returns true if xchg was done.
1139 * @returns false if xchg wasn't done.
1140 *
1141 * @param pu64 Pointer to the 64-bit variable to update.
1142 * @param u64New The 64-bit value to assign to *pu64.
1143 * @param u64Old The value to compare with.
1144 * @param pu64Old Pointer to store the old value at.
1145 *
1146 * @remarks x86: Requires a Pentium or later.
1147 */
1148#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1149 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1150DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1151#else
1152DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1153{
1154# if RT_INLINE_ASM_USES_INTRIN
1155 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1156
1157# elif defined(RT_ARCH_AMD64)
1158# if RT_INLINE_ASM_GNU_STYLE
1159 uint8_t u8Ret;
1160 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1161 "setz %1\n\t"
1162 : "=m" (*pu64),
1163 "=qm" (u8Ret),
1164 "=a" (*pu64Old)
1165 : "r" (u64New),
1166 "a" (u64Old),
1167 "m" (*pu64));
1168 return (bool)u8Ret;
1169# else
1170 bool fRet;
1171 __asm
1172 {
1173 mov rdx, [pu64]
1174 mov rax, [u64Old]
1175 mov rcx, [u64New]
1176 lock cmpxchg [rdx], rcx
1177 mov rdx, [pu64Old]
1178 mov [rdx], rax
1179 setz al
1180 mov [fRet], al
1181 }
1182 return fRet;
1183# endif
1184# else /* !RT_ARCH_AMD64 */
1185# if RT_INLINE_ASM_GNU_STYLE
1186 uint64_t u64Ret;
1187# if defined(PIC) || defined(__PIC__)
1188 /* NB: this code uses a memory clobber description, because the clean
1189 * solution with an output value for *pu64 makes gcc run out of registers.
1190 * This will cause suboptimal code, and anyone with a better solution is
1191 * welcome to improve this. */
1192 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1193 "lock; cmpxchg8b %3\n\t"
1194 "xchgl %%ebx, %1\n\t"
1195 : "=A" (u64Ret)
1196 : "DS" ((uint32_t)u64New),
1197 "c" ((uint32_t)(u64New >> 32)),
1198 "m" (*pu64),
1199 "0" (u64Old)
1200 : "memory" );
1201# else /* !PIC */
1202 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1203 : "=A" (u64Ret),
1204 "=m" (*pu64)
1205 : "b" ((uint32_t)u64New),
1206 "c" ((uint32_t)(u64New >> 32)),
1207 "m" (*pu64),
1208 "0" (u64Old));
1209# endif
1210 *pu64Old = u64Ret;
1211 return u64Ret == u64Old;
1212# else
1213 uint32_t u32Ret;
1214 __asm
1215 {
1216 mov ebx, dword ptr [u64New]
1217 mov ecx, dword ptr [u64New + 4]
1218 mov edi, [pu64]
1219 mov eax, dword ptr [u64Old]
1220 mov edx, dword ptr [u64Old + 4]
1221 lock cmpxchg8b [edi]
1222 mov ebx, [pu64Old]
1223 mov [ebx], eax
1224 setz al
1225 movzx eax, al
1226 add ebx, 4
1227 mov [ebx], edx
1228 mov dword ptr [u32Ret], eax
1229 }
1230 return !!u32Ret;
1231# endif
1232# endif /* !RT_ARCH_AMD64 */
1233}
1234#endif
1235
1236
1237/**
1238 * Atomically Compare and exchange a signed 64-bit value, additionally
1239 * passing back old value, ordered.
1240 *
1241 * @returns true if xchg was done.
1242 * @returns false if xchg wasn't done.
1243 *
1244 * @param pi64 Pointer to the 64-bit variable to update.
1245 * @param i64 The 64-bit value to assign to *pi64.
1246 * @param i64Old The value to compare with.
1247 * @param pi64Old Pointer to store the old value at.
1248 *
1249 * @remarks x86: Requires a Pentium or later.
1250 */
1251DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1252{
1253 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1254}
1255
1256/** @def ASMAtomicCmpXchgExHandle
1257 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1258 *
1259 * @param ph Pointer to the value to update.
1260 * @param hNew The new value to assign to *ph.
1261 * @param hOld The old value to compare *ph with.
1262 * @param fRc Where to store the result.
1263 * @param phOldVal Pointer to where to store the old value.
1264 *
1265 * @remarks This doesn't currently work for all handles (like RTFILE).
1266 */
1267#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1268# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1269 do { \
1270 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1271 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1272 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1273 } while (0)
1274#elif HC_ARCH_BITS == 64
1275# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1276 do { \
1277 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1278 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1279 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1280 } while (0)
1281#else
1282# error HC_ARCH_BITS
1283#endif
1284
1285
1286/** @def ASMAtomicCmpXchgExSize
1287 * Atomically Compare and Exchange a value whose size might differ
1288 * between platforms or compilers. Additionally passes back old value.
1289 *
1290 * @param pu Pointer to the value to update.
1291 * @param uNew The new value to assign to *pu.
1292 * @param uOld The old value to compare *pu with.
1293 * @param fRc Where to store the result.
1294 * @param puOldVal Pointer to where to store the old value.
1295 *
1296 * @remarks x86: Requires a 486 or later.
1297 */
1298#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1299 do { \
1300 switch (sizeof(*(pu))) { \
1301 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1302 break; \
1303 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1304 break; \
1305 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1306 (fRc) = false; \
1307 *(puOldVal) = 0; \
1308 break; \
1309 } \
1310 } while (0)
1311
1312
1313/**
1314 * Atomically Compare and Exchange a pointer value, additionally
1315 * passing back old value, ordered.
1316 *
1317 * @returns true if xchg was done.
1318 * @returns false if xchg wasn't done.
1319 *
1320 * @param ppv Pointer to the value to update.
1321 * @param pvNew The new value to assign to *ppv.
1322 * @param pvOld The old value to compare *ppv with.
1323 * @param ppvOld Pointer to store the old value at.
1324 *
1325 * @remarks x86: Requires a 486 or later.
1326 */
1327DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1328{
1329#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1330 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1331#elif ARCH_BITS == 64
1332 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1333#else
1334# error "ARCH_BITS is bogus"
1335#endif
1336}
1337
1338
1339/**
1340 * Atomically Compare and Exchange a pointer value, additionally
1341 * passing back old value, ordered.
1342 *
1343 * @returns true if xchg was done.
1344 * @returns false if xchg wasn't done.
1345 *
1346 * @param ppv Pointer to the value to update.
1347 * @param pvNew The new value to assign to *ppv.
1348 * @param pvOld The old value to compare *ppv with.
1349 * @param ppvOld Pointer to store the old value at.
1350 *
1351 * @remarks This is relatively type safe on GCC platforms.
1352 * @remarks x86: Requires a 486 or later.
1353 */
1354#ifdef __GNUC__
1355# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1356 __extension__ \
1357 ({\
1358 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1359 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1360 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1361 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1362 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1363 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1364 (void **)ppvOldTypeChecked); \
1365 fMacroRet; \
1366 })
1367#else
1368# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1369 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1370#endif
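
/* Example (minimal sketch): pushing onto a lock-free LIFO list; on a failed
 * compare-exchange the Ex variant hands back the head it saw, so the loop
 * does not have to re-read it.  MYITEM and g_pMyListHead are hypothetical.
 *
 * @code
 * typedef struct MYITEM { struct MYITEM *pNext; uint32_t uPayload; } MYITEM;
 * static MYITEM * volatile g_pMyListHead = NULL;
 *
 * DECLINLINE(void) myListPush(MYITEM *pItem)
 * {
 *     MYITEM *pHead = ASMAtomicReadPtrT(&g_pMyListHead, MYITEM *);
 *     for (;;)
 *     {
 *         pItem->pNext = pHead;
 *         if (ASMAtomicCmpXchgExPtr(&g_pMyListHead, pItem, pHead, &pHead))
 *             break;              // pHead was updated with the head we lost to
 *     }
 * }
 * @endcode
 */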
1371
1372
1373/**
1374 * Virtualization unfriendly serializing instruction, always exits.
1375 */
1376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1377DECLASM(void) ASMSerializeInstructionCpuId(void);
1378#else
1379DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1380{
1381# if RT_INLINE_ASM_GNU_STYLE
1382 RTCCUINTREG xAX = 0;
1383# ifdef RT_ARCH_AMD64
1384 __asm__ __volatile__ ("cpuid"
1385 : "=a" (xAX)
1386 : "0" (xAX)
1387 : "rbx", "rcx", "rdx", "memory");
1388# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1389 __asm__ __volatile__ ("push %%ebx\n\t"
1390 "cpuid\n\t"
1391 "pop %%ebx\n\t"
1392 : "=a" (xAX)
1393 : "0" (xAX)
1394 : "ecx", "edx", "memory");
1395# else
1396 __asm__ __volatile__ ("cpuid"
1397 : "=a" (xAX)
1398 : "0" (xAX)
1399 : "ebx", "ecx", "edx", "memory");
1400# endif
1401
1402# elif RT_INLINE_ASM_USES_INTRIN
1403 int aInfo[4];
1404 _ReadWriteBarrier();
1405 __cpuid(aInfo, 0);
1406
1407# else
1408 __asm
1409 {
1410 push ebx
1411 xor eax, eax
1412 cpuid
1413 pop ebx
1414 }
1415# endif
1416}
1417#endif
1418
1419/**
1420 * Virtualization friendly serializing instruction, though more expensive.
1421 */
1422#if RT_INLINE_ASM_EXTERNAL
1423DECLASM(void) ASMSerializeInstructionIRet(void);
1424#else
1425DECLINLINE(void) ASMSerializeInstructionIRet(void)
1426{
1427# if RT_INLINE_ASM_GNU_STYLE
1428# ifdef RT_ARCH_AMD64
1429 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1430 "subq $128, %%rsp\n\t" /*redzone*/
1431 "mov %%ss, %%eax\n\t"
1432 "pushq %%rax\n\t"
1433 "pushq %%r10\n\t"
1434 "pushfq\n\t"
1435 "movl %%cs, %%eax\n\t"
1436 "pushq %%rax\n\t"
1437 "leaq 1f(%%rip), %%rax\n\t"
1438 "pushq %%rax\n\t"
1439 "iretq\n\t"
1440 "1:\n\t"
1441 ::: "rax", "r10", "memory");
1442# else
1443 __asm__ __volatile__ ("pushfl\n\t"
1444 "pushl %%cs\n\t"
1445 "pushl $1f\n\t"
1446 "iretl\n\t"
1447 "1:\n\t"
1448 ::: "memory");
1449# endif
1450
1451# else
1452 __asm
1453 {
1454 pushfd
1455 push cs
1456 push la_ret
1457 iretd
1458 la_ret:
1459 }
1460# endif
1461}
1462#endif
1463
1464/**
1465 * Virtualization friendlier serializing instruction, may still cause exits.
1466 */
1467#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1468DECLASM(void) ASMSerializeInstructionRdTscp(void);
1469#else
1470DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1471{
1472# if RT_INLINE_ASM_GNU_STYLE
1473 /* rdtscp is not supported by ancient linux build VM of course :-( */
1474# ifdef RT_ARCH_AMD64
1475 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1476 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1477# else
1478 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1479 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1480# endif
1481# else
1482# if RT_INLINE_ASM_USES_INTRIN >= 15
1483 uint32_t uIgnore;
1484 _ReadWriteBarrier();
1485 (void)__rdtscp(&uIgnore);
1486 (void)uIgnore;
1487# else
1488 __asm
1489 {
1490 rdtscp
1491 }
1492# endif
1493# endif
1494}
1495#endif
1496
1497
1498/**
1499 * Serialize Instruction.
1500 */
1501#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1502# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1503#else
1504# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1505#endif
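
/* Example (minimal sketch): serializing the instruction stream around a
 * measured region so out-of-order execution cannot leak work across the
 * boundaries.  ASMReadTSC() is assumed to come from the architecture
 * specific IPRT header (iprt/asm-amd64-x86.h), not from this file.
 *
 * @code
 * DECLINLINE(uint64_t) myMeasureCycles(void (*pfnWork)(void))
 * {
 *     uint64_t uStart, uStop;
 *     ASMSerializeInstruction();
 *     uStart = ASMReadTSC();
 *     pfnWork();
 *     ASMSerializeInstruction();
 *     uStop = ASMReadTSC();
 *     return uStop - uStart;
 * }
 * @endcode
 */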
1506
1507
1508/**
1509 * Memory fence, waits for any pending writes and reads to complete.
1510 */
1511DECLINLINE(void) ASMMemoryFence(void)
1512{
1513 /** @todo use mfence? check if all cpus we care for support it. */
1514 uint32_t volatile u32;
1515 ASMAtomicXchgU32(&u32, 0);
1516}
1517
1518
1519/**
1520 * Write fence, waits for any pending writes to complete.
1521 */
1522DECLINLINE(void) ASMWriteFence(void)
1523{
1524 /** @todo use sfence? check if all cpus we care for support it. */
1525 ASMMemoryFence();
1526}
1527
1528
1529/**
1530 * Read fence, waits for any pending reads to complete.
1531 */
1532DECLINLINE(void) ASMReadFence(void)
1533{
1534 /** @todo use lfence? check if all cpus we care for support it. */
1535 ASMMemoryFence();
1536}
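
/* Example (minimal sketch): a producer/consumer handshake where the payload
 * must be globally visible before the flag announcing it, and the flag must
 * be read before the payload.  g_uPayload and g_fReady are hypothetical.
 *
 * @code
 * static volatile uint32_t g_uPayload = 0;
 * static volatile uint32_t g_fReady   = 0;
 *
 * DECLINLINE(void) myProduce(uint32_t uValue)
 * {
 *     g_uPayload = uValue;
 *     ASMWriteFence();            // payload before flag
 *     g_fReady = 1;
 * }
 *
 * DECLINLINE(bool) myTryConsume(uint32_t *puValue)
 * {
 *     if (!ASMAtomicUoReadU32(&g_fReady))
 *         return false;
 *     ASMReadFence();             // flag before payload
 *     *puValue = g_uPayload;
 *     return true;
 * }
 * @endcode
 */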
1537
1538
1539/**
1540 * Atomically reads an unsigned 8-bit value, ordered.
1541 *
1542 * @returns Current *pu8 value
1543 * @param pu8 Pointer to the 8-bit variable to read.
1544 */
1545DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1546{
1547 ASMMemoryFence();
1548 return *pu8; /* byte reads are atomic on x86 */
1549}
1550
1551
1552/**
1553 * Atomically reads an unsigned 8-bit value, unordered.
1554 *
1555 * @returns Current *pu8 value
1556 * @param pu8 Pointer to the 8-bit variable to read.
1557 */
1558DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1559{
1560 return *pu8; /* byte reads are atomic on x86 */
1561}
1562
1563
1564/**
1565 * Atomically reads a signed 8-bit value, ordered.
1566 *
1567 * @returns Current *pi8 value
1568 * @param pi8 Pointer to the 8-bit variable to read.
1569 */
1570DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1571{
1572 ASMMemoryFence();
1573 return *pi8; /* byte reads are atomic on x86 */
1574}
1575
1576
1577/**
1578 * Atomically reads a signed 8-bit value, unordered.
1579 *
1580 * @returns Current *pi8 value
1581 * @param pi8 Pointer to the 8-bit variable to read.
1582 */
1583DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1584{
1585 return *pi8; /* byte reads are atomic on x86 */
1586}
1587
1588
1589/**
1590 * Atomically reads an unsigned 16-bit value, ordered.
1591 *
1592 * @returns Current *pu16 value
1593 * @param pu16 Pointer to the 16-bit variable to read.
1594 */
1595DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1596{
1597 ASMMemoryFence();
1598 Assert(!((uintptr_t)pu16 & 1));
1599 return *pu16;
1600}
1601
1602
1603/**
1604 * Atomically reads an unsigned 16-bit value, unordered.
1605 *
1606 * @returns Current *pu16 value
1607 * @param pu16 Pointer to the 16-bit variable to read.
1608 */
1609DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1610{
1611 Assert(!((uintptr_t)pu16 & 1));
1612 return *pu16;
1613}
1614
1615
1616/**
1617 * Atomically reads a signed 16-bit value, ordered.
1618 *
1619 * @returns Current *pi16 value
1620 * @param pi16 Pointer to the 16-bit variable to read.
1621 */
1622DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1623{
1624 ASMMemoryFence();
1625 Assert(!((uintptr_t)pi16 & 1));
1626 return *pi16;
1627}
1628
1629
1630/**
1631 * Atomically reads a signed 16-bit value, unordered.
1632 *
1633 * @returns Current *pi16 value
1634 * @param pi16 Pointer to the 16-bit variable to read.
1635 */
1636DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1637{
1638 Assert(!((uintptr_t)pi16 & 1));
1639 return *pi16;
1640}
1641
1642
1643/**
1644 * Atomically reads an unsigned 32-bit value, ordered.
1645 *
1646 * @returns Current *pu32 value
1647 * @param pu32 Pointer to the 32-bit variable to read.
1648 */
1649DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1650{
1651 ASMMemoryFence();
1652 Assert(!((uintptr_t)pu32 & 3));
1653 return *pu32;
1654}
1655
1656
1657/**
1658 * Atomically reads an unsigned 32-bit value, unordered.
1659 *
1660 * @returns Current *pu32 value
1661 * @param pu32 Pointer to the 32-bit variable to read.
1662 */
1663DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1664{
1665 Assert(!((uintptr_t)pu32 & 3));
1666 return *pu32;
1667}
1668
1669
1670/**
1671 * Atomically reads a signed 32-bit value, ordered.
1672 *
1673 * @returns Current *pi32 value
1674 * @param pi32 Pointer to the 32-bit variable to read.
1675 */
1676DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1677{
1678 ASMMemoryFence();
1679 Assert(!((uintptr_t)pi32 & 3));
1680 return *pi32;
1681}
1682
1683
1684/**
1685 * Atomically reads a signed 32-bit value, unordered.
1686 *
1687 * @returns Current *pi32 value
1688 * @param pi32 Pointer to the 32-bit variable to read.
1689 */
1690DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1691{
1692 Assert(!((uintptr_t)pi32 & 3));
1693 return *pi32;
1694}
1695
1696
1697/**
1698 * Atomically reads an unsigned 64-bit value, ordered.
1699 *
1700 * @returns Current *pu64 value
1701 * @param pu64 Pointer to the 64-bit variable to read.
1702 * The memory pointed to must be writable.
1703 *
1704 * @remarks This may fault if the memory is read-only!
1705 * @remarks x86: Requires a Pentium or later.
1706 */
1707#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1708 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1709DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1710#else
1711DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1712{
1713 uint64_t u64;
1714# ifdef RT_ARCH_AMD64
1715 Assert(!((uintptr_t)pu64 & 7));
1716/*# if RT_INLINE_ASM_GNU_STYLE
1717 __asm__ __volatile__( "mfence\n\t"
1718 "movq %1, %0\n\t"
1719 : "=r" (u64)
1720 : "m" (*pu64));
1721# else
1722 __asm
1723 {
1724 mfence
1725 mov rdx, [pu64]
1726 mov rax, [rdx]
1727 mov [u64], rax
1728 }
1729# endif*/
1730 ASMMemoryFence();
1731 u64 = *pu64;
1732# else /* !RT_ARCH_AMD64 */
1733# if RT_INLINE_ASM_GNU_STYLE
1734# if defined(PIC) || defined(__PIC__)
1735 uint32_t u32EBX = 0;
1736 Assert(!((uintptr_t)pu64 & 7));
1737 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1738 "lock; cmpxchg8b (%5)\n\t"
1739 "movl %3, %%ebx\n\t"
1740 : "=A" (u64),
1741# if RT_GNUC_PREREQ(4, 3)
1742 "+m" (*pu64)
1743# else
1744 "=m" (*pu64)
1745# endif
1746 : "0" (0ULL),
1747 "m" (u32EBX),
1748 "c" (0),
1749 "S" (pu64));
1750# else /* !PIC */
1751 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1752 : "=A" (u64),
1753 "+m" (*pu64)
1754 : "0" (0ULL),
1755 "b" (0),
1756 "c" (0));
1757# endif
1758# else
1759 Assert(!((uintptr_t)pu64 & 7));
1760 __asm
1761 {
1762 xor eax, eax
1763 xor edx, edx
1764 mov edi, pu64
1765 xor ecx, ecx
1766 xor ebx, ebx
1767 lock cmpxchg8b [edi]
1768 mov dword ptr [u64], eax
1769 mov dword ptr [u64 + 4], edx
1770 }
1771# endif
1772# endif /* !RT_ARCH_AMD64 */
1773 return u64;
1774}
1775#endif
1776
1777
1778/**
1779 * Atomically reads an unsigned 64-bit value, unordered.
1780 *
1781 * @returns Current *pu64 value
1782 * @param pu64 Pointer to the 64-bit variable to read.
1783 * The memory pointed to must be writable.
1784 *
1785 * @remarks This may fault if the memory is read-only!
1786 * @remarks x86: Requires a Pentium or later.
1787 */
1788#if !defined(RT_ARCH_AMD64) \
1789 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1790 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1791DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1792#else
1793DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1794{
1795 uint64_t u64;
1796# ifdef RT_ARCH_AMD64
1797 Assert(!((uintptr_t)pu64 & 7));
1798/*# if RT_INLINE_ASM_GNU_STYLE
1799 Assert(!((uintptr_t)pu64 & 7));
1800 __asm__ __volatile__("movq %1, %0\n\t"
1801 : "=r" (u64)
1802 : "m" (*pu64));
1803# else
1804 __asm
1805 {
1806 mov rdx, [pu64]
1807 mov rax, [rdx]
1808 mov [u64], rax
1809 }
1810# endif */
1811 u64 = *pu64;
1812# else /* !RT_ARCH_AMD64 */
1813# if RT_INLINE_ASM_GNU_STYLE
1814# if defined(PIC) || defined(__PIC__)
1815 uint32_t u32EBX = 0;
1816 uint32_t u32Spill;
1817 Assert(!((uintptr_t)pu64 & 7));
1818 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1819 "xor %%ecx,%%ecx\n\t"
1820 "xor %%edx,%%edx\n\t"
1821 "xchgl %%ebx, %3\n\t"
1822 "lock; cmpxchg8b (%4)\n\t"
1823 "movl %3, %%ebx\n\t"
1824 : "=A" (u64),
1825# if RT_GNUC_PREREQ(4, 3)
1826 "+m" (*pu64),
1827# else
1828 "=m" (*pu64),
1829# endif
1830 "=c" (u32Spill)
1831 : "m" (u32EBX),
1832 "S" (pu64));
1833# else /* !PIC */
1834 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1835 : "=A" (u64),
1836 "+m" (*pu64)
1837 : "0" (0ULL),
1838 "b" (0),
1839 "c" (0));
1840# endif
1841# else
1842 Assert(!((uintptr_t)pu64 & 7));
1843 __asm
1844 {
1845 xor eax, eax
1846 xor edx, edx
1847 mov edi, pu64
1848 xor ecx, ecx
1849 xor ebx, ebx
1850 lock cmpxchg8b [edi]
1851 mov dword ptr [u64], eax
1852 mov dword ptr [u64 + 4], edx
1853 }
1854# endif
1855# endif /* !RT_ARCH_AMD64 */
1856 return u64;
1857}
1858#endif
1859
1860
1861/**
1862 * Atomically reads a signed 64-bit value, ordered.
1863 *
1864 * @returns Current *pi64 value
1865 * @param pi64 Pointer to the 64-bit variable to read.
1866 * The memory pointed to must be writable.
1867 *
1868 * @remarks This may fault if the memory is read-only!
1869 * @remarks x86: Requires a Pentium or later.
1870 */
1871DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1872{
1873 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1874}
1875
1876
1877/**
1878 * Atomically reads a signed 64-bit value, unordered.
1879 *
1880 * @returns Current *pi64 value
1881 * @param pi64 Pointer to the 64-bit variable to read.
1882 * The memory pointed to must be writable.
1883 *
1884 * @remarks This will fault if the memory is read-only!
1885 * @remarks x86: Requires a Pentium or later.
1886 */
1887DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1888{
1889 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1890}
1891
1892
1893/**
1894 * Atomically reads a size_t value, ordered.
1895 *
1896 * @returns Current *pcb value
1897 * @param pcb Pointer to the size_t variable to read.
1898 */
1899DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1900{
1901#if ARCH_BITS == 64
1902 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1903#elif ARCH_BITS == 32
1904 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1905#elif ARCH_BITS == 16
1906 AssertCompileSize(size_t, 2);
1907 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1908#else
1909# error "Unsupported ARCH_BITS value"
1910#endif
1911}
1912
1913
1914/**
1915 * Atomically reads a size_t value, unordered.
1916 *
1917 * @returns Current *pcb value
1918 * @param pcb Pointer to the size_t variable to read.
1919 */
1920DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1921{
1922#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1923 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1924#elif ARCH_BITS == 32
1925 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1926#elif ARCH_BITS == 16
1927 AssertCompileSize(size_t, 2);
1928 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1929#else
1930# error "Unsupported ARCH_BITS value"
1931#endif
1932}
1933
1934
1935/**
1936 * Atomically reads a pointer value, ordered.
1937 *
1938 * @returns Current *pv value
1939 * @param ppv Pointer to the pointer variable to read.
1940 *
1941 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1942 * requires less typing (no casts).
1943 */
1944DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1945{
1946#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1947 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1948#elif ARCH_BITS == 64
1949 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1950#else
1951# error "ARCH_BITS is bogus"
1952#endif
1953}
1954
1955/**
1956 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1957 *
1958 * @returns Current *pv value
1959 * @param ppv Pointer to the pointer variable to read.
1960 * @param Type The type of *ppv, sans volatile.
1961 */
1962#ifdef __GNUC__
1963# define ASMAtomicReadPtrT(ppv, Type) \
1964 __extension__ \
1965 ({\
1966 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1967 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1968 pvTypeChecked; \
1969 })
1970#else
1971# define ASMAtomicReadPtrT(ppv, Type) \
1972 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1973#endif
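
/*
 * Illustrative sketch, not part of the IPRT API: a lock-free reader that
 * fetches a shared list head with ASMAtomicReadPtrT, avoiding the casts
 * needed with ASMAtomicReadPtr.  PMYNODE and g_pMyListHead are hypothetical
 * names used only for this example.
 */
#if 0 /* usage example only */
typedef struct MYNODE *PMYNODE;
extern PMYNODE volatile g_pMyListHead;

DECLINLINE(PMYNODE) myListPeekHead(void)
{
    return ASMAtomicReadPtrT(&g_pMyListHead, PMYNODE); /* ordered read */
}
#endif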
1974
1975
1976/**
1977 * Atomically reads a pointer value, unordered.
1978 *
1979 * @returns Current *pv value
1980 * @param ppv Pointer to the pointer variable to read.
1981 *
1982 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1983 * requires less typing (no casts).
1984 */
1985DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1986{
1987#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1988 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1989#elif ARCH_BITS == 64
1990 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1991#else
1992# error "ARCH_BITS is bogus"
1993#endif
1994}
1995
1996
1997/**
1998 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1999 *
2000 * @returns Current *pv value
2001 * @param ppv Pointer to the pointer variable to read.
2002 * @param Type The type of *ppv, sans volatile.
2003 */
2004#ifdef __GNUC__
2005# define ASMAtomicUoReadPtrT(ppv, Type) \
2006 __extension__ \
2007 ({\
2008 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2009 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2010 pvTypeChecked; \
2011 })
2012#else
2013# define ASMAtomicUoReadPtrT(ppv, Type) \
2014 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2015#endif
2016
2017
2018/**
2019 * Atomically reads a boolean value, ordered.
2020 *
2021 * @returns Current *pf value
2022 * @param pf Pointer to the boolean variable to read.
2023 */
2024DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2025{
2026 ASMMemoryFence();
2027 return *pf; /* byte reads are atomic on x86 */
2028}
2029
2030
2031/**
2032 * Atomically reads a boolean value, unordered.
2033 *
2034 * @returns Current *pf value
2035 * @param pf Pointer to the boolean variable to read.
2036 */
2037DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2038{
2039 return *pf; /* byte reads are atomic on x86 */
2040}
2041
2042
2043/**
2044 * Atomically read a typical IPRT handle value, ordered.
2045 *
2046 * @param ph Pointer to the handle variable to read.
2047 * @param phRes Where to store the result.
2048 *
2049 * @remarks This doesn't currently work for all handles (like RTFILE).
2050 */
2051#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2052# define ASMAtomicReadHandle(ph, phRes) \
2053 do { \
2054 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2055 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2056 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2057 } while (0)
2058#elif HC_ARCH_BITS == 64
2059# define ASMAtomicReadHandle(ph, phRes) \
2060 do { \
2061 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2062 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2063 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2064 } while (0)
2065#else
2066# error HC_ARCH_BITS
2067#endif
2068
2069
2070/**
2071 * Atomically read a typical IPRT handle value, unordered.
2072 *
2073 * @param ph Pointer to the handle variable to read.
2074 * @param phRes Where to store the result.
2075 *
2076 * @remarks This doesn't currently work for all handles (like RTFILE).
2077 */
2078#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2079# define ASMAtomicUoReadHandle(ph, phRes) \
2080 do { \
2081 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2082 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2083 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2084 } while (0)
2085#elif HC_ARCH_BITS == 64
2086# define ASMAtomicUoReadHandle(ph, phRes) \
2087 do { \
2088 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2089 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2090 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2091 } while (0)
2092#else
2093# error HC_ARCH_BITS
2094#endif
2095
2096
2097/**
2098 * Atomically read a value whose size might differ
2099 * between platforms or compilers, ordered.
2100 *
2101 * @param pu Pointer to the variable to read.
2102 * @param puRes Where to store the result.
2103 */
2104#define ASMAtomicReadSize(pu, puRes) \
2105 do { \
2106 switch (sizeof(*(pu))) { \
2107 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2108 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2109 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2110 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2111 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2112 } \
2113 } while (0)
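
/*
 * Illustrative sketch, not part of the IPRT API: ASMAtomicReadSize picks the
 * right fixed-size reader from sizeof(*(pu)), which helps with types whose
 * width varies by target, e.g. RTGCPTR.  MYSTATE is a hypothetical structure.
 */
#if 0 /* usage example only */
typedef struct MYSTATE { RTGCPTR volatile GCPtrLast; } MYSTATE;

DECLINLINE(RTGCPTR) myStateGetLast(MYSTATE *pState)
{
    RTGCPTR GCPtrLast;
    ASMAtomicReadSize(&pState->GCPtrLast, &GCPtrLast);
    return GCPtrLast;
}
#endif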
2114
2115
2116/**
2117 * Atomically read a value whose size might differ
2118 * between platforms or compilers, unordered.
2119 *
2120 * @param pu Pointer to the variable to read.
2121 * @param puRes Where to store the result.
2122 */
2123#define ASMAtomicUoReadSize(pu, puRes) \
2124 do { \
2125 switch (sizeof(*(pu))) { \
2126 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2127 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2128 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2129 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2130            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2131 } \
2132 } while (0)
2133
2134
2135/**
2136 * Atomically writes an unsigned 8-bit value, ordered.
2137 *
2138 * @param pu8 Pointer to the 8-bit variable.
2139 * @param u8 The 8-bit value to assign to *pu8.
2140 */
2141DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2142{
2143 ASMAtomicXchgU8(pu8, u8);
2144}
2145
2146
2147/**
2148 * Atomically writes an unsigned 8-bit value, unordered.
2149 *
2150 * @param pu8 Pointer to the 8-bit variable.
2151 * @param u8 The 8-bit value to assign to *pu8.
2152 */
2153DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2154{
2155 *pu8 = u8; /* byte writes are atomic on x86 */
2156}
2157
2158
2159/**
2160 * Atomically writes a signed 8-bit value, ordered.
2161 *
2162 * @param   pi8     Pointer to the 8-bit variable to write.
2163 * @param i8 The 8-bit value to assign to *pi8.
2164 */
2165DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2166{
2167 ASMAtomicXchgS8(pi8, i8);
2168}
2169
2170
2171/**
2172 * Atomically writes a signed 8-bit value, unordered.
2173 *
2174 * @param pi8 Pointer to the 8-bit variable to write.
2175 * @param i8 The 8-bit value to assign to *pi8.
2176 */
2177DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2178{
2179 *pi8 = i8; /* byte writes are atomic on x86 */
2180}
2181
2182
2183/**
2184 * Atomically writes an unsigned 16-bit value, ordered.
2185 *
2186 * @param pu16 Pointer to the 16-bit variable to write.
2187 * @param u16 The 16-bit value to assign to *pu16.
2188 */
2189DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2190{
2191 ASMAtomicXchgU16(pu16, u16);
2192}
2193
2194
2195/**
2196 * Atomically writes an unsigned 16-bit value, unordered.
2197 *
2198 * @param pu16 Pointer to the 16-bit variable to write.
2199 * @param u16 The 16-bit value to assign to *pu16.
2200 */
2201DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2202{
2203 Assert(!((uintptr_t)pu16 & 1));
2204 *pu16 = u16;
2205}
2206
2207
2208/**
2209 * Atomically writes a signed 16-bit value, ordered.
2210 *
2211 * @param pi16 Pointer to the 16-bit variable to write.
2212 * @param i16 The 16-bit value to assign to *pi16.
2213 */
2214DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2215{
2216 ASMAtomicXchgS16(pi16, i16);
2217}
2218
2219
2220/**
2221 * Atomically writes a signed 16-bit value, unordered.
2222 *
2223 * @param pi16 Pointer to the 16-bit variable to write.
2224 * @param i16 The 16-bit value to assign to *pi16.
2225 */
2226DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2227{
2228 Assert(!((uintptr_t)pi16 & 1));
2229 *pi16 = i16;
2230}
2231
2232
2233/**
2234 * Atomically writes an unsigned 32-bit value, ordered.
2235 *
2236 * @param pu32 Pointer to the 32-bit variable to write.
2237 * @param u32 The 32-bit value to assign to *pu32.
2238 */
2239DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2240{
2241 ASMAtomicXchgU32(pu32, u32);
2242}
2243
2244
2245/**
2246 * Atomically writes an unsigned 32-bit value, unordered.
2247 *
2248 * @param pu32 Pointer to the 32-bit variable to write.
2249 * @param u32 The 32-bit value to assign to *pu32.
2250 */
2251DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2252{
2253 Assert(!((uintptr_t)pu32 & 3));
2254 *pu32 = u32;
2255}
2256
2257
2258/**
2259 * Atomically writes a signed 32-bit value, ordered.
2260 *
2261 * @param pi32 Pointer to the 32-bit variable to write.
2262 * @param i32 The 32-bit value to assign to *pi32.
2263 */
2264DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2265{
2266 ASMAtomicXchgS32(pi32, i32);
2267}
2268
2269
2270/**
2271 * Atomically writes a signed 32-bit value, unordered.
2272 *
2273 * @param pi32 Pointer to the 32-bit variable to write.
2274 * @param i32 The 32-bit value to assign to *pi32.
2275 */
2276DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2277{
2278 Assert(!((uintptr_t)pi32 & 3));
2279 *pi32 = i32;
2280}
2281
2282
2283/**
2284 * Atomically writes an unsigned 64-bit value, ordered.
2285 *
2286 * @param pu64 Pointer to the 64-bit variable to write.
2287 * @param u64 The 64-bit value to assign to *pu64.
2288 */
2289DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2290{
2291 ASMAtomicXchgU64(pu64, u64);
2292}
2293
2294
2295/**
2296 * Atomically writes an unsigned 64-bit value, unordered.
2297 *
2298 * @param pu64 Pointer to the 64-bit variable to write.
2299 * @param u64 The 64-bit value to assign to *pu64.
2300 */
2301DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2302{
2303 Assert(!((uintptr_t)pu64 & 7));
2304#if ARCH_BITS == 64
2305 *pu64 = u64;
2306#else
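    /* A plain 64-bit store is not a single atomic access on 32-bit hosts,
       so fall back to the (ordered) 64-bit exchange. */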
2307 ASMAtomicXchgU64(pu64, u64);
2308#endif
2309}
2310
2311
2312/**
2313 * Atomically writes a signed 64-bit value, ordered.
2314 *
2315 * @param pi64 Pointer to the 64-bit variable to write.
2316 * @param i64 The 64-bit value to assign to *pi64.
2317 */
2318DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2319{
2320 ASMAtomicXchgS64(pi64, i64);
2321}
2322
2323
2324/**
2325 * Atomically writes a signed 64-bit value, unordered.
2326 *
2327 * @param pi64 Pointer to the 64-bit variable to write.
2328 * @param i64 The 64-bit value to assign to *pi64.
2329 */
2330DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2331{
2332 Assert(!((uintptr_t)pi64 & 7));
2333#if ARCH_BITS == 64
2334 *pi64 = i64;
2335#else
2336 ASMAtomicXchgS64(pi64, i64);
2337#endif
2338}
2339
2340
2341/**
2342 * Atomically writes a boolean value, ordered.
2343 *
2344 * @param pf Pointer to the boolean variable to write.
2345 * @param f The boolean value to assign to *pf.
2346 */
2347DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2348{
2349 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2350}
2351
2352
2353/**
2354 * Atomically writes a boolean value, unordered.
2355 *
2356 * @param pf Pointer to the boolean variable to write.
2357 * @param f The boolean value to assign to *pf.
2358 */
2359DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2360{
2361 *pf = f; /* byte writes are atomic on x86 */
2362}
2363
2364
2365/**
2366 * Atomically writes a pointer value, ordered.
2367 *
2368 * @param ppv Pointer to the pointer variable to write.
2369 * @param pv The pointer value to assign to *ppv.
2370 */
2371DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2372{
2373#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2374 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2375#elif ARCH_BITS == 64
2376 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2377#else
2378# error "ARCH_BITS is bogus"
2379#endif
2380}
2381
2382
2383/**
2384 * Atomically writes a pointer value, ordered.
2385 *
2386 * @param ppv Pointer to the pointer variable to write.
2387 * @param pv The pointer value to assign to *ppv. If NULL use
2388 * ASMAtomicWriteNullPtr or you'll land in trouble.
2389 *
2390 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2391 * NULL.
2392 */
2393#ifdef __GNUC__
2394# define ASMAtomicWritePtr(ppv, pv) \
2395 do \
2396 { \
2397 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2398 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2399 \
2400 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2401 AssertCompile(sizeof(pv) == sizeof(void *)); \
2402 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2403 \
2404 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2405 } while (0)
2406#else
2407# define ASMAtomicWritePtr(ppv, pv) \
2408 do \
2409 { \
2410 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2411 AssertCompile(sizeof(pv) == sizeof(void *)); \
2412 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2413 \
2414 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2415 } while (0)
2416#endif
2417
2418
2419/**
2420 * Atomically sets a pointer to NULL, ordered.
2421 *
2422 * @param ppv Pointer to the pointer variable that should be set to NULL.
2423 *
2424 * @remarks This is relatively type safe on GCC platforms.
2425 */
2426#ifdef __GNUC__
2427# define ASMAtomicWriteNullPtr(ppv) \
2428 do \
2429 { \
2430 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2431 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2432 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2433 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2434 } while (0)
2435#else
2436# define ASMAtomicWriteNullPtr(ppv) \
2437 do \
2438 { \
2439 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2440 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2441 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2442 } while (0)
2443#endif
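
/*
 * Illustrative sketch, not part of the IPRT API: publishing and retiring a
 * shared pointer with the ordered writers above.  g_pvShared is a hypothetical
 * variable; real code must still manage the old object's lifetime.
 */
#if 0 /* usage example only */
extern void * volatile g_pvShared;

DECLINLINE(void) mySharedPublish(void *pvNew)
{
    ASMAtomicWritePtr(&g_pvShared, pvNew);  /* pvNew must not be NULL here */
}

DECLINLINE(void) mySharedRetire(void)
{
    ASMAtomicWriteNullPtr(&g_pvShared);
}
#endif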
2444
2445
2446/**
2447 * Atomically writes a pointer value, unordered.
2448 *
2449 *
2450 * @param ppv Pointer to the pointer variable.
2451 * @param pv The pointer value to assign to *ppv. If NULL use
2452 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2453 *
2454 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2455 * NULL.
2456 */
2457#ifdef __GNUC__
2458# define ASMAtomicUoWritePtr(ppv, pv) \
2459 do \
2460 { \
2461 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2462 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2463 \
2464 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2465 AssertCompile(sizeof(pv) == sizeof(void *)); \
2466 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2467 \
2468 *(ppvTypeChecked) = pvTypeChecked; \
2469 } while (0)
2470#else
2471# define ASMAtomicUoWritePtr(ppv, pv) \
2472 do \
2473 { \
2474 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2475 AssertCompile(sizeof(pv) == sizeof(void *)); \
2476 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2477 *(ppv) = pv; \
2478 } while (0)
2479#endif
2480
2481
2482/**
2483 * Atomically sets a pointer to NULL, unordered.
2484 *
2485 * @param ppv Pointer to the pointer variable that should be set to NULL.
2486 *
2487 * @remarks This is relatively type safe on GCC platforms.
2488 */
2489#ifdef __GNUC__
2490# define ASMAtomicUoWriteNullPtr(ppv) \
2491 do \
2492 { \
2493 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2494 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2495 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2496 *(ppvTypeChecked) = NULL; \
2497 } while (0)
2498#else
2499# define ASMAtomicUoWriteNullPtr(ppv) \
2500 do \
2501 { \
2502 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2503 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2504 *(ppv) = NULL; \
2505 } while (0)
2506#endif
2507
2508
2509/**
2510 * Atomically write a typical IPRT handle value, ordered.
2511 *
2512 * @param ph Pointer to the variable to update.
2513 * @param hNew The value to assign to *ph.
2514 *
2515 * @remarks This doesn't currently work for all handles (like RTFILE).
2516 */
2517#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2518# define ASMAtomicWriteHandle(ph, hNew) \
2519 do { \
2520 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2521 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2522 } while (0)
2523#elif HC_ARCH_BITS == 64
2524# define ASMAtomicWriteHandle(ph, hNew) \
2525 do { \
2526 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2527 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2528 } while (0)
2529#else
2530# error HC_ARCH_BITS
2531#endif
2532
2533
2534/**
2535 * Atomically write a typical IPRT handle value, unordered.
2536 *
2537 * @param ph Pointer to the variable to update.
2538 * @param hNew The value to assign to *ph.
2539 *
2540 * @remarks This doesn't currently work for all handles (like RTFILE).
2541 */
2542#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2543# define ASMAtomicUoWriteHandle(ph, hNew) \
2544 do { \
2545 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2546 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2547 } while (0)
2548#elif HC_ARCH_BITS == 64
2549# define ASMAtomicUoWriteHandle(ph, hNew) \
2550 do { \
2551 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2552 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2553 } while (0)
2554#else
2555# error HC_ARCH_BITS
2556#endif
2557
2558
2559/**
2560 * Atomically write a value whose size might differ
2561 * between platforms or compilers, ordered.
2562 *
2563 * @param pu Pointer to the variable to update.
2564 * @param uNew The value to assign to *pu.
2565 */
2566#define ASMAtomicWriteSize(pu, uNew) \
2567 do { \
2568 switch (sizeof(*(pu))) { \
2569 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2570 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2571 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2572 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2573 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2574 } \
2575 } while (0)
2576
2577/**
2578 * Atomically write a value whose size might differ
2579 * between platforms or compilers, unordered.
2580 *
2581 * @param pu Pointer to the variable to update.
2582 * @param uNew The value to assign to *pu.
2583 */
2584#define ASMAtomicUoWriteSize(pu, uNew) \
2585 do { \
2586 switch (sizeof(*(pu))) { \
2587 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2588 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2589 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2590 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2591            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2592 } \
2593 } while (0)
2594
2595
2596
2597/**
2598 * Atomically exchanges and adds to a 16-bit value, ordered.
2599 *
2600 * @returns The old value.
2601 * @param pu16 Pointer to the value.
2602 * @param u16 Number to add.
2603 *
2604 * @remarks Currently not implemented, just to make 16-bit code happy.
2605 * @remarks x86: Requires a 486 or later.
2606 */
2607DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2608
2609
2610/**
2611 * Atomically exchanges and adds to a 32-bit value, ordered.
2612 *
2613 * @returns The old value.
2614 * @param pu32 Pointer to the value.
2615 * @param u32 Number to add.
2616 *
2617 * @remarks x86: Requires a 486 or later.
2618 */
2619#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2620DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2621#else
2622DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2623{
2624# if RT_INLINE_ASM_USES_INTRIN
2625 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2626 return u32;
2627
2628# elif RT_INLINE_ASM_GNU_STYLE
2629 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2630 : "=r" (u32),
2631 "=m" (*pu32)
2632 : "0" (u32),
2633 "m" (*pu32)
2634 : "memory");
2635 return u32;
2636# else
2637 __asm
2638 {
2639 mov eax, [u32]
2640# ifdef RT_ARCH_AMD64
2641 mov rdx, [pu32]
2642 lock xadd [rdx], eax
2643# else
2644 mov edx, [pu32]
2645 lock xadd [edx], eax
2646# endif
2647 mov [u32], eax
2648 }
2649 return u32;
2650# endif
2651}
2652#endif
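
/*
 * Illustrative sketch, not part of the IPRT API: ASMAtomicAddU32 returns the
 * value *before* the addition, so it can hand out consecutive indexes into a
 * shared ring.  g_idxNext and MY_RING_SIZE are hypothetical names.
 */
#if 0 /* usage example only */
extern uint32_t volatile g_idxNext;
# define MY_RING_SIZE 64

DECLINLINE(uint32_t) myRingAllocSlot(void)
{
    return ASMAtomicAddU32(&g_idxNext, 1) % MY_RING_SIZE; /* old value */
}
#endif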
2653
2654
2655/**
2656 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2657 *
2658 * @returns The old value.
2659 * @param pi32 Pointer to the value.
2660 * @param i32 Number to add.
2661 *
2662 * @remarks x86: Requires a 486 or later.
2663 */
2664DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2665{
2666 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2667}
2668
2669
2670/**
2671 * Atomically exchanges and adds to a 64-bit value, ordered.
2672 *
2673 * @returns The old value.
2674 * @param pu64 Pointer to the value.
2675 * @param u64 Number to add.
2676 *
2677 * @remarks x86: Requires a Pentium or later.
2678 */
2679#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2680DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2681#else
2682DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2683{
2684# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2685 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2686 return u64;
2687
2688# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2689 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2690 : "=r" (u64),
2691 "=m" (*pu64)
2692 : "0" (u64),
2693 "m" (*pu64)
2694 : "memory");
2695 return u64;
2696# else
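    /* No 64-bit xadd is available here, so emulate the add with an
       ASMAtomicCmpXchgU64 retry loop (lock cmpxchg8b on 32-bit x86). */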
2697 uint64_t u64Old;
2698 for (;;)
2699 {
2700 uint64_t u64New;
2701 u64Old = ASMAtomicUoReadU64(pu64);
2702 u64New = u64Old + u64;
2703 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2704 break;
2705 ASMNopPause();
2706 }
2707 return u64Old;
2708# endif
2709}
2710#endif
2711
2712
2713/**
2714 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2715 *
2716 * @returns The old value.
2717 * @param pi64 Pointer to the value.
2718 * @param i64 Number to add.
2719 *
2720 * @remarks x86: Requires a Pentium or later.
2721 */
2722DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2723{
2724 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2725}
2726
2727
2728/**
2729 * Atomically exchanges and adds to a size_t value, ordered.
2730 *
2731 * @returns The old value.
2732 * @param pcb Pointer to the size_t value.
2733 * @param cb Number to add.
2734 */
2735DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2736{
2737#if ARCH_BITS == 64
2738 AssertCompileSize(size_t, 8);
2739 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2740#elif ARCH_BITS == 32
2741 AssertCompileSize(size_t, 4);
2742 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2743#elif ARCH_BITS == 16
2744 AssertCompileSize(size_t, 2);
2745 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2746#else
2747# error "Unsupported ARCH_BITS value"
2748#endif
2749}
2750
2751
2752/**
2753 * Atomically exchanges and adds a value whose size might differ between
2754 * platforms or compilers, ordered.
2755 *
2756 * @param pu Pointer to the variable to update.
2757 * @param uNew The value to add to *pu.
2758 * @param puOld Where to store the old value.
2759 */
2760#define ASMAtomicAddSize(pu, uNew, puOld) \
2761 do { \
2762 switch (sizeof(*(pu))) { \
2763 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2764 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2765 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2766 } \
2767 } while (0)
2768
2769
2770
2771/**
2772 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2773 *
2774 * @returns The old value.
2775 * @param pu16 Pointer to the value.
2776 * @param u16 Number to subtract.
2777 *
2778 * @remarks x86: Requires a 486 or later.
2779 */
2780DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2781{
2782 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2783}
2784
2785
2786/**
2787 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2788 *
2789 * @returns The old value.
2790 * @param pi16 Pointer to the value.
2791 * @param i16 Number to subtract.
2792 *
2793 * @remarks x86: Requires a 486 or later.
2794 */
2795DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2796{
2797 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2798}
2799
2800
2801/**
2802 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2803 *
2804 * @returns The old value.
2805 * @param pu32 Pointer to the value.
2806 * @param u32 Number to subtract.
2807 *
2808 * @remarks x86: Requires a 486 or later.
2809 */
2810DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2811{
2812 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2813}
2814
2815
2816/**
2817 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2818 *
2819 * @returns The old value.
2820 * @param pi32 Pointer to the value.
2821 * @param i32 Number to subtract.
2822 *
2823 * @remarks x86: Requires a 486 or later.
2824 */
2825DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2826{
2827 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2828}
2829
2830
2831/**
2832 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2833 *
2834 * @returns The old value.
2835 * @param pu64 Pointer to the value.
2836 * @param u64 Number to subtract.
2837 *
2838 * @remarks x86: Requires a Pentium or later.
2839 */
2840DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2841{
2842 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2843}
2844
2845
2846/**
2847 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2848 *
2849 * @returns The old value.
2850 * @param pi64 Pointer to the value.
2851 * @param i64 Number to subtract.
2852 *
2853 * @remarks x86: Requires a Pentium or later.
2854 */
2855DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2856{
2857 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2858}
2859
2860
2861/**
2862 * Atomically exchanges and subtracts from a size_t value, ordered.
2863 *
2864 * @returns The old value.
2865 * @param pcb Pointer to the size_t value.
2866 * @param cb Number to subtract.
2867 *
2868 * @remarks x86: Requires a 486 or later.
2869 */
2870DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2871{
2872#if ARCH_BITS == 64
2873 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2874#elif ARCH_BITS == 32
2875 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2876#elif ARCH_BITS == 16
2877 AssertCompileSize(size_t, 2);
2878 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2879#else
2880# error "Unsupported ARCH_BITS value"
2881#endif
2882}
2883
2884
2885/**
2886 * Atomically exchanges and subtracts a value whose size might differ between
2887 * platforms or compilers, ordered.
2888 *
2889 * @param pu Pointer to the variable to update.
2890 * @param   uNew    The value to subtract from *pu.
2891 * @param puOld Where to store the old value.
2892 *
2893 * @remarks x86: Requires a 486 or later.
2894 */
2895#define ASMAtomicSubSize(pu, uNew, puOld) \
2896 do { \
2897 switch (sizeof(*(pu))) { \
2898 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2899 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2900 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2901 } \
2902 } while (0)
2903
2904
2905
2906/**
2907 * Atomically increment a 16-bit value, ordered.
2908 *
2909 * @returns The new value.
2910 * @param pu16 Pointer to the value to increment.
2911 * @remarks Not implemented. Just to make 16-bit code happy.
2912 *
2913 * @remarks x86: Requires a 486 or later.
2914 */
2915DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2916
2917
2918/**
2919 * Atomically increment a 32-bit value, ordered.
2920 *
2921 * @returns The new value.
2922 * @param pu32 Pointer to the value to increment.
2923 *
2924 * @remarks x86: Requires a 486 or later.
2925 */
2926#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2927DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2928#else
2929DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2930{
2931 uint32_t u32;
2932# if RT_INLINE_ASM_USES_INTRIN
2933 u32 = _InterlockedIncrement((long *)pu32);
2934 return u32;
2935
2936# elif RT_INLINE_ASM_GNU_STYLE
2937 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2938 : "=r" (u32),
2939 "=m" (*pu32)
2940 : "0" (1),
2941 "m" (*pu32)
2942 : "memory");
2943 return u32+1;
2944# else
2945 __asm
2946 {
2947 mov eax, 1
2948# ifdef RT_ARCH_AMD64
2949 mov rdx, [pu32]
2950 lock xadd [rdx], eax
2951# else
2952 mov edx, [pu32]
2953 lock xadd [edx], eax
2954# endif
2955 mov u32, eax
2956 }
2957 return u32+1;
2958# endif
2959}
2960#endif
2961
2962
2963/**
2964 * Atomically increment a signed 32-bit value, ordered.
2965 *
2966 * @returns The new value.
2967 * @param pi32 Pointer to the value to increment.
2968 *
2969 * @remarks x86: Requires a 486 or later.
2970 */
2971DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2972{
2973 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2974}
2975
2976
2977/**
2978 * Atomically increment a 64-bit value, ordered.
2979 *
2980 * @returns The new value.
2981 * @param pu64 Pointer to the value to increment.
2982 *
2983 * @remarks x86: Requires a Pentium or later.
2984 */
2985#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2986DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2987#else
2988DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2989{
2990# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2991 uint64_t u64;
2992 u64 = _InterlockedIncrement64((__int64 *)pu64);
2993 return u64;
2994
2995# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2996 uint64_t u64;
2997 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2998 : "=r" (u64),
2999 "=m" (*pu64)
3000 : "0" (1),
3001 "m" (*pu64)
3002 : "memory");
3003 return u64 + 1;
3004# else
3005 return ASMAtomicAddU64(pu64, 1) + 1;
3006# endif
3007}
3008#endif
3009
3010
3011/**
3012 * Atomically increment a signed 64-bit value, ordered.
3013 *
3014 * @returns The new value.
3015 * @param pi64 Pointer to the value to increment.
3016 *
3017 * @remarks x86: Requires a Pentium or later.
3018 */
3019DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3020{
3021 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3022}
3023
3024
3025/**
3026 * Atomically increment a size_t value, ordered.
3027 *
3028 * @returns The new value.
3029 * @param pcb Pointer to the value to increment.
3030 *
3031 * @remarks x86: Requires a 486 or later.
3032 */
3033DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
3034{
3035#if ARCH_BITS == 64
3036 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3037#elif ARCH_BITS == 32
3038 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3039#elif ARCH_BITS == 16
3040 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3041#else
3042# error "Unsupported ARCH_BITS value"
3043#endif
3044}
3045
3046
3047
3048/**
3049 * Atomically decrement an unsigned 16-bit value, ordered.
3050 *
3051 * @returns The new value.
3052 * @param pu16 Pointer to the value to decrement.
3053 * @remarks Not implemented. Just to make 16-bit code happy.
3054 *
3055 * @remarks x86: Requires a 486 or later.
3056 */
3057DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3058
3059
3060/**
3061 * Atomically decrement an unsigned 32-bit value, ordered.
3062 *
3063 * @returns The new value.
3064 * @param pu32 Pointer to the value to decrement.
3065 *
3066 * @remarks x86: Requires a 486 or later.
3067 */
3068#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3069DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3070#else
3071DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3072{
3073 uint32_t u32;
3074# if RT_INLINE_ASM_USES_INTRIN
3075 u32 = _InterlockedDecrement((long *)pu32);
3076 return u32;
3077
3078# elif RT_INLINE_ASM_GNU_STYLE
3079 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3080 : "=r" (u32),
3081 "=m" (*pu32)
3082 : "0" (-1),
3083 "m" (*pu32)
3084 : "memory");
3085 return u32-1;
3086# else
3087 __asm
3088 {
3089 mov eax, -1
3090# ifdef RT_ARCH_AMD64
3091 mov rdx, [pu32]
3092 lock xadd [rdx], eax
3093# else
3094 mov edx, [pu32]
3095 lock xadd [edx], eax
3096# endif
3097 mov u32, eax
3098 }
3099 return u32-1;
3100# endif
3101}
3102#endif
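
/*
 * Illustrative sketch, not part of the IPRT API: the usual reference count
 * pattern built on ASMAtomicIncU32/ASMAtomicDecU32.  Both return the *new*
 * value, so the caller that decrements to zero owns the destruction.  MYOBJ
 * and myObjDestroy are hypothetical names.
 */
#if 0 /* usage example only */
typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
extern void myObjDestroy(MYOBJ *pObj);

DECLINLINE(void) myObjRetain(MYOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(void) myObjRelease(MYOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
        myObjDestroy(pObj);
}
#endif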
3103
3104
3105/**
3106 * Atomically decrement a signed 32-bit value, ordered.
3107 *
3108 * @returns The new value.
3109 * @param pi32 Pointer to the value to decrement.
3110 *
3111 * @remarks x86: Requires a 486 or later.
3112 */
3113DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3114{
3115 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3116}
3117
3118
3119/**
3120 * Atomically decrement an unsigned 64-bit value, ordered.
3121 *
3122 * @returns The new value.
3123 * @param pu64 Pointer to the value to decrement.
3124 *
3125 * @remarks x86: Requires a Pentium or later.
3126 */
3127#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3128DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3129#else
3130DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3131{
3132# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3133 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3134 return u64;
3135
3136# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3137 uint64_t u64;
3138 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3139 : "=r" (u64),
3140 "=m" (*pu64)
3141 : "0" (~(uint64_t)0),
3142 "m" (*pu64)
3143 : "memory");
3144 return u64-1;
3145# else
3146 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3147# endif
3148}
3149#endif
3150
3151
3152/**
3153 * Atomically decrement a signed 64-bit value, ordered.
3154 *
3155 * @returns The new value.
3156 * @param pi64 Pointer to the value to decrement.
3157 *
3158 * @remarks x86: Requires a Pentium or later.
3159 */
3160DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3161{
3162 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3163}
3164
3165
3166/**
3167 * Atomically decrement a size_t value, ordered.
3168 *
3169 * @returns The new value.
3170 * @param pcb Pointer to the value to decrement.
3171 *
3172 * @remarks x86: Requires a 486 or later.
3173 */
3174DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
3175{
3176#if ARCH_BITS == 64
3177 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3178#elif ARCH_BITS == 32
3179 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3180#elif ARCH_BITS == 16
3181 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3182#else
3183# error "Unsupported ARCH_BITS value"
3184#endif
3185}
3186
3187
3188/**
3189 * Atomically Or an unsigned 32-bit value, ordered.
3190 *
3191 * @param   pu32   Pointer to the 32-bit variable to OR u32 with.
3192 * @param u32 The value to OR *pu32 with.
3193 *
3194 * @remarks x86: Requires a 386 or later.
3195 */
3196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3197DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3198#else
3199DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3200{
3201# if RT_INLINE_ASM_USES_INTRIN
3202 _InterlockedOr((long volatile *)pu32, (long)u32);
3203
3204# elif RT_INLINE_ASM_GNU_STYLE
3205 __asm__ __volatile__("lock; orl %1, %0\n\t"
3206 : "=m" (*pu32)
3207 : "ir" (u32),
3208 "m" (*pu32));
3209# else
3210 __asm
3211 {
3212 mov eax, [u32]
3213# ifdef RT_ARCH_AMD64
3214 mov rdx, [pu32]
3215 lock or [rdx], eax
3216# else
3217 mov edx, [pu32]
3218 lock or [edx], eax
3219# endif
3220 }
3221# endif
3222}
3223#endif
3224
3225
3226/**
3227 * Atomically Or a signed 32-bit value, ordered.
3228 *
3229 * @param   pi32   Pointer to the 32-bit variable to OR i32 with.
3230 * @param   i32    The value to OR *pi32 with.
3231 *
3232 * @remarks x86: Requires a 386 or later.
3233 */
3234DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3235{
3236 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3237}
3238
3239
3240/**
3241 * Atomically Or an unsigned 64-bit value, ordered.
3242 *
3243 * @param   pu64   Pointer to the 64-bit variable to OR u64 with.
3244 * @param u64 The value to OR *pu64 with.
3245 *
3246 * @remarks x86: Requires a Pentium or later.
3247 */
3248#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3249DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3250#else
3251DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3252{
3253# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3254 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3255
3256# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3257 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3258 : "=m" (*pu64)
3259 : "r" (u64),
3260 "m" (*pu64));
3261# else
3262 for (;;)
3263 {
3264 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3265 uint64_t u64New = u64Old | u64;
3266 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3267 break;
3268 ASMNopPause();
3269 }
3270# endif
3271}
3272#endif
3273
3274
3275/**
3276 * Atomically Or a signed 64-bit value, ordered.
3277 *
3278 * @param   pi64   Pointer to the 64-bit variable to OR i64 with.
3279 * @param   i64    The value to OR *pi64 with.
3280 *
3281 * @remarks x86: Requires a Pentium or later.
3282 */
3283DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3284{
3285 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3286}
3287
3288
3289/**
3290 * Atomically And an unsigned 32-bit value, ordered.
3291 *
3292 * @param   pu32   Pointer to the 32-bit variable to AND u32 with.
3293 * @param u32 The value to AND *pu32 with.
3294 *
3295 * @remarks x86: Requires a 386 or later.
3296 */
3297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3298DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3299#else
3300DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3301{
3302# if RT_INLINE_ASM_USES_INTRIN
3303 _InterlockedAnd((long volatile *)pu32, u32);
3304
3305# elif RT_INLINE_ASM_GNU_STYLE
3306 __asm__ __volatile__("lock; andl %1, %0\n\t"
3307 : "=m" (*pu32)
3308 : "ir" (u32),
3309 "m" (*pu32));
3310# else
3311 __asm
3312 {
3313 mov eax, [u32]
3314# ifdef RT_ARCH_AMD64
3315 mov rdx, [pu32]
3316 lock and [rdx], eax
3317# else
3318 mov edx, [pu32]
3319 lock and [edx], eax
3320# endif
3321 }
3322# endif
3323}
3324#endif
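
/*
 * Illustrative sketch, not part of the IPRT API: ASMAtomicOrU32 sets flag
 * bits and ASMAtomicAndU32 clears them without disturbing concurrent updates
 * of the other bits.  g_fMyFlags and MYFLAG_BUSY are hypothetical names.
 */
#if 0 /* usage example only */
extern uint32_t volatile g_fMyFlags;
# define MYFLAG_BUSY UINT32_C(0x00000001)

DECLINLINE(void) mySetBusy(void)   { ASMAtomicOrU32(&g_fMyFlags, MYFLAG_BUSY); }
DECLINLINE(void) myClearBusy(void) { ASMAtomicAndU32(&g_fMyFlags, ~MYFLAG_BUSY); }
#endif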
3325
3326
3327/**
3328 * Atomically And a signed 32-bit value, ordered.
3329 *
3330 * @param   pi32   Pointer to the 32-bit variable to AND i32 with.
3331 * @param i32 The value to AND *pi32 with.
3332 *
3333 * @remarks x86: Requires a 386 or later.
3334 */
3335DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3336{
3337 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3338}
3339
3340
3341/**
3342 * Atomically And an unsigned 64-bit value, ordered.
3343 *
3344 * @param   pu64   Pointer to the 64-bit variable to AND u64 with.
3345 * @param u64 The value to AND *pu64 with.
3346 *
3347 * @remarks x86: Requires a Pentium or later.
3348 */
3349#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3350DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3351#else
3352DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3353{
3354# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3355 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3356
3357# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3358 __asm__ __volatile__("lock; andq %1, %0\n\t"
3359 : "=m" (*pu64)
3360 : "r" (u64),
3361 "m" (*pu64));
3362# else
3363 for (;;)
3364 {
3365 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3366 uint64_t u64New = u64Old & u64;
3367 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3368 break;
3369 ASMNopPause();
3370 }
3371# endif
3372}
3373#endif
3374
3375
3376/**
3377 * Atomically And a signed 64-bit value, ordered.
3378 *
3379 * @param   pi64   Pointer to the 64-bit variable to AND i64 with.
3380 * @param i64 The value to AND *pi64 with.
3381 *
3382 * @remarks x86: Requires a Pentium or later.
3383 */
3384DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3385{
3386 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3387}
3388
3389
3390/**
3391 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3392 *
3393 * @param   pu32   Pointer to the 32-bit variable to OR u32 with.
3394 * @param u32 The value to OR *pu32 with.
3395 *
3396 * @remarks x86: Requires a 386 or later.
3397 */
3398#if RT_INLINE_ASM_EXTERNAL
3399DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3400#else
3401DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3402{
3403# if RT_INLINE_ASM_GNU_STYLE
3404 __asm__ __volatile__("orl %1, %0\n\t"
3405 : "=m" (*pu32)
3406 : "ir" (u32),
3407 "m" (*pu32));
3408# else
3409 __asm
3410 {
3411 mov eax, [u32]
3412# ifdef RT_ARCH_AMD64
3413 mov rdx, [pu32]
3414 or [rdx], eax
3415# else
3416 mov edx, [pu32]
3417 or [edx], eax
3418# endif
3419 }
3420# endif
3421}
3422#endif
3423
3424
3425/**
3426 * Atomically OR a signed 32-bit value, unordered.
3427 *
3428 * @param   pi32   Pointer to the 32-bit variable to OR i32 with.
3429 * @param   i32    The value to OR *pi32 with.
3430 *
3431 * @remarks x86: Requires a 386 or later.
3432 */
3433DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3434{
3435 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3436}
3437
3438
3439/**
3440 * Atomically OR an unsigned 64-bit value, unordered.
3441 *
3442 * @param   pu64   Pointer to the 64-bit variable to OR u64 with.
3443 * @param u64 The value to OR *pu64 with.
3444 *
3445 * @remarks x86: Requires a Pentium or later.
3446 */
3447#if RT_INLINE_ASM_EXTERNAL
3448DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3449#else
3450DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3451{
3452# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3453 __asm__ __volatile__("orq %1, %q0\n\t"
3454 : "=m" (*pu64)
3455 : "r" (u64),
3456 "m" (*pu64));
3457# else
3458 for (;;)
3459 {
3460 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3461 uint64_t u64New = u64Old | u64;
3462 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3463 break;
3464 ASMNopPause();
3465 }
3466# endif
3467}
3468#endif
3469
3470
3471/**
3472 * Atomically Or a signed 64-bit value, unordered.
3473 *
3474 * @param   pi64   Pointer to the 64-bit variable to OR i64 with.
3475 * @param   i64    The value to OR *pi64 with.
3476 *
3477 * @remarks x86: Requires a Pentium or later.
3478 */
3479DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3480{
3481 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3482}
3483
3484
3485/**
3486 * Atomically And an unsigned 32-bit value, unordered.
3487 *
3488 * @param   pu32   Pointer to the 32-bit variable to AND u32 with.
3489 * @param u32 The value to AND *pu32 with.
3490 *
3491 * @remarks x86: Requires a 386 or later.
3492 */
3493#if RT_INLINE_ASM_EXTERNAL
3494DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3495#else
3496DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3497{
3498# if RT_INLINE_ASM_GNU_STYLE
3499 __asm__ __volatile__("andl %1, %0\n\t"
3500 : "=m" (*pu32)
3501 : "ir" (u32),
3502 "m" (*pu32));
3503# else
3504 __asm
3505 {
3506 mov eax, [u32]
3507# ifdef RT_ARCH_AMD64
3508 mov rdx, [pu32]
3509 and [rdx], eax
3510# else
3511 mov edx, [pu32]
3512 and [edx], eax
3513# endif
3514 }
3515# endif
3516}
3517#endif
3518
3519
3520/**
3521 * Atomically And a signed 32-bit value, unordered.
3522 *
3523 * @param   pi32   Pointer to the 32-bit variable to AND i32 with.
3524 * @param i32 The value to AND *pi32 with.
3525 *
3526 * @remarks x86: Requires a 386 or later.
3527 */
3528DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3529{
3530 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3531}
3532
3533
3534/**
3535 * Atomically And an unsigned 64-bit value, unordered.
3536 *
3537 * @param   pu64   Pointer to the 64-bit variable to AND u64 with.
3538 * @param u64 The value to AND *pu64 with.
3539 *
3540 * @remarks x86: Requires a Pentium or later.
3541 */
3542#if RT_INLINE_ASM_EXTERNAL
3543DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3544#else
3545DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3546{
3547# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3548 __asm__ __volatile__("andq %1, %0\n\t"
3549 : "=m" (*pu64)
3550 : "r" (u64),
3551 "m" (*pu64));
3552# else
3553 for (;;)
3554 {
3555 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3556 uint64_t u64New = u64Old & u64;
3557 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3558 break;
3559 ASMNopPause();
3560 }
3561# endif
3562}
3563#endif
3564
3565
3566/**
3567 * Atomically And a signed 64-bit value, unordered.
3568 *
3569 * @param   pi64   Pointer to the 64-bit variable to AND i64 with.
3570 * @param i64 The value to AND *pi64 with.
3571 *
3572 * @remarks x86: Requires a Pentium or later.
3573 */
3574DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3575{
3576 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3577}
3578
3579
3580/**
3581 * Atomically increment an unsigned 32-bit value, unordered.
3582 *
3583 * @returns the new value.
3584 * @param pu32 Pointer to the variable to increment.
3585 *
3586 * @remarks x86: Requires a 486 or later.
3587 */
3588#if RT_INLINE_ASM_EXTERNAL
3589DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3590#else
3591DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3592{
3593 uint32_t u32;
3594# if RT_INLINE_ASM_GNU_STYLE
3595 __asm__ __volatile__("xaddl %0, %1\n\t"
3596 : "=r" (u32),
3597 "=m" (*pu32)
3598 : "0" (1),
3599 "m" (*pu32)
3600 : "memory");
3601 return u32 + 1;
3602# else
3603 __asm
3604 {
3605 mov eax, 1
3606# ifdef RT_ARCH_AMD64
3607 mov rdx, [pu32]
3608 xadd [rdx], eax
3609# else
3610 mov edx, [pu32]
3611 xadd [edx], eax
3612# endif
3613 mov u32, eax
3614 }
3615 return u32 + 1;
3616# endif
3617}
3618#endif
3619
3620
3621/**
3622 * Atomically decrement an unsigned 32-bit value, unordered.
3623 *
3624 * @returns the new value.
3625 * @param pu32 Pointer to the variable to decrement.
3626 *
3627 * @remarks x86: Requires a 486 or later.
3628 */
3629#if RT_INLINE_ASM_EXTERNAL
3630DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3631#else
3632DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3633{
3634 uint32_t u32;
3635# if RT_INLINE_ASM_GNU_STYLE
3636 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3637 : "=r" (u32),
3638 "=m" (*pu32)
3639 : "0" (-1),
3640 "m" (*pu32)
3641 : "memory");
3642 return u32 - 1;
3643# else
3644 __asm
3645 {
3646 mov eax, -1
3647# ifdef RT_ARCH_AMD64
3648 mov rdx, [pu32]
3649 xadd [rdx], eax
3650# else
3651 mov edx, [pu32]
3652 xadd [edx], eax
3653# endif
3654 mov u32, eax
3655 }
3656 return u32 - 1;
3657# endif
3658}
3659#endif
3660
3661
3662/** @def RT_ASM_PAGE_SIZE
3663 * We try to avoid dragging in iprt/param.h here.
3664 * @internal
3665 */
3666#if defined(RT_ARCH_SPARC64)
3667# define RT_ASM_PAGE_SIZE 0x2000
3668# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3669# if PAGE_SIZE != 0x2000
3670# error "PAGE_SIZE is not 0x2000!"
3671# endif
3672# endif
3673#else
3674# define RT_ASM_PAGE_SIZE 0x1000
3675# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3676# if PAGE_SIZE != 0x1000
3677# error "PAGE_SIZE is not 0x1000!"
3678# endif
3679# endif
3680#endif
3681
3682/**
3683 * Zeros a 4K memory page.
3684 *
3685 * @param pv Pointer to the memory block. This must be page aligned.
3686 */
3687#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3688DECLASM(void) ASMMemZeroPage(volatile void *pv);
3689# else
3690DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3691{
3692# if RT_INLINE_ASM_USES_INTRIN
3693# ifdef RT_ARCH_AMD64
3694 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3695# else
3696 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3697# endif
3698
3699# elif RT_INLINE_ASM_GNU_STYLE
3700 RTCCUINTREG uDummy;
3701# ifdef RT_ARCH_AMD64
3702 __asm__ __volatile__("rep stosq"
3703 : "=D" (pv),
3704 "=c" (uDummy)
3705 : "0" (pv),
3706 "c" (RT_ASM_PAGE_SIZE >> 3),
3707 "a" (0)
3708 : "memory");
3709# else
3710 __asm__ __volatile__("rep stosl"
3711 : "=D" (pv),
3712 "=c" (uDummy)
3713 : "0" (pv),
3714 "c" (RT_ASM_PAGE_SIZE >> 2),
3715 "a" (0)
3716 : "memory");
3717# endif
3718# else
3719 __asm
3720 {
3721# ifdef RT_ARCH_AMD64
3722 xor rax, rax
3723 mov ecx, 0200h
3724 mov rdi, [pv]
3725 rep stosq
3726# else
3727 xor eax, eax
3728 mov ecx, 0400h
3729 mov edi, [pv]
3730 rep stosd
3731# endif
3732 }
3733# endif
3734}
3735# endif
3736
3737
3738/**
3739 * Zeros a memory block with a 32-bit aligned size.
3740 *
3741 * @param pv Pointer to the memory block.
3742 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3743 */
3744#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3745DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3746#else
3747DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3748{
3749# if RT_INLINE_ASM_USES_INTRIN
3750# ifdef RT_ARCH_AMD64
3751 if (!(cb & 7))
3752 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3753 else
3754# endif
3755 __stosd((unsigned long *)pv, 0, cb / 4);
3756
3757# elif RT_INLINE_ASM_GNU_STYLE
3758 __asm__ __volatile__("rep stosl"
3759 : "=D" (pv),
3760 "=c" (cb)
3761 : "0" (pv),
3762 "1" (cb >> 2),
3763 "a" (0)
3764 : "memory");
3765# else
3766 __asm
3767 {
3768 xor eax, eax
3769# ifdef RT_ARCH_AMD64
3770 mov rcx, [cb]
3771 shr rcx, 2
3772 mov rdi, [pv]
3773# else
3774 mov ecx, [cb]
3775 shr ecx, 2
3776 mov edi, [pv]
3777# endif
3778 rep stosd
3779 }
3780# endif
3781}
3782#endif
3783
3784
3785/**
3786 * Fills a memory block with a 32-bit aligned size.
3787 *
3788 * @param pv Pointer to the memory block.
3789 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3790 * @param u32 The value to fill with.
3791 */
3792#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3793DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3794#else
3795DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3796{
3797# if RT_INLINE_ASM_USES_INTRIN
3798# ifdef RT_ARCH_AMD64
3799 if (!(cb & 7))
3800 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3801 else
3802# endif
3803 __stosd((unsigned long *)pv, u32, cb / 4);
3804
3805# elif RT_INLINE_ASM_GNU_STYLE
3806 __asm__ __volatile__("rep stosl"
3807 : "=D" (pv),
3808 "=c" (cb)
3809 : "0" (pv),
3810 "1" (cb >> 2),
3811 "a" (u32)
3812 : "memory");
3813# else
3814 __asm
3815 {
3816# ifdef RT_ARCH_AMD64
3817 mov rcx, [cb]
3818 shr rcx, 2
3819 mov rdi, [pv]
3820# else
3821 mov ecx, [cb]
3822 shr ecx, 2
3823 mov edi, [pv]
3824# endif
3825 mov eax, [u32]
3826 rep stosd
3827 }
3828# endif
3829}
3830#endif
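
/*
 * Illustrative sketch, not part of the IPRT API: both helpers require a byte
 * count that is a multiple of 4; poisoning a table with a marker value and
 * zeroing it again might look like this.  MY_POISON and g_aMyTable are
 * hypothetical names.
 */
#if 0 /* usage example only */
# define MY_POISON UINT32_C(0xdeadbeef)
static uint32_t g_aMyTable[256];

DECLINLINE(void) myTableReset(bool fPoison)
{
    if (fPoison)
        ASMMemFill32(g_aMyTable, sizeof(g_aMyTable), MY_POISON);
    else
        ASMMemZero32(g_aMyTable, sizeof(g_aMyTable));
}
#endif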
3831
3832
3833/**
3834 * Checks if a memory block is all zeros.
3835 *
3836 * @returns Pointer to the first non-zero byte.
3837 * @returns NULL if all zero.
3838 *
3839 * @param pv Pointer to the memory block.
3840 * @param cb Number of bytes in the block.
3841 *
3842 * @todo Fix name, it is a predicate function but it's not returning boolean!
3843 */
3844#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3845DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3846#else
3847DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3848{
3849 uint8_t const *pb = (uint8_t const *)pv;
3850 for (; cb; cb--, pb++)
3851 if (RT_LIKELY(*pb == 0))
3852 { /* likely */ }
3853 else
3854 return (void *)pb;
3855 return NULL;
3856}
3857#endif
3858
3859
3860/**
3861 * Checks if a memory block is all zeros.
3862 *
3863 * @returns true if zero, false if not.
3864 *
3865 * @param pv Pointer to the memory block.
3866 * @param cb Number of bytes in the block.
3867 *
3868 * @sa ASMMemFirstNonZero
3869 */
3870DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3871{
3872 return ASMMemFirstNonZero(pv, cb) == NULL;
3873}
3874
3875
3876/**
3877 * Checks if a memory page is all zeros.
3878 *
3879 * @returns true / false.
3880 *
3881 * @param   pvPage      Pointer to the page.  Must be aligned on a 16 byte
3882 *                      boundary.
3883 */
3884DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3885{
3886# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3887 union { RTCCUINTREG r; bool f; } uAX;
3888 RTCCUINTREG xCX, xDI;
3889 Assert(!((uintptr_t)pvPage & 15));
3890 __asm__ __volatile__("repe; "
3891# ifdef RT_ARCH_AMD64
3892 "scasq\n\t"
3893# else
3894 "scasl\n\t"
3895# endif
3896 "setnc %%al\n\t"
3897 : "=&c" (xCX),
3898 "=&D" (xDI),
3899 "=&a" (uAX.r)
3900 : "mr" (pvPage),
3901# ifdef RT_ARCH_AMD64
3902 "0" (RT_ASM_PAGE_SIZE/8),
3903# else
3904 "0" (RT_ASM_PAGE_SIZE/4),
3905# endif
3906 "1" (pvPage),
3907 "2" (0));
3908 return uAX.f;
3909# else
3910 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3911 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3912 Assert(!((uintptr_t)pvPage & 15));
3913 for (;;)
3914 {
3915 if (puPtr[0]) return false;
3916 if (puPtr[4]) return false;
3917
3918 if (puPtr[2]) return false;
3919 if (puPtr[6]) return false;
3920
3921 if (puPtr[1]) return false;
3922 if (puPtr[5]) return false;
3923
3924 if (puPtr[3]) return false;
3925 if (puPtr[7]) return false;
3926
3927 if (!--cLeft)
3928 return true;
3929 puPtr += 8;
3930 }
3931 return true;
3932# endif
3933}
3934
3935
3936/**
3937 * Checks if a memory block is filled with the specified byte, returning the
3938 * first mismatch.
3939 *
3940 * This is sort of an inverted memchr.
3941 *
3942 * @returns Pointer to the byte which doesn't equal u8.
3943 * @returns NULL if all equal to u8.
3944 *
3945 * @param pv Pointer to the memory block.
3946 * @param cb Number of bytes in the block.
3947 * @param u8 The value it's supposed to be filled with.
3948 *
3949 * @remarks No alignment requirements.
3950 */
3951#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3952DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3953#else
3954DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3955{
3956 uint8_t const *pb = (uint8_t const *)pv;
3957 for (; cb; cb--, pb++)
3958 if (RT_LIKELY(*pb == u8))
3959 { /* likely */ }
3960 else
3961 return (void *)pb;
3962 return NULL;
3963}
3964#endif
3965
3966
3967/**
3968 * Checks if a memory block is filled with the specified byte.
3969 *
3970 * @returns true if all matching, false if not.
3971 *
3972 * @param pv Pointer to the memory block.
3973 * @param cb Number of bytes in the block.
3974 * @param u8 The value it's supposed to be filled with.
3975 *
3976 * @remarks No alignment requirements.
3977 */
3978DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
3979{
3980 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
3981}
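
/* Usage sketch (illustration only, not part of the IPRT API): verify that a
 * sector buffer still carries the 0xff erase pattern typical of flash-style
 * media. The 512 byte sector size is a made-up example value. */
#if 0 /* example only */
static bool ExampleIsSectorErased(uint8_t const *pbSector)
{
    return ASMMemIsAllU8(pbSector, 512, 0xff);
}
#endif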
3982
3983
3984/**
3985 * Checks if a memory block is filled with the specified 32-bit value.
3986 *
3987 * This is a sort of inverted memchr.
3988 *
3989 * @returns Pointer to the first value which doesn't equal u32.
3990 * @returns NULL if all equal to u32.
3991 *
3992 * @param pv Pointer to the memory block.
3993 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3994 * @param u32 The value it's supposed to be filled with.
3995 */
3996DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
3997{
3998/** @todo rewrite this in inline assembly? */
3999 uint32_t const *pu32 = (uint32_t const *)pv;
4000 for (; cb; cb -= 4, pu32++)
4001 if (RT_LIKELY(*pu32 == u32))
4002 { /* likely */ }
4003 else
4004 return (uint32_t *)pu32;
4005 return NULL;
4006}
4007
4008
4009/**
4010 * Probes a byte pointer for read access.
4011 *
4012 * While the function will fault if the byte is not read accessible,
4013 * the idea is to do this in a safe place like before acquiring locks
4014 * and such like.
4015 *
4016 * Also, this function guarantees that an eager compiler is not going
4017 * to optimize the probing away.
4018 *
4019 * @param pvByte Pointer to the byte.
4020 */
4021#if RT_INLINE_ASM_EXTERNAL
4022DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4023#else
4024DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4025{
4026 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4027 uint8_t u8;
4028# if RT_INLINE_ASM_GNU_STYLE
4029 __asm__ __volatile__("movb (%1), %0\n\t"
4030 : "=r" (u8)
4031 : "r" (pvByte));
4032# else
4033 __asm
4034 {
4035# ifdef RT_ARCH_AMD64
4036 mov rax, [pvByte]
4037 mov al, [rax]
4038# else
4039 mov eax, [pvByte]
4040 mov al, [eax]
4041# endif
4042 mov [u8], al
4043 }
4044# endif
4045 return u8;
4046}
4047#endif
4048
4049/**
4050 * Probes a buffer for read access page by page.
4051 *
4052 * While the function will fault if the buffer is not fully read
4053 * accessible, the idea is to do this in a safe place like before
4054 * acquiring locks and such like.
4055 *
4056 * Also, this function guarantees that an eager compiler is not going
4057 * to optimize the probing away.
4058 *
4059 * @param pvBuf Pointer to the buffer.
4060 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4061 */
4062DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4063{
4064 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4065 /* the first byte */
4066 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4067 ASMProbeReadByte(pu8);
4068
4069 /* the pages in between. */
4070 while (cbBuf > RT_ASM_PAGE_SIZE)
4071 {
4072 ASMProbeReadByte(pu8);
4073 cbBuf -= RT_ASM_PAGE_SIZE;
4074 pu8 += RT_ASM_PAGE_SIZE;
4075 }
4076
4077 /* the last byte */
4078 ASMProbeReadByte(pu8 + cbBuf - 1);
4079}
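
/* Usage sketch (illustration only, not part of the IPRT API): probe a caller
 * supplied source buffer page by page *before* taking a lock, so any page
 * fault is raised where it is still safe to handle. ExampleLock and
 * ExampleUnlock are hypothetical placeholders for the caller's own locking
 * primitive; cb is assumed to be >= 1 as required by ASMProbeReadBuffer. */
#if 0 /* example only */
static void ExampleCopyUnderLock(void *pvDst, const void *pvSrc, size_t cb)
{
    size_t i;
    ASMProbeReadBuffer(pvSrc, cb);              /* any fault happens here, before the lock */
    ExampleLock();                              /* hypothetical */
    for (i = 0; i < cb; i++)                    /* must not fault while the lock is held */
        ((uint8_t *)pvDst)[i] = ((uint8_t const *)pvSrc)[i];
    ExampleUnlock();                            /* hypothetical */
}
#endif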
4080
4081
4082
4083/** @defgroup grp_inline_bits Bit Operations
4084 * @{
4085 */
4086
4087
4088/**
4089 * Sets a bit in a bitmap.
4090 *
4091 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4092 * @param iBit The bit to set.
4093 *
4094 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4095 * However, doing so will yield better performance as well as avoiding
4096 * traps accessing the last bits in the bitmap.
4097 */
4098#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4099DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4100#else
4101DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4102{
4103# if RT_INLINE_ASM_USES_INTRIN
4104 _bittestandset((long *)pvBitmap, iBit);
4105
4106# elif RT_INLINE_ASM_GNU_STYLE
4107 __asm__ __volatile__("btsl %1, %0"
4108 : "=m" (*(volatile long *)pvBitmap)
4109 : "Ir" (iBit),
4110 "m" (*(volatile long *)pvBitmap)
4111 : "memory");
4112# else
4113 __asm
4114 {
4115# ifdef RT_ARCH_AMD64
4116 mov rax, [pvBitmap]
4117 mov edx, [iBit]
4118 bts [rax], edx
4119# else
4120 mov eax, [pvBitmap]
4121 mov edx, [iBit]
4122 bts [eax], edx
4123# endif
4124 }
4125# endif
4126}
4127#endif
4128
4129
4130/**
4131 * Atomically sets a bit in a bitmap, ordered.
4132 *
4133 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4134 * the memory access isn't atomic!
4135 * @param iBit The bit to set.
4136 *
4137 * @remarks x86: Requires a 386 or later.
4138 */
4139#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4140DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4141#else
4142DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4143{
4144 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4145# if RT_INLINE_ASM_USES_INTRIN
4146 _interlockedbittestandset((long *)pvBitmap, iBit);
4147# elif RT_INLINE_ASM_GNU_STYLE
4148 __asm__ __volatile__("lock; btsl %1, %0"
4149 : "=m" (*(volatile long *)pvBitmap)
4150 : "Ir" (iBit),
4151 "m" (*(volatile long *)pvBitmap)
4152 : "memory");
4153# else
4154 __asm
4155 {
4156# ifdef RT_ARCH_AMD64
4157 mov rax, [pvBitmap]
4158 mov edx, [iBit]
4159 lock bts [rax], edx
4160# else
4161 mov eax, [pvBitmap]
4162 mov edx, [iBit]
4163 lock bts [eax], edx
4164# endif
4165 }
4166# endif
4167}
4168#endif
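
/* Usage sketch (illustration only, not part of the IPRT API): publish a flag
 * in a small shared bitmap. A static uint32_t array is naturally 32-bit
 * aligned, satisfying the requirement above; the array name and bit number
 * are made up for the example. */
#if 0 /* example only */
static uint32_t volatile g_au32ExampleFlags[2];     /* 64 flag bits */
static void ExampleRequestShutdown(void)
{
    /* Bit 37 lands in the second dword; the helper does that indexing itself. */
    ASMAtomicBitSet(g_au32ExampleFlags, 37);
}
#endif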
4169
4170
4171/**
4172 * Clears a bit in a bitmap.
4173 *
4174 * @param pvBitmap Pointer to the bitmap.
4175 * @param iBit The bit to clear.
4176 *
4177 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4178 * However, doing so will yield better performance as well as avoiding
4179 * traps accessing the last bits in the bitmap.
4180 */
4181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4182DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4183#else
4184DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4185{
4186# if RT_INLINE_ASM_USES_INTRIN
4187 _bittestandreset((long *)pvBitmap, iBit);
4188
4189# elif RT_INLINE_ASM_GNU_STYLE
4190 __asm__ __volatile__("btrl %1, %0"
4191 : "=m" (*(volatile long *)pvBitmap)
4192 : "Ir" (iBit),
4193 "m" (*(volatile long *)pvBitmap)
4194 : "memory");
4195# else
4196 __asm
4197 {
4198# ifdef RT_ARCH_AMD64
4199 mov rax, [pvBitmap]
4200 mov edx, [iBit]
4201 btr [rax], edx
4202# else
4203 mov eax, [pvBitmap]
4204 mov edx, [iBit]
4205 btr [eax], edx
4206# endif
4207 }
4208# endif
4209}
4210#endif
4211
4212
4213/**
4214 * Atomically clears a bit in a bitmap, ordered.
4215 *
4216 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4217 * the memory access isn't atomic!
4218 * @param iBit The bit to clear.
4219 *
4220 * @remarks No memory barrier, take care on smp.
4221 * @remarks x86: Requires a 386 or later.
4222 */
4223#if RT_INLINE_ASM_EXTERNAL
4224DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4225#else
4226DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4227{
4228 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4229# if RT_INLINE_ASM_GNU_STYLE
4230 __asm__ __volatile__("lock; btrl %1, %0"
4231 : "=m" (*(volatile long *)pvBitmap)
4232 : "Ir" (iBit),
4233 "m" (*(volatile long *)pvBitmap)
4234 : "memory");
4235# else
4236 __asm
4237 {
4238# ifdef RT_ARCH_AMD64
4239 mov rax, [pvBitmap]
4240 mov edx, [iBit]
4241 lock btr [rax], edx
4242# else
4243 mov eax, [pvBitmap]
4244 mov edx, [iBit]
4245 lock btr [eax], edx
4246# endif
4247 }
4248# endif
4249}
4250#endif
4251
4252
4253/**
4254 * Toggles a bit in a bitmap.
4255 *
4256 * @param pvBitmap Pointer to the bitmap.
4257 * @param iBit The bit to toggle.
4258 *
4259 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4260 * However, doing so will yield better performance as well as avoiding
4261 * traps accessing the last bits in the bitmap.
4262 */
4263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4264DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4265#else
4266DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4267{
4268# if RT_INLINE_ASM_USES_INTRIN
4269 _bittestandcomplement((long *)pvBitmap, iBit);
4270# elif RT_INLINE_ASM_GNU_STYLE
4271 __asm__ __volatile__("btcl %1, %0"
4272 : "=m" (*(volatile long *)pvBitmap)
4273 : "Ir" (iBit),
4274 "m" (*(volatile long *)pvBitmap)
4275 : "memory");
4276# else
4277 __asm
4278 {
4279# ifdef RT_ARCH_AMD64
4280 mov rax, [pvBitmap]
4281 mov edx, [iBit]
4282 btc [rax], edx
4283# else
4284 mov eax, [pvBitmap]
4285 mov edx, [iBit]
4286 btc [eax], edx
4287# endif
4288 }
4289# endif
4290}
4291#endif
4292
4293
4294/**
4295 * Atomically toggles a bit in a bitmap, ordered.
4296 *
4297 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4298 * the memory access isn't atomic!
4299 * @param iBit The bit to toggle.
4300 *
4301 * @remarks x86: Requires a 386 or later.
4302 */
4303#if RT_INLINE_ASM_EXTERNAL
4304DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4305#else
4306DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4307{
4308 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4309# if RT_INLINE_ASM_GNU_STYLE
4310 __asm__ __volatile__("lock; btcl %1, %0"
4311 : "=m" (*(volatile long *)pvBitmap)
4312 : "Ir" (iBit),
4313 "m" (*(volatile long *)pvBitmap)
4314 : "memory");
4315# else
4316 __asm
4317 {
4318# ifdef RT_ARCH_AMD64
4319 mov rax, [pvBitmap]
4320 mov edx, [iBit]
4321 lock btc [rax], edx
4322# else
4323 mov eax, [pvBitmap]
4324 mov edx, [iBit]
4325 lock btc [eax], edx
4326# endif
4327 }
4328# endif
4329}
4330#endif
4331
4332
4333/**
4334 * Tests and sets a bit in a bitmap.
4335 *
4336 * @returns true if the bit was set.
4337 * @returns false if the bit was clear.
4338 *
4339 * @param pvBitmap Pointer to the bitmap.
4340 * @param iBit The bit to test and set.
4341 *
4342 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4343 * However, doing so will yield better performance as well as avoiding
4344 * traps accessing the last bits in the bitmap.
4345 */
4346#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4347DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4348#else
4349DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4350{
4351 union { bool f; uint32_t u32; uint8_t u8; } rc;
4352# if RT_INLINE_ASM_USES_INTRIN
4353 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4354
4355# elif RT_INLINE_ASM_GNU_STYLE
4356 __asm__ __volatile__("btsl %2, %1\n\t"
4357 "setc %b0\n\t"
4358 "andl $1, %0\n\t"
4359 : "=q" (rc.u32),
4360 "=m" (*(volatile long *)pvBitmap)
4361 : "Ir" (iBit),
4362 "m" (*(volatile long *)pvBitmap)
4363 : "memory");
4364# else
4365 __asm
4366 {
4367 mov edx, [iBit]
4368# ifdef RT_ARCH_AMD64
4369 mov rax, [pvBitmap]
4370 bts [rax], edx
4371# else
4372 mov eax, [pvBitmap]
4373 bts [eax], edx
4374# endif
4375 setc al
4376 and eax, 1
4377 mov [rc.u32], eax
4378 }
4379# endif
4380 return rc.f;
4381}
4382#endif
4383
4384
4385/**
4386 * Atomically tests and sets a bit in a bitmap, ordered.
4387 *
4388 * @returns true if the bit was set.
4389 * @returns false if the bit was clear.
4390 *
4391 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4392 * the memory access isn't atomic!
4393 * @param iBit The bit to test and set.
4394 *
4395 * @remarks x86: Requires a 386 or later.
4396 */
4397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4398DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4399#else
4400DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4401{
4402 union { bool f; uint32_t u32; uint8_t u8; } rc;
4403 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4404# if RT_INLINE_ASM_USES_INTRIN
4405 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4406# elif RT_INLINE_ASM_GNU_STYLE
4407 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4408 "setc %b0\n\t"
4409 "andl $1, %0\n\t"
4410 : "=q" (rc.u32),
4411 "=m" (*(volatile long *)pvBitmap)
4412 : "Ir" (iBit),
4413 "m" (*(volatile long *)pvBitmap)
4414 : "memory");
4415# else
4416 __asm
4417 {
4418 mov edx, [iBit]
4419# ifdef RT_ARCH_AMD64
4420 mov rax, [pvBitmap]
4421 lock bts [rax], edx
4422# else
4423 mov eax, [pvBitmap]
4424 lock bts [eax], edx
4425# endif
4426 setc al
4427 and eax, 1
4428 mov [rc.u32], eax
4429 }
4430# endif
4431 return rc.f;
4432}
4433#endif
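
/* Usage sketch (illustration only, not part of the IPRT API): claim a free
 * slot in a shared allocation bitmap. Because ASMAtomicBitTestAndSet returns
 * the previous bit value, a false return means this caller set the bit first
 * and thus owns the slot. Bitmap and slot count are made up for the example. */
#if 0 /* example only */
static uint32_t volatile g_bmExampleSlots[4];       /* 128 slots, 32-bit aligned */
static int32_t ExampleClaimSlot(void)
{
    int32_t iSlot;
    for (iSlot = 0; iSlot < 128; iSlot++)
        if (!ASMAtomicBitTestAndSet(g_bmExampleSlots, iSlot))
            return iSlot;                           /* the bit was clear; we own it now */
    return -1;                                      /* all slots taken */
}
#endif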
4434
4435
4436/**
4437 * Tests and clears a bit in a bitmap.
4438 *
4439 * @returns true if the bit was set.
4440 * @returns false if the bit was clear.
4441 *
4442 * @param pvBitmap Pointer to the bitmap.
4443 * @param iBit The bit to test and clear.
4444 *
4445 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4446 * However, doing so will yield better performance as well as avoiding
4447 * traps accessing the last bits in the bitmap.
4448 */
4449#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4450DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4451#else
4452DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4453{
4454 union { bool f; uint32_t u32; uint8_t u8; } rc;
4455# if RT_INLINE_ASM_USES_INTRIN
4456 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4457
4458# elif RT_INLINE_ASM_GNU_STYLE
4459 __asm__ __volatile__("btrl %2, %1\n\t"
4460 "setc %b0\n\t"
4461 "andl $1, %0\n\t"
4462 : "=q" (rc.u32),
4463 "=m" (*(volatile long *)pvBitmap)
4464 : "Ir" (iBit),
4465 "m" (*(volatile long *)pvBitmap)
4466 : "memory");
4467# else
4468 __asm
4469 {
4470 mov edx, [iBit]
4471# ifdef RT_ARCH_AMD64
4472 mov rax, [pvBitmap]
4473 btr [rax], edx
4474# else
4475 mov eax, [pvBitmap]
4476 btr [eax], edx
4477# endif
4478 setc al
4479 and eax, 1
4480 mov [rc.u32], eax
4481 }
4482# endif
4483 return rc.f;
4484}
4485#endif
4486
4487
4488/**
4489 * Atomically tests and clears a bit in a bitmap, ordered.
4490 *
4491 * @returns true if the bit was set.
4492 * @returns false if the bit was clear.
4493 *
4494 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4495 * the memory access isn't atomic!
4496 * @param iBit The bit to test and clear.
4497 *
4498 * @remarks No memory barrier, take care on smp.
4499 * @remarks x86: Requires a 386 or later.
4500 */
4501#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4502DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4503#else
4504DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4505{
4506 union { bool f; uint32_t u32; uint8_t u8; } rc;
4507 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4508# if RT_INLINE_ASM_USES_INTRIN
4509 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4510
4511# elif RT_INLINE_ASM_GNU_STYLE
4512 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4513 "setc %b0\n\t"
4514 "andl $1, %0\n\t"
4515 : "=q" (rc.u32),
4516 "=m" (*(volatile long *)pvBitmap)
4517 : "Ir" (iBit),
4518 "m" (*(volatile long *)pvBitmap)
4519 : "memory");
4520# else
4521 __asm
4522 {
4523 mov edx, [iBit]
4524# ifdef RT_ARCH_AMD64
4525 mov rax, [pvBitmap]
4526 lock btr [rax], edx
4527# else
4528 mov eax, [pvBitmap]
4529 lock btr [eax], edx
4530# endif
4531 setc al
4532 and eax, 1
4533 mov [rc.u32], eax
4534 }
4535# endif
4536 return rc.f;
4537}
4538#endif
4539
4540
4541/**
4542 * Tests and toggles a bit in a bitmap.
4543 *
4544 * @returns true if the bit was set.
4545 * @returns false if the bit was clear.
4546 *
4547 * @param pvBitmap Pointer to the bitmap.
4548 * @param iBit The bit to test and toggle.
4549 *
4550 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4551 * However, doing so will yield better performance as well as avoiding
4552 * traps accessing the last bits in the bitmap.
4553 */
4554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4555DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4556#else
4557DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4558{
4559 union { bool f; uint32_t u32; uint8_t u8; } rc;
4560# if RT_INLINE_ASM_USES_INTRIN
4561 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4562
4563# elif RT_INLINE_ASM_GNU_STYLE
4564 __asm__ __volatile__("btcl %2, %1\n\t"
4565 "setc %b0\n\t"
4566 "andl $1, %0\n\t"
4567 : "=q" (rc.u32),
4568 "=m" (*(volatile long *)pvBitmap)
4569 : "Ir" (iBit),
4570 "m" (*(volatile long *)pvBitmap)
4571 : "memory");
4572# else
4573 __asm
4574 {
4575 mov edx, [iBit]
4576# ifdef RT_ARCH_AMD64
4577 mov rax, [pvBitmap]
4578 btc [rax], edx
4579# else
4580 mov eax, [pvBitmap]
4581 btc [eax], edx
4582# endif
4583 setc al
4584 and eax, 1
4585 mov [rc.u32], eax
4586 }
4587# endif
4588 return rc.f;
4589}
4590#endif
4591
4592
4593/**
4594 * Atomically tests and toggles a bit in a bitmap, ordered.
4595 *
4596 * @returns true if the bit was set.
4597 * @returns false if the bit was clear.
4598 *
4599 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4600 * the memory access isn't atomic!
4601 * @param iBit The bit to test and toggle.
4602 *
4603 * @remarks x86: Requires a 386 or later.
4604 */
4605#if RT_INLINE_ASM_EXTERNAL
4606DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4607#else
4608DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4609{
4610 union { bool f; uint32_t u32; uint8_t u8; } rc;
4611 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4612# if RT_INLINE_ASM_GNU_STYLE
4613 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4614 "setc %b0\n\t"
4615 "andl $1, %0\n\t"
4616 : "=q" (rc.u32),
4617 "=m" (*(volatile long *)pvBitmap)
4618 : "Ir" (iBit),
4619 "m" (*(volatile long *)pvBitmap)
4620 : "memory");
4621# else
4622 __asm
4623 {
4624 mov edx, [iBit]
4625# ifdef RT_ARCH_AMD64
4626 mov rax, [pvBitmap]
4627 lock btc [rax], edx
4628# else
4629 mov eax, [pvBitmap]
4630 lock btc [eax], edx
4631# endif
4632 setc al
4633 and eax, 1
4634 mov [rc.u32], eax
4635 }
4636# endif
4637 return rc.f;
4638}
4639#endif
4640
4641
4642/**
4643 * Tests if a bit in a bitmap is set.
4644 *
4645 * @returns true if the bit is set.
4646 * @returns false if the bit is clear.
4647 *
4648 * @param pvBitmap Pointer to the bitmap.
4649 * @param iBit The bit to test.
4650 *
4651 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4652 * However, doing so will yield better performance as well as avoiding
4653 * traps accessing the last bits in the bitmap.
4654 */
4655#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4656DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4657#else
4658DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4659{
4660 union { bool f; uint32_t u32; uint8_t u8; } rc;
4661# if RT_INLINE_ASM_USES_INTRIN
4662 rc.u32 = _bittest((long *)pvBitmap, iBit);
4663# elif RT_INLINE_ASM_GNU_STYLE
4664
4665 __asm__ __volatile__("btl %2, %1\n\t"
4666 "setc %b0\n\t"
4667 "andl $1, %0\n\t"
4668 : "=q" (rc.u32)
4669 : "m" (*(const volatile long *)pvBitmap),
4670 "Ir" (iBit)
4671 : "memory");
4672# else
4673 __asm
4674 {
4675 mov edx, [iBit]
4676# ifdef RT_ARCH_AMD64
4677 mov rax, [pvBitmap]
4678 bt [rax], edx
4679# else
4680 mov eax, [pvBitmap]
4681 bt [eax], edx
4682# endif
4683 setc al
4684 and eax, 1
4685 mov [rc.u32], eax
4686 }
4687# endif
4688 return rc.f;
4689}
4690#endif
4691
4692
4693/**
4694 * Clears a bit range within a bitmap.
4695 *
4696 * @param pvBitmap Pointer to the bitmap.
4697 * @param iBitStart The first bit to clear.
4698 * @param iBitEnd The first bit not to clear.
4699 */
4700DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4701{
4702 if (iBitStart < iBitEnd)
4703 {
4704 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4705 int32_t iStart = iBitStart & ~31;
4706 int32_t iEnd = iBitEnd & ~31;
4707 if (iStart == iEnd)
4708 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4709 else
4710 {
4711 /* bits in first dword. */
4712 if (iBitStart & 31)
4713 {
4714 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4715 pu32++;
4716 iBitStart = iStart + 32;
4717 }
4718
4719 /* whole dword. */
4720 if (iBitStart != iEnd)
4721 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4722
4723 /* bits in last dword. */
4724 if (iBitEnd & 31)
4725 {
4726 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4727 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4728 }
4729 }
4730 }
4731}
4732
4733
4734/**
4735 * Sets a bit range within a bitmap.
4736 *
4737 * @param pvBitmap Pointer to the bitmap.
4738 * @param iBitStart The first bit to set.
4739 * @param iBitEnd The first bit not to set.
4740 */
4741DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4742{
4743 if (iBitStart < iBitEnd)
4744 {
4745 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4746 int32_t iStart = iBitStart & ~31;
4747 int32_t iEnd = iBitEnd & ~31;
4748 if (iStart == iEnd)
4749 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4750 else
4751 {
4752 /* bits in first dword. */
4753 if (iBitStart & 31)
4754 {
4755 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4756 pu32++;
4757 iBitStart = iStart + 32;
4758 }
4759
4760 /* whole dword. */
4761 if (iBitStart != iEnd)
4762 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4763
4764 /* bits in last dword. */
4765 if (iBitEnd & 31)
4766 {
4767 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4768 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4769 }
4770 }
4771 }
4772}
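
/* Usage sketch (illustration only, not part of the IPRT API): mark a run of
 * pages as used or free with the two (non-atomic) range helpers above. Note
 * that iBitEnd is exclusive. The bitmap and page count are made up. */
#if 0 /* example only */
static uint32_t g_bmExamplePages[1024 / 32];        /* 1024 page bits */
static void ExampleMarkPages(int32_t iFirstPage, int32_t cPages, bool fInUse)
{
    if (fInUse)
        ASMBitSetRange(g_bmExamplePages, iFirstPage, iFirstPage + cPages);
    else
        ASMBitClearRange(g_bmExamplePages, iFirstPage, iFirstPage + cPages);
}
#endif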
4773
4774
4775/**
4776 * Finds the first clear bit in a bitmap.
4777 *
4778 * @returns Index of the first zero bit.
4779 * @returns -1 if no clear bit was found.
4780 * @param pvBitmap Pointer to the bitmap.
4781 * @param cBits The number of bits in the bitmap. Multiple of 32.
4782 */
4783#if RT_INLINE_ASM_EXTERNAL
4784DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4785#else
4786DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4787{
4788 if (cBits)
4789 {
4790 int32_t iBit;
4791# if RT_INLINE_ASM_GNU_STYLE
4792 RTCCUINTREG uEAX, uECX, uEDI;
4793 cBits = RT_ALIGN_32(cBits, 32);
4794 __asm__ __volatile__("repe; scasl\n\t"
4795 "je 1f\n\t"
4796# ifdef RT_ARCH_AMD64
4797 "lea -4(%%rdi), %%rdi\n\t"
4798 "xorl (%%rdi), %%eax\n\t"
4799 "subq %5, %%rdi\n\t"
4800# else
4801 "lea -4(%%edi), %%edi\n\t"
4802 "xorl (%%edi), %%eax\n\t"
4803 "subl %5, %%edi\n\t"
4804# endif
4805 "shll $3, %%edi\n\t"
4806 "bsfl %%eax, %%edx\n\t"
4807 "addl %%edi, %%edx\n\t"
4808 "1:\t\n"
4809 : "=d" (iBit),
4810 "=&c" (uECX),
4811 "=&D" (uEDI),
4812 "=&a" (uEAX)
4813 : "0" (0xffffffff),
4814 "mr" (pvBitmap),
4815 "1" (cBits >> 5),
4816 "2" (pvBitmap),
4817 "3" (0xffffffff));
4818# else
4819 cBits = RT_ALIGN_32(cBits, 32);
4820 __asm
4821 {
4822# ifdef RT_ARCH_AMD64
4823 mov rdi, [pvBitmap]
4824 mov rbx, rdi
4825# else
4826 mov edi, [pvBitmap]
4827 mov ebx, edi
4828# endif
4829 mov edx, 0ffffffffh
4830 mov eax, edx
4831 mov ecx, [cBits]
4832 shr ecx, 5
4833 repe scasd
4834 je done
4835
4836# ifdef RT_ARCH_AMD64
4837 lea rdi, [rdi - 4]
4838 xor eax, [rdi]
4839 sub rdi, rbx
4840# else
4841 lea edi, [edi - 4]
4842 xor eax, [edi]
4843 sub edi, ebx
4844# endif
4845 shl edi, 3
4846 bsf edx, eax
4847 add edx, edi
4848 done:
4849 mov [iBit], edx
4850 }
4851# endif
4852 return iBit;
4853 }
4854 return -1;
4855}
4856#endif
4857
4858
4859/**
4860 * Finds the next clear bit in a bitmap.
4861 *
4862 * @returns Index of the next clear bit.
4863 * @returns -1 if no clear bit was found.
4864 * @param pvBitmap Pointer to the bitmap.
4865 * @param cBits The number of bits in the bitmap. Multiple of 32.
4866 * @param iBitPrev The bit returned from the last search.
4867 * The search will start at iBitPrev + 1.
4868 */
4869#if RT_INLINE_ASM_EXTERNAL
4870DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4871#else
4872DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4873{
4874 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4875 int iBit = ++iBitPrev & 31;
4876 if (iBit)
4877 {
4878 /*
4879 * Inspect the 32-bit word containing the unaligned bit.
4880 */
4881 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4882
4883# if RT_INLINE_ASM_USES_INTRIN
4884 unsigned long ulBit = 0;
4885 if (_BitScanForward(&ulBit, u32))
4886 return ulBit + iBitPrev;
4887# else
4888# if RT_INLINE_ASM_GNU_STYLE
4889 __asm__ __volatile__("bsf %1, %0\n\t"
4890 "jnz 1f\n\t"
4891 "movl $-1, %0\n\t"
4892 "1:\n\t"
4893 : "=r" (iBit)
4894 : "r" (u32));
4895# else
4896 __asm
4897 {
4898 mov edx, [u32]
4899 bsf eax, edx
4900 jnz done
4901 mov eax, 0ffffffffh
4902 done:
4903 mov [iBit], eax
4904 }
4905# endif
4906 if (iBit >= 0)
4907 return iBit + iBitPrev;
4908# endif
4909
4910 /*
4911 * Skip ahead and see if there is anything left to search.
4912 */
4913 iBitPrev |= 31;
4914 iBitPrev++;
4915 if (cBits <= (uint32_t)iBitPrev)
4916 return -1;
4917 }
4918
4919 /*
4920 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4921 */
4922 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4923 if (iBit >= 0)
4924 iBit += iBitPrev;
4925 return iBit;
4926}
4927#endif
4928
4929
4930/**
4931 * Finds the first set bit in a bitmap.
4932 *
4933 * @returns Index of the first set bit.
4934 * @returns -1 if no set bit was found.
4935 * @param pvBitmap Pointer to the bitmap.
4936 * @param cBits The number of bits in the bitmap. Multiple of 32.
4937 */
4938#if RT_INLINE_ASM_EXTERNAL
4939DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4940#else
4941DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4942{
4943 if (cBits)
4944 {
4945 int32_t iBit;
4946# if RT_INLINE_ASM_GNU_STYLE
4947 RTCCUINTREG uEAX, uECX, uEDI;
4948 cBits = RT_ALIGN_32(cBits, 32);
4949 __asm__ __volatile__("repe; scasl\n\t"
4950 "je 1f\n\t"
4951# ifdef RT_ARCH_AMD64
4952 "lea -4(%%rdi), %%rdi\n\t"
4953 "movl (%%rdi), %%eax\n\t"
4954 "subq %5, %%rdi\n\t"
4955# else
4956 "lea -4(%%edi), %%edi\n\t"
4957 "movl (%%edi), %%eax\n\t"
4958 "subl %5, %%edi\n\t"
4959# endif
4960 "shll $3, %%edi\n\t"
4961 "bsfl %%eax, %%edx\n\t"
4962 "addl %%edi, %%edx\n\t"
4963 "1:\t\n"
4964 : "=d" (iBit),
4965 "=&c" (uECX),
4966 "=&D" (uEDI),
4967 "=&a" (uEAX)
4968 : "0" (0xffffffff),
4969 "mr" (pvBitmap),
4970 "1" (cBits >> 5),
4971 "2" (pvBitmap),
4972 "3" (0));
4973# else
4974 cBits = RT_ALIGN_32(cBits, 32);
4975 __asm
4976 {
4977# ifdef RT_ARCH_AMD64
4978 mov rdi, [pvBitmap]
4979 mov rbx, rdi
4980# else
4981 mov edi, [pvBitmap]
4982 mov ebx, edi
4983# endif
4984 mov edx, 0ffffffffh
4985 xor eax, eax
4986 mov ecx, [cBits]
4987 shr ecx, 5
4988 repe scasd
4989 je done
4990# ifdef RT_ARCH_AMD64
4991 lea rdi, [rdi - 4]
4992 mov eax, [rdi]
4993 sub rdi, rbx
4994# else
4995 lea edi, [edi - 4]
4996 mov eax, [edi]
4997 sub edi, ebx
4998# endif
4999 shl edi, 3
5000 bsf edx, eax
5001 add edx, edi
5002 done:
5003 mov [iBit], edx
5004 }
5005# endif
5006 return iBit;
5007 }
5008 return -1;
5009}
5010#endif
5011
5012
5013/**
5014 * Finds the next set bit in a bitmap.
5015 *
5016 * @returns Index of the next set bit.
5017 * @returns -1 if no set bit was found.
5018 * @param pvBitmap Pointer to the bitmap.
5019 * @param cBits The number of bits in the bitmap. Multiple of 32.
5020 * @param iBitPrev The bit returned from the last search.
5021 * The search will start at iBitPrev + 1.
5022 */
5023#if RT_INLINE_ASM_EXTERNAL
5024DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5025#else
5026DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5027{
5028 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5029 int iBit = ++iBitPrev & 31;
5030 if (iBit)
5031 {
5032 /*
5033 * Inspect the 32-bit word containing the unaligned bit.
5034 */
5035 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5036
5037# if RT_INLINE_ASM_USES_INTRIN
5038 unsigned long ulBit = 0;
5039 if (_BitScanForward(&ulBit, u32))
5040 return ulBit + iBitPrev;
5041# else
5042# if RT_INLINE_ASM_GNU_STYLE
5043 __asm__ __volatile__("bsf %1, %0\n\t"
5044 "jnz 1f\n\t"
5045 "movl $-1, %0\n\t"
5046 "1:\n\t"
5047 : "=r" (iBit)
5048 : "r" (u32));
5049# else
5050 __asm
5051 {
5052 mov edx, [u32]
5053 bsf eax, edx
5054 jnz done
5055 mov eax, 0ffffffffh
5056 done:
5057 mov [iBit], eax
5058 }
5059# endif
5060 if (iBit >= 0)
5061 return iBit + iBitPrev;
5062# endif
5063
5064 /*
5065 * Skip ahead and see if there is anything left to search.
5066 */
5067 iBitPrev |= 31;
5068 iBitPrev++;
5069 if (cBits <= (uint32_t)iBitPrev)
5070 return -1;
5071 }
5072
5073 /*
5074 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5075 */
5076 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5077 if (iBit >= 0)
5078 iBit += iBitPrev;
5079 return iBit;
5080}
5081#endif
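
/* Usage sketch (illustration only, not part of the IPRT API): visit every set
 * bit in a bitmap by chaining ASMBitFirstSet and ASMBitNextSet, which is the
 * intended use of the iBitPrev parameter. ExampleVisit is a hypothetical
 * callback; cBits is assumed to be a multiple of 32 as the docs require. */
#if 0 /* example only */
static void ExampleForEachSetBit(uint32_t const *pau32Bitmap, uint32_t cBits)
{
    int32_t iBit = ASMBitFirstSet(pau32Bitmap, cBits);
    while (iBit >= 0)
    {
        ExampleVisit((uint32_t)iBit);               /* hypothetical */
        iBit = ASMBitNextSet(pau32Bitmap, cBits, (uint32_t)iBit);
    }
}
#endif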
5082
5083
5084/**
5085 * Finds the first bit which is set in the given 32-bit integer.
5086 * Bits are numbered from 1 (least significant) to 32.
5087 *
5088 * @returns index [1..32] of the first set bit.
5089 * @returns 0 if all bits are cleared.
5090 * @param u32 Integer to search for set bits.
5091 * @remarks Similar to ffs() in BSD.
5092 */
5093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5094DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5095#else
5096DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5097{
5098# if RT_INLINE_ASM_USES_INTRIN
5099 unsigned long iBit;
5100 if (_BitScanForward(&iBit, u32))
5101 iBit++;
5102 else
5103 iBit = 0;
5104# elif RT_INLINE_ASM_GNU_STYLE
5105 uint32_t iBit;
5106 __asm__ __volatile__("bsf %1, %0\n\t"
5107 "jnz 1f\n\t"
5108 "xorl %0, %0\n\t"
5109 "jmp 2f\n"
5110 "1:\n\t"
5111 "incl %0\n"
5112 "2:\n\t"
5113 : "=r" (iBit)
5114 : "rm" (u32));
5115# else
5116 uint32_t iBit;
5117 _asm
5118 {
5119 bsf eax, [u32]
5120 jnz found
5121 xor eax, eax
5122 jmp done
5123 found:
5124 inc eax
5125 done:
5126 mov [iBit], eax
5127 }
5128# endif
5129 return iBit;
5130}
5131#endif
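
/* Usage sketch (illustration only, not part of the IPRT API): since the
 * result is 1-based (ffs() style), subtracting one gives the 0-based index of
 * the lowest set bit, e.g. the shift count of a power-of-two alignment. The
 * caller is assumed to pass a non-zero power of two. */
#if 0 /* example only */
static unsigned ExampleAlignmentShift(uint32_t uAlignment)
{
    return ASMBitFirstSetU32(uAlignment) - 1;       /* e.g. 8 -> 3, 4096 -> 12 */
}
#endif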
5132
5133
5134/**
5135 * Finds the first bit which is set in the given 32-bit integer.
5136 * Bits are numbered from 1 (least significant) to 32.
5137 *
5138 * @returns index [1..32] of the first set bit.
5139 * @returns 0 if all bits are cleared.
5140 * @param i32 Integer to search for set bits.
5141 * @remark Similar to ffs() in BSD.
5142 */
5143DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5144{
5145 return ASMBitFirstSetU32((uint32_t)i32);
5146}
5147
5148
5149/**
5150 * Finds the first bit which is set in the given 64-bit integer.
5151 *
5152 * Bits are numbered from 1 (least significant) to 64.
5153 *
5154 * @returns index [1..64] of the first set bit.
5155 * @returns 0 if all bits are cleared.
5156 * @param u64 Integer to search for set bits.
5157 * @remarks Similar to ffs() in BSD.
5158 */
5159#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5160DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5161#else
5162DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5163{
5164# if RT_INLINE_ASM_USES_INTRIN
5165 unsigned long iBit;
5166# if ARCH_BITS == 64
5167 if (_BitScanForward64(&iBit, u64))
5168 iBit++;
5169 else
5170 iBit = 0;
5171# else
5172 if (_BitScanForward(&iBit, (uint32_t)u64))
5173 iBit++;
5174 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5175 iBit += 33;
5176 else
5177 iBit = 0;
5178# endif
5179# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5180 uint64_t iBit;
5181 __asm__ __volatile__("bsfq %1, %0\n\t"
5182 "jnz 1f\n\t"
5183 "xorl %0, %0\n\t"
5184 "jmp 2f\n"
5185 "1:\n\t"
5186 "incl %0\n"
5187 "2:\n\t"
5188 : "=r" (iBit)
5189 : "rm" (u64));
5190# else
5191 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5192 if (!iBit)
5193 {
5194 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5195 if (iBit)
5196 iBit += 32;
5197 }
5198# endif
5199 return (unsigned)iBit;
5200}
5201#endif
5202
5203
5204/**
5205 * Finds the first bit which is set in the given 16-bit integer.
5206 *
5207 * Bits are numbered from 1 (least significant) to 16.
5208 *
5209 * @returns index [1..16] of the first set bit.
5210 * @returns 0 if all bits are cleared.
5211 * @param u16 Integer to search for set bits.
5212 * @remarks For 16-bit bs3kit code.
5213 */
5214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5215DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5216#else
5217DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5218{
5219 return ASMBitFirstSetU32((uint32_t)u16);
5220}
5221#endif
5222
5223
5224/**
5225 * Finds the last bit which is set in the given 32-bit integer.
5226 * Bits are numbered from 1 (least significant) to 32.
5227 *
5228 * @returns index [1..32] of the last set bit.
5229 * @returns 0 if all bits are cleared.
5230 * @param u32 Integer to search for set bits.
5231 * @remark Similar to fls() in BSD.
5232 */
5233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5234DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5235#else
5236DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5237{
5238# if RT_INLINE_ASM_USES_INTRIN
5239 unsigned long iBit;
5240 if (_BitScanReverse(&iBit, u32))
5241 iBit++;
5242 else
5243 iBit = 0;
5244# elif RT_INLINE_ASM_GNU_STYLE
5245 uint32_t iBit;
5246 __asm__ __volatile__("bsrl %1, %0\n\t"
5247 "jnz 1f\n\t"
5248 "xorl %0, %0\n\t"
5249 "jmp 2f\n"
5250 "1:\n\t"
5251 "incl %0\n"
5252 "2:\n\t"
5253 : "=r" (iBit)
5254 : "rm" (u32));
5255# else
5256 uint32_t iBit;
5257 _asm
5258 {
5259 bsr eax, [u32]
5260 jnz found
5261 xor eax, eax
5262 jmp done
5263 found:
5264 inc eax
5265 done:
5266 mov [iBit], eax
5267 }
5268# endif
5269 return iBit;
5270}
5271#endif
5272
5273
5274/**
5275 * Finds the last bit which is set in the given 32-bit integer.
5276 * Bits are numbered from 1 (least significant) to 32.
5277 *
5278 * @returns index [1..32] of the last set bit.
5279 * @returns 0 if all bits are cleared.
5280 * @param i32 Integer to search for set bits.
5281 * @remark Similar to fls() in BSD.
5282 */
5283DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5284{
5285 return ASMBitLastSetU32((uint32_t)i32);
5286}
5287
5288
5289/**
5290 * Finds the last bit which is set in the given 64-bit integer.
5291 *
5292 * Bits are numbered from 1 (least significant) to 64.
5293 *
5294 * @returns index [1..64] of the last set bit.
5295 * @returns 0 if all bits are cleared.
5296 * @param u64 Integer to search for set bits.
5297 * @remark Similar to fls() in BSD.
5298 */
5299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5300DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5301#else
5302DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5303{
5304# if RT_INLINE_ASM_USES_INTRIN
5305 unsigned long iBit;
5306# if ARCH_BITS == 64
5307 if (_BitScanReverse64(&iBit, u64))
5308 iBit++;
5309 else
5310 iBit = 0;
5311# else
5312 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5313 iBit += 33;
5314 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5315 iBit++;
5316 else
5317 iBit = 0;
5318# endif
5319# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5320 uint64_t iBit;
5321 __asm__ __volatile__("bsrq %1, %0\n\t"
5322 "jnz 1f\n\t"
5323 "xorl %0, %0\n\t"
5324 "jmp 2f\n"
5325 "1:\n\t"
5326 "incl %0\n"
5327 "2:\n\t"
5328 : "=r" (iBit)
5329 : "rm" (u64));
5330# else
5331 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5332 if (iBit)
5333 iBit += 32;
5334 else
5335 iBit = ASMBitLastSetU32((uint32_t)u64);
5336# endif
5337 return (unsigned)iBit;
5338}
5339#endif
5340
5341
5342/**
5343 * Finds the last bit which is set in the given 16-bit integer.
5344 *
5345 * Bits are numbered from 1 (least significant) to 16.
5346 *
5347 * @returns index [1..16] of the last set bit.
5348 * @returns 0 if all bits are cleared.
5349 * @param u16 Integer to search for set bits.
5350 * @remarks For 16-bit bs3kit code.
5351 */
5352#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5353DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5354#else
5355DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5356{
5357 return ASMBitLastSetU32((uint32_t)u16);
5358}
5359#endif
5360
5361
5362/**
5363 * Reverse the byte order of the given 16-bit integer.
5364 *
5365 * @returns The byte swapped value.
5366 * @param u16 16-bit integer value.
5367 */
5368#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5369DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5370#else
5371DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5372{
5373# if RT_INLINE_ASM_USES_INTRIN
5374 u16 = _byteswap_ushort(u16);
5375# elif RT_INLINE_ASM_GNU_STYLE
5376 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5377# else
5378 _asm
5379 {
5380 mov ax, [u16]
5381 ror ax, 8
5382 mov [u16], ax
5383 }
5384# endif
5385 return u16;
5386}
5387#endif
5388
5389
5390/**
5391 * Reverse the byte order of the given 32-bit integer.
5392 *
5393 * @returns The byte swapped value.
5394 * @param u32 32-bit integer value.
5395 */
5396#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5397DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5398#else
5399DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5400{
5401# if RT_INLINE_ASM_USES_INTRIN
5402 u32 = _byteswap_ulong(u32);
5403# elif RT_INLINE_ASM_GNU_STYLE
5404 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5405# else
5406 _asm
5407 {
5408 mov eax, [u32]
5409 bswap eax
5410 mov [u32], eax
5411 }
5412# endif
5413 return u32;
5414}
5415#endif
5416
5417
5418/**
5419 * Reverse the byte order of the given 64-bit integer.
5420 *
5421 * @returns The byte swapped value.
5422 * @param u64 64-bit integer value.
5423 */
5424DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5425{
5426#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5427 u64 = _byteswap_uint64(u64);
5428#else
5429 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5430 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5431#endif
5432 return u64;
5433}
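
/* Usage sketch (illustration only, not part of the IPRT API): load a
 * big-endian 64-bit wire value on a little-endian host (x86/AMD64) by byte
 * copying, to avoid alignment traps, and then swapping. Real IPRT code would
 * normally use its dedicated endian helpers instead. */
#if 0 /* example only */
static uint64_t ExampleLoadU64BigEndian(const void *pv)
{
    uint64_t u64;
    size_t   i;
    for (i = 0; i < sizeof(u64); i++)
        ((uint8_t *)&u64)[i] = ((uint8_t const *)pv)[i];
    return ASMByteSwapU64(u64);                     /* big endian -> host order */
}
#endif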
5434
5435
5436/**
5437 * Rotate 32-bit unsigned value to the left by @a cShift.
5438 *
5439 * @returns Rotated value.
5440 * @param u32 The value to rotate.
5441 * @param cShift How many bits to rotate by.
5442 */
5443#ifdef __WATCOMC__
5444DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5445#else
5446DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5447{
5448# if RT_INLINE_ASM_USES_INTRIN
5449 return _rotl(u32, cShift);
5450# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5451 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5452 return u32;
5453# else
5454 cShift &= 31;
5455 return (u32 << cShift) | (u32 >> (32 - cShift));
5456# endif
5457}
5458#endif
5459
5460
5461/**
5462 * Rotate 32-bit unsigned value to the right by @a cShift.
5463 *
5464 * @returns Rotated value.
5465 * @param u32 The value to rotate.
5466 * @param cShift How many bits to rotate by.
5467 */
5468#ifdef __WATCOMC__
5469DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5470#else
5471DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5472{
5473# if RT_INLINE_ASM_USES_INTRIN
5474 return _rotr(u32, cShift);
5475# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5476 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5477 return u32;
5478# else
5479 cShift &= 31;
5480 return (u32 >> cShift) | (u32 << (32 - cShift));
5481# endif
5482}
5483#endif
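
/* Usage sketch (illustration only, not part of the IPRT API): for shift
 * counts in the 0..31 range a left rotate equals a right rotate by the
 * complementary count, a handy identity when porting code between the two
 * helpers above. */
#if 0 /* example only */
static uint32_t ExampleRotateLeftViaRight(uint32_t u32, uint32_t cShift)
{
    return ASMRotateRightU32(u32, (32 - cShift) & 31);  /* == ASMRotateLeftU32(u32, cShift) */
}
#endif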
5484
5485
5486/**
5487 * Rotate 64-bit unsigned value to the left by @a cShift.
5488 *
5489 * @returns Rotated value.
5490 * @param u64 The value to rotate.
5491 * @param cShift How many bits to rotate by.
5492 */
5493DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5494{
5495#if RT_INLINE_ASM_USES_INTRIN
5496 return _rotl64(u64, cShift);
5497#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5498 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5499 return u64;
5500#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5501 uint32_t uSpill;
5502 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5503 "jz 1f\n\t"
5504 "xchgl %%eax, %%edx\n\t"
5505 "1:\n\t"
5506 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5507 "jz 2f\n\t"
5508 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5509 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5510 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5511 "2:\n\t" /* } */
5512 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5513 : "0" (u64),
5514 "1" (cShift));
5515 return u64;
5516#else
5517 cShift &= 63;
5518 return (u64 << cShift) | (u64 >> (64 - cShift));
5519#endif
5520}
5521
5522
5523/**
5524 * Rotate 64-bit unsigned value to the right by @a cShift.
5525 *
5526 * @returns Rotated value.
5527 * @param u64 The value to rotate.
5528 * @param cShift How many bits to rotate by.
5529 */
5530DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5531{
5532#if RT_INLINE_ASM_USES_INTRIN
5533 return _rotr64(u64, cShift);
5534#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5535 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5536 return u64;
5537#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5538 uint32_t uSpill;
5539 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5540 "jz 1f\n\t"
5541 "xchgl %%eax, %%edx\n\t"
5542 "1:\n\t"
5543 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5544 "jz 2f\n\t"
5545 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5546 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5547 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5548 "2:\n\t" /* } */
5549 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5550 : "0" (u64),
5551 "1" (cShift));
5552 return u64;
5553#else
5554 cShift &= 63;
5555 return (u64 >> cShift) | (u64 << (64 - cShift));
5556#endif
5557}
5558
5559/** @} */
5560
5561
5562/** @} */
5563
5564#endif
5565