VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 59527

Last change on this file since 59527 was 59527, checked in by vboxsync, 9 years ago

iprt/asm.h: Experimented with alternative serializing instructions (ASMSerializeInstruction), cpuid(0) is very reasonable compared to iret (rdtscp would be preferable).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 155.9 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Microsoft compiler intrinsics (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
98 * @remarks The difference between ordered and unordered atomic operations is that
99 * the former will complete outstanding reads and writes before continuing
100 * while the latter doesn't make any promises about the order. Ordered
101 * operations don't, it seems, make any 100% promise wrt whether
102 * the operation will complete before any subsequent memory access.
103 * (please, correct if wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
114 * static inline uint32_t rdmsr_low(int idx)
115 * {
116 * uint32_t low;
117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
118 * return low;
 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
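
/* A minimal sketch contrasting the ordered and unordered variants, assuming a
 * trivial producer/consumer pair; g_u32Data, g_fReady and the function names
 * are illustrative only.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static volatile uint32_t g_u32Data;
 * static volatile uint32_t g_fReady;
 *
 * void Producer(uint32_t u32)
 * {
 *     g_u32Data = u32;
 *     ASMAtomicXchgU32(&g_fReady, 1);        // ordered: the data store completes first
 * }
 *
 * uint32_t Consumer(void)
 * {
 *     while (!ASMAtomicReadU32(&g_fReady))   // ordered read
 *         ASMNopPause();
 *     return g_u32Data;
 * }
 *
 * uint32_t Peek(void)
 * {
 *     return ASMAtomicUoReadU32(&g_fReady);  // unordered: no ordering promise
 * }
 * @endcode
 */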
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some register allocation issues in gcc 4.3.x on x86.
137 * So far this workaround is still required for 4.4 and 4.5. */
138#ifdef __GNUC__
139# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
140#endif
141#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
142# define RT_INLINE_ASM_GCC_4_3_X_X86 0
143#endif
144
145/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
146 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
147 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
148 * (PIC mode, x86).
149 *
150 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
151 * when in PIC mode on x86.
152 */
153#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
155# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
156# else
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
158 ( (defined(PIC) || defined(__PIC__)) \
159 && defined(RT_ARCH_X86) \
160 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
161 || defined(RT_OS_DARWIN)) )
162# endif
163#endif
164
165
166/** @def ASMReturnAddress
167 * Gets the return address of the current (or calling if you like) function or method.
168 */
169#ifdef _MSC_VER
170# ifdef __cplusplus
171extern "C"
172# endif
173void * _ReturnAddress(void);
174# pragma intrinsic(_ReturnAddress)
175# define ASMReturnAddress() _ReturnAddress()
176#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
177# define ASMReturnAddress() __builtin_return_address(0)
178#elif defined(__WATCOMC__)
179# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
180#else
181# error "Unsupported compiler."
182#endif
183
184
185/**
186 * Compiler memory barrier.
187 *
188 * Ensure that the compiler does not use any cached (register/tmp stack) memory
189 * values or any outstanding writes when returning from this function.
190 *
191 * This function must be used if non-volatile data is modified by a
192 * device or the VMM. Typical cases are port access, MMIO access,
193 * trapping instructions, etc.
194 */
195#if RT_INLINE_ASM_GNU_STYLE
196# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
197#elif RT_INLINE_ASM_USES_INTRIN
198# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
199#elif defined(__WATCOMC__)
200void ASMCompilerBarrier(void);
201#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
202DECLINLINE(void) ASMCompilerBarrier(void)
203{
204 __asm
205 {
206 }
207}
208#endif
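
/* A minimal sketch of when the compiler barrier matters, assuming a
 * hypothetical device status word (pStatus) that is updated behind the
 * compiler's back; the barrier forces a fresh load on every iteration instead
 * of letting the compiler cache *pStatus in a register.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * void WaitForDeviceDone(uint32_t const *pStatus)
 * {
 *     while (!(*pStatus & UINT32_C(0x00000001)))   // bit 0 = "done" in this sketch
 *     {
 *         ASMCompilerBarrier();                    // discard cached memory values
 *         ASMNopPause();
 *     }
 * }
 * @endcode
 */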
209
210
211/** @def ASMBreakpoint
212 * Debugger Breakpoint.
213 * @deprecated Use RT_BREAKPOINT instead.
214 * @internal
215 */
216#define ASMBreakpoint() RT_BREAKPOINT()
217
218
219/**
220 * Spinloop hint for platforms that have one; an empty function on the other
221 * platforms.
222 *
223 * x86 & AMD64: The PAUSE variant of NOP, which helps hyperthreaded CPUs detect
224 * spin-wait loops.
225 */
226#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
227DECLASM(void) ASMNopPause(void);
228#else
229DECLINLINE(void) ASMNopPause(void)
230{
231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
232# if RT_INLINE_ASM_GNU_STYLE
233 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
234# else
235 __asm {
236 _emit 0f3h
237 _emit 090h
238 }
239# endif
240# else
241 /* dummy */
242# endif
243}
244#endif
245
246
247/**
248 * Atomically Exchange an unsigned 8-bit value, ordered.
249 *
250 * @returns Current *pu8 value
251 * @param pu8 Pointer to the 8-bit variable to update.
252 * @param u8 The 8-bit value to assign to *pu8.
253 */
254#if RT_INLINE_ASM_EXTERNAL
255DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
256#else
257DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
258{
259# if RT_INLINE_ASM_GNU_STYLE
260 __asm__ __volatile__("xchgb %0, %1\n\t"
261 : "=m" (*pu8),
262 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
263 : "1" (u8),
264 "m" (*pu8));
265# else
266 __asm
267 {
268# ifdef RT_ARCH_AMD64
269 mov rdx, [pu8]
270 mov al, [u8]
271 xchg [rdx], al
272 mov [u8], al
273# else
274 mov edx, [pu8]
275 mov al, [u8]
276 xchg [edx], al
277 mov [u8], al
278# endif
279 }
280# endif
281 return u8;
282}
283#endif
284
285
286/**
287 * Atomically Exchange a signed 8-bit value, ordered.
288 *
289 * @returns Current *pi8 value
290 * @param pi8 Pointer to the 8-bit variable to update.
291 * @param i8 The 8-bit value to assign to *pi8.
292 */
293DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
294{
295 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
296}
297
298
299/**
300 * Atomically Exchange a bool value, ordered.
301 *
302 * @returns Current *pf value
303 * @param pf Pointer to the boolean variable to update.
304 * @param f The boolean value to assign to *pf.
305 */
306DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
307{
308#ifdef _MSC_VER
309 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
310#else
311 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
312#endif
313}
314
315
316/**
317 * Atomically Exchange an unsigned 16-bit value, ordered.
318 *
319 * @returns Current *pu16 value
320 * @param pu16 Pointer to the 16-bit variable to update.
321 * @param u16 The 16-bit value to assign to *pu16.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
325#else
326DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
327{
328# if RT_INLINE_ASM_GNU_STYLE
329 __asm__ __volatile__("xchgw %0, %1\n\t"
330 : "=m" (*pu16),
331 "=r" (u16)
332 : "1" (u16),
333 "m" (*pu16));
334# else
335 __asm
336 {
337# ifdef RT_ARCH_AMD64
338 mov rdx, [pu16]
339 mov ax, [u16]
340 xchg [rdx], ax
341 mov [u16], ax
342# else
343 mov edx, [pu16]
344 mov ax, [u16]
345 xchg [edx], ax
346 mov [u16], ax
347# endif
348 }
349# endif
350 return u16;
351}
352#endif
353
354
355/**
356 * Atomically Exchange a signed 16-bit value, ordered.
357 *
358 * @returns Current *pi16 value
359 * @param pi16 Pointer to the 16-bit variable to update.
360 * @param i16 The 16-bit value to assign to *pi16.
361 */
362DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
363{
364 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
365}
366
367
368/**
369 * Atomically Exchange an unsigned 32-bit value, ordered.
370 *
371 * @returns Current *pu32 value
372 * @param pu32 Pointer to the 32-bit variable to update.
373 * @param u32 The 32-bit value to assign to *pu32.
374 *
375 * @remarks Does not work on 286 and earlier.
376 */
377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
378DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
379#else
380DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
381{
382# if RT_INLINE_ASM_GNU_STYLE
383 __asm__ __volatile__("xchgl %0, %1\n\t"
384 : "=m" (*pu32),
385 "=r" (u32)
386 : "1" (u32),
387 "m" (*pu32));
388
389# elif RT_INLINE_ASM_USES_INTRIN
390 u32 = _InterlockedExchange((long *)pu32, u32);
391
392# else
393 __asm
394 {
395# ifdef RT_ARCH_AMD64
396 mov rdx, [pu32]
397 mov eax, u32
398 xchg [rdx], eax
399 mov [u32], eax
400# else
401 mov edx, [pu32]
402 mov eax, u32
403 xchg [edx], eax
404 mov [u32], eax
405# endif
406 }
407# endif
408 return u32;
409}
410#endif
411
412
413/**
414 * Atomically Exchange a signed 32-bit value, ordered.
415 *
416 * @returns Current *pi32 value
417 * @param pi32 Pointer to the 32-bit variable to update.
418 * @param i32 The 32-bit value to assign to *pi32.
419 */
420DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
421{
422 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
423}
424
425
426/**
427 * Atomically Exchange an unsigned 64-bit value, ordered.
428 *
429 * @returns Current *pu64 value
430 * @param pu64 Pointer to the 64-bit variable to update.
431 * @param u64 The 64-bit value to assign to *pu64.
432 *
433 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
434 */
435#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
436 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
437DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
438#else
439DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
440{
441# if defined(RT_ARCH_AMD64)
442# if RT_INLINE_ASM_USES_INTRIN
443 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
444
445# elif RT_INLINE_ASM_GNU_STYLE
446 __asm__ __volatile__("xchgq %0, %1\n\t"
447 : "=m" (*pu64),
448 "=r" (u64)
449 : "1" (u64),
450 "m" (*pu64));
451# else
452 __asm
453 {
454 mov rdx, [pu64]
455 mov rax, [u64]
456 xchg [rdx], rax
457 mov [u64], rax
458 }
459# endif
460# else /* !RT_ARCH_AMD64 */
461# if RT_INLINE_ASM_GNU_STYLE
462# if defined(PIC) || defined(__PIC__)
463 uint32_t u32EBX = (uint32_t)u64;
464 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
465 "xchgl %%ebx, %3\n\t"
466 "1:\n\t"
467 "lock; cmpxchg8b (%5)\n\t"
468 "jnz 1b\n\t"
469 "movl %3, %%ebx\n\t"
470 /*"xchgl %%esi, %5\n\t"*/
471 : "=A" (u64),
472 "=m" (*pu64)
473 : "0" (*pu64),
474 "m" ( u32EBX ),
475 "c" ( (uint32_t)(u64 >> 32) ),
476 "S" (pu64));
477# else /* !PIC */
478 __asm__ __volatile__("1:\n\t"
479 "lock; cmpxchg8b %1\n\t"
480 "jnz 1b\n\t"
481 : "=A" (u64),
482 "=m" (*pu64)
483 : "0" (*pu64),
484 "b" ( (uint32_t)u64 ),
485 "c" ( (uint32_t)(u64 >> 32) ));
486# endif
487# else
488 __asm
489 {
490 mov ebx, dword ptr [u64]
491 mov ecx, dword ptr [u64 + 4]
492 mov edi, pu64
493 mov eax, dword ptr [edi]
494 mov edx, dword ptr [edi + 4]
495 retry:
496 lock cmpxchg8b [edi]
497 jnz retry
498 mov dword ptr [u64], eax
499 mov dword ptr [u64 + 4], edx
500 }
501# endif
502# endif /* !RT_ARCH_AMD64 */
503 return u64;
504}
505#endif
506
507
508/**
509 * Atomically Exchange a signed 64-bit value, ordered.
510 *
511 * @returns Current *pi64 value
512 * @param pi64 Pointer to the 64-bit variable to update.
513 * @param i64 The 64-bit value to assign to *pi64.
514 */
515DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
516{
517 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
518}
519
520
521/**
522 * Atomically Exchange a pointer value, ordered.
523 *
524 * @returns Current *ppv value
525 * @param ppv Pointer to the pointer variable to update.
526 * @param pv The pointer value to assign to *ppv.
527 */
528DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
529{
530#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
531 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
532#elif ARCH_BITS == 64
533 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
534#else
535# error "ARCH_BITS is bogus"
536#endif
537}
538
539
540/**
541 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
542 *
543 * @returns Current *pv value
544 * @param ppv Pointer to the pointer variable to update.
545 * @param pv The pointer value to assign to *ppv.
546 * @param Type The type of *ppv, sans volatile.
547 */
548#ifdef __GNUC__
549# define ASMAtomicXchgPtrT(ppv, pv, Type) \
550 __extension__ \
551 ({\
552 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
553 Type const pvTypeChecked = (pv); \
554 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
555 pvTypeCheckedRet; \
556 })
557#else
558# define ASMAtomicXchgPtrT(ppv, pv, Type) \
559 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
560#endif
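
/* A usage sketch for ASMAtomicXchgPtrT, assuming a hypothetical MYNODE list
 * type: the whole list is detached in a single atomic operation and the
 * previous head is returned without any casting at the call site.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * typedef struct MYNODE { struct MYNODE *pNext; uint32_t uValue; } MYNODE;
 * static MYNODE * volatile g_pHead;
 *
 * MYNODE *TakeAllNodes(void)
 * {
 *     return ASMAtomicXchgPtrT(&g_pHead, (MYNODE *)NULL, MYNODE *);
 * }
 * @endcode
 */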
561
562
563/**
564 * Atomically Exchange a raw-mode context pointer value, ordered.
565 *
566 * @returns Current *ppv value
567 * @param ppvRC Pointer to the pointer variable to update.
568 * @param pvRC The pointer value to assign to *ppv.
569 */
570DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
571{
572 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
573}
574
575
576/**
577 * Atomically Exchange a ring-0 pointer value, ordered.
578 *
579 * @returns Current *ppv value
580 * @param ppvR0 Pointer to the pointer variable to update.
581 * @param pvR0 The pointer value to assign to *ppv.
582 */
583DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
584{
585#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
586 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
587#elif R0_ARCH_BITS == 64
588 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
589#else
590# error "R0_ARCH_BITS is bogus"
591#endif
592}
593
594
595/**
596 * Atomically Exchange a ring-3 pointer value, ordered.
597 *
598 * @returns Current *ppv value
599 * @param ppvR3 Pointer to the pointer variable to update.
600 * @param pvR3 The pointer value to assign to *ppv.
601 */
602DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
603{
604#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
605 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
606#elif R3_ARCH_BITS == 64
607 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
608#else
609# error "R3_ARCH_BITS is bogus"
610#endif
611}
612
613
614/** @def ASMAtomicXchgHandle
615 * Atomically Exchange a typical IPRT handle value, ordered.
616 *
617 * @param ph Pointer to the value to update.
618 * @param hNew The new value to assign to *ph.
619 * @param phRes Where to store the current *ph value.
620 *
621 * @remarks This doesn't currently work for all handles (like RTFILE).
622 */
623#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
624# define ASMAtomicXchgHandle(ph, hNew, phRes) \
625 do { \
626 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
627 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
628 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
629 } while (0)
630#elif HC_ARCH_BITS == 64
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
635 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
636 } while (0)
637#else
638# error HC_ARCH_BITS
639#endif
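
/* A usage sketch for ASMAtomicXchgHandle, assuming an RTSEMEVENT member that
 * must be handed over exactly once during teardown; the helper name is made
 * up for illustration.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * RTSEMEVENT TakeEventSem(RTSEMEVENT volatile *phEvt)
 * {
 *     RTSEMEVENT hEvt;
 *     ASMAtomicXchgHandle(phEvt, NIL_RTSEMEVENT, &hEvt);
 *     return hEvt;    // the caller now owns the handle; *phEvt is NIL
 * }
 * @endcode
 */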
640
641
642/**
643 * Atomically Exchange a value whose size might differ
644 * between platforms or compilers, ordered.
645 *
646 * @param pu Pointer to the variable to update.
647 * @param uNew The value to assign to *pu.
648 * @todo This is busted as it's missing the result argument.
649 */
650#define ASMAtomicXchgSize(pu, uNew) \
651 do { \
652 switch (sizeof(*(pu))) { \
653 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
654 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
655 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
656 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
657 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
658 } \
659 } while (0)
660
661/**
662 * Atomically Exchange a value whose size might differ
663 * between platforms or compilers, ordered.
664 *
665 * @param pu Pointer to the variable to update.
666 * @param uNew The value to assign to *pu.
667 * @param puRes Where to store the current *pu value.
668 */
669#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
670 do { \
671 switch (sizeof(*(pu))) { \
672 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
673 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
674 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
675 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
676 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
677 } \
678 } while (0)
679
680
681
682/**
683 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
684 *
685 * @returns true if xchg was done.
686 * @returns false if xchg wasn't done.
687 *
688 * @param pu8 Pointer to the value to update.
689 * @param u8New The new value to assign to *pu8.
690 * @param u8Old The old value to compare *pu8 with.
691 *
692 * @remarks x86: Requires a 486 or later.
693 */
694#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
695DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
696#else
697DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
698{
699 uint8_t u8Ret;
700 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
701 "setz %1\n\t"
702 : "=m" (*pu8),
703 "=qm" (u8Ret),
704 "=a" (u8Old)
705 : "q" (u8New),
706 "2" (u8Old),
707 "m" (*pu8));
708 return (bool)u8Ret;
709}
710#endif
711
712
713/**
714 * Atomically Compare and Exchange a signed 8-bit value, ordered.
715 *
716 * @returns true if xchg was done.
717 * @returns false if xchg wasn't done.
718 *
719 * @param pi8 Pointer to the value to update.
720 * @param i8New The new value to assign to *pi8.
721 * @param i8Old The old value to compare *pi8 with.
722 *
723 * @remarks x86: Requires a 486 or later.
724 */
725DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
726{
727 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
728}
729
730
731/**
732 * Atomically Compare and Exchange a bool value, ordered.
733 *
734 * @returns true if xchg was done.
735 * @returns false if xchg wasn't done.
736 *
737 * @param pf Pointer to the value to update.
738 * @param fNew The new value to assign to *pf.
739 * @param fOld The old value to compare *pf with.
740 *
741 * @remarks x86: Requires a 486 or later.
742 */
743DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
744{
745 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
746}
747
748
749/**
750 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
751 *
752 * @returns true if xchg was done.
753 * @returns false if xchg wasn't done.
754 *
755 * @param pu32 Pointer to the value to update.
756 * @param u32New The new value to assign to *pu32.
757 * @param u32Old The old value to compare *pu32 with.
758 *
759 * @remarks x86: Requires a 486 or later.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
763#else
764DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
765{
766# if RT_INLINE_ASM_GNU_STYLE
767 uint8_t u8Ret;
768 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
769 "setz %1\n\t"
770 : "=m" (*pu32),
771 "=qm" (u8Ret),
772 "=a" (u32Old)
773 : "r" (u32New),
774 "2" (u32Old),
775 "m" (*pu32));
776 return (bool)u8Ret;
777
778# elif RT_INLINE_ASM_USES_INTRIN
779 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
780
781# else
782 uint32_t u32Ret;
783 __asm
784 {
785# ifdef RT_ARCH_AMD64
786 mov rdx, [pu32]
787# else
788 mov edx, [pu32]
789# endif
790 mov eax, [u32Old]
791 mov ecx, [u32New]
792# ifdef RT_ARCH_AMD64
793 lock cmpxchg [rdx], ecx
794# else
795 lock cmpxchg [edx], ecx
796# endif
797 setz al
798 movzx eax, al
799 mov [u32Ret], eax
800 }
801 return !!u32Ret;
802# endif
803}
804#endif
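
/* The canonical compare-and-exchange retry loop, shown here as a sketch that
 * atomically ORs bits into a shared 32-bit flag word; the function and
 * parameter names are illustrative.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * void MyAtomicOrU32(volatile uint32_t *pu32, uint32_t fFlags)
 * {
 *     uint32_t u32Old;
 *     do
 *         u32Old = ASMAtomicReadU32(pu32);
 *     while (!ASMAtomicCmpXchgU32(pu32, u32Old | fFlags, u32Old));
 * }
 * @endcode
 */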
805
806
807/**
808 * Atomically Compare and Exchange a signed 32-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pi32 Pointer to the value to update.
814 * @param i32New The new value to assign to *pi32.
815 * @param i32Old The old value to compare *pi32 with.
816 *
817 * @remarks x86: Requires a 486 or later.
818 */
819DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
820{
821 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
822}
823
824
825/**
826 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
827 *
828 * @returns true if xchg was done.
829 * @returns false if xchg wasn't done.
830 *
831 * @param pu64 Pointer to the 64-bit variable to update.
832 * @param u64New The 64-bit value to assign to *pu64.
833 * @param u64Old The value to compare with.
834 *
835 * @remarks x86: Requires a Pentium or later.
836 */
837#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
838 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
839DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
840#else
841DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
842{
843# if RT_INLINE_ASM_USES_INTRIN
844 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
845
846# elif defined(RT_ARCH_AMD64)
847# if RT_INLINE_ASM_GNU_STYLE
848 uint8_t u8Ret;
849 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
850 "setz %1\n\t"
851 : "=m" (*pu64),
852 "=qm" (u8Ret),
853 "=a" (u64Old)
854 : "r" (u64New),
855 "2" (u64Old),
856 "m" (*pu64));
857 return (bool)u8Ret;
858# else
859 bool fRet;
860 __asm
861 {
862 mov rdx, [pu64]
863 mov rax, [u64Old]
864 mov rcx, [u64New]
865 lock cmpxchg [rdx], rcx
866 setz al
867 mov [fRet], al
868 }
869 return fRet;
870# endif
871# else /* !RT_ARCH_AMD64 */
872 uint32_t u32Ret;
873# if RT_INLINE_ASM_GNU_STYLE
874# if defined(PIC) || defined(__PIC__)
875 uint32_t u32EBX = (uint32_t)u64New;
876 uint32_t u32Spill;
877 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
878 "lock; cmpxchg8b (%6)\n\t"
879 "setz %%al\n\t"
880 "movl %4, %%ebx\n\t"
881 "movzbl %%al, %%eax\n\t"
882 : "=a" (u32Ret),
883 "=d" (u32Spill),
884# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
885 "+m" (*pu64)
886# else
887 "=m" (*pu64)
888# endif
889 : "A" (u64Old),
890 "m" ( u32EBX ),
891 "c" ( (uint32_t)(u64New >> 32) ),
892 "S" (pu64));
893# else /* !PIC */
894 uint32_t u32Spill;
895 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
896 "setz %%al\n\t"
897 "movzbl %%al, %%eax\n\t"
898 : "=a" (u32Ret),
899 "=d" (u32Spill),
900 "+m" (*pu64)
901 : "A" (u64Old),
902 "b" ( (uint32_t)u64New ),
903 "c" ( (uint32_t)(u64New >> 32) ));
904# endif
905 return (bool)u32Ret;
906# else
907 __asm
908 {
909 mov ebx, dword ptr [u64New]
910 mov ecx, dword ptr [u64New + 4]
911 mov edi, [pu64]
912 mov eax, dword ptr [u64Old]
913 mov edx, dword ptr [u64Old + 4]
914 lock cmpxchg8b [edi]
915 setz al
916 movzx eax, al
917 mov dword ptr [u32Ret], eax
918 }
919 return !!u32Ret;
920# endif
921# endif /* !RT_ARCH_AMD64 */
922}
923#endif
924
925
926/**
927 * Atomically Compare and exchange a signed 64-bit value, ordered.
928 *
929 * @returns true if xchg was done.
930 * @returns false if xchg wasn't done.
931 *
932 * @param pi64 Pointer to the 64-bit variable to update.
933 * @param i64 The 64-bit value to assign to *pi64.
934 * @param i64Old The value to compare with.
935 *
936 * @remarks x86: Requires a Pentium or later.
937 */
938DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
939{
940 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
941}
942
943
944/**
945 * Atomically Compare and Exchange a pointer value, ordered.
946 *
947 * @returns true if xchg was done.
948 * @returns false if xchg wasn't done.
949 *
950 * @param ppv Pointer to the value to update.
951 * @param pvNew The new value to assign to *ppv.
952 * @param pvOld The old value to compare *ppv with.
953 *
954 * @remarks x86: Requires a 486 or later.
955 */
956DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
957{
958#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
959 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
960#elif ARCH_BITS == 64
961 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
962#else
963# error "ARCH_BITS is bogus"
964#endif
965}
966
967
968/**
969 * Atomically Compare and Exchange a pointer value, ordered.
970 *
971 * @returns true if xchg was done.
972 * @returns false if xchg wasn't done.
973 *
974 * @param ppv Pointer to the value to update.
975 * @param pvNew The new value to assign to *ppv.
976 * @param pvOld The old value to compare *ppv with.
977 *
978 * @remarks This is relatively type safe on GCC platforms.
979 * @remarks x86: Requires a 486 or later.
980 */
981#ifdef __GNUC__
982# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
983 __extension__ \
984 ({\
985 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
986 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
987 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
988 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
989 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
990 fMacroRet; \
991 })
992#else
993# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
994 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
995#endif
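
/* A lock-free LIFO push built on ASMAtomicCmpXchgPtr, assuming the same kind
 * of hypothetical MYNODE type as in the ASMAtomicXchgPtrT sketch above.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * typedef struct MYNODE { struct MYNODE *pNext; uint32_t uValue; } MYNODE;
 * static MYNODE * volatile g_pHead;
 *
 * void PushNode(MYNODE *pNode)
 * {
 *     MYNODE *pOldHead;
 *     do
 *     {
 *         pOldHead = ASMAtomicReadPtrT(&g_pHead, MYNODE *);
 *         pNode->pNext = pOldHead;
 *     } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pOldHead));
 * }
 * @endcode
 */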
996
997
998/** @def ASMAtomicCmpXchgHandle
999 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1000 *
1001 * @param ph Pointer to the value to update.
1002 * @param hNew The new value to assign to *ph.
1003 * @param hOld The old value to compare *ph with.
1004 * @param fRc Where to store the result.
1005 *
1006 * @remarks This doesn't currently work for all handles (like RTFILE).
1007 * @remarks x86: Requires a 486 or later.
1008 */
1009#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1010# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1011 do { \
1012 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1013 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1014 } while (0)
1015#elif HC_ARCH_BITS == 64
1016# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1017 do { \
1018 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1019 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1020 } while (0)
1021#else
1022# error HC_ARCH_BITS
1023#endif
1024
1025
1026/** @def ASMAtomicCmpXchgSize
1027 * Atomically Compare and Exchange a value whose size might differ
1028 * between platforms or compilers, ordered.
1029 *
1030 * @param pu Pointer to the value to update.
1031 * @param uNew The new value to assign to *pu.
1032 * @param uOld The old value to compare *pu with.
1033 * @param fRc Where to store the result.
1034 *
1035 * @remarks x86: Requires a 486 or later.
1036 */
1037#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1038 do { \
1039 switch (sizeof(*(pu))) { \
1040 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1041 break; \
1042 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1043 break; \
1044 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1045 (fRc) = false; \
1046 break; \
1047 } \
1048 } while (0)
1049
1050
1051/**
1052 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1053 * passes back old value, ordered.
1054 *
1055 * @returns true if xchg was done.
1056 * @returns false if xchg wasn't done.
1057 *
1058 * @param pu32 Pointer to the value to update.
1059 * @param u32New The new value to assign to *pu32.
1060 * @param u32Old The old value to compare *pu32 with.
1061 * @param pu32Old Pointer to store the old value at.
1062 *
1063 * @remarks x86: Requires a 486 or later.
1064 */
1065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1066DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1067#else
1068DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1069{
1070# if RT_INLINE_ASM_GNU_STYLE
1071 uint8_t u8Ret;
1072 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1073 "setz %1\n\t"
1074 : "=m" (*pu32),
1075 "=qm" (u8Ret),
1076 "=a" (*pu32Old)
1077 : "r" (u32New),
1078 "a" (u32Old),
1079 "m" (*pu32));
1080 return (bool)u8Ret;
1081
1082# elif RT_INLINE_ASM_USES_INTRIN
1083 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1084
1085# else
1086 uint32_t u32Ret;
1087 __asm
1088 {
1089# ifdef RT_ARCH_AMD64
1090 mov rdx, [pu32]
1091# else
1092 mov edx, [pu32]
1093# endif
1094 mov eax, [u32Old]
1095 mov ecx, [u32New]
1096# ifdef RT_ARCH_AMD64
1097 lock cmpxchg [rdx], ecx
1098 mov rdx, [pu32Old]
1099 mov [rdx], eax
1100# else
1101 lock cmpxchg [edx], ecx
1102 mov edx, [pu32Old]
1103 mov [edx], eax
1104# endif
1105 setz al
1106 movzx eax, al
1107 mov [u32Ret], eax
1108 }
1109 return !!u32Ret;
1110# endif
1111}
1112#endif
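
/* Like the plain CAS loop, but using the Ex variant so a failed attempt hands
 * back the fresh value directly, saving the re-read; names are illustrative.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * uint32_t MyAtomicAddU32(volatile uint32_t *pu32, uint32_t uAdd)
 * {
 *     uint32_t u32Old = ASMAtomicReadU32(pu32);
 *     while (!ASMAtomicCmpXchgExU32(pu32, u32Old + uAdd, u32Old, &u32Old))
 *     { // u32Old was refreshed by the failed attempt, just retry
 *     }
 *     return u32Old + uAdd;    // the value we successfully stored
 * }
 * @endcode
 */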
1113
1114
1115/**
1116 * Atomically Compare and Exchange a signed 32-bit value, additionally
1117 * passes back old value, ordered.
1118 *
1119 * @returns true if xchg was done.
1120 * @returns false if xchg wasn't done.
1121 *
1122 * @param pi32 Pointer to the value to update.
1123 * @param i32New The new value to assign to *pi32.
1124 * @param i32Old The old value to compare *pi32 with.
1125 * @param pi32Old Pointer to store the old value at.
1126 *
1127 * @remarks x86: Requires a 486 or later.
1128 */
1129DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1130{
1131 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1132}
1133
1134
1135/**
1136 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1137 * passing back old value, ordered.
1138 *
1139 * @returns true if xchg was done.
1140 * @returns false if xchg wasn't done.
1141 *
1142 * @param pu64 Pointer to the 64-bit variable to update.
1143 * @param u64New The 64-bit value to assign to *pu64.
1144 * @param u64Old The value to compare with.
1145 * @param pu64Old Pointer to store the old value at.
1146 *
1147 * @remarks x86: Requires a Pentium or later.
1148 */
1149#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1150 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1151DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1152#else
1153DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1154{
1155# if RT_INLINE_ASM_USES_INTRIN
1156 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1157
1158# elif defined(RT_ARCH_AMD64)
1159# if RT_INLINE_ASM_GNU_STYLE
1160 uint8_t u8Ret;
1161 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1162 "setz %1\n\t"
1163 : "=m" (*pu64),
1164 "=qm" (u8Ret),
1165 "=a" (*pu64Old)
1166 : "r" (u64New),
1167 "a" (u64Old),
1168 "m" (*pu64));
1169 return (bool)u8Ret;
1170# else
1171 bool fRet;
1172 __asm
1173 {
1174 mov rdx, [pu64]
1175 mov rax, [u64Old]
1176 mov rcx, [u64New]
1177 lock cmpxchg [rdx], rcx
1178 mov rdx, [pu64Old]
1179 mov [rdx], rax
1180 setz al
1181 mov [fRet], al
1182 }
1183 return fRet;
1184# endif
1185# else /* !RT_ARCH_AMD64 */
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint64_t u64Ret;
1188# if defined(PIC) || defined(__PIC__)
1189 /* NB: this code uses a memory clobber description, because the clean
1190 * solution with an output value for *pu64 makes gcc run out of registers.
1191 * This will cause suboptimal code, and anyone with a better solution is
1192 * welcome to improve this. */
1193 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1194 "lock; cmpxchg8b %3\n\t"
1195 "xchgl %%ebx, %1\n\t"
1196 : "=A" (u64Ret)
1197 : "DS" ((uint32_t)u64New),
1198 "c" ((uint32_t)(u64New >> 32)),
1199 "m" (*pu64),
1200 "0" (u64Old)
1201 : "memory" );
1202# else /* !PIC */
1203 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1204 : "=A" (u64Ret),
1205 "=m" (*pu64)
1206 : "b" ((uint32_t)u64New),
1207 "c" ((uint32_t)(u64New >> 32)),
1208 "m" (*pu64),
1209 "0" (u64Old));
1210# endif
1211 *pu64Old = u64Ret;
1212 return u64Ret == u64Old;
1213# else
1214 uint32_t u32Ret;
1215 __asm
1216 {
1217 mov ebx, dword ptr [u64New]
1218 mov ecx, dword ptr [u64New + 4]
1219 mov edi, [pu64]
1220 mov eax, dword ptr [u64Old]
1221 mov edx, dword ptr [u64Old + 4]
1222 lock cmpxchg8b [edi]
1223 mov ebx, [pu64Old]
1224 mov [ebx], eax
1225 setz al
1226 movzx eax, al
1227 add ebx, 4
1228 mov [ebx], edx
1229 mov dword ptr [u32Ret], eax
1230 }
1231 return !!u32Ret;
1232# endif
1233# endif /* !RT_ARCH_AMD64 */
1234}
1235#endif
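
/* A sketch that keeps a running 64-bit maximum without locks; thanks to
 * cmpxchg8b this also works on 32-bit x86 (Pentium or later). Names are
 * illustrative.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * void UpdateMaxU64(volatile uint64_t *pu64Max, uint64_t u64New)
 * {
 *     uint64_t u64Old = ASMAtomicReadU64(pu64Max);
 *     while (   u64New > u64Old
 *            && !ASMAtomicCmpXchgExU64(pu64Max, u64New, u64Old, &u64Old))
 *     { // u64Old was refreshed, re-check whether u64New still wins
 *     }
 * }
 * @endcode
 */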
1236
1237
1238/**
1239 * Atomically Compare and exchange a signed 64-bit value, additionally
1240 * passing back old value, ordered.
1241 *
1242 * @returns true if xchg was done.
1243 * @returns false if xchg wasn't done.
1244 *
1245 * @param pi64 Pointer to the 64-bit variable to update.
1246 * @param i64 The 64-bit value to assign to *pi64.
1247 * @param i64Old The value to compare with.
1248 * @param pi64Old Pointer to store the old value at.
1249 *
1250 * @remarks x86: Requires a Pentium or later.
1251 */
1252DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1253{
1254 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1255}
1256
1257/** @def ASMAtomicCmpXchgExHandle
1258 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1259 *
1260 * @param ph Pointer to the value to update.
1261 * @param hNew The new value to assign to *ph.
1262 * @param hOld The old value to compare *ph with.
1263 * @param fRc Where to store the result.
1264 * @param phOldVal Pointer to where to store the old value.
1265 *
1266 * @remarks This doesn't currently work for all handles (like RTFILE).
1267 */
1268#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1269# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1270 do { \
1271 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1272 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1273 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
1274 } while (0)
1275#elif HC_ARCH_BITS == 64
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1279 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
1281 } while (0)
1282#else
1283# error HC_ARCH_BITS
1284#endif
1285
1286
1287/** @def ASMAtomicCmpXchgExSize
1288 * Atomically Compare and Exchange a value whose size might differ
1289 * between platforms or compilers. Additionally passes back old value.
1290 *
1291 * @param pu Pointer to the value to update.
1292 * @param uNew The new value to assign to *pu.
1293 * @param uOld The old value to compare *pu with.
1294 * @param fRc Where to store the result.
1295 * @param puOldVal Pointer to where to store the old value.
1296 *
1297 * @remarks x86: Requires a 486 or later.
1298 */
1299#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1300 do { \
1301 switch (sizeof(*(pu))) { \
1302 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1303 break; \
1304 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1305 break; \
1306 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1307 (fRc) = false; \
1308 *(puOldVal) = 0; \
1309 break; \
1310 } \
1311 } while (0)
1312
1313
1314/**
1315 * Atomically Compare and Exchange a pointer value, additionally
1316 * passing back old value, ordered.
1317 *
1318 * @returns true if xchg was done.
1319 * @returns false if xchg wasn't done.
1320 *
1321 * @param ppv Pointer to the value to update.
1322 * @param pvNew The new value to assign to *ppv.
1323 * @param pvOld The old value to compare *ppv with.
1324 * @param ppvOld Pointer to store the old value at.
1325 *
1326 * @remarks x86: Requires a 486 or later.
1327 */
1328DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1329{
1330#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1331 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1332#elif ARCH_BITS == 64
1333 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1334#else
1335# error "ARCH_BITS is bogus"
1336#endif
1337}
1338
1339
1340/**
1341 * Atomically Compare and Exchange a pointer value, additionally
1342 * passing back old value, ordered.
1343 *
1344 * @returns true if xchg was done.
1345 * @returns false if xchg wasn't done.
1346 *
1347 * @param ppv Pointer to the value to update.
1348 * @param pvNew The new value to assign to *ppv.
1349 * @param pvOld The old value to compare *ppv with.
1350 * @param ppvOld Pointer to store the old value at.
1351 *
1352 * @remarks This is relatively type safe on GCC platforms.
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355#ifdef __GNUC__
1356# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1357 __extension__ \
1358 ({\
1359 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1360 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1361 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1362 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1363 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1364 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1365 (void **)ppvOldTypeChecked); \
1366 fMacroRet; \
1367 })
1368#else
1369# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1370 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1371#endif
1372
1373
1374/**
1375 * Virtualization unfriendly serializing instruction, always exits.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMSerializeInstructionCpuId(void);
1379#else
1380DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1381{
1382# if RT_INLINE_ASM_GNU_STYLE
1383 RTCCUINTREG xAX = 0;
1384# ifdef RT_ARCH_AMD64
1385 __asm__ __volatile__ ("cpuid"
1386 : "=a" (xAX)
1387 : "0" (xAX)
1388 : "rbx", "rcx", "rdx");
1389# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1390 __asm__ __volatile__ ("push %%ebx\n\t"
1391 "cpuid\n\t"
1392 "pop %%ebx\n\t"
1393 : "=a" (xAX)
1394 : "0" (xAX)
1395 : "ecx", "edx");
1396# else
1397 __asm__ __volatile__ ("cpuid"
1398 : "=a" (xAX)
1399 : "0" (xAX)
1400 : "ebx", "ecx", "edx");
1401# endif
1402
1403# elif RT_INLINE_ASM_USES_INTRIN
1404 int aInfo[4];
1405 __cpuid(aInfo, 0);
1406
1407# else
1408 __asm
1409 {
1410 push ebx
1411 xor eax, eax
1412 cpuid
1413 pop ebx
1414 }
1415# endif
1416}
1417#endif
1418
1419/**
1420 * Virtualization-friendly serializing instruction, though more expensive.
1421 */
1422#if RT_INLINE_ASM_EXTERNAL
1423DECLASM(void) ASMSerializeInstructionIRet(void);
1424#else
1425DECLINLINE(void) ASMSerializeInstructionIRet(void)
1426{
1427# if RT_INLINE_ASM_GNU_STYLE
1428# ifdef RT_ARCH_AMD64
1429 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1430 "subq $128, %%rsp\n\t" /*redzone*/
1431 "mov %%ss, %%eax\n\t"
1432 "pushq %%rax\n\t"
1433 "pushq %%r10\n\t"
1434 "pushfq\n\t"
1435 "movl %%cs, %%eax\n\t"
1436 "pushq %%rax\n\t"
1437 "leaq 1f(%%rip), %%rax\n\t"
1438 "pushq %%rax\n\t"
1439 "iretq\n\t"
1440 "1:\n\t"
1441 ::: "rax", "r10");
1442# else
1443 __asm__ __volatile__ ("pushfl\n\t"
1444 "pushl %%cs\n\t"
1445 "pushl $1f\n\t"
1446 "iretl\n\t"
1447 "1:\n\t"
1448 :::);
1449# endif
1450
1451# else
1452 __asm
1453 {
1454 pushfd
1455 push cs
1456 push la_ret
1457 iretd
1458 la_ret:
1459 }
1460# endif
1461}
1462#endif
1463
1464/**
1465 * A more virtualization-friendly serializing instruction; it may still cause VM exits.
1466 */
1467#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1468DECLASM(void) ASMSerializeInstructionRdTscp(void);
1469#else
1470DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1471{
1472# if RT_INLINE_ASM_GNU_STYLE
1473 /* rdtscp is not supported by ancient linux build VM of course :-( */
1474# ifdef RT_ARCH_AMD64
1475 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1476 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx");
1477# else
1478 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1479 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx");
1480# endif
1481# else
1482# if RT_INLINE_ASM_USES_INTRIN >= 15
1483 uint32_t uIgnore;
1484 (void)__rdtscp(&uIgnore);
1485 (void)uIgnore;
1486# else
1487 __asm
1488 {
1489 rdtscp
1490 }
1491# endif
1492# endif
1493}
1494#endif
1495
1496
1497/**
1498 * Serialize Instruction.
1499 */
1500#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1501# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1502#else
1503# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1504#endif
1505
1506
1507/**
1508 * Memory fence, waits for any pending writes and reads to complete.
1509 */
1510DECLINLINE(void) ASMMemoryFence(void)
1511{
1512 /** @todo use mfence? check if all cpus we care for support it. */
1513 uint32_t volatile u32;
1514 ASMAtomicXchgU32(&u32, 0);
1515}
1516
1517
1518/**
1519 * Write fence, waits for any pending writes to complete.
1520 */
1521DECLINLINE(void) ASMWriteFence(void)
1522{
1523 /** @todo use sfence? check if all cpus we care for support it. */
1524 ASMMemoryFence();
1525}
1526
1527
1528/**
1529 * Read fence, waits for any pending reads to complete.
1530 */
1531DECLINLINE(void) ASMReadFence(void)
1532{
1533 /** @todo use lfence? check if all cpus we care for support it. */
1534 ASMMemoryFence();
1535}
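
/* A sketch pairing ASMWriteFence and ASMReadFence in a simple publish/consume
 * pattern; g_u32Payload, g_fPublished and the function names are illustrative.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static volatile uint32_t g_u32Payload;
 * static volatile uint32_t g_fPublished;
 *
 * void Publish(uint32_t u32)
 * {
 *     g_u32Payload = u32;
 *     ASMWriteFence();                 // payload becomes visible before the flag
 *     g_fPublished = 1;
 * }
 *
 * uint32_t Consume(void)
 * {
 *     while (!ASMAtomicUoReadU32(&g_fPublished))
 *         ASMNopPause();
 *     ASMReadFence();                  // don't read the payload ahead of the flag
 *     return g_u32Payload;
 * }
 * @endcode
 */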
1536
1537
1538/**
1539 * Atomically reads an unsigned 8-bit value, ordered.
1540 *
1541 * @returns Current *pu8 value
1542 * @param pu8 Pointer to the 8-bit variable to read.
1543 */
1544DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1545{
1546 ASMMemoryFence();
1547 return *pu8; /* byte reads are atomic on x86 */
1548}
1549
1550
1551/**
1552 * Atomically reads an unsigned 8-bit value, unordered.
1553 *
1554 * @returns Current *pu8 value
1555 * @param pu8 Pointer to the 8-bit variable to read.
1556 */
1557DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1558{
1559 return *pu8; /* byte reads are atomic on x86 */
1560}
1561
1562
1563/**
1564 * Atomically reads a signed 8-bit value, ordered.
1565 *
1566 * @returns Current *pi8 value
1567 * @param pi8 Pointer to the 8-bit variable to read.
1568 */
1569DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1570{
1571 ASMMemoryFence();
1572 return *pi8; /* byte reads are atomic on x86 */
1573}
1574
1575
1576/**
1577 * Atomically reads a signed 8-bit value, unordered.
1578 *
1579 * @returns Current *pi8 value
1580 * @param pi8 Pointer to the 8-bit variable to read.
1581 */
1582DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1583{
1584 return *pi8; /* byte reads are atomic on x86 */
1585}
1586
1587
1588/**
1589 * Atomically reads an unsigned 16-bit value, ordered.
1590 *
1591 * @returns Current *pu16 value
1592 * @param pu16 Pointer to the 16-bit variable to read.
1593 */
1594DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1595{
1596 ASMMemoryFence();
1597 Assert(!((uintptr_t)pu16 & 1));
1598 return *pu16;
1599}
1600
1601
1602/**
1603 * Atomically reads an unsigned 16-bit value, unordered.
1604 *
1605 * @returns Current *pu16 value
1606 * @param pu16 Pointer to the 16-bit variable to read.
1607 */
1608DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1609{
1610 Assert(!((uintptr_t)pu16 & 1));
1611 return *pu16;
1612}
1613
1614
1615/**
1616 * Atomically reads a signed 16-bit value, ordered.
1617 *
1618 * @returns Current *pi16 value
1619 * @param pi16 Pointer to the 16-bit variable to read.
1620 */
1621DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1622{
1623 ASMMemoryFence();
1624 Assert(!((uintptr_t)pi16 & 1));
1625 return *pi16;
1626}
1627
1628
1629/**
1630 * Atomically reads a signed 16-bit value, unordered.
1631 *
1632 * @returns Current *pi16 value
1633 * @param pi16 Pointer to the 16-bit variable to read.
1634 */
1635DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1636{
1637 Assert(!((uintptr_t)pi16 & 1));
1638 return *pi16;
1639}
1640
1641
1642/**
1643 * Atomically reads an unsigned 32-bit value, ordered.
1644 *
1645 * @returns Current *pu32 value
1646 * @param pu32 Pointer to the 32-bit variable to read.
1647 */
1648DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1649{
1650 ASMMemoryFence();
1651 Assert(!((uintptr_t)pu32 & 3));
1652 return *pu32;
1653}
1654
1655
1656/**
1657 * Atomically reads an unsigned 32-bit value, unordered.
1658 *
1659 * @returns Current *pu32 value
1660 * @param pu32 Pointer to the 32-bit variable to read.
1661 */
1662DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1663{
1664 Assert(!((uintptr_t)pu32 & 3));
1665 return *pu32;
1666}
1667
1668
1669/**
1670 * Atomically reads a signed 32-bit value, ordered.
1671 *
1672 * @returns Current *pi32 value
1673 * @param pi32 Pointer to the 32-bit variable to read.
1674 */
1675DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1676{
1677 ASMMemoryFence();
1678 Assert(!((uintptr_t)pi32 & 3));
1679 return *pi32;
1680}
1681
1682
1683/**
1684 * Atomically reads a signed 32-bit value, unordered.
1685 *
1686 * @returns Current *pi32 value
1687 * @param pi32 Pointer to the 32-bit variable to read.
1688 */
1689DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1690{
1691 Assert(!((uintptr_t)pi32 & 3));
1692 return *pi32;
1693}
1694
1695
1696/**
1697 * Atomically reads an unsigned 64-bit value, ordered.
1698 *
1699 * @returns Current *pu64 value
1700 * @param pu64 Pointer to the 64-bit variable to read.
1701 * The memory pointed to must be writable.
1702 *
1703 * @remarks This may fault if the memory is read-only!
1704 * @remarks x86: Requires a Pentium or later.
1705 */
1706#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1707 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1708DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1709#else
1710DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1711{
1712 uint64_t u64;
1713# ifdef RT_ARCH_AMD64
1714 Assert(!((uintptr_t)pu64 & 7));
1715/*# if RT_INLINE_ASM_GNU_STYLE
1716 __asm__ __volatile__( "mfence\n\t"
1717 "movq %1, %0\n\t"
1718 : "=r" (u64)
1719 : "m" (*pu64));
1720# else
1721 __asm
1722 {
1723 mfence
1724 mov rdx, [pu64]
1725 mov rax, [rdx]
1726 mov [u64], rax
1727 }
1728# endif*/
1729 ASMMemoryFence();
1730 u64 = *pu64;
1731# else /* !RT_ARCH_AMD64 */
1732# if RT_INLINE_ASM_GNU_STYLE
1733# if defined(PIC) || defined(__PIC__)
1734 uint32_t u32EBX = 0;
1735 Assert(!((uintptr_t)pu64 & 7));
1736 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1737 "lock; cmpxchg8b (%5)\n\t"
1738 "movl %3, %%ebx\n\t"
1739 : "=A" (u64),
1740# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1741 "+m" (*pu64)
1742# else
1743 "=m" (*pu64)
1744# endif
1745 : "0" (0ULL),
1746 "m" (u32EBX),
1747 "c" (0),
1748 "S" (pu64));
1749# else /* !PIC */
1750 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1751 : "=A" (u64),
1752 "+m" (*pu64)
1753 : "0" (0ULL),
1754 "b" (0),
1755 "c" (0));
1756# endif
1757# else
1758 Assert(!((uintptr_t)pu64 & 7));
1759 __asm
1760 {
1761 xor eax, eax
1762 xor edx, edx
1763 mov edi, pu64
1764 xor ecx, ecx
1765 xor ebx, ebx
1766 lock cmpxchg8b [edi]
1767 mov dword ptr [u64], eax
1768 mov dword ptr [u64 + 4], edx
1769 }
1770# endif
1771# endif /* !RT_ARCH_AMD64 */
1772 return u64;
1773}
1774#endif
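
/* A sketch of sampling a 64-bit statistics counter that another context keeps
 * updating: on 32-bit x86 two separate 32-bit loads could tear, whereas
 * ASMAtomicReadU64 cannot. The function and parameter names are illustrative.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * uint64_t SampleCounterDelta(volatile uint64_t *pu64Counter, uint64_t *pu64Prev)
 * {
 *     uint64_t const u64Now   = ASMAtomicReadU64(pu64Counter);  // never torn
 *     uint64_t const u64Delta = u64Now - *pu64Prev;
 *     *pu64Prev = u64Now;
 *     return u64Delta;
 * }
 * @endcode
 */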
1775
1776
1777/**
1778 * Atomically reads an unsigned 64-bit value, unordered.
1779 *
1780 * @returns Current *pu64 value
1781 * @param pu64 Pointer to the 64-bit variable to read.
1782 * The memory pointed to must be writable.
1783 *
1784 * @remarks This may fault if the memory is read-only!
1785 * @remarks x86: Requires a Pentium or later.
1786 */
1787#if !defined(RT_ARCH_AMD64) \
1788 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1789 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1790DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1791#else
1792DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1793{
1794 uint64_t u64;
1795# ifdef RT_ARCH_AMD64
1796 Assert(!((uintptr_t)pu64 & 7));
1797/*# if RT_INLINE_ASM_GNU_STYLE
1798 Assert(!((uintptr_t)pu64 & 7));
1799 __asm__ __volatile__("movq %1, %0\n\t"
1800 : "=r" (u64)
1801 : "m" (*pu64));
1802# else
1803 __asm
1804 {
1805 mov rdx, [pu64]
1806 mov rax, [rdx]
1807 mov [u64], rax
1808 }
1809# endif */
1810 u64 = *pu64;
1811# else /* !RT_ARCH_AMD64 */
1812# if RT_INLINE_ASM_GNU_STYLE
1813# if defined(PIC) || defined(__PIC__)
1814 uint32_t u32EBX = 0;
1815 uint32_t u32Spill;
1816 Assert(!((uintptr_t)pu64 & 7));
1817 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1818 "xor %%ecx,%%ecx\n\t"
1819 "xor %%edx,%%edx\n\t"
1820 "xchgl %%ebx, %3\n\t"
1821 "lock; cmpxchg8b (%4)\n\t"
1822 "movl %3, %%ebx\n\t"
1823 : "=A" (u64),
1824# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1825 "+m" (*pu64),
1826# else
1827 "=m" (*pu64),
1828# endif
1829 "=c" (u32Spill)
1830 : "m" (u32EBX),
1831 "S" (pu64));
1832# else /* !PIC */
1833 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1834 : "=A" (u64),
1835 "+m" (*pu64)
1836 : "0" (0ULL),
1837 "b" (0),
1838 "c" (0));
1839# endif
1840# else
1841 Assert(!((uintptr_t)pu64 & 7));
1842 __asm
1843 {
1844 xor eax, eax
1845 xor edx, edx
1846 mov edi, pu64
1847 xor ecx, ecx
1848 xor ebx, ebx
1849 lock cmpxchg8b [edi]
1850 mov dword ptr [u64], eax
1851 mov dword ptr [u64 + 4], edx
1852 }
1853# endif
1854# endif /* !RT_ARCH_AMD64 */
1855 return u64;
1856}
1857#endif
1858
1859
1860/**
1861 * Atomically reads a signed 64-bit value, ordered.
1862 *
1863 * @returns Current *pi64 value
1864 * @param pi64 Pointer to the 64-bit variable to read.
1865 * The memory pointed to must be writable.
1866 *
1867 * @remarks This may fault if the memory is read-only!
1868 * @remarks x86: Requires a Pentium or later.
1869 */
1870DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1871{
1872 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1873}
1874
1875
1876/**
1877 * Atomically reads a signed 64-bit value, unordered.
1878 *
1879 * @returns Current *pi64 value
1880 * @param pi64 Pointer to the 64-bit variable to read.
1881 * The memory pointed to must be writable.
1882 *
1883 * @remarks This will fault if the memory is read-only!
1884 * @remarks x86: Requires a Pentium or later.
1885 */
1886DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1887{
1888 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1889}
1890
1891
1892/**
1893 * Atomically reads a size_t value, ordered.
1894 *
1895 * @returns Current *pcb value
1896 * @param pcb Pointer to the size_t variable to read.
1897 */
1898DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1899{
1900#if ARCH_BITS == 64
1901 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1902#elif ARCH_BITS == 32
1903 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1904#elif ARCH_BITS == 16
1905 AssertCompileSize(size_t, 2);
1906 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1907#else
1908# error "Unsupported ARCH_BITS value"
1909#endif
1910}
1911
1912
1913/**
1914 * Atomically reads a size_t value, unordered.
1915 *
1916 * @returns Current *pcb value
1917 * @param pcb Pointer to the size_t variable to read.
1918 */
1919DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1920{
1921#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1922 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1923#elif ARCH_BITS == 32
1924 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1925#elif ARCH_BITS == 16
1926 AssertCompileSize(size_t, 2);
1927 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1928#else
1929# error "Unsupported ARCH_BITS value"
1930#endif
1931}
1932
1933
1934/**
1935 * Atomically reads a pointer value, ordered.
1936 *
1937 * @returns Current *pv value
1938 * @param ppv Pointer to the pointer variable to read.
1939 *
1940 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
1941 * requires less typing (no casts).
1942 */
1943DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1944{
1945#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1946 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1947#elif ARCH_BITS == 64
1948 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1949#else
1950# error "ARCH_BITS is bogus"
1951#endif
1952}
1953
1954/**
1955 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1956 *
1957 * @returns Current *pv value
1958 * @param ppv Pointer to the pointer variable to read.
1959 * @param Type The type of *ppv, sans volatile.
1960 */
1961#ifdef __GNUC__
1962# define ASMAtomicReadPtrT(ppv, Type) \
1963 __extension__ \
1964 ({\
1965 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1966 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1967 pvTypeChecked; \
1968 })
1969#else
1970# define ASMAtomicReadPtrT(ppv, Type) \
1971 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1972#endif
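/*
 * Illustrative usage sketch (the node type and g_pHead below are hypothetical,
 * not part of IPRT): ASMAtomicReadPtrT keeps the pointer type, whereas the
 * plain ASMAtomicReadPtr call needs casts on both sides.
 *
 *     typedef struct EXAMPLENODE *PEXAMPLENODE;
 *     static PEXAMPLENODE volatile g_pHead = NULL;
 *
 *     PEXAMPLENODE pHead1 = ASMAtomicReadPtrT(&g_pHead, PEXAMPLENODE);
 *     PEXAMPLENODE pHead2 = (PEXAMPLENODE)ASMAtomicReadPtr((void * volatile *)&g_pHead);
 */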
1973
1974
1975/**
1976 * Atomically reads a pointer value, unordered.
1977 *
1978 * @returns Current *pv value
1979 * @param ppv Pointer to the pointer variable to read.
1980 *
1981 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
1982 * requires less typing (no casts).
1983 */
1984DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1985{
1986#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1987 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1988#elif ARCH_BITS == 64
1989 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1990#else
1991# error "ARCH_BITS is bogus"
1992#endif
1993}
1994
1995
1996/**
1997 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
1998 *
1999 * @returns Current *pv value
2000 * @param ppv Pointer to the pointer variable to read.
2001 * @param Type The type of *ppv, sans volatile.
2002 */
2003#ifdef __GNUC__
2004# define ASMAtomicUoReadPtrT(ppv, Type) \
2005 __extension__ \
2006 ({\
2007 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2008 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2009 pvTypeChecked; \
2010 })
2011#else
2012# define ASMAtomicUoReadPtrT(ppv, Type) \
2013 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2014#endif
2015
2016
2017/**
2018 * Atomically reads a boolean value, ordered.
2019 *
2020 * @returns Current *pf value
2021 * @param pf Pointer to the boolean variable to read.
2022 */
2023DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2024{
2025 ASMMemoryFence();
2026 return *pf; /* byte reads are atomic on x86 */
2027}
2028
2029
2030/**
2031 * Atomically reads a boolean value, unordered.
2032 *
2033 * @returns Current *pf value
2034 * @param pf Pointer to the boolean variable to read.
2035 */
2036DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2037{
2038 return *pf; /* byte reads are atomic on x86 */
2039}
2040
2041
2042/**
2043 * Atomically read a typical IPRT handle value, ordered.
2044 *
2045 * @param ph Pointer to the handle variable to read.
2046 * @param phRes Where to store the result.
2047 *
2048 * @remarks This doesn't currently work for all handles (like RTFILE).
2049 */
2050#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2051# define ASMAtomicReadHandle(ph, phRes) \
2052 do { \
2053 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2054 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2055 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2056 } while (0)
2057#elif HC_ARCH_BITS == 64
2058# define ASMAtomicReadHandle(ph, phRes) \
2059 do { \
2060 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2061 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2062 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2063 } while (0)
2064#else
2065# error HC_ARCH_BITS
2066#endif
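/*
 * Illustrative usage sketch (g_hEvt is a hypothetical variable): reading a
 * pointer sized IPRT handle, here an RTSEMEVENT, into a local copy.
 *
 *     static RTSEMEVENT volatile g_hEvt = NIL_RTSEMEVENT;
 *
 *     RTSEMEVENT hEvt;
 *     ASMAtomicReadHandle(&g_hEvt, &hEvt);
 *     if (hEvt != NIL_RTSEMEVENT)
 *         ... use the handle ...
 */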
2067
2068
2069/**
2070 * Atomically read a typical IPRT handle value, unordered.
2071 *
2072 * @param ph Pointer to the handle variable to read.
2073 * @param phRes Where to store the result.
2074 *
2075 * @remarks This doesn't currently work for all handles (like RTFILE).
2076 */
2077#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2078# define ASMAtomicUoReadHandle(ph, phRes) \
2079 do { \
2080 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2081 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2082 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2083 } while (0)
2084#elif HC_ARCH_BITS == 64
2085# define ASMAtomicUoReadHandle(ph, phRes) \
2086 do { \
2087 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2088 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2089 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2090 } while (0)
2091#else
2092# error HC_ARCH_BITS
2093#endif
2094
2095
2096/**
2097 * Atomically read a value whose size might differ
2098 * between platforms or compilers, ordered.
2099 *
2100 * @param pu Pointer to the variable to read.
2101 * @param puRes Where to store the result.
2102 */
2103#define ASMAtomicReadSize(pu, puRes) \
2104 do { \
2105 switch (sizeof(*(pu))) { \
2106 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2107 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2108 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2109 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2110 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2111 } \
2112 } while (0)
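/*
 * Illustrative usage sketch (g_cTicks is hypothetical): reading a variable
 * whose width differs between platforms or compilers, e.g. unsigned long,
 * which is 32-bit on 64-bit Windows but 64-bit on most 64-bit Unix systems.
 *
 *     static unsigned long volatile g_cTicks;
 *
 *     unsigned long cTicks;
 *     ASMAtomicReadSize(&g_cTicks, &cTicks);
 */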
2113
2114
2115/**
2116 * Atomically read a value whose size might differ
2117 * between platforms or compilers, unordered.
2118 *
2119 * @param pu Pointer to the variable to read.
2120 * @param puRes Where to store the result.
2121 */
2122#define ASMAtomicUoReadSize(pu, puRes) \
2123 do { \
2124 switch (sizeof(*(pu))) { \
2125 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2126 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2127 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2128 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2129 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2130 } \
2131 } while (0)
2132
2133
2134/**
2135 * Atomically writes an unsigned 8-bit value, ordered.
2136 *
2137 * @param pu8 Pointer to the 8-bit variable.
2138 * @param u8 The 8-bit value to assign to *pu8.
2139 */
2140DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2141{
2142 ASMAtomicXchgU8(pu8, u8);
2143}
2144
2145
2146/**
2147 * Atomically writes an unsigned 8-bit value, unordered.
2148 *
2149 * @param pu8 Pointer to the 8-bit variable.
2150 * @param u8 The 8-bit value to assign to *pu8.
2151 */
2152DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2153{
2154 *pu8 = u8; /* byte writes are atomic on x86 */
2155}
2156
2157
2158/**
2159 * Atomically writes a signed 8-bit value, ordered.
2160 *
2161 * @param pi8 Pointer to the 8-bit variable to read.
2162 * @param i8 The 8-bit value to assign to *pi8.
2163 */
2164DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2165{
2166 ASMAtomicXchgS8(pi8, i8);
2167}
2168
2169
2170/**
2171 * Atomically writes a signed 8-bit value, unordered.
2172 *
2173 * @param pi8 Pointer to the 8-bit variable to write.
2174 * @param i8 The 8-bit value to assign to *pi8.
2175 */
2176DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2177{
2178 *pi8 = i8; /* byte writes are atomic on x86 */
2179}
2180
2181
2182/**
2183 * Atomically writes an unsigned 16-bit value, ordered.
2184 *
2185 * @param pu16 Pointer to the 16-bit variable to write.
2186 * @param u16 The 16-bit value to assign to *pu16.
2187 */
2188DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2189{
2190 ASMAtomicXchgU16(pu16, u16);
2191}
2192
2193
2194/**
2195 * Atomically writes an unsigned 16-bit value, unordered.
2196 *
2197 * @param pu16 Pointer to the 16-bit variable to write.
2198 * @param u16 The 16-bit value to assign to *pu16.
2199 */
2200DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2201{
2202 Assert(!((uintptr_t)pu16 & 1));
2203 *pu16 = u16;
2204}
2205
2206
2207/**
2208 * Atomically writes a signed 16-bit value, ordered.
2209 *
2210 * @param pi16 Pointer to the 16-bit variable to write.
2211 * @param i16 The 16-bit value to assign to *pi16.
2212 */
2213DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2214{
2215 ASMAtomicXchgS16(pi16, i16);
2216}
2217
2218
2219/**
2220 * Atomically writes a signed 16-bit value, unordered.
2221 *
2222 * @param pi16 Pointer to the 16-bit variable to write.
2223 * @param i16 The 16-bit value to assign to *pi16.
2224 */
2225DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2226{
2227 Assert(!((uintptr_t)pi16 & 1));
2228 *pi16 = i16;
2229}
2230
2231
2232/**
2233 * Atomically writes an unsigned 32-bit value, ordered.
2234 *
2235 * @param pu32 Pointer to the 32-bit variable to write.
2236 * @param u32 The 32-bit value to assign to *pu32.
2237 */
2238DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2239{
2240 ASMAtomicXchgU32(pu32, u32);
2241}
2242
2243
2244/**
2245 * Atomically writes an unsigned 32-bit value, unordered.
2246 *
2247 * @param pu32 Pointer to the 32-bit variable to write.
2248 * @param u32 The 32-bit value to assign to *pu32.
2249 */
2250DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2251{
2252 Assert(!((uintptr_t)pu32 & 3));
2253 *pu32 = u32;
2254}
2255
2256
2257/**
2258 * Atomically writes a signed 32-bit value, ordered.
2259 *
2260 * @param pi32 Pointer to the 32-bit variable to write.
2261 * @param i32 The 32-bit value to assign to *pi32.
2262 */
2263DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2264{
2265 ASMAtomicXchgS32(pi32, i32);
2266}
2267
2268
2269/**
2270 * Atomically writes a signed 32-bit value, unordered.
2271 *
2272 * @param pi32 Pointer to the 32-bit variable to write.
2273 * @param i32 The 32-bit value to assign to *pi32.
2274 */
2275DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2276{
2277 Assert(!((uintptr_t)pi32 & 3));
2278 *pi32 = i32;
2279}
2280
2281
2282/**
2283 * Atomically writes an unsigned 64-bit value, ordered.
2284 *
2285 * @param pu64 Pointer to the 64-bit variable to write.
2286 * @param u64 The 64-bit value to assign to *pu64.
2287 */
2288DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2289{
2290 ASMAtomicXchgU64(pu64, u64);
2291}
2292
2293
2294/**
2295 * Atomically writes an unsigned 64-bit value, unordered.
2296 *
2297 * @param pu64 Pointer to the 64-bit variable to write.
2298 * @param u64 The 64-bit value to assign to *pu64.
2299 */
2300DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2301{
2302 Assert(!((uintptr_t)pu64 & 7));
2303#if ARCH_BITS == 64
2304 *pu64 = u64;
2305#else
2306 ASMAtomicXchgU64(pu64, u64);
2307#endif
2308}
2309
2310
2311/**
2312 * Atomically writes a signed 64-bit value, ordered.
2313 *
2314 * @param pi64 Pointer to the 64-bit variable to write.
2315 * @param i64 The 64-bit value to assign to *pi64.
2316 */
2317DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2318{
2319 ASMAtomicXchgS64(pi64, i64);
2320}
2321
2322
2323/**
2324 * Atomically writes a signed 64-bit value, unordered.
2325 *
2326 * @param pi64 Pointer to the 64-bit variable to write.
2327 * @param i64 The 64-bit value to assign to *pi64.
2328 */
2329DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2330{
2331 Assert(!((uintptr_t)pi64 & 7));
2332#if ARCH_BITS == 64
2333 *pi64 = i64;
2334#else
2335 ASMAtomicXchgS64(pi64, i64);
2336#endif
2337}
2338
2339
2340/**
2341 * Atomically writes a boolean value, ordered.
2342 *
2343 * @param pf Pointer to the boolean variable to write.
2344 * @param f The boolean value to assign to *pf.
2345 */
2346DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2347{
2348 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2349}
2350
2351
2352/**
2353 * Atomically writes a boolean value, unordered.
2354 *
2355 * @param pf Pointer to the boolean variable to write.
2356 * @param f The boolean value to assign to *pf.
2357 */
2358DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2359{
2360 *pf = f; /* byte writes are atomic on x86 */
2361}
2362
2363
2364/**
2365 * Atomically writes a pointer value, ordered.
2366 *
2367 * @param ppv Pointer to the pointer variable to write.
2368 * @param pv The pointer value to assign to *ppv.
2369 */
2370DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2371{
2372#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2373 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2374#elif ARCH_BITS == 64
2375 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2376#else
2377# error "ARCH_BITS is bogus"
2378#endif
2379}
2380
2381
2382/**
2383 * Atomically writes a pointer value, ordered.
2384 *
2385 * @param ppv Pointer to the pointer variable to write.
2386 * @param pv The pointer value to assign to *ppv. If NULL use
2387 * ASMAtomicWriteNullPtr or you'll land in trouble.
2388 *
2389 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2390 * NULL.
2391 */
2392#ifdef __GNUC__
2393# define ASMAtomicWritePtr(ppv, pv) \
2394 do \
2395 { \
2396 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2397 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2398 \
2399 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2400 AssertCompile(sizeof(pv) == sizeof(void *)); \
2401 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2402 \
2403 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2404 } while (0)
2405#else
2406# define ASMAtomicWritePtr(ppv, pv) \
2407 do \
2408 { \
2409 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2410 AssertCompile(sizeof(pv) == sizeof(void *)); \
2411 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2412 \
2413 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2414 } while (0)
2415#endif
2416
2417
2418/**
2419 * Atomically sets a pointer to NULL, ordered.
2420 *
2421 * @param ppv Pointer to the pointer variable that should be set to NULL.
2422 *
2423 * @remarks This is relatively type safe on GCC platforms.
2424 */
2425#ifdef __GNUC__
2426# define ASMAtomicWriteNullPtr(ppv) \
2427 do \
2428 { \
2429 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2430 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2431 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2432 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2433 } while (0)
2434#else
2435# define ASMAtomicWriteNullPtr(ppv) \
2436 do \
2437 { \
2438 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2439 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2440 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2441 } while (0)
2442#endif
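/*
 * Illustrative usage sketch (PEXAMPLESTATE and g_pState are hypothetical):
 * publishing a pointer with ASMAtomicWritePtr and retiring it with the
 * dedicated NULL macro, since ASMAtomicWritePtr cannot take a plain NULL.
 *
 *     typedef struct EXAMPLESTATE *PEXAMPLESTATE;
 *     static PEXAMPLESTATE volatile g_pState = NULL;
 *
 *     void examplePublish(PEXAMPLESTATE pNew)
 *     {
 *         ASMAtomicWritePtr(&g_pState, pNew);
 *     }
 *
 *     void exampleRetire(void)
 *     {
 *         ASMAtomicWriteNullPtr(&g_pState);
 *     }
 */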
2443
2444
2445/**
2446 * Atomically writes a pointer value, unordered.
2447 *
2449 * @param ppv Pointer to the pointer variable.
2450 * @param pv The pointer value to assign to *ppv. If NULL use
2451 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2452 *
2453 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2454 * NULL.
2455 */
2456#ifdef __GNUC__
2457# define ASMAtomicUoWritePtr(ppv, pv) \
2458 do \
2459 { \
2460 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2461 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2462 \
2463 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2464 AssertCompile(sizeof(pv) == sizeof(void *)); \
2465 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2466 \
2467 *(ppvTypeChecked) = pvTypeChecked; \
2468 } while (0)
2469#else
2470# define ASMAtomicUoWritePtr(ppv, pv) \
2471 do \
2472 { \
2473 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2474 AssertCompile(sizeof(pv) == sizeof(void *)); \
2475 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2476 *(ppv) = pv; \
2477 } while (0)
2478#endif
2479
2480
2481/**
2482 * Atomically sets a pointer to NULL, unordered.
2483 *
2484 * @param ppv Pointer to the pointer variable that should be set to NULL.
2485 *
2486 * @remarks This is relatively type safe on GCC platforms.
2487 */
2488#ifdef __GNUC__
2489# define ASMAtomicUoWriteNullPtr(ppv) \
2490 do \
2491 { \
2492 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2493 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2494 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2495 *(ppvTypeChecked) = NULL; \
2496 } while (0)
2497#else
2498# define ASMAtomicUoWriteNullPtr(ppv) \
2499 do \
2500 { \
2501 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2502 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2503 *(ppv) = NULL; \
2504 } while (0)
2505#endif
2506
2507
2508/**
2509 * Atomically write a typical IPRT handle value, ordered.
2510 *
2511 * @param ph Pointer to the variable to update.
2512 * @param hNew The value to assign to *ph.
2513 *
2514 * @remarks This doesn't currently work for all handles (like RTFILE).
2515 */
2516#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2517# define ASMAtomicWriteHandle(ph, hNew) \
2518 do { \
2519 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2520 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2521 } while (0)
2522#elif HC_ARCH_BITS == 64
2523# define ASMAtomicWriteHandle(ph, hNew) \
2524 do { \
2525 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2526 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2527 } while (0)
2528#else
2529# error HC_ARCH_BITS
2530#endif
2531
2532
2533/**
2534 * Atomically write a typical IPRT handle value, unordered.
2535 *
2536 * @param ph Pointer to the variable to update.
2537 * @param hNew The value to assign to *ph.
2538 *
2539 * @remarks This doesn't currently work for all handles (like RTFILE).
2540 */
2541#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2542# define ASMAtomicUoWriteHandle(ph, hNew) \
2543 do { \
2544 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2545 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2546 } while (0)
2547#elif HC_ARCH_BITS == 64
2548# define ASMAtomicUoWriteHandle(ph, hNew) \
2549 do { \
2550 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2551 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2552 } while (0)
2553#else
2554# error HC_ARCH_BITS
2555#endif
2556
2557
2558/**
2559 * Atomically write a value whose size might differ
2560 * between platforms or compilers, ordered.
2561 *
2562 * @param pu Pointer to the variable to update.
2563 * @param uNew The value to assign to *pu.
2564 */
2565#define ASMAtomicWriteSize(pu, uNew) \
2566 do { \
2567 switch (sizeof(*(pu))) { \
2568 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2569 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2570 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2571 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2572 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2573 } \
2574 } while (0)
2575
2576/**
2577 * Atomically write a value whose size might differ
2578 * between platforms or compilers, unordered.
2579 *
2580 * @param pu Pointer to the variable to update.
2581 * @param uNew The value to assign to *pu.
2582 */
2583#define ASMAtomicUoWriteSize(pu, uNew) \
2584 do { \
2585 switch (sizeof(*(pu))) { \
2586 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2587 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2588 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2589 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2590 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2591 } \
2592 } while (0)
2593
2594
2595
2596/**
2597 * Atomically exchanges and adds to a 16-bit value, ordered.
2598 *
2599 * @returns The old value.
2600 * @param pu16 Pointer to the value.
2601 * @param u16 Number to add.
2602 *
2603 * @remarks Currently not implemented, just to make 16-bit code happy.
2604 * @remarks x86: Requires a 486 or later.
2605 */
2606DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2607
2608
2609/**
2610 * Atomically exchanges and adds to a 32-bit value, ordered.
2611 *
2612 * @returns The old value.
2613 * @param pu32 Pointer to the value.
2614 * @param u32 Number to add.
2615 *
2616 * @remarks x86: Requires a 486 or later.
2617 */
2618#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2619DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2620#else
2621DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2622{
2623# if RT_INLINE_ASM_USES_INTRIN
2624 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2625 return u32;
2626
2627# elif RT_INLINE_ASM_GNU_STYLE
2628 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2629 : "=r" (u32),
2630 "=m" (*pu32)
2631 : "0" (u32),
2632 "m" (*pu32)
2633 : "memory");
2634 return u32;
2635# else
2636 __asm
2637 {
2638 mov eax, [u32]
2639# ifdef RT_ARCH_AMD64
2640 mov rdx, [pu32]
2641 lock xadd [rdx], eax
2642# else
2643 mov edx, [pu32]
2644 lock xadd [edx], eax
2645# endif
2646 mov [u32], eax
2647 }
2648 return u32;
2649# endif
2650}
2651#endif
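/*
 * Illustrative usage sketch (g_cbAllocated is a hypothetical statistics
 * counter): the add/sub functions return the OLD value, unlike the inc/dec
 * functions further down which return the new one.
 *
 *     static uint32_t volatile g_cbAllocated = 0;
 *
 *     uint32_t cbOld = ASMAtomicAddU32(&g_cbAllocated, 64);
 *     uint32_t cbNew = cbOld + 64;
 */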
2652
2653
2654/**
2655 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2656 *
2657 * @returns The old value.
2658 * @param pi32 Pointer to the value.
2659 * @param i32 Number to add.
2660 *
2661 * @remarks x86: Requires a 486 or later.
2662 */
2663DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2664{
2665 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2666}
2667
2668
2669/**
2670 * Atomically exchanges and adds to a 64-bit value, ordered.
2671 *
2672 * @returns The old value.
2673 * @param pu64 Pointer to the value.
2674 * @param u64 Number to add.
2675 *
2676 * @remarks x86: Requires a Pentium or later.
2677 */
2678#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2679DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2680#else
2681DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2682{
2683# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2684 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2685 return u64;
2686
2687# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2688 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2689 : "=r" (u64),
2690 "=m" (*pu64)
2691 : "0" (u64),
2692 "m" (*pu64)
2693 : "memory");
2694 return u64;
2695# else
2696 uint64_t u64Old;
2697 for (;;)
2698 {
2699 uint64_t u64New;
2700 u64Old = ASMAtomicUoReadU64(pu64);
2701 u64New = u64Old + u64;
2702 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2703 break;
2704 ASMNopPause();
2705 }
2706 return u64Old;
2707# endif
2708}
2709#endif
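/*
 * Illustrative sketch (exampleAtomicMaxU64 is hypothetical): the
 * compare-and-exchange retry loop used by the 32-bit fallback above is a
 * general recipe for building other read-modify-write operations on top of
 * ASMAtomicCmpXchgU64, for instance a 64-bit atomic maximum:
 *
 *     static uint64_t exampleAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64)
 *     {
 *         for (;;)
 *         {
 *             uint64_t u64Old = ASMAtomicUoReadU64(pu64);
 *             uint64_t u64New = u64 > u64Old ? u64 : u64Old;
 *             if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                 return u64Old;
 *             ASMNopPause();
 *         }
 *     }
 */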
2710
2711
2712/**
2713 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2714 *
2715 * @returns The old value.
2716 * @param pi64 Pointer to the value.
2717 * @param i64 Number to add.
2718 *
2719 * @remarks x86: Requires a Pentium or later.
2720 */
2721DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2722{
2723 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2724}
2725
2726
2727/**
2728 * Atomically exchanges and adds to a size_t value, ordered.
2729 *
2730 * @returns The old value.
2731 * @param pcb Pointer to the size_t value.
2732 * @param cb Number to add.
2733 */
2734DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2735{
2736#if ARCH_BITS == 64
2737 AssertCompileSize(size_t, 8);
2738 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2739#elif ARCH_BITS == 32
2740 AssertCompileSize(size_t, 4);
2741 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2742#elif ARCH_BITS == 16
2743 AssertCompileSize(size_t, 2);
2744 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2745#else
2746# error "Unsupported ARCH_BITS value"
2747#endif
2748}
2749
2750
2751/**
2752 * Atomically exchanges and adds a value whose size might differ between
2753 * platforms or compilers, ordered.
2754 *
2755 * @param pu Pointer to the variable to update.
2756 * @param uNew The value to add to *pu.
2757 * @param puOld Where to store the old value.
2758 */
2759#define ASMAtomicAddSize(pu, uNew, puOld) \
2760 do { \
2761 switch (sizeof(*(pu))) { \
2762 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2763 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2764 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2765 } \
2766 } while (0)
2767
2768
2769
2770/**
2771 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2772 *
2773 * @returns The old value.
2774 * @param pu16 Pointer to the value.
2775 * @param u16 Number to subtract.
2776 *
2777 * @remarks x86: Requires a 486 or later.
2778 */
2779DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2780{
2781 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2782}
2783
2784
2785/**
2786 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2787 *
2788 * @returns The old value.
2789 * @param pi16 Pointer to the value.
2790 * @param i16 Number to subtract.
2791 *
2792 * @remarks x86: Requires a 486 or later.
2793 */
2794DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2795{
2796 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2797}
2798
2799
2800/**
2801 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2802 *
2803 * @returns The old value.
2804 * @param pu32 Pointer to the value.
2805 * @param u32 Number to subtract.
2806 *
2807 * @remarks x86: Requires a 486 or later.
2808 */
2809DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2810{
2811 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2812}
2813
2814
2815/**
2816 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2817 *
2818 * @returns The old value.
2819 * @param pi32 Pointer to the value.
2820 * @param i32 Number to subtract.
2821 *
2822 * @remarks x86: Requires a 486 or later.
2823 */
2824DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2825{
2826 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2827}
2828
2829
2830/**
2831 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2832 *
2833 * @returns The old value.
2834 * @param pu64 Pointer to the value.
2835 * @param u64 Number to subtract.
2836 *
2837 * @remarks x86: Requires a Pentium or later.
2838 */
2839DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2840{
2841 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2842}
2843
2844
2845/**
2846 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2847 *
2848 * @returns The old value.
2849 * @param pi64 Pointer to the value.
2850 * @param i64 Number to subtract.
2851 *
2852 * @remarks x86: Requires a Pentium or later.
2853 */
2854DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2855{
2856 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2857}
2858
2859
2860/**
2861 * Atomically exchanges and subtracts from a size_t value, ordered.
2862 *
2863 * @returns The old value.
2864 * @param pcb Pointer to the size_t value.
2865 * @param cb Number to subtract.
2866 *
2867 * @remarks x86: Requires a 486 or later.
2868 */
2869DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2870{
2871#if ARCH_BITS == 64
2872 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2873#elif ARCH_BITS == 32
2874 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2875#elif ARCH_BITS == 16
2876 AssertCompileSize(size_t, 2);
2877 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2878#else
2879# error "Unsupported ARCH_BITS value"
2880#endif
2881}
2882
2883
2884/**
2885 * Atomically exchanges and subtracts a value whose size might differ between
2886 * platforms or compilers, ordered.
2887 *
2888 * @param pu Pointer to the variable to update.
2889 * @param uNew The value to subtract from *pu.
2890 * @param puOld Where to store the old value.
2891 *
2892 * @remarks x86: Requires a 486 or later.
2893 */
2894#define ASMAtomicSubSize(pu, uNew, puOld) \
2895 do { \
2896 switch (sizeof(*(pu))) { \
2897 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2898 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2899 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2900 } \
2901 } while (0)
2902
2903
2904
2905/**
2906 * Atomically increment a 16-bit value, ordered.
2907 *
2908 * @returns The new value.
2909 * @param pu16 Pointer to the value to increment.
2910 * @remarks Not implemented. Just to make 16-bit code happy.
2911 *
2912 * @remarks x86: Requires a 486 or later.
2913 */
2914DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2915
2916
2917/**
2918 * Atomically increment a 32-bit value, ordered.
2919 *
2920 * @returns The new value.
2921 * @param pu32 Pointer to the value to increment.
2922 *
2923 * @remarks x86: Requires a 486 or later.
2924 */
2925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2926DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2927#else
2928DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2929{
2930 uint32_t u32;
2931# if RT_INLINE_ASM_USES_INTRIN
2932 u32 = _InterlockedIncrement((long *)pu32);
2933 return u32;
2934
2935# elif RT_INLINE_ASM_GNU_STYLE
2936 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2937 : "=r" (u32),
2938 "=m" (*pu32)
2939 : "0" (1),
2940 "m" (*pu32)
2941 : "memory");
2942 return u32+1;
2943# else
2944 __asm
2945 {
2946 mov eax, 1
2947# ifdef RT_ARCH_AMD64
2948 mov rdx, [pu32]
2949 lock xadd [rdx], eax
2950# else
2951 mov edx, [pu32]
2952 lock xadd [edx], eax
2953# endif
2954 mov u32, eax
2955 }
2956 return u32+1;
2957# endif
2958}
2959#endif
2960
2961
2962/**
2963 * Atomically increment a signed 32-bit value, ordered.
2964 *
2965 * @returns The new value.
2966 * @param pi32 Pointer to the value to increment.
2967 *
2968 * @remarks x86: Requires a 486 or later.
2969 */
2970DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2971{
2972 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2973}
2974
2975
2976/**
2977 * Atomically increment a 64-bit value, ordered.
2978 *
2979 * @returns The new value.
2980 * @param pu64 Pointer to the value to increment.
2981 *
2982 * @remarks x86: Requires a Pentium or later.
2983 */
2984#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2985DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2986#else
2987DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2988{
2989# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2990 uint64_t u64;
2991 u64 = _InterlockedIncrement64((__int64 *)pu64);
2992 return u64;
2993
2994# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2995 uint64_t u64;
2996 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2997 : "=r" (u64),
2998 "=m" (*pu64)
2999 : "0" (1),
3000 "m" (*pu64)
3001 : "memory");
3002 return u64 + 1;
3003# else
3004 return ASMAtomicAddU64(pu64, 1) + 1;
3005# endif
3006}
3007#endif
3008
3009
3010/**
3011 * Atomically increment a signed 64-bit value, ordered.
3012 *
3013 * @returns The new value.
3014 * @param pi64 Pointer to the value to increment.
3015 *
3016 * @remarks x86: Requires a Pentium or later.
3017 */
3018DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3019{
3020 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3021}
3022
3023
3024/**
3025 * Atomically increment a size_t value, ordered.
3026 *
3027 * @returns The new value.
3028 * @param pcb Pointer to the value to increment.
3029 *
3030 * @remarks x86: Requires a 486 or later.
3031 */
3032DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile *pcb)
3033{
3034#if ARCH_BITS == 64
3035 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3036#elif ARCH_BITS == 32
3037 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3038#elif ARCH_BITS == 16
3039 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3040#else
3041# error "Unsupported ARCH_BITS value"
3042#endif
3043}
3044
3045
3046
3047/**
3048 * Atomically decrement an unsigned 16-bit value, ordered.
3049 *
3050 * @returns The new value.
3051 * @param pu16 Pointer to the value to decrement.
3052 * @remarks Not implemented. Just to make 16-bit code happy.
3053 *
3054 * @remarks x86: Requires a 486 or later.
3055 */
3056DECLASM(uint16_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3057
3058
3059/**
3060 * Atomically decrement an unsigned 32-bit value, ordered.
3061 *
3062 * @returns The new value.
3063 * @param pu32 Pointer to the value to decrement.
3064 *
3065 * @remarks x86: Requires a 486 or later.
3066 */
3067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3068DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3069#else
3070DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3071{
3072 uint32_t u32;
3073# if RT_INLINE_ASM_USES_INTRIN
3074 u32 = _InterlockedDecrement((long *)pu32);
3075 return u32;
3076
3077# elif RT_INLINE_ASM_GNU_STYLE
3078 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3079 : "=r" (u32),
3080 "=m" (*pu32)
3081 : "0" (-1),
3082 "m" (*pu32)
3083 : "memory");
3084 return u32-1;
3085# else
3086 __asm
3087 {
3088 mov eax, -1
3089# ifdef RT_ARCH_AMD64
3090 mov rdx, [pu32]
3091 lock xadd [rdx], eax
3092# else
3093 mov edx, [pu32]
3094 lock xadd [edx], eax
3095# endif
3096 mov u32, eax
3097 }
3098 return u32-1;
3099# endif
3100}
3101#endif
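/*
 * Illustrative usage sketch (g_cRefs and exampleDestroy are hypothetical):
 * the classic reference counting pattern; note that the inc/dec functions
 * return the NEW value, so a zero return from the decrement means the last
 * reference was just dropped.
 *
 *     static uint32_t volatile g_cRefs = 1;
 *
 *     void exampleRetain(void)
 *     {
 *         ASMAtomicIncU32(&g_cRefs);
 *     }
 *
 *     void exampleRelease(void)
 *     {
 *         if (ASMAtomicDecU32(&g_cRefs) == 0)
 *             exampleDestroy();
 *     }
 */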
3102
3103
3104/**
3105 * Atomically decrement a signed 32-bit value, ordered.
3106 *
3107 * @returns The new value.
3108 * @param pi32 Pointer to the value to decrement.
3109 *
3110 * @remarks x86: Requires a 486 or later.
3111 */
3112DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3113{
3114 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3115}
3116
3117
3118/**
3119 * Atomically decrement an unsigned 64-bit value, ordered.
3120 *
3121 * @returns The new value.
3122 * @param pu64 Pointer to the value to decrement.
3123 *
3124 * @remarks x86: Requires a Pentium or later.
3125 */
3126#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3127DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3128#else
3129DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3130{
3131# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3132 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3133 return u64;
3134
3135# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3136 uint64_t u64;
3137 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3138 : "=r" (u64),
3139 "=m" (*pu64)
3140 : "0" (~(uint64_t)0),
3141 "m" (*pu64)
3142 : "memory");
3143 return u64-1;
3144# else
3145 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3146# endif
3147}
3148#endif
3149
3150
3151/**
3152 * Atomically decrement a signed 64-bit value, ordered.
3153 *
3154 * @returns The new value.
3155 * @param pi64 Pointer to the value to decrement.
3156 *
3157 * @remarks x86: Requires a Pentium or later.
3158 */
3159DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3160{
3161 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3162}
3163
3164
3165/**
3166 * Atomically decrement a size_t value, ordered.
3167 *
3168 * @returns The new value.
3169 * @param pcb Pointer to the value to decrement.
3170 *
3171 * @remarks x86: Requires a 486 or later.
3172 */
3173DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile *pcb)
3174{
3175#if ARCH_BITS == 64
3176 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3177#elif ARCH_BITS == 32
3178 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3179#elif ARCH_BITS == 16
3180 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3181#else
3182# error "Unsupported ARCH_BITS value"
3183#endif
3184}
3185
3186
3187/**
3188 * Atomically Or an unsigned 32-bit value, ordered.
3189 *
3190 * @param pu32 Pointer to the variable to OR u32 with.
3191 * @param u32 The value to OR *pu32 with.
3192 *
3193 * @remarks x86: Requires a 386 or later.
3194 */
3195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3196DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3197#else
3198DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3199{
3200# if RT_INLINE_ASM_USES_INTRIN
3201 _InterlockedOr((long volatile *)pu32, (long)u32);
3202
3203# elif RT_INLINE_ASM_GNU_STYLE
3204 __asm__ __volatile__("lock; orl %1, %0\n\t"
3205 : "=m" (*pu32)
3206 : "ir" (u32),
3207 "m" (*pu32));
3208# else
3209 __asm
3210 {
3211 mov eax, [u32]
3212# ifdef RT_ARCH_AMD64
3213 mov rdx, [pu32]
3214 lock or [rdx], eax
3215# else
3216 mov edx, [pu32]
3217 lock or [edx], eax
3218# endif
3219 }
3220# endif
3221}
3222#endif
3223
3224
3225/**
3226 * Atomically Or a signed 32-bit value, ordered.
3227 *
3228 * @param pi32 Pointer to the variable to OR i32 with.
3229 * @param i32 The value to OR *pi32 with.
3230 *
3231 * @remarks x86: Requires a 386 or later.
3232 */
3233DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3234{
3235 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3236}
3237
3238
3239/**
3240 * Atomically Or an unsigned 64-bit value, ordered.
3241 *
3242 * @param pu64 Pointer to the variable to OR u64 with.
3243 * @param u64 The value to OR *pu64 with.
3244 *
3245 * @remarks x86: Requires a Pentium or later.
3246 */
3247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3248DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3249#else
3250DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3251{
3252# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3253 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3254
3255# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3256 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3257 : "=m" (*pu64)
3258 : "r" (u64),
3259 "m" (*pu64));
3260# else
3261 for (;;)
3262 {
3263 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3264 uint64_t u64New = u64Old | u64;
3265 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3266 break;
3267 ASMNopPause();
3268 }
3269# endif
3270}
3271#endif
3272
3273
3274/**
3275 * Atomically Or a signed 64-bit value, ordered.
3276 *
3277 * @param pi64 Pointer to the variable to OR i64 with.
3278 * @param i64 The value to OR *pi64 with.
3279 *
3280 * @remarks x86: Requires a Pentium or later.
3281 */
3282DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3283{
3284 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3285}
3286
3287
3288/**
3289 * Atomically And an unsigned 32-bit value, ordered.
3290 *
3291 * @param pu32 Pointer to the variable to AND u32 with.
3292 * @param u32 The value to AND *pu32 with.
3293 *
3294 * @remarks x86: Requires a 386 or later.
3295 */
3296#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3297DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3298#else
3299DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3300{
3301# if RT_INLINE_ASM_USES_INTRIN
3302 _InterlockedAnd((long volatile *)pu32, u32);
3303
3304# elif RT_INLINE_ASM_GNU_STYLE
3305 __asm__ __volatile__("lock; andl %1, %0\n\t"
3306 : "=m" (*pu32)
3307 : "ir" (u32),
3308 "m" (*pu32));
3309# else
3310 __asm
3311 {
3312 mov eax, [u32]
3313# ifdef RT_ARCH_AMD64
3314 mov rdx, [pu32]
3315 lock and [rdx], eax
3316# else
3317 mov edx, [pu32]
3318 lock and [edx], eax
3319# endif
3320 }
3321# endif
3322}
3323#endif
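/*
 * Illustrative usage sketch (EXAMPLE_F_BUSY and g_fFlags are hypothetical):
 * setting and clearing a bit in a shared 32-bit flag word with the OR and
 * AND operations.
 *
 *     #define EXAMPLE_F_BUSY  UINT32_C(0x00000001)
 *     static uint32_t volatile g_fFlags = 0;
 *
 *     ASMAtomicOrU32(&g_fFlags, EXAMPLE_F_BUSY);       (set the bit)
 *     ASMAtomicAndU32(&g_fFlags, ~EXAMPLE_F_BUSY);     (clear the bit)
 */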
3324
3325
3326/**
3327 * Atomically And a signed 32-bit value, ordered.
3328 *
3329 * @param pi32 Pointer to the variable to AND i32 with.
3330 * @param i32 The value to AND *pi32 with.
3331 *
3332 * @remarks x86: Requires a 386 or later.
3333 */
3334DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3335{
3336 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3337}
3338
3339
3340/**
3341 * Atomically And an unsigned 64-bit value, ordered.
3342 *
3343 * @param pu64 Pointer to the variable to AND u64 with.
3344 * @param u64 The value to AND *pu64 with.
3345 *
3346 * @remarks x86: Requires a Pentium or later.
3347 */
3348#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3349DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3350#else
3351DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3352{
3353# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3354 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3355
3356# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3357 __asm__ __volatile__("lock; andq %1, %0\n\t"
3358 : "=m" (*pu64)
3359 : "r" (u64),
3360 "m" (*pu64));
3361# else
3362 for (;;)
3363 {
3364 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3365 uint64_t u64New = u64Old & u64;
3366 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3367 break;
3368 ASMNopPause();
3369 }
3370# endif
3371}
3372#endif
3373
3374
3375/**
3376 * Atomically And a signed 64-bit value, ordered.
3377 *
3378 * @param pi64 Pointer to the variable to AND i64 with.
3379 * @param i64 The value to AND *pi64 with.
3380 *
3381 * @remarks x86: Requires a Pentium or later.
3382 */
3383DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3384{
3385 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3386}
3387
3388
3389/**
3390 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3391 *
3392 * @param pu32 Pointer to the variable to OR u32 with.
3393 * @param u32 The value to OR *pu32 with.
3394 *
3395 * @remarks x86: Requires a 386 or later.
3396 */
3397#if RT_INLINE_ASM_EXTERNAL
3398DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3399#else
3400DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3401{
3402# if RT_INLINE_ASM_GNU_STYLE
3403 __asm__ __volatile__("orl %1, %0\n\t"
3404 : "=m" (*pu32)
3405 : "ir" (u32),
3406 "m" (*pu32));
3407# else
3408 __asm
3409 {
3410 mov eax, [u32]
3411# ifdef RT_ARCH_AMD64
3412 mov rdx, [pu32]
3413 or [rdx], eax
3414# else
3415 mov edx, [pu32]
3416 or [edx], eax
3417# endif
3418 }
3419# endif
3420}
3421#endif
3422
3423
3424/**
3425 * Atomically OR a signed 32-bit value, unordered.
3426 *
3427 * @param pi32 Pointer to the variable to OR i32 with.
3428 * @param i32 The value to OR *pi32 with.
3429 *
3430 * @remarks x86: Requires a 386 or later.
3431 */
3432DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3433{
3434 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3435}
3436
3437
3438/**
3439 * Atomically OR an unsigned 64-bit value, unordered.
3440 *
3441 * @param pu64 Pointer to the variable to OR u64 with.
3442 * @param u64 The value to OR *pu64 with.
3443 *
3444 * @remarks x86: Requires a Pentium or later.
3445 */
3446#if RT_INLINE_ASM_EXTERNAL
3447DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3448#else
3449DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3450{
3451# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3452 __asm__ __volatile__("orq %1, %q0\n\t"
3453 : "=m" (*pu64)
3454 : "r" (u64),
3455 "m" (*pu64));
3456# else
3457 for (;;)
3458 {
3459 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3460 uint64_t u64New = u64Old | u64;
3461 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3462 break;
3463 ASMNopPause();
3464 }
3465# endif
3466}
3467#endif
3468
3469
3470/**
3471 * Atomically Or a signed 64-bit value, unordered.
3472 *
3473 * @param pi64 Pointer to the variable to OR i64 with.
3474 * @param i64 The value to OR *pi64 with.
3475 *
3476 * @remarks x86: Requires a Pentium or later.
3477 */
3478DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3479{
3480 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3481}
3482
3483
3484/**
3485 * Atomically And an unsigned 32-bit value, unordered.
3486 *
3487 * @param pu32 Pointer to the variable to AND u32 with.
3488 * @param u32 The value to AND *pu32 with.
3489 *
3490 * @remarks x86: Requires a 386 or later.
3491 */
3492#if RT_INLINE_ASM_EXTERNAL
3493DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3494#else
3495DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3496{
3497# if RT_INLINE_ASM_GNU_STYLE
3498 __asm__ __volatile__("andl %1, %0\n\t"
3499 : "=m" (*pu32)
3500 : "ir" (u32),
3501 "m" (*pu32));
3502# else
3503 __asm
3504 {
3505 mov eax, [u32]
3506# ifdef RT_ARCH_AMD64
3507 mov rdx, [pu32]
3508 and [rdx], eax
3509# else
3510 mov edx, [pu32]
3511 and [edx], eax
3512# endif
3513 }
3514# endif
3515}
3516#endif
3517
3518
3519/**
3520 * Atomically And a signed 32-bit value, unordered.
3521 *
3522 * @param pi32 Pointer to the variable to AND i32 with.
3523 * @param i32 The value to AND *pi32 with.
3524 *
3525 * @remarks x86: Requires a 386 or later.
3526 */
3527DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3528{
3529 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3530}
3531
3532
3533/**
3534 * Atomically And an unsigned 64-bit value, unordered.
3535 *
3536 * @param pu64 Pointer to the variable to AND u64 with.
3537 * @param u64 The value to AND *pu64 with.
3538 *
3539 * @remarks x86: Requires a Pentium or later.
3540 */
3541#if RT_INLINE_ASM_EXTERNAL
3542DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3543#else
3544DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3545{
3546# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3547 __asm__ __volatile__("andq %1, %0\n\t"
3548 : "=m" (*pu64)
3549 : "r" (u64),
3550 "m" (*pu64));
3551# else
3552 for (;;)
3553 {
3554 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3555 uint64_t u64New = u64Old & u64;
3556 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3557 break;
3558 ASMNopPause();
3559 }
3560# endif
3561}
3562#endif
3563
3564
3565/**
3566 * Atomically And a signed 64-bit value, unordered.
3567 *
3568 * @param pi64 Pointer to the variable to AND i64 with.
3569 * @param i64 The value to AND *pi64 with.
3570 *
3571 * @remarks x86: Requires a Pentium or later.
3572 */
3573DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3574{
3575 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3576}
3577
3578
3579/**
3580 * Atomically increment an unsigned 32-bit value, unordered.
3581 *
3582 * @returns the new value.
3583 * @param pu32 Pointer to the variable to increment.
3584 *
3585 * @remarks x86: Requires a 486 or later.
3586 */
3587#if RT_INLINE_ASM_EXTERNAL
3588DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3589#else
3590DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3591{
3592 uint32_t u32;
3593# if RT_INLINE_ASM_GNU_STYLE
3594 __asm__ __volatile__("xaddl %0, %1\n\t"
3595 : "=r" (u32),
3596 "=m" (*pu32)
3597 : "0" (1),
3598 "m" (*pu32)
3599 : "memory");
3600 return u32 + 1;
3601# else
3602 __asm
3603 {
3604 mov eax, 1
3605# ifdef RT_ARCH_AMD64
3606 mov rdx, [pu32]
3607 xadd [rdx], eax
3608# else
3609 mov edx, [pu32]
3610 xadd [edx], eax
3611# endif
3612 mov u32, eax
3613 }
3614 return u32 + 1;
3615# endif
3616}
3617#endif
3618
3619
3620/**
3621 * Atomically decrement an unsigned 32-bit value, unordered.
3622 *
3623 * @returns the new value.
3624 * @param pu32 Pointer to the variable to decrement.
3625 *
3626 * @remarks x86: Requires a 486 or later.
3627 */
3628#if RT_INLINE_ASM_EXTERNAL
3629DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3630#else
3631DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3632{
3633 uint32_t u32;
3634# if RT_INLINE_ASM_GNU_STYLE
3635 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3636 : "=r" (u32),
3637 "=m" (*pu32)
3638 : "0" (-1),
3639 "m" (*pu32)
3640 : "memory");
3641 return u32 - 1;
3642# else
3643 __asm
3644 {
3645 mov eax, -1
3646# ifdef RT_ARCH_AMD64
3647 mov rdx, [pu32]
3648 xadd [rdx], eax
3649# else
3650 mov edx, [pu32]
3651 xadd [edx], eax
3652# endif
3653 mov u32, eax
3654 }
3655 return u32 - 1;
3656# endif
3657}
3658#endif
3659
3660
3661/** @def RT_ASM_PAGE_SIZE
3662 * We try to avoid dragging in iprt/param.h here.
3663 * @internal
3664 */
3665#if defined(RT_ARCH_SPARC64)
3666# define RT_ASM_PAGE_SIZE 0x2000
3667# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3668# if PAGE_SIZE != 0x2000
3669# error "PAGE_SIZE is not 0x2000!"
3670# endif
3671# endif
3672#else
3673# define RT_ASM_PAGE_SIZE 0x1000
3674# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3675# if PAGE_SIZE != 0x1000
3676# error "PAGE_SIZE is not 0x1000!"
3677# endif
3678# endif
3679#endif
3680
3681/**
3682 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3683 *
3684 * @param pv Pointer to the memory block. This must be page aligned.
3685 */
3686#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3687DECLASM(void) ASMMemZeroPage(volatile void *pv);
3688# else
3689DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3690{
3691# if RT_INLINE_ASM_USES_INTRIN
3692# ifdef RT_ARCH_AMD64
3693 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3694# else
3695 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3696# endif
3697
3698# elif RT_INLINE_ASM_GNU_STYLE
3699 RTCCUINTREG uDummy;
3700# ifdef RT_ARCH_AMD64
3701 __asm__ __volatile__("rep stosq"
3702 : "=D" (pv),
3703 "=c" (uDummy)
3704 : "0" (pv),
3705 "c" (RT_ASM_PAGE_SIZE >> 3),
3706 "a" (0)
3707 : "memory");
3708# else
3709 __asm__ __volatile__("rep stosl"
3710 : "=D" (pv),
3711 "=c" (uDummy)
3712 : "0" (pv),
3713 "c" (RT_ASM_PAGE_SIZE >> 2),
3714 "a" (0)
3715 : "memory");
3716# endif
3717# else
3718 __asm
3719 {
3720# ifdef RT_ARCH_AMD64
3721 xor rax, rax
3722 mov ecx, 0200h
3723 mov rdi, [pv]
3724 rep stosq
3725# else
3726 xor eax, eax
3727 mov ecx, 0400h
3728 mov edi, [pv]
3729 rep stosd
3730# endif
3731 }
3732# endif
3733}
3734# endif
3735
3736
3737/**
3738 * Zeros a memory block with a 32-bit aligned size.
3739 *
3740 * @param pv Pointer to the memory block.
3741 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3742 */
3743#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3744DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3745#else
3746DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3747{
3748# if RT_INLINE_ASM_USES_INTRIN
3749# ifdef RT_ARCH_AMD64
3750 if (!(cb & 7))
3751 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3752 else
3753# endif
3754 __stosd((unsigned long *)pv, 0, cb / 4);
3755
3756# elif RT_INLINE_ASM_GNU_STYLE
3757 __asm__ __volatile__("rep stosl"
3758 : "=D" (pv),
3759 "=c" (cb)
3760 : "0" (pv),
3761 "1" (cb >> 2),
3762 "a" (0)
3763 : "memory");
3764# else
3765 __asm
3766 {
3767 xor eax, eax
3768# ifdef RT_ARCH_AMD64
3769 mov rcx, [cb]
3770 shr rcx, 2
3771 mov rdi, [pv]
3772# else
3773 mov ecx, [cb]
3774 shr ecx, 2
3775 mov edi, [pv]
3776# endif
3777 rep stosd
3778 }
3779# endif
3780}
3781#endif
3782
3783
3784/**
3785 * Fills a memory block with a 32-bit aligned size.
3786 *
3787 * @param pv Pointer to the memory block.
3788 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3789 * @param u32 The value to fill with.
3790 */
3791#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3792DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3793#else
3794DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3795{
3796# if RT_INLINE_ASM_USES_INTRIN
3797# ifdef RT_ARCH_AMD64
3798 if (!(cb & 7))
3799 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3800 else
3801# endif
3802 __stosd((unsigned long *)pv, u32, cb / 4);
3803
3804# elif RT_INLINE_ASM_GNU_STYLE
3805 __asm__ __volatile__("rep stosl"
3806 : "=D" (pv),
3807 "=c" (cb)
3808 : "0" (pv),
3809 "1" (cb >> 2),
3810 "a" (u32)
3811 : "memory");
3812# else
3813 __asm
3814 {
3815# ifdef RT_ARCH_AMD64
3816 mov rcx, [cb]
3817 shr rcx, 2
3818 mov rdi, [pv]
3819# else
3820 mov ecx, [cb]
3821 shr ecx, 2
3822 mov edi, [pv]
3823# endif
3824 mov eax, [u32]
3825 rep stosd
3826 }
3827# endif
3828}
3829#endif
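/*
 * Illustrative usage sketch (au32Buf is hypothetical): both ASMMemZero32 and
 * ASMMemFill32 require the byte count to be a multiple of four.
 *
 *     uint32_t au32Buf[256];
 *
 *     ASMMemZero32(au32Buf, sizeof(au32Buf));
 *     ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef));
 */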
3830
3831
3832/**
3833 * Checks if a memory page is all zeros.
3834 *
3835 * @returns true / false.
3836 *
3837 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3838 * boundary
3839 */
3840DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3841{
3842# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3843 union { RTCCUINTREG r; bool f; } uAX;
3844 RTCCUINTREG xCX, xDI;
3845 Assert(!((uintptr_t)pvPage & 15));
3846 __asm__ __volatile__("repe; "
3847# ifdef RT_ARCH_AMD64
3848 "scasq\n\t"
3849# else
3850 "scasl\n\t"
3851# endif
3852 "setnc %%al\n\t"
3853 : "=&c" (xCX),
3854 "=&D" (xDI),
3855 "=&a" (uAX.r)
3856 : "mr" (pvPage),
3857# ifdef RT_ARCH_AMD64
3858 "0" (RT_ASM_PAGE_SIZE/8),
3859# else
3860 "0" (RT_ASM_PAGE_SIZE/4),
3861# endif
3862 "1" (pvPage),
3863 "2" (0));
3864 return uAX.f;
3865# else
3866 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3867 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3868 Assert(!((uintptr_t)pvPage & 15));
3869 for (;;)
3870 {
3871 if (puPtr[0]) return false;
3872 if (puPtr[4]) return false;
3873
3874 if (puPtr[2]) return false;
3875 if (puPtr[6]) return false;
3876
3877 if (puPtr[1]) return false;
3878 if (puPtr[5]) return false;
3879
3880 if (puPtr[3]) return false;
3881 if (puPtr[7]) return false;
3882
3883 if (!--cLeft)
3884 return true;
3885 puPtr += 8;
3886 }
3887 return true;
3888# endif
3889}
3890
3891
3892/**
3893 * Checks if a memory block is filled with the specified byte.
3894 *
3895 * This is a sort of inverted memchr.
3896 *
3897 * @returns Pointer to the byte which doesn't equal u8.
3898 * @returns NULL if all equal to u8.
3899 *
3900 * @param pv Pointer to the memory block.
3901 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3902 * @param u8 The value it's supposed to be filled with.
3903 *
3904 * @todo Fix name, it is a predicate function but it's not returning boolean!
3905 */
3906DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3907{
3908/** @todo rewrite this in inline assembly? */
3909 uint8_t const *pb = (uint8_t const *)pv;
3910 for (; cb; cb--, pb++)
3911 if (RT_LIKELY(*pb == u8))
3912 { /* likely */ }
3913 else
3914 return (void *)pb;
3915 return NULL;
3916}
3917
3918
3919/**
3920 * Checks if a memory block is filled with the specified 32-bit value.
3921 *
3922 * This is a sort of inverted memchr.
3923 *
3924 * @returns Pointer to the first value which doesn't equal u32.
3925 * @returns NULL if all equal to u32.
3926 *
3927 * @param pv Pointer to the memory block.
3928 * @param cb Number of bytes in the block. This MUST be 32-bit aligned!
3929 * @param u32 The value it's supposed to be filled with.
3930 *
3931 * @todo Fix name, it is a predicate function but it's not returning boolean!
3932 */
3933DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3934{
3935/** @todo rewrite this in inline assembly? */
3936 uint32_t const *pu32 = (uint32_t const *)pv;
3937 for (; cb; cb -= 4, pu32++)
3938 if (RT_LIKELY(*pu32 == u32))
3939 { /* likely */ }
3940 else
3941 return (uint32_t *)pu32;
3942 return NULL;
3943}
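/*
 * Illustrative usage sketch (kept disabled; not part of the API): verifying
 * that a buffer still carries a fill pattern and reporting the byte offset of
 * the first mismatch.  The names and the 0xdeadbeef pattern are made up.
 */
#if 0
static size_t ExampleFindPatternMismatch(const void *pv, size_t cb)
{
    uint32_t *pu32Bad = ASMMemIsAllU32(pv, cb, UINT32_C(0xdeadbeef)); /* cb must be a multiple of 4. */
    if (!pu32Bad)
        return ~(size_t)0;                                /* everything matches. */
    return (size_t)((uintptr_t)pu32Bad - (uintptr_t)pv);  /* offset of the first mismatching dword. */
}
#endif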
3944
3945
3946/**
3947 * Probes a byte pointer for read access.
3948 *
3949 * While the function will fault if the byte is not read accessible,
3950 * the idea is to do the probing in a safe place, e.g. before acquiring
3951 * locks.
3952 *
3953 * Also, this function guarantees that an eager compiler is not going
3954 * to optimize the probing away.
3955 *
3956 * @param pvByte Pointer to the byte.
3957 */
3958#if RT_INLINE_ASM_EXTERNAL
3959DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3960#else
3961DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3962{
3963 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3964 uint8_t u8;
3965# if RT_INLINE_ASM_GNU_STYLE
3966 __asm__ __volatile__("movb (%1), %0\n\t"
3967 : "=r" (u8)
3968 : "r" (pvByte));
3969# else
3970 __asm
3971 {
3972# ifdef RT_ARCH_AMD64
3973 mov rax, [pvByte]
3974 mov al, [rax]
3975# else
3976 mov eax, [pvByte]
3977 mov al, [eax]
3978# endif
3979 mov [u8], al
3980 }
3981# endif
3982 return u8;
3983}
3984#endif
3985
3986/**
3987 * Probes a buffer for read access page by page.
3988 *
3989 * While the function will fault if the buffer is not fully read
3990 * accessible, the idea is to do the probing in a safe place, e.g.
3991 * before acquiring locks.
3992 *
3993 * Also, this function guarantees that an eager compiler is not going
3994 * to optimize the probing away.
3995 *
3996 * @param pvBuf Pointer to the buffer.
3997 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3998 */
3999DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4000{
4001 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4002 /* the first byte */
4003 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4004 ASMProbeReadByte(pu8);
4005
4006 /* the pages in between. */
4007 while (cbBuf > RT_ASM_PAGE_SIZE)
4008 {
4009 ASMProbeReadByte(pu8);
4010 cbBuf -= RT_ASM_PAGE_SIZE;
4011 pu8 += RT_ASM_PAGE_SIZE;
4012 }
4013
4014 /* the last byte */
4015 ASMProbeReadByte(pu8 + cbBuf - 1);
4016}
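/*
 * Illustrative usage sketch (kept disabled; not part of the API): probing a
 * caller supplied buffer before entering a critical section, so that any page
 * fault is taken here rather than while a lock is held.  The names are made up.
 */
#if 0
static void ExampleProbeBeforeLocking(const void *pvUserBuf, size_t cbUserBuf)
{
    ASMProbeReadBuffer(pvUserBuf, cbUserBuf);   /* touches one byte per page. */
    /* ... acquire the spinlock and copy the data ... */
}
#endif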
4017
4018
4019
4020/** @defgroup grp_inline_bits Bit Operations
4021 * @{
4022 */
4023
4024
4025/**
4026 * Sets a bit in a bitmap.
4027 *
4028 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4029 * @param iBit The bit to set.
4030 *
4031 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4032 * However, aligning it will yield better performance and avoid
4033 * traps when accessing the last bits in the bitmap.
4034 */
4035#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4036DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4037#else
4038DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4039{
4040# if RT_INLINE_ASM_USES_INTRIN
4041 _bittestandset((long *)pvBitmap, iBit);
4042
4043# elif RT_INLINE_ASM_GNU_STYLE
4044 __asm__ __volatile__("btsl %1, %0"
4045 : "=m" (*(volatile long *)pvBitmap)
4046 : "Ir" (iBit),
4047 "m" (*(volatile long *)pvBitmap)
4048 : "memory");
4049# else
4050 __asm
4051 {
4052# ifdef RT_ARCH_AMD64
4053 mov rax, [pvBitmap]
4054 mov edx, [iBit]
4055 bts [rax], edx
4056# else
4057 mov eax, [pvBitmap]
4058 mov edx, [iBit]
4059 bts [eax], edx
4060# endif
4061 }
4062# endif
4063}
4064#endif
4065
4066
4067/**
4068 * Atomically sets a bit in a bitmap, ordered.
4069 *
4070 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4071 * the memory access isn't atomic!
4072 * @param iBit The bit to set.
4073 *
4074 * @remarks x86: Requires a 386 or later.
4075 */
4076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4077DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4078#else
4079DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4080{
4081 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4082# if RT_INLINE_ASM_USES_INTRIN
4083 _interlockedbittestandset((long *)pvBitmap, iBit);
4084# elif RT_INLINE_ASM_GNU_STYLE
4085 __asm__ __volatile__("lock; btsl %1, %0"
4086 : "=m" (*(volatile long *)pvBitmap)
4087 : "Ir" (iBit),
4088 "m" (*(volatile long *)pvBitmap)
4089 : "memory");
4090# else
4091 __asm
4092 {
4093# ifdef RT_ARCH_AMD64
4094 mov rax, [pvBitmap]
4095 mov edx, [iBit]
4096 lock bts [rax], edx
4097# else
4098 mov eax, [pvBitmap]
4099 mov edx, [iBit]
4100 lock bts [eax], edx
4101# endif
4102 }
4103# endif
4104}
4105#endif
4106
4107
4108/**
4109 * Clears a bit in a bitmap.
4110 *
4111 * @param pvBitmap Pointer to the bitmap.
4112 * @param iBit The bit to clear.
4113 *
4114 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4115 * However, aligning it will yield better performance and avoid
4116 * traps when accessing the last bits in the bitmap.
4117 */
4118#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4119DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4120#else
4121DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4122{
4123# if RT_INLINE_ASM_USES_INTRIN
4124 _bittestandreset((long *)pvBitmap, iBit);
4125
4126# elif RT_INLINE_ASM_GNU_STYLE
4127 __asm__ __volatile__("btrl %1, %0"
4128 : "=m" (*(volatile long *)pvBitmap)
4129 : "Ir" (iBit),
4130 "m" (*(volatile long *)pvBitmap)
4131 : "memory");
4132# else
4133 __asm
4134 {
4135# ifdef RT_ARCH_AMD64
4136 mov rax, [pvBitmap]
4137 mov edx, [iBit]
4138 btr [rax], edx
4139# else
4140 mov eax, [pvBitmap]
4141 mov edx, [iBit]
4142 btr [eax], edx
4143# endif
4144 }
4145# endif
4146}
4147#endif
4148
4149
4150/**
4151 * Atomically clears a bit in a bitmap, ordered.
4152 *
4153 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4154 * the memory access isn't atomic!
4155 * @param iBit The bit to clear.
4156 *
4157 * @remarks No memory barrier, take care on SMP.
4158 * @remarks x86: Requires a 386 or later.
4159 */
4160#if RT_INLINE_ASM_EXTERNAL
4161DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4162#else
4163DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4164{
4165 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4166# if RT_INLINE_ASM_GNU_STYLE
4167 __asm__ __volatile__("lock; btrl %1, %0"
4168 : "=m" (*(volatile long *)pvBitmap)
4169 : "Ir" (iBit),
4170 "m" (*(volatile long *)pvBitmap)
4171 : "memory");
4172# else
4173 __asm
4174 {
4175# ifdef RT_ARCH_AMD64
4176 mov rax, [pvBitmap]
4177 mov edx, [iBit]
4178 lock btr [rax], edx
4179# else
4180 mov eax, [pvBitmap]
4181 mov edx, [iBit]
4182 lock btr [eax], edx
4183# endif
4184 }
4185# endif
4186}
4187#endif
4188
4189
4190/**
4191 * Toggles a bit in a bitmap.
4192 *
4193 * @param pvBitmap Pointer to the bitmap.
4194 * @param iBit The bit to toggle.
4195 *
4196 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4197 * However, aligning it will yield better performance and avoid
4198 * traps when accessing the last bits in the bitmap.
4199 */
4200#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4201DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4202#else
4203DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4204{
4205# if RT_INLINE_ASM_USES_INTRIN
4206 _bittestandcomplement((long *)pvBitmap, iBit);
4207# elif RT_INLINE_ASM_GNU_STYLE
4208 __asm__ __volatile__("btcl %1, %0"
4209 : "=m" (*(volatile long *)pvBitmap)
4210 : "Ir" (iBit),
4211 "m" (*(volatile long *)pvBitmap)
4212 : "memory");
4213# else
4214 __asm
4215 {
4216# ifdef RT_ARCH_AMD64
4217 mov rax, [pvBitmap]
4218 mov edx, [iBit]
4219 btc [rax], edx
4220# else
4221 mov eax, [pvBitmap]
4222 mov edx, [iBit]
4223 btc [eax], edx
4224# endif
4225 }
4226# endif
4227}
4228#endif
4229
4230
4231/**
4232 * Atomically toggles a bit in a bitmap, ordered.
4233 *
4234 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4235 * the memory access isn't atomic!
4236 * @param iBit The bit to toggle.
4237 *
4238 * @remarks x86: Requires a 386 or later.
4239 */
4240#if RT_INLINE_ASM_EXTERNAL
4241DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4242#else
4243DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4244{
4245 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4246# if RT_INLINE_ASM_GNU_STYLE
4247 __asm__ __volatile__("lock; btcl %1, %0"
4248 : "=m" (*(volatile long *)pvBitmap)
4249 : "Ir" (iBit),
4250 "m" (*(volatile long *)pvBitmap)
4251 : "memory");
4252# else
4253 __asm
4254 {
4255# ifdef RT_ARCH_AMD64
4256 mov rax, [pvBitmap]
4257 mov edx, [iBit]
4258 lock btc [rax], edx
4259# else
4260 mov eax, [pvBitmap]
4261 mov edx, [iBit]
4262 lock btc [eax], edx
4263# endif
4264 }
4265# endif
4266}
4267#endif
4268
4269
4270/**
4271 * Tests and sets a bit in a bitmap.
4272 *
4273 * @returns true if the bit was set.
4274 * @returns false if the bit was clear.
4275 *
4276 * @param pvBitmap Pointer to the bitmap.
4277 * @param iBit The bit to test and set.
4278 *
4279 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4280 * However, aligning it will yield better performance and avoid
4281 * traps when accessing the last bits in the bitmap.
4282 */
4283#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4284DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4285#else
4286DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4287{
4288 union { bool f; uint32_t u32; uint8_t u8; } rc;
4289# if RT_INLINE_ASM_USES_INTRIN
4290 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4291
4292# elif RT_INLINE_ASM_GNU_STYLE
4293 __asm__ __volatile__("btsl %2, %1\n\t"
4294 "setc %b0\n\t"
4295 "andl $1, %0\n\t"
4296 : "=q" (rc.u32),
4297 "=m" (*(volatile long *)pvBitmap)
4298 : "Ir" (iBit),
4299 "m" (*(volatile long *)pvBitmap)
4300 : "memory");
4301# else
4302 __asm
4303 {
4304 mov edx, [iBit]
4305# ifdef RT_ARCH_AMD64
4306 mov rax, [pvBitmap]
4307 bts [rax], edx
4308# else
4309 mov eax, [pvBitmap]
4310 bts [eax], edx
4311# endif
4312 setc al
4313 and eax, 1
4314 mov [rc.u32], eax
4315 }
4316# endif
4317 return rc.f;
4318}
4319#endif
4320
4321
4322/**
4323 * Atomically tests and sets a bit in a bitmap, ordered.
4324 *
4325 * @returns true if the bit was set.
4326 * @returns false if the bit was clear.
4327 *
4328 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4329 * the memory access isn't atomic!
4330 * @param iBit The bit to test and set.
4331 *
4332 * @remarks x86: Requires a 386 or later.
4333 */
4334#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4335DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4336#else
4337DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4338{
4339 union { bool f; uint32_t u32; uint8_t u8; } rc;
4340 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4341# if RT_INLINE_ASM_USES_INTRIN
4342 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4343# elif RT_INLINE_ASM_GNU_STYLE
4344 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4345 "setc %b0\n\t"
4346 "andl $1, %0\n\t"
4347 : "=q" (rc.u32),
4348 "=m" (*(volatile long *)pvBitmap)
4349 : "Ir" (iBit),
4350 "m" (*(volatile long *)pvBitmap)
4351 : "memory");
4352# else
4353 __asm
4354 {
4355 mov edx, [iBit]
4356# ifdef RT_ARCH_AMD64
4357 mov rax, [pvBitmap]
4358 lock bts [rax], edx
4359# else
4360 mov eax, [pvBitmap]
4361 lock bts [eax], edx
4362# endif
4363 setc al
4364 and eax, 1
4365 mov [rc.u32], eax
4366 }
4367# endif
4368 return rc.f;
4369}
4370#endif
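/*
 * Illustrative usage sketch (kept disabled; not part of the API): using the
 * return value to decide whether this caller was the one that raised a flag.
 * The g_fMyDevFlags variable and the bit number are made up for this example.
 */
#if 0
static volatile uint32_t g_fMyDevFlags = 0;
#define MYDEV_F_RESET_PENDING_BIT  3

static void ExampleSignalResetOnce(void)
{
    if (!ASMAtomicBitTestAndSet(&g_fMyDevFlags, MYDEV_F_RESET_PENDING_BIT))
    {
        /* The bit was clear and we set it first - wake up the worker here. */
    }
    /* else: a reset request was already pending; nothing more to do. */
}
#endif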
4371
4372
4373/**
4374 * Tests and clears a bit in a bitmap.
4375 *
4376 * @returns true if the bit was set.
4377 * @returns false if the bit was clear.
4378 *
4379 * @param pvBitmap Pointer to the bitmap.
4380 * @param iBit The bit to test and clear.
4381 *
4382 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4383 * However, aligning it will yield better performance and avoid
4384 * traps when accessing the last bits in the bitmap.
4385 */
4386#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4387DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4388#else
4389DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4390{
4391 union { bool f; uint32_t u32; uint8_t u8; } rc;
4392# if RT_INLINE_ASM_USES_INTRIN
4393 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4394
4395# elif RT_INLINE_ASM_GNU_STYLE
4396 __asm__ __volatile__("btrl %2, %1\n\t"
4397 "setc %b0\n\t"
4398 "andl $1, %0\n\t"
4399 : "=q" (rc.u32),
4400 "=m" (*(volatile long *)pvBitmap)
4401 : "Ir" (iBit),
4402 "m" (*(volatile long *)pvBitmap)
4403 : "memory");
4404# else
4405 __asm
4406 {
4407 mov edx, [iBit]
4408# ifdef RT_ARCH_AMD64
4409 mov rax, [pvBitmap]
4410 btr [rax], edx
4411# else
4412 mov eax, [pvBitmap]
4413 btr [eax], edx
4414# endif
4415 setc al
4416 and eax, 1
4417 mov [rc.u32], eax
4418 }
4419# endif
4420 return rc.f;
4421}
4422#endif
4423
4424
4425/**
4426 * Atomically tests and clears a bit in a bitmap, ordered.
4427 *
4428 * @returns true if the bit was set.
4429 * @returns false if the bit was clear.
4430 *
4431 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4432 * the memory access isn't atomic!
4433 * @param iBit The bit to test and clear.
4434 *
4435 * @remarks No memory barrier, take care on SMP.
4436 * @remarks x86: Requires a 386 or later.
4437 */
4438#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4439DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4440#else
4441DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4442{
4443 union { bool f; uint32_t u32; uint8_t u8; } rc;
4444 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4445# if RT_INLINE_ASM_USES_INTRIN
4446 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4447
4448# elif RT_INLINE_ASM_GNU_STYLE
4449 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4450 "setc %b0\n\t"
4451 "andl $1, %0\n\t"
4452 : "=q" (rc.u32),
4453 "=m" (*(volatile long *)pvBitmap)
4454 : "Ir" (iBit),
4455 "m" (*(volatile long *)pvBitmap)
4456 : "memory");
4457# else
4458 __asm
4459 {
4460 mov edx, [iBit]
4461# ifdef RT_ARCH_AMD64
4462 mov rax, [pvBitmap]
4463 lock btr [rax], edx
4464# else
4465 mov eax, [pvBitmap]
4466 lock btr [eax], edx
4467# endif
4468 setc al
4469 and eax, 1
4470 mov [rc.u32], eax
4471 }
4472# endif
4473 return rc.f;
4474}
4475#endif
4476
4477
4478/**
4479 * Tests and toggles a bit in a bitmap.
4480 *
4481 * @returns true if the bit was set.
4482 * @returns false if the bit was clear.
4483 *
4484 * @param pvBitmap Pointer to the bitmap.
4485 * @param iBit The bit to test and toggle.
4486 *
4487 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4488 * However, aligning it will yield better performance and avoid
4489 * traps when accessing the last bits in the bitmap.
4490 */
4491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4492DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4493#else
4494DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4495{
4496 union { bool f; uint32_t u32; uint8_t u8; } rc;
4497# if RT_INLINE_ASM_USES_INTRIN
4498 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4499
4500# elif RT_INLINE_ASM_GNU_STYLE
4501 __asm__ __volatile__("btcl %2, %1\n\t"
4502 "setc %b0\n\t"
4503 "andl $1, %0\n\t"
4504 : "=q" (rc.u32),
4505 "=m" (*(volatile long *)pvBitmap)
4506 : "Ir" (iBit),
4507 "m" (*(volatile long *)pvBitmap)
4508 : "memory");
4509# else
4510 __asm
4511 {
4512 mov edx, [iBit]
4513# ifdef RT_ARCH_AMD64
4514 mov rax, [pvBitmap]
4515 btc [rax], edx
4516# else
4517 mov eax, [pvBitmap]
4518 btc [eax], edx
4519# endif
4520 setc al
4521 and eax, 1
4522 mov [rc.u32], eax
4523 }
4524# endif
4525 return rc.f;
4526}
4527#endif
4528
4529
4530/**
4531 * Atomically tests and toggles a bit in a bitmap, ordered.
4532 *
4533 * @returns true if the bit was set.
4534 * @returns false if the bit was clear.
4535 *
4536 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4537 * the memory access isn't atomic!
4538 * @param iBit The bit to test and toggle.
4539 *
4540 * @remarks x86: Requires a 386 or later.
4541 */
4542#if RT_INLINE_ASM_EXTERNAL
4543DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4544#else
4545DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4546{
4547 union { bool f; uint32_t u32; uint8_t u8; } rc;
4548 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4549# if RT_INLINE_ASM_GNU_STYLE
4550 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4551 "setc %b0\n\t"
4552 "andl $1, %0\n\t"
4553 : "=q" (rc.u32),
4554 "=m" (*(volatile long *)pvBitmap)
4555 : "Ir" (iBit),
4556 "m" (*(volatile long *)pvBitmap)
4557 : "memory");
4558# else
4559 __asm
4560 {
4561 mov edx, [iBit]
4562# ifdef RT_ARCH_AMD64
4563 mov rax, [pvBitmap]
4564 lock btc [rax], edx
4565# else
4566 mov eax, [pvBitmap]
4567 lock btc [eax], edx
4568# endif
4569 setc al
4570 and eax, 1
4571 mov [rc.u32], eax
4572 }
4573# endif
4574 return rc.f;
4575}
4576#endif
4577
4578
4579/**
4580 * Tests if a bit in a bitmap is set.
4581 *
4582 * @returns true if the bit is set.
4583 * @returns false if the bit is clear.
4584 *
4585 * @param pvBitmap Pointer to the bitmap.
4586 * @param iBit The bit to test.
4587 *
4588 * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4589 * However, aligning it will yield better performance and avoid
4590 * traps when accessing the last bits in the bitmap.
4591 */
4592#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4593DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4594#else
4595DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4596{
4597 union { bool f; uint32_t u32; uint8_t u8; } rc;
4598# if RT_INLINE_ASM_USES_INTRIN
4599 rc.u32 = _bittest((long *)pvBitmap, iBit);
4600# elif RT_INLINE_ASM_GNU_STYLE
4601
4602 __asm__ __volatile__("btl %2, %1\n\t"
4603 "setc %b0\n\t"
4604 "andl $1, %0\n\t"
4605 : "=q" (rc.u32)
4606 : "m" (*(const volatile long *)pvBitmap),
4607 "Ir" (iBit)
4608 : "memory");
4609# else
4610 __asm
4611 {
4612 mov edx, [iBit]
4613# ifdef RT_ARCH_AMD64
4614 mov rax, [pvBitmap]
4615 bt [rax], edx
4616# else
4617 mov eax, [pvBitmap]
4618 bt [eax], edx
4619# endif
4620 setc al
4621 and eax, 1
4622 mov [rc.u32], eax
4623 }
4624# endif
4625 return rc.f;
4626}
4627#endif
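/*
 * Illustrative usage sketch (kept disabled; not part of the API): the
 * non-atomic bit operations on a private, single-threaded bitmap.  The bitmap
 * size and bit numbers are made up for this example.
 */
#if 0
static void ExamplePrivateBitmap(void)
{
    uint32_t au32Bitmap[256 / 32];                     /* 256 bits, 32-bit aligned. */
    ASMMemZero32(au32Bitmap, sizeof(au32Bitmap));
    ASMBitSet(au32Bitmap, 42);
    Assert(ASMBitTest(au32Bitmap, 42));
    ASMBitToggle(au32Bitmap, 42);
    Assert(!ASMBitTest(au32Bitmap, 42));
    Assert(!ASMBitTestAndSet(au32Bitmap, 7));          /* was clear, now set. */
    Assert( ASMBitTestAndClear(au32Bitmap, 7));        /* was set, now clear. */
}
#endif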
4628
4629
4630/**
4631 * Clears a bit range within a bitmap.
4632 *
4633 * @param pvBitmap Pointer to the bitmap.
4634 * @param iBitStart The first bit to clear.
4635 * @param iBitEnd The first bit not to clear.
4636 */
4637DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4638{
4639 if (iBitStart < iBitEnd)
4640 {
4641 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4642 int32_t iStart = iBitStart & ~31;
4643 int32_t iEnd = iBitEnd & ~31;
4644 if (iStart == iEnd)
4645 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4646 else
4647 {
4648 /* bits in first dword. */
4649 if (iBitStart & 31)
4650 {
4651 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4652 pu32++;
4653 iBitStart = iStart + 32;
4654 }
4655
4656 /* whole dword. */
4657 if (iBitStart != iEnd)
4658 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4659
4660 /* bits in last dword. */
4661 if (iBitEnd & 31)
4662 {
4663 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4664 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4665 }
4666 }
4667 }
4668}
4669
4670
4671/**
4672 * Sets a bit range within a bitmap.
4673 *
4674 * @param pvBitmap Pointer to the bitmap.
4675 * @param iBitStart The first bit to set.
4676 * @param iBitEnd The first bit not to set.
4677 */
4678DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4679{
4680 if (iBitStart < iBitEnd)
4681 {
4682 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4683 int32_t iStart = iBitStart & ~31;
4684 int32_t iEnd = iBitEnd & ~31;
4685 if (iStart == iEnd)
4686 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4687 else
4688 {
4689 /* bits in first dword. */
4690 if (iBitStart & 31)
4691 {
4692 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4693 pu32++;
4694 iBitStart = iStart + 32;
4695 }
4696
4697 /* whole dword. */
4698 if (iBitStart != iEnd)
4699 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4700
4701 /* bits in last dword. */
4702 if (iBitEnd & 31)
4703 {
4704 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4705 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4706 }
4707 }
4708 }
4709}
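/*
 * Illustrative usage sketch (kept disabled; not part of the API): combining
 * the range operations; iBitEnd is exclusive in both of them.  The sizes and
 * bit numbers are made up for this example.
 */
#if 0
static void ExampleBitRanges(void)
{
    uint32_t au32Bitmap[1024 / 32];
    ASMMemFill32(au32Bitmap, sizeof(au32Bitmap), ~UINT32_C(0)); /* start with all bits set. */
    ASMBitClearRange(au32Bitmap, 100, 200);            /* clears bits 100..199. */
    ASMBitSetRange(au32Bitmap, 150, 160);              /* sets bits 150..159 again. */
    Assert(!ASMBitTest(au32Bitmap, 100));
    Assert( ASMBitTest(au32Bitmap, 150));
    Assert(!ASMBitTest(au32Bitmap, 199));
}
#endif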
4710
4711
4712/**
4713 * Finds the first clear bit in a bitmap.
4714 *
4715 * @returns Index of the first zero bit.
4716 * @returns -1 if no clear bit was found.
4717 * @param pvBitmap Pointer to the bitmap.
4718 * @param cBits The number of bits in the bitmap. Multiple of 32.
4719 */
4720#if RT_INLINE_ASM_EXTERNAL
4721DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4722#else
4723DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4724{
4725 if (cBits)
4726 {
4727 int32_t iBit;
4728# if RT_INLINE_ASM_GNU_STYLE
4729 RTCCUINTREG uEAX, uECX, uEDI;
4730 cBits = RT_ALIGN_32(cBits, 32);
4731 __asm__ __volatile__("repe; scasl\n\t"
4732 "je 1f\n\t"
4733# ifdef RT_ARCH_AMD64
4734 "lea -4(%%rdi), %%rdi\n\t"
4735 "xorl (%%rdi), %%eax\n\t"
4736 "subq %5, %%rdi\n\t"
4737# else
4738 "lea -4(%%edi), %%edi\n\t"
4739 "xorl (%%edi), %%eax\n\t"
4740 "subl %5, %%edi\n\t"
4741# endif
4742 "shll $3, %%edi\n\t"
4743 "bsfl %%eax, %%edx\n\t"
4744 "addl %%edi, %%edx\n\t"
4745 "1:\t\n"
4746 : "=d" (iBit),
4747 "=&c" (uECX),
4748 "=&D" (uEDI),
4749 "=&a" (uEAX)
4750 : "0" (0xffffffff),
4751 "mr" (pvBitmap),
4752 "1" (cBits >> 5),
4753 "2" (pvBitmap),
4754 "3" (0xffffffff));
4755# else
4756 cBits = RT_ALIGN_32(cBits, 32);
4757 __asm
4758 {
4759# ifdef RT_ARCH_AMD64
4760 mov rdi, [pvBitmap]
4761 mov rbx, rdi
4762# else
4763 mov edi, [pvBitmap]
4764 mov ebx, edi
4765# endif
4766 mov edx, 0ffffffffh
4767 mov eax, edx
4768 mov ecx, [cBits]
4769 shr ecx, 5
4770 repe scasd
4771 je done
4772
4773# ifdef RT_ARCH_AMD64
4774 lea rdi, [rdi - 4]
4775 xor eax, [rdi]
4776 sub rdi, rbx
4777# else
4778 lea edi, [edi - 4]
4779 xor eax, [edi]
4780 sub edi, ebx
4781# endif
4782 shl edi, 3
4783 bsf edx, eax
4784 add edx, edi
4785 done:
4786 mov [iBit], edx
4787 }
4788# endif
4789 return iBit;
4790 }
4791 return -1;
4792}
4793#endif
4794
4795
4796/**
4797 * Finds the next clear bit in a bitmap.
4798 *
4799 * @returns Index of the next clear bit.
4800 * @returns -1 if no clear bit was found.
4801 * @param pvBitmap Pointer to the bitmap.
4802 * @param cBits The number of bits in the bitmap. Multiple of 32.
4803 * @param iBitPrev The bit returned from the last search.
4804 * The search will start at iBitPrev + 1.
4805 */
4806#if RT_INLINE_ASM_EXTERNAL
4807DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4808#else
4809DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4810{
4811 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4812 int iBit = ++iBitPrev & 31;
4813 if (iBit)
4814 {
4815 /*
4816 * Inspect the 32-bit word containing the unaligned bit.
4817 */
4818 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4819
4820# if RT_INLINE_ASM_USES_INTRIN
4821 unsigned long ulBit = 0;
4822 if (_BitScanForward(&ulBit, u32))
4823 return ulBit + iBitPrev;
4824# else
4825# if RT_INLINE_ASM_GNU_STYLE
4826 __asm__ __volatile__("bsf %1, %0\n\t"
4827 "jnz 1f\n\t"
4828 "movl $-1, %0\n\t"
4829 "1:\n\t"
4830 : "=r" (iBit)
4831 : "r" (u32));
4832# else
4833 __asm
4834 {
4835 mov edx, [u32]
4836 bsf eax, edx
4837 jnz done
4838 mov eax, 0ffffffffh
4839 done:
4840 mov [iBit], eax
4841 }
4842# endif
4843 if (iBit >= 0)
4844 return iBit + iBitPrev;
4845# endif
4846
4847 /*
4848 * Skip ahead and see if there is anything left to search.
4849 */
4850 iBitPrev |= 31;
4851 iBitPrev++;
4852 if (cBits <= (uint32_t)iBitPrev)
4853 return -1;
4854 }
4855
4856 /*
4857 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4858 */
4859 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4860 if (iBit >= 0)
4861 iBit += iBitPrev;
4862 return iBit;
4863}
4864#endif
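/*
 * Illustrative usage sketch (kept disabled; not part of the API): claiming a
 * free slot from a shared allocation bitmap by combining a clear-bit search
 * with an atomic test-and-set, retrying when another CPU wins the race.  The
 * function name is made up; cSlots must be a multiple of 32.
 */
#if 0
static int32_t ExampleClaimFreeSlot(volatile void *pvBitmap, uint32_t cSlots)
{
    for (;;)
    {
        int32_t iBit = ASMBitFirstClear(pvBitmap, cSlots);
        if (iBit < 0)
            return -1;                                 /* no free slot left. */
        if (!ASMAtomicBitTestAndSet(pvBitmap, iBit))
            return iBit;                               /* it was clear, now it is ours. */
        /* Somebody else claimed it between the search and the test-and-set; retry. */
    }
}
#endif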
4865
4866
4867/**
4868 * Finds the first set bit in a bitmap.
4869 *
4870 * @returns Index of the first set bit.
4871 * @returns -1 if no set bit was found.
4872 * @param pvBitmap Pointer to the bitmap.
4873 * @param cBits The number of bits in the bitmap. Multiple of 32.
4874 */
4875#if RT_INLINE_ASM_EXTERNAL
4876DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4877#else
4878DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4879{
4880 if (cBits)
4881 {
4882 int32_t iBit;
4883# if RT_INLINE_ASM_GNU_STYLE
4884 RTCCUINTREG uEAX, uECX, uEDI;
4885 cBits = RT_ALIGN_32(cBits, 32);
4886 __asm__ __volatile__("repe; scasl\n\t"
4887 "je 1f\n\t"
4888# ifdef RT_ARCH_AMD64
4889 "lea -4(%%rdi), %%rdi\n\t"
4890 "movl (%%rdi), %%eax\n\t"
4891 "subq %5, %%rdi\n\t"
4892# else
4893 "lea -4(%%edi), %%edi\n\t"
4894 "movl (%%edi), %%eax\n\t"
4895 "subl %5, %%edi\n\t"
4896# endif
4897 "shll $3, %%edi\n\t"
4898 "bsfl %%eax, %%edx\n\t"
4899 "addl %%edi, %%edx\n\t"
4900 "1:\t\n"
4901 : "=d" (iBit),
4902 "=&c" (uECX),
4903 "=&D" (uEDI),
4904 "=&a" (uEAX)
4905 : "0" (0xffffffff),
4906 "mr" (pvBitmap),
4907 "1" (cBits >> 5),
4908 "2" (pvBitmap),
4909 "3" (0));
4910# else
4911 cBits = RT_ALIGN_32(cBits, 32);
4912 __asm
4913 {
4914# ifdef RT_ARCH_AMD64
4915 mov rdi, [pvBitmap]
4916 mov rbx, rdi
4917# else
4918 mov edi, [pvBitmap]
4919 mov ebx, edi
4920# endif
4921 mov edx, 0ffffffffh
4922 xor eax, eax
4923 mov ecx, [cBits]
4924 shr ecx, 5
4925 repe scasd
4926 je done
4927# ifdef RT_ARCH_AMD64
4928 lea rdi, [rdi - 4]
4929 mov eax, [rdi]
4930 sub rdi, rbx
4931# else
4932 lea edi, [edi - 4]
4933 mov eax, [edi]
4934 sub edi, ebx
4935# endif
4936 shl edi, 3
4937 bsf edx, eax
4938 add edx, edi
4939 done:
4940 mov [iBit], edx
4941 }
4942# endif
4943 return iBit;
4944 }
4945 return -1;
4946}
4947#endif
4948
4949
4950/**
4951 * Finds the next set bit in a bitmap.
4952 *
4953 * @returns Index of the next set bit.
4954 * @returns -1 if no set bit was found.
4955 * @param pvBitmap Pointer to the bitmap.
4956 * @param cBits The number of bits in the bitmap. Multiple of 32.
4957 * @param iBitPrev The bit returned from the last search.
4958 * The search will start at iBitPrev + 1.
4959 */
4960#if RT_INLINE_ASM_EXTERNAL
4961DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4962#else
4963DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4964{
4965 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4966 int iBit = ++iBitPrev & 31;
4967 if (iBit)
4968 {
4969 /*
4970 * Inspect the 32-bit word containing the unaligned bit.
4971 */
4972 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
4973
4974# if RT_INLINE_ASM_USES_INTRIN
4975 unsigned long ulBit = 0;
4976 if (_BitScanForward(&ulBit, u32))
4977 return ulBit + iBitPrev;
4978# else
4979# if RT_INLINE_ASM_GNU_STYLE
4980 __asm__ __volatile__("bsf %1, %0\n\t"
4981 "jnz 1f\n\t"
4982 "movl $-1, %0\n\t"
4983 "1:\n\t"
4984 : "=r" (iBit)
4985 : "r" (u32));
4986# else
4987 __asm
4988 {
4989 mov edx, [u32]
4990 bsf eax, edx
4991 jnz done
4992 mov eax, 0ffffffffh
4993 done:
4994 mov [iBit], eax
4995 }
4996# endif
4997 if (iBit >= 0)
4998 return iBit + iBitPrev;
4999# endif
5000
5001 /*
5002 * Skip ahead and see if there is anything left to search.
5003 */
5004 iBitPrev |= 31;
5005 iBitPrev++;
5006 if (cBits <= (uint32_t)iBitPrev)
5007 return -1;
5008 }
5009
5010 /*
5011 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5012 */
5013 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5014 if (iBit >= 0)
5015 iBit += iBitPrev;
5016 return iBit;
5017}
5018#endif
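/*
 * Illustrative usage sketch (kept disabled; not part of the API): visiting
 * every set bit in a bitmap with the first/next pair.  The function name is
 * made up; cBits must be a multiple of 32.
 */
#if 0
static void ExampleForEachSetBit(const uint32_t *pau32Bitmap, uint32_t cBits)
{
    int iBit = ASMBitFirstSet(pau32Bitmap, cBits);
    while (iBit >= 0)
    {
        /* ... process bit number iBit ... */
        iBit = ASMBitNextSet(pau32Bitmap, cBits, (uint32_t)iBit);
    }
}
#endif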
5019
5020
5021/**
5022 * Finds the first bit which is set in the given 32-bit integer.
5023 * Bits are numbered from 1 (least significant) to 32.
5024 *
5025 * @returns index [1..32] of the first set bit.
5026 * @returns 0 if all bits are cleared.
5027 * @param u32 Integer to search for set bits.
5028 * @remarks Similar to ffs() in BSD.
5029 */
5030#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5031DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5032#else
5033DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5034{
5035# if RT_INLINE_ASM_USES_INTRIN
5036 unsigned long iBit;
5037 if (_BitScanForward(&iBit, u32))
5038 iBit++;
5039 else
5040 iBit = 0;
5041# elif RT_INLINE_ASM_GNU_STYLE
5042 uint32_t iBit;
5043 __asm__ __volatile__("bsf %1, %0\n\t"
5044 "jnz 1f\n\t"
5045 "xorl %0, %0\n\t"
5046 "jmp 2f\n"
5047 "1:\n\t"
5048 "incl %0\n"
5049 "2:\n\t"
5050 : "=r" (iBit)
5051 : "rm" (u32));
5052# else
5053 uint32_t iBit;
5054 _asm
5055 {
5056 bsf eax, [u32]
5057 jnz found
5058 xor eax, eax
5059 jmp done
5060 found:
5061 inc eax
5062 done:
5063 mov [iBit], eax
5064 }
5065# endif
5066 return iBit;
5067}
5068#endif
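/*
 * Illustrative sketch (kept disabled; not part of the API) of the 1-based
 * return convention: 0 means no bit is set, otherwise bit N is reported as
 * N + 1, just like BSD ffs().
 */
#if 0
static void ExampleFirstSetU32(void)
{
    Assert(ASMBitFirstSetU32(0) == 0);                         /* no bits set. */
    Assert(ASMBitFirstSetU32(1) == 1);                         /* bit 0 -> 1. */
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000060)) == 6);      /* lowest set bit is bit 5. */
    Assert(ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32);     /* bit 31 -> 32. */
}
#endif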
5069
5070
5071/**
5072 * Finds the first bit which is set in the given 32-bit integer.
5073 * Bits are numbered from 1 (least significant) to 32.
5074 *
5075 * @returns index [1..32] of the first set bit.
5076 * @returns 0 if all bits are cleared.
5077 * @param i32 Integer to search for set bits.
5078 * @remark Similar to ffs() in BSD.
5079 */
5080DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5081{
5082 return ASMBitFirstSetU32((uint32_t)i32);
5083}
5084
5085
5086/**
5087 * Finds the first bit which is set in the given 64-bit integer.
5088 *
5089 * Bits are numbered from 1 (least significant) to 64.
5090 *
5091 * @returns index [1..64] of the first set bit.
5092 * @returns 0 if all bits are cleared.
5093 * @param u64 Integer to search for set bits.
5094 * @remarks Similar to ffs() in BSD.
5095 */
5096#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5097DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5098#else
5099DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5100{
5101# if RT_INLINE_ASM_USES_INTRIN
5102 unsigned long iBit;
5103# if ARCH_BITS == 64
5104 if (_BitScanForward64(&iBit, u64))
5105 iBit++;
5106 else
5107 iBit = 0;
5108# else
5109 if (_BitScanForward(&iBit, (uint32_t)u64))
5110 iBit++;
5111 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5112 iBit += 33;
5113 else
5114 iBit = 0;
5115# endif
5116# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5117 uint64_t iBit;
5118 __asm__ __volatile__("bsfq %1, %0\n\t"
5119 "jnz 1f\n\t"
5120 "xorl %0, %0\n\t"
5121 "jmp 2f\n"
5122 "1:\n\t"
5123 "incl %0\n"
5124 "2:\n\t"
5125 : "=r" (iBit)
5126 : "rm" (u64));
5127# else
5128 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5129 if (!iBit)
5130 {
5131 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5132 if (iBit)
5133 iBit += 32;
5134 }
5135# endif
5136 return (unsigned)iBit;
5137}
5138#endif
5139
5140
5141/**
5142 * Finds the first bit which is set in the given 16-bit integer.
5143 *
5144 * Bits are numbered from 1 (least significant) to 16.
5145 *
5146 * @returns index [1..16] of the first set bit.
5147 * @returns 0 if all bits are cleared.
5148 * @param u16 Integer to search for set bits.
5149 * @remarks For 16-bit bs3kit code.
5150 */
5151#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5152DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5153#else
5154DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5155{
5156 return ASMBitFirstSetU32((uint32_t)u16);
5157}
5158#endif
5159
5160
5161/**
5162 * Finds the last bit which is set in the given 32-bit integer.
5163 * Bits are numbered from 1 (least significant) to 32.
5164 *
5165 * @returns index [1..32] of the last set bit.
5166 * @returns 0 if all bits are cleared.
5167 * @param u32 Integer to search for set bits.
5168 * @remark Similar to fls() in BSD.
5169 */
5170#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5171DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5172#else
5173DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5174{
5175# if RT_INLINE_ASM_USES_INTRIN
5176 unsigned long iBit;
5177 if (_BitScanReverse(&iBit, u32))
5178 iBit++;
5179 else
5180 iBit = 0;
5181# elif RT_INLINE_ASM_GNU_STYLE
5182 uint32_t iBit;
5183 __asm__ __volatile__("bsrl %1, %0\n\t"
5184 "jnz 1f\n\t"
5185 "xorl %0, %0\n\t"
5186 "jmp 2f\n"
5187 "1:\n\t"
5188 "incl %0\n"
5189 "2:\n\t"
5190 : "=r" (iBit)
5191 : "rm" (u32));
5192# else
5193 uint32_t iBit;
5194 _asm
5195 {
5196 bsr eax, [u32]
5197 jnz found
5198 xor eax, eax
5199 jmp done
5200 found:
5201 inc eax
5202 done:
5203 mov [iBit], eax
5204 }
5205# endif
5206 return iBit;
5207}
5208#endif
5209
5210
5211/**
5212 * Finds the last bit which is set in the given 32-bit integer.
5213 * Bits are numbered from 1 (least significant) to 32.
5214 *
5215 * @returns index [1..32] of the last set bit.
5216 * @returns 0 if all bits are cleared.
5217 * @param i32 Integer to search for set bits.
5218 * @remark Similar to fls() in BSD.
5219 */
5220DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5221{
5222 return ASMBitLastSetU32((uint32_t)i32);
5223}
5224
5225
5226/**
5227 * Finds the last bit which is set in the given 64-bit integer.
5228 *
5229 * Bits are numbered from 1 (least significant) to 64.
5230 *
5231 * @returns index [1..64] of the last set bit.
5232 * @returns 0 if all bits are cleared.
5233 * @param u64 Integer to search for set bits.
5234 * @remark Similar to fls() in BSD.
5235 */
5236#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5237DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5238#else
5239DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5240{
5241# if RT_INLINE_ASM_USES_INTRIN
5242 unsigned long iBit;
5243# if ARCH_BITS == 64
5244 if (_BitScanReverse64(&iBit, u64))
5245 iBit++;
5246 else
5247 iBit = 0;
5248# else
5249 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5250 iBit += 33;
5251 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5252 iBit++;
5253 else
5254 iBit = 0;
5255# endif
5256# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5257 uint64_t iBit;
5258 __asm__ __volatile__("bsrq %1, %0\n\t"
5259 "jnz 1f\n\t"
5260 "xorl %0, %0\n\t"
5261 "jmp 2f\n"
5262 "1:\n\t"
5263 "incl %0\n"
5264 "2:\n\t"
5265 : "=r" (iBit)
5266 : "rm" (u64));
5267# else
5268 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5269 if (iBit)
5270 iBit += 32;
5271 else
5272 iBit = ASMBitLastSetU32((uint32_t)u64);
5273# endif
5274 return (unsigned)iBit;
5275}
5276#endif
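/*
 * Illustrative usage sketch (kept disabled; not part of the API): the 1-based
 * index of the last set bit gives floor(log2(x)) + 1, which is handy for
 * computing the order of a value.  The function name is made up.
 */
#if 0
static unsigned ExampleLog2Floor(uint64_t uVal)
{
    Assert(uVal != 0);                     /* ASMBitLastSetU64 returns 0 for 0. */
    return ASMBitLastSetU64(uVal) - 1;     /* e.g. 0x100 -> 9 - 1 = 8. */
}
#endif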
5277
5278
5279/**
5280 * Finds the last bit which is set in the given 16-bit integer.
5281 *
5282 * Bits are numbered from 1 (least significant) to 16.
5283 *
5284 * @returns index [1..16] of the last set bit.
5285 * @returns 0 if all bits are cleared.
5286 * @param u16 Integer to search for set bits.
5287 * @remarks For 16-bit bs3kit code.
5288 */
5289#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5290DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5291#else
5292DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5293{
5294 return ASMBitLastSetU32((uint32_t)u16);
5295}
5296#endif
5297
5298
5299/**
5300 * Reverse the byte order of the given 16-bit integer.
5301 *
5302 * @returns The value with the byte order reversed.
5303 * @param u16 16-bit integer value.
5304 */
5305#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5306DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5307#else
5308DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5309{
5310# if RT_INLINE_ASM_USES_INTRIN
5311 u16 = _byteswap_ushort(u16);
5312# elif RT_INLINE_ASM_GNU_STYLE
5313 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5314# else
5315 _asm
5316 {
5317 mov ax, [u16]
5318 ror ax, 8
5319 mov [u16], ax
5320 }
5321# endif
5322 return u16;
5323}
5324#endif
5325
5326
5327/**
5328 * Reverse the byte order of the given 32-bit integer.
5329 *
5330 * @returns The value with the byte order reversed.
5331 * @param u32 32-bit integer value.
5332 */
5333#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5334DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5335#else
5336DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5337{
5338# if RT_INLINE_ASM_USES_INTRIN
5339 u32 = _byteswap_ulong(u32);
5340# elif RT_INLINE_ASM_GNU_STYLE
5341 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5342# else
5343 _asm
5344 {
5345 mov eax, [u32]
5346 bswap eax
5347 mov [u32], eax
5348 }
5349# endif
5350 return u32;
5351}
5352#endif
5353
5354
5355/**
5356 * Reverse the byte order of the given 64-bit integer.
5357 *
5358 * @returns The value with the byte order reversed.
5359 * @param u64 64-bit integer value.
5360 */
5361DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5362{
5363#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5364 u64 = _byteswap_uint64(u64);
5365#else
5366 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5367 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5368#endif
5369 return u64;
5370}
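/*
 * Illustrative sketch (kept disabled; not part of the API) of the byte
 * swapping results, e.g. for converting between big and little endian
 * representations of fixed width integers.
 */
#if 0
static void ExampleByteSwap(void)
{
    Assert(ASMByteSwapU16(UINT16_C(0x1234)) == UINT16_C(0x3412));
    Assert(ASMByteSwapU32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
    Assert(ASMByteSwapU64(UINT64_C(0x0102030405060708)) == UINT64_C(0x0807060504030201));
}
#endif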
5371
5372
5373/**
5374 * Rotate 32-bit unsigned value to the left by @a cShift.
5375 *
5376 * @returns Rotated value.
5377 * @param u32 The value to rotate.
5378 * @param cShift How many bits to rotate by.
5379 */
5380#ifdef __WATCOMC__
5381DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5382#else
5383DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5384{
5385# if RT_INLINE_ASM_USES_INTRIN
5386 return _rotl(u32, cShift);
5387# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5388 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5389 return u32;
5390# else
5391 cShift &= 31;
5392 return (u32 << cShift) | (u32 >> (32 - cShift));
5393# endif
5394}
5395#endif
5396
5397
5398/**
5399 * Rotate 32-bit unsigned value to the right by @a cShift.
5400 *
5401 * @returns Rotated value.
5402 * @param u32 The value to rotate.
5403 * @param cShift How many bits to rotate by.
5404 */
5405#ifdef __WATCOMC__
5406DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5407#else
5408DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5409{
5410# if RT_INLINE_ASM_USES_INTRIN
5411 return _rotr(u32, cShift);
5412# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5413 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5414 return u32;
5415# else
5416 cShift &= 31;
5417 return (u32 >> cShift) | (u32 << (32 - cShift));
5418# endif
5419}
5420#endif
5421
5422
5423/**
5424 * Rotate 64-bit unsigned value to the left by @a cShift.
5425 *
5426 * @returns Rotated value.
5427 * @param u64 The value to rotate.
5428 * @param cShift How many bits to rotate by.
5429 */
5430DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5431{
5432#if RT_INLINE_ASM_USES_INTRIN
5433 return _rotl64(u64, cShift);
5434#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5435 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5436 return u64;
5437#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5438 uint32_t uSpill;
5439 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5440 "jz 1f\n\t"
5441 "xchgl %%eax, %%edx\n\t"
5442 "1:\n\t"
5443 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5444 "jz 2f\n\t"
5445 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5446 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5447 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5448 "2:\n\t" /* } */
5449 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5450 : "0" (u64),
5451 "1" (cShift));
5452 return u64;
5453#else
5454 cShift &= 63;
5455 return (u64 << cShift) | (u64 >> (64 - cShift));
5456#endif
5457}
5458
5459
5460/**
5461 * Rotate 64-bit unsigned value to the right by @a cShift.
5462 *
5463 * @returns Rotated value.
5464 * @param u64 The value to rotate.
5465 * @param cShift How many bits to rotate by.
5466 */
5467DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5468{
5469#if RT_INLINE_ASM_USES_INTRIN
5470 return _rotr64(u64, cShift);
5471#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5472 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5473 return u64;
5474#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5475 uint32_t uSpill;
5476 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5477 "jz 1f\n\t"
5478 "xchgl %%eax, %%edx\n\t"
5479 "1:\n\t"
5480 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5481 "jz 2f\n\t"
5482 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5483 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5484 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5485 "2:\n\t" /* } */
5486 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5487 : "0" (u64),
5488 "1" (cShift));
5489 return u64;
5490#else
5491 cShift &= 63;
5492 return (u64 >> cShift) | (u64 << (64 - cShift));
5493#endif
5494}
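/*
 * Illustrative sketch (kept disabled; not part of the API) of the rotate
 * semantics: bits shifted out on one side re-enter on the other, and rotating
 * left and then right by the same count restores the original value.
 */
#if 0
static void ExampleRotate(void)
{
    Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 1) == UINT32_C(0x00000003));
    Assert(ASMRotateRightU32(UINT32_C(0x00000003), 1) == UINT32_C(0x80000001));
    Assert(   ASMRotateRightU64(ASMRotateLeftU64(UINT64_C(0x123456789abcdef0), 13), 13)
           == UINT64_C(0x123456789abcdef0));
}
#endif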
5495
5496/** @} */
5497
5498
5499/** @} */
5500
5501#endif
5502