VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 69866

Last change on this file since 69866 was 69105, checked in by vboxsync, 7 years ago

include/iprt/: (C) year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 162.2 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
/* Pull in <intrin.h> with two warnings silenced for the duration of the
   include only (the push/pop pair restores the previous warning state). */
# pragma warning(push)
# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
# include <intrin.h>
# pragma warning(pop)

/*
 * Emit the intrinsics at all optimization levels.
 */
/* Barrier and CPU identification. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
/* String stores. */
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
/* Bit scanning and bit testing. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
/* Byte swapping. */
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
/* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
/* Rotation. */
# pragma intrinsic(_rotl)
# pragma intrinsic(_rotr)
# pragma intrinsic(_rotl64)
# pragma intrinsic(_rotr64)
/* Intrinsics only requested when RT_ARCH_AMD64 is defined. */
# ifdef RT_ARCH_AMD64
#  pragma intrinsic(__stosq)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
#  pragma intrinsic(_InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedAnd64)
#  pragma intrinsic(_InterlockedOr64)
#  pragma intrinsic(_InterlockedIncrement64)
#  pragma intrinsic(_InterlockedDecrement64)
# endif
#endif
87
/*
 * Watcom C/C++ on x86: include the #pragma aux definitions matching the
 * target bitness (16 or 32).
 */
#if defined(__WATCOMC__) && defined(RT_ARCH_X86)
# if ARCH_BITS == 16
#  include "asm-watcom-x86-16.h"
# elif ARCH_BITS == 32
#  include "asm-watcom-x86-32.h"
# endif
#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
 * @remarks The difference between ordered and unordered atomic operations is that
 * the former will complete outstanding reads and writes before continuing
 * while the latter doesn't make any promises about the order. Ordered
 * operations don't, it seems, make any 100% promise with regard to whether
 * the operation will complete before any subsequent memory access.
 * (Please correct this if it is wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
 * static inline uint64_t rdmsr_low(int idx)
 * {
 * uint32_t low;
 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
135 * @{
136 */
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * When 1, the 64-bit exchange / compare-exchange functions in this header are
 * only declared (DECLASM) instead of being implemented inline.
 *
 * Background: i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493)
 * screws up RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release
 * builds (PIC mode, x86), and some gcc 4.3.x versions may have register
 * allocation issues with cmpxchg8b when in PIC mode on x86.
 *
 * The value can be preset by defining the macro before this point.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expressions below */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# elif !defined(RT_ARCH_X86) || !(defined(PIC) || defined(__PIC__))
   /* Not 32-bit x86 or not position independent code: nothing to avoid. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# elif RT_INLINE_ASM_GCC_4_3_X_X86 || defined(RT_OS_DARWIN)
   /* PIC on x86 with one of the problematic toolchains. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
171
172
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or
 * method.
 *
 * Maps to the _ReturnAddress intrinsic for Visual C++ and to
 * __builtin_return_address(0) for GCC (and when running doxygen).  For Watcom
 * it expands to a call to a function that is not declared here (presumably so
 * that any use is caught at build time - confirm before relying on it).
 */
#ifdef _MSC_VER
# ifdef __cplusplus
extern "C" void * _ReturnAddress(void);
# else
void * _ReturnAddress(void);
# endif
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()

#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)

#elif defined(__WATCOMC__)
# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()

#else
# error "Unsupported compiler."
#endif
190
191
192/**
193 * Compiler memory barrier.
194 *
195 * Ensure that the compiler does not use any cached (register/tmp stack) memory
196 * values or any outstanding writes when returning from this function.
197 *
198 * This function must be used if non-volatile data is modified by a
199 * device or the VMM. Typical cases are port access, MMIO access,
200 * trapping instruction, etc.
201 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
217
/** @def ASMBreakpoint
 * Debugger breakpoint; a plain alias for RT_BREAKPOINT().
 *
 * @deprecated  Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint()     RT_BREAKPOINT()
224
225
226/**
227 * Spinloop hint for platforms that have these, empty function on the other
228 * platforms.
229 *
230 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
231 * spin locks.
232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
253
254/**
255 * Atomically Exchange an unsigned 8-bit value, ordered.
256 *
257 * @returns Current *pu8 value
258 * @param pu8 Pointer to the 8-bit variable to update.
259 * @param u8 The 8-bit value to assign to *pu8.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pu8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the 8-bit variable to update.
311 * @param f The 8-bit value to assign to *pi8.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
324 * Atomically Exchange an unsigned 16-bit value, ordered.
325 *
326 * @returns Current *pu16 value
327 * @param pu16 Pointer to the 16-bit variable to update.
328 * @param u16 The 16-bit value to assign to *pu16.
329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pu16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
372}
373
374
375/**
376 * Atomically Exchange an unsigned 32-bit value, ordered.
377 *
378 * @returns Current *pu32 value
379 * @param pu32 Pointer to the 32-bit variable to update.
380 * @param u32 The 32-bit value to assign to *pu32.
381 *
382 * @remarks Does not work on 286 and earlier.
383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
415 return u32;
416}
417#endif
418
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pu32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
430}
431
432
433/**
434 * Atomically Exchange an unsigned 64-bit value, ordered.
435 *
436 * @returns Current *pu64 value
437 * @param pu64 Pointer to the 64-bit variable to update.
438 * @param u64 The 64-bit value to assign to *pu64.
439 *
440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange an signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a pointer value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param ppv Pointer to the pointer variable to update.
533 * @param pv The pointer value to assign to *ppv.
534 */
535DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
536{
537#if ARCH_BITS == 32 || ARCH_BITS == 16
538 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
539#elif ARCH_BITS == 64
540 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
541#else
542# error "ARCH_BITS is bogus"
543#endif
544}
545
546
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 *
 * The GCC variant routes the arguments through typed temporaries (declared
 * with __typeof__ and Type) before the call; the generic variant just casts.
 *
 * @returns The value *ppv held before the exchange.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to assign to *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        Type                          const pvTypeChecked    = (pv); \
        Type                                pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
    })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
#endif
568
569
570/**
571 * Atomically Exchange a raw-mode context pointer value, ordered.
572 *
573 * @returns Current *ppv value
574 * @param ppvRC Pointer to the pointer variable to update.
575 * @param pvRC The pointer value to assign to *ppv.
576 */
577DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
578{
579 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
580}
581
582
583/**
584 * Atomically Exchange a ring-0 pointer value, ordered.
585 *
586 * @returns Current *ppv value
587 * @param ppvR0 Pointer to the pointer variable to update.
588 * @param pvR0 The pointer value to assign to *ppv.
589 */
590DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
591{
592#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
593 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
594#elif R0_ARCH_BITS == 64
595 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
596#else
597# error "R0_ARCH_BITS is bogus"
598#endif
599}
600
601
602/**
603 * Atomically Exchange a ring-3 pointer value, ordered.
604 *
605 * @returns Current *ppv value
606 * @param ppvR3 Pointer to the pointer variable to update.
607 * @param pvR3 The pointer value to assign to *ppv.
608 */
609DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
610{
611#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
612 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
613#elif R3_ARCH_BITS == 64
614 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
615#else
616# error "R3_ARCH_BITS is bogus"
617#endif
618}
619
620
621/** @def ASMAtomicXchgHandle
622 * Atomically Exchange a typical IPRT handle value, ordered.
623 *
624 * @param ph Pointer to the value to update.
625 * @param hNew The new value to assigned to *pu.
626 * @param phRes Where to store the current *ph value.
627 *
628 * @remarks This doesn't currently work for all handles (like RTFILE).
629 */
630#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
635 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
636 } while (0)
637#elif HC_ARCH_BITS == 64
638# define ASMAtomicXchgHandle(ph, hNew, phRes) \
639 do { \
640 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
641 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
642 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
643 } while (0)
644#else
645# error HC_ARCH_BITS
646#endif
647
648
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * Any other size triggers AssertMsgFailed and leaves *pu untouched.  The
 * previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo This is busted as it's missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew));  break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: \
                AssertMsgFailed(("ASMAtomicXchgSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
667
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function
 * and stores the previous value through puRes using the same width.  Any
 * other size triggers AssertMsgFailed and leaves both *pu and *puRes
 * untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew));  break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: \
                AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
686
687
688
689/**
690 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
691 *
692 * @returns true if xchg was done.
693 * @returns false if xchg wasn't done.
694 *
695 * @param pu8 Pointer to the value to update.
696 * @param u8New The new value to assigned to *pu8.
697 * @param u8Old The old value to *pu8 compare with.
698 *
699 * @remarks x86: Requires a 486 or later.
700 */
701#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
702DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
703#else
704DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
705{
706 uint8_t u8Ret;
707 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
708 "setz %1\n\t"
709 : "=m" (*pu8),
710 "=qm" (u8Ret),
711 "=a" (u8Old)
712 : "q" (u8New),
713 "2" (u8Old),
714 "m" (*pu8));
715 return (bool)u8Ret;
716}
717#endif
718
719
720/**
721 * Atomically Compare and Exchange a signed 8-bit value, ordered.
722 *
723 * @returns true if xchg was done.
724 * @returns false if xchg wasn't done.
725 *
726 * @param pi8 Pointer to the value to update.
727 * @param i8New The new value to assigned to *pi8.
728 * @param i8Old The old value to *pi8 compare with.
729 *
730 * @remarks x86: Requires a 486 or later.
731 */
732DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
733{
734 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
735}
736
737
738/**
739 * Atomically Compare and Exchange a bool value, ordered.
740 *
741 * @returns true if xchg was done.
742 * @returns false if xchg wasn't done.
743 *
744 * @param pf Pointer to the value to update.
745 * @param fNew The new value to assigned to *pf.
746 * @param fOld The old value to *pf compare with.
747 *
748 * @remarks x86: Requires a 486 or later.
749 */
750DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
751{
752 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
753}
754
755
756/**
757 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
758 *
759 * @returns true if xchg was done.
760 * @returns false if xchg wasn't done.
761 *
762 * @param pu32 Pointer to the value to update.
763 * @param u32New The new value to assigned to *pu32.
764 * @param u32Old The old value to *pu32 compare with.
765 *
766 * @remarks x86: Requires a 486 or later.
767 */
768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
769DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
770#else
771DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
772{
773# if RT_INLINE_ASM_GNU_STYLE
774 uint8_t u8Ret;
775 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
776 "setz %1\n\t"
777 : "=m" (*pu32),
778 "=qm" (u8Ret),
779 "=a" (u32Old)
780 : "r" (u32New),
781 "2" (u32Old),
782 "m" (*pu32));
783 return (bool)u8Ret;
784
785# elif RT_INLINE_ASM_USES_INTRIN
786 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
787
788# else
789 uint32_t u32Ret;
790 __asm
791 {
792# ifdef RT_ARCH_AMD64
793 mov rdx, [pu32]
794# else
795 mov edx, [pu32]
796# endif
797 mov eax, [u32Old]
798 mov ecx, [u32New]
799# ifdef RT_ARCH_AMD64
800 lock cmpxchg [rdx], ecx
801# else
802 lock cmpxchg [edx], ecx
803# endif
804 setz al
805 movzx eax, al
806 mov [u32Ret], eax
807 }
808 return !!u32Ret;
809# endif
810}
811#endif
812
813
814/**
815 * Atomically Compare and Exchange a signed 32-bit value, ordered.
816 *
817 * @returns true if xchg was done.
818 * @returns false if xchg wasn't done.
819 *
820 * @param pi32 Pointer to the value to update.
821 * @param i32New The new value to assigned to *pi32.
822 * @param i32Old The old value to *pi32 compare with.
823 *
824 * @remarks x86: Requires a 486 or later.
825 */
826DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
827{
828 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
829}
830
831
832/**
833 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
834 *
835 * @returns true if xchg was done.
836 * @returns false if xchg wasn't done.
837 *
838 * @param pu64 Pointer to the 64-bit variable to update.
839 * @param u64New The 64-bit value to assign to *pu64.
840 * @param u64Old The value to compare with.
841 *
842 * @remarks x86: Requires a Pentium or later.
843 */
844#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
845 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
849{
850# if RT_INLINE_ASM_USES_INTRIN
851 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
852
853# elif defined(RT_ARCH_AMD64)
854# if RT_INLINE_ASM_GNU_STYLE
855 uint8_t u8Ret;
856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
857 "setz %1\n\t"
858 : "=m" (*pu64),
859 "=qm" (u8Ret),
860 "=a" (u64Old)
861 : "r" (u64New),
862 "2" (u64Old),
863 "m" (*pu64));
864 return (bool)u8Ret;
865# else
866 bool fRet;
867 __asm
868 {
869 mov rdx, [pu32]
870 mov rax, [u64Old]
871 mov rcx, [u64New]
872 lock cmpxchg [rdx], rcx
873 setz al
874 mov [fRet], al
875 }
876 return fRet;
877# endif
878# else /* !RT_ARCH_AMD64 */
879 uint32_t u32Ret;
880# if RT_INLINE_ASM_GNU_STYLE
881# if defined(PIC) || defined(__PIC__)
882 uint32_t u32EBX = (uint32_t)u64New;
883 uint32_t u32Spill;
884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
885 "lock; cmpxchg8b (%6)\n\t"
886 "setz %%al\n\t"
887 "movl %4, %%ebx\n\t"
888 "movzbl %%al, %%eax\n\t"
889 : "=a" (u32Ret),
890 "=d" (u32Spill),
891# if RT_GNUC_PREREQ(4, 3)
892 "+m" (*pu64)
893# else
894 "=m" (*pu64)
895# endif
896 : "A" (u64Old),
897 "m" ( u32EBX ),
898 "c" ( (uint32_t)(u64New >> 32) ),
899 "S" (pu64));
900# else /* !PIC */
901 uint32_t u32Spill;
902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
903 "setz %%al\n\t"
904 "movzbl %%al, %%eax\n\t"
905 : "=a" (u32Ret),
906 "=d" (u32Spill),
907 "+m" (*pu64)
908 : "A" (u64Old),
909 "b" ( (uint32_t)u64New ),
910 "c" ( (uint32_t)(u64New >> 32) ));
911# endif
912 return (bool)u32Ret;
913# else
914 __asm
915 {
916 mov ebx, dword ptr [u64New]
917 mov ecx, dword ptr [u64New + 4]
918 mov edi, [pu64]
919 mov eax, dword ptr [u64Old]
920 mov edx, dword ptr [u64Old + 4]
921 lock cmpxchg8b [edi]
922 setz al
923 movzx eax, al
924 mov dword ptr [u32Ret], eax
925 }
926 return !!u32Ret;
927# endif
928# endif /* !RT_ARCH_AMD64 */
929}
930#endif
931
932
933/**
934 * Atomically Compare and exchange a signed 64-bit value, ordered.
935 *
936 * @returns true if xchg was done.
937 * @returns false if xchg wasn't done.
938 *
939 * @param pi64 Pointer to the 64-bit variable to update.
940 * @param i64 The 64-bit value to assign to *pu64.
941 * @param i64Old The value to compare with.
942 *
943 * @remarks x86: Requires a Pentium or later.
944 */
945DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
946{
947 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
948}
949
950
951/**
952 * Atomically Compare and Exchange a pointer value, ordered.
953 *
954 * @returns true if xchg was done.
955 * @returns false if xchg wasn't done.
956 *
957 * @param ppv Pointer to the value to update.
958 * @param pvNew The new value to assigned to *ppv.
959 * @param pvOld The old value to *ppv compare with.
960 *
961 * @remarks x86: Requires a 486 or later.
962 */
963DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
964{
965#if ARCH_BITS == 32 || ARCH_BITS == 16
966 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
967#elif ARCH_BITS == 64
968 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
969#else
970# error "ARCH_BITS is bogus"
971#endif
972}
973
974
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * Front end for ASMAtomicCmpXchgPtrVoid that takes care of the casting.  The
 * GCC variant first assigns all three arguments to temporaries whose types
 * are derived from *ppv via __typeof__; the generic variant just casts.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
    })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
#endif
1003
1004
1005/** @def ASMAtomicCmpXchgHandle
1006 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1007 *
1008 * @param ph Pointer to the value to update.
1009 * @param hNew The new value to assigned to *pu.
1010 * @param hOld The old value to *pu compare with.
1011 * @param fRc Where to store the result.
1012 *
1013 * @remarks This doesn't currently work for all handles (like RTFILE).
1014 * @remarks x86: Requires a 486 or later.
1015 */
1016#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1017# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1018 do { \
1019 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1020 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1021 } while (0)
1022#elif HC_ARCH_BITS == 64
1023# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1024 do { \
1025 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1026 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1027 } while (0)
1028#else
1029# error HC_ARCH_BITS
1030#endif
1031
1032
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu): 4 and 8 byte objects are supported, any other
 * size triggers an assertion and sets @a fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if exchanged).
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1056
1057
/**
 * Atomically compares and exchanges an unsigned 32-bit value, additionally
 * passing back the old value, ordered.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The value *pu32 is compared with.
 * @param   pu32Old     Where to store the value found in *pu32.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX goes in holding u32Old and comes out as *pu32Old; setz records
       whether the compare matched. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is stored to *pu32Old and compared with u32Old. */
    return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        /* setz runs after the stores above; the mov instructions leave ZF untouched. */
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
1120
1121
1122/**
1123 * Atomically Compare and Exchange a signed 32-bit value, additionally
1124 * passes back old value, ordered.
1125 *
1126 * @returns true if xchg was done.
1127 * @returns false if xchg wasn't done.
1128 *
1129 * @param pi32 Pointer to the value to update.
1130 * @param i32New The new value to assigned to *pi32.
1131 * @param i32Old The old value to *pi32 compare with.
1132 * @param pi32Old Pointer store the old value at.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 */
1136DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1137{
1138 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1139}
1140
1141
1142/**
1143 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1144 * passing back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pu64 Pointer to the 64-bit variable to update.
1150 * @param u64New The 64-bit value to assign to *pu64.
1151 * @param u64Old The value to compare with.
1152 * @param pu64Old Pointer store the old value at.
1153 *
1154 * @remarks x86: Requires a Pentium or later.
1155 */
1156#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1157 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1158DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1161{
1162# if RT_INLINE_ASM_USES_INTRIN
1163 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1164
1165# elif defined(RT_ARCH_AMD64)
1166# if RT_INLINE_ASM_GNU_STYLE
1167 uint8_t u8Ret;
1168 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1169 "setz %1\n\t"
1170 : "=m" (*pu64),
1171 "=qm" (u8Ret),
1172 "=a" (*pu64Old)
1173 : "r" (u64New),
1174 "a" (u64Old),
1175 "m" (*pu64));
1176 return (bool)u8Ret;
1177# else
1178 bool fRet;
1179 __asm
1180 {
1181 mov rdx, [pu32]
1182 mov rax, [u64Old]
1183 mov rcx, [u64New]
1184 lock cmpxchg [rdx], rcx
1185 mov rdx, [pu64Old]
1186 mov [rdx], rax
1187 setz al
1188 mov [fRet], al
1189 }
1190 return fRet;
1191# endif
1192# else /* !RT_ARCH_AMD64 */
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint64_t u64Ret;
1195# if defined(PIC) || defined(__PIC__)
1196 /* NB: this code uses a memory clobber description, because the clean
1197 * solution with an output value for *pu64 makes gcc run out of registers.
1198 * This will cause suboptimal code, and anyone with a better solution is
1199 * welcome to improve this. */
1200 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1201 "lock; cmpxchg8b %3\n\t"
1202 "xchgl %%ebx, %1\n\t"
1203 : "=A" (u64Ret)
1204 : "DS" ((uint32_t)u64New),
1205 "c" ((uint32_t)(u64New >> 32)),
1206 "m" (*pu64),
1207 "0" (u64Old)
1208 : "memory" );
1209# else /* !PIC */
1210 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1211 : "=A" (u64Ret),
1212 "=m" (*pu64)
1213 : "b" ((uint32_t)u64New),
1214 "c" ((uint32_t)(u64New >> 32)),
1215 "m" (*pu64),
1216 "0" (u64Old));
1217# endif
1218 *pu64Old = u64Ret;
1219 return u64Ret == u64Old;
1220# else
1221 uint32_t u32Ret;
1222 __asm
1223 {
1224 mov ebx, dword ptr [u64New]
1225 mov ecx, dword ptr [u64New + 4]
1226 mov edi, [pu64]
1227 mov eax, dword ptr [u64Old]
1228 mov edx, dword ptr [u64Old + 4]
1229 lock cmpxchg8b [edi]
1230 mov ebx, [pu64Old]
1231 mov [ebx], eax
1232 setz al
1233 movzx eax, al
1234 add ebx, 4
1235 mov [ebx], edx
1236 mov dword ptr [u32Ret], eax
1237 }
1238 return !!u32Ret;
1239# endif
1240# endif /* !RT_ARCH_AMD64 */
1241}
1242#endif
1243
1244
1245/**
1246 * Atomically Compare and exchange a signed 64-bit value, additionally
1247 * passing back old value, ordered.
1248 *
1249 * @returns true if xchg was done.
1250 * @returns false if xchg wasn't done.
1251 *
1252 * @param pi64 Pointer to the 64-bit variable to update.
1253 * @param i64 The 64-bit value to assign to *pu64.
1254 * @param i64Old The value to compare with.
1255 * @param pi64Old Pointer store the old value at.
1256 *
1257 * @remarks x86: Requires a Pentium or later.
1258 */
1259DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1260{
1261 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1262}
1263
1264/** @def ASMAtomicCmpXchgExHandle
1265 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1266 *
1267 * @param ph Pointer to the value to update.
1268 * @param hNew The new value to assigned to *pu.
1269 * @param hOld The old value to *pu compare with.
1270 * @param fRc Where to store the result.
1271 * @param phOldVal Pointer to where to store the old value.
1272 *
1273 * @remarks This doesn't currently work for all handles (like RTFILE).
1274 */
1275#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1279 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1281 } while (0)
1282#elif HC_ARCH_BITS == 64
1283# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1286 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1287 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1288 } while (0)
1289#else
1290# error HC_ARCH_BITS
1291#endif
1292
1293
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers. Additionally passes back the old value.
 *
 * Dispatches on sizeof(*pu): 4 and 8 byte objects are supported. Any other
 * size triggers an assertion, sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if exchanged).
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1319
1320
1321/**
1322 * Atomically Compare and Exchange a pointer value, additionally
1323 * passing back old value, ordered.
1324 *
1325 * @returns true if xchg was done.
1326 * @returns false if xchg wasn't done.
1327 *
1328 * @param ppv Pointer to the value to update.
1329 * @param pvNew The new value to assigned to *ppv.
1330 * @param pvOld The old value to *ppv compare with.
1331 * @param ppvOld Pointer store the old value at.
1332 *
1333 * @remarks x86: Requires a 486 or later.
1334 */
1335DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1336 void RT_FAR * RT_FAR *ppvOld)
1337{
1338#if ARCH_BITS == 32 || ARCH_BITS == 16
1339 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1340#elif ARCH_BITS == 64
1341 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1342#else
1343# error "ARCH_BITS is bogus"
1344#endif
1345}
1346
1347
/**
 * Atomically compares and exchanges a pointer value, additionally
 * passing back the old value, ordered.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   ppv         Pointer to the pointer variable to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 * @param   ppvOld      Where to store the value found in *ppv.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        /* The typed locals make the compiler verify that all arguments agree with *ppv. */ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet; \
        fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                              (void *)pvNewTypeChecked, \
                                              (void *)pvOldTypeChecked, \
                                              (void **)ppvOldTypeChecked); \
        fMacroRet; \
    })
#else
  /* No typeof available: fall back to plain casts (no type checking). */
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#endif
1380
1381
/**
 * Virtualization unfriendly serializing instruction, always exits.
 *
 * Executes CPUID with EAX=0 and discards the result.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSerializeInstructionCpuId(void);
#else
DECLINLINE(void) ASMSerializeInstructionCpuId(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG xAX = 0;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "rbx", "rcx", "rdx", "memory");
#  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    /* 32-bit PIC: EBX is saved and restored by hand instead of being listed as a clobber. */
    __asm__ __volatile__ ("push %%ebx\n\t"
                          "cpuid\n\t"
                          "pop %%ebx\n\t"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ecx", "edx", "memory");
#  else
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ebx", "ecx", "edx", "memory");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    _ReadWriteBarrier();
    __cpuid(aInfo, 0);

# else
    __asm
    {
        push    ebx
        xor     eax, eax
        cpuid
        pop     ebx
    }
# endif
}
#endif
1427
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * Builds an interrupt return frame on the stack that targets the local label
 * right after the IRET instruction, then executes the IRET.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSerializeInstructionIRet(void);
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* Frame pushed here: SS, saved RSP (from r10), RFLAGS, CS, address of label 1. */
    __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
                          "subq $128, %%rsp\n\t" /*redzone*/
                          "mov %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq 1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory");
#  else
    /* Frame pushed here: EFLAGS, CS, address of label 1. */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
1472
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * Executes RDTSCP and discards the result.
 */
#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
DECLASM(void) ASMSerializeInstructionRdTscp(void);
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-( */
    /* The instruction is therefore emitted as raw bytes instead of by mnemonic. */
#  ifdef RT_ARCH_AMD64
    /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= 15
    uint32_t uIgnore;
    _ReadWriteBarrier();
    (void)__rdtscp(&uIgnore);
    (void)uIgnore;
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
1505
1506
/**
 * Serialize Instruction.
 *
 * Maps to the IRET variant for 16-bit x86 code and for guest code
 * (IN_GUEST), and to the CPUID variant everywhere else.
 */
#if !defined(IN_GUEST) && !(defined(RT_ARCH_X86) && ARCH_BITS == 16)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#else
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#endif
1515
1516
1517/**
1518 * Memory fence, waits for any pending writes and reads to complete.
1519 */
1520DECLINLINE(void) ASMMemoryFence(void)
1521{
1522 /** @todo use mfence? check if all cpus we care for support it. */
1523#if ARCH_BITS == 16
1524 uint16_t volatile u16;
1525 ASMAtomicXchgU16(&u16, 0);
1526#else
1527 uint32_t volatile u32;
1528 ASMAtomicXchgU32(&u32, 0);
1529#endif
1530}
1531
1532
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently implemented by calling ASMMemoryFence().
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1541
1542
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently implemented by calling ASMMemoryFence().
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
1551
1552
1553/**
1554 * Atomically reads an unsigned 8-bit value, ordered.
1555 *
1556 * @returns Current *pu8 value
1557 * @param pu8 Pointer to the 8-bit variable to read.
1558 */
1559DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1560{
1561 ASMMemoryFence();
1562 return *pu8; /* byte reads are atomic on x86 */
1563}
1564
1565
1566/**
1567 * Atomically reads an unsigned 8-bit value, unordered.
1568 *
1569 * @returns Current *pu8 value
1570 * @param pu8 Pointer to the 8-bit variable to read.
1571 */
1572DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1573{
1574 return *pu8; /* byte reads are atomic on x86 */
1575}
1576
1577
1578/**
1579 * Atomically reads a signed 8-bit value, ordered.
1580 *
1581 * @returns Current *pi8 value
1582 * @param pi8 Pointer to the 8-bit variable to read.
1583 */
1584DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1585{
1586 ASMMemoryFence();
1587 return *pi8; /* byte reads are atomic on x86 */
1588}
1589
1590
1591/**
1592 * Atomically reads a signed 8-bit value, unordered.
1593 *
1594 * @returns Current *pi8 value
1595 * @param pi8 Pointer to the 8-bit variable to read.
1596 */
1597DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1598{
1599 return *pi8; /* byte reads are atomic on x86 */
1600}
1601
1602
1603/**
1604 * Atomically reads an unsigned 16-bit value, ordered.
1605 *
1606 * @returns Current *pu16 value
1607 * @param pu16 Pointer to the 16-bit variable to read.
1608 */
1609DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1610{
1611 ASMMemoryFence();
1612 Assert(!((uintptr_t)pu16 & 1));
1613 return *pu16;
1614}
1615
1616
1617/**
1618 * Atomically reads an unsigned 16-bit value, unordered.
1619 *
1620 * @returns Current *pu16 value
1621 * @param pu16 Pointer to the 16-bit variable to read.
1622 */
1623DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1624{
1625 Assert(!((uintptr_t)pu16 & 1));
1626 return *pu16;
1627}
1628
1629
1630/**
1631 * Atomically reads a signed 16-bit value, ordered.
1632 *
1633 * @returns Current *pi16 value
1634 * @param pi16 Pointer to the 16-bit variable to read.
1635 */
1636DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1637{
1638 ASMMemoryFence();
1639 Assert(!((uintptr_t)pi16 & 1));
1640 return *pi16;
1641}
1642
1643
1644/**
1645 * Atomically reads a signed 16-bit value, unordered.
1646 *
1647 * @returns Current *pi16 value
1648 * @param pi16 Pointer to the 16-bit variable to read.
1649 */
1650DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1651{
1652 Assert(!((uintptr_t)pi16 & 1));
1653 return *pi16;
1654}
1655
1656
1657/**
1658 * Atomically reads an unsigned 32-bit value, ordered.
1659 *
1660 * @returns Current *pu32 value
1661 * @param pu32 Pointer to the 32-bit variable to read.
1662 */
1663DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1664{
1665 ASMMemoryFence();
1666 Assert(!((uintptr_t)pu32 & 3));
1667#if ARCH_BITS == 16
1668 AssertFailed(); /** @todo 16-bit */
1669#endif
1670 return *pu32;
1671}
1672
1673
1674/**
1675 * Atomically reads an unsigned 32-bit value, unordered.
1676 *
1677 * @returns Current *pu32 value
1678 * @param pu32 Pointer to the 32-bit variable to read.
1679 */
1680DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1681{
1682 Assert(!((uintptr_t)pu32 & 3));
1683#if ARCH_BITS == 16
1684 AssertFailed(); /** @todo 16-bit */
1685#endif
1686 return *pu32;
1687}
1688
1689
1690/**
1691 * Atomically reads a signed 32-bit value, ordered.
1692 *
1693 * @returns Current *pi32 value
1694 * @param pi32 Pointer to the 32-bit variable to read.
1695 */
1696DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1697{
1698 ASMMemoryFence();
1699 Assert(!((uintptr_t)pi32 & 3));
1700#if ARCH_BITS == 16
1701 AssertFailed(); /** @todo 16-bit */
1702#endif
1703 return *pi32;
1704}
1705
1706
1707/**
1708 * Atomically reads a signed 32-bit value, unordered.
1709 *
1710 * @returns Current *pi32 value
1711 * @param pi32 Pointer to the 32-bit variable to read.
1712 */
1713DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1714{
1715 Assert(!((uintptr_t)pi32 & 3));
1716#if ARCH_BITS == 16
1717 AssertFailed(); /** @todo 16-bit */
1718#endif
1719 return *pi32;
1720}
1721
1722
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain read. The other code
 * paths use a locked CMPXCHG8B with both the comparand and the replacement
 * set to zero, which is why the memory has to be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is swapped with the zeroed u32EBX local around the cmpxchg8b and
       restored afterwards, rather than being named as an operand. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1802
1803
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain read. The other code paths use a locked
 * CMPXCHG8B with both the comparand and the replacement set to zero, which is
 * why the memory has to be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if !defined(RT_ARCH_AMD64) \
  && (   (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
      || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is swapped with the zeroed u32EBX local around the cmpxchg8b and
       restored afterwards; EAX, ECX and EDX are zeroed inside the asm. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1885
1886
1887/**
1888 * Atomically reads a signed 64-bit value, ordered.
1889 *
1890 * @returns Current *pi64 value
1891 * @param pi64 Pointer to the 64-bit variable to read.
1892 * The memory pointed to must be writable.
1893 *
1894 * @remarks This may fault if the memory is read-only!
1895 * @remarks x86: Requires a Pentium or later.
1896 */
1897DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1898{
1899 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1900}
1901
1902
1903/**
1904 * Atomically reads a signed 64-bit value, unordered.
1905 *
1906 * @returns Current *pi64 value
1907 * @param pi64 Pointer to the 64-bit variable to read.
1908 * The memory pointed to must be writable.
1909 *
1910 * @remarks This will fault if the memory is read-only!
1911 * @remarks x86: Requires a Pentium or later.
1912 */
1913DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1914{
1915 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1916}
1917
1918
1919/**
1920 * Atomically reads a size_t value, ordered.
1921 *
1922 * @returns Current *pcb value
1923 * @param pcb Pointer to the size_t variable to read.
1924 */
1925DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1926{
1927#if ARCH_BITS == 64
1928 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1929#elif ARCH_BITS == 32
1930 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1931#elif ARCH_BITS == 16
1932 AssertCompileSize(size_t, 2);
1933 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
1934#else
1935# error "Unsupported ARCH_BITS value"
1936#endif
1937}
1938
1939
1940/**
1941 * Atomically reads a size_t value, unordered.
1942 *
1943 * @returns Current *pcb value
1944 * @param pcb Pointer to the size_t variable to read.
1945 */
1946DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
1947{
1948#if ARCH_BITS == 64 || ARCH_BITS == 16
1949 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
1950#elif ARCH_BITS == 32
1951 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
1952#elif ARCH_BITS == 16
1953 AssertCompileSize(size_t, 2);
1954 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
1955#else
1956# error "Unsupported ARCH_BITS value"
1957#endif
1958}
1959
1960
1961/**
1962 * Atomically reads a pointer value, ordered.
1963 *
1964 * @returns Current *pv value
1965 * @param ppv Pointer to the pointer variable to read.
1966 *
1967 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1968 * requires less typing (no casts).
1969 */
1970DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
1971{
1972#if ARCH_BITS == 32 || ARCH_BITS == 16
1973 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
1974#elif ARCH_BITS == 64
1975 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
1976#else
1977# error "ARCH_BITS is bogus"
1978#endif
1979}
1980
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * @returns The current value of *ppv.
 * @param   ppv         Pointer to the pointer variable to read.
 * @param   Type        The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        /* The typed local makes the compiler check ppv; it is never reassigned, \
           hence const (same as in ASMAtomicUoReadPtrT). */ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
  /* No typeof available: fall back to plain casts (no type checking). */
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2000
2001
2002/**
2003 * Atomically reads a pointer value, unordered.
2004 *
2005 * @returns Current *pv value
2006 * @param ppv Pointer to the pointer variable to read.
2007 *
2008 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2009 * requires less typing (no casts).
2010 */
2011DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2012{
2013#if ARCH_BITS == 32 || ARCH_BITS == 16
2014 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2015#elif ARCH_BITS == 64
2016 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2017#else
2018# error "ARCH_BITS is bogus"
2019#endif
2020}
2021
2022
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * @returns The current value of *ppv.
 * @param   ppv         Pointer to the pointer variable to read.
 * @param   Type        The type of *ppv, sans volatile.
 */
#ifdef __GNUC__
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        /* The typed local makes the compiler check ppv before it is cast to void. */ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked; \
        pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
  /* No typeof available: fall back to plain casts (no type checking). */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2042
2043
2044/**
2045 * Atomically reads a boolean value, ordered.
2046 *
2047 * @returns Current *pf value
2048 * @param pf Pointer to the boolean variable to read.
2049 */
2050DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2051{
2052 ASMMemoryFence();
2053 return *pf; /* byte reads are atomic on x86 */
2054}
2055
2056
2057/**
2058 * Atomically reads a boolean value, unordered.
2059 *
2060 * @returns Current *pf value
2061 * @param pf Pointer to the boolean variable to read.
2062 */
2063DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2064{
2065 return *pf; /* byte reads are atomic on x86 */
2066}
2067
2068
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * The handle is read as an integer of host pointer width; compile time
 * assertions reject source or result types of any other size.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint32_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
        *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicReadHandle(ph, phRes) \
    do { \
        AssertCompile(sizeof(*(ph))    == sizeof(uint64_t)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
        *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
2094
2095
2096/**
2097 * Atomically read a typical IPRT handle value, unordered.
2098 *
2099 * @param ph Pointer to the handle variable to read.
2100 * @param phRes Where to store the result.
2101 *
2102 * @remarks This doesn't currently work for all handles (like RTFILE).
2103 */
2104#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2105# define ASMAtomicUoReadHandle(ph, phRes) \
2106 do { \
2107 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2108 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2109 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2110 } while (0)
2111#elif HC_ARCH_BITS == 64
2112# define ASMAtomicUoReadHandle(ph, phRes) \
2113 do { \
2114 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2115 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2116 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2117 } while (0)
2118#else
2119# error HC_ARCH_BITS
2120#endif
2121
2122
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read.  Any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* sizeof yields a size_t; cast it so it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2140
2141
/**
 * Atomically read a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read.  Any
 * other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* Name this macro (not the ordered one) and cast the size_t to match %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2159
2160
2161/**
2162 * Atomically writes an unsigned 8-bit value, ordered.
2163 *
2164 * @param pu8 Pointer to the 8-bit variable.
2165 * @param u8 The 8-bit value to assign to *pu8.
2166 */
2167DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2168{
2169 ASMAtomicXchgU8(pu8, u8);
2170}
2171
2172
2173/**
2174 * Atomically writes an unsigned 8-bit value, unordered.
2175 *
2176 * @param pu8 Pointer to the 8-bit variable.
2177 * @param u8 The 8-bit value to assign to *pu8.
2178 */
2179DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2180{
2181 *pu8 = u8; /* byte writes are atomic on x86 */
2182}
2183
2184
2185/**
2186 * Atomically writes a signed 8-bit value, ordered.
2187 *
2188 * @param pi8 Pointer to the 8-bit variable to read.
2189 * @param i8 The 8-bit value to assign to *pi8.
2190 */
2191DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2192{
2193 ASMAtomicXchgS8(pi8, i8);
2194}
2195
2196
2197/**
2198 * Atomically writes a signed 8-bit value, unordered.
2199 *
2200 * @param pi8 Pointer to the 8-bit variable to write.
2201 * @param i8 The 8-bit value to assign to *pi8.
2202 */
2203DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2204{
2205 *pi8 = i8; /* byte writes are atomic on x86 */
2206}
2207
2208
2209/**
2210 * Atomically writes an unsigned 16-bit value, ordered.
2211 *
2212 * @param pu16 Pointer to the 16-bit variable to write.
2213 * @param u16 The 16-bit value to assign to *pu16.
2214 */
2215DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2216{
2217 ASMAtomicXchgU16(pu16, u16);
2218}
2219
2220
2221/**
2222 * Atomically writes an unsigned 16-bit value, unordered.
2223 *
2224 * @param pu16 Pointer to the 16-bit variable to write.
2225 * @param u16 The 16-bit value to assign to *pu16.
2226 */
2227DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2228{
2229 Assert(!((uintptr_t)pu16 & 1));
2230 *pu16 = u16;
2231}
2232
2233
2234/**
2235 * Atomically writes a signed 16-bit value, ordered.
2236 *
2237 * @param pi16 Pointer to the 16-bit variable to write.
2238 * @param i16 The 16-bit value to assign to *pi16.
2239 */
2240DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2241{
2242 ASMAtomicXchgS16(pi16, i16);
2243}
2244
2245
2246/**
2247 * Atomically writes a signed 16-bit value, unordered.
2248 *
2249 * @param pi16 Pointer to the 16-bit variable to write.
2250 * @param i16 The 16-bit value to assign to *pi16.
2251 */
2252DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2253{
2254 Assert(!((uintptr_t)pi16 & 1));
2255 *pi16 = i16;
2256}
2257
2258
2259/**
2260 * Atomically writes an unsigned 32-bit value, ordered.
2261 *
2262 * @param pu32 Pointer to the 32-bit variable to write.
2263 * @param u32 The 32-bit value to assign to *pu32.
2264 */
2265DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2266{
2267 ASMAtomicXchgU32(pu32, u32);
2268}
2269
2270
2271/**
2272 * Atomically writes an unsigned 32-bit value, unordered.
2273 *
2274 * @param pu32 Pointer to the 32-bit variable to write.
2275 * @param u32 The 32-bit value to assign to *pu32.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2278{
2279 Assert(!((uintptr_t)pu32 & 3));
2280#if ARCH_BITS >= 32
2281 *pu32 = u32;
2282#else
2283 ASMAtomicXchgU32(pu32, u32);
2284#endif
2285}
2286
2287
2288/**
2289 * Atomically writes a signed 32-bit value, ordered.
2290 *
2291 * @param pi32 Pointer to the 32-bit variable to write.
2292 * @param i32 The 32-bit value to assign to *pi32.
2293 */
2294DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2295{
2296 ASMAtomicXchgS32(pi32, i32);
2297}
2298
2299
2300/**
2301 * Atomically writes a signed 32-bit value, unordered.
2302 *
2303 * @param pi32 Pointer to the 32-bit variable to write.
2304 * @param i32 The 32-bit value to assign to *pi32.
2305 */
2306DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2307{
2308 Assert(!((uintptr_t)pi32 & 3));
2309#if ARCH_BITS >= 32
2310 *pi32 = i32;
2311#else
2312 ASMAtomicXchgS32(pi32, i32);
2313#endif
2314}
2315
2316
2317/**
2318 * Atomically writes an unsigned 64-bit value, ordered.
2319 *
2320 * @param pu64 Pointer to the 64-bit variable to write.
2321 * @param u64 The 64-bit value to assign to *pu64.
2322 */
2323DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2324{
2325 ASMAtomicXchgU64(pu64, u64);
2326}
2327
2328
2329/**
2330 * Atomically writes an unsigned 64-bit value, unordered.
2331 *
2332 * @param pu64 Pointer to the 64-bit variable to write.
2333 * @param u64 The 64-bit value to assign to *pu64.
2334 */
2335DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2336{
2337 Assert(!((uintptr_t)pu64 & 7));
2338#if ARCH_BITS == 64
2339 *pu64 = u64;
2340#else
2341 ASMAtomicXchgU64(pu64, u64);
2342#endif
2343}
2344
2345
2346/**
2347 * Atomically writes a signed 64-bit value, ordered.
2348 *
2349 * @param pi64 Pointer to the 64-bit variable to write.
2350 * @param i64 The 64-bit value to assign to *pi64.
2351 */
2352DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2353{
2354 ASMAtomicXchgS64(pi64, i64);
2355}
2356
2357
2358/**
2359 * Atomically writes a signed 64-bit value, unordered.
2360 *
2361 * @param pi64 Pointer to the 64-bit variable to write.
2362 * @param i64 The 64-bit value to assign to *pi64.
2363 */
2364DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2365{
2366 Assert(!((uintptr_t)pi64 & 7));
2367#if ARCH_BITS == 64
2368 *pi64 = i64;
2369#else
2370 ASMAtomicXchgS64(pi64, i64);
2371#endif
2372}
2373
2374
2375/**
2376 * Atomically writes a size_t value, ordered.
2377 *
2378 * @returns nothing.
2379 * @param pcb Pointer to the size_t variable to write.
2380 * @param cb The value to assign to *pcb.
2381 */
2382DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2383{
2384#if ARCH_BITS == 64
2385 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2386#elif ARCH_BITS == 32
2387 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2388#elif ARCH_BITS == 16
2389 AssertCompileSize(size_t, 2);
2390 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2391#else
2392# error "Unsupported ARCH_BITS value"
2393#endif
2394}
2395
2396
2397/**
2398 * Atomically writes a boolean value, unordered.
2399 *
2400 * @param pf Pointer to the boolean variable to write.
2401 * @param f The boolean value to assign to *pf.
2402 */
2403DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2404{
2405 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2406}
2407
2408
2409/**
2410 * Atomically writes a boolean value, unordered.
2411 *
2412 * @param pf Pointer to the boolean variable to write.
2413 * @param f The boolean value to assign to *pf.
2414 */
2415DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2416{
2417 *pf = f; /* byte writes are atomic on x86 */
2418}
2419
2420
2421/**
2422 * Atomically writes a pointer value, ordered.
2423 *
2424 * @param ppv Pointer to the pointer variable to write.
2425 * @param pv The pointer value to assign to *ppv.
2426 */
2427DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2428{
2429#if ARCH_BITS == 32 || ARCH_BITS == 16
2430 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2431#elif ARCH_BITS == 64
2432 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2433#else
2434# error "ARCH_BITS is bogus"
2435#endif
2436}
2437
2438
/**
 * Atomically writes a pointer value, ordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts
 * that @a ppv is naturally aligned, and then forwards to
 * ASMAtomicWritePtrVoid.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The macro arguments are parenthesized everywhere they are used, so
 *          an expression such as <tt>ppSlots + 1</tt> is cast and dereferenced
 *          as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
2473
2474
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that @a ppv is
 * naturally aligned, and then forwards to ASMAtomicWritePtrVoid with NULL.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The macro argument is parenthesized everywhere it is used, so an
 *          expression such as <tt>ppSlots + 1</tt> is cast and dereferenced
 *          as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
    } while (0)
#endif
2500
2501
/**
 * Atomically writes a pointer value, unordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts
 * that @a ppv is naturally aligned, and then performs a plain store.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 * @remarks The macro arguments are parenthesized everywhere they are used, so
 *          an expression such as <tt>ppSlots + 1</tt> is cast and dereferenced
 *          as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2536
2537
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that @a ppv is
 * naturally aligned, and then performs a plain store of NULL.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks The macro argument is parenthesized everywhere it is used, so an
 *          expression such as <tt>ppSlots + 1</tt> is cast and dereferenced
 *          as a whole.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2563
2564
2565/**
2566 * Atomically write a typical IPRT handle value, ordered.
2567 *
2568 * @param ph Pointer to the variable to update.
2569 * @param hNew The value to assign to *ph.
2570 *
2571 * @remarks This doesn't currently work for all handles (like RTFILE).
2572 */
2573#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2574# define ASMAtomicWriteHandle(ph, hNew) \
2575 do { \
2576 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2577 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2578 } while (0)
2579#elif HC_ARCH_BITS == 64
2580# define ASMAtomicWriteHandle(ph, hNew) \
2581 do { \
2582 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2583 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2584 } while (0)
2585#else
2586# error HC_ARCH_BITS
2587#endif
2588
2589
2590/**
2591 * Atomically write a typical IPRT handle value, unordered.
2592 *
2593 * @param ph Pointer to the variable to update.
2594 * @param hNew The value to assign to *ph.
2595 *
2596 * @remarks This doesn't currently work for all handles (like RTFILE).
2597 */
2598#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2599# define ASMAtomicUoWriteHandle(ph, hNew) \
2600 do { \
2601 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2602 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2603 } while (0)
2604#elif HC_ARCH_BITS == 64
2605# define ASMAtomicUoWriteHandle(ph, hNew) \
2606 do { \
2607 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2608 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2609 } while (0)
2610#else
2611# error HC_ARCH_BITS
2612#endif
2613
2614
/**
 * Atomically write a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered write.  Any
 * other size triggers an assertion and leaves *pu untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields a size_t; cast it so it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2632
/**
 * Atomically write a value whose size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered write.
 * Any other size triggers an assertion and leaves *pu untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro (not the ordered one) and cast the size_t to match %d. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2650
2651
2652
2653/**
2654 * Atomically exchanges and adds to a 16-bit value, ordered.
2655 *
2656 * @returns The old value.
2657 * @param pu16 Pointer to the value.
2658 * @param u16 Number to add.
2659 *
2660 * @remarks Currently not implemented, just to make 16-bit code happy.
2661 * @remarks x86: Requires a 486 or later.
2662 */
2663DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2664
2665
2666/**
2667 * Atomically exchanges and adds to a 32-bit value, ordered.
2668 *
2669 * @returns The old value.
2670 * @param pu32 Pointer to the value.
2671 * @param u32 Number to add.
2672 *
2673 * @remarks x86: Requires a 486 or later.
2674 */
2675#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2676DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2677#else
2678DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2679{
2680# if RT_INLINE_ASM_USES_INTRIN
2681 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2682 return u32;
2683
2684# elif RT_INLINE_ASM_GNU_STYLE
2685 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2686 : "=r" (u32),
2687 "=m" (*pu32)
2688 : "0" (u32),
2689 "m" (*pu32)
2690 : "memory");
2691 return u32;
2692# else
2693 __asm
2694 {
2695 mov eax, [u32]
2696# ifdef RT_ARCH_AMD64
2697 mov rdx, [pu32]
2698 lock xadd [rdx], eax
2699# else
2700 mov edx, [pu32]
2701 lock xadd [edx], eax
2702# endif
2703 mov [u32], eax
2704 }
2705 return u32;
2706# endif
2707}
2708#endif
2709
2710
2711/**
2712 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2713 *
2714 * @returns The old value.
2715 * @param pi32 Pointer to the value.
2716 * @param i32 Number to add.
2717 *
2718 * @remarks x86: Requires a 486 or later.
2719 */
2720DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2721{
2722 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2723}
2724
2725
2726/**
2727 * Atomically exchanges and adds to a 64-bit value, ordered.
2728 *
2729 * @returns The old value.
2730 * @param pu64 Pointer to the value.
2731 * @param u64 Number to add.
2732 *
2733 * @remarks x86: Requires a Pentium or later.
2734 */
2735#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2736DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2737#else
2738DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2739{
2740# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2741 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2742 return u64;
2743
2744# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2745 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2746 : "=r" (u64),
2747 "=m" (*pu64)
2748 : "0" (u64),
2749 "m" (*pu64)
2750 : "memory");
2751 return u64;
2752# else
2753 uint64_t u64Old;
2754 for (;;)
2755 {
2756 uint64_t u64New;
2757 u64Old = ASMAtomicUoReadU64(pu64);
2758 u64New = u64Old + u64;
2759 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2760 break;
2761 ASMNopPause();
2762 }
2763 return u64Old;
2764# endif
2765}
2766#endif
2767
2768
2769/**
2770 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2771 *
2772 * @returns The old value.
2773 * @param pi64 Pointer to the value.
2774 * @param i64 Number to add.
2775 *
2776 * @remarks x86: Requires a Pentium or later.
2777 */
2778DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2779{
2780 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2781}
2782
2783
2784/**
2785 * Atomically exchanges and adds to a size_t value, ordered.
2786 *
2787 * @returns The old value.
2788 * @param pcb Pointer to the size_t value.
2789 * @param cb Number to add.
2790 */
2791DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2792{
2793#if ARCH_BITS == 64
2794 AssertCompileSize(size_t, 8);
2795 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2796#elif ARCH_BITS == 32
2797 AssertCompileSize(size_t, 4);
2798 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2799#elif ARCH_BITS == 16
2800 AssertCompileSize(size_t, 2);
2801 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2802#else
2803# error "Unsupported ARCH_BITS value"
2804#endif
2805}
2806
2807
/**
 * Atomically exchanges and adds a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit add.  Any other size
 * triggers an assertion and leaves both *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields a size_t; cast it so it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2824
2825
2826
2827/**
2828 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2829 *
2830 * @returns The old value.
2831 * @param pu16 Pointer to the value.
2832 * @param u16 Number to subtract.
2833 *
2834 * @remarks x86: Requires a 486 or later.
2835 */
2836DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2837{
2838 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2839}
2840
2841
2842/**
2843 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2844 *
2845 * @returns The old value.
2846 * @param pi16 Pointer to the value.
2847 * @param i16 Number to subtract.
2848 *
2849 * @remarks x86: Requires a 486 or later.
2850 */
2851DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2852{
2853 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2854}
2855
2856
2857/**
2858 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2859 *
2860 * @returns The old value.
2861 * @param pu32 Pointer to the value.
2862 * @param u32 Number to subtract.
2863 *
2864 * @remarks x86: Requires a 486 or later.
2865 */
2866DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2867{
2868 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2869}
2870
2871
2872/**
2873 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2874 *
2875 * @returns The old value.
2876 * @param pi32 Pointer to the value.
2877 * @param i32 Number to subtract.
2878 *
2879 * @remarks x86: Requires a 486 or later.
2880 */
2881DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2882{
2883 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2884}
2885
2886
2887/**
2888 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2889 *
2890 * @returns The old value.
2891 * @param pu64 Pointer to the value.
2892 * @param u64 Number to subtract.
2893 *
2894 * @remarks x86: Requires a Pentium or later.
2895 */
2896DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2897{
2898 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2899}
2900
2901
2902/**
2903 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2904 *
2905 * @returns The old value.
2906 * @param pi64 Pointer to the value.
2907 * @param i64 Number to subtract.
2908 *
2909 * @remarks x86: Requires a Pentium or later.
2910 */
2911DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2912{
2913 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2914}
2915
2916
2917/**
2918 * Atomically exchanges and subtracts to a size_t value, ordered.
2919 *
2920 * @returns The old value.
2921 * @param pcb Pointer to the size_t value.
2922 * @param cb Number to subtract.
2923 *
2924 * @remarks x86: Requires a 486 or later.
2925 */
2926DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2927{
2928#if ARCH_BITS == 64
2929 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2930#elif ARCH_BITS == 32
2931 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2932#elif ARCH_BITS == 16
2933 AssertCompileSize(size_t, 2);
2934 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
2935#else
2936# error "Unsupported ARCH_BITS value"
2937#endif
2938}
2939
2940
/**
 * Atomically exchanges and subtracts a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit subtract.  Any other size
 * triggers an assertion and leaves both *pu and *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields a size_t; cast it so it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2959
2960
2961
2962/**
2963 * Atomically increment a 16-bit value, ordered.
2964 *
2965 * @returns The new value.
2966 * @param pu16 Pointer to the value to increment.
2967 * @remarks Not implemented. Just to make 16-bit code happy.
2968 *
2969 * @remarks x86: Requires a 486 or later.
2970 */
2971DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
2972
2973
2974/**
2975 * Atomically increment a 32-bit value, ordered.
2976 *
2977 * @returns The new value.
2978 * @param pu32 Pointer to the value to increment.
2979 *
2980 * @remarks x86: Requires a 486 or later.
2981 */
2982#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2983DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
2984#else
2985DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
2986{
2987 uint32_t u32;
2988# if RT_INLINE_ASM_USES_INTRIN
2989 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
2990 return u32;
2991
2992# elif RT_INLINE_ASM_GNU_STYLE
2993 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2994 : "=r" (u32),
2995 "=m" (*pu32)
2996 : "0" (1),
2997 "m" (*pu32)
2998 : "memory");
2999 return u32+1;
3000# else
3001 __asm
3002 {
3003 mov eax, 1
3004# ifdef RT_ARCH_AMD64
3005 mov rdx, [pu32]
3006 lock xadd [rdx], eax
3007# else
3008 mov edx, [pu32]
3009 lock xadd [edx], eax
3010# endif
3011 mov u32, eax
3012 }
3013 return u32+1;
3014# endif
3015}
3016#endif
3017
3018
3019/**
3020 * Atomically increment a signed 32-bit value, ordered.
3021 *
3022 * @returns The new value.
3023 * @param pi32 Pointer to the value to increment.
3024 *
3025 * @remarks x86: Requires a 486 or later.
3026 */
3027DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3028{
3029 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3030}
3031
3032
3033/**
3034 * Atomically increment a 64-bit value, ordered.
3035 *
3036 * @returns The new value.
3037 * @param pu64 Pointer to the value to increment.
3038 *
3039 * @remarks x86: Requires a Pentium or later.
3040 */
3041#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3042DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3043#else
3044DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3045{
3046# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3047 uint64_t u64;
3048 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3049 return u64;
3050
3051# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3052 uint64_t u64;
3053 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3054 : "=r" (u64),
3055 "=m" (*pu64)
3056 : "0" (1),
3057 "m" (*pu64)
3058 : "memory");
3059 return u64 + 1;
3060# else
3061 return ASMAtomicAddU64(pu64, 1) + 1;
3062# endif
3063}
3064#endif
3065
3066
3067/**
3068 * Atomically increment a signed 64-bit value, ordered.
3069 *
3070 * @returns The new value.
3071 * @param pi64 Pointer to the value to increment.
3072 *
3073 * @remarks x86: Requires a Pentium or later.
3074 */
3075DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3076{
3077 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3078}
3079
3080
3081/**
3082 * Atomically increment a size_t value, ordered.
3083 *
3084 * @returns The new value.
3085 * @param pcb Pointer to the value to increment.
3086 *
3087 * @remarks x86: Requires a 486 or later.
3088 */
3089DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3090{
3091#if ARCH_BITS == 64
3092 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3093#elif ARCH_BITS == 32
3094 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3095#elif ARCH_BITS == 16
3096 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3097#else
3098# error "Unsupported ARCH_BITS value"
3099#endif
3100}
3101
3102
3103
3104/**
3105 * Atomically decrement an unsigned 32-bit value, ordered.
3106 *
3107 * @returns The new value.
3108 * @param pu16 Pointer to the value to decrement.
3109 * @remarks Not implemented. Just to make 16-bit code happy.
3110 *
3111 * @remarks x86: Requires a 486 or later.
3112 */
3113DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3114
3115
3116/**
3117 * Atomically decrement an unsigned 32-bit value, ordered.
3118 *
3119 * @returns The new value.
3120 * @param pu32 Pointer to the value to decrement.
3121 *
3122 * @remarks x86: Requires a 486 or later.
3123 */
3124#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3125DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3126#else
3127DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3128{
3129 uint32_t u32;
3130# if RT_INLINE_ASM_USES_INTRIN
3131 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3132 return u32;
3133
3134# elif RT_INLINE_ASM_GNU_STYLE
3135 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3136 : "=r" (u32),
3137 "=m" (*pu32)
3138 : "0" (-1),
3139 "m" (*pu32)
3140 : "memory");
3141 return u32-1;
3142# else
3143 __asm
3144 {
3145 mov eax, -1
3146# ifdef RT_ARCH_AMD64
3147 mov rdx, [pu32]
3148 lock xadd [rdx], eax
3149# else
3150 mov edx, [pu32]
3151 lock xadd [edx], eax
3152# endif
3153 mov u32, eax
3154 }
3155 return u32-1;
3156# endif
3157}
3158#endif
3159
3160
3161/**
3162 * Atomically decrement a signed 32-bit value, ordered.
3163 *
3164 * @returns The new value.
3165 * @param pi32 Pointer to the value to decrement.
3166 *
3167 * @remarks x86: Requires a 486 or later.
3168 */
3169DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3170{
3171 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3172}
3173
3174
3175/**
3176 * Atomically decrement an unsigned 64-bit value, ordered.
3177 *
3178 * @returns The new value.
3179 * @param pu64 Pointer to the value to decrement.
3180 *
3181 * @remarks x86: Requires a Pentium or later.
3182 */
3183#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3184DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3185#else
3186DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3187{
3188# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3189 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3190 return u64;
3191
3192# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3193 uint64_t u64;
3194 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3195 : "=r" (u64),
3196 "=m" (*pu64)
3197 : "0" (~(uint64_t)0),
3198 "m" (*pu64)
3199 : "memory");
3200 return u64-1;
3201# else
3202 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3203# endif
3204}
3205#endif
3206
3207
3208/**
3209 * Atomically decrement a signed 64-bit value, ordered.
3210 *
3211 * @returns The new value.
3212 * @param pi64 Pointer to the value to decrement.
3213 *
3214 * @remarks x86: Requires a Pentium or later.
3215 */
3216DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3217{
3218 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3219}
3220
3221
3222/**
3223 * Atomically decrement a size_t value, ordered.
3224 *
3225 * @returns The new value.
3226 * @param pcb Pointer to the value to decrement.
3227 *
3228 * @remarks x86: Requires a 486 or later.
3229 */
3230DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3231{
3232#if ARCH_BITS == 64
3233 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3234#elif ARCH_BITS == 32
3235 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3236#elif ARCH_BITS == 16
3237 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3238#else
3239# error "Unsupported ARCH_BITS value"
3240#endif
3241}
3242
3243
3244/**
3245 * Atomically Or an unsigned 32-bit value, ordered.
3246 *
3247 * @param pu32 Pointer to the pointer variable to OR u32 with.
3248 * @param u32 The value to OR *pu32 with.
3249 *
3250 * @remarks x86: Requires a 386 or later.
3251 */
3252#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3253DECLASM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3254#else
3255DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3256{
3257# if RT_INLINE_ASM_USES_INTRIN
3258 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3259
3260# elif RT_INLINE_ASM_GNU_STYLE
3261 __asm__ __volatile__("lock; orl %1, %0\n\t"
3262 : "=m" (*pu32)
3263 : "ir" (u32),
3264 "m" (*pu32));
3265# else
3266 __asm
3267 {
3268 mov eax, [u32]
3269# ifdef RT_ARCH_AMD64
3270 mov rdx, [pu32]
3271 lock or [rdx], eax
3272# else
3273 mov edx, [pu32]
3274 lock or [edx], eax
3275# endif
3276 }
3277# endif
3278}
3279#endif
3280
3281
3282/**
3283 * Atomically Or a signed 32-bit value, ordered.
3284 *
3285 * @param pi32 Pointer to the pointer variable to OR u32 with.
3286 * @param i32 The value to OR *pu32 with.
3287 *
3288 * @remarks x86: Requires a 386 or later.
3289 */
3290DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3291{
3292 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3293}
3294
3295
3296/**
3297 * Atomically Or an unsigned 64-bit value, ordered.
3298 *
3299 * @param pu64 Pointer to the pointer variable to OR u64 with.
3300 * @param u64 The value to OR *pu64 with.
3301 *
3302 * @remarks x86: Requires a Pentium or later.
3303 */
3304#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3305DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3306#else
3307DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3308{
3309# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3310 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3311
3312# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3313 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3314 : "=m" (*pu64)
3315 : "r" (u64),
3316 "m" (*pu64));
3317# else
3318 for (;;)
3319 {
3320 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3321 uint64_t u64New = u64Old | u64;
3322 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3323 break;
3324 ASMNopPause();
3325 }
3326# endif
3327}
3328#endif
3329
3330
3331/**
3332 * Atomically Or a signed 64-bit value, ordered.
3333 *
3334 * @param pi64 Pointer to the pointer variable to OR u64 with.
3335 * @param i64 The value to OR *pu64 with.
3336 *
3337 * @remarks x86: Requires a Pentium or later.
3338 */
3339DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3340{
3341 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3342}
3343
3344
3345/**
3346 * Atomically And an unsigned 32-bit value, ordered.
3347 *
3348 * @param pu32 Pointer to the pointer variable to AND u32 with.
3349 * @param u32 The value to AND *pu32 with.
3350 *
3351 * @remarks x86: Requires a 386 or later.
3352 */
3353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3354DECLASM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3355#else
3356DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3357{
3358# if RT_INLINE_ASM_USES_INTRIN
3359 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3360
3361# elif RT_INLINE_ASM_GNU_STYLE
3362 __asm__ __volatile__("lock; andl %1, %0\n\t"
3363 : "=m" (*pu32)
3364 : "ir" (u32),
3365 "m" (*pu32));
3366# else
3367 __asm
3368 {
3369 mov eax, [u32]
3370# ifdef RT_ARCH_AMD64
3371 mov rdx, [pu32]
3372 lock and [rdx], eax
3373# else
3374 mov edx, [pu32]
3375 lock and [edx], eax
3376# endif
3377 }
3378# endif
3379}
3380#endif
3381
3382
3383/**
3384 * Atomically And a signed 32-bit value, ordered.
3385 *
3386 * @param pi32 Pointer to the pointer variable to AND i32 with.
3387 * @param i32 The value to AND *pi32 with.
3388 *
3389 * @remarks x86: Requires a 386 or later.
3390 */
3391DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3392{
3393 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3394}
3395
3396
3397/**
3398 * Atomically And an unsigned 64-bit value, ordered.
3399 *
3400 * @param pu64 Pointer to the pointer variable to AND u64 with.
3401 * @param u64 The value to AND *pu64 with.
3402 *
3403 * @remarks x86: Requires a Pentium or later.
3404 */
3405#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3406DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3407#else
3408DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3409{
3410# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3411 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3412
3413# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3414 __asm__ __volatile__("lock; andq %1, %0\n\t"
3415 : "=m" (*pu64)
3416 : "r" (u64),
3417 "m" (*pu64));
3418# else
3419 for (;;)
3420 {
3421 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3422 uint64_t u64New = u64Old & u64;
3423 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3424 break;
3425 ASMNopPause();
3426 }
3427# endif
3428}
3429#endif
3430
3431
3432/**
3433 * Atomically And a signed 64-bit value, ordered.
3434 *
3435 * @param pi64 Pointer to the pointer variable to AND i64 with.
3436 * @param i64 The value to AND *pi64 with.
3437 *
3438 * @remarks x86: Requires a Pentium or later.
3439 */
3440DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3441{
3442 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3443}
3444
3445
3446/**
3447 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3448 *
3449 * @param pu32 Pointer to the pointer variable to OR u32 with.
3450 * @param u32 The value to OR *pu32 with.
3451 *
3452 * @remarks x86: Requires a 386 or later.
3453 */
3454#if RT_INLINE_ASM_EXTERNAL
3455DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3456#else
3457DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3458{
3459# if RT_INLINE_ASM_GNU_STYLE
3460 __asm__ __volatile__("orl %1, %0\n\t"
3461 : "=m" (*pu32)
3462 : "ir" (u32),
3463 "m" (*pu32));
3464# else
3465 __asm
3466 {
3467 mov eax, [u32]
3468# ifdef RT_ARCH_AMD64
3469 mov rdx, [pu32]
3470 or [rdx], eax
3471# else
3472 mov edx, [pu32]
3473 or [edx], eax
3474# endif
3475 }
3476# endif
3477}
3478#endif
3479
3480
3481/**
3482 * Atomically OR a signed 32-bit value, unordered.
3483 *
3484 * @param pi32 Pointer to the pointer variable to OR u32 with.
3485 * @param i32 The value to OR *pu32 with.
3486 *
3487 * @remarks x86: Requires a 386 or later.
3488 */
3489DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3490{
3491 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3492}
3493
3494
3495/**
3496 * Atomically OR an unsigned 64-bit value, unordered.
3497 *
3498 * @param pu64 Pointer to the pointer variable to OR u64 with.
3499 * @param u64 The value to OR *pu64 with.
3500 *
3501 * @remarks x86: Requires a Pentium or later.
3502 */
3503#if RT_INLINE_ASM_EXTERNAL
3504DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3505#else
3506DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3507{
3508# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3509 __asm__ __volatile__("orq %1, %q0\n\t"
3510 : "=m" (*pu64)
3511 : "r" (u64),
3512 "m" (*pu64));
3513# else
3514 for (;;)
3515 {
3516 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3517 uint64_t u64New = u64Old | u64;
3518 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3519 break;
3520 ASMNopPause();
3521 }
3522# endif
3523}
3524#endif
3525
3526
3527/**
3528 * Atomically Or a signed 64-bit value, unordered.
3529 *
3530 * @param pi64 Pointer to the pointer variable to OR u64 with.
3531 * @param i64 The value to OR *pu64 with.
3532 *
3533 * @remarks x86: Requires a Pentium or later.
3534 */
3535DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3536{
3537 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3538}
3539
3540
3541/**
3542 * Atomically And an unsigned 32-bit value, unordered.
3543 *
3544 * @param pu32 Pointer to the pointer variable to AND u32 with.
3545 * @param u32 The value to AND *pu32 with.
3546 *
3547 * @remarks x86: Requires a 386 or later.
3548 */
3549#if RT_INLINE_ASM_EXTERNAL
3550DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3551#else
3552DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3553{
3554# if RT_INLINE_ASM_GNU_STYLE
3555 __asm__ __volatile__("andl %1, %0\n\t"
3556 : "=m" (*pu32)
3557 : "ir" (u32),
3558 "m" (*pu32));
3559# else
3560 __asm
3561 {
3562 mov eax, [u32]
3563# ifdef RT_ARCH_AMD64
3564 mov rdx, [pu32]
3565 and [rdx], eax
3566# else
3567 mov edx, [pu32]
3568 and [edx], eax
3569# endif
3570 }
3571# endif
3572}
3573#endif
3574
3575
3576/**
3577 * Atomically And a signed 32-bit value, unordered.
3578 *
3579 * @param pi32 Pointer to the pointer variable to AND i32 with.
3580 * @param i32 The value to AND *pi32 with.
3581 *
3582 * @remarks x86: Requires a 386 or later.
3583 */
3584DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3585{
3586 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3587}
3588
3589
3590/**
3591 * Atomically And an unsigned 64-bit value, unordered.
3592 *
3593 * @param pu64 Pointer to the pointer variable to AND u64 with.
3594 * @param u64 The value to AND *pu64 with.
3595 *
3596 * @remarks x86: Requires a Pentium or later.
3597 */
3598#if RT_INLINE_ASM_EXTERNAL
3599DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3600#else
3601DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3602{
3603# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3604 __asm__ __volatile__("andq %1, %0\n\t"
3605 : "=m" (*pu64)
3606 : "r" (u64),
3607 "m" (*pu64));
3608# else
3609 for (;;)
3610 {
3611 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3612 uint64_t u64New = u64Old & u64;
3613 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3614 break;
3615 ASMNopPause();
3616 }
3617# endif
3618}
3619#endif
3620
3621
3622/**
3623 * Atomically And a signed 64-bit value, unordered.
3624 *
3625 * @param pi64 Pointer to the pointer variable to AND i64 with.
3626 * @param i64 The value to AND *pi64 with.
3627 *
3628 * @remarks x86: Requires a Pentium or later.
3629 */
3630DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3631{
3632 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3633}
3634
3635
3636/**
3637 * Atomically increment an unsigned 32-bit value, unordered.
3638 *
3639 * @returns the new value.
3640 * @param pu32 Pointer to the variable to increment.
3641 *
3642 * @remarks x86: Requires a 486 or later.
3643 */
3644#if RT_INLINE_ASM_EXTERNAL
3645DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3646#else
3647DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3648{
3649 uint32_t u32;
3650# if RT_INLINE_ASM_GNU_STYLE
3651 __asm__ __volatile__("xaddl %0, %1\n\t"
3652 : "=r" (u32),
3653 "=m" (*pu32)
3654 : "0" (1),
3655 "m" (*pu32)
3656 : "memory");
3657 return u32 + 1;
3658# else
3659 __asm
3660 {
3661 mov eax, 1
3662# ifdef RT_ARCH_AMD64
3663 mov rdx, [pu32]
3664 xadd [rdx], eax
3665# else
3666 mov edx, [pu32]
3667 xadd [edx], eax
3668# endif
3669 mov u32, eax
3670 }
3671 return u32 + 1;
3672# endif
3673}
3674#endif
3675
3676
3677/**
3678 * Atomically decrement an unsigned 32-bit value, unordered.
3679 *
3680 * @returns the new value.
3681 * @param pu32 Pointer to the variable to decrement.
3682 *
3683 * @remarks x86: Requires a 486 or later.
3684 */
3685#if RT_INLINE_ASM_EXTERNAL
3686DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3687#else
3688DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3689{
3690 uint32_t u32;
3691# if RT_INLINE_ASM_GNU_STYLE
3692 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3693 : "=r" (u32),
3694 "=m" (*pu32)
3695 : "0" (-1),
3696 "m" (*pu32)
3697 : "memory");
3698 return u32 - 1;
3699# else
3700 __asm
3701 {
3702 mov eax, -1
3703# ifdef RT_ARCH_AMD64
3704 mov rdx, [pu32]
3705 xadd [rdx], eax
3706# else
3707 mov edx, [pu32]
3708 xadd [edx], eax
3709# endif
3710 mov u32, eax
3711 }
3712 return u32 - 1;
3713# endif
3714}
3715#endif
3716
3717
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the page helpers in this header: 0x2000 when
 * RT_ARCH_SPARC64 is defined, 0x1000 otherwise.
 *
 * Defined locally to avoid dragging in iprt/param.h.  When PAGE_SIZE is
 * already defined (and NT_INCLUDED is not), it is checked against the value
 * chosen here.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
#else
# define RT_ASM_PAGE_SIZE   0x1000
#endif
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if defined(RT_ARCH_SPARC64) && PAGE_SIZE != 0x2000
#  error "PAGE_SIZE is not 0x2000!"
# elif !defined(RT_ARCH_SPARC64) && PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
3737
3738/**
3739 * Zeros a 4K memory page.
3740 *
3741 * @param pv Pointer to the memory block. This must be page aligned.
3742 */
3743#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3744DECLASM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3745# else
3746DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3747{
3748# if RT_INLINE_ASM_USES_INTRIN
3749# ifdef RT_ARCH_AMD64
3750 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3751# else
3752 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3753# endif
3754
3755# elif RT_INLINE_ASM_GNU_STYLE
3756 RTCCUINTREG uDummy;
3757# ifdef RT_ARCH_AMD64
3758 __asm__ __volatile__("rep stosq"
3759 : "=D" (pv),
3760 "=c" (uDummy)
3761 : "0" (pv),
3762 "c" (RT_ASM_PAGE_SIZE >> 3),
3763 "a" (0)
3764 : "memory");
3765# else
3766 __asm__ __volatile__("rep stosl"
3767 : "=D" (pv),
3768 "=c" (uDummy)
3769 : "0" (pv),
3770 "c" (RT_ASM_PAGE_SIZE >> 2),
3771 "a" (0)
3772 : "memory");
3773# endif
3774# else
3775 __asm
3776 {
3777# ifdef RT_ARCH_AMD64
3778 xor rax, rax
3779 mov ecx, 0200h
3780 mov rdi, [pv]
3781 rep stosq
3782# else
3783 xor eax, eax
3784 mov ecx, 0400h
3785 mov edi, [pv]
3786 rep stosd
3787# endif
3788 }
3789# endif
3790}
3791# endif
3792
3793
3794/**
3795 * Zeros a memory block with a 32-bit aligned size.
3796 *
3797 * @param pv Pointer to the memory block.
3798 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3799 */
3800#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3801DECLASM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3802#else
3803DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3804{
3805# if RT_INLINE_ASM_USES_INTRIN
3806# ifdef RT_ARCH_AMD64
3807 if (!(cb & 7))
3808 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3809 else
3810# endif
3811 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3812
3813# elif RT_INLINE_ASM_GNU_STYLE
3814 __asm__ __volatile__("rep stosl"
3815 : "=D" (pv),
3816 "=c" (cb)
3817 : "0" (pv),
3818 "1" (cb >> 2),
3819 "a" (0)
3820 : "memory");
3821# else
3822 __asm
3823 {
3824 xor eax, eax
3825# ifdef RT_ARCH_AMD64
3826 mov rcx, [cb]
3827 shr rcx, 2
3828 mov rdi, [pv]
3829# else
3830 mov ecx, [cb]
3831 shr ecx, 2
3832 mov edi, [pv]
3833# endif
3834 rep stosd
3835 }
3836# endif
3837}
3838#endif
3839
3840
3841/**
3842 * Fills a memory block with a 32-bit aligned size.
3843 *
3844 * @param pv Pointer to the memory block.
3845 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3846 * @param u32 The value to fill with.
3847 */
3848#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3849DECLASM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3850#else
3851DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3852{
3853# if RT_INLINE_ASM_USES_INTRIN
3854# ifdef RT_ARCH_AMD64
3855 if (!(cb & 7))
3856 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3857 else
3858# endif
3859 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3860
3861# elif RT_INLINE_ASM_GNU_STYLE
3862 __asm__ __volatile__("rep stosl"
3863 : "=D" (pv),
3864 "=c" (cb)
3865 : "0" (pv),
3866 "1" (cb >> 2),
3867 "a" (u32)
3868 : "memory");
3869# else
3870 __asm
3871 {
3872# ifdef RT_ARCH_AMD64
3873 mov rcx, [cb]
3874 shr rcx, 2
3875 mov rdi, [pv]
3876# else
3877 mov ecx, [cb]
3878 shr ecx, 2
3879 mov edi, [pv]
3880# endif
3881 mov eax, [u32]
3882 rep stosd
3883 }
3884# endif
3885}
3886#endif
3887
3888
3889/**
3890 * Checks if a memory block is all zeros.
3891 *
3892 * @returns Pointer to the first non-zero byte.
3893 * @returns NULL if all zero.
3894 *
3895 * @param pv Pointer to the memory block.
3896 * @param cb Number of bytes in the block.
3897 *
3898 * @todo Fix name, it is a predicate function but it's not returning boolean!
3899 */
3900#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
3901DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3902#else
3903DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3904{
3905 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3906 for (; cb; cb--, pb++)
3907 if (RT_LIKELY(*pb == 0))
3908 { /* likely */ }
3909 else
3910 return (void RT_FAR *)pb;
3911 return NULL;
3912}
3913#endif
3914
3915
3916/**
3917 * Checks if a memory block is all zeros.
3918 *
3919 * @returns true if zero, false if not.
3920 *
3921 * @param pv Pointer to the memory block.
3922 * @param cb Number of bytes in the block.
3923 *
3924 * @sa ASMMemFirstNonZero
3925 */
3926DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3927{
3928 return ASMMemFirstNonZero(pv, cb) == NULL;
3929}
3930
3931
3932/**
3933 * Checks if a memory page is all zeros.
3934 *
3935 * @returns true / false.
3936 *
3937 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3938 * boundary
3939 */
3940DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
3941{
3942# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3943 union { RTCCUINTREG r; bool f; } uAX;
3944 RTCCUINTREG xCX, xDI;
3945 Assert(!((uintptr_t)pvPage & 15));
3946 __asm__ __volatile__("repe; "
3947# ifdef RT_ARCH_AMD64
3948 "scasq\n\t"
3949# else
3950 "scasl\n\t"
3951# endif
3952 "setnc %%al\n\t"
3953 : "=&c" (xCX),
3954 "=&D" (xDI),
3955 "=&a" (uAX.r)
3956 : "mr" (pvPage),
3957# ifdef RT_ARCH_AMD64
3958 "0" (RT_ASM_PAGE_SIZE/8),
3959# else
3960 "0" (RT_ASM_PAGE_SIZE/4),
3961# endif
3962 "1" (pvPage),
3963 "2" (0));
3964 return uAX.f;
3965# else
3966 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
3967 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3968 Assert(!((uintptr_t)pvPage & 15));
3969 for (;;)
3970 {
3971 if (puPtr[0]) return false;
3972 if (puPtr[4]) return false;
3973
3974 if (puPtr[2]) return false;
3975 if (puPtr[6]) return false;
3976
3977 if (puPtr[1]) return false;
3978 if (puPtr[5]) return false;
3979
3980 if (puPtr[3]) return false;
3981 if (puPtr[7]) return false;
3982
3983 if (!--cLeft)
3984 return true;
3985 puPtr += 8;
3986 }
3987# endif
3988}
3989
3990
3991/**
3992 * Checks if a memory block is filled with the specified byte, returning the
3993 * first mismatch.
3994 *
3995 * This is sort of an inverted memchr.
3996 *
3997 * @returns Pointer to the byte which doesn't equal u8.
3998 * @returns NULL if all equal to u8.
3999 *
4000 * @param pv Pointer to the memory block.
4001 * @param cb Number of bytes in the block.
4002 * @param u8 The value it's supposed to be filled with.
4003 *
4004 * @remarks No alignment requirements.
4005 */
4006#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4007 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
4008DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4009#else
4010DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4011{
4012 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4013 for (; cb; cb--, pb++)
4014 if (RT_LIKELY(*pb == u8))
4015 { /* likely */ }
4016 else
4017 return (void *)pb;
4018 return NULL;
4019}
4020#endif
4021
4022
4023/**
4024 * Checks if a memory block is filled with the specified byte.
4025 *
4026 * @returns true if all matching, false if not.
4027 *
4028 * @param pv Pointer to the memory block.
4029 * @param cb Number of bytes in the block.
4030 * @param u8 The value it's supposed to be filled with.
4031 *
4032 * @remarks No alignment requirements.
4033 */
4034DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4035{
4036 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4037}
4038
4039
4040/**
4041 * Checks if a memory block is filled with the specified 32-bit value.
4042 *
4043 * This is a sort of inverted memchr.
4044 *
4045 * @returns Pointer to the first value which doesn't equal u32.
4046 * @returns NULL if all equal to u32.
4047 *
4048 * @param pv Pointer to the memory block.
4049 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4050 * @param u32 The value it's supposed to be filled with.
4051 */
4052DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4053{
4054/** @todo rewrite this in inline assembly? */
4055 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4056 for (; cb; cb -= 4, pu32++)
4057 if (RT_LIKELY(*pu32 == u32))
4058 { /* likely */ }
4059 else
4060 return (uint32_t RT_FAR *)pu32;
4061 return NULL;
4062}
4063
4064
4065/**
4066 * Probes a byte pointer for read access.
4067 *
4068 * While the function will not fault if the byte is not read accessible,
4069 * the idea is to do this in a safe place like before acquiring locks
4070 * and such like.
4071 *
4072 * Also, this functions guarantees that an eager compiler is not going
4073 * to optimize the probing away.
4074 *
4075 * @param pvByte Pointer to the byte.
4076 */
4077#if RT_INLINE_ASM_EXTERNAL
4078DECLASM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4079#else
4080DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4081{
4082 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4083 uint8_t u8;
4084# if RT_INLINE_ASM_GNU_STYLE
4085 __asm__ __volatile__("movb (%1), %0\n\t"
4086 : "=r" (u8)
4087 : "r" (pvByte));
4088# else
4089 __asm
4090 {
4091# ifdef RT_ARCH_AMD64
4092 mov rax, [pvByte]
4093 mov al, [rax]
4094# else
4095 mov eax, [pvByte]
4096 mov al, [eax]
4097# endif
4098 mov [u8], al
4099 }
4100# endif
4101 return u8;
4102}
4103#endif
4104
4105/**
4106 * Probes a buffer for read access page by page.
4107 *
4108 * While the function will fault if the buffer is not fully read
4109 * accessible, the idea is to do this in a safe place like before
4110 * acquiring locks and such like.
4111 *
4112 * Also, this functions guarantees that an eager compiler is not going
4113 * to optimize the probing away.
4114 *
4115 * @param pvBuf Pointer to the buffer.
4116 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4117 */
4118DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4119{
4120 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4121 /* the first byte */
4122 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4123 ASMProbeReadByte(pu8);
4124
4125 /* the pages in between pages. */
4126 while (cbBuf > RT_ASM_PAGE_SIZE)
4127 {
4128 ASMProbeReadByte(pu8);
4129 cbBuf -= RT_ASM_PAGE_SIZE;
4130 pu8 += RT_ASM_PAGE_SIZE;
4131 }
4132
4133 /* the last byte */
4134 ASMProbeReadByte(pu8 + cbBuf - 1);
4135}
4136
4137
4138
4139/** @defgroup grp_inline_bits Bit Operations
4140 * @{
4141 */
4142
4143
4144/**
4145 * Sets a bit in a bitmap.
4146 *
4147 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4148 * @param iBit The bit to set.
4149 *
4150 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4151 * However, doing so will yield better performance as well as avoiding
4152 * traps accessing the last bits in the bitmap.
4153 */
4154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4155DECLASM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4156#else
4157DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4158{
4159# if RT_INLINE_ASM_USES_INTRIN
4160 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4161
4162# elif RT_INLINE_ASM_GNU_STYLE
4163 __asm__ __volatile__("btsl %1, %0"
4164 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4165 : "Ir" (iBit),
4166 "m" (*(volatile long RT_FAR *)pvBitmap)
4167 : "memory");
4168# else
4169 __asm
4170 {
4171# ifdef RT_ARCH_AMD64
4172 mov rax, [pvBitmap]
4173 mov edx, [iBit]
4174 bts [rax], edx
4175# else
4176 mov eax, [pvBitmap]
4177 mov edx, [iBit]
4178 bts [eax], edx
4179# endif
4180 }
4181# endif
4182}
4183#endif
4184
4185
4186/**
4187 * Atomically sets a bit in a bitmap, ordered.
4188 *
4189 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4190 * the memory access isn't atomic!
4191 * @param iBit The bit to set.
4192 *
4193 * @remarks x86: Requires a 386 or later.
4194 */
4195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4196DECLASM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4197#else
4198DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4199{
4200 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4201# if RT_INLINE_ASM_USES_INTRIN
4202 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4203# elif RT_INLINE_ASM_GNU_STYLE
4204 __asm__ __volatile__("lock; btsl %1, %0"
4205 : "=m" (*(volatile long *)pvBitmap)
4206 : "Ir" (iBit),
4207 "m" (*(volatile long *)pvBitmap)
4208 : "memory");
4209# else
4210 __asm
4211 {
4212# ifdef RT_ARCH_AMD64
4213 mov rax, [pvBitmap]
4214 mov edx, [iBit]
4215 lock bts [rax], edx
4216# else
4217 mov eax, [pvBitmap]
4218 mov edx, [iBit]
4219 lock bts [eax], edx
4220# endif
4221 }
4222# endif
4223}
4224#endif
4225
4226
4227/**
4228 * Clears a bit in a bitmap.
4229 *
4230 * @param pvBitmap Pointer to the bitmap.
4231 * @param iBit The bit to clear.
4232 *
4233 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4234 * However, doing so will yield better performance as well as avoiding
4235 * traps accessing the last bits in the bitmap.
4236 */
4237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4238DECLASM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4239#else
4240DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4241{
4242# if RT_INLINE_ASM_USES_INTRIN
4243 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4244
4245# elif RT_INLINE_ASM_GNU_STYLE
4246 __asm__ __volatile__("btrl %1, %0"
4247 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4248 : "Ir" (iBit),
4249 "m" (*(volatile long RT_FAR *)pvBitmap)
4250 : "memory");
4251# else
4252 __asm
4253 {
4254# ifdef RT_ARCH_AMD64
4255 mov rax, [pvBitmap]
4256 mov edx, [iBit]
4257 btr [rax], edx
4258# else
4259 mov eax, [pvBitmap]
4260 mov edx, [iBit]
4261 btr [eax], edx
4262# endif
4263 }
4264# endif
4265}
4266#endif
4267
4268
4269/**
4270 * Atomically clears a bit in a bitmap, ordered.
4271 *
4272 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4273 * the memory access isn't atomic!
4274 * @param iBit The bit to toggle set.
4275 *
4276 * @remarks No memory barrier, take care on smp.
4277 * @remarks x86: Requires a 386 or later.
4278 */
4279#if RT_INLINE_ASM_EXTERNAL
4280DECLASM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4281#else
4282DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4283{
4284 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4285# if RT_INLINE_ASM_GNU_STYLE
4286 __asm__ __volatile__("lock; btrl %1, %0"
4287 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4288 : "Ir" (iBit),
4289 "m" (*(volatile long RT_FAR *)pvBitmap)
4290 : "memory");
4291# else
4292 __asm
4293 {
4294# ifdef RT_ARCH_AMD64
4295 mov rax, [pvBitmap]
4296 mov edx, [iBit]
4297 lock btr [rax], edx
4298# else
4299 mov eax, [pvBitmap]
4300 mov edx, [iBit]
4301 lock btr [eax], edx
4302# endif
4303 }
4304# endif
4305}
4306#endif
4307
4308
4309/**
4310 * Toggles a bit in a bitmap.
4311 *
4312 * @param pvBitmap Pointer to the bitmap.
4313 * @param iBit The bit to toggle.
4314 *
4315 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4316 * However, doing so will yield better performance as well as avoiding
4317 * traps accessing the last bits in the bitmap.
4318 */
4319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4320DECLASM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4321#else
4322DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4323{
4324# if RT_INLINE_ASM_USES_INTRIN
4325 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4326# elif RT_INLINE_ASM_GNU_STYLE
4327 __asm__ __volatile__("btcl %1, %0"
4328 : "=m" (*(volatile long *)pvBitmap)
4329 : "Ir" (iBit),
4330 "m" (*(volatile long *)pvBitmap)
4331 : "memory");
4332# else
4333 __asm
4334 {
4335# ifdef RT_ARCH_AMD64
4336 mov rax, [pvBitmap]
4337 mov edx, [iBit]
4338 btc [rax], edx
4339# else
4340 mov eax, [pvBitmap]
4341 mov edx, [iBit]
4342 btc [eax], edx
4343# endif
4344 }
4345# endif
4346}
4347#endif
4348
4349
4350/**
4351 * Atomically toggles a bit in a bitmap, ordered.
4352 *
4353 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4354 * the memory access isn't atomic!
4355 * @param iBit The bit to toggle.
4356 *
4357 * @remarks x86: Requires a 386 or later.
4358 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
4359#if RT_INLINE_ASM_EXTERNAL
4360DECLASM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4361#else
4362DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4363{
 /* Assert that the two low address bits are zero, i.e. that pvBitmap is 32-bit aligned. */
4364 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4365# if RT_INLINE_ASM_GNU_STYLE
 /* Lock-prefixed btc; the bitmap is both output and input operand since it is read and rewritten. */
4366 __asm__ __volatile__("lock; btcl %1, %0"
4367 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4368 : "Ir" (iBit),
4369 "m" (*(volatile long RT_FAR *)pvBitmap)
4370 : "memory");
4371# else
 /* Fallback inline assembly block: same lock-prefixed btc with pointer and index in registers. */
4372 __asm
4373 {
4374# ifdef RT_ARCH_AMD64
4375 mov rax, [pvBitmap]
4376 mov edx, [iBit]
4377 lock btc [rax], edx
4378# else
4379 mov eax, [pvBitmap]
4380 mov edx, [iBit]
4381 lock btc [eax], edx
4382# endif
4383 }
4384# endif
4385}
4386#endif
4387
4388
4389/**
4390 * Tests and sets a bit in a bitmap.
4391 *
4392 * @returns true if the bit was set.
4393 * @returns false if the bit was clear.
4394 *
4395 * @param pvBitmap Pointer to the bitmap.
4396 * @param iBit The bit to test and set.
4397 *
4398 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4399 * However, doing so will yield better performance as well as avoiding
4400 * traps accessing the last bits in the bitmap.
4401 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
4402#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4403DECLASM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4404#else
4405DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4406{
 /* The union lets each path store its result in the width it produces (u8 or u32) and return it as bool. */
4407 union { bool f; uint32_t u32; uint8_t u8; } rc;
4408# if RT_INLINE_ASM_USES_INTRIN
4409 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4410
4411# elif RT_INLINE_ASM_GNU_STYLE
 /* bts without lock prefix; setc stores the resulting carry flag in the low byte of the
    result register and andl $1 clears the remaining bits of the 32-bit value. */
4412 __asm__ __volatile__("btsl %2, %1\n\t"
4413 "setc %b0\n\t"
4414 "andl $1, %0\n\t"
4415 : "=q" (rc.u32),
4416 "=m" (*(volatile long RT_FAR *)pvBitmap)
4417 : "Ir" (iBit),
4418 "m" (*(volatile long RT_FAR *)pvBitmap)
4419 : "memory");
4420# else
 /* Fallback inline assembly block performing the same bts / setc / and sequence via eax. */
4421 __asm
4422 {
4423 mov edx, [iBit]
4424# ifdef RT_ARCH_AMD64
4425 mov rax, [pvBitmap]
4426 bts [rax], edx
4427# else
4428 mov eax, [pvBitmap]
4429 bts [eax], edx
4430# endif
4431 setc al
4432 and eax, 1
4433 mov [rc.u32], eax
4434 }
4435# endif
4436 return rc.f;
4437}
4438#endif
4439
4440
4441/**
4442 * Atomically tests and sets a bit in a bitmap, ordered.
4443 *
4444 * @returns true if the bit was set.
4445 * @returns false if the bit was clear.
4446 *
4447 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4448 * the memory access isn't atomic!
4449 * @param iBit The bit to set.
4450 *
4451 * @remarks x86: Requires a 386 or later.
4452 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
4453#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4454DECLASM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4455#else
4456DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4457{
 /* The union lets each path store its result in the width it produces (u8 or u32) and return it as bool. */
4458 union { bool f; uint32_t u32; uint8_t u8; } rc;
 /* Assert that the two low address bits are zero, i.e. that pvBitmap is 32-bit aligned. */
4459 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4460# if RT_INLINE_ASM_USES_INTRIN
4461 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4462# elif RT_INLINE_ASM_GNU_STYLE
 /* Lock-prefixed bts; setc stores the resulting carry flag in the low byte of the result
    register and andl $1 clears the remaining bits of the 32-bit value. */
4463 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4464 "setc %b0\n\t"
4465 "andl $1, %0\n\t"
4466 : "=q" (rc.u32),
4467 "=m" (*(volatile long RT_FAR *)pvBitmap)
4468 : "Ir" (iBit),
4469 "m" (*(volatile long RT_FAR *)pvBitmap)
4470 : "memory");
4471# else
 /* Fallback inline assembly block performing the same lock bts / setc / and sequence via eax. */
4472 __asm
4473 {
4474 mov edx, [iBit]
4475# ifdef RT_ARCH_AMD64
4476 mov rax, [pvBitmap]
4477 lock bts [rax], edx
4478# else
4479 mov eax, [pvBitmap]
4480 lock bts [eax], edx
4481# endif
4482 setc al
4483 and eax, 1
4484 mov [rc.u32], eax
4485 }
4486# endif
4487 return rc.f;
4488}
4489#endif
4490
4491
4492/**
4493 * Tests and clears a bit in a bitmap.
4494 *
4495 * @returns true if the bit was set.
4496 * @returns false if the bit was clear.
4497 *
4498 * @param pvBitmap Pointer to the bitmap.
4499 * @param iBit The bit to test and clear.
4500 *
4501 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4502 * However, doing so will yield better performance as well as avoiding
4503 * traps accessing the last bits in the bitmap.
4504 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
4505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4506DECLASM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4507#else
4508DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4509{
 /* The union lets each path store its result in the width it produces (u8 or u32) and return it as bool. */
4510 union { bool f; uint32_t u32; uint8_t u8; } rc;
4511# if RT_INLINE_ASM_USES_INTRIN
4512 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4513
4514# elif RT_INLINE_ASM_GNU_STYLE
 /* btr without lock prefix; setc stores the resulting carry flag in the low byte of the
    result register and andl $1 clears the remaining bits of the 32-bit value. */
4515 __asm__ __volatile__("btrl %2, %1\n\t"
4516 "setc %b0\n\t"
4517 "andl $1, %0\n\t"
4518 : "=q" (rc.u32),
4519 "=m" (*(volatile long RT_FAR *)pvBitmap)
4520 : "Ir" (iBit),
4521 "m" (*(volatile long RT_FAR *)pvBitmap)
4522 : "memory");
4523# else
 /* Fallback inline assembly block performing the same btr / setc / and sequence via eax. */
4524 __asm
4525 {
4526 mov edx, [iBit]
4527# ifdef RT_ARCH_AMD64
4528 mov rax, [pvBitmap]
4529 btr [rax], edx
4530# else
4531 mov eax, [pvBitmap]
4532 btr [eax], edx
4533# endif
4534 setc al
4535 and eax, 1
4536 mov [rc.u32], eax
4537 }
4538# endif
4539 return rc.f;
4540}
4541#endif
4542
4543
4544/**
4545 * Atomically tests and clears a bit in a bitmap, ordered.
4546 *
4547 * @returns true if the bit was set.
4548 * @returns false if the bit was clear.
4549 *
4550 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4551 * the memory access isn't atomic!
4552 * @param iBit The bit to test and clear.
4553 *
4554 * @remarks No memory barrier, take care on smp.
4555 * @remarks x86: Requires a 386 or later.
4556 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
4557#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4558DECLASM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4559#else
4560DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4561{
 /* The union lets each path store its result in the width it produces (u8 or u32) and return it as bool. */
4562 union { bool f; uint32_t u32; uint8_t u8; } rc;
 /* Assert that the two low address bits are zero, i.e. that pvBitmap is 32-bit aligned. */
4563 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4564# if RT_INLINE_ASM_USES_INTRIN
4565 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4566
4567# elif RT_INLINE_ASM_GNU_STYLE
 /* Lock-prefixed btr; setc stores the resulting carry flag in the low byte of the result
    register and andl $1 clears the remaining bits of the 32-bit value. */
4568 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4569 "setc %b0\n\t"
4570 "andl $1, %0\n\t"
4571 : "=q" (rc.u32),
4572 "=m" (*(volatile long RT_FAR *)pvBitmap)
4573 : "Ir" (iBit),
4574 "m" (*(volatile long RT_FAR *)pvBitmap)
4575 : "memory");
4576# else
 /* Fallback inline assembly block performing the same lock btr / setc / and sequence via eax. */
4577 __asm
4578 {
4579 mov edx, [iBit]
4580# ifdef RT_ARCH_AMD64
4581 mov rax, [pvBitmap]
4582 lock btr [rax], edx
4583# else
4584 mov eax, [pvBitmap]
4585 lock btr [eax], edx
4586# endif
4587 setc al
4588 and eax, 1
4589 mov [rc.u32], eax
4590 }
4591# endif
4592 return rc.f;
4593}
4594#endif
4595
4596
4597/**
4598 * Tests and toggles a bit in a bitmap.
4599 *
4600 * @returns true if the bit was set.
4601 * @returns false if the bit was clear.
4602 *
4603 * @param pvBitmap Pointer to the bitmap.
4604 * @param iBit The bit to test and toggle.
4605 *
4606 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4607 * However, doing so will yield better performance as well as avoiding
4608 * traps accessing the last bits in the bitmap.
4609 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
4610#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4611DECLASM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4612#else
4613DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4614{
 /* The union lets each path store its result in the width it produces (u8 or u32) and return it as bool. */
4615 union { bool f; uint32_t u32; uint8_t u8; } rc;
4616# if RT_INLINE_ASM_USES_INTRIN
4617 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4618
4619# elif RT_INLINE_ASM_GNU_STYLE
 /* btc without lock prefix; setc stores the resulting carry flag in the low byte of the
    result register and andl $1 clears the remaining bits of the 32-bit value. */
4620 __asm__ __volatile__("btcl %2, %1\n\t"
4621 "setc %b0\n\t"
4622 "andl $1, %0\n\t"
4623 : "=q" (rc.u32),
4624 "=m" (*(volatile long RT_FAR *)pvBitmap)
4625 : "Ir" (iBit),
4626 "m" (*(volatile long RT_FAR *)pvBitmap)
4627 : "memory");
4628# else
 /* Fallback inline assembly block performing the same btc / setc / and sequence via eax. */
4629 __asm
4630 {
4631 mov edx, [iBit]
4632# ifdef RT_ARCH_AMD64
4633 mov rax, [pvBitmap]
4634 btc [rax], edx
4635# else
4636 mov eax, [pvBitmap]
4637 btc [eax], edx
4638# endif
4639 setc al
4640 and eax, 1
4641 mov [rc.u32], eax
4642 }
4643# endif
4644 return rc.f;
4645}
4646#endif
4647
4648
4649/**
4650 * Atomically tests and toggles a bit in a bitmap, ordered.
4651 *
4652 * @returns true if the bit was set.
4653 * @returns false if the bit was clear.
4654 *
4655 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4656 * the memory access isn't atomic!
4657 * @param iBit The bit to test and toggle.
4658 *
4659 * @remarks x86: Requires a 386 or later.
4660 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
4661#if RT_INLINE_ASM_EXTERNAL
4662DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4663#else
4664DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4665{
 /* Both paths below write rc.u32; the value is returned as bool through rc.f. */
4666 union { bool f; uint32_t u32; uint8_t u8; } rc;
 /* Assert that the two low address bits are zero, i.e. that pvBitmap is 32-bit aligned. */
4667 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4668# if RT_INLINE_ASM_GNU_STYLE
 /* Lock-prefixed btc; setc stores the resulting carry flag in the low byte of the result
    register and andl $1 clears the remaining bits of the 32-bit value. */
4669 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4670 "setc %b0\n\t"
4671 "andl $1, %0\n\t"
4672 : "=q" (rc.u32),
4673 "=m" (*(volatile long RT_FAR *)pvBitmap)
4674 : "Ir" (iBit),
4675 "m" (*(volatile long RT_FAR *)pvBitmap)
4676 : "memory");
4677# else
 /* Fallback inline assembly block performing the same lock btc / setc / and sequence via eax. */
4678 __asm
4679 {
4680 mov edx, [iBit]
4681# ifdef RT_ARCH_AMD64
4682 mov rax, [pvBitmap]
4683 lock btc [rax], edx
4684# else
4685 mov eax, [pvBitmap]
4686 lock btc [eax], edx
4687# endif
4688 setc al
4689 and eax, 1
4690 mov [rc.u32], eax
4691 }
4692# endif
4693 return rc.f;
4694}
4695#endif
4696
4697
4698/**
4699 * Tests if a bit in a bitmap is set.
4700 *
4701 * @returns true if the bit is set.
4702 * @returns false if the bit is clear.
4703 *
4704 * @param pvBitmap Pointer to the bitmap.
4705 * @param iBit The bit to test.
4706 *
4707 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4708 * However, doing so will yield better performance as well as avoiding
4709 * traps accessing the last bits in the bitmap.
4710 */
4711#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4712DECLASM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4713#else
4714DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4715{
4716 union { bool f; uint32_t u32; uint8_t u8; } rc;
4717# if RT_INLINE_ASM_USES_INTRIN
4718 rc.u32 = _bittest((long *)pvBitmap, iBit);
4719# elif RT_INLINE_ASM_GNU_STYLE
4720
4721 __asm__ __volatile__("btl %2, %1\n\t"
4722 "setc %b0\n\t"
4723 "andl $1, %0\n\t"
4724 : "=q" (rc.u32)
4725 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4726 "Ir" (iBit)
4727 : "memory");
4728# else
4729 __asm
4730 {
4731 mov edx, [iBit]
4732# ifdef RT_ARCH_AMD64
4733 mov rax, [pvBitmap]
4734 bt [rax], edx
4735# else
4736 mov eax, [pvBitmap]
4737 bt [eax], edx
4738# endif
4739 setc al
4740 and eax, 1
4741 mov [rc.u32], eax
4742 }
4743# endif
4744 return rc.f;
4745}
4746#endif
4747
4748
4749/**
4750 * Clears a bit range within a bitmap.
4751 *
4752 * @param pvBitmap Pointer to the bitmap.
4753 * @param iBitStart The First bit to clear.
4754 * @param iBitEnd The first bit not to clear.
4755 */
4756DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4757{
4758 if (iBitStart < iBitEnd)
4759 {
4760 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4761 int32_t iStart = iBitStart & ~31;
4762 int32_t iEnd = iBitEnd & ~31;
4763 if (iStart == iEnd)
4764 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4765 else
4766 {
4767 /* bits in first dword. */
4768 if (iBitStart & 31)
4769 {
4770 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4771 pu32++;
4772 iBitStart = iStart + 32;
4773 }
4774
4775 /* whole dword. */
4776 if (iBitStart != iEnd)
4777 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4778
4779 /* bits in last dword. */
4780 if (iBitEnd & 31)
4781 {
4782 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4783 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4784 }
4785 }
4786 }
4787}
4788
4789
4790/**
4791 * Sets a bit range within a bitmap.
4792 *
4793 * @param pvBitmap Pointer to the bitmap.
4794 * @param iBitStart The First bit to set.
4795 * @param iBitEnd The first bit not to set.
4796 */
4797DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4798{
4799 if (iBitStart < iBitEnd)
4800 {
4801 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4802 int32_t iStart = iBitStart & ~31;
4803 int32_t iEnd = iBitEnd & ~31;
4804 if (iStart == iEnd)
4805 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4806 else
4807 {
4808 /* bits in first dword. */
4809 if (iBitStart & 31)
4810 {
4811 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4812 pu32++;
4813 iBitStart = iStart + 32;
4814 }
4815
4816 /* whole dword. */
4817 if (iBitStart != iEnd)
4818 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4819
4820 /* bits in last dword. */
4821 if (iBitEnd & 31)
4822 {
4823 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4824 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4825 }
4826 }
4827 }
4828}
4829
4830
4831/**
4832 * Finds the first clear bit in a bitmap.
4833 *
4834 * @returns Index of the first zero bit.
4835 * @returns -1 if no clear bit was found.
4836 * @param pvBitmap Pointer to the bitmap.
4837 * @param cBits The number of bits in the bitmap. Multiple of 32.
4838 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
4839#if RT_INLINE_ASM_EXTERNAL
4840DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4841#else
4842DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4843{
 /* An empty bitmap has no clear bit; the final return -1 handles it. */
4844 if (cBits)
4845 {
4846 int32_t iBit;
4847# if RT_INLINE_ASM_GNU_STYLE
4848 RTCCUINTREG uEAX, uECX, uEDI;
 /* Align the bit count to 32 (RT_ALIGN_32 is defined elsewhere, presumably rounding up);
    the scan below runs over cBits >> 5 dwords. */
4849 cBits = RT_ALIGN_32(cBits, 32);
 /* Operand setup per the constraints: edx (iBit) = 0xffffffff, ecx = dword count,
    edi = bitmap address, eax = 0xffffffff as the scasl compare pattern.
    If every dword equals the pattern the je skips the index computation and iBit keeps
    its initial 0xffffffff, i.e. -1.  Otherwise edi is moved back to the mismatching dword,
    the xor with the all-ones eax inverts it so clear bits become set, and the result is
    (byte offset from the bitmap start in %5) * 8 + the bsf bit index. */
4850 __asm__ __volatile__("repe; scasl\n\t"
4851 "je 1f\n\t"
4852# ifdef RT_ARCH_AMD64
4853 "lea -4(%%rdi), %%rdi\n\t"
4854 "xorl (%%rdi), %%eax\n\t"
4855 "subq %5, %%rdi\n\t"
4856# else
4857 "lea -4(%%edi), %%edi\n\t"
4858 "xorl (%%edi), %%eax\n\t"
4859 "subl %5, %%edi\n\t"
4860# endif
4861 "shll $3, %%edi\n\t"
4862 "bsfl %%eax, %%edx\n\t"
4863 "addl %%edi, %%edx\n\t"
4864 "1:\t\n"
4865 : "=d" (iBit),
4866 "=&c" (uECX),
4867 "=&D" (uEDI),
4868 "=&a" (uEAX)
4869 : "0" (0xffffffff),
4870 "mr" (pvBitmap),
4871 "1" (cBits >> 5),
4872 "2" (pvBitmap),
4873 "3" (0xffffffff));
4874# else
 /* Same algorithm in the fallback inline assembly block; rbx/ebx keeps the bitmap start
    for the offset computation and edx starts out as 0ffffffffh (-1, not found). */
4875 cBits = RT_ALIGN_32(cBits, 32);
4876 __asm
4877 {
4878# ifdef RT_ARCH_AMD64
4879 mov rdi, [pvBitmap]
4880 mov rbx, rdi
4881# else
4882 mov edi, [pvBitmap]
4883 mov ebx, edi
4884# endif
4885 mov edx, 0ffffffffh
4886 mov eax, edx
4887 mov ecx, [cBits]
4888 shr ecx, 5
4889 repe scasd
4890 je done
4891
4892# ifdef RT_ARCH_AMD64
4893 lea rdi, [rdi - 4]
4894 xor eax, [rdi]
4895 sub rdi, rbx
4896# else
4897 lea edi, [edi - 4]
4898 xor eax, [edi]
4899 sub edi, ebx
4900# endif
4901 shl edi, 3
4902 bsf edx, eax
4903 add edx, edi
4904 done:
4905 mov [iBit], edx
4906 }
4907# endif
4908 return iBit;
4909 }
4910 return -1;
4911}
4912#endif
4913
4914
4915/**
4916 * Finds the next clear bit in a bitmap.
4917 *
4918 * @returns Index of the next clear bit.
4919 * @returns -1 if no clear bit was found.
4920 * @param pvBitmap Pointer to the bitmap.
4921 * @param cBits The number of bits in the bitmap. Multiple of 32.
4922 * @param iBitPrev The bit returned from the last search.
4923 * The search will start at iBitPrev + 1.
4924 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
4925#if RT_INLINE_ASM_EXTERNAL
4926DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4927#else
4928DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4929{
4930 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
 /* After the increment iBitPrev is the first bit to examine; iBit is its position within its dword. */
4931 int iBit = ++iBitPrev & 31;
4932 if (iBit)
4933 {
4934 /*
4935 * Inspect the 32-bit word containing the unaligned bit.
4936 */
 /* Invert the word so clear bits become set, then shift out the bits below the start position. */
4937 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4938
4939# if RT_INLINE_ASM_USES_INTRIN
4940 unsigned long ulBit = 0;
4941 if (_BitScanForward(&ulBit, u32))
4942 return ulBit + iBitPrev;
4943# else
4944# if RT_INLINE_ASM_GNU_STYLE
 /* bsf followed by jnz: when the branch is not taken iBit is loaded with -1 (nothing found). */
4945 __asm__ __volatile__("bsf %1, %0\n\t"
4946 "jnz 1f\n\t"
4947 "movl $-1, %0\n\t"
4948 "1:\n\t"
4949 : "=r" (iBit)
4950 : "r" (u32));
4951# else
4952 __asm
4953 {
4954 mov edx, [u32]
4955 bsf eax, edx
4956 jnz done
4957 mov eax, 0ffffffffh
4958 done:
4959 mov [iBit], eax
4960 }
4961# endif
 /* The scan result is relative to the shifted word, so add the start position back. */
4962 if (iBit >= 0)
4963 return iBit + iBitPrev;
4964# endif
4965
4966 /*
4967 * Skip ahead and see if there is anything left to search.
4968 */
 /* Setting the low five bits and adding one advances iBitPrev to the next multiple of 32. */
4969 iBitPrev |= 31;
4970 iBitPrev++;
4971 if (cBits <= (uint32_t)iBitPrev)
4972 return -1;
4973 }
4974
4975 /*
4976 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4977 */
 /* The helper returns an index relative to the dword it was handed, hence the rebasing below. */
4978 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4979 if (iBit >= 0)
4980 iBit += iBitPrev;
4981 return iBit;
4982}
4983#endif
4984
4985
4986/**
4987 * Finds the first set bit in a bitmap.
4988 *
4989 * @returns Index of the first set bit.
4990 * @returns -1 if no set bit was found.
4991 * @param pvBitmap Pointer to the bitmap.
4992 * @param cBits The number of bits in the bitmap. Multiple of 32.
4993 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
4994#if RT_INLINE_ASM_EXTERNAL
4995DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4996#else
4997DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4998{
 /* An empty bitmap has no set bit; the final return -1 handles it. */
4999 if (cBits)
5000 {
5001 int32_t iBit;
5002# if RT_INLINE_ASM_GNU_STYLE
5003 RTCCUINTREG uEAX, uECX, uEDI;
 /* Align the bit count to 32 (RT_ALIGN_32 is defined elsewhere, presumably rounding up);
    the scan below runs over cBits >> 5 dwords. */
5004 cBits = RT_ALIGN_32(cBits, 32);
 /* Operand setup per the constraints: edx (iBit) = 0xffffffff, ecx = dword count,
    edi = bitmap address, eax = 0 as the scasl compare pattern.
    If every dword is zero the je skips the index computation and iBit keeps its initial
    0xffffffff, i.e. -1.  Otherwise edi is moved back to the non-zero dword, which is loaded
    into eax, and the result is (byte offset from the bitmap start in %5) * 8 + the bsf
    bit index. */
5005 __asm__ __volatile__("repe; scasl\n\t"
5006 "je 1f\n\t"
5007# ifdef RT_ARCH_AMD64
5008 "lea -4(%%rdi), %%rdi\n\t"
5009 "movl (%%rdi), %%eax\n\t"
5010 "subq %5, %%rdi\n\t"
5011# else
5012 "lea -4(%%edi), %%edi\n\t"
5013 "movl (%%edi), %%eax\n\t"
5014 "subl %5, %%edi\n\t"
5015# endif
5016 "shll $3, %%edi\n\t"
5017 "bsfl %%eax, %%edx\n\t"
5018 "addl %%edi, %%edx\n\t"
5019 "1:\t\n"
5020 : "=d" (iBit),
5021 "=&c" (uECX),
5022 "=&D" (uEDI),
5023 "=&a" (uEAX)
5024 : "0" (0xffffffff),
5025 "mr" (pvBitmap),
5026 "1" (cBits >> 5),
5027 "2" (pvBitmap),
5028 "3" (0));
5029# else
 /* Same algorithm in the fallback inline assembly block; rbx/ebx keeps the bitmap start
    for the offset computation and edx starts out as 0ffffffffh (-1, not found). */
5030 cBits = RT_ALIGN_32(cBits, 32);
5031 __asm
5032 {
5033# ifdef RT_ARCH_AMD64
5034 mov rdi, [pvBitmap]
5035 mov rbx, rdi
5036# else
5037 mov edi, [pvBitmap]
5038 mov ebx, edi
5039# endif
5040 mov edx, 0ffffffffh
5041 xor eax, eax
5042 mov ecx, [cBits]
5043 shr ecx, 5
5044 repe scasd
5045 je done
5046# ifdef RT_ARCH_AMD64
5047 lea rdi, [rdi - 4]
5048 mov eax, [rdi]
5049 sub rdi, rbx
5050# else
5051 lea edi, [edi - 4]
5052 mov eax, [edi]
5053 sub edi, ebx
5054# endif
5055 shl edi, 3
5056 bsf edx, eax
5057 add edx, edi
5058 done:
5059 mov [iBit], edx
5060 }
5061# endif
5062 return iBit;
5063 }
5064 return -1;
5065}
5066#endif
5067
5068
5069/**
5070 * Finds the next set bit in a bitmap.
5071 *
5072 * @returns Index of the next set bit.
5073 * @returns -1 if no set bit was found.
5074 * @param pvBitmap Pointer to the bitmap.
5075 * @param cBits The number of bits in the bitmap. Multiple of 32.
5076 * @param iBitPrev The bit returned from the last search.
5077 * The search will start at iBitPrev + 1.
5078 */
/* With RT_INLINE_ASM_EXTERNAL only the prototype is emitted; otherwise the inline body below is used. */
5079#if RT_INLINE_ASM_EXTERNAL
5080DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5081#else
5082DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5083{
5084 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
 /* After the increment iBitPrev is the first bit to examine; iBit is its position within its dword. */
5085 int iBit = ++iBitPrev & 31;
5086 if (iBit)
5087 {
5088 /*
5089 * Inspect the 32-bit word containing the unaligned bit.
5090 */
 /* Shift out the bits below the start position so the scan only sees candidates. */
5091 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5092
5093# if RT_INLINE_ASM_USES_INTRIN
5094 unsigned long ulBit = 0;
5095 if (_BitScanForward(&ulBit, u32))
5096 return ulBit + iBitPrev;
5097# else
5098# if RT_INLINE_ASM_GNU_STYLE
 /* bsf followed by jnz: when the branch is not taken iBit is loaded with -1 (nothing found). */
5099 __asm__ __volatile__("bsf %1, %0\n\t"
5100 "jnz 1f\n\t"
5101 "movl $-1, %0\n\t"
5102 "1:\n\t"
5103 : "=r" (iBit)
5104 : "r" (u32));
5105# else
5106 __asm
5107 {
5108 mov edx, [u32]
5109 bsf eax, edx
5110 jnz done
5111 mov eax, 0ffffffffh
5112 done:
5113 mov [iBit], eax
5114 }
5115# endif
 /* The scan result is relative to the shifted word, so add the start position back. */
5116 if (iBit >= 0)
5117 return iBit + iBitPrev;
5118# endif
5119
5120 /*
5121 * Skip ahead and see if there is anything left to search.
5122 */
 /* Setting the low five bits and adding one advances iBitPrev to the next multiple of 32. */
5123 iBitPrev |= 31;
5124 iBitPrev++;
5125 if (cBits <= (uint32_t)iBitPrev)
5126 return -1;
5127 }
5128
5129 /*
5130 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5131 */
 /* The helper returns an index relative to the dword it was handed, hence the rebasing below. */
5132 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5133 if (iBit >= 0)
5134 iBit += iBitPrev;
5135 return iBit;
5136}
5137#endif
5138
5139
5140/**
5141 * Finds the first bit which is set in the given 32-bit integer.
5142 * Bits are numbered from 1 (least significant) to 32.
5143 *
5144 * @returns index [1..32] of the first set bit.
5145 * @returns 0 if all bits are cleared.
5146 * @param u32 Integer to search for set bits.
5147 * @remarks Similar to ffs() in BSD.
5148 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5150DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5151#else
5152DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5153{
5154# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic yields a 0-based index; convert to the 1-based numbering, 0 meaning no bit set. */
5155 unsigned long iBit;
5156 if (_BitScanForward(&iBit, u32))
5157 iBit++;
5158 else
5159 iBit = 0;
5160# elif RT_INLINE_ASM_GNU_STYLE
5161 uint32_t iBit;
 /* bsf + jnz: on the taken branch the index is incremented (1-based result),
    otherwise the register is zeroed to report that no bit is set. */
5162 __asm__ __volatile__("bsf %1, %0\n\t"
5163 "jnz 1f\n\t"
5164 "xorl %0, %0\n\t"
5165 "jmp 2f\n"
5166 "1:\n\t"
5167 "incl %0\n"
5168 "2:\n\t"
5169 : "=r" (iBit)
5170 : "rm" (u32));
5171# else
5172 uint32_t iBit;
 /* Same bsf / increment-or-zero sequence in the fallback inline assembly block. */
5173 _asm
5174 {
5175 bsf eax, [u32]
5176 jnz found
5177 xor eax, eax
5178 jmp done
5179 found:
5180 inc eax
5181 done:
5182 mov [iBit], eax
5183 }
5184# endif
5185 return iBit;
5186}
5187#endif
5188
5189
5190/**
5191 * Finds the first bit which is set in the given 32-bit integer.
5192 * Bits are numbered from 1 (least significant) to 32.
5193 *
5194 * @returns index [1..32] of the first set bit.
5195 * @returns 0 if all bits are cleared.
5196 * @param i32 Integer to search for set bits.
5197 * @remark Similar to ffs() in BSD.
5198 */
5199DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5200{
5201 return ASMBitFirstSetU32((uint32_t)i32);
5202}
5203
5204
5205/**
5206 * Finds the first bit which is set in the given 64-bit integer.
5207 *
5208 * Bits are numbered from 1 (least significant) to 64.
5209 *
5210 * @returns index [1..64] of the first set bit.
5211 * @returns 0 if all bits are cleared.
5212 * @param u64 Integer to search for set bits.
5213 * @remarks Similar to ffs() in BSD.
5214 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5215#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5216DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5217#else
5218DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5219{
5220# if RT_INLINE_ASM_USES_INTRIN
5221 unsigned long iBit;
5222# if ARCH_BITS == 64
5223 if (_BitScanForward64(&iBit, u64))
5224 iBit++;
5225 else
5226 iBit = 0;
5227# else
 /* 32-bit build: scan the low dword first, then the high dword.  A hit in the high dword
    adds 33: 32 for the dword offset plus 1 for the 1-based numbering. */
5228 if (_BitScanForward(&iBit, (uint32_t)u64))
5229 iBit++;
5230 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5231 iBit += 33;
5232 else
5233 iBit = 0;
5234# endif
5235# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5236 uint64_t iBit;
 /* bsfq + jnz: on the taken branch the index is incremented (1-based result),
    otherwise the register is zeroed; the %k0 forms operate on its 32-bit part. */
5237 __asm__ __volatile__("bsfq %1, %0\n\t"
5238 "jnz 1f\n\t"
5239 "xorl %k0, %k0\n\t"
5240 "jmp 2f\n"
5241 "1:\n\t"
5242 "incl %k0\n"
5243 "2:\n\t"
5244 : "=r" (iBit)
5245 : "rm" (u64));
5246# else
 /* Generic path: try the low dword, then the high dword (offset by 32) via ASMBitFirstSetU32. */
5247 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5248 if (!iBit)
5249 {
5250 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5251 if (iBit)
5252 iBit += 32;
5253 }
5254# endif
5255 return (unsigned)iBit;
5256}
5257#endif
5258
5259
5260/**
5261 * Finds the first bit which is set in the given 16-bit integer.
5262 *
5263 * Bits are numbered from 1 (least significant) to 16.
5264 *
5265 * @returns index [1..16] of the first set bit.
5266 * @returns 0 if all bits are cleared.
5267 * @param u16 Integer to search for set bits.
5268 * @remarks For 16-bit bs3kit code.
5269 */
5270#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5271DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5272#else
5273DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5274{
5275 return ASMBitFirstSetU32((uint32_t)u16);
5276}
5277#endif
5278
5279
5280/**
5281 * Finds the last bit which is set in the given 32-bit integer.
5282 * Bits are numbered from 1 (least significant) to 32.
5283 *
5284 * @returns index [1..32] of the last set bit.
5285 * @returns 0 if all bits are cleared.
5286 * @param u32 Integer to search for set bits.
5287 * @remark Similar to fls() in BSD.
5288 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5289#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5290DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5291#else
5292DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5293{
5294# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic yields a 0-based index; convert to the 1-based numbering, 0 meaning no bit set. */
5295 unsigned long iBit;
5296 if (_BitScanReverse(&iBit, u32))
5297 iBit++;
5298 else
5299 iBit = 0;
5300# elif RT_INLINE_ASM_GNU_STYLE
5301 uint32_t iBit;
 /* bsrl + jnz: on the taken branch the index is incremented (1-based result),
    otherwise the register is zeroed to report that no bit is set. */
5302 __asm__ __volatile__("bsrl %1, %0\n\t"
5303 "jnz 1f\n\t"
5304 "xorl %0, %0\n\t"
5305 "jmp 2f\n"
5306 "1:\n\t"
5307 "incl %0\n"
5308 "2:\n\t"
5309 : "=r" (iBit)
5310 : "rm" (u32));
5311# else
5312 uint32_t iBit;
 /* Same bsr / increment-or-zero sequence in the fallback inline assembly block. */
5313 _asm
5314 {
5315 bsr eax, [u32]
5316 jnz found
5317 xor eax, eax
5318 jmp done
5319 found:
5320 inc eax
5321 done:
5322 mov [iBit], eax
5323 }
5324# endif
5325 return iBit;
5326}
5327#endif
5328
5329
5330/**
5331 * Finds the last bit which is set in the given 32-bit integer.
5332 * Bits are numbered from 1 (least significant) to 32.
5333 *
5334 * @returns index [1..32] of the last set bit.
5335 * @returns 0 if all bits are cleared.
5336 * @param i32 Integer to search for set bits.
5337 * @remark Similar to fls() in BSD.
5338 */
5339DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5340{
5341 return ASMBitLastSetU32((uint32_t)i32);
5342}
5343
5344
5345/**
5346 * Finds the last bit which is set in the given 64-bit integer.
5347 *
5348 * Bits are numbered from 1 (least significant) to 64.
5349 *
5350 * @returns index [1..64] of the last set bit.
5351 * @returns 0 if all bits are cleared.
5352 * @param u64 Integer to search for set bits.
5353 * @remark Similar to fls() in BSD.
5354 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5356DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5357#else
5358DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5359{
5360# if RT_INLINE_ASM_USES_INTRIN
5361 unsigned long iBit;
5362# if ARCH_BITS == 64
5363 if (_BitScanReverse64(&iBit, u64))
5364 iBit++;
5365 else
5366 iBit = 0;
5367# else
 /* 32-bit build: scan the high dword first, then the low dword.  A hit in the high dword
    adds 33: 32 for the dword offset plus 1 for the 1-based numbering. */
5368 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5369 iBit += 33;
5370 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5371 iBit++;
5372 else
5373 iBit = 0;
5374# endif
5375# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5376 uint64_t iBit;
 /* bsrq + jnz: on the taken branch the index is incremented (1-based result),
    otherwise the register is zeroed; the %k0 forms operate on its 32-bit part. */
5377 __asm__ __volatile__("bsrq %1, %0\n\t"
5378 "jnz 1f\n\t"
5379 "xorl %k0, %k0\n\t"
5380 "jmp 2f\n"
5381 "1:\n\t"
5382 "incl %k0\n"
5383 "2:\n\t"
5384 : "=r" (iBit)
5385 : "rm" (u64));
5386# else
 /* Generic path: try the high dword (offset by 32), then the low dword via ASMBitLastSetU32. */
5387 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5388 if (iBit)
5389 iBit += 32;
5390 else
5391 iBit = ASMBitLastSetU32((uint32_t)u64);
 /* Closes the "# if RT_INLINE_ASM_USES_INTRIN" chain opened at the top of the function body. */
5392#endif
5393 return (unsigned)iBit;
5394}
5395#endif
5396
5397
5398/**
5399 * Finds the last bit which is set in the given 16-bit integer.
5400 *
5401 * Bits are numbered from 1 (least significant) to 16.
5402 *
5403 * @returns index [1..16] of the last set bit.
5404 * @returns 0 if all bits are cleared.
5405 * @param u16 Integer to search for set bits.
5406 * @remarks For 16-bit bs3kit code.
5407 */
5408#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5409DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5410#else
5411DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5412{
5413 return ASMBitLastSetU32((uint32_t)u16);
5414}
5415#endif
5416
5417
5418/**
5419 * Reverse the byte order of the given 16-bit integer.
5420 *
5421 * @returns The byte-swapped value.
5422 * @param u16 16-bit integer value.
5423 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5424#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5425DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5426#else
5427DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5428{
5429# if RT_INLINE_ASM_USES_INTRIN
5430 u16 = _byteswap_ushort(u16);
5431# elif RT_INLINE_ASM_GNU_STYLE
 /* Rotating a 16-bit value by 8 bits exchanges its two bytes; the value is updated in place. */
5432 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5433# else
 /* Same 8-bit rotate in the fallback inline assembly block, going through ax. */
5434 _asm
5435 {
5436 mov ax, [u16]
5437 ror ax, 8
5438 mov [u16], ax
5439 }
5440# endif
5441 return u16;
5442}
5443#endif
5444
5445
5446/**
5447 * Reverse the byte order of the given 32-bit integer.
5448 *
5449 * @returns The byte-swapped value.
5450 * @param u32 32-bit integer value.
5451 */
/* Prototype only when the implementation is external and no intrinsics are used; inline body otherwise. */
5452#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5453DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5454#else
5455DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5456{
5457# if RT_INLINE_ASM_USES_INTRIN
5458 u32 = _byteswap_ulong(u32);
5459# elif RT_INLINE_ASM_GNU_STYLE
 /* bswap operates in place: the "0" constraint ties the input to the output register. */
5460 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5461# else
 /* Same bswap in the fallback inline assembly block, going through eax. */
5462 _asm
5463 {
5464 mov eax, [u32]
5465 bswap eax
5466 mov [u32], eax
5467 }
5468# endif
5469 return u32;
5470}
5471#endif
5472
5473
5474/**
5475 * Reverse the byte order of the given 64-bit integer.
5476 *
5477 * @returns Revert
5478 * @param u64 64-bit integer value.
5479 */
5480DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5481{
5482#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5483 u64 = _byteswap_uint64(u64);
5484#else
5485 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5486 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5487#endif
5488 return u64;
5489}
5490
5491
5492/**
5493 * Rotate 32-bit unsigned value to the left by @a cShift.
5494 *
5495 * @returns Rotated value.
5496 * @param u32 The value to rotate.
5497 * @param cShift How many bits to rotate by.
5498 */
5499#ifdef __WATCOMC__
5500DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5501#else
5502DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5503{
5504# if RT_INLINE_ASM_USES_INTRIN
5505 return _rotl(u32, cShift);
5506# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5507 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5508 return u32;
5509# else
5510 cShift &= 31;
5511 return (u32 << cShift) | (u32 >> (32 - cShift));
5512# endif
5513}
5514#endif
5515
5516
5517/**
5518 * Rotate 32-bit unsigned value to the right by @a cShift.
5519 *
5520 * @returns Rotated value.
5521 * @param u32 The value to rotate.
5522 * @param cShift How many bits to rotate by.
5523 */
5524#ifdef __WATCOMC__
5525DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5526#else
5527DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5528{
5529# if RT_INLINE_ASM_USES_INTRIN
5530 return _rotr(u32, cShift);
5531# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5532 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5533 return u32;
5534# else
5535 cShift &= 31;
5536 return (u32 >> cShift) | (u32 << (32 - cShift));
5537# endif
5538}
5539#endif
5540
5541
5542/**
5543 * Rotate 64-bit unsigned value to the left by @a cShift.
5544 *
5545 * @returns Rotated value.
5546 * @param u64 The value to rotate.
5547 * @param cShift How many bits to rotate by.
5548 */
5549DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5550{
5551#if RT_INLINE_ASM_USES_INTRIN
5552 return _rotl64(u64, cShift);
5553#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5554 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5555 return u64;
5556#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5557 uint32_t uSpill;
5558 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5559 "jz 1f\n\t"
5560 "xchgl %%eax, %%edx\n\t"
5561 "1:\n\t"
5562 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5563 "jz 2f\n\t"
5564 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5565 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5566 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5567 "2:\n\t" /* } */
5568 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5569 : "0" (u64),
5570 "1" (cShift));
5571 return u64;
5572#else
5573 cShift &= 63;
5574 return (u64 << cShift) | (u64 >> (64 - cShift));
5575#endif
5576}
5577
5578
5579/**
5580 * Rotate 64-bit unsigned value to the right by @a cShift.
5581 *
5582 * @returns Rotated value.
5583 * @param u64 The value to rotate.
5584 * @param cShift How many bits to rotate by.
5585 */
5586DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5587{
5588#if RT_INLINE_ASM_USES_INTRIN
5589 return _rotr64(u64, cShift);
5590#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5591 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5592 return u64;
5593#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5594 uint32_t uSpill;
5595 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5596 "jz 1f\n\t"
5597 "xchgl %%eax, %%edx\n\t"
5598 "1:\n\t"
5599 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5600 "jz 2f\n\t"
5601 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5602 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5603 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5604 "2:\n\t" /* } */
5605 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5606 : "0" (u64),
5607 "1" (cShift));
5608 return u64;
5609#else
5610 cShift &= 63;
5611 return (u64 >> cShift) | (u64 << (64 - cShift));
5612#endif
5613}
5614
5615/** @} */
5616
5617
5618/** @} */
5619
5620#endif
5621
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette