VirtualBox

source: vbox/trunk/include/iprt/asm.h@68676

Last change on this file since 68676 was 68606, checked in by vboxsync, 7 years ago

iprt/asm*: Made the code safe to use with 16-bit code models employing near data pointers (i.e. adding RT_FAR to all pointers).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 161.7 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2016 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using _MSC_VER 1400 or later.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is that
103 * the former will complete outstanding reads and writes before continuing,
104 * while the latter doesn't make any promises about the order. Ordered
105 * operations don't, it seems, make any 100% promise with regard to whether
106 * the operation will complete before any subsequent memory access.
107 * (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
118 * static inline uint32_t rdmsr_low(int idx)
119 * {
120 * uint32_t low;
121 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
122 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
135 * @{
136 */
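
/* Example sketch (illustrative only; the variable and function names below are
 * made up): the ordered variants are the ones to use when publishing data to
 * another thread:
 *
 *     static uint32_t volatile g_fReady;
 *     static uint32_t          g_uPayload;
 *
 *     void producer(uint32_t uValue)
 *     {
 *         g_uPayload = uValue;
 *         ASMAtomicXchgU32(&g_fReady, 1);      // ordered: the payload store completes first
 *     }
 *
 *     uint32_t consumer(void)
 *     {
 *         while (!ASMAtomicReadU32(&g_fReady)) // ordered read
 *             ASMNopPause();
 *         return g_uPayload;
 *     }
 *
 * The ASMAtomicUoSomething variants make no such ordering promise and are only
 * appropriate when no ordering guarantee is needed.
 */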
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
149/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
150 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
151 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
152 * mode, x86.
153 *
154 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
155 * when in PIC mode on x86.
156 */
157#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
158# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
160# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
161# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
162# elif ( (defined(PIC) || defined(__PIC__)) \
163 && defined(RT_ARCH_X86) \
164 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
165 || defined(RT_OS_DARWIN)) )
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# else
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# endif
170#endif
171
172
173/** @def ASMReturnAddress
174 * Gets the return address of the current (or calling if you like) function or method.
175 */
176#ifdef _MSC_VER
177# ifdef __cplusplus
178extern "C"
179# endif
180void * _ReturnAddress(void);
181# pragma intrinsic(_ReturnAddress)
182# define ASMReturnAddress() _ReturnAddress()
183#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
184# define ASMReturnAddress() __builtin_return_address(0)
185#elif defined(__WATCOMC__)
186# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
187#else
188# error "Unsupported compiler."
189#endif
190
191
192/**
193 * Compiler memory barrier.
194 *
195 * Ensure that the compiler does not use any cached (register/tmp stack) memory
196 * values or any outstanding writes when returning from this function.
197 *
198 * This function must be used if non-volatile data is modified by a
199 * device or the VMM. Typical cases are port access, MMIO access,
200 * trapping instructions, etc.
201 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
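/* Example sketch (illustrative only; waitForDevice and pStatusReg are made-up
 * names standing for a poll of memory that a device or the VMM updates):
 *
 *     uint32_t waitForDevice(uint32_t *pStatusReg)
 *     {
 *         while (*pStatusReg == 0)
 *             ASMCompilerBarrier();   // forces a fresh read of the status each iteration
 *         return *pStatusReg;
 *     }
 *
 * Without the barrier the compiler is free to cache the status in a register
 * and spin forever, since the pointer is not declared volatile.
 */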
217
218/** @def ASMBreakpoint
219 * Debugger Breakpoint.
220 * @deprecated Use RT_BREAKPOINT instead.
221 * @internal
222 */
223#define ASMBreakpoint() RT_BREAKPOINT()
224
225
226/**
227 * Spinloop hint for platforms that have one; an empty function on the other
228 * platforms.
229 *
230 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
231 * spin locks.
232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
253
254/**
255 * Atomically Exchange an unsigned 8-bit value, ordered.
256 *
257 * @returns Current *pu8 value
258 * @param pu8 Pointer to the 8-bit variable to update.
259 * @param u8 The 8-bit value to assign to *pu8.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pi8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the 8-bit variable to update.
311 * @param f The 8-bit value to assign to *pf.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
324 * Atomically Exchange an unsigned 16-bit value, ordered.
325 *
326 * @returns Current *pu16 value
327 * @param pu16 Pointer to the 16-bit variable to update.
328 * @param u16 The 16-bit value to assign to *pu16.
329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pi16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
372}
373
374
375/**
376 * Atomically Exchange an unsigned 32-bit value, ordered.
377 *
378 * @returns Current *pu32 value
379 * @param pu32 Pointer to the 32-bit variable to update.
380 * @param u32 The 32-bit value to assign to *pu32.
381 *
382 * @remarks Does not work on 286 and earlier.
383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
415 return u32;
416}
417#endif
418
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pi32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
430}
431
432
433/**
434 * Atomically Exchange an unsigned 64-bit value, ordered.
435 *
436 * @returns Current *pu64 value
437 * @param pu64 Pointer to the 64-bit variable to update.
438 * @param u64 The 64-bit value to assign to *pu64.
439 *
440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange a signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a pointer value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param ppv Pointer to the pointer variable to update.
533 * @param pv The pointer value to assign to *ppv.
534 */
535DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
536{
537#if ARCH_BITS == 32 || ARCH_BITS == 16
538 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
539#elif ARCH_BITS == 64
540 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
541#else
542# error "ARCH_BITS is bogus"
543#endif
544}
545
546
547/**
548 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
549 *
550 * @returns Current *ppv value
551 * @param ppv Pointer to the pointer variable to update.
552 * @param pv The pointer value to assign to *ppv.
553 * @param Type The type of *ppv, sans volatile.
554 */
555#ifdef __GNUC__
556# define ASMAtomicXchgPtrT(ppv, pv, Type) \
557 __extension__ \
558 ({\
559 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
560 Type const pvTypeChecked = (pv); \
561 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
562 pvTypeCheckedRet; \
563 })
564#else
565# define ASMAtomicXchgPtrT(ppv, pv, Type) \
566 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
567#endif
568
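/* Example sketch (illustrative only; MYNODE, PMYNODE, g_pHead and detachList
 * are made-up names):
 *
 *     typedef struct MYNODE *PMYNODE;
 *     static PMYNODE volatile g_pHead;
 *
 *     PMYNODE detachList(void)
 *     {
 *         return ASMAtomicXchgPtrT(&g_pHead, NULL, PMYNODE);
 *     }
 *
 * With plain ASMAtomicXchgPtr() both the pointer-to-pointer argument and the
 * return value would have to be cast; on GCC the typed variant additionally
 * checks that the new value and *ppv really are of the given type.
 */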
569
570/**
571 * Atomically Exchange a raw-mode context pointer value, ordered.
572 *
573 * @returns Current *ppv value
574 * @param ppvRC Pointer to the pointer variable to update.
575 * @param pvRC The pointer value to assign to *ppv.
576 */
577DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
578{
579 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
580}
581
582
583/**
584 * Atomically Exchange a ring-0 pointer value, ordered.
585 *
586 * @returns Current *ppv value
587 * @param ppvR0 Pointer to the pointer variable to update.
588 * @param pvR0 The pointer value to assign to *ppv.
589 */
590DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
591{
592#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
593 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
594#elif R0_ARCH_BITS == 64
595 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
596#else
597# error "R0_ARCH_BITS is bogus"
598#endif
599}
600
601
602/**
603 * Atomically Exchange a ring-3 pointer value, ordered.
604 *
605 * @returns Current *ppv value
606 * @param ppvR3 Pointer to the pointer variable to update.
607 * @param pvR3 The pointer value to assign to *ppv.
608 */
609DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
610{
611#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
612 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
613#elif R3_ARCH_BITS == 64
614 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
615#else
616# error "R3_ARCH_BITS is bogus"
617#endif
618}
619
620
621/** @def ASMAtomicXchgHandle
622 * Atomically Exchange a typical IPRT handle value, ordered.
623 *
624 * @param ph Pointer to the value to update.
625 * @param hNew The new value to assign to *ph.
626 * @param phRes Where to store the current *ph value.
627 *
628 * @remarks This doesn't currently work for all handles (like RTFILE).
629 */
630#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
635 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
636 } while (0)
637#elif HC_ARCH_BITS == 64
638# define ASMAtomicXchgHandle(ph, hNew, phRes) \
639 do { \
640 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
641 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
642 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
643 } while (0)
644#else
645# error HC_ARCH_BITS
646#endif
647
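/* Example sketch (illustrative only; pThis->hEvt stands for a handle member of
 * some structure, and RTSEMEVENT is used as an example of a handle type this
 * works for): the typical teardown pattern takes the handle out of the
 * structure atomically before destroying it:
 *
 *     RTSEMEVENT hEvt;
 *     ASMAtomicXchgHandle(&pThis->hEvt, NIL_RTSEMEVENT, &hEvt);
 *     if (hEvt != NIL_RTSEMEVENT)
 *         RTSemEventDestroy(hEvt);
 */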
648
649/**
650 * Atomically Exchange a value whose size might differ
651 * between platforms or compilers, ordered.
652 *
653 * @param pu Pointer to the variable to update.
654 * @param uNew The value to assign to *pu.
655 * @todo This is busted as it's missing the result argument.
656 */
657#define ASMAtomicXchgSize(pu, uNew) \
658 do { \
659 switch (sizeof(*(pu))) { \
660 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
661 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
662 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
663 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
664 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
665 } \
666 } while (0)
667
668/**
669 * Atomically Exchange a value whose size might differ
670 * between platforms or compilers, ordered.
671 *
672 * @param pu Pointer to the variable to update.
673 * @param uNew The value to assign to *pu.
674 * @param puRes Where to store the current *pu value.
675 */
676#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
677 do { \
678 switch (sizeof(*(pu))) { \
679 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
680 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
681 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
682 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
683 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
684 } \
685 } while (0)
686
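/* Example sketch (illustrative only; takeOwnership, puOwner and uSelf are
 * made-up names): the size-dispatching variant is convenient for types whose
 * width differs between platforms, such as uintptr_t:
 *
 *     uintptr_t takeOwnership(uintptr_t volatile *puOwner, uintptr_t uSelf)
 *     {
 *         uintptr_t uOld;
 *         ASMAtomicXchgSizeCorrect(puOwner, uSelf, &uOld);
 *         return uOld;    // previous owner, 0 if it was free
 *     }
 */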
687
688
689/**
690 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
691 *
692 * @returns true if xchg was done.
693 * @returns false if xchg wasn't done.
694 *
695 * @param pu8 Pointer to the value to update.
696 * @param u8New The new value to assign to *pu8.
697 * @param u8Old The old value to compare *pu8 with.
698 *
699 * @remarks x86: Requires a 486 or later.
700 */
701#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
702DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
703#else
704DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
705{
706 uint8_t u8Ret;
707 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
708 "setz %1\n\t"
709 : "=m" (*pu8),
710 "=qm" (u8Ret),
711 "=a" (u8Old)
712 : "q" (u8New),
713 "2" (u8Old),
714 "m" (*pu8));
715 return (bool)u8Ret;
716}
717#endif
718
719
720/**
721 * Atomically Compare and Exchange a signed 8-bit value, ordered.
722 *
723 * @returns true if xchg was done.
724 * @returns false if xchg wasn't done.
725 *
726 * @param pi8 Pointer to the value to update.
727 * @param i8New The new value to assign to *pi8.
728 * @param i8Old The old value to compare *pi8 with.
729 *
730 * @remarks x86: Requires a 486 or later.
731 */
732DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
733{
734 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
735}
736
737
738/**
739 * Atomically Compare and Exchange a bool value, ordered.
740 *
741 * @returns true if xchg was done.
742 * @returns false if xchg wasn't done.
743 *
744 * @param pf Pointer to the value to update.
745 * @param fNew The new value to assign to *pf.
746 * @param fOld The old value to compare *pf with.
747 *
748 * @remarks x86: Requires a 486 or later.
749 */
750DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
751{
752 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
753}
754
755
756/**
757 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
758 *
759 * @returns true if xchg was done.
760 * @returns false if xchg wasn't done.
761 *
762 * @param pu32 Pointer to the value to update.
763 * @param u32New The new value to assign to *pu32.
764 * @param u32Old The old value to compare *pu32 with.
765 *
766 * @remarks x86: Requires a 486 or later.
767 */
768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
769DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
770#else
771DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
772{
773# if RT_INLINE_ASM_GNU_STYLE
774 uint8_t u8Ret;
775 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
776 "setz %1\n\t"
777 : "=m" (*pu32),
778 "=qm" (u8Ret),
779 "=a" (u32Old)
780 : "r" (u32New),
781 "2" (u32Old),
782 "m" (*pu32));
783 return (bool)u8Ret;
784
785# elif RT_INLINE_ASM_USES_INTRIN
786 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
787
788# else
789 uint32_t u32Ret;
790 __asm
791 {
792# ifdef RT_ARCH_AMD64
793 mov rdx, [pu32]
794# else
795 mov edx, [pu32]
796# endif
797 mov eax, [u32Old]
798 mov ecx, [u32New]
799# ifdef RT_ARCH_AMD64
800 lock cmpxchg [rdx], ecx
801# else
802 lock cmpxchg [edx], ecx
803# endif
804 setz al
805 movzx eax, al
806 mov [u32Ret], eax
807 }
808 return !!u32Ret;
809# endif
810}
811#endif
812
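/* Example sketch (illustrative only; updateMax and puMax are made-up names):
 * the usual compare-and-exchange retry loop, here maintaining a running
 * maximum:
 *
 *     void updateMax(uint32_t volatile *puMax, uint32_t uValue)
 *     {
 *         uint32_t uOld;
 *         do
 *             uOld = ASMAtomicReadU32(puMax);
 *         while (   uValue > uOld
 *                && !ASMAtomicCmpXchgU32(puMax, uValue, uOld));
 *     }
 *
 * ASMAtomicCmpXchgU32 only stores uValue if *puMax still equals uOld;
 * otherwise the loop re-reads the current value and tries again.
 */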
813
814/**
815 * Atomically Compare and Exchange a signed 32-bit value, ordered.
816 *
817 * @returns true if xchg was done.
818 * @returns false if xchg wasn't done.
819 *
820 * @param pi32 Pointer to the value to update.
821 * @param i32New The new value to assign to *pi32.
822 * @param i32Old The old value to compare *pi32 with.
823 *
824 * @remarks x86: Requires a 486 or later.
825 */
826DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
827{
828 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
829}
830
831
832/**
833 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
834 *
835 * @returns true if xchg was done.
836 * @returns false if xchg wasn't done.
837 *
838 * @param pu64 Pointer to the 64-bit variable to update.
839 * @param u64New The 64-bit value to assign to *pu64.
840 * @param u64Old The value to compare with.
841 *
842 * @remarks x86: Requires a Pentium or later.
843 */
844#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
845 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
849{
850# if RT_INLINE_ASM_USES_INTRIN
851 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
852
853# elif defined(RT_ARCH_AMD64)
854# if RT_INLINE_ASM_GNU_STYLE
855 uint8_t u8Ret;
856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
857 "setz %1\n\t"
858 : "=m" (*pu64),
859 "=qm" (u8Ret),
860 "=a" (u64Old)
861 : "r" (u64New),
862 "2" (u64Old),
863 "m" (*pu64));
864 return (bool)u8Ret;
865# else
866 bool fRet;
867 __asm
868 {
869 mov rdx, [pu64]
870 mov rax, [u64Old]
871 mov rcx, [u64New]
872 lock cmpxchg [rdx], rcx
873 setz al
874 mov [fRet], al
875 }
876 return fRet;
877# endif
878# else /* !RT_ARCH_AMD64 */
879 uint32_t u32Ret;
880# if RT_INLINE_ASM_GNU_STYLE
881# if defined(PIC) || defined(__PIC__)
882 uint32_t u32EBX = (uint32_t)u64New;
883 uint32_t u32Spill;
884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
885 "lock; cmpxchg8b (%6)\n\t"
886 "setz %%al\n\t"
887 "movl %4, %%ebx\n\t"
888 "movzbl %%al, %%eax\n\t"
889 : "=a" (u32Ret),
890 "=d" (u32Spill),
891# if RT_GNUC_PREREQ(4, 3)
892 "+m" (*pu64)
893# else
894 "=m" (*pu64)
895# endif
896 : "A" (u64Old),
897 "m" ( u32EBX ),
898 "c" ( (uint32_t)(u64New >> 32) ),
899 "S" (pu64));
900# else /* !PIC */
901 uint32_t u32Spill;
902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
903 "setz %%al\n\t"
904 "movzbl %%al, %%eax\n\t"
905 : "=a" (u32Ret),
906 "=d" (u32Spill),
907 "+m" (*pu64)
908 : "A" (u64Old),
909 "b" ( (uint32_t)u64New ),
910 "c" ( (uint32_t)(u64New >> 32) ));
911# endif
912 return (bool)u32Ret;
913# else
914 __asm
915 {
916 mov ebx, dword ptr [u64New]
917 mov ecx, dword ptr [u64New + 4]
918 mov edi, [pu64]
919 mov eax, dword ptr [u64Old]
920 mov edx, dword ptr [u64Old + 4]
921 lock cmpxchg8b [edi]
922 setz al
923 movzx eax, al
924 mov dword ptr [u32Ret], eax
925 }
926 return !!u32Ret;
927# endif
928# endif /* !RT_ARCH_AMD64 */
929}
930#endif
931
932
933/**
934 * Atomically Compare and exchange a signed 64-bit value, ordered.
935 *
936 * @returns true if xchg was done.
937 * @returns false if xchg wasn't done.
938 *
939 * @param pi64 Pointer to the 64-bit variable to update.
940 * @param i64 The 64-bit value to assign to *pi64.
941 * @param i64Old The value to compare with.
942 *
943 * @remarks x86: Requires a Pentium or later.
944 */
945DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
946{
947 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
948}
949
950
951/**
952 * Atomically Compare and Exchange a pointer value, ordered.
953 *
954 * @returns true if xchg was done.
955 * @returns false if xchg wasn't done.
956 *
957 * @param ppv Pointer to the value to update.
958 * @param pvNew The new value to assign to *ppv.
959 * @param pvOld The old value to compare *ppv with.
960 *
961 * @remarks x86: Requires a 486 or later.
962 */
963DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
964{
965#if ARCH_BITS == 32 || ARCH_BITS == 16
966 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
967#elif ARCH_BITS == 64
968 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
969#else
970# error "ARCH_BITS is bogus"
971#endif
972}
973
974
975/**
976 * Atomically Compare and Exchange a pointer value, ordered.
977 *
978 * @returns true if xchg was done.
979 * @returns false if xchg wasn't done.
980 *
981 * @param ppv Pointer to the value to update.
982 * @param pvNew The new value to assign to *ppv.
983 * @param pvOld The old value to compare *ppv with.
984 *
985 * @remarks This is relatively type safe on GCC platforms.
986 * @remarks x86: Requires a 486 or later.
987 */
988#ifdef __GNUC__
989# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
990 __extension__ \
991 ({\
992 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
993 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
994 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
995 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
996 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
997 fMacroRet; \
998 })
999#else
1000# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1001 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1002#endif
1003
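/* Example sketch (illustrative only; MYITEM, g_pStack and pushItem are made-up
 * names): lock-free LIFO push using the typed pointer compare-and-exchange:
 *
 *     typedef struct MYITEM { struct MYITEM *pNext; } MYITEM;
 *     static MYITEM * volatile g_pStack;
 *
 *     void pushItem(MYITEM *pItem)
 *     {
 *         MYITEM *pOld;
 *         do
 *         {
 *             pOld = g_pStack;
 *             pItem->pNext = pOld;
 *         } while (!ASMAtomicCmpXchgPtr(&g_pStack, pItem, pOld));
 *     }
 */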
1004
1005/** @def ASMAtomicCmpXchgHandle
1006 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1007 *
1008 * @param ph Pointer to the value to update.
1009 * @param hNew The new value to assign to *ph.
1010 * @param hOld The old value to compare *ph with.
1011 * @param fRc Where to store the result.
1012 *
1013 * @remarks This doesn't currently work for all handles (like RTFILE).
1014 * @remarks x86: Requires a 486 or later.
1015 */
1016#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1017# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1018 do { \
1019 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1020 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1021 } while (0)
1022#elif HC_ARCH_BITS == 64
1023# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1024 do { \
1025 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1026 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1027 } while (0)
1028#else
1029# error HC_ARCH_BITS
1030#endif
1031
1032
1033/** @def ASMAtomicCmpXchgSize
1034 * Atomically Compare and Exchange a value whose size might differ
1035 * between platforms or compilers, ordered.
1036 *
1037 * @param pu Pointer to the value to update.
1038 * @param uNew The new value to assign to *pu.
1039 * @param uOld The old value to compare *pu with.
1040 * @param fRc Where to store the result.
1041 *
1042 * @remarks x86: Requires a 486 or later.
1043 */
1044#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1045 do { \
1046 switch (sizeof(*(pu))) { \
1047 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1048 break; \
1049 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1050 break; \
1051 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1052 (fRc) = false; \
1053 break; \
1054 } \
1055 } while (0)
1056
1057
1058/**
1059 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1060 * passes back old value, ordered.
1061 *
1062 * @returns true if xchg was done.
1063 * @returns false if xchg wasn't done.
1064 *
1065 * @param pu32 Pointer to the value to update.
1066 * @param u32New The new value to assign to *pu32.
1067 * @param u32Old The old value to compare *pu32 with.
1068 * @param pu32Old Pointer to store the old value at.
1069 *
1070 * @remarks x86: Requires a 486 or later.
1071 */
1072#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1073DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1074#else
1075DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1076{
1077# if RT_INLINE_ASM_GNU_STYLE
1078 uint8_t u8Ret;
1079 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1080 "setz %1\n\t"
1081 : "=m" (*pu32),
1082 "=qm" (u8Ret),
1083 "=a" (*pu32Old)
1084 : "r" (u32New),
1085 "a" (u32Old),
1086 "m" (*pu32));
1087 return (bool)u8Ret;
1088
1089# elif RT_INLINE_ASM_USES_INTRIN
1090 return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1091
1092# else
1093 uint32_t u32Ret;
1094 __asm
1095 {
1096# ifdef RT_ARCH_AMD64
1097 mov rdx, [pu32]
1098# else
1099 mov edx, [pu32]
1100# endif
1101 mov eax, [u32Old]
1102 mov ecx, [u32New]
1103# ifdef RT_ARCH_AMD64
1104 lock cmpxchg [rdx], ecx
1105 mov rdx, [pu32Old]
1106 mov [rdx], eax
1107# else
1108 lock cmpxchg [edx], ecx
1109 mov edx, [pu32Old]
1110 mov [edx], eax
1111# endif
1112 setz al
1113 movzx eax, al
1114 mov [u32Ret], eax
1115 }
1116 return !!u32Ret;
1117# endif
1118}
1119#endif
1120
1121
1122/**
1123 * Atomically Compare and Exchange a signed 32-bit value, additionally
1124 * passes back old value, ordered.
1125 *
1126 * @returns true if xchg was done.
1127 * @returns false if xchg wasn't done.
1128 *
1129 * @param pi32 Pointer to the value to update.
1130 * @param i32New The new value to assign to *pi32.
1131 * @param i32Old The old value to compare *pi32 with.
1132 * @param pi32Old Pointer to store the old value at.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 */
1136DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1137{
1138 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1139}
1140
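/* Example sketch (illustrative only; setBits is a made-up name): the Ex
 * variants pass back the value actually found, so a retry loop needs no
 * separate re-read:
 *
 *     uint32_t setBits(uint32_t volatile *pu32, uint32_t fBits)
 *     {
 *         uint32_t uOld = ASMAtomicReadU32(pu32);
 *         while (!ASMAtomicCmpXchgExU32(pu32, uOld | fBits, uOld, &uOld))
 *         {
 *             // uOld now holds the current *pu32 value; just retry with it.
 *         }
 *         return uOld | fBits;
 *     }
 */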
1141
1142/**
1143 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1144 * passing back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pu64 Pointer to the 64-bit variable to update.
1150 * @param u64New The 64-bit value to assign to *pu64.
1151 * @param u64Old The value to compare with.
1152 * @param pu64Old Pointer to store the old value at.
1153 *
1154 * @remarks x86: Requires a Pentium or later.
1155 */
1156#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1157 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1158DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1161{
1162# if RT_INLINE_ASM_USES_INTRIN
1163 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1164
1165# elif defined(RT_ARCH_AMD64)
1166# if RT_INLINE_ASM_GNU_STYLE
1167 uint8_t u8Ret;
1168 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1169 "setz %1\n\t"
1170 : "=m" (*pu64),
1171 "=qm" (u8Ret),
1172 "=a" (*pu64Old)
1173 : "r" (u64New),
1174 "a" (u64Old),
1175 "m" (*pu64));
1176 return (bool)u8Ret;
1177# else
1178 bool fRet;
1179 __asm
1180 {
1181 mov rdx, [pu64]
1182 mov rax, [u64Old]
1183 mov rcx, [u64New]
1184 lock cmpxchg [rdx], rcx
1185 mov rdx, [pu64Old]
1186 mov [rdx], rax
1187 setz al
1188 mov [fRet], al
1189 }
1190 return fRet;
1191# endif
1192# else /* !RT_ARCH_AMD64 */
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint64_t u64Ret;
1195# if defined(PIC) || defined(__PIC__)
1196 /* NB: this code uses a memory clobber description, because the clean
1197 * solution with an output value for *pu64 makes gcc run out of registers.
1198 * This will cause suboptimal code, and anyone with a better solution is
1199 * welcome to improve this. */
1200 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1201 "lock; cmpxchg8b %3\n\t"
1202 "xchgl %%ebx, %1\n\t"
1203 : "=A" (u64Ret)
1204 : "DS" ((uint32_t)u64New),
1205 "c" ((uint32_t)(u64New >> 32)),
1206 "m" (*pu64),
1207 "0" (u64Old)
1208 : "memory" );
1209# else /* !PIC */
1210 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1211 : "=A" (u64Ret),
1212 "=m" (*pu64)
1213 : "b" ((uint32_t)u64New),
1214 "c" ((uint32_t)(u64New >> 32)),
1215 "m" (*pu64),
1216 "0" (u64Old));
1217# endif
1218 *pu64Old = u64Ret;
1219 return u64Ret == u64Old;
1220# else
1221 uint32_t u32Ret;
1222 __asm
1223 {
1224 mov ebx, dword ptr [u64New]
1225 mov ecx, dword ptr [u64New + 4]
1226 mov edi, [pu64]
1227 mov eax, dword ptr [u64Old]
1228 mov edx, dword ptr [u64Old + 4]
1229 lock cmpxchg8b [edi]
1230 mov ebx, [pu64Old]
1231 mov [ebx], eax
1232 setz al
1233 movzx eax, al
1234 add ebx, 4
1235 mov [ebx], edx
1236 mov dword ptr [u32Ret], eax
1237 }
1238 return !!u32Ret;
1239# endif
1240# endif /* !RT_ARCH_AMD64 */
1241}
1242#endif
1243
1244
1245/**
1246 * Atomically Compare and exchange a signed 64-bit value, additionally
1247 * passing back old value, ordered.
1248 *
1249 * @returns true if xchg was done.
1250 * @returns false if xchg wasn't done.
1251 *
1252 * @param pi64 Pointer to the 64-bit variable to update.
1253 * @param i64 The 64-bit value to assign to *pi64.
1254 * @param i64Old The value to compare with.
1255 * @param pi64Old Pointer to store the old value at.
1256 *
1257 * @remarks x86: Requires a Pentium or later.
1258 */
1259DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1260{
1261 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1262}
1263
1264/** @def ASMAtomicCmpXchgExHandle
1265 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1266 *
1267 * @param ph Pointer to the value to update.
1268 * @param hNew The new value to assign to *ph.
1269 * @param hOld The old value to compare *ph with.
1270 * @param fRc Where to store the result.
1271 * @param phOldVal Pointer to where to store the old value.
1272 *
1273 * @remarks This doesn't currently work for all handles (like RTFILE).
1274 */
1275#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1279 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1281 } while (0)
1282#elif HC_ARCH_BITS == 64
1283# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1286 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1287 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1288 } while (0)
1289#else
1290# error HC_ARCH_BITS
1291#endif
1292
1293
1294/** @def ASMAtomicCmpXchgExSize
1295 * Atomically Compare and Exchange a value whose size might differ
1296 * between platforms or compilers. Additionally passes back old value.
1297 *
1298 * @param pu Pointer to the value to update.
1299 * @param uNew The new value to assign to *pu.
1300 * @param uOld The old value to compare *pu with.
1301 * @param fRc Where to store the result.
1302 * @param puOldVal Pointer to where to store the old value.
1303 *
1304 * @remarks x86: Requires a 486 or later.
1305 */
1306#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1307 do { \
1308 switch (sizeof(*(pu))) { \
1309 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1310 break; \
1311 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1312 break; \
1313 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1314 (fRc) = false; \
1315 *(puOldVal) = 0; \
1316 break; \
1317 } \
1318 } while (0)
1319
1320
1321/**
1322 * Atomically Compare and Exchange a pointer value, additionally
1323 * passing back old value, ordered.
1324 *
1325 * @returns true if xchg was done.
1326 * @returns false if xchg wasn't done.
1327 *
1328 * @param ppv Pointer to the value to update.
1329 * @param pvNew The new value to assign to *ppv.
1330 * @param pvOld The old value to compare *ppv with.
1331 * @param ppvOld Pointer to store the old value at.
1332 *
1333 * @remarks x86: Requires a 486 or later.
1334 */
1335DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1336 void RT_FAR * RT_FAR *ppvOld)
1337{
1338#if ARCH_BITS == 32 || ARCH_BITS == 16
1339 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1340#elif ARCH_BITS == 64
1341 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1342#else
1343# error "ARCH_BITS is bogus"
1344#endif
1345}
1346
1347
1348/**
1349 * Atomically Compare and Exchange a pointer value, additionally
1350 * passing back old value, ordered.
1351 *
1352 * @returns true if xchg was done.
1353 * @returns false if xchg wasn't done.
1354 *
1355 * @param ppv Pointer to the value to update.
1356 * @param pvNew The new value to assign to *ppv.
1357 * @param pvOld The old value to compare *ppv with.
1358 * @param ppvOld Pointer to store the old value at.
1359 *
1360 * @remarks This is relatively type safe on GCC platforms.
1361 * @remarks x86: Requires a 486 or later.
1362 */
1363#ifdef __GNUC__
1364# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1365 __extension__ \
1366 ({\
1367 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1368 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1369 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1370 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1371 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1372 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1373 (void **)ppvOldTypeChecked); \
1374 fMacroRet; \
1375 })
1376#else
1377# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1378 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1379#endif
1380
1381
1382/**
1383 * Virtualization unfriendly serializing instruction, always exits.
1384 */
1385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1386DECLASM(void) ASMSerializeInstructionCpuId(void);
1387#else
1388DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1389{
1390# if RT_INLINE_ASM_GNU_STYLE
1391 RTCCUINTREG xAX = 0;
1392# ifdef RT_ARCH_AMD64
1393 __asm__ __volatile__ ("cpuid"
1394 : "=a" (xAX)
1395 : "0" (xAX)
1396 : "rbx", "rcx", "rdx", "memory");
1397# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1398 __asm__ __volatile__ ("push %%ebx\n\t"
1399 "cpuid\n\t"
1400 "pop %%ebx\n\t"
1401 : "=a" (xAX)
1402 : "0" (xAX)
1403 : "ecx", "edx", "memory");
1404# else
1405 __asm__ __volatile__ ("cpuid"
1406 : "=a" (xAX)
1407 : "0" (xAX)
1408 : "ebx", "ecx", "edx", "memory");
1409# endif
1410
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 int aInfo[4];
1413 _ReadWriteBarrier();
1414 __cpuid(aInfo, 0);
1415
1416# else
1417 __asm
1418 {
1419 push ebx
1420 xor eax, eax
1421 cpuid
1422 pop ebx
1423 }
1424# endif
1425}
1426#endif
1427
1428/**
1429 * Virtualization friendly serializing instruction, though more expensive.
1430 */
1431#if RT_INLINE_ASM_EXTERNAL
1432DECLASM(void) ASMSerializeInstructionIRet(void);
1433#else
1434DECLINLINE(void) ASMSerializeInstructionIRet(void)
1435{
1436# if RT_INLINE_ASM_GNU_STYLE
1437# ifdef RT_ARCH_AMD64
1438 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1439 "subq $128, %%rsp\n\t" /*redzone*/
1440 "mov %%ss, %%eax\n\t"
1441 "pushq %%rax\n\t"
1442 "pushq %%r10\n\t"
1443 "pushfq\n\t"
1444 "movl %%cs, %%eax\n\t"
1445 "pushq %%rax\n\t"
1446 "leaq 1f(%%rip), %%rax\n\t"
1447 "pushq %%rax\n\t"
1448 "iretq\n\t"
1449 "1:\n\t"
1450 ::: "rax", "r10", "memory");
1451# else
1452 __asm__ __volatile__ ("pushfl\n\t"
1453 "pushl %%cs\n\t"
1454 "pushl $1f\n\t"
1455 "iretl\n\t"
1456 "1:\n\t"
1457 ::: "memory");
1458# endif
1459
1460# else
1461 __asm
1462 {
1463 pushfd
1464 push cs
1465 push la_ret
1466 iretd
1467 la_ret:
1468 }
1469# endif
1470}
1471#endif
1472
1473/**
1474 * Virtualization friendlier serializing instruction, may still cause exits.
1475 */
1476#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1477DECLASM(void) ASMSerializeInstructionRdTscp(void);
1478#else
1479DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1480{
1481# if RT_INLINE_ASM_GNU_STYLE
1482 /* rdtscp is not supported by ancient linux build VM of course :-( */
1483# ifdef RT_ARCH_AMD64
1484 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1485 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1486# else
1487 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1488 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1489# endif
1490# else
1491# if RT_INLINE_ASM_USES_INTRIN >= 15
1492 uint32_t uIgnore;
1493 _ReadWriteBarrier();
1494 (void)__rdtscp(&uIgnore);
1495 (void)uIgnore;
1496# else
1497 __asm
1498 {
1499 rdtscp
1500 }
1501# endif
1502# endif
1503}
1504#endif
1505
1506
1507/**
1508 * Serialize Instruction.
1509 */
1510#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1511# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1512#else
1513# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1514#endif
1515
1516
1517/**
1518 * Memory fence, waits for any pending writes and reads to complete.
1519 */
1520DECLINLINE(void) ASMMemoryFence(void)
1521{
1522 /** @todo use mfence? check if all cpus we care for support it. */
1523#if ARCH_BITS == 16
1524 uint16_t volatile u16;
1525 ASMAtomicXchgU16(&u16, 0);
1526#else
1527 uint32_t volatile u32;
1528 ASMAtomicXchgU32(&u32, 0);
1529#endif
1530}
1531
1532
1533/**
1534 * Write fence, waits for any pending writes to complete.
1535 */
1536DECLINLINE(void) ASMWriteFence(void)
1537{
1538 /** @todo use sfence? check if all cpus we care for support it. */
1539 ASMMemoryFence();
1540}
1541
1542
1543/**
1544 * Read fence, waits for any pending reads to complete.
1545 */
1546DECLINLINE(void) ASMReadFence(void)
1547{
1548 /** @todo use lfence? check if all cpus we care for support it. */
1549 ASMMemoryFence();
1550}
1551
1552
1553/**
1554 * Atomically reads an unsigned 8-bit value, ordered.
1555 *
1556 * @returns Current *pu8 value
1557 * @param pu8 Pointer to the 8-bit variable to read.
1558 */
1559DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1560{
1561 ASMMemoryFence();
1562 return *pu8; /* byte reads are atomic on x86 */
1563}
1564
1565
1566/**
1567 * Atomically reads an unsigned 8-bit value, unordered.
1568 *
1569 * @returns Current *pu8 value
1570 * @param pu8 Pointer to the 8-bit variable to read.
1571 */
1572DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1573{
1574 return *pu8; /* byte reads are atomic on x86 */
1575}
1576
1577
1578/**
1579 * Atomically reads a signed 8-bit value, ordered.
1580 *
1581 * @returns Current *pi8 value
1582 * @param pi8 Pointer to the 8-bit variable to read.
1583 */
1584DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1585{
1586 ASMMemoryFence();
1587 return *pi8; /* byte reads are atomic on x86 */
1588}
1589
1590
1591/**
1592 * Atomically reads a signed 8-bit value, unordered.
1593 *
1594 * @returns Current *pi8 value
1595 * @param pi8 Pointer to the 8-bit variable to read.
1596 */
1597DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1598{
1599 return *pi8; /* byte reads are atomic on x86 */
1600}
1601
1602
1603/**
1604 * Atomically reads an unsigned 16-bit value, ordered.
1605 *
1606 * @returns Current *pu16 value
1607 * @param pu16 Pointer to the 16-bit variable to read.
1608 */
1609DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1610{
1611 ASMMemoryFence();
1612 Assert(!((uintptr_t)pu16 & 1));
1613 return *pu16;
1614}
1615
1616
1617/**
1618 * Atomically reads an unsigned 16-bit value, unordered.
1619 *
1620 * @returns Current *pu16 value
1621 * @param pu16 Pointer to the 16-bit variable to read.
1622 */
1623DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1624{
1625 Assert(!((uintptr_t)pu16 & 1));
1626 return *pu16;
1627}
1628
1629
1630/**
1631 * Atomically reads a signed 16-bit value, ordered.
1632 *
1633 * @returns Current *pi16 value
1634 * @param pi16 Pointer to the 16-bit variable to read.
1635 */
1636DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1637{
1638 ASMMemoryFence();
1639 Assert(!((uintptr_t)pi16 & 1));
1640 return *pi16;
1641}
1642
1643
1644/**
1645 * Atomically reads a signed 16-bit value, unordered.
1646 *
1647 * @returns Current *pi16 value
1648 * @param pi16 Pointer to the 16-bit variable to read.
1649 */
1650DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1651{
1652 Assert(!((uintptr_t)pi16 & 1));
1653 return *pi16;
1654}
1655
1656
1657/**
1658 * Atomically reads an unsigned 32-bit value, ordered.
1659 *
1660 * @returns Current *pu32 value
1661 * @param pu32 Pointer to the 32-bit variable to read.
1662 */
1663DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1664{
1665 ASMMemoryFence();
1666 Assert(!((uintptr_t)pu32 & 3));
1667#if ARCH_BITS == 16
1668 AssertFailed(); /** @todo 16-bit */
1669#endif
1670 return *pu32;
1671}
1672
1673
1674/**
1675 * Atomically reads an unsigned 32-bit value, unordered.
1676 *
1677 * @returns Current *pu32 value
1678 * @param pu32 Pointer to the 32-bit variable to read.
1679 */
1680DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1681{
1682 Assert(!((uintptr_t)pu32 & 3));
1683#if ARCH_BITS == 16
1684 AssertFailed(); /** @todo 16-bit */
1685#endif
1686 return *pu32;
1687}
1688
1689
1690/**
1691 * Atomically reads a signed 32-bit value, ordered.
1692 *
1693 * @returns Current *pi32 value
1694 * @param pi32 Pointer to the 32-bit variable to read.
1695 */
1696DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1697{
1698 ASMMemoryFence();
1699 Assert(!((uintptr_t)pi32 & 3));
1700#if ARCH_BITS == 16
1701 AssertFailed(); /** @todo 16-bit */
1702#endif
1703 return *pi32;
1704}
1705
1706
1707/**
1708 * Atomically reads a signed 32-bit value, unordered.
1709 *
1710 * @returns Current *pi32 value
1711 * @param pi32 Pointer to the 32-bit variable to read.
1712 */
1713DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1714{
1715 Assert(!((uintptr_t)pi32 & 3));
1716#if ARCH_BITS == 16
1717 AssertFailed(); /** @todo 16-bit */
1718#endif
1719 return *pi32;
1720}
1721
1722
1723/**
1724 * Atomically reads an unsigned 64-bit value, ordered.
1725 *
1726 * @returns Current *pu64 value
1727 * @param pu64 Pointer to the 64-bit variable to read.
1728 * The memory pointed to must be writable.
1729 *
1730 * @remarks This may fault if the memory is read-only!
1731 * @remarks x86: Requires a Pentium or later.
1732 */
1733#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1734 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1735DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
1736#else
1737DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
1738{
1739 uint64_t u64;
1740# ifdef RT_ARCH_AMD64
1741 Assert(!((uintptr_t)pu64 & 7));
1742/*# if RT_INLINE_ASM_GNU_STYLE
1743 __asm__ __volatile__( "mfence\n\t"
1744 "movq %1, %0\n\t"
1745 : "=r" (u64)
1746 : "m" (*pu64));
1747# else
1748 __asm
1749 {
1750 mfence
1751 mov rdx, [pu64]
1752 mov rax, [rdx]
1753 mov [u64], rax
1754 }
1755# endif*/
1756 ASMMemoryFence();
1757 u64 = *pu64;
1758# else /* !RT_ARCH_AMD64 */
1759# if RT_INLINE_ASM_GNU_STYLE
1760# if defined(PIC) || defined(__PIC__)
1761 uint32_t u32EBX = 0;
1762 Assert(!((uintptr_t)pu64 & 7));
1763 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1764 "lock; cmpxchg8b (%5)\n\t"
1765 "movl %3, %%ebx\n\t"
1766 : "=A" (u64),
1767# if RT_GNUC_PREREQ(4, 3)
1768 "+m" (*pu64)
1769# else
1770 "=m" (*pu64)
1771# endif
1772 : "0" (0ULL),
1773 "m" (u32EBX),
1774 "c" (0),
1775 "S" (pu64));
1776# else /* !PIC */
1777 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1778 : "=A" (u64),
1779 "+m" (*pu64)
1780 : "0" (0ULL),
1781 "b" (0),
1782 "c" (0));
1783# endif
1784# else
1785 Assert(!((uintptr_t)pu64 & 7));
1786 __asm
1787 {
1788 xor eax, eax
1789 xor edx, edx
1790 mov edi, pu64
1791 xor ecx, ecx
1792 xor ebx, ebx
1793 lock cmpxchg8b [edi]
1794 mov dword ptr [u64], eax
1795 mov dword ptr [u64 + 4], edx
1796 }
1797# endif
1798# endif /* !RT_ARCH_AMD64 */
1799 return u64;
1800}
1801#endif
1802
1803
1804/**
1805 * Atomically reads an unsigned 64-bit value, unordered.
1806 *
1807 * @returns Current *pu64 value
1808 * @param pu64 Pointer to the 64-bit variable to read.
1809 * The memory pointed to must be writable.
1810 *
1811 * @remarks This may fault if the memory is read-only!
1812 * @remarks x86: Requires a Pentium or later.
1813 */
1814#if !defined(RT_ARCH_AMD64) \
1815 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1816 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1817DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
1818#else
1819DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
1820{
1821 uint64_t u64;
1822# ifdef RT_ARCH_AMD64
1823 Assert(!((uintptr_t)pu64 & 7));
1824/*# if RT_INLINE_ASM_GNU_STYLE
1825 Assert(!((uintptr_t)pu64 & 7));
1826 __asm__ __volatile__("movq %1, %0\n\t"
1827 : "=r" (u64)
1828 : "m" (*pu64));
1829# else
1830 __asm
1831 {
1832 mov rdx, [pu64]
1833 mov rax, [rdx]
1834 mov [u64], rax
1835 }
1836# endif */
1837 u64 = *pu64;
1838# else /* !RT_ARCH_AMD64 */
1839# if RT_INLINE_ASM_GNU_STYLE
1840# if defined(PIC) || defined(__PIC__)
1841 uint32_t u32EBX = 0;
1842 uint32_t u32Spill;
1843 Assert(!((uintptr_t)pu64 & 7));
1844 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1845 "xor %%ecx,%%ecx\n\t"
1846 "xor %%edx,%%edx\n\t"
1847 "xchgl %%ebx, %3\n\t"
1848 "lock; cmpxchg8b (%4)\n\t"
1849 "movl %3, %%ebx\n\t"
1850 : "=A" (u64),
1851# if RT_GNUC_PREREQ(4, 3)
1852 "+m" (*pu64),
1853# else
1854 "=m" (*pu64),
1855# endif
1856 "=c" (u32Spill)
1857 : "m" (u32EBX),
1858 "S" (pu64));
1859# else /* !PIC */
1860 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1861 : "=A" (u64),
1862 "+m" (*pu64)
1863 : "0" (0ULL),
1864 "b" (0),
1865 "c" (0));
1866# endif
1867# else
1868 Assert(!((uintptr_t)pu64 & 7));
1869 __asm
1870 {
1871 xor eax, eax
1872 xor edx, edx
1873 mov edi, pu64
1874 xor ecx, ecx
1875 xor ebx, ebx
1876 lock cmpxchg8b [edi]
1877 mov dword ptr [u64], eax
1878 mov dword ptr [u64 + 4], edx
1879 }
1880# endif
1881# endif /* !RT_ARCH_AMD64 */
1882 return u64;
1883}
1884#endif
1885
1886
1887/**
1888 * Atomically reads a signed 64-bit value, ordered.
1889 *
1890 * @returns Current *pi64 value
1891 * @param pi64 Pointer to the 64-bit variable to read.
1892 * The memory pointed to must be writable.
1893 *
1894 * @remarks This may fault if the memory is read-only!
1895 * @remarks x86: Requires a Pentium or later.
1896 */
1897DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1898{
1899 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1900}
1901
1902
1903/**
1904 * Atomically reads a signed 64-bit value, unordered.
1905 *
1906 * @returns Current *pi64 value
1907 * @param pi64 Pointer to the 64-bit variable to read.
1908 * The memory pointed to must be writable.
1909 *
1910 * @remarks This may fault if the memory is read-only!
1911 * @remarks x86: Requires a Pentium or later.
1912 */
1913DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1914{
1915 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1916}
1917
1918
1919/**
1920 * Atomically reads a size_t value, ordered.
1921 *
1922 * @returns Current *pcb value
1923 * @param pcb Pointer to the size_t variable to read.
1924 */
1925DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1926{
1927#if ARCH_BITS == 64
1928 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1929#elif ARCH_BITS == 32
1930 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1931#elif ARCH_BITS == 16
1932 AssertCompileSize(size_t, 2);
1933 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
1934#else
1935# error "Unsupported ARCH_BITS value"
1936#endif
1937}
1938
1939
1940/**
1941 * Atomically reads a size_t value, unordered.
1942 *
1943 * @returns Current *pcb value
1944 * @param pcb Pointer to the size_t variable to read.
1945 */
1946DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
1947{
1948#if ARCH_BITS == 64
1949 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
1950#elif ARCH_BITS == 32
1951 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
1952#elif ARCH_BITS == 16
1953 AssertCompileSize(size_t, 2);
1954 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
1955#else
1956# error "Unsupported ARCH_BITS value"
1957#endif
1958}
1959
1960
1961/**
1962 * Atomically reads a pointer value, ordered.
1963 *
1964 * @returns Current *pv value
1965 * @param ppv Pointer to the pointer variable to read.
1966 *
1967 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1968 * requires less typing (no casts).
1969 */
1970DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
1971{
1972#if ARCH_BITS == 32 || ARCH_BITS == 16
1973 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
1974#elif ARCH_BITS == 64
1975 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
1976#else
1977# error "ARCH_BITS is bogus"
1978#endif
1979}
1980
1981/**
1982 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1983 *
1984 * @returns Current *pv value
1985 * @param ppv Pointer to the pointer variable to read.
1986 * @param Type The type of *ppv, sans volatile.
1987 */
1988#ifdef __GNUC__
1989# define ASMAtomicReadPtrT(ppv, Type) \
1990 __extension__ \
1991 ({\
1992 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1993 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1994 pvTypeChecked; \
1995 })
1996#else
1997# define ASMAtomicReadPtrT(ppv, Type) \
1998 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
1999#endif
2000
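/* Illustrative usage sketch: ASMAtomicReadPtrT keeps the pointer type, so no
 * manual casting is needed on the caller side.  MYCALLBACKS and g_pCallbacks
 * are hypothetical names:
 *
 *      typedef struct MYCALLBACKS { void (*pfnNotify)(void); } MYCALLBACKS;
 *      static MYCALLBACKS * volatile g_pCallbacks;
 *
 *      MYCALLBACKS *pCallbacks = ASMAtomicReadPtrT(&g_pCallbacks, MYCALLBACKS *);
 *      if (pCallbacks)
 *          pCallbacks->pfnNotify();
 */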
2001
2002/**
2003 * Atomically reads a pointer value, unordered.
2004 *
2005 * @returns Current *pv value
2006 * @param ppv Pointer to the pointer variable to read.
2007 *
2008 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2009 * requires less typing (no casts).
2010 */
2011DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2012{
2013#if ARCH_BITS == 32 || ARCH_BITS == 16
2014 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2015#elif ARCH_BITS == 64
2016 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2017#else
2018# error "ARCH_BITS is bogus"
2019#endif
2020}
2021
2022
2023/**
2024 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2025 *
2026 * @returns Current *pv value
2027 * @param ppv Pointer to the pointer variable to read.
2028 * @param Type The type of *ppv, sans volatile.
2029 */
2030#ifdef __GNUC__
2031# define ASMAtomicUoReadPtrT(ppv, Type) \
2032 __extension__ \
2033 ({\
2034 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2035 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2036 pvTypeChecked; \
2037 })
2038#else
2039# define ASMAtomicUoReadPtrT(ppv, Type) \
2040 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2041#endif
2042
2043
2044/**
2045 * Atomically reads a boolean value, ordered.
2046 *
2047 * @returns Current *pf value
2048 * @param pf Pointer to the boolean variable to read.
2049 */
2050DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2051{
2052 ASMMemoryFence();
2053 return *pf; /* byte reads are atomic on x86 */
2054}
2055
2056
2057/**
2058 * Atomically reads a boolean value, unordered.
2059 *
2060 * @returns Current *pf value
2061 * @param pf Pointer to the boolean variable to read.
2062 */
2063DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2064{
2065 return *pf; /* byte reads are atomic on x86 */
2066}
2067
2068
2069/**
2070 * Atomically read a typical IPRT handle value, ordered.
2071 *
2072 * @param ph Pointer to the handle variable to read.
2073 * @param phRes Where to store the result.
2074 *
2075 * @remarks This doesn't currently work for all handles (like RTFILE).
2076 */
2077#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2078# define ASMAtomicReadHandle(ph, phRes) \
2079 do { \
2080 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2081 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2082 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2083 } while (0)
2084#elif HC_ARCH_BITS == 64
2085# define ASMAtomicReadHandle(ph, phRes) \
2086 do { \
2087 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2088 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2089 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2090 } while (0)
2091#else
2092# error HC_ARCH_BITS
2093#endif
2094
2095
2096/**
2097 * Atomically read a typical IPRT handle value, unordered.
2098 *
2099 * @param ph Pointer to the handle variable to read.
2100 * @param phRes Where to store the result.
2101 *
2102 * @remarks This doesn't currently work for all handles (like RTFILE).
2103 */
2104#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2105# define ASMAtomicUoReadHandle(ph, phRes) \
2106 do { \
2107 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2108 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2109 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2110 } while (0)
2111#elif HC_ARCH_BITS == 64
2112# define ASMAtomicUoReadHandle(ph, phRes) \
2113 do { \
2114 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2115 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2116 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2117 } while (0)
2118#else
2119# error HC_ARCH_BITS
2120#endif
2121
2122
2123/**
2124 * Atomically read a value whose size might differ
2125 * between platforms or compilers, ordered.
2126 *
2127 * @param pu Pointer to the variable to read.
2128 * @param puRes Where to store the result.
2129 */
2130#define ASMAtomicReadSize(pu, puRes) \
2131 do { \
2132 switch (sizeof(*(pu))) { \
2133 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2134 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2135 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2136 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2137 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2138 } \
2139 } while (0)
2140
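/* Illustrative usage sketch: ASMAtomicReadSize picks the fixed-width read that
 * matches sizeof(*(pu)), which is convenient for types whose width differs
 * between platforms.  Using a hypothetical unsigned long state variable
 * (32-bit on some ABIs, 64-bit on others):
 *
 *      static unsigned long volatile g_ulState;
 *
 *      unsigned long ulState;
 *      ASMAtomicReadSize(&g_ulState, &ulState);
 */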
2141
2142/**
2143 * Atomically read a value whose size might differ
2144 * between platforms or compilers, unordered.
2145 *
2146 * @param pu Pointer to the variable to read.
2147 * @param puRes Where to store the result.
2148 */
2149#define ASMAtomicUoReadSize(pu, puRes) \
2150 do { \
2151 switch (sizeof(*(pu))) { \
2152 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2153 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2154 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2155 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2156        default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2157 } \
2158 } while (0)
2159
2160
2161/**
2162 * Atomically writes an unsigned 8-bit value, ordered.
2163 *
2164 * @param pu8 Pointer to the 8-bit variable.
2165 * @param u8 The 8-bit value to assign to *pu8.
2166 */
2167DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2168{
2169 ASMAtomicXchgU8(pu8, u8);
2170}
2171
2172
2173/**
2174 * Atomically writes an unsigned 8-bit value, unordered.
2175 *
2176 * @param pu8 Pointer to the 8-bit variable.
2177 * @param u8 The 8-bit value to assign to *pu8.
2178 */
2179DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2180{
2181 *pu8 = u8; /* byte writes are atomic on x86 */
2182}
2183
2184
2185/**
2186 * Atomically writes a signed 8-bit value, ordered.
2187 *
2188 * @param pi8 Pointer to the 8-bit variable to write.
2189 * @param i8 The 8-bit value to assign to *pi8.
2190 */
2191DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2192{
2193 ASMAtomicXchgS8(pi8, i8);
2194}
2195
2196
2197/**
2198 * Atomically writes a signed 8-bit value, unordered.
2199 *
2200 * @param pi8 Pointer to the 8-bit variable to write.
2201 * @param i8 The 8-bit value to assign to *pi8.
2202 */
2203DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2204{
2205 *pi8 = i8; /* byte writes are atomic on x86 */
2206}
2207
2208
2209/**
2210 * Atomically writes an unsigned 16-bit value, ordered.
2211 *
2212 * @param pu16 Pointer to the 16-bit variable to write.
2213 * @param u16 The 16-bit value to assign to *pu16.
2214 */
2215DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2216{
2217 ASMAtomicXchgU16(pu16, u16);
2218}
2219
2220
2221/**
2222 * Atomically writes an unsigned 16-bit value, unordered.
2223 *
2224 * @param pu16 Pointer to the 16-bit variable to write.
2225 * @param u16 The 16-bit value to assign to *pu16.
2226 */
2227DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2228{
2229 Assert(!((uintptr_t)pu16 & 1));
2230 *pu16 = u16;
2231}
2232
2233
2234/**
2235 * Atomically writes a signed 16-bit value, ordered.
2236 *
2237 * @param pi16 Pointer to the 16-bit variable to write.
2238 * @param i16 The 16-bit value to assign to *pi16.
2239 */
2240DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2241{
2242 ASMAtomicXchgS16(pi16, i16);
2243}
2244
2245
2246/**
2247 * Atomically writes a signed 16-bit value, unordered.
2248 *
2249 * @param pi16 Pointer to the 16-bit variable to write.
2250 * @param i16 The 16-bit value to assign to *pi16.
2251 */
2252DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2253{
2254 Assert(!((uintptr_t)pi16 & 1));
2255 *pi16 = i16;
2256}
2257
2258
2259/**
2260 * Atomically writes an unsigned 32-bit value, ordered.
2261 *
2262 * @param pu32 Pointer to the 32-bit variable to write.
2263 * @param u32 The 32-bit value to assign to *pu32.
2264 */
2265DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2266{
2267 ASMAtomicXchgU32(pu32, u32);
2268}
2269
2270
2271/**
2272 * Atomically writes an unsigned 32-bit value, unordered.
2273 *
2274 * @param pu32 Pointer to the 32-bit variable to write.
2275 * @param u32 The 32-bit value to assign to *pu32.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2278{
2279 Assert(!((uintptr_t)pu32 & 3));
2280#if ARCH_BITS >= 32
2281 *pu32 = u32;
2282#else
2283 ASMAtomicXchgU32(pu32, u32);
2284#endif
2285}
2286
2287
2288/**
2289 * Atomically writes a signed 32-bit value, ordered.
2290 *
2291 * @param pi32 Pointer to the 32-bit variable to write.
2292 * @param i32 The 32-bit value to assign to *pi32.
2293 */
2294DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2295{
2296 ASMAtomicXchgS32(pi32, i32);
2297}
2298
2299
2300/**
2301 * Atomically writes a signed 32-bit value, unordered.
2302 *
2303 * @param pi32 Pointer to the 32-bit variable to write.
2304 * @param i32 The 32-bit value to assign to *pi32.
2305 */
2306DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2307{
2308 Assert(!((uintptr_t)pi32 & 3));
2309#if ARCH_BITS >= 32
2310 *pi32 = i32;
2311#else
2312 ASMAtomicXchgS32(pi32, i32);
2313#endif
2314}
2315
2316
2317/**
2318 * Atomically writes an unsigned 64-bit value, ordered.
2319 *
2320 * @param pu64 Pointer to the 64-bit variable to write.
2321 * @param u64 The 64-bit value to assign to *pu64.
2322 */
2323DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2324{
2325 ASMAtomicXchgU64(pu64, u64);
2326}
2327
2328
2329/**
2330 * Atomically writes an unsigned 64-bit value, unordered.
2331 *
2332 * @param pu64 Pointer to the 64-bit variable to write.
2333 * @param u64 The 64-bit value to assign to *pu64.
2334 */
2335DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2336{
2337 Assert(!((uintptr_t)pu64 & 7));
2338#if ARCH_BITS == 64
2339 *pu64 = u64;
2340#else
2341 ASMAtomicXchgU64(pu64, u64);
2342#endif
2343}
2344
2345
2346/**
2347 * Atomically writes a signed 64-bit value, ordered.
2348 *
2349 * @param pi64 Pointer to the 64-bit variable to write.
2350 * @param i64 The 64-bit value to assign to *pi64.
2351 */
2352DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2353{
2354 ASMAtomicXchgS64(pi64, i64);
2355}
2356
2357
2358/**
2359 * Atomically writes a signed 64-bit value, unordered.
2360 *
2361 * @param pi64 Pointer to the 64-bit variable to write.
2362 * @param i64 The 64-bit value to assign to *pi64.
2363 */
2364DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2365{
2366 Assert(!((uintptr_t)pi64 & 7));
2367#if ARCH_BITS == 64
2368 *pi64 = i64;
2369#else
2370 ASMAtomicXchgS64(pi64, i64);
2371#endif
2372}
2373
2374
2375/**
2376 * Atomically writes a boolean value, ordered.
2377 *
2378 * @param pf Pointer to the boolean variable to write.
2379 * @param f The boolean value to assign to *pf.
2380 */
2381DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2382{
2383 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2384}
2385
2386
2387/**
2388 * Atomically writes a boolean value, unordered.
2389 *
2390 * @param pf Pointer to the boolean variable to write.
2391 * @param f The boolean value to assign to *pf.
2392 */
2393DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2394{
2395 *pf = f; /* byte writes are atomic on x86 */
2396}
2397
2398
2399/**
2400 * Atomically writes a pointer value, ordered.
2401 *
2402 * @param ppv Pointer to the pointer variable to write.
2403 * @param pv The pointer value to assign to *ppv.
2404 */
2405DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2406{
2407#if ARCH_BITS == 32 || ARCH_BITS == 16
2408 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2409#elif ARCH_BITS == 64
2410 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2411#else
2412# error "ARCH_BITS is bogus"
2413#endif
2414}
2415
2416
2417/**
2418 * Atomically writes a pointer value, ordered.
2419 *
2420 * @param ppv Pointer to the pointer variable to write.
2421 * @param pv The pointer value to assign to *ppv. If NULL use
2422 * ASMAtomicWriteNullPtr or you'll land in trouble.
2423 *
2424 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2425 * NULL.
2426 */
2427#ifdef __GNUC__
2428# define ASMAtomicWritePtr(ppv, pv) \
2429 do \
2430 { \
2431 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2432 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2433 \
2434 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2435 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2436 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2437 \
2438 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2439 } while (0)
2440#else
2441# define ASMAtomicWritePtr(ppv, pv) \
2442 do \
2443 { \
2444 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2445 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2446 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2447 \
2448 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2449 } while (0)
2450#endif
2451
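/* Illustrative usage sketch: a typical publish/consume pairing of the ordered
 * pointer write above with ASMAtomicReadPtrT on the reader side.  MYDATA,
 * s_Data and g_pSharedData are hypothetical names:
 *
 *      typedef struct MYDATA { uint32_t uValue; } MYDATA;
 *      static MYDATA * volatile g_pSharedData;
 *
 *      // producer: fill the object, then publish the pointer (ordered write).
 *      static MYDATA s_Data = { 42 };
 *      ASMAtomicWritePtr(&g_pSharedData, &s_Data);
 *
 *      // consumer: observes either NULL or the fully published object.
 *      MYDATA *pData = ASMAtomicReadPtrT(&g_pSharedData, MYDATA *);
 */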
2452
2453/**
2454 * Atomically sets a pointer to NULL, ordered.
2455 *
2456 * @param ppv Pointer to the pointer variable that should be set to NULL.
2457 *
2458 * @remarks This is relatively type safe on GCC platforms.
2459 */
2460#ifdef __GNUC__
2461# define ASMAtomicWriteNullPtr(ppv) \
2462 do \
2463 { \
2464 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2465 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2466 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2467 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2468 } while (0)
2469#else
2470# define ASMAtomicWriteNullPtr(ppv) \
2471 do \
2472 { \
2473 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2474 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2475 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2476 } while (0)
2477#endif
2478
2479
2480/**
2481 * Atomically writes a pointer value, unordered.
2482 *
2484 * @param ppv Pointer to the pointer variable.
2485 * @param pv The pointer value to assign to *ppv. If NULL use
2486 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2487 *
2488 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2489 * NULL.
2490 */
2491#ifdef __GNUC__
2492# define ASMAtomicUoWritePtr(ppv, pv) \
2493 do \
2494 { \
2495 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2496 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2497 \
2498 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2499 AssertCompile(sizeof(pv) == sizeof(void *)); \
2500 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2501 \
2502 *(ppvTypeChecked) = pvTypeChecked; \
2503 } while (0)
2504#else
2505# define ASMAtomicUoWritePtr(ppv, pv) \
2506 do \
2507 { \
2508 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2509 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2510 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2511 *(ppv) = pv; \
2512 } while (0)
2513#endif
2514
2515
2516/**
2517 * Atomically sets a pointer to NULL, unordered.
2518 *
2519 * @param ppv Pointer to the pointer variable that should be set to NULL.
2520 *
2521 * @remarks This is relatively type safe on GCC platforms.
2522 */
2523#ifdef __GNUC__
2524# define ASMAtomicUoWriteNullPtr(ppv) \
2525 do \
2526 { \
2527 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2528 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2529 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2530 *(ppvTypeChecked) = NULL; \
2531 } while (0)
2532#else
2533# define ASMAtomicUoWriteNullPtr(ppv) \
2534 do \
2535 { \
2536 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2537 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2538 *(ppv) = NULL; \
2539 } while (0)
2540#endif
2541
2542
2543/**
2544 * Atomically write a typical IPRT handle value, ordered.
2545 *
2546 * @param ph Pointer to the variable to update.
2547 * @param hNew The value to assign to *ph.
2548 *
2549 * @remarks This doesn't currently work for all handles (like RTFILE).
2550 */
2551#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2552# define ASMAtomicWriteHandle(ph, hNew) \
2553 do { \
2554 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2555 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2556 } while (0)
2557#elif HC_ARCH_BITS == 64
2558# define ASMAtomicWriteHandle(ph, hNew) \
2559 do { \
2560 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2561 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2562 } while (0)
2563#else
2564# error HC_ARCH_BITS
2565#endif
2566
2567
2568/**
2569 * Atomically write a typical IPRT handle value, unordered.
2570 *
2571 * @param ph Pointer to the variable to update.
2572 * @param hNew The value to assign to *ph.
2573 *
2574 * @remarks This doesn't currently work for all handles (like RTFILE).
2575 */
2576#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2577# define ASMAtomicUoWriteHandle(ph, hNew) \
2578 do { \
2579 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2580 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2581 } while (0)
2582#elif HC_ARCH_BITS == 64
2583# define ASMAtomicUoWriteHandle(ph, hNew) \
2584 do { \
2585 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2586 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2587 } while (0)
2588#else
2589# error HC_ARCH_BITS
2590#endif
2591
2592
2593/**
2594 * Atomically write a value whose size might differ
2595 * between platforms or compilers, ordered.
2596 *
2597 * @param pu Pointer to the variable to update.
2598 * @param uNew The value to assign to *pu.
2599 */
2600#define ASMAtomicWriteSize(pu, uNew) \
2601 do { \
2602 switch (sizeof(*(pu))) { \
2603 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2604 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2605 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2606 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2607 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2608 } \
2609 } while (0)
2610
2611/**
2612 * Atomically write a value whose size might differ
2613 * between platforms or compilers, unordered.
2614 *
2615 * @param pu Pointer to the variable to update.
2616 * @param uNew The value to assign to *pu.
2617 */
2618#define ASMAtomicUoWriteSize(pu, uNew) \
2619 do { \
2620 switch (sizeof(*(pu))) { \
2621 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2622 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2623 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2624 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2625        default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2626 } \
2627 } while (0)
2628
2629
2630
2631/**
2632 * Atomically exchanges and adds to a 16-bit value, ordered.
2633 *
2634 * @returns The old value.
2635 * @param pu16 Pointer to the value.
2636 * @param u16 Number to add.
2637 *
2638 * @remarks Currently not implemented, just to make 16-bit code happy.
2639 * @remarks x86: Requires a 486 or later.
2640 */
2641DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2642
2643
2644/**
2645 * Atomically exchanges and adds to a 32-bit value, ordered.
2646 *
2647 * @returns The old value.
2648 * @param pu32 Pointer to the value.
2649 * @param u32 Number to add.
2650 *
2651 * @remarks x86: Requires a 486 or later.
2652 */
2653#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2654DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2655#else
2656DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2657{
2658# if RT_INLINE_ASM_USES_INTRIN
2659 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2660 return u32;
2661
2662# elif RT_INLINE_ASM_GNU_STYLE
2663 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2664 : "=r" (u32),
2665 "=m" (*pu32)
2666 : "0" (u32),
2667 "m" (*pu32)
2668 : "memory");
2669 return u32;
2670# else
2671 __asm
2672 {
2673 mov eax, [u32]
2674# ifdef RT_ARCH_AMD64
2675 mov rdx, [pu32]
2676 lock xadd [rdx], eax
2677# else
2678 mov edx, [pu32]
2679 lock xadd [edx], eax
2680# endif
2681 mov [u32], eax
2682 }
2683 return u32;
2684# endif
2685}
2686#endif
2687
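/* Illustrative usage sketch: the add returns the old value, which makes it a
 * handy fetch-and-add for handing out unique offsets or slots.  g_offNext is
 * a hypothetical shared allocation cursor:
 *
 *      static uint32_t volatile g_offNext;
 *
 *      uint32_t offMine = ASMAtomicAddU32(&g_offNext, 64); // claim 64 bytes
 *      // bytes [offMine, offMine + 64) now belong exclusively to this thread.
 */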
2688
2689/**
2690 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2691 *
2692 * @returns The old value.
2693 * @param pi32 Pointer to the value.
2694 * @param i32 Number to add.
2695 *
2696 * @remarks x86: Requires a 486 or later.
2697 */
2698DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2699{
2700 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2701}
2702
2703
2704/**
2705 * Atomically exchanges and adds to a 64-bit value, ordered.
2706 *
2707 * @returns The old value.
2708 * @param pu64 Pointer to the value.
2709 * @param u64 Number to add.
2710 *
2711 * @remarks x86: Requires a Pentium or later.
2712 */
2713#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2714DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2715#else
2716DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2717{
2718# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2719 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2720 return u64;
2721
2722# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2723 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2724 : "=r" (u64),
2725 "=m" (*pu64)
2726 : "0" (u64),
2727 "m" (*pu64)
2728 : "memory");
2729 return u64;
2730# else
2731 uint64_t u64Old;
2732 for (;;)
2733 {
2734 uint64_t u64New;
2735 u64Old = ASMAtomicUoReadU64(pu64);
2736 u64New = u64Old + u64;
2737 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2738 break;
2739 ASMNopPause();
2740 }
2741 return u64Old;
2742# endif
2743}
2744#endif
2745
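/* Illustrative sketch: the non-AMD64 fallback above is the general
 * compare-and-swap retry pattern, and the same shape works for any 64-bit
 * read-modify-write built on ASMAtomicCmpXchgU64.  A hypothetical "atomic
 * maximum" helper:
 *
 *      static void myAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64New)
 *      {
 *          for (;;)
 *          {
 *              uint64_t u64Old = ASMAtomicUoReadU64(pu64);
 *              if (u64Old >= u64New)
 *                  break;                              // already large enough
 *              if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                  break;                              // update won the race
 *              ASMNopPause();                          // lost the race, retry
 *          }
 *      }
 */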
2746
2747/**
2748 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2749 *
2750 * @returns The old value.
2751 * @param pi64 Pointer to the value.
2752 * @param i64 Number to add.
2753 *
2754 * @remarks x86: Requires a Pentium or later.
2755 */
2756DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2757{
2758 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2759}
2760
2761
2762/**
2763 * Atomically exchanges and adds to a size_t value, ordered.
2764 *
2765 * @returns The old value.
2766 * @param pcb Pointer to the size_t value.
2767 * @param cb Number to add.
2768 */
2769DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2770{
2771#if ARCH_BITS == 64
2772 AssertCompileSize(size_t, 8);
2773 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2774#elif ARCH_BITS == 32
2775 AssertCompileSize(size_t, 4);
2776 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2777#elif ARCH_BITS == 16
2778 AssertCompileSize(size_t, 2);
2779 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2780#else
2781# error "Unsupported ARCH_BITS value"
2782#endif
2783}
2784
2785
2786/**
2787 * Atomically exchanges and adds a value whose size might differ between
2788 * platforms or compilers, ordered.
2789 *
2790 * @param pu Pointer to the variable to update.
2791 * @param uNew The value to add to *pu.
2792 * @param puOld Where to store the old value.
2793 */
2794#define ASMAtomicAddSize(pu, uNew, puOld) \
2795 do { \
2796 switch (sizeof(*(pu))) { \
2797 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2798 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2799 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2800 } \
2801 } while (0)
2802
2803
2804
2805/**
2806 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2807 *
2808 * @returns The old value.
2809 * @param pu16 Pointer to the value.
2810 * @param u16 Number to subtract.
2811 *
2812 * @remarks x86: Requires a 486 or later.
2813 */
2814DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2815{
2816 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2817}
2818
2819
2820/**
2821 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2822 *
2823 * @returns The old value.
2824 * @param pi16 Pointer to the value.
2825 * @param i16 Number to subtract.
2826 *
2827 * @remarks x86: Requires a 486 or later.
2828 */
2829DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2830{
2831 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2832}
2833
2834
2835/**
2836 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2837 *
2838 * @returns The old value.
2839 * @param pu32 Pointer to the value.
2840 * @param u32 Number to subtract.
2841 *
2842 * @remarks x86: Requires a 486 or later.
2843 */
2844DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2845{
2846 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2847}
2848
2849
2850/**
2851 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2852 *
2853 * @returns The old value.
2854 * @param pi32 Pointer to the value.
2855 * @param i32 Number to subtract.
2856 *
2857 * @remarks x86: Requires a 486 or later.
2858 */
2859DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2860{
2861 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2862}
2863
2864
2865/**
2866 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2867 *
2868 * @returns The old value.
2869 * @param pu64 Pointer to the value.
2870 * @param u64 Number to subtract.
2871 *
2872 * @remarks x86: Requires a Pentium or later.
2873 */
2874DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2875{
2876 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2877}
2878
2879
2880/**
2881 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2882 *
2883 * @returns The old value.
2884 * @param pi64 Pointer to the value.
2885 * @param i64 Number to subtract.
2886 *
2887 * @remarks x86: Requires a Pentium or later.
2888 */
2889DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2890{
2891 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2892}
2893
2894
2895/**
2896 * Atomically exchanges and subtracts from a size_t value, ordered.
2897 *
2898 * @returns The old value.
2899 * @param pcb Pointer to the size_t value.
2900 * @param cb Number to subtract.
2901 *
2902 * @remarks x86: Requires a 486 or later.
2903 */
2904DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2905{
2906#if ARCH_BITS == 64
2907 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2908#elif ARCH_BITS == 32
2909 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2910#elif ARCH_BITS == 16
2911 AssertCompileSize(size_t, 2);
2912 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
2913#else
2914# error "Unsupported ARCH_BITS value"
2915#endif
2916}
2917
2918
2919/**
2920 * Atomically exchanges and subtracts a value whose size might differ between
2921 * platforms or compilers, ordered.
2922 *
2923 * @param pu Pointer to the variable to update.
2924 * @param uNew The value to subtract from *pu.
2925 * @param puOld Where to store the old value.
2926 *
2927 * @remarks x86: Requires a 486 or later.
2928 */
2929#define ASMAtomicSubSize(pu, uNew, puOld) \
2930 do { \
2931 switch (sizeof(*(pu))) { \
2932 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2933 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2934 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2935 } \
2936 } while (0)
2937
2938
2939
2940/**
2941 * Atomically increment a 16-bit value, ordered.
2942 *
2943 * @returns The new value.
2944 * @param pu16 Pointer to the value to increment.
2945 * @remarks Not implemented. Just to make 16-bit code happy.
2946 *
2947 * @remarks x86: Requires a 486 or later.
2948 */
2949DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
2950
2951
2952/**
2953 * Atomically increment a 32-bit value, ordered.
2954 *
2955 * @returns The new value.
2956 * @param pu32 Pointer to the value to increment.
2957 *
2958 * @remarks x86: Requires a 486 or later.
2959 */
2960#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2961DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
2962#else
2963DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
2964{
2965 uint32_t u32;
2966# if RT_INLINE_ASM_USES_INTRIN
2967 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
2968 return u32;
2969
2970# elif RT_INLINE_ASM_GNU_STYLE
2971 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2972 : "=r" (u32),
2973 "=m" (*pu32)
2974 : "0" (1),
2975 "m" (*pu32)
2976 : "memory");
2977 return u32+1;
2978# else
2979 __asm
2980 {
2981 mov eax, 1
2982# ifdef RT_ARCH_AMD64
2983 mov rdx, [pu32]
2984 lock xadd [rdx], eax
2985# else
2986 mov edx, [pu32]
2987 lock xadd [edx], eax
2988# endif
2989 mov u32, eax
2990 }
2991 return u32+1;
2992# endif
2993}
2994#endif
2995
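/* Illustrative usage sketch: the increment returns the new count, which is the
 * usual building block for reference counting.  MYOBJ is a hypothetical type:
 *
 *      typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *      static uint32_t myObjRetain(MYOBJ *pObj)
 *      {
 *          return ASMAtomicIncU32(&pObj->cRefs);
 *      }
 */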
2996
2997/**
2998 * Atomically increment a signed 32-bit value, ordered.
2999 *
3000 * @returns The new value.
3001 * @param pi32 Pointer to the value to increment.
3002 *
3003 * @remarks x86: Requires a 486 or later.
3004 */
3005DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3006{
3007 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3008}
3009
3010
3011/**
3012 * Atomically increment a 64-bit value, ordered.
3013 *
3014 * @returns The new value.
3015 * @param pu64 Pointer to the value to increment.
3016 *
3017 * @remarks x86: Requires a Pentium or later.
3018 */
3019#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3020DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3021#else
3022DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3023{
3024# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3025 uint64_t u64;
3026 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3027 return u64;
3028
3029# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3030 uint64_t u64;
3031 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3032 : "=r" (u64),
3033 "=m" (*pu64)
3034 : "0" (1),
3035 "m" (*pu64)
3036 : "memory");
3037 return u64 + 1;
3038# else
3039 return ASMAtomicAddU64(pu64, 1) + 1;
3040# endif
3041}
3042#endif
3043
3044
3045/**
3046 * Atomically increment a signed 64-bit value, ordered.
3047 *
3048 * @returns The new value.
3049 * @param pi64 Pointer to the value to increment.
3050 *
3051 * @remarks x86: Requires a Pentium or later.
3052 */
3053DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3054{
3055 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3056}
3057
3058
3059/**
3060 * Atomically increment a size_t value, ordered.
3061 *
3062 * @returns The new value.
3063 * @param pcb Pointer to the value to increment.
3064 *
3065 * @remarks x86: Requires a 486 or later.
3066 */
3067DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3068{
3069#if ARCH_BITS == 64
3070 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3071#elif ARCH_BITS == 32
3072 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3073#elif ARCH_BITS == 16
3074 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3075#else
3076# error "Unsupported ARCH_BITS value"
3077#endif
3078}
3079
3080
3081
3082/**
3083 * Atomically decrement an unsigned 16-bit value, ordered.
3084 *
3085 * @returns The new value.
3086 * @param pu16 Pointer to the value to decrement.
3087 * @remarks Not implemented. Just to make 16-bit code happy.
3088 *
3089 * @remarks x86: Requires a 486 or later.
3090 */
3091DECLASM(uint16_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3092
3093
3094/**
3095 * Atomically decrement an unsigned 32-bit value, ordered.
3096 *
3097 * @returns The new value.
3098 * @param pu32 Pointer to the value to decrement.
3099 *
3100 * @remarks x86: Requires a 486 or later.
3101 */
3102#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3103DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3104#else
3105DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3106{
3107 uint32_t u32;
3108# if RT_INLINE_ASM_USES_INTRIN
3109 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3110 return u32;
3111
3112# elif RT_INLINE_ASM_GNU_STYLE
3113 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3114 : "=r" (u32),
3115 "=m" (*pu32)
3116 : "0" (-1),
3117 "m" (*pu32)
3118 : "memory");
3119 return u32-1;
3120# else
3121 __asm
3122 {
3123 mov eax, -1
3124# ifdef RT_ARCH_AMD64
3125 mov rdx, [pu32]
3126 lock xadd [rdx], eax
3127# else
3128 mov edx, [pu32]
3129 lock xadd [edx], eax
3130# endif
3131 mov u32, eax
3132 }
3133 return u32-1;
3134# endif
3135}
3136#endif
3137
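/* Illustrative usage sketch: the matching release side of the hypothetical
 * MYOBJ reference count shown after ASMAtomicIncU32, destroying the object
 * when the new count reaches zero (myObjDestroy is hypothetical too):
 *
 *      static uint32_t myObjRelease(MYOBJ *pObj)
 *      {
 *          uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs);
 *          if (cRefs == 0)
 *              myObjDestroy(pObj);
 *          return cRefs;
 *      }
 */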
3138
3139/**
3140 * Atomically decrement a signed 32-bit value, ordered.
3141 *
3142 * @returns The new value.
3143 * @param pi32 Pointer to the value to decrement.
3144 *
3145 * @remarks x86: Requires a 486 or later.
3146 */
3147DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3148{
3149 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3150}
3151
3152
3153/**
3154 * Atomically decrement an unsigned 64-bit value, ordered.
3155 *
3156 * @returns The new value.
3157 * @param pu64 Pointer to the value to decrement.
3158 *
3159 * @remarks x86: Requires a Pentium or later.
3160 */
3161#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3162DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3163#else
3164DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3165{
3166# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3167 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3168 return u64;
3169
3170# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3171 uint64_t u64;
3172 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3173 : "=r" (u64),
3174 "=m" (*pu64)
3175 : "0" (~(uint64_t)0),
3176 "m" (*pu64)
3177 : "memory");
3178 return u64-1;
3179# else
3180 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3181# endif
3182}
3183#endif
3184
3185
3186/**
3187 * Atomically decrement a signed 64-bit value, ordered.
3188 *
3189 * @returns The new value.
3190 * @param pi64 Pointer to the value to decrement.
3191 *
3192 * @remarks x86: Requires a Pentium or later.
3193 */
3194DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3195{
3196 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3197}
3198
3199
3200/**
3201 * Atomically decrement a size_t value, ordered.
3202 *
3203 * @returns The new value.
3204 * @param pcb Pointer to the value to decrement.
3205 *
3206 * @remarks x86: Requires a 486 or later.
3207 */
3208DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3209{
3210#if ARCH_BITS == 64
3211 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3212#elif ARCH_BITS == 32
3213 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3214#elif ARCH_BITS == 16
3215 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3216#else
3217# error "Unsupported ARCH_BITS value"
3218#endif
3219}
3220
3221
3222/**
3223 * Atomically Or an unsigned 32-bit value, ordered.
3224 *
3225 * @param pu32 Pointer to the variable to OR u32 with.
3226 * @param u32 The value to OR *pu32 with.
3227 *
3228 * @remarks x86: Requires a 386 or later.
3229 */
3230#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3231DECLASM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3232#else
3233DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3234{
3235# if RT_INLINE_ASM_USES_INTRIN
3236 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3237
3238# elif RT_INLINE_ASM_GNU_STYLE
3239 __asm__ __volatile__("lock; orl %1, %0\n\t"
3240 : "=m" (*pu32)
3241 : "ir" (u32),
3242 "m" (*pu32));
3243# else
3244 __asm
3245 {
3246 mov eax, [u32]
3247# ifdef RT_ARCH_AMD64
3248 mov rdx, [pu32]
3249 lock or [rdx], eax
3250# else
3251 mov edx, [pu32]
3252 lock or [edx], eax
3253# endif
3254 }
3255# endif
3256}
3257#endif
3258
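/* Illustrative usage sketch: atomic OR is the usual way to set flag bits that
 * several threads may update concurrently.  MY_STATUS_READY and g_fStatus are
 * hypothetical:
 *
 *      #define MY_STATUS_READY     UINT32_C(0x00000001)
 *      static uint32_t volatile    g_fStatus;
 *
 *      ASMAtomicOrU32(&g_fStatus, MY_STATUS_READY);
 */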
3259
3260/**
3261 * Atomically Or a signed 32-bit value, ordered.
3262 *
3263 * @param pi32 Pointer to the variable to OR i32 with.
3264 * @param i32 The value to OR *pi32 with.
3265 *
3266 * @remarks x86: Requires a 386 or later.
3267 */
3268DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3269{
3270 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3271}
3272
3273
3274/**
3275 * Atomically Or an unsigned 64-bit value, ordered.
3276 *
3277 * @param pu64 Pointer to the variable to OR u64 with.
3278 * @param u64 The value to OR *pu64 with.
3279 *
3280 * @remarks x86: Requires a Pentium or later.
3281 */
3282#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3283DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3284#else
3285DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3286{
3287# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3288 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3289
3290# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3291 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3292 : "=m" (*pu64)
3293 : "r" (u64),
3294 "m" (*pu64));
3295# else
3296 for (;;)
3297 {
3298 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3299 uint64_t u64New = u64Old | u64;
3300 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3301 break;
3302 ASMNopPause();
3303 }
3304# endif
3305}
3306#endif
3307
3308
3309/**
3310 * Atomically Or a signed 64-bit value, ordered.
3311 *
3312 * @param pi64 Pointer to the variable to OR i64 with.
3313 * @param i64 The value to OR *pi64 with.
3314 *
3315 * @remarks x86: Requires a Pentium or later.
3316 */
3317DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3318{
3319 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3320}
3321
3322
3323/**
3324 * Atomically And an unsigned 32-bit value, ordered.
3325 *
3326 * @param pu32 Pointer to the variable to AND u32 with.
3327 * @param u32 The value to AND *pu32 with.
3328 *
3329 * @remarks x86: Requires a 386 or later.
3330 */
3331#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3332DECLASM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3333#else
3334DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3335{
3336# if RT_INLINE_ASM_USES_INTRIN
3337 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3338
3339# elif RT_INLINE_ASM_GNU_STYLE
3340 __asm__ __volatile__("lock; andl %1, %0\n\t"
3341 : "=m" (*pu32)
3342 : "ir" (u32),
3343 "m" (*pu32));
3344# else
3345 __asm
3346 {
3347 mov eax, [u32]
3348# ifdef RT_ARCH_AMD64
3349 mov rdx, [pu32]
3350 lock and [rdx], eax
3351# else
3352 mov edx, [pu32]
3353 lock and [edx], eax
3354# endif
3355 }
3356# endif
3357}
3358#endif
3359
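/* Illustrative usage sketch: atomic AND with the complement clears flag bits;
 * continuing the hypothetical g_fStatus / MY_STATUS_READY example shown after
 * ASMAtomicOrU32:
 *
 *      ASMAtomicAndU32(&g_fStatus, ~MY_STATUS_READY);
 */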
3360
3361/**
3362 * Atomically And a signed 32-bit value, ordered.
3363 *
3364 * @param pi32 Pointer to the variable to AND i32 with.
3365 * @param i32 The value to AND *pi32 with.
3366 *
3367 * @remarks x86: Requires a 386 or later.
3368 */
3369DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3370{
3371 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3372}
3373
3374
3375/**
3376 * Atomically And an unsigned 64-bit value, ordered.
3377 *
3378 * @param pu64 Pointer to the variable to AND u64 with.
3379 * @param u64 The value to AND *pu64 with.
3380 *
3381 * @remarks x86: Requires a Pentium or later.
3382 */
3383#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3384DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3385#else
3386DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3387{
3388# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3389 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3390
3391# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3392 __asm__ __volatile__("lock; andq %1, %0\n\t"
3393 : "=m" (*pu64)
3394 : "r" (u64),
3395 "m" (*pu64));
3396# else
3397 for (;;)
3398 {
3399 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3400 uint64_t u64New = u64Old & u64;
3401 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3402 break;
3403 ASMNopPause();
3404 }
3405# endif
3406}
3407#endif
3408
3409
3410/**
3411 * Atomically And a signed 64-bit value, ordered.
3412 *
3413 * @param pi64 Pointer to the variable to AND i64 with.
3414 * @param i64 The value to AND *pi64 with.
3415 *
3416 * @remarks x86: Requires a Pentium or later.
3417 */
3418DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3419{
3420 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3421}
3422
3423
3424/**
3425 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3426 *
3427 * @param pu32 Pointer to the variable to OR u32 with.
3428 * @param u32 The value to OR *pu32 with.
3429 *
3430 * @remarks x86: Requires a 386 or later.
3431 */
3432#if RT_INLINE_ASM_EXTERNAL
3433DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3434#else
3435DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3436{
3437# if RT_INLINE_ASM_GNU_STYLE
3438 __asm__ __volatile__("orl %1, %0\n\t"
3439 : "=m" (*pu32)
3440 : "ir" (u32),
3441 "m" (*pu32));
3442# else
3443 __asm
3444 {
3445 mov eax, [u32]
3446# ifdef RT_ARCH_AMD64
3447 mov rdx, [pu32]
3448 or [rdx], eax
3449# else
3450 mov edx, [pu32]
3451 or [edx], eax
3452# endif
3453 }
3454# endif
3455}
3456#endif
3457
3458
3459/**
3460 * Atomically OR a signed 32-bit value, unordered.
3461 *
3462 * @param pi32 Pointer to the variable to OR i32 with.
3463 * @param i32 The value to OR *pi32 with.
3464 *
3465 * @remarks x86: Requires a 386 or later.
3466 */
3467DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3468{
3469 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3470}
3471
3472
3473/**
3474 * Atomically OR an unsigned 64-bit value, unordered.
3475 *
3476 * @param pu64 Pointer to the variable to OR u64 with.
3477 * @param u64 The value to OR *pu64 with.
3478 *
3479 * @remarks x86: Requires a Pentium or later.
3480 */
3481#if RT_INLINE_ASM_EXTERNAL
3482DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3483#else
3484DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3485{
3486# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3487 __asm__ __volatile__("orq %1, %q0\n\t"
3488 : "=m" (*pu64)
3489 : "r" (u64),
3490 "m" (*pu64));
3491# else
3492 for (;;)
3493 {
3494 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3495 uint64_t u64New = u64Old | u64;
3496 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3497 break;
3498 ASMNopPause();
3499 }
3500# endif
3501}
3502#endif
3503
3504
3505/**
3506 * Atomically Or a signed 64-bit value, unordered.
3507 *
3508 * @param pi64 Pointer to the variable to OR i64 with.
3509 * @param i64 The value to OR *pi64 with.
3510 *
3511 * @remarks x86: Requires a Pentium or later.
3512 */
3513DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3514{
3515 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3516}
3517
3518
3519/**
3520 * Atomically And an unsigned 32-bit value, unordered.
3521 *
3522 * @param pu32 Pointer to the variable to AND u32 with.
3523 * @param u32 The value to AND *pu32 with.
3524 *
3525 * @remarks x86: Requires a 386 or later.
3526 */
3527#if RT_INLINE_ASM_EXTERNAL
3528DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3529#else
3530DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3531{
3532# if RT_INLINE_ASM_GNU_STYLE
3533 __asm__ __volatile__("andl %1, %0\n\t"
3534 : "=m" (*pu32)
3535 : "ir" (u32),
3536 "m" (*pu32));
3537# else
3538 __asm
3539 {
3540 mov eax, [u32]
3541# ifdef RT_ARCH_AMD64
3542 mov rdx, [pu32]
3543 and [rdx], eax
3544# else
3545 mov edx, [pu32]
3546 and [edx], eax
3547# endif
3548 }
3549# endif
3550}
3551#endif
3552
3553
3554/**
3555 * Atomically And a signed 32-bit value, unordered.
3556 *
3557 * @param pi32 Pointer to the variable to AND i32 with.
3558 * @param i32 The value to AND *pi32 with.
3559 *
3560 * @remarks x86: Requires a 386 or later.
3561 */
3562DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3563{
3564 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3565}
3566
3567
3568/**
3569 * Atomically And an unsigned 64-bit value, unordered.
3570 *
3571 * @param pu64 Pointer to the variable to AND u64 with.
3572 * @param u64 The value to AND *pu64 with.
3573 *
3574 * @remarks x86: Requires a Pentium or later.
3575 */
3576#if RT_INLINE_ASM_EXTERNAL
3577DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3578#else
3579DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3580{
3581# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3582 __asm__ __volatile__("andq %1, %0\n\t"
3583 : "=m" (*pu64)
3584 : "r" (u64),
3585 "m" (*pu64));
3586# else
3587 for (;;)
3588 {
3589 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3590 uint64_t u64New = u64Old & u64;
3591 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3592 break;
3593 ASMNopPause();
3594 }
3595# endif
3596}
3597#endif
3598
3599
3600/**
3601 * Atomically And a signed 64-bit value, unordered.
3602 *
3603 * @param pi64 Pointer to the variable to AND i64 with.
3604 * @param i64 The value to AND *pi64 with.
3605 *
3606 * @remarks x86: Requires a Pentium or later.
3607 */
3608DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3609{
3610 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3611}
3612
3613
3614/**
3615 * Atomically increment an unsigned 32-bit value, unordered.
3616 *
3617 * @returns the new value.
3618 * @param pu32 Pointer to the variable to increment.
3619 *
3620 * @remarks x86: Requires a 486 or later.
3621 */
3622#if RT_INLINE_ASM_EXTERNAL
3623DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3624#else
3625DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3626{
3627 uint32_t u32;
3628# if RT_INLINE_ASM_GNU_STYLE
3629 __asm__ __volatile__("xaddl %0, %1\n\t"
3630 : "=r" (u32),
3631 "=m" (*pu32)
3632 : "0" (1),
3633 "m" (*pu32)
3634 : "memory");
3635 return u32 + 1;
3636# else
3637 __asm
3638 {
3639 mov eax, 1
3640# ifdef RT_ARCH_AMD64
3641 mov rdx, [pu32]
3642 xadd [rdx], eax
3643# else
3644 mov edx, [pu32]
3645 xadd [edx], eax
3646# endif
3647 mov u32, eax
3648 }
3649 return u32 + 1;
3650# endif
3651}
3652#endif
3653
3654
3655/**
3656 * Atomically decrement an unsigned 32-bit value, unordered.
3657 *
3658 * @returns the new value.
3659 * @param pu32 Pointer to the variable to decrement.
3660 *
3661 * @remarks x86: Requires a 486 or later.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL
3664DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3665#else
3666DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3667{
3668 uint32_t u32;
3669# if RT_INLINE_ASM_GNU_STYLE
3670 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3671 : "=r" (u32),
3672 "=m" (*pu32)
3673 : "0" (-1),
3674 "m" (*pu32)
3675 : "memory");
3676 return u32 - 1;
3677# else
3678 __asm
3679 {
3680 mov eax, -1
3681# ifdef RT_ARCH_AMD64
3682 mov rdx, [pu32]
3683 xadd [rdx], eax
3684# else
3685 mov edx, [pu32]
3686 xadd [edx], eax
3687# endif
3688 mov u32, eax
3689 }
3690 return u32 - 1;
3691# endif
3692}
3693#endif
3694
3695
3696/** @def RT_ASM_PAGE_SIZE
3697 * We try to avoid dragging in iprt/param.h here.
3698 * @internal
3699 */
3700#if defined(RT_ARCH_SPARC64)
3701# define RT_ASM_PAGE_SIZE 0x2000
3702# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3703# if PAGE_SIZE != 0x2000
3704# error "PAGE_SIZE is not 0x2000!"
3705# endif
3706# endif
3707#else
3708# define RT_ASM_PAGE_SIZE 0x1000
3709# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3710# if PAGE_SIZE != 0x1000
3711# error "PAGE_SIZE is not 0x1000!"
3712# endif
3713# endif
3714#endif
3715
3716/**
3717 * Zeros a 4K memory page.
3718 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3719 * @param pv Pointer to the memory block. This must be page aligned.
3720 */
3721#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3722DECLASM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3723# else
3724DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3725{
3726# if RT_INLINE_ASM_USES_INTRIN
3727# ifdef RT_ARCH_AMD64
3728 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3729# else
3730 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3731# endif
3732
3733# elif RT_INLINE_ASM_GNU_STYLE
3734 RTCCUINTREG uDummy;
3735# ifdef RT_ARCH_AMD64
3736 __asm__ __volatile__("rep stosq"
3737 : "=D" (pv),
3738 "=c" (uDummy)
3739 : "0" (pv),
3740 "c" (RT_ASM_PAGE_SIZE >> 3),
3741 "a" (0)
3742 : "memory");
3743# else
3744 __asm__ __volatile__("rep stosl"
3745 : "=D" (pv),
3746 "=c" (uDummy)
3747 : "0" (pv),
3748 "c" (RT_ASM_PAGE_SIZE >> 2),
3749 "a" (0)
3750 : "memory");
3751# endif
3752# else
3753 __asm
3754 {
3755# ifdef RT_ARCH_AMD64
3756 xor rax, rax
3757 mov ecx, 0200h
3758 mov rdi, [pv]
3759 rep stosq
3760# else
3761 xor eax, eax
3762 mov ecx, 0400h
3763 mov edi, [pv]
3764 rep stosd
3765# endif
3766 }
3767# endif
3768}
3769# endif
3770
3771
3772/**
3773 * Zeros a memory block with a 32-bit aligned size.
3774 *
3775 * @param pv Pointer to the memory block.
3776 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3777 */
3778#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3779DECLASM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3780#else
3781DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3782{
3783# if RT_INLINE_ASM_USES_INTRIN
3784# ifdef RT_ARCH_AMD64
3785 if (!(cb & 7))
3786 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3787 else
3788# endif
3789 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3790
3791# elif RT_INLINE_ASM_GNU_STYLE
3792 __asm__ __volatile__("rep stosl"
3793 : "=D" (pv),
3794 "=c" (cb)
3795 : "0" (pv),
3796 "1" (cb >> 2),
3797 "a" (0)
3798 : "memory");
3799# else
3800 __asm
3801 {
3802 xor eax, eax
3803# ifdef RT_ARCH_AMD64
3804 mov rcx, [cb]
3805 shr rcx, 2
3806 mov rdi, [pv]
3807# else
3808 mov ecx, [cb]
3809 shr ecx, 2
3810 mov edi, [pv]
3811# endif
3812 rep stosd
3813 }
3814# endif
3815}
3816#endif
3817
3818
3819/**
3820 * Fills a memory block with a 32-bit aligned size.
3821 *
3822 * @param pv Pointer to the memory block.
3823 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3824 * @param u32 The value to fill with.
3825 */
3826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3827DECLASM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3828#else
3829DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3830{
3831# if RT_INLINE_ASM_USES_INTRIN
3832# ifdef RT_ARCH_AMD64
3833 if (!(cb & 7))
3834 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3835 else
3836# endif
3837 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3838
3839# elif RT_INLINE_ASM_GNU_STYLE
3840 __asm__ __volatile__("rep stosl"
3841 : "=D" (pv),
3842 "=c" (cb)
3843 : "0" (pv),
3844 "1" (cb >> 2),
3845 "a" (u32)
3846 : "memory");
3847# else
3848 __asm
3849 {
3850# ifdef RT_ARCH_AMD64
3851 mov rcx, [cb]
3852 shr rcx, 2
3853 mov rdi, [pv]
3854# else
3855 mov ecx, [cb]
3856 shr ecx, 2
3857 mov edi, [pv]
3858# endif
3859 mov eax, [u32]
3860 rep stosd
3861 }
3862# endif
3863}
3864#endif
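
/*
 * Editorial usage sketch (not part of the original header): poisoning and then
 * wiping a table whose size is a multiple of 32 bits, as both functions
 * require.  The table name is made up.
 *
 *     uint32_t au32Table[64];
 *     ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xdeadbeef)); // poison
 *     ASMMemZero32(au32Table, sizeof(au32Table));                       // wipe again
 */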
3865
3866
3867/**
3868 * Checks if a memory block is all zeros.
3869 *
3870 * @returns Pointer to the first non-zero byte.
3871 * @returns NULL if all zero.
3872 *
3873 * @param pv Pointer to the memory block.
3874 * @param cb Number of bytes in the block.
3875 *
3876 * @todo Fix name, it is a predicate function but it's not returning boolean!
3877 */
3878#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
3879DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3880#else
3881DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3882{
3883 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3884 for (; cb; cb--, pb++)
3885 if (RT_LIKELY(*pb == 0))
3886 { /* likely */ }
3887 else
3888 return (void RT_FAR *)pb;
3889 return NULL;
3890}
3891#endif
3892
3893
3894/**
3895 * Checks if a memory block is all zeros.
3896 *
3897 * @returns true if zero, false if not.
3898 *
3899 * @param pv Pointer to the memory block.
3900 * @param cb Number of bytes in the block.
3901 *
3902 * @sa ASMMemFirstNonZero
3903 */
3904DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3905{
3906 return ASMMemFirstNonZero(pv, cb) == NULL;
3907}
3908
3909
3910/**
3911 * Checks if a memory page is all zeros.
3912 *
3913 * @returns true / false.
3914 *
3915  * @param   pvPage      Pointer to the page.  Must be aligned on a 16 byte
3916  *                      boundary.
3917 */
3918DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
3919{
3920# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3921 union { RTCCUINTREG r; bool f; } uAX;
3922 RTCCUINTREG xCX, xDI;
3923 Assert(!((uintptr_t)pvPage & 15));
3924 __asm__ __volatile__("repe; "
3925# ifdef RT_ARCH_AMD64
3926 "scasq\n\t"
3927# else
3928 "scasl\n\t"
3929# endif
3930 "setnc %%al\n\t"
3931 : "=&c" (xCX),
3932 "=&D" (xDI),
3933 "=&a" (uAX.r)
3934 : "mr" (pvPage),
3935# ifdef RT_ARCH_AMD64
3936 "0" (RT_ASM_PAGE_SIZE/8),
3937# else
3938 "0" (RT_ASM_PAGE_SIZE/4),
3939# endif
3940 "1" (pvPage),
3941 "2" (0));
3942 return uAX.f;
3943# else
3944 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
3945 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3946 Assert(!((uintptr_t)pvPage & 15));
3947 for (;;)
3948 {
3949 if (puPtr[0]) return false;
3950 if (puPtr[4]) return false;
3951
3952 if (puPtr[2]) return false;
3953 if (puPtr[6]) return false;
3954
3955 if (puPtr[1]) return false;
3956 if (puPtr[5]) return false;
3957
3958 if (puPtr[3]) return false;
3959 if (puPtr[7]) return false;
3960
3961 if (!--cLeft)
3962 return true;
3963 puPtr += 8;
3964 }
3965# endif
3966}
3967
3968
3969/**
3970 * Checks if a memory block is filled with the specified byte, returning the
3971 * first mismatch.
3972 *
3973 * This is sort of an inverted memchr.
3974 *
3975 * @returns Pointer to the byte which doesn't equal u8.
3976 * @returns NULL if all equal to u8.
3977 *
3978 * @param pv Pointer to the memory block.
3979 * @param cb Number of bytes in the block.
3980 * @param u8 The value it's supposed to be filled with.
3981 *
3982 * @remarks No alignment requirements.
3983 */
3984#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3985 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
3986DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
3987#else
3988DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
3989{
3990 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3991 for (; cb; cb--, pb++)
3992 if (RT_LIKELY(*pb == u8))
3993 { /* likely */ }
3994 else
3995 return (void *)pb;
3996 return NULL;
3997}
3998#endif
3999
4000
4001/**
4002 * Checks if a memory block is filled with the specified byte.
4003 *
4004 * @returns true if all matching, false if not.
4005 *
4006 * @param pv Pointer to the memory block.
4007 * @param cb Number of bytes in the block.
4008 * @param u8 The value it's supposed to be filled with.
4009 *
4010 * @remarks No alignment requirements.
4011 */
4012DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4013{
4014 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4015}
4016
4017
4018/**
4019 * Checks if a memory block is filled with the specified 32-bit value.
4020 *
4021 * This is a sort of inverted memchr.
4022 *
4023 * @returns Pointer to the first value which doesn't equal u32.
4024 * @returns NULL if all equal to u32.
4025 *
4026 * @param pv Pointer to the memory block.
4027 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4028 * @param u32 The value it's supposed to be filled with.
4029 */
4030DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4031{
4032/** @todo rewrite this in inline assembly? */
4033 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4034 for (; cb; cb -= 4, pu32++)
4035 if (RT_LIKELY(*pu32 == u32))
4036 { /* likely */ }
4037 else
4038 return (uint32_t RT_FAR *)pu32;
4039 return NULL;
4040}
4041
4042
4043/**
4044 * Probes a byte pointer for read access.
4045 *
4046  * While the function will fault if the byte is not read accessible, the
4047  * idea is to do this in a safe place, such as before acquiring locks
4048  * and similar operations.
4049  *
4050  * Also, this function guarantees that an eager compiler is not going
4051  * to optimize the probing away.
4052 *
4053 * @param pvByte Pointer to the byte.
4054 */
4055#if RT_INLINE_ASM_EXTERNAL
4056DECLASM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4057#else
4058DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4059{
4060 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4061 uint8_t u8;
4062# if RT_INLINE_ASM_GNU_STYLE
4063 __asm__ __volatile__("movb (%1), %0\n\t"
4064 : "=r" (u8)
4065 : "r" (pvByte));
4066# else
4067 __asm
4068 {
4069# ifdef RT_ARCH_AMD64
4070 mov rax, [pvByte]
4071 mov al, [rax]
4072# else
4073 mov eax, [pvByte]
4074 mov al, [eax]
4075# endif
4076 mov [u8], al
4077 }
4078# endif
4079 return u8;
4080}
4081#endif
4082
4083/**
4084 * Probes a buffer for read access page by page.
4085 *
4086  * While the function will fault if the buffer is not fully read
4087  * accessible, the idea is to do this in a safe place, such as before
4088  * acquiring locks and similar operations.
4089  *
4090  * Also, this function guarantees that an eager compiler is not going
4091  * to optimize the probing away.
4092 *
4093 * @param pvBuf Pointer to the buffer.
4094 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4095 */
4096DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4097{
4098 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4099 /* the first byte */
4100 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4101 ASMProbeReadByte(pu8);
4102
4103     /* one byte per page in between. */
4104 while (cbBuf > RT_ASM_PAGE_SIZE)
4105 {
4106 ASMProbeReadByte(pu8);
4107 cbBuf -= RT_ASM_PAGE_SIZE;
4108 pu8 += RT_ASM_PAGE_SIZE;
4109 }
4110
4111 /* the last byte */
4112 ASMProbeReadByte(pu8 + cbBuf - 1);
4113}
4114
4115
4116
4117/** @defgroup grp_inline_bits Bit Operations
4118 * @{
4119 */
4120
4121
4122/**
4123 * Sets a bit in a bitmap.
4124 *
4125 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4126 * @param iBit The bit to set.
4127 *
4128  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4129  *          However, aligning it will yield better performance and avoid
4130  *          traps when accessing the last bits in the bitmap.
4131 */
4132#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4133DECLASM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4134#else
4135DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4136{
4137# if RT_INLINE_ASM_USES_INTRIN
4138 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4139
4140# elif RT_INLINE_ASM_GNU_STYLE
4141 __asm__ __volatile__("btsl %1, %0"
4142 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4143 : "Ir" (iBit),
4144 "m" (*(volatile long RT_FAR *)pvBitmap)
4145 : "memory");
4146# else
4147 __asm
4148 {
4149# ifdef RT_ARCH_AMD64
4150 mov rax, [pvBitmap]
4151 mov edx, [iBit]
4152 bts [rax], edx
4153# else
4154 mov eax, [pvBitmap]
4155 mov edx, [iBit]
4156 bts [eax], edx
4157# endif
4158 }
4159# endif
4160}
4161#endif
4162
4163
4164/**
4165 * Atomically sets a bit in a bitmap, ordered.
4166 *
4167 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4168 * the memory access isn't atomic!
4169 * @param iBit The bit to set.
4170 *
4171 * @remarks x86: Requires a 386 or later.
4172 */
4173#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4174DECLASM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4175#else
4176DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4177{
4178 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4179# if RT_INLINE_ASM_USES_INTRIN
4180 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4181# elif RT_INLINE_ASM_GNU_STYLE
4182 __asm__ __volatile__("lock; btsl %1, %0"
4183 : "=m" (*(volatile long *)pvBitmap)
4184 : "Ir" (iBit),
4185 "m" (*(volatile long *)pvBitmap)
4186 : "memory");
4187# else
4188 __asm
4189 {
4190# ifdef RT_ARCH_AMD64
4191 mov rax, [pvBitmap]
4192 mov edx, [iBit]
4193 lock bts [rax], edx
4194# else
4195 mov eax, [pvBitmap]
4196 mov edx, [iBit]
4197 lock bts [eax], edx
4198# endif
4199 }
4200# endif
4201}
4202#endif
4203
4204
4205/**
4206 * Clears a bit in a bitmap.
4207 *
4208 * @param pvBitmap Pointer to the bitmap.
4209 * @param iBit The bit to clear.
4210 *
4211  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4212  *          However, aligning it will yield better performance and avoid
4213  *          traps when accessing the last bits in the bitmap.
4214 */
4215#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4216DECLASM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4217#else
4218DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4219{
4220# if RT_INLINE_ASM_USES_INTRIN
4221 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4222
4223# elif RT_INLINE_ASM_GNU_STYLE
4224 __asm__ __volatile__("btrl %1, %0"
4225 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4226 : "Ir" (iBit),
4227 "m" (*(volatile long RT_FAR *)pvBitmap)
4228 : "memory");
4229# else
4230 __asm
4231 {
4232# ifdef RT_ARCH_AMD64
4233 mov rax, [pvBitmap]
4234 mov edx, [iBit]
4235 btr [rax], edx
4236# else
4237 mov eax, [pvBitmap]
4238 mov edx, [iBit]
4239 btr [eax], edx
4240# endif
4241 }
4242# endif
4243}
4244#endif
4245
4246
4247/**
4248 * Atomically clears a bit in a bitmap, ordered.
4249 *
4250 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4251 * the memory access isn't atomic!
4252  * @param   iBit        The bit to clear.
4253  *
4254  * @remarks No memory barrier, take care on SMP.
4255 * @remarks x86: Requires a 386 or later.
4256 */
4257#if RT_INLINE_ASM_EXTERNAL
4258DECLASM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4259#else
4260DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4261{
4262 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4263# if RT_INLINE_ASM_GNU_STYLE
4264 __asm__ __volatile__("lock; btrl %1, %0"
4265 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4266 : "Ir" (iBit),
4267 "m" (*(volatile long RT_FAR *)pvBitmap)
4268 : "memory");
4269# else
4270 __asm
4271 {
4272# ifdef RT_ARCH_AMD64
4273 mov rax, [pvBitmap]
4274 mov edx, [iBit]
4275 lock btr [rax], edx
4276# else
4277 mov eax, [pvBitmap]
4278 mov edx, [iBit]
4279 lock btr [eax], edx
4280# endif
4281 }
4282# endif
4283}
4284#endif
4285
4286
4287/**
4288 * Toggles a bit in a bitmap.
4289 *
4290 * @param pvBitmap Pointer to the bitmap.
4291 * @param iBit The bit to toggle.
4292 *
4293  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4294  *          However, aligning it will yield better performance and avoid
4295  *          traps when accessing the last bits in the bitmap.
4296 */
4297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4298DECLASM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4299#else
4300DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4301{
4302# if RT_INLINE_ASM_USES_INTRIN
4303 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4304# elif RT_INLINE_ASM_GNU_STYLE
4305 __asm__ __volatile__("btcl %1, %0"
4306 : "=m" (*(volatile long *)pvBitmap)
4307 : "Ir" (iBit),
4308 "m" (*(volatile long *)pvBitmap)
4309 : "memory");
4310# else
4311 __asm
4312 {
4313# ifdef RT_ARCH_AMD64
4314 mov rax, [pvBitmap]
4315 mov edx, [iBit]
4316 btc [rax], edx
4317# else
4318 mov eax, [pvBitmap]
4319 mov edx, [iBit]
4320 btc [eax], edx
4321# endif
4322 }
4323# endif
4324}
4325#endif
4326
4327
4328/**
4329 * Atomically toggles a bit in a bitmap, ordered.
4330 *
4331 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4332 * the memory access isn't atomic!
4333  * @param   iBit        The bit to toggle.
4334 *
4335 * @remarks x86: Requires a 386 or later.
4336 */
4337#if RT_INLINE_ASM_EXTERNAL
4338DECLASM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4339#else
4340DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4341{
4342 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4343# if RT_INLINE_ASM_GNU_STYLE
4344 __asm__ __volatile__("lock; btcl %1, %0"
4345 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4346 : "Ir" (iBit),
4347 "m" (*(volatile long RT_FAR *)pvBitmap)
4348 : "memory");
4349# else
4350 __asm
4351 {
4352# ifdef RT_ARCH_AMD64
4353 mov rax, [pvBitmap]
4354 mov edx, [iBit]
4355 lock btc [rax], edx
4356# else
4357 mov eax, [pvBitmap]
4358 mov edx, [iBit]
4359 lock btc [eax], edx
4360# endif
4361 }
4362# endif
4363}
4364#endif
4365
4366
4367/**
4368 * Tests and sets a bit in a bitmap.
4369 *
4370 * @returns true if the bit was set.
4371 * @returns false if the bit was clear.
4372 *
4373 * @param pvBitmap Pointer to the bitmap.
4374 * @param iBit The bit to test and set.
4375 *
4376  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4377  *          However, aligning it will yield better performance and avoid
4378  *          traps when accessing the last bits in the bitmap.
4379 */
4380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4381DECLASM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4382#else
4383DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4384{
4385 union { bool f; uint32_t u32; uint8_t u8; } rc;
4386# if RT_INLINE_ASM_USES_INTRIN
4387 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4388
4389# elif RT_INLINE_ASM_GNU_STYLE
4390 __asm__ __volatile__("btsl %2, %1\n\t"
4391 "setc %b0\n\t"
4392 "andl $1, %0\n\t"
4393 : "=q" (rc.u32),
4394 "=m" (*(volatile long RT_FAR *)pvBitmap)
4395 : "Ir" (iBit),
4396 "m" (*(volatile long RT_FAR *)pvBitmap)
4397 : "memory");
4398# else
4399 __asm
4400 {
4401 mov edx, [iBit]
4402# ifdef RT_ARCH_AMD64
4403 mov rax, [pvBitmap]
4404 bts [rax], edx
4405# else
4406 mov eax, [pvBitmap]
4407 bts [eax], edx
4408# endif
4409 setc al
4410 and eax, 1
4411 mov [rc.u32], eax
4412 }
4413# endif
4414 return rc.f;
4415}
4416#endif
4417
4418
4419/**
4420 * Atomically tests and sets a bit in a bitmap, ordered.
4421 *
4422 * @returns true if the bit was set.
4423 * @returns false if the bit was clear.
4424 *
4425 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4426 * the memory access isn't atomic!
4427  * @param   iBit        The bit to test and set.
4428 *
4429 * @remarks x86: Requires a 386 or later.
4430 */
4431#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4432DECLASM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4433#else
4434DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4435{
4436 union { bool f; uint32_t u32; uint8_t u8; } rc;
4437 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4438# if RT_INLINE_ASM_USES_INTRIN
4439 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4440# elif RT_INLINE_ASM_GNU_STYLE
4441 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4442 "setc %b0\n\t"
4443 "andl $1, %0\n\t"
4444 : "=q" (rc.u32),
4445 "=m" (*(volatile long RT_FAR *)pvBitmap)
4446 : "Ir" (iBit),
4447 "m" (*(volatile long RT_FAR *)pvBitmap)
4448 : "memory");
4449# else
4450 __asm
4451 {
4452 mov edx, [iBit]
4453# ifdef RT_ARCH_AMD64
4454 mov rax, [pvBitmap]
4455 lock bts [rax], edx
4456# else
4457 mov eax, [pvBitmap]
4458 lock bts [eax], edx
4459# endif
4460 setc al
4461 and eax, 1
4462 mov [rc.u32], eax
4463 }
4464# endif
4465 return rc.f;
4466}
4467#endif
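
/*
 * Editorial usage sketch (not part of the original header): claiming a free
 * slot in a shared bitmap without taking a lock.  The bitmap and its size are
 * made up; it must be 32-bit aligned for the atomic operations, which a static
 * uint32_t array guarantees.
 *
 *     static uint32_t volatile s_bmSlotsInUse[2];   // 64 slots, a set bit means "in use"
 *
 *     int32_t ExampleClaimSlot(void)
 *     {
 *         int32_t iSlot = ASMBitFirstClear(&s_bmSlotsInUse[0], 64);
 *         while (iSlot >= 0 && ASMAtomicBitTestAndSet(&s_bmSlotsInUse[0], iSlot))
 *             iSlot = ASMBitNextClear(&s_bmSlotsInUse[0], 64, iSlot); // lost the race, keep looking
 *         return iSlot; // -1 if the bitmap is full; release with ASMAtomicBitClear().
 *     }
 */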
4468
4469
4470/**
4471 * Tests and clears a bit in a bitmap.
4472 *
4473 * @returns true if the bit was set.
4474 * @returns false if the bit was clear.
4475 *
4476 * @param pvBitmap Pointer to the bitmap.
4477 * @param iBit The bit to test and clear.
4478 *
4479  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4480  *          However, aligning it will yield better performance and avoid
4481  *          traps when accessing the last bits in the bitmap.
4482 */
4483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4484DECLASM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4485#else
4486DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4487{
4488 union { bool f; uint32_t u32; uint8_t u8; } rc;
4489# if RT_INLINE_ASM_USES_INTRIN
4490 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4491
4492# elif RT_INLINE_ASM_GNU_STYLE
4493 __asm__ __volatile__("btrl %2, %1\n\t"
4494 "setc %b0\n\t"
4495 "andl $1, %0\n\t"
4496 : "=q" (rc.u32),
4497 "=m" (*(volatile long RT_FAR *)pvBitmap)
4498 : "Ir" (iBit),
4499 "m" (*(volatile long RT_FAR *)pvBitmap)
4500 : "memory");
4501# else
4502 __asm
4503 {
4504 mov edx, [iBit]
4505# ifdef RT_ARCH_AMD64
4506 mov rax, [pvBitmap]
4507 btr [rax], edx
4508# else
4509 mov eax, [pvBitmap]
4510 btr [eax], edx
4511# endif
4512 setc al
4513 and eax, 1
4514 mov [rc.u32], eax
4515 }
4516# endif
4517 return rc.f;
4518}
4519#endif
4520
4521
4522/**
4523 * Atomically tests and clears a bit in a bitmap, ordered.
4524 *
4525 * @returns true if the bit was set.
4526 * @returns false if the bit was clear.
4527 *
4528 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4529 * the memory access isn't atomic!
4530 * @param iBit The bit to test and clear.
4531 *
4532  * @remarks No memory barrier, take care on SMP.
4533 * @remarks x86: Requires a 386 or later.
4534 */
4535#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4536DECLASM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4537#else
4538DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4539{
4540 union { bool f; uint32_t u32; uint8_t u8; } rc;
4541 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4542# if RT_INLINE_ASM_USES_INTRIN
4543 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4544
4545# elif RT_INLINE_ASM_GNU_STYLE
4546 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4547 "setc %b0\n\t"
4548 "andl $1, %0\n\t"
4549 : "=q" (rc.u32),
4550 "=m" (*(volatile long RT_FAR *)pvBitmap)
4551 : "Ir" (iBit),
4552 "m" (*(volatile long RT_FAR *)pvBitmap)
4553 : "memory");
4554# else
4555 __asm
4556 {
4557 mov edx, [iBit]
4558# ifdef RT_ARCH_AMD64
4559 mov rax, [pvBitmap]
4560 lock btr [rax], edx
4561# else
4562 mov eax, [pvBitmap]
4563 lock btr [eax], edx
4564# endif
4565 setc al
4566 and eax, 1
4567 mov [rc.u32], eax
4568 }
4569# endif
4570 return rc.f;
4571}
4572#endif
4573
4574
4575/**
4576 * Tests and toggles a bit in a bitmap.
4577 *
4578 * @returns true if the bit was set.
4579 * @returns false if the bit was clear.
4580 *
4581 * @param pvBitmap Pointer to the bitmap.
4582 * @param iBit The bit to test and toggle.
4583 *
4584  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4585  *          However, aligning it will yield better performance and avoid
4586  *          traps when accessing the last bits in the bitmap.
4587 */
4588#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4589DECLASM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4590#else
4591DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4592{
4593 union { bool f; uint32_t u32; uint8_t u8; } rc;
4594# if RT_INLINE_ASM_USES_INTRIN
4595 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4596
4597# elif RT_INLINE_ASM_GNU_STYLE
4598 __asm__ __volatile__("btcl %2, %1\n\t"
4599 "setc %b0\n\t"
4600 "andl $1, %0\n\t"
4601 : "=q" (rc.u32),
4602 "=m" (*(volatile long RT_FAR *)pvBitmap)
4603 : "Ir" (iBit),
4604 "m" (*(volatile long RT_FAR *)pvBitmap)
4605 : "memory");
4606# else
4607 __asm
4608 {
4609 mov edx, [iBit]
4610# ifdef RT_ARCH_AMD64
4611 mov rax, [pvBitmap]
4612 btc [rax], edx
4613# else
4614 mov eax, [pvBitmap]
4615 btc [eax], edx
4616# endif
4617 setc al
4618 and eax, 1
4619 mov [rc.u32], eax
4620 }
4621# endif
4622 return rc.f;
4623}
4624#endif
4625
4626
4627/**
4628 * Atomically tests and toggles a bit in a bitmap, ordered.
4629 *
4630 * @returns true if the bit was set.
4631 * @returns false if the bit was clear.
4632 *
4633 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4634 * the memory access isn't atomic!
4635 * @param iBit The bit to test and toggle.
4636 *
4637 * @remarks x86: Requires a 386 or later.
4638 */
4639#if RT_INLINE_ASM_EXTERNAL
4640DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4641#else
4642DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4643{
4644 union { bool f; uint32_t u32; uint8_t u8; } rc;
4645 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4646# if RT_INLINE_ASM_GNU_STYLE
4647 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4648 "setc %b0\n\t"
4649 "andl $1, %0\n\t"
4650 : "=q" (rc.u32),
4651 "=m" (*(volatile long RT_FAR *)pvBitmap)
4652 : "Ir" (iBit),
4653 "m" (*(volatile long RT_FAR *)pvBitmap)
4654 : "memory");
4655# else
4656 __asm
4657 {
4658 mov edx, [iBit]
4659# ifdef RT_ARCH_AMD64
4660 mov rax, [pvBitmap]
4661 lock btc [rax], edx
4662# else
4663 mov eax, [pvBitmap]
4664 lock btc [eax], edx
4665# endif
4666 setc al
4667 and eax, 1
4668 mov [rc.u32], eax
4669 }
4670# endif
4671 return rc.f;
4672}
4673#endif
4674
4675
4676/**
4677 * Tests if a bit in a bitmap is set.
4678 *
4679 * @returns true if the bit is set.
4680 * @returns false if the bit is clear.
4681 *
4682 * @param pvBitmap Pointer to the bitmap.
4683 * @param iBit The bit to test.
4684 *
4685  * @remarks 32-bit alignment of pvBitmap is not a strict requirement.
4686  *          However, aligning it will yield better performance and avoid
4687  *          traps when accessing the last bits in the bitmap.
4688 */
4689#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4690DECLASM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4691#else
4692DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4693{
4694 union { bool f; uint32_t u32; uint8_t u8; } rc;
4695# if RT_INLINE_ASM_USES_INTRIN
4696 rc.u32 = _bittest((long *)pvBitmap, iBit);
4697# elif RT_INLINE_ASM_GNU_STYLE
4698
4699 __asm__ __volatile__("btl %2, %1\n\t"
4700 "setc %b0\n\t"
4701 "andl $1, %0\n\t"
4702 : "=q" (rc.u32)
4703 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4704 "Ir" (iBit)
4705 : "memory");
4706# else
4707 __asm
4708 {
4709 mov edx, [iBit]
4710# ifdef RT_ARCH_AMD64
4711 mov rax, [pvBitmap]
4712 bt [rax], edx
4713# else
4714 mov eax, [pvBitmap]
4715 bt [eax], edx
4716# endif
4717 setc al
4718 and eax, 1
4719 mov [rc.u32], eax
4720 }
4721# endif
4722 return rc.f;
4723}
4724#endif
4725
4726
4727/**
4728 * Clears a bit range within a bitmap.
4729 *
4730 * @param pvBitmap Pointer to the bitmap.
4731  * @param   iBitStart   The first bit to clear.
4732 * @param iBitEnd The first bit not to clear.
4733 */
4734DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4735{
4736 if (iBitStart < iBitEnd)
4737 {
4738 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4739 int32_t iStart = iBitStart & ~31;
4740 int32_t iEnd = iBitEnd & ~31;
4741 if (iStart == iEnd)
4742 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4743 else
4744 {
4745 /* bits in first dword. */
4746 if (iBitStart & 31)
4747 {
4748 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4749 pu32++;
4750 iBitStart = iStart + 32;
4751 }
4752
4753 /* whole dword. */
4754 if (iBitStart != iEnd)
4755 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4756
4757 /* bits in last dword. */
4758 if (iBitEnd & 31)
4759 {
4760 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4761 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4762 }
4763 }
4764 }
4765}
4766
4767
4768/**
4769 * Sets a bit range within a bitmap.
4770 *
4771 * @param pvBitmap Pointer to the bitmap.
4772  * @param   iBitStart   The first bit to set.
4773 * @param iBitEnd The first bit not to set.
4774 */
4775DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4776{
4777 if (iBitStart < iBitEnd)
4778 {
4779 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4780 int32_t iStart = iBitStart & ~31;
4781 int32_t iEnd = iBitEnd & ~31;
4782 if (iStart == iEnd)
4783 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4784 else
4785 {
4786 /* bits in first dword. */
4787 if (iBitStart & 31)
4788 {
4789 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4790 pu32++;
4791 iBitStart = iStart + 32;
4792 }
4793
4794 /* whole dword. */
4795 if (iBitStart != iEnd)
4796 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4797
4798 /* bits in last dword. */
4799 if (iBitEnd & 31)
4800 {
4801 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4802 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4803 }
4804 }
4805 }
4806}
4807
4808
4809/**
4810 * Finds the first clear bit in a bitmap.
4811 *
4812 * @returns Index of the first zero bit.
4813 * @returns -1 if no clear bit was found.
4814 * @param pvBitmap Pointer to the bitmap.
4815 * @param cBits The number of bits in the bitmap. Multiple of 32.
4816 */
4817#if RT_INLINE_ASM_EXTERNAL
4818DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4819#else
4820DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4821{
4822 if (cBits)
4823 {
4824 int32_t iBit;
4825# if RT_INLINE_ASM_GNU_STYLE
4826 RTCCUINTREG uEAX, uECX, uEDI;
4827 cBits = RT_ALIGN_32(cBits, 32);
4828 __asm__ __volatile__("repe; scasl\n\t"
4829 "je 1f\n\t"
4830# ifdef RT_ARCH_AMD64
4831 "lea -4(%%rdi), %%rdi\n\t"
4832 "xorl (%%rdi), %%eax\n\t"
4833 "subq %5, %%rdi\n\t"
4834# else
4835 "lea -4(%%edi), %%edi\n\t"
4836 "xorl (%%edi), %%eax\n\t"
4837 "subl %5, %%edi\n\t"
4838# endif
4839 "shll $3, %%edi\n\t"
4840 "bsfl %%eax, %%edx\n\t"
4841 "addl %%edi, %%edx\n\t"
4842 "1:\t\n"
4843 : "=d" (iBit),
4844 "=&c" (uECX),
4845 "=&D" (uEDI),
4846 "=&a" (uEAX)
4847 : "0" (0xffffffff),
4848 "mr" (pvBitmap),
4849 "1" (cBits >> 5),
4850 "2" (pvBitmap),
4851 "3" (0xffffffff));
4852# else
4853 cBits = RT_ALIGN_32(cBits, 32);
4854 __asm
4855 {
4856# ifdef RT_ARCH_AMD64
4857 mov rdi, [pvBitmap]
4858 mov rbx, rdi
4859# else
4860 mov edi, [pvBitmap]
4861 mov ebx, edi
4862# endif
4863 mov edx, 0ffffffffh
4864 mov eax, edx
4865 mov ecx, [cBits]
4866 shr ecx, 5
4867 repe scasd
4868 je done
4869
4870# ifdef RT_ARCH_AMD64
4871 lea rdi, [rdi - 4]
4872 xor eax, [rdi]
4873 sub rdi, rbx
4874# else
4875 lea edi, [edi - 4]
4876 xor eax, [edi]
4877 sub edi, ebx
4878# endif
4879 shl edi, 3
4880 bsf edx, eax
4881 add edx, edi
4882 done:
4883 mov [iBit], edx
4884 }
4885# endif
4886 return iBit;
4887 }
4888 return -1;
4889}
4890#endif
4891
4892
4893/**
4894 * Finds the next clear bit in a bitmap.
4895 *
4896  * @returns Index of the next clear bit.
4897 * @returns -1 if no clear bit was found.
4898 * @param pvBitmap Pointer to the bitmap.
4899 * @param cBits The number of bits in the bitmap. Multiple of 32.
4900 * @param iBitPrev The bit returned from the last search.
4901 * The search will start at iBitPrev + 1.
4902 */
4903#if RT_INLINE_ASM_EXTERNAL
4904DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4905#else
4906DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4907{
4908 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
4909 int iBit = ++iBitPrev & 31;
4910 if (iBit)
4911 {
4912 /*
4913 * Inspect the 32-bit word containing the unaligned bit.
4914 */
4915 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4916
4917# if RT_INLINE_ASM_USES_INTRIN
4918 unsigned long ulBit = 0;
4919 if (_BitScanForward(&ulBit, u32))
4920 return ulBit + iBitPrev;
4921# else
4922# if RT_INLINE_ASM_GNU_STYLE
4923 __asm__ __volatile__("bsf %1, %0\n\t"
4924 "jnz 1f\n\t"
4925 "movl $-1, %0\n\t"
4926 "1:\n\t"
4927 : "=r" (iBit)
4928 : "r" (u32));
4929# else
4930 __asm
4931 {
4932 mov edx, [u32]
4933 bsf eax, edx
4934 jnz done
4935 mov eax, 0ffffffffh
4936 done:
4937 mov [iBit], eax
4938 }
4939# endif
4940 if (iBit >= 0)
4941 return iBit + iBitPrev;
4942# endif
4943
4944 /*
4945 * Skip ahead and see if there is anything left to search.
4946 */
4947 iBitPrev |= 31;
4948 iBitPrev++;
4949 if (cBits <= (uint32_t)iBitPrev)
4950 return -1;
4951 }
4952
4953 /*
4954 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4955 */
4956 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4957 if (iBit >= 0)
4958 iBit += iBitPrev;
4959 return iBit;
4960}
4961#endif
4962
4963
4964/**
4965 * Finds the first set bit in a bitmap.
4966 *
4967 * @returns Index of the first set bit.
4968  * @returns -1 if no set bit was found.
4969 * @param pvBitmap Pointer to the bitmap.
4970 * @param cBits The number of bits in the bitmap. Multiple of 32.
4971 */
4972#if RT_INLINE_ASM_EXTERNAL
4973DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4974#else
4975DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4976{
4977 if (cBits)
4978 {
4979 int32_t iBit;
4980# if RT_INLINE_ASM_GNU_STYLE
4981 RTCCUINTREG uEAX, uECX, uEDI;
4982 cBits = RT_ALIGN_32(cBits, 32);
4983 __asm__ __volatile__("repe; scasl\n\t"
4984 "je 1f\n\t"
4985# ifdef RT_ARCH_AMD64
4986 "lea -4(%%rdi), %%rdi\n\t"
4987 "movl (%%rdi), %%eax\n\t"
4988 "subq %5, %%rdi\n\t"
4989# else
4990 "lea -4(%%edi), %%edi\n\t"
4991 "movl (%%edi), %%eax\n\t"
4992 "subl %5, %%edi\n\t"
4993# endif
4994 "shll $3, %%edi\n\t"
4995 "bsfl %%eax, %%edx\n\t"
4996 "addl %%edi, %%edx\n\t"
4997 "1:\t\n"
4998 : "=d" (iBit),
4999 "=&c" (uECX),
5000 "=&D" (uEDI),
5001 "=&a" (uEAX)
5002 : "0" (0xffffffff),
5003 "mr" (pvBitmap),
5004 "1" (cBits >> 5),
5005 "2" (pvBitmap),
5006 "3" (0));
5007# else
5008 cBits = RT_ALIGN_32(cBits, 32);
5009 __asm
5010 {
5011# ifdef RT_ARCH_AMD64
5012 mov rdi, [pvBitmap]
5013 mov rbx, rdi
5014# else
5015 mov edi, [pvBitmap]
5016 mov ebx, edi
5017# endif
5018 mov edx, 0ffffffffh
5019 xor eax, eax
5020 mov ecx, [cBits]
5021 shr ecx, 5
5022 repe scasd
5023 je done
5024# ifdef RT_ARCH_AMD64
5025 lea rdi, [rdi - 4]
5026 mov eax, [rdi]
5027 sub rdi, rbx
5028# else
5029 lea edi, [edi - 4]
5030 mov eax, [edi]
5031 sub edi, ebx
5032# endif
5033 shl edi, 3
5034 bsf edx, eax
5035 add edx, edi
5036 done:
5037 mov [iBit], edx
5038 }
5039# endif
5040 return iBit;
5041 }
5042 return -1;
5043}
5044#endif
5045
5046
5047/**
5048 * Finds the next set bit in a bitmap.
5049 *
5050 * @returns Index of the next set bit.
5051 * @returns -1 if no set bit was found.
5052 * @param pvBitmap Pointer to the bitmap.
5053 * @param cBits The number of bits in the bitmap. Multiple of 32.
5054 * @param iBitPrev The bit returned from the last search.
5055 * The search will start at iBitPrev + 1.
5056 */
5057#if RT_INLINE_ASM_EXTERNAL
5058DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5059#else
5060DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5061{
5062 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5063 int iBit = ++iBitPrev & 31;
5064 if (iBit)
5065 {
5066 /*
5067 * Inspect the 32-bit word containing the unaligned bit.
5068 */
5069 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5070
5071# if RT_INLINE_ASM_USES_INTRIN
5072 unsigned long ulBit = 0;
5073 if (_BitScanForward(&ulBit, u32))
5074 return ulBit + iBitPrev;
5075# else
5076# if RT_INLINE_ASM_GNU_STYLE
5077 __asm__ __volatile__("bsf %1, %0\n\t"
5078 "jnz 1f\n\t"
5079 "movl $-1, %0\n\t"
5080 "1:\n\t"
5081 : "=r" (iBit)
5082 : "r" (u32));
5083# else
5084 __asm
5085 {
5086 mov edx, [u32]
5087 bsf eax, edx
5088 jnz done
5089 mov eax, 0ffffffffh
5090 done:
5091 mov [iBit], eax
5092 }
5093# endif
5094 if (iBit >= 0)
5095 return iBit + iBitPrev;
5096# endif
5097
5098 /*
5099 * Skip ahead and see if there is anything left to search.
5100 */
5101 iBitPrev |= 31;
5102 iBitPrev++;
5103 if (cBits <= (uint32_t)iBitPrev)
5104 return -1;
5105 }
5106
5107 /*
5108      * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5109 */
5110 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5111 if (iBit >= 0)
5112 iBit += iBitPrev;
5113 return iBit;
5114}
5115#endif
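
/*
 * Editorial usage sketch (not part of the original header): iterating over all
 * set bits in a bitmap by combining the First/Next searches.  cBits must be a
 * multiple of 32; the function and callback names are made up.
 *
 *     void ExampleForEachSetBit(uint32_t const volatile *pau32Bitmap, uint32_t cBits,
 *                               void (*pfnWorker)(int32_t iBit))
 *     {
 *         int32_t iBit = ASMBitFirstSet(pau32Bitmap, cBits);
 *         while (iBit >= 0)
 *         {
 *             pfnWorker(iBit);
 *             iBit = ASMBitNextSet(pau32Bitmap, cBits, iBit);
 *         }
 *     }
 */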
5116
5117
5118/**
5119 * Finds the first bit which is set in the given 32-bit integer.
5120 * Bits are numbered from 1 (least significant) to 32.
5121 *
5122 * @returns index [1..32] of the first set bit.
5123 * @returns 0 if all bits are cleared.
5124 * @param u32 Integer to search for set bits.
5125 * @remarks Similar to ffs() in BSD.
5126 */
5127#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5128DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5129#else
5130DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5131{
5132# if RT_INLINE_ASM_USES_INTRIN
5133 unsigned long iBit;
5134 if (_BitScanForward(&iBit, u32))
5135 iBit++;
5136 else
5137 iBit = 0;
5138# elif RT_INLINE_ASM_GNU_STYLE
5139 uint32_t iBit;
5140 __asm__ __volatile__("bsf %1, %0\n\t"
5141 "jnz 1f\n\t"
5142 "xorl %0, %0\n\t"
5143 "jmp 2f\n"
5144 "1:\n\t"
5145 "incl %0\n"
5146 "2:\n\t"
5147 : "=r" (iBit)
5148 : "rm" (u32));
5149# else
5150 uint32_t iBit;
5151 _asm
5152 {
5153 bsf eax, [u32]
5154 jnz found
5155 xor eax, eax
5156 jmp done
5157 found:
5158 inc eax
5159 done:
5160 mov [iBit], eax
5161 }
5162# endif
5163 return iBit;
5164}
5165#endif
5166
5167
5168/**
5169 * Finds the first bit which is set in the given 32-bit integer.
5170 * Bits are numbered from 1 (least significant) to 32.
5171 *
5172 * @returns index [1..32] of the first set bit.
5173 * @returns 0 if all bits are cleared.
5174 * @param i32 Integer to search for set bits.
5175 * @remark Similar to ffs() in BSD.
5176 */
5177DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5178{
5179 return ASMBitFirstSetU32((uint32_t)i32);
5180}
5181
5182
5183/**
5184 * Finds the first bit which is set in the given 64-bit integer.
5185 *
5186 * Bits are numbered from 1 (least significant) to 64.
5187 *
5188 * @returns index [1..64] of the first set bit.
5189 * @returns 0 if all bits are cleared.
5190 * @param u64 Integer to search for set bits.
5191 * @remarks Similar to ffs() in BSD.
5192 */
5193#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5194DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5195#else
5196DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5197{
5198# if RT_INLINE_ASM_USES_INTRIN
5199 unsigned long iBit;
5200# if ARCH_BITS == 64
5201 if (_BitScanForward64(&iBit, u64))
5202 iBit++;
5203 else
5204 iBit = 0;
5205# else
5206 if (_BitScanForward(&iBit, (uint32_t)u64))
5207 iBit++;
5208 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5209 iBit += 33;
5210 else
5211 iBit = 0;
5212# endif
5213# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5214 uint64_t iBit;
5215 __asm__ __volatile__("bsfq %1, %0\n\t"
5216 "jnz 1f\n\t"
5217 "xorl %k0, %k0\n\t"
5218 "jmp 2f\n"
5219 "1:\n\t"
5220 "incl %k0\n"
5221 "2:\n\t"
5222 : "=r" (iBit)
5223 : "rm" (u64));
5224# else
5225 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5226 if (!iBit)
5227 {
5228 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5229 if (iBit)
5230 iBit += 32;
5231 }
5232# endif
5233 return (unsigned)iBit;
5234}
5235#endif
5236
5237
5238/**
5239 * Finds the first bit which is set in the given 16-bit integer.
5240 *
5241 * Bits are numbered from 1 (least significant) to 16.
5242 *
5243 * @returns index [1..16] of the first set bit.
5244 * @returns 0 if all bits are cleared.
5245 * @param u16 Integer to search for set bits.
5246 * @remarks For 16-bit bs3kit code.
5247 */
5248#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5249DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5250#else
5251DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5252{
5253 return ASMBitFirstSetU32((uint32_t)u16);
5254}
5255#endif
5256
5257
5258/**
5259 * Finds the last bit which is set in the given 32-bit integer.
5260 * Bits are numbered from 1 (least significant) to 32.
5261 *
5262 * @returns index [1..32] of the last set bit.
5263 * @returns 0 if all bits are cleared.
5264 * @param u32 Integer to search for set bits.
5265 * @remark Similar to fls() in BSD.
5266 */
5267#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5268DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5269#else
5270DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5271{
5272# if RT_INLINE_ASM_USES_INTRIN
5273 unsigned long iBit;
5274 if (_BitScanReverse(&iBit, u32))
5275 iBit++;
5276 else
5277 iBit = 0;
5278# elif RT_INLINE_ASM_GNU_STYLE
5279 uint32_t iBit;
5280 __asm__ __volatile__("bsrl %1, %0\n\t"
5281 "jnz 1f\n\t"
5282 "xorl %0, %0\n\t"
5283 "jmp 2f\n"
5284 "1:\n\t"
5285 "incl %0\n"
5286 "2:\n\t"
5287 : "=r" (iBit)
5288 : "rm" (u32));
5289# else
5290 uint32_t iBit;
5291 _asm
5292 {
5293 bsr eax, [u32]
5294 jnz found
5295 xor eax, eax
5296 jmp done
5297 found:
5298 inc eax
5299 done:
5300 mov [iBit], eax
5301 }
5302# endif
5303 return iBit;
5304}
5305#endif
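
/*
 * Editorial usage sketch (not part of the original header): since the return
 * value is 1-based, the position of the most significant set bit gives
 * floor(log2(u32)) after subtracting one.  The function name is made up.
 *
 *     unsigned ExampleLog2Floor(uint32_t u32)
 *     {
 *         unsigned iMsb = ASMBitLastSetU32(u32);  // 1..32, or 0 when u32 is zero
 *         Assert(iMsb > 0);                       // log2(0) is undefined
 *         return iMsb - 1;
 *     }
 */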
5306
5307
5308/**
5309 * Finds the last bit which is set in the given 32-bit integer.
5310 * Bits are numbered from 1 (least significant) to 32.
5311 *
5312 * @returns index [1..32] of the last set bit.
5313 * @returns 0 if all bits are cleared.
5314 * @param i32 Integer to search for set bits.
5315 * @remark Similar to fls() in BSD.
5316 */
5317DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5318{
5319 return ASMBitLastSetU32((uint32_t)i32);
5320}
5321
5322
5323/**
5324 * Finds the last bit which is set in the given 64-bit integer.
5325 *
5326 * Bits are numbered from 1 (least significant) to 64.
5327 *
5328 * @returns index [1..64] of the last set bit.
5329 * @returns 0 if all bits are cleared.
5330 * @param u64 Integer to search for set bits.
5331 * @remark Similar to fls() in BSD.
5332 */
5333#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5334DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5335#else
5336DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5337{
5338# if RT_INLINE_ASM_USES_INTRIN
5339 unsigned long iBit;
5340# if ARCH_BITS == 64
5341 if (_BitScanReverse64(&iBit, u64))
5342 iBit++;
5343 else
5344 iBit = 0;
5345# else
5346 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5347 iBit += 33;
5348 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5349 iBit++;
5350 else
5351 iBit = 0;
5352# endif
5353# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5354 uint64_t iBit;
5355 __asm__ __volatile__("bsrq %1, %0\n\t"
5356 "jnz 1f\n\t"
5357 "xorl %k0, %k0\n\t"
5358 "jmp 2f\n"
5359 "1:\n\t"
5360 "incl %k0\n"
5361 "2:\n\t"
5362 : "=r" (iBit)
5363 : "rm" (u64));
5364# else
5365 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5366 if (iBit)
5367 iBit += 32;
5368 else
5369 iBit = ASMBitLastSetU32((uint32_t)u64);
5370 # endif
5371 return (unsigned)iBit;
5372}
5373#endif
5374
5375
5376/**
5377 * Finds the last bit which is set in the given 16-bit integer.
5378 *
5379 * Bits are numbered from 1 (least significant) to 16.
5380 *
5381 * @returns index [1..16] of the last set bit.
5382 * @returns 0 if all bits are cleared.
5383 * @param u16 Integer to search for set bits.
5384 * @remarks For 16-bit bs3kit code.
5385 */
5386#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5387DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5388#else
5389DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5390{
5391 return ASMBitLastSetU32((uint32_t)u16);
5392}
5393#endif
5394
5395
5396/**
5397 * Reverse the byte order of the given 16-bit integer.
5398 *
5399  * @returns The byte swapped value.
5400 * @param u16 16-bit integer value.
5401 */
5402#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5403DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5404#else
5405DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5406{
5407# if RT_INLINE_ASM_USES_INTRIN
5408 u16 = _byteswap_ushort(u16);
5409# elif RT_INLINE_ASM_GNU_STYLE
5410 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5411# else
5412 _asm
5413 {
5414 mov ax, [u16]
5415 ror ax, 8
5416 mov [u16], ax
5417 }
5418# endif
5419 return u16;
5420}
5421#endif
5422
5423
5424/**
5425 * Reverse the byte order of the given 32-bit integer.
5426 *
5427  * @returns The byte swapped value.
5428 * @param u32 32-bit integer value.
5429 */
5430#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5431DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5432#else
5433DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5434{
5435# if RT_INLINE_ASM_USES_INTRIN
5436 u32 = _byteswap_ulong(u32);
5437# elif RT_INLINE_ASM_GNU_STYLE
5438 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5439# else
5440 _asm
5441 {
5442 mov eax, [u32]
5443 bswap eax
5444 mov [u32], eax
5445 }
5446# endif
5447 return u32;
5448}
5449#endif
5450
5451
5452/**
5453 * Reverse the byte order of the given 64-bit integer.
5454 *
5455  * @returns The byte swapped value.
5456 * @param u64 64-bit integer value.
5457 */
5458DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5459{
5460#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5461 u64 = _byteswap_uint64(u64);
5462#else
5463 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5464 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5465#endif
5466 return u64;
5467}
5468
5469
5470/**
5471 * Rotate 32-bit unsigned value to the left by @a cShift.
5472 *
5473 * @returns Rotated value.
5474 * @param u32 The value to rotate.
5475 * @param cShift How many bits to rotate by.
5476 */
5477#ifdef __WATCOMC__
5478DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5479#else
5480DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5481{
5482# if RT_INLINE_ASM_USES_INTRIN
5483 return _rotl(u32, cShift);
5484# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5485 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5486 return u32;
5487# else
5488 cShift &= 31;
5489 return (u32 << cShift) | (u32 >> (32 - cShift));
5490# endif
5491}
5492#endif
5493
5494
5495/**
5496 * Rotate 32-bit unsigned value to the right by @a cShift.
5497 *
5498 * @returns Rotated value.
5499 * @param u32 The value to rotate.
5500 * @param cShift How many bits to rotate by.
5501 */
5502#ifdef __WATCOMC__
5503DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5504#else
5505DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5506{
5507# if RT_INLINE_ASM_USES_INTRIN
5508 return _rotr(u32, cShift);
5509# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5510 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5511 return u32;
5512# else
5513 cShift &= 31;
5514 return (u32 >> cShift) | (u32 << (32 - cShift));
5515# endif
5516}
5517#endif
5518
5519
5520/**
5521 * Rotate 64-bit unsigned value to the left by @a cShift.
5522 *
5523 * @returns Rotated value.
5524 * @param u64 The value to rotate.
5525 * @param cShift How many bits to rotate by.
5526 */
5527DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5528{
5529#if RT_INLINE_ASM_USES_INTRIN
5530 return _rotl64(u64, cShift);
5531#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5532 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5533 return u64;
5534#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5535 uint32_t uSpill;
5536 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5537 "jz 1f\n\t"
5538 "xchgl %%eax, %%edx\n\t"
5539 "1:\n\t"
5540 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5541 "jz 2f\n\t"
5542 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5543 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5544 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5545 "2:\n\t" /* } */
5546 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5547 : "0" (u64),
5548 "1" (cShift));
5549 return u64;
5550#else
5551 cShift &= 63;
5552 return (u64 << cShift) | (u64 >> (64 - cShift));
5553#endif
5554}
5555
5556
5557/**
5558 * Rotate 64-bit unsigned value to the right by @a cShift.
5559 *
5560 * @returns Rotated value.
5561 * @param u64 The value to rotate.
5562 * @param cShift How many bits to rotate by.
5563 */
5564DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5565{
5566#if RT_INLINE_ASM_USES_INTRIN
5567 return _rotr64(u64, cShift);
5568#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5569 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5570 return u64;
5571#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5572 uint32_t uSpill;
5573 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5574 "jz 1f\n\t"
5575 "xchgl %%eax, %%edx\n\t"
5576 "1:\n\t"
5577 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5578 "jz 2f\n\t"
5579 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5580 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5581 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5582 "2:\n\t" /* } */
5583 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5584 : "0" (u64),
5585 "1" (cShift));
5586 return u64;
5587#else
5588 cShift &= 63;
5589 return (u64 >> cShift) | (u64 << (64 - cShift));
5590#endif
5591}
5592
5593/** @} */
5594
5595
5596/** @} */
5597
5598#endif
5599