VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 74704

Last change on this file since 74704 was 74456, checked in by vboxsync, 6 years ago

iprt/asm.h: doxygen fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 164.0 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is that
103 * the former will complete outstanding reads and writes before continuing
104 * while the latter don't make any promises about the order. Ordered
105 * operations don't, it seems, make any 100% promise with respect to whether
106 * the operation will complete before any subsequent memory access.
107 * (please, correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
118 * static inline uint64_t rdmsr_low(int idx)
119 * {
120 * uint32_t low;
121 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
122 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
135 * @{
136 */
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
140 * Used to work around some 4.3.x register allocation issues in this version of
141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 * Set to 1 when inline cmpxchg8b code must be avoided, 0 when it may be used.
 * A value supplied by the build environment takes precedence.
 *
 * Background: i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493)
 * screws up RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release
 * builds (PIC mode, x86).  Some gcc 4.3.x versions may also have register
 * allocation issues with cmpxchg8b when in PIC mode on x86.
 */
#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__)
   /* Watcom has trouble with the PIC/x86 expression further down. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# elif defined(_MSC_VER)
   /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled.
      NOTE(review): the warning number may be a typo for C4668 - confirm. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# elif (defined(PIC) || defined(__PIC__)) && defined(RT_ARCH_X86) && (RT_INLINE_ASM_GCC_4_3_X_X86 || defined(RT_OS_DARWIN))
   /* PIC build for 32-bit x86 with an affected GCC or on Darwin. */
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
# else
#  define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
# endif
#endif
171
172
/** @def ASMReturnAddress
 * Expands to an expression giving the return address of the current (or
 * calling, if you like) function or method.
 *
 * Visual C++ uses the _ReturnAddress intrinsic, GCC (and doxygen runs) use
 * __builtin_return_address(0).  For Watcom the macro expands to a call to an
 * undeclared function, and any other compiler is rejected with an error.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress()     _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress()     __builtin_return_address(0)
#elif defined(__WATCOMC__)
# define ASMReturnAddress()     Watcom_does_not_appear_to_have_intrinsic_return_address_function()
#else
# error "Unsupported compiler."
#endif
190
191
192/**
193 * Compiler memory barrier.
194 *
195 * Ensure that the compiler does not use any cached (register/tmp stack) memory
196 * values or any outstanding writes when returning from this function.
197 *
198 * This function must be used if non-volatile data is modified by a
199 * device or the VMM. Typical cases are port access, MMIO access,
200 * trapping instruction, etc.
201 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
217
/** @def ASMBreakpoint
 * Debugger breakpoint; a plain alias for RT_BREAKPOINT().
 *
 * @deprecated  Use RT_BREAKPOINT instead.
 * @internal
 */
#define ASMBreakpoint()     RT_BREAKPOINT()
224
225
226/**
227 * Spinloop hint for platforms that have these, empty function on the other
228 * platforms.
229 *
230 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
231 * spin locks.
232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
253
254/**
255 * Atomically Exchange an unsigned 8-bit value, ordered.
256 *
257 * @returns Current *pu8 value
258 * @param pu8 Pointer to the 8-bit variable to update.
259 * @param u8 The 8-bit value to assign to *pu8.
260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pu8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the 8-bit variable to update.
311 * @param f The 8-bit value to assign to *pi8.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
324 * Atomically Exchange an unsigned 16-bit value, ordered.
325 *
326 * @returns Current *pu16 value
327 * @param pu16 Pointer to the 16-bit variable to update.
328 * @param u16 The 16-bit value to assign to *pu16.
329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pu16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
372}
373
374
375/**
376 * Atomically Exchange an unsigned 32-bit value, ordered.
377 *
378 * @returns Current *pu32 value
379 * @param pu32 Pointer to the 32-bit variable to update.
380 * @param u32 The 32-bit value to assign to *pu32.
381 *
382 * @remarks Does not work on 286 and earlier.
383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
415 return u32;
416}
417#endif
418
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pu32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
430}
431
432
433/**
434 * Atomically Exchange an unsigned 64-bit value, ordered.
435 *
436 * @returns Current *pu64 value
437 * @param pu64 Pointer to the 64-bit variable to update.
438 * @param u64 The 64-bit value to assign to *pu64.
439 *
440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange an signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a size_t value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param puDst Pointer to the size_t variable to update.
533 * @param uNew The new value to assign to *puDst.
534 */
535DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew)
536{
537#if ARCH_BITS == 16
538 AssertCompile(sizeof(size_t) == 2);
539 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
540#elif ARCH_BITS == 32
541 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
542#elif ARCH_BITS == 64
543 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
544#else
545# error "ARCH_BITS is bogus"
546#endif
547}
548
549
550/**
551 * Atomically Exchange a pointer value, ordered.
552 *
553 * @returns Current *ppv value
554 * @param ppv Pointer to the pointer variable to update.
555 * @param pv The pointer value to assign to *ppv.
556 */
557DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
558{
559#if ARCH_BITS == 32 || ARCH_BITS == 16
560 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
561#elif ARCH_BITS == 64
562 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
563#else
564# error "ARCH_BITS is bogus"
565#endif
566}
567
568
/**
 * Convenience macro that hides the casting needed by ASMAtomicXchgPtr.
 *
 * With GCC the arguments are first assigned to typed temporaries, so type
 * mismatches between them are diagnosed by the compiler; other compilers get
 * plain casts.
 *
 * @returns The value *ppv held before the exchange, as @a Type.
 * @param   ppv     Pointer to the pointer variable to update.
 * @param   pv      The pointer value to store in *ppv.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type                        const pvTypeChecked    = (pv); \
        Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
        pvTypeCheckedRet; \
    })
#else
# define ASMAtomicXchgPtrT(ppv, pv, Type) \
    (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
#endif
590
591
592/**
593 * Atomically Exchange a raw-mode context pointer value, ordered.
594 *
595 * @returns Current *ppv value
596 * @param ppvRC Pointer to the pointer variable to update.
597 * @param pvRC The pointer value to assign to *ppv.
598 */
599DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
600{
601 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
602}
603
604
605/**
606 * Atomically Exchange a ring-0 pointer value, ordered.
607 *
608 * @returns Current *ppv value
609 * @param ppvR0 Pointer to the pointer variable to update.
610 * @param pvR0 The pointer value to assign to *ppv.
611 */
612DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
613{
614#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
615 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
616#elif R0_ARCH_BITS == 64
617 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
618#else
619# error "R0_ARCH_BITS is bogus"
620#endif
621}
622
623
624/**
625 * Atomically Exchange a ring-3 pointer value, ordered.
626 *
627 * @returns Current *ppv value
628 * @param ppvR3 Pointer to the pointer variable to update.
629 * @param pvR3 The pointer value to assign to *ppv.
630 */
631DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
632{
633#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
634 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
635#elif R3_ARCH_BITS == 64
636 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
637#else
638# error "R3_ARCH_BITS is bogus"
639#endif
640}
641
642
643/** @def ASMAtomicXchgHandle
644 * Atomically Exchange a typical IPRT handle value, ordered.
645 *
646 * @param ph Pointer to the value to update.
647 * @param hNew The new value to assigned to *pu.
648 * @param phRes Where to store the current *ph value.
649 *
650 * @remarks This doesn't currently work for all handles (like RTFILE).
651 */
652#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
653# define ASMAtomicXchgHandle(ph, hNew, phRes) \
654 do { \
655 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
656 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
657 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
658 } while (0)
659#elif HC_ARCH_BITS == 64
660# define ASMAtomicXchgHandle(ph, hNew, phRes) \
661 do { \
662 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
663 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
664 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
665 } while (0)
666#else
667# error HC_ARCH_BITS
668#endif
669
670
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange
 * function; any other size triggers an assertion.  The previous value is
 * discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to store in *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
689
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered, and hands back the previous value.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange
 * function; any other size triggers an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to store in *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 *                  It is written using the same width as *pu.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
708
709
710
711/**
712 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
713 *
714 * @returns true if xchg was done.
715 * @returns false if xchg wasn't done.
716 *
717 * @param pu8 Pointer to the value to update.
718 * @param u8New The new value to assigned to *pu8.
719 * @param u8Old The old value to *pu8 compare with.
720 *
721 * @remarks x86: Requires a 486 or later.
722 */
723#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
724DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
725#else
726DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
727{
728 uint8_t u8Ret;
729 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
730 "setz %1\n\t"
731 : "=m" (*pu8),
732 "=qm" (u8Ret),
733 "=a" (u8Old)
734 : "q" (u8New),
735 "2" (u8Old),
736 "m" (*pu8));
737 return (bool)u8Ret;
738}
739#endif
740
741
742/**
743 * Atomically Compare and Exchange a signed 8-bit value, ordered.
744 *
745 * @returns true if xchg was done.
746 * @returns false if xchg wasn't done.
747 *
748 * @param pi8 Pointer to the value to update.
749 * @param i8New The new value to assigned to *pi8.
750 * @param i8Old The old value to *pi8 compare with.
751 *
752 * @remarks x86: Requires a 486 or later.
753 */
754DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
755{
756 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
757}
758
759
760/**
761 * Atomically Compare and Exchange a bool value, ordered.
762 *
763 * @returns true if xchg was done.
764 * @returns false if xchg wasn't done.
765 *
766 * @param pf Pointer to the value to update.
767 * @param fNew The new value to assigned to *pf.
768 * @param fOld The old value to *pf compare with.
769 *
770 * @remarks x86: Requires a 486 or later.
771 */
772DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
773{
774 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
775}
776
777
778/**
779 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
780 *
781 * @returns true if xchg was done.
782 * @returns false if xchg wasn't done.
783 *
784 * @param pu32 Pointer to the value to update.
785 * @param u32New The new value to assigned to *pu32.
786 * @param u32Old The old value to *pu32 compare with.
787 *
788 * @remarks x86: Requires a 486 or later.
789 */
790#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
791DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
792#else
793DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
794{
795# if RT_INLINE_ASM_GNU_STYLE
796 uint8_t u8Ret;
797 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
798 "setz %1\n\t"
799 : "=m" (*pu32),
800 "=qm" (u8Ret),
801 "=a" (u32Old)
802 : "r" (u32New),
803 "2" (u32Old),
804 "m" (*pu32));
805 return (bool)u8Ret;
806
807# elif RT_INLINE_ASM_USES_INTRIN
808 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
809
810# else
811 uint32_t u32Ret;
812 __asm
813 {
814# ifdef RT_ARCH_AMD64
815 mov rdx, [pu32]
816# else
817 mov edx, [pu32]
818# endif
819 mov eax, [u32Old]
820 mov ecx, [u32New]
821# ifdef RT_ARCH_AMD64
822 lock cmpxchg [rdx], ecx
823# else
824 lock cmpxchg [edx], ecx
825# endif
826 setz al
827 movzx eax, al
828 mov [u32Ret], eax
829 }
830 return !!u32Ret;
831# endif
832}
833#endif
834
835
836/**
837 * Atomically Compare and Exchange a signed 32-bit value, ordered.
838 *
839 * @returns true if xchg was done.
840 * @returns false if xchg wasn't done.
841 *
842 * @param pi32 Pointer to the value to update.
843 * @param i32New The new value to assigned to *pi32.
844 * @param i32Old The old value to *pi32 compare with.
845 *
846 * @remarks x86: Requires a 486 or later.
847 */
848DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
849{
850 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
851}
852
853
854/**
855 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
856 *
857 * @returns true if xchg was done.
858 * @returns false if xchg wasn't done.
859 *
860 * @param pu64 Pointer to the 64-bit variable to update.
861 * @param u64New The 64-bit value to assign to *pu64.
862 * @param u64Old The value to compare with.
863 *
864 * @remarks x86: Requires a Pentium or later.
865 */
866#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
867 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
868DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
869#else
870DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
871{
872# if RT_INLINE_ASM_USES_INTRIN
873 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
874
875# elif defined(RT_ARCH_AMD64)
876# if RT_INLINE_ASM_GNU_STYLE
877 uint8_t u8Ret;
878 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
879 "setz %1\n\t"
880 : "=m" (*pu64),
881 "=qm" (u8Ret),
882 "=a" (u64Old)
883 : "r" (u64New),
884 "2" (u64Old),
885 "m" (*pu64));
886 return (bool)u8Ret;
887# else
888 bool fRet;
889 __asm
890 {
891 mov rdx, [pu32]
892 mov rax, [u64Old]
893 mov rcx, [u64New]
894 lock cmpxchg [rdx], rcx
895 setz al
896 mov [fRet], al
897 }
898 return fRet;
899# endif
900# else /* !RT_ARCH_AMD64 */
901 uint32_t u32Ret;
902# if RT_INLINE_ASM_GNU_STYLE
903# if defined(PIC) || defined(__PIC__)
904 uint32_t u32EBX = (uint32_t)u64New;
905 uint32_t u32Spill;
906 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
907 "lock; cmpxchg8b (%6)\n\t"
908 "setz %%al\n\t"
909 "movl %4, %%ebx\n\t"
910 "movzbl %%al, %%eax\n\t"
911 : "=a" (u32Ret),
912 "=d" (u32Spill),
913# if RT_GNUC_PREREQ(4, 3)
914 "+m" (*pu64)
915# else
916 "=m" (*pu64)
917# endif
918 : "A" (u64Old),
919 "m" ( u32EBX ),
920 "c" ( (uint32_t)(u64New >> 32) ),
921 "S" (pu64));
922# else /* !PIC */
923 uint32_t u32Spill;
924 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
925 "setz %%al\n\t"
926 "movzbl %%al, %%eax\n\t"
927 : "=a" (u32Ret),
928 "=d" (u32Spill),
929 "+m" (*pu64)
930 : "A" (u64Old),
931 "b" ( (uint32_t)u64New ),
932 "c" ( (uint32_t)(u64New >> 32) ));
933# endif
934 return (bool)u32Ret;
935# else
936 __asm
937 {
938 mov ebx, dword ptr [u64New]
939 mov ecx, dword ptr [u64New + 4]
940 mov edi, [pu64]
941 mov eax, dword ptr [u64Old]
942 mov edx, dword ptr [u64Old + 4]
943 lock cmpxchg8b [edi]
944 setz al
945 movzx eax, al
946 mov dword ptr [u32Ret], eax
947 }
948 return !!u32Ret;
949# endif
950# endif /* !RT_ARCH_AMD64 */
951}
952#endif
953
954
955/**
956 * Atomically Compare and exchange a signed 64-bit value, ordered.
957 *
958 * @returns true if xchg was done.
959 * @returns false if xchg wasn't done.
960 *
961 * @param pi64 Pointer to the 64-bit variable to update.
962 * @param i64 The 64-bit value to assign to *pu64.
963 * @param i64Old The value to compare with.
964 *
965 * @remarks x86: Requires a Pentium or later.
966 */
967DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
968{
969 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
970}
971
972
973/**
974 * Atomically Compare and Exchange a pointer value, ordered.
975 *
976 * @returns true if xchg was done.
977 * @returns false if xchg wasn't done.
978 *
979 * @param ppv Pointer to the value to update.
980 * @param pvNew The new value to assigned to *ppv.
981 * @param pvOld The old value to *ppv compare with.
982 *
983 * @remarks x86: Requires a 486 or later.
984 */
985DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
986{
987#if ARCH_BITS == 32 || ARCH_BITS == 16
988 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
989#elif ARCH_BITS == 64
990 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
991#else
992# error "ARCH_BITS is bogus"
993#endif
994}
995
996
/**
 * Atomically compares and exchanges a pointer value, ordered.
 *
 * Wrapper around ASMAtomicCmpXchgPtrVoid that takes care of the casting.
 * With GCC the three arguments are first assigned to temporaries typed after
 * *ppv, so mismatching pointer types are diagnosed by the compiler.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to store in *ppv.
 * @param   pvOld   The value *ppv is compared with.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked   = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked = (pvOld); \
        bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
                                                 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
        fMacroRet; \
    })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
#endif
1025
1026
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * The handle is treated as a 32-bit or 64-bit unsigned integer, selected by
 * HC_ARCH_BITS / ARCH_BITS; a compile time assertion checks that *ph has
 * exactly that size.
 *
 * @param   ph      Pointer to the value to update.
 * @param   hNew    The new value to assign to *ph.
 * @param   hOld    The old value to compare *ph with.
 * @param   fRc     Where to store the result (true if the exchange was done).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 * @remarks x86: Requires a 486 or later.
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
       (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
   } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
   do { \
       AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
       (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
   } while (0)
#else
# error HC_ARCH_BITS
#endif
1053
1054
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 32-bit or 64-bit worker.  Any other size
 * triggers an assertion and sets @a fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result (true if the exchange was done).
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1078
1079
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 * @param   pu32Old     Where to store the value found in *pu32 by the
 *                      operation.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* eax goes in holding u32Old and comes out holding the value found in
       memory; setz turns the resulting zero flag into the return value. */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; the exchange happened if it
       equals the comparand. */
    return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;

# else
    /* The MOV instructions storing the old value do not touch the flags, so
       setz still sees the zero flag produced by cmpxchg. */
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
1142
1143
1144/**
1145 * Atomically Compare and Exchange a signed 32-bit value, additionally
1146 * passes back old value, ordered.
1147 *
1148 * @returns true if xchg was done.
1149 * @returns false if xchg wasn't done.
1150 *
1151 * @param pi32 Pointer to the value to update.
1152 * @param i32New The new value to assigned to *pi32.
1153 * @param i32Old The old value to *pi32 compare with.
1154 * @param pi32Old Pointer store the old value at.
1155 *
1156 * @remarks x86: Requires a 486 or later.
1157 */
1158DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1159{
1160 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1161}
1162
1163
1164/**
1165 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1166 * passing back old value, ordered.
1167 *
1168 * @returns true if xchg was done.
1169 * @returns false if xchg wasn't done.
1170 *
1171 * @param pu64 Pointer to the 64-bit variable to update.
1172 * @param u64New The 64-bit value to assign to *pu64.
1173 * @param u64Old The value to compare with.
1174 * @param pu64Old Pointer store the old value at.
1175 *
1176 * @remarks x86: Requires a Pentium or later.
1177 */
1178#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1179 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1180DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1181#else
1182DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1183{
1184# if RT_INLINE_ASM_USES_INTRIN
1185 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1186
1187# elif defined(RT_ARCH_AMD64)
1188# if RT_INLINE_ASM_GNU_STYLE
1189 uint8_t u8Ret;
1190 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1191 "setz %1\n\t"
1192 : "=m" (*pu64),
1193 "=qm" (u8Ret),
1194 "=a" (*pu64Old)
1195 : "r" (u64New),
1196 "a" (u64Old),
1197 "m" (*pu64));
1198 return (bool)u8Ret;
1199# else
1200 bool fRet;
1201 __asm
1202 {
1203 mov rdx, [pu32]
1204 mov rax, [u64Old]
1205 mov rcx, [u64New]
1206 lock cmpxchg [rdx], rcx
1207 mov rdx, [pu64Old]
1208 mov [rdx], rax
1209 setz al
1210 mov [fRet], al
1211 }
1212 return fRet;
1213# endif
1214# else /* !RT_ARCH_AMD64 */
1215# if RT_INLINE_ASM_GNU_STYLE
1216 uint64_t u64Ret;
1217# if defined(PIC) || defined(__PIC__)
1218 /* NB: this code uses a memory clobber description, because the clean
1219 * solution with an output value for *pu64 makes gcc run out of registers.
1220 * This will cause suboptimal code, and anyone with a better solution is
1221 * welcome to improve this. */
1222 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1223 "lock; cmpxchg8b %3\n\t"
1224 "xchgl %%ebx, %1\n\t"
1225 : "=A" (u64Ret)
1226 : "DS" ((uint32_t)u64New),
1227 "c" ((uint32_t)(u64New >> 32)),
1228 "m" (*pu64),
1229 "0" (u64Old)
1230 : "memory" );
1231# else /* !PIC */
1232 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1233 : "=A" (u64Ret),
1234 "=m" (*pu64)
1235 : "b" ((uint32_t)u64New),
1236 "c" ((uint32_t)(u64New >> 32)),
1237 "m" (*pu64),
1238 "0" (u64Old));
1239# endif
1240 *pu64Old = u64Ret;
1241 return u64Ret == u64Old;
1242# else
1243 uint32_t u32Ret;
1244 __asm
1245 {
1246 mov ebx, dword ptr [u64New]
1247 mov ecx, dword ptr [u64New + 4]
1248 mov edi, [pu64]
1249 mov eax, dword ptr [u64Old]
1250 mov edx, dword ptr [u64Old + 4]
1251 lock cmpxchg8b [edi]
1252 mov ebx, [pu64Old]
1253 mov [ebx], eax
1254 setz al
1255 movzx eax, al
1256 add ebx, 4
1257 mov [ebx], edx
1258 mov dword ptr [u32Ret], eax
1259 }
1260 return !!u32Ret;
1261# endif
1262# endif /* !RT_ARCH_AMD64 */
1263}
1264#endif
1265
1266
1267/**
1268 * Atomically Compare and exchange a signed 64-bit value, additionally
1269 * passing back old value, ordered.
1270 *
1271 * @returns true if xchg was done.
1272 * @returns false if xchg wasn't done.
1273 *
1274 * @param pi64 Pointer to the 64-bit variable to update.
1275 * @param i64 The 64-bit value to assign to *pu64.
1276 * @param i64Old The value to compare with.
1277 * @param pi64Old Pointer store the old value at.
1278 *
1279 * @remarks x86: Requires a Pentium or later.
1280 */
1281DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1282{
1283 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1284}
1285
1286/** @def ASMAtomicCmpXchgExHandle
1287 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1288 *
1289 * @param ph Pointer to the value to update.
1290 * @param hNew The new value to assigned to *pu.
1291 * @param hOld The old value to *pu compare with.
1292 * @param fRc Where to store the result.
1293 * @param phOldVal Pointer to where to store the old value.
1294 *
1295 * @remarks This doesn't currently work for all handles (like RTFILE).
1296 */
1297#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1298# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1299 do { \
1300 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1301 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1302 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1303 } while (0)
1304#elif HC_ARCH_BITS == 64
1305# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1306 do { \
1307 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1308 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1309 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1310 } while (0)
1311#else
1312# error HC_ARCH_BITS
1313#endif
1314
1315
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*pu) to the 32-bit or 64-bit worker.  Any other size
 * triggers an assertion, sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result (true if the exchange was
 *                      done).
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1341
1342
1343/**
1344 * Atomically Compare and Exchange a pointer value, additionally
1345 * passing back old value, ordered.
1346 *
1347 * @returns true if xchg was done.
1348 * @returns false if xchg wasn't done.
1349 *
1350 * @param ppv Pointer to the value to update.
1351 * @param pvNew The new value to assigned to *ppv.
1352 * @param pvOld The old value to *ppv compare with.
1353 * @param ppvOld Pointer store the old value at.
1354 *
1355 * @remarks x86: Requires a 486 or later.
1356 */
1357DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1358 void RT_FAR * RT_FAR *ppvOld)
1359{
1360#if ARCH_BITS == 32 || ARCH_BITS == 16
1361 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1362#elif ARCH_BITS == 64
1363 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1364#else
1365# error "ARCH_BITS is bogus"
1366#endif
1367}
1368
1369
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * Type checking front end for ASMAtomicCmpXchgExPtrVoid.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms: the GNU variant
 *          first assigns the arguments to temporaries typed after *ppv and
 *          only then casts them to void pointers.  The other variant casts
 *          the arguments directly.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) *          const ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                                   (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                                   (void **)ppvOldTypeChecked); \
        fMacroRet; \
     })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#endif
1402
1403
/**
 * Virtualization unfriendly serializing instruction, always exits.
 *
 * Executes CPUID with leaf 0 and discards the result.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSerializeInstructionCpuId(void);
#else
DECLINLINE(void) ASMSerializeInstructionCpuId(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG xAX = 0;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "rbx", "rcx", "rdx", "memory");
#  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    /* ebx is saved and restored by hand here instead of being listed as a
       clobber. */
    __asm__ __volatile__ ("push %%ebx\n\t"
                          "cpuid\n\t"
                          "pop %%ebx\n\t"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ecx", "edx", "memory");
#  else
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ebx", "ecx", "edx", "memory");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    _ReadWriteBarrier();
    __cpuid(aInfo, 0);

# else
    __asm
    {
        push    ebx
        xor     eax, eax
        cpuid
        pop     ebx
    }
# endif
}
#endif
1449
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * Builds an interrupt return frame on the stack which points at the
 * instruction right after the IRET and then executes the IRET.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSerializeInstructionIRet(void);
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* Frame pushed: ss, the original rsp (kept in r10), rflags, cs and the
       address of local label 1.  rsp is lowered by 128 bytes first to step
       over the red zone. */
    __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
                          "subq $128, %%rsp\n\t" /*redzone*/
                          "mov %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq 1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory");
#  else
    /* Frame pushed: eflags, cs and the address of local label 1. */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
1494
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * Executes RDTSCP and discards the result.
 */
#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
DECLASM(void) ASMSerializeInstructionRdTscp(void);
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-( */
#  ifdef RT_ARCH_AMD64
    /* The instruction is emitted as raw bytes (0f 01 f9 is the rdtscp
       encoding) instead of using the "rdtscp" mnemonic. */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /* Same raw encoding, with the 32-bit register names as clobbers. */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= 15
    uint32_t uIgnore;
    _ReadWriteBarrier();
    (void)__rdtscp(&uIgnore);
    (void)uIgnore;
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
1527
1528
/**
 * Serialize Instruction.
 *
 * Maps to ASMSerializeInstructionIRet for 16-bit x86 code and when IN_GUEST
 * is defined, and to ASMSerializeInstructionCpuId for other x86 and AMD64
 * builds.  On SPARC64 it is an ordinary function declared here and
 * implemented elsewhere.
 */
#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#elif defined(RT_ARCH_SPARC64)
RTDECL(void) ASMSerializeInstruction(void);
#else
# error "Port me"
#endif
1541
1542
/**
 * Memory fence, waits for any pending writes and reads to complete.
 *
 * Uses the MFENCE instruction on AMD64 and on x86 builds without
 * RT_WITH_OLD_CPU_SUPPORT.  All other configurations perform an atomic
 * exchange on a local dummy variable instead.
 */
DECLINLINE(void) ASMMemoryFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f0 is the encoding of mfence. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_mfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf0
    }
# endif
#elif ARCH_BITS == 16
    /* The variable only serves as the target of the atomic exchange. */
    uint16_t volatile u16;
    ASMAtomicXchgU16(&u16, 0);
#else
    /* The variable only serves as the target of the atomic exchange. */
    uint32_t volatile u32;
    ASMAtomicXchgU32(&u32, 0);
#endif
}
1569
1570
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Uses the SFENCE instruction on AMD64 and on x86 builds without
 * RT_WITH_OLD_CPU_SUPPORT, and falls back on ASMMemoryFence otherwise.
 */
DECLINLINE(void) ASMWriteFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f8 is the encoding of sfence. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_sfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf8
    }
# endif
#else
    ASMMemoryFence();
#endif
}
1593
1594
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Uses the LFENCE instruction on AMD64 and on x86 builds without
 * RT_WITH_OLD_CPU_SUPPORT, and falls back on ASMMemoryFence otherwise.
 */
DECLINLINE(void) ASMReadFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae e8 is the encoding of lfence. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_lfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xe8
    }
# endif
#else
    ASMMemoryFence();
#endif
}
1617
1618
1619/**
1620 * Atomically reads an unsigned 8-bit value, ordered.
1621 *
1622 * @returns Current *pu8 value
1623 * @param pu8 Pointer to the 8-bit variable to read.
1624 */
1625DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1626{
1627 ASMMemoryFence();
1628 return *pu8; /* byte reads are atomic on x86 */
1629}
1630
1631
1632/**
1633 * Atomically reads an unsigned 8-bit value, unordered.
1634 *
1635 * @returns Current *pu8 value
1636 * @param pu8 Pointer to the 8-bit variable to read.
1637 */
1638DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1639{
1640 return *pu8; /* byte reads are atomic on x86 */
1641}
1642
1643
1644/**
1645 * Atomically reads a signed 8-bit value, ordered.
1646 *
1647 * @returns Current *pi8 value
1648 * @param pi8 Pointer to the 8-bit variable to read.
1649 */
1650DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1651{
1652 ASMMemoryFence();
1653 return *pi8; /* byte reads are atomic on x86 */
1654}
1655
1656
1657/**
1658 * Atomically reads a signed 8-bit value, unordered.
1659 *
1660 * @returns Current *pi8 value
1661 * @param pi8 Pointer to the 8-bit variable to read.
1662 */
1663DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1664{
1665 return *pi8; /* byte reads are atomic on x86 */
1666}
1667
1668
1669/**
1670 * Atomically reads an unsigned 16-bit value, ordered.
1671 *
1672 * @returns Current *pu16 value
1673 * @param pu16 Pointer to the 16-bit variable to read.
1674 */
1675DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1676{
1677 ASMMemoryFence();
1678 Assert(!((uintptr_t)pu16 & 1));
1679 return *pu16;
1680}
1681
1682
1683/**
1684 * Atomically reads an unsigned 16-bit value, unordered.
1685 *
1686 * @returns Current *pu16 value
1687 * @param pu16 Pointer to the 16-bit variable to read.
1688 */
1689DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1690{
1691 Assert(!((uintptr_t)pu16 & 1));
1692 return *pu16;
1693}
1694
1695
1696/**
1697 * Atomically reads a signed 16-bit value, ordered.
1698 *
1699 * @returns Current *pi16 value
1700 * @param pi16 Pointer to the 16-bit variable to read.
1701 */
1702DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1703{
1704 ASMMemoryFence();
1705 Assert(!((uintptr_t)pi16 & 1));
1706 return *pi16;
1707}
1708
1709
1710/**
1711 * Atomically reads a signed 16-bit value, unordered.
1712 *
1713 * @returns Current *pi16 value
1714 * @param pi16 Pointer to the 16-bit variable to read.
1715 */
1716DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1717{
1718 Assert(!((uintptr_t)pi16 & 1));
1719 return *pi16;
1720}
1721
1722
1723/**
1724 * Atomically reads an unsigned 32-bit value, ordered.
1725 *
1726 * @returns Current *pu32 value
1727 * @param pu32 Pointer to the 32-bit variable to read.
1728 */
1729DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1730{
1731 ASMMemoryFence();
1732 Assert(!((uintptr_t)pu32 & 3));
1733#if ARCH_BITS == 16
1734 AssertFailed(); /** @todo 16-bit */
1735#endif
1736 return *pu32;
1737}
1738
1739
1740/**
1741 * Atomically reads an unsigned 32-bit value, unordered.
1742 *
1743 * @returns Current *pu32 value
1744 * @param pu32 Pointer to the 32-bit variable to read.
1745 */
1746DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1747{
1748 Assert(!((uintptr_t)pu32 & 3));
1749#if ARCH_BITS == 16
1750 AssertFailed(); /** @todo 16-bit */
1751#endif
1752 return *pu32;
1753}
1754
1755
1756/**
1757 * Atomically reads a signed 32-bit value, ordered.
1758 *
1759 * @returns Current *pi32 value
1760 * @param pi32 Pointer to the 32-bit variable to read.
1761 */
1762DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1763{
1764 ASMMemoryFence();
1765 Assert(!((uintptr_t)pi32 & 3));
1766#if ARCH_BITS == 16
1767 AssertFailed(); /** @todo 16-bit */
1768#endif
1769 return *pi32;
1770}
1771
1772
1773/**
1774 * Atomically reads a signed 32-bit value, unordered.
1775 *
1776 * @returns Current *pi32 value
1777 * @param pi32 Pointer to the 32-bit variable to read.
1778 */
1779DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1780{
1781 Assert(!((uintptr_t)pi32 & 3));
1782#if ARCH_BITS == 16
1783 AssertFailed(); /** @todo 16-bit */
1784#endif
1785 return *pi32;
1786}
1787
1788
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain load.  The 32-bit x86
 * variants use a locked cmpxchg8b with both the comparand (edx:eax) and the
 * replacement (ecx:ebx) set to zero: if the variable is zero, zero is written
 * back to it, and otherwise nothing is exchanged; in both cases edx:eax ends
 * up holding the current value.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* Fence for the ordering, then a plain 64-bit load. */
    ASMMemoryFence();
    u64 = *pu64;
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* ebx is not passed as an operand here; the zero in u32EBX is swapped
       into it just before the instruction and the original ebx value is
       moved back afterwards. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1868
1869
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain load.  The 32-bit x86 variants use a locked
 * cmpxchg8b with both the comparand (edx:eax) and the replacement (ecx:ebx)
 * set to zero: if the variable is zero, zero is written back to it, and
 * otherwise nothing is exchanged; in both cases edx:eax ends up holding the
 * current value.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if !defined(RT_ARCH_AMD64) \
  && (   (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
      || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* A plain 64-bit load, no fence. */
    u64 = *pu64;
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* The registers are zeroed inside the asm statement.  ebx is not passed
       as an operand; the zero in u32EBX is swapped into it just before the
       instruction and the original ebx value is moved back afterwards.
       u32Spill only exists to tell the compiler that ecx is overwritten. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1951
1952
1953/**
1954 * Atomically reads a signed 64-bit value, ordered.
1955 *
1956 * @returns Current *pi64 value
1957 * @param pi64 Pointer to the 64-bit variable to read.
1958 * The memory pointed to must be writable.
1959 *
1960 * @remarks This may fault if the memory is read-only!
1961 * @remarks x86: Requires a Pentium or later.
1962 */
1963DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1964{
1965 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1966}
1967
1968
1969/**
1970 * Atomically reads a signed 64-bit value, unordered.
1971 *
1972 * @returns Current *pi64 value
1973 * @param pi64 Pointer to the 64-bit variable to read.
1974 * The memory pointed to must be writable.
1975 *
1976 * @remarks This will fault if the memory is read-only!
1977 * @remarks x86: Requires a Pentium or later.
1978 */
1979DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1980{
1981 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1982}
1983
1984
1985/**
1986 * Atomically reads a size_t value, ordered.
1987 *
1988 * @returns Current *pcb value
1989 * @param pcb Pointer to the size_t variable to read.
1990 */
1991DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1992{
1993#if ARCH_BITS == 64
1994 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1995#elif ARCH_BITS == 32
1996 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1997#elif ARCH_BITS == 16
1998 AssertCompileSize(size_t, 2);
1999 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2000#else
2001# error "Unsupported ARCH_BITS value"
2002#endif
2003}
2004
2005
2006/**
2007 * Atomically reads a size_t value, unordered.
2008 *
2009 * @returns Current *pcb value
2010 * @param pcb Pointer to the size_t variable to read.
2011 */
2012DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
2013{
2014#if ARCH_BITS == 64 || ARCH_BITS == 16
2015 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2016#elif ARCH_BITS == 32
2017 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2018#elif ARCH_BITS == 16
2019 AssertCompileSize(size_t, 2);
2020 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2021#else
2022# error "Unsupported ARCH_BITS value"
2023#endif
2024}
2025
2026
2027/**
2028 * Atomically reads a pointer value, ordered.
2029 *
2030 * @returns Current *pv value
2031 * @param ppv Pointer to the pointer variable to read.
2032 *
2033 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2034 * requires less typing (no casts).
2035 */
2036DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2037{
2038#if ARCH_BITS == 32 || ARCH_BITS == 16
2039 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2040#elif ARCH_BITS == 64
2041 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2042#else
2043# error "ARCH_BITS is bogus"
2044#endif
2045}
2046
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * The GNU variant assigns @a ppv to a temporary typed after *ppv before
 * casting it for the call, and assigns the result to a variable of type
 * @a Type.  The other variant only casts.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2066
2067
2068/**
2069 * Atomically reads a pointer value, unordered.
2070 *
2071 * @returns Current *pv value
2072 * @param ppv Pointer to the pointer variable to read.
2073 *
2074 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2075 * requires less typing (no casts).
2076 */
2077DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2078{
2079#if ARCH_BITS == 32 || ARCH_BITS == 16
2080 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2081#elif ARCH_BITS == 64
2082 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2083#else
2084# error "ARCH_BITS is bogus"
2085#endif
2086}
2087
2088
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * The GNU variant assigns @a ppv to a temporary typed after *ppv before
 * casting it for the call, and assigns the result to a variable of type
 * @a Type.  The other variant only casts.
 *
 * @returns Current *ppv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2108
2109
2110/**
2111 * Atomically reads a boolean value, ordered.
2112 *
2113 * @returns Current *pf value
2114 * @param pf Pointer to the boolean variable to read.
2115 */
2116DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2117{
2118 ASMMemoryFence();
2119 return *pf; /* byte reads are atomic on x86 */
2120}
2121
2122
2123/**
2124 * Atomically reads a boolean value, unordered.
2125 *
2126 * @returns Current *pf value
2127 * @param pf Pointer to the boolean variable to read.
2128 */
2129DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2130{
2131 return *pf; /* byte reads are atomic on x86 */
2132}
2133
2134
2135/**
2136 * Atomically read a typical IPRT handle value, ordered.
2137 *
2138 * @param ph Pointer to the handle variable to read.
2139 * @param phRes Where to store the result.
2140 *
2141 * @remarks This doesn't currently work for all handles (like RTFILE).
2142 */
2143#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2144# define ASMAtomicReadHandle(ph, phRes) \
2145 do { \
2146 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2147 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2148 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2149 } while (0)
2150#elif HC_ARCH_BITS == 64
2151# define ASMAtomicReadHandle(ph, phRes) \
2152 do { \
2153 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2154 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2155 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2156 } while (0)
2157#else
2158# error HC_ARCH_BITS
2159#endif
2160
2161
2162/**
2163 * Atomically read a typical IPRT handle value, unordered.
2164 *
2165 * @param ph Pointer to the handle variable to read.
2166 * @param phRes Where to store the result.
2167 *
2168 * @remarks This doesn't currently work for all handles (like RTFILE).
2169 */
2170#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2171# define ASMAtomicUoReadHandle(ph, phRes) \
2172 do { \
2173 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2174 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2175 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2176 } while (0)
2177#elif HC_ARCH_BITS == 64
2178# define ASMAtomicUoReadHandle(ph, phRes) \
2179 do { \
2180 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2181 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2182 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2183 } while (0)
2184#else
2185# error HC_ARCH_BITS
2186#endif
2187
2188
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2206
2207
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2225
2226
2227/**
2228 * Atomically writes an unsigned 8-bit value, ordered.
2229 *
2230 * @param pu8 Pointer to the 8-bit variable.
2231 * @param u8 The 8-bit value to assign to *pu8.
2232 */
2233DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2234{
2235 ASMAtomicXchgU8(pu8, u8);
2236}
2237
2238
2239/**
2240 * Atomically writes an unsigned 8-bit value, unordered.
2241 *
2242 * @param pu8 Pointer to the 8-bit variable.
2243 * @param u8 The 8-bit value to assign to *pu8.
2244 */
2245DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2246{
2247 *pu8 = u8; /* byte writes are atomic on x86 */
2248}
2249
2250
2251/**
2252 * Atomically writes a signed 8-bit value, ordered.
2253 *
2254 * @param pi8 Pointer to the 8-bit variable to read.
2255 * @param i8 The 8-bit value to assign to *pi8.
2256 */
2257DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2258{
2259 ASMAtomicXchgS8(pi8, i8);
2260}
2261
2262
2263/**
2264 * Atomically writes a signed 8-bit value, unordered.
2265 *
2266 * @param pi8 Pointer to the 8-bit variable to write.
2267 * @param i8 The 8-bit value to assign to *pi8.
2268 */
2269DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2270{
2271 *pi8 = i8; /* byte writes are atomic on x86 */
2272}
2273
2274
2275/**
2276 * Atomically writes an unsigned 16-bit value, ordered.
2277 *
2278 * @param pu16 Pointer to the 16-bit variable to write.
2279 * @param u16 The 16-bit value to assign to *pu16.
2280 */
2281DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2282{
2283 ASMAtomicXchgU16(pu16, u16);
2284}
2285
2286
2287/**
2288 * Atomically writes an unsigned 16-bit value, unordered.
2289 *
2290 * @param pu16 Pointer to the 16-bit variable to write.
2291 * @param u16 The 16-bit value to assign to *pu16.
2292 */
2293DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2294{
2295 Assert(!((uintptr_t)pu16 & 1));
2296 *pu16 = u16;
2297}
2298
2299
2300/**
2301 * Atomically writes a signed 16-bit value, ordered.
2302 *
2303 * @param pi16 Pointer to the 16-bit variable to write.
2304 * @param i16 The 16-bit value to assign to *pi16.
2305 */
2306DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2307{
2308 ASMAtomicXchgS16(pi16, i16);
2309}
2310
2311
2312/**
2313 * Atomically writes a signed 16-bit value, unordered.
2314 *
2315 * @param pi16 Pointer to the 16-bit variable to write.
2316 * @param i16 The 16-bit value to assign to *pi16.
2317 */
2318DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2319{
2320 Assert(!((uintptr_t)pi16 & 1));
2321 *pi16 = i16;
2322}
2323
2324
2325/**
2326 * Atomically writes an unsigned 32-bit value, ordered.
2327 *
2328 * @param pu32 Pointer to the 32-bit variable to write.
2329 * @param u32 The 32-bit value to assign to *pu32.
2330 */
2331DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2332{
2333 ASMAtomicXchgU32(pu32, u32);
2334}
2335
2336
2337/**
2338 * Atomically writes an unsigned 32-bit value, unordered.
2339 *
2340 * @param pu32 Pointer to the 32-bit variable to write.
2341 * @param u32 The 32-bit value to assign to *pu32.
2342 */
2343DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2344{
2345 Assert(!((uintptr_t)pu32 & 3));
2346#if ARCH_BITS >= 32
2347 *pu32 = u32;
2348#else
2349 ASMAtomicXchgU32(pu32, u32);
2350#endif
2351}
2352
2353
2354/**
2355 * Atomically writes a signed 32-bit value, ordered.
2356 *
2357 * @param pi32 Pointer to the 32-bit variable to write.
2358 * @param i32 The 32-bit value to assign to *pi32.
2359 */
2360DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2361{
2362 ASMAtomicXchgS32(pi32, i32);
2363}
2364
2365
2366/**
2367 * Atomically writes a signed 32-bit value, unordered.
2368 *
2369 * @param pi32 Pointer to the 32-bit variable to write.
2370 * @param i32 The 32-bit value to assign to *pi32.
2371 */
2372DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2373{
2374 Assert(!((uintptr_t)pi32 & 3));
2375#if ARCH_BITS >= 32
2376 *pi32 = i32;
2377#else
2378 ASMAtomicXchgS32(pi32, i32);
2379#endif
2380}
2381
2382
2383/**
2384 * Atomically writes an unsigned 64-bit value, ordered.
2385 *
2386 * @param pu64 Pointer to the 64-bit variable to write.
2387 * @param u64 The 64-bit value to assign to *pu64.
2388 */
2389DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2390{
2391 ASMAtomicXchgU64(pu64, u64);
2392}
2393
2394
2395/**
2396 * Atomically writes an unsigned 64-bit value, unordered.
2397 *
2398 * @param pu64 Pointer to the 64-bit variable to write.
2399 * @param u64 The 64-bit value to assign to *pu64.
2400 */
2401DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2402{
2403 Assert(!((uintptr_t)pu64 & 7));
2404#if ARCH_BITS == 64
2405 *pu64 = u64;
2406#else
2407 ASMAtomicXchgU64(pu64, u64);
2408#endif
2409}
2410
2411
2412/**
2413 * Atomically writes a signed 64-bit value, ordered.
2414 *
2415 * @param pi64 Pointer to the 64-bit variable to write.
2416 * @param i64 The 64-bit value to assign to *pi64.
2417 */
2418DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2419{
2420 ASMAtomicXchgS64(pi64, i64);
2421}
2422
2423
2424/**
2425 * Atomically writes a signed 64-bit value, unordered.
2426 *
2427 * @param pi64 Pointer to the 64-bit variable to write.
2428 * @param i64 The 64-bit value to assign to *pi64.
2429 */
2430DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2431{
2432 Assert(!((uintptr_t)pi64 & 7));
2433#if ARCH_BITS == 64
2434 *pi64 = i64;
2435#else
2436 ASMAtomicXchgS64(pi64, i64);
2437#endif
2438}
2439
2440
2441/**
2442 * Atomically writes a size_t value, ordered.
2443 *
2444 * @returns nothing.
2445 * @param pcb Pointer to the size_t variable to write.
2446 * @param cb The value to assign to *pcb.
2447 */
2448DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2449{
2450#if ARCH_BITS == 64
2451 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2452#elif ARCH_BITS == 32
2453 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2454#elif ARCH_BITS == 16
2455 AssertCompileSize(size_t, 2);
2456 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2457#else
2458# error "Unsupported ARCH_BITS value"
2459#endif
2460}
2461
2462
2463/**
2464 * Atomically writes a boolean value, unordered.
2465 *
2466 * @param pf Pointer to the boolean variable to write.
2467 * @param f The boolean value to assign to *pf.
2468 */
2469DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2470{
2471 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2472}
2473
2474
2475/**
2476 * Atomically writes a boolean value, unordered.
2477 *
2478 * @param pf Pointer to the boolean variable to write.
2479 * @param f The boolean value to assign to *pf.
2480 */
2481DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2482{
2483 *pf = f; /* byte writes are atomic on x86 */
2484}
2485
2486
2487/**
2488 * Atomically writes a pointer value, ordered.
2489 *
2490 * @param ppv Pointer to the pointer variable to write.
2491 * @param pv The pointer value to assign to *ppv.
2492 */
2493DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2494{
2495#if ARCH_BITS == 32 || ARCH_BITS == 16
2496 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2497#elif ARCH_BITS == 64
2498 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2499#else
2500# error "ARCH_BITS is bogus"
2501#endif
2502}
2503
2504
/**
 * Atomically writes a pointer value, ordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts
 * that the destination is naturally aligned and then forwards to
 * ASMAtomicWritePtrVoid.  Macro arguments are fully parenthesized; in the GCC
 * variant the run-time checks use the type-checked temporary so @a ppv is
 * evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
2539
2540
/**
 * Atomically sets a pointer to NULL, ordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that the
 * destination is naturally aligned and then forwards to ASMAtomicWritePtrVoid
 * with NULL.  The macro argument is fully parenthesized; in the GCC variant
 * the run-time check uses the type-checked temporary so @a ppv is evaluated
 * only once.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
    } while (0)
#else
# define ASMAtomicWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
    } while (0)
#endif
2566
2567
/**
 * Atomically writes a pointer value, unordered.
 *
 * Checks at compile time that both *ppv and pv are pointer sized, asserts
 * that the destination is naturally aligned and then performs a plain store.
 * This is a statement macro and yields no value.  Macro arguments are fully
 * parenthesized; in the GCC variant the run-time check uses the type-checked
 * temporary so @a ppv is evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicUoWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        AssertCompile(sizeof(pv) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        *(ppvTypeChecked) = pvTypeChecked; \
    } while (0)
#else
# define ASMAtomicUoWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = (pv); \
    } while (0)
#endif
2602
2603
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Checks at compile time that *ppv is pointer sized, asserts that the
 * destination is naturally aligned and then stores NULL with a plain
 * assignment.  The macro argument is fully parenthesized; in the GCC variant
 * the run-time check uses the type-checked temporary so @a ppv is evaluated
 * only once.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2629
2630
2631/**
2632 * Atomically write a typical IPRT handle value, ordered.
2633 *
2634 * @param ph Pointer to the variable to update.
2635 * @param hNew The value to assign to *ph.
2636 *
2637 * @remarks This doesn't currently work for all handles (like RTFILE).
2638 */
2639#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2640# define ASMAtomicWriteHandle(ph, hNew) \
2641 do { \
2642 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2643 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2644 } while (0)
2645#elif HC_ARCH_BITS == 64
2646# define ASMAtomicWriteHandle(ph, hNew) \
2647 do { \
2648 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2649 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2650 } while (0)
2651#else
2652# error HC_ARCH_BITS
2653#endif
2654
2655
2656/**
2657 * Atomically write a typical IPRT handle value, unordered.
2658 *
2659 * @param ph Pointer to the variable to update.
2660 * @param hNew The value to assign to *ph.
2661 *
2662 * @remarks This doesn't currently work for all handles (like RTFILE).
2663 */
2664#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2665# define ASMAtomicUoWriteHandle(ph, hNew) \
2666 do { \
2667 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2668 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2669 } while (0)
2670#elif HC_ARCH_BITS == 64
2671# define ASMAtomicUoWriteHandle(ph, hNew) \
2672 do { \
2673 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2674 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2675 } while (0)
2676#else
2677# error HC_ARCH_BITS
2678#endif
2679
2680
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2698
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2716
2717
2718
2719/**
2720 * Atomically exchanges and adds to a 16-bit value, ordered.
2721 *
2722 * @returns The old value.
2723 * @param pu16 Pointer to the value.
2724 * @param u16 Number to add.
2725 *
2726 * @remarks Currently not implemented, just to make 16-bit code happy.
2727 * @remarks x86: Requires a 486 or later.
2728 */
2729DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2730
2731
2732/**
2733 * Atomically exchanges and adds to a 32-bit value, ordered.
2734 *
2735 * @returns The old value.
2736 * @param pu32 Pointer to the value.
2737 * @param u32 Number to add.
2738 *
2739 * @remarks x86: Requires a 486 or later.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2742DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2743#else
2744DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2745{
2746# if RT_INLINE_ASM_USES_INTRIN
2747 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2748 return u32;
2749
2750# elif RT_INLINE_ASM_GNU_STYLE
2751 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2752 : "=r" (u32),
2753 "=m" (*pu32)
2754 : "0" (u32),
2755 "m" (*pu32)
2756 : "memory");
2757 return u32;
2758# else
2759 __asm
2760 {
2761 mov eax, [u32]
2762# ifdef RT_ARCH_AMD64
2763 mov rdx, [pu32]
2764 lock xadd [rdx], eax
2765# else
2766 mov edx, [pu32]
2767 lock xadd [edx], eax
2768# endif
2769 mov [u32], eax
2770 }
2771 return u32;
2772# endif
2773}
2774#endif
2775
2776
2777/**
2778 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2779 *
2780 * @returns The old value.
2781 * @param pi32 Pointer to the value.
2782 * @param i32 Number to add.
2783 *
2784 * @remarks x86: Requires a 486 or later.
2785 */
2786DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2787{
2788 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2789}
2790
2791
2792/**
2793 * Atomically exchanges and adds to a 64-bit value, ordered.
2794 *
2795 * @returns The old value.
2796 * @param pu64 Pointer to the value.
2797 * @param u64 Number to add.
2798 *
2799 * @remarks x86: Requires a Pentium or later.
2800 */
2801#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2802DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2803#else
2804DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2805{
2806# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2807 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2808 return u64;
2809
2810# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2811 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2812 : "=r" (u64),
2813 "=m" (*pu64)
2814 : "0" (u64),
2815 "m" (*pu64)
2816 : "memory");
2817 return u64;
2818# else
2819 uint64_t u64Old;
2820 for (;;)
2821 {
2822 uint64_t u64New;
2823 u64Old = ASMAtomicUoReadU64(pu64);
2824 u64New = u64Old + u64;
2825 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2826 break;
2827 ASMNopPause();
2828 }
2829 return u64Old;
2830# endif
2831}
2832#endif
2833
2834
2835/**
2836 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2837 *
2838 * @returns The old value.
2839 * @param pi64 Pointer to the value.
2840 * @param i64 Number to add.
2841 *
2842 * @remarks x86: Requires a Pentium or later.
2843 */
2844DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2845{
2846 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2847}
2848
2849
2850/**
2851 * Atomically exchanges and adds to a size_t value, ordered.
2852 *
2853 * @returns The old value.
2854 * @param pcb Pointer to the size_t value.
2855 * @param cb Number to add.
2856 */
2857DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2858{
2859#if ARCH_BITS == 64
2860 AssertCompileSize(size_t, 8);
2861 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2862#elif ARCH_BITS == 32
2863 AssertCompileSize(size_t, 4);
2864 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2865#elif ARCH_BITS == 16
2866 AssertCompileSize(size_t, 2);
2867 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2868#else
2869# error "Unsupported ARCH_BITS value"
2870#endif
2871}
2872
2873
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 32 or 64-bit add; any other size triggers
 * an assertion.  The @a puOld casts carry RT_FAR, as in ASMAtomicSubSize.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2890
2891
2892
2893/**
2894 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2895 *
2896 * @returns The old value.
2897 * @param pu16 Pointer to the value.
2898 * @param u16 Number to subtract.
2899 *
2900 * @remarks x86: Requires a 486 or later.
2901 */
2902DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2903{
2904 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2905}
2906
2907
2908/**
2909 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2910 *
2911 * @returns The old value.
2912 * @param pi16 Pointer to the value.
2913 * @param i16 Number to subtract.
2914 *
2915 * @remarks x86: Requires a 486 or later.
2916 */
2917DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2918{
2919 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2920}
2921
2922
2923/**
2924 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2925 *
2926 * @returns The old value.
2927 * @param pu32 Pointer to the value.
2928 * @param u32 Number to subtract.
2929 *
2930 * @remarks x86: Requires a 486 or later.
2931 */
2932DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2933{
2934 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2935}
2936
2937
2938/**
2939 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2940 *
2941 * @returns The old value.
2942 * @param pi32 Pointer to the value.
2943 * @param i32 Number to subtract.
2944 *
2945 * @remarks x86: Requires a 486 or later.
2946 */
2947DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2948{
2949 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2950}
2951
2952
2953/**
2954 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2955 *
2956 * @returns The old value.
2957 * @param pu64 Pointer to the value.
2958 * @param u64 Number to subtract.
2959 *
2960 * @remarks x86: Requires a Pentium or later.
2961 */
2962DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2963{
2964 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2965}
2966
2967
2968/**
2969 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2970 *
2971 * @returns The old value.
2972 * @param pi64 Pointer to the value.
2973 * @param i64 Number to subtract.
2974 *
2975 * @remarks x86: Requires a Pentium or later.
2976 */
2977DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2978{
2979 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2980}
2981
2982
2983/**
2984 * Atomically exchanges and subtracts to a size_t value, ordered.
2985 *
2986 * @returns The old value.
2987 * @param pcb Pointer to the size_t value.
2988 * @param cb Number to subtract.
2989 *
2990 * @remarks x86: Requires a 486 or later.
2991 */
2992DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2993{
2994#if ARCH_BITS == 64
2995 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2996#elif ARCH_BITS == 32
2997 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2998#elif ARCH_BITS == 16
2999 AssertCompileSize(size_t, 2);
3000 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3001#else
3002# error "Unsupported ARCH_BITS value"
3003#endif
3004}
3005
3006
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 32 or 64-bit subtract; any other size
 * triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, sizeof yields a size_t - hence the cast. */ \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3025
3026
3027
3028/**
3029 * Atomically increment a 16-bit value, ordered.
3030 *
3031 * @returns The new value.
3032 * @param pu16 Pointer to the value to increment.
3033 * @remarks Not implemented. Just to make 16-bit code happy.
3034 *
3035 * @remarks x86: Requires a 486 or later.
3036 */
3037DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3038
3039
3040/**
3041 * Atomically increment a 32-bit value, ordered.
3042 *
3043 * @returns The new value.
3044 * @param pu32 Pointer to the value to increment.
3045 *
3046 * @remarks x86: Requires a 486 or later.
3047 */
3048#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3049DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3050#else
3051DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3052{
3053 uint32_t u32;
3054# if RT_INLINE_ASM_USES_INTRIN
3055 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3056 return u32;
3057
3058# elif RT_INLINE_ASM_GNU_STYLE
3059 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3060 : "=r" (u32),
3061 "=m" (*pu32)
3062 : "0" (1),
3063 "m" (*pu32)
3064 : "memory");
3065 return u32+1;
3066# else
3067 __asm
3068 {
3069 mov eax, 1
3070# ifdef RT_ARCH_AMD64
3071 mov rdx, [pu32]
3072 lock xadd [rdx], eax
3073# else
3074 mov edx, [pu32]
3075 lock xadd [edx], eax
3076# endif
3077 mov u32, eax
3078 }
3079 return u32+1;
3080# endif
3081}
3082#endif
3083
3084
3085/**
3086 * Atomically increment a signed 32-bit value, ordered.
3087 *
3088 * @returns The new value.
3089 * @param pi32 Pointer to the value to increment.
3090 *
3091 * @remarks x86: Requires a 486 or later.
3092 */
3093DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3094{
3095 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3096}
3097
3098
3099/**
3100 * Atomically increment a 64-bit value, ordered.
3101 *
3102 * @returns The new value.
3103 * @param pu64 Pointer to the value to increment.
3104 *
3105 * @remarks x86: Requires a Pentium or later.
3106 */
3107#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3108DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3109#else
3110DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3111{
3112# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3113 uint64_t u64;
3114 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3115 return u64;
3116
3117# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3118 uint64_t u64;
3119 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3120 : "=r" (u64),
3121 "=m" (*pu64)
3122 : "0" (1),
3123 "m" (*pu64)
3124 : "memory");
3125 return u64 + 1;
3126# else
3127 return ASMAtomicAddU64(pu64, 1) + 1;
3128# endif
3129}
3130#endif
3131
3132
3133/**
3134 * Atomically increment a signed 64-bit value, ordered.
3135 *
3136 * @returns The new value.
3137 * @param pi64 Pointer to the value to increment.
3138 *
3139 * @remarks x86: Requires a Pentium or later.
3140 */
3141DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3142{
3143 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3144}
3145
3146
3147/**
3148 * Atomically increment a size_t value, ordered.
3149 *
3150 * @returns The new value.
3151 * @param pcb Pointer to the value to increment.
3152 *
3153 * @remarks x86: Requires a 486 or later.
3154 */
3155DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3156{
3157#if ARCH_BITS == 64
3158 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3159#elif ARCH_BITS == 32
3160 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3161#elif ARCH_BITS == 16
3162 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3163#else
3164# error "Unsupported ARCH_BITS value"
3165#endif
3166}
3167
3168
3169
3170/**
3171 * Atomically decrement an unsigned 16-bit value, ordered.
3172 *
3173 * @returns The new value (the prototype declares the result as uint32_t).
3174 * @param pu16 Pointer to the 16-bit value to decrement.
3175 * @remarks Not implemented. Just to make 16-bit code happy (ASMAtomicDecZ refers to it when ARCH_BITS is 16).
3176 *
3177 * @remarks x86: Requires a 486 or later.
3178 */
3179DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3180
3181
3182/**
3183 * Atomically decrement an unsigned 32-bit value, ordered.
3184 *
3185 * @returns The new value.
3186 * @param pu32 Pointer to the value to decrement.
3187 *
3188 * @remarks x86: Requires a 486 or later.
3189 */
3190#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3191DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3192#else
3193DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3194{
3195 uint32_t u32;
3196# if RT_INLINE_ASM_USES_INTRIN
3197 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3198 return u32;
3199
3200# elif RT_INLINE_ASM_GNU_STYLE
3201 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3202 : "=r" (u32),
3203 "=m" (*pu32)
3204 : "0" (-1),
3205 "m" (*pu32)
3206 : "memory");
3207 return u32-1;
3208# else
3209 __asm
3210 {
3211 mov eax, -1
3212# ifdef RT_ARCH_AMD64
3213 mov rdx, [pu32]
3214 lock xadd [rdx], eax
3215# else
3216 mov edx, [pu32]
3217 lock xadd [edx], eax
3218# endif
3219 mov u32, eax
3220 }
3221 return u32-1;
3222# endif
3223}
3224#endif
3225
3226
3227/**
3228 * Atomically decrement a signed 32-bit value, ordered.
3229 *
3230 * @returns The new value.
3231 * @param pi32 Pointer to the value to decrement.
3232 *
3233 * @remarks x86: Requires a 486 or later.
3234 */
3235DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3236{
3237 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3238}
3239
3240
3241/**
3242 * Atomically decrement an unsigned 64-bit value, ordered.
3243 *
3244 * @returns The new value.
3245 * @param pu64 Pointer to the value to decrement.
3246 *
3247 * @remarks x86: Requires a Pentium or later.
3248 */
3249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3250DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3251#else
3252DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3253{
3254# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3255 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3256 return u64;
3257
3258# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3259 uint64_t u64;
3260 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3261 : "=r" (u64),
3262 "=m" (*pu64)
3263 : "0" (~(uint64_t)0),
3264 "m" (*pu64)
3265 : "memory");
3266 return u64-1;
3267# else
3268 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3269# endif
3270}
3271#endif
3272
3273
3274/**
3275 * Atomically decrement a signed 64-bit value, ordered.
3276 *
3277 * @returns The new value.
3278 * @param pi64 Pointer to the value to decrement.
3279 *
3280 * @remarks x86: Requires a Pentium or later.
3281 */
3282DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3283{
3284 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3285}
3286
3287
3288/**
3289 * Atomically decrement a size_t value, ordered.
3290 *
3291 * @returns The new value.
3292 * @param pcb Pointer to the value to decrement.
3293 *
3294 * @remarks x86: Requires a 486 or later.
3295 */
3296DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3297{
3298#if ARCH_BITS == 64
3299 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3300#elif ARCH_BITS == 32
3301 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3302#elif ARCH_BITS == 16
3303 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3304#else
3305# error "Unsupported ARCH_BITS value"
3306#endif
3307}
3308
3309
3310/**
3311 * Atomically Or an unsigned 32-bit value, ordered.
3312 *
3313 * @param pu32 Pointer to the pointer variable to OR u32 with.
3314 * @param u32 The value to OR *pu32 with.
3315 *
3316 * @remarks x86: Requires a 386 or later.
3317 */
3318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3319DECLASM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3320#else
3321DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3322{
3323# if RT_INLINE_ASM_USES_INTRIN
3324 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3325
3326# elif RT_INLINE_ASM_GNU_STYLE
3327 __asm__ __volatile__("lock; orl %1, %0\n\t"
3328 : "=m" (*pu32)
3329 : "ir" (u32),
3330 "m" (*pu32));
3331# else
3332 __asm
3333 {
3334 mov eax, [u32]
3335# ifdef RT_ARCH_AMD64
3336 mov rdx, [pu32]
3337 lock or [rdx], eax
3338# else
3339 mov edx, [pu32]
3340 lock or [edx], eax
3341# endif
3342 }
3343# endif
3344}
3345#endif
3346
3347
3348/**
3349 * Atomically Or a signed 32-bit value, ordered.
3350 *
3351 * @param pi32 Pointer to the pointer variable to OR u32 with.
3352 * @param i32 The value to OR *pu32 with.
3353 *
3354 * @remarks x86: Requires a 386 or later.
3355 */
3356DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3357{
3358 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3359}
3360
3361
3362/**
3363 * Atomically Or an unsigned 64-bit value, ordered.
3364 *
3365 * @param pu64 Pointer to the pointer variable to OR u64 with.
3366 * @param u64 The value to OR *pu64 with.
3367 *
3368 * @remarks x86: Requires a Pentium or later.
3369 */
3370#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3371DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3372#else
3373DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3374{
3375# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3376 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3377
3378# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3379 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3380 : "=m" (*pu64)
3381 : "r" (u64),
3382 "m" (*pu64));
3383# else
3384 for (;;)
3385 {
3386 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3387 uint64_t u64New = u64Old | u64;
3388 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3389 break;
3390 ASMNopPause();
3391 }
3392# endif
3393}
3394#endif
3395
3396
3397/**
3398 * Atomically Or a signed 64-bit value, ordered.
3399 *
3400 * @param pi64 Pointer to the pointer variable to OR u64 with.
3401 * @param i64 The value to OR *pu64 with.
3402 *
3403 * @remarks x86: Requires a Pentium or later.
3404 */
3405DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3406{
3407 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3408}
3409
3410
3411/**
3412 * Atomically And an unsigned 32-bit value, ordered.
3413 *
3414 * @param pu32 Pointer to the pointer variable to AND u32 with.
3415 * @param u32 The value to AND *pu32 with.
3416 *
3417 * @remarks x86: Requires a 386 or later.
3418 */
3419#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3420DECLASM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3421#else
3422DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3423{
3424# if RT_INLINE_ASM_USES_INTRIN
3425 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3426
3427# elif RT_INLINE_ASM_GNU_STYLE
3428 __asm__ __volatile__("lock; andl %1, %0\n\t"
3429 : "=m" (*pu32)
3430 : "ir" (u32),
3431 "m" (*pu32));
3432# else
3433 __asm
3434 {
3435 mov eax, [u32]
3436# ifdef RT_ARCH_AMD64
3437 mov rdx, [pu32]
3438 lock and [rdx], eax
3439# else
3440 mov edx, [pu32]
3441 lock and [edx], eax
3442# endif
3443 }
3444# endif
3445}
3446#endif
3447
3448
3449/**
3450 * Atomically And a signed 32-bit value, ordered.
3451 *
3452 * @param pi32 Pointer to the pointer variable to AND i32 with.
3453 * @param i32 The value to AND *pi32 with.
3454 *
3455 * @remarks x86: Requires a 386 or later.
3456 */
3457DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3458{
3459 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3460}
3461
3462
3463/**
3464 * Atomically And an unsigned 64-bit value, ordered.
3465 *
3466 * @param pu64 Pointer to the pointer variable to AND u64 with.
3467 * @param u64 The value to AND *pu64 with.
3468 *
3469 * @remarks x86: Requires a Pentium or later.
3470 */
3471#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3472DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3473#else
3474DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3475{
3476# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3477 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3478
3479# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3480 __asm__ __volatile__("lock; andq %1, %0\n\t"
3481 : "=m" (*pu64)
3482 : "r" (u64),
3483 "m" (*pu64));
3484# else
3485 for (;;)
3486 {
3487 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3488 uint64_t u64New = u64Old & u64;
3489 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3490 break;
3491 ASMNopPause();
3492 }
3493# endif
3494}
3495#endif
3496
3497
3498/**
3499 * Atomically And a signed 64-bit value, ordered.
3500 *
3501 * @param pi64 Pointer to the pointer variable to AND i64 with.
3502 * @param i64 The value to AND *pi64 with.
3503 *
3504 * @remarks x86: Requires a Pentium or later.
3505 */
3506DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3507{
3508 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3509}
3510
3511
3512/**
3513 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3514 *
3515 * @param pu32 Pointer to the pointer variable to OR u32 with.
3516 * @param u32 The value to OR *pu32 with.
3517 *
3518 * @remarks x86: Requires a 386 or later.
3519 */
3520#if RT_INLINE_ASM_EXTERNAL
3521DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3522#else
3523DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3524{
3525# if RT_INLINE_ASM_GNU_STYLE
3526 __asm__ __volatile__("orl %1, %0\n\t"
3527 : "=m" (*pu32)
3528 : "ir" (u32),
3529 "m" (*pu32));
3530# else
3531 __asm
3532 {
3533 mov eax, [u32]
3534# ifdef RT_ARCH_AMD64
3535 mov rdx, [pu32]
3536 or [rdx], eax
3537# else
3538 mov edx, [pu32]
3539 or [edx], eax
3540# endif
3541 }
3542# endif
3543}
3544#endif
3545
3546
3547/**
3548 * Atomically OR a signed 32-bit value, unordered.
3549 *
3550 * @param pi32 Pointer to the pointer variable to OR u32 with.
3551 * @param i32 The value to OR *pu32 with.
3552 *
3553 * @remarks x86: Requires a 386 or later.
3554 */
3555DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3556{
3557 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3558}
3559
3560
3561/**
3562 * Atomically OR an unsigned 64-bit value, unordered.
3563 *
3564 * @param pu64 Pointer to the pointer variable to OR u64 with.
3565 * @param u64 The value to OR *pu64 with.
3566 *
3567 * @remarks x86: Requires a Pentium or later.
3568 */
3569#if RT_INLINE_ASM_EXTERNAL
3570DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3571#else
3572DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3573{
3574# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3575 __asm__ __volatile__("orq %1, %q0\n\t"
3576 : "=m" (*pu64)
3577 : "r" (u64),
3578 "m" (*pu64));
3579# else
3580 for (;;)
3581 {
3582 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3583 uint64_t u64New = u64Old | u64;
3584 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3585 break;
3586 ASMNopPause();
3587 }
3588# endif
3589}
3590#endif
3591
3592
3593/**
3594 * Atomically Or a signed 64-bit value, unordered.
3595 *
3596 * @param pi64 Pointer to the pointer variable to OR u64 with.
3597 * @param i64 The value to OR *pu64 with.
3598 *
3599 * @remarks x86: Requires a Pentium or later.
3600 */
3601DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3602{
3603 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3604}
3605
3606
3607/**
3608 * Atomically And an unsigned 32-bit value, unordered.
3609 *
3610 * @param pu32 Pointer to the pointer variable to AND u32 with.
3611 * @param u32 The value to AND *pu32 with.
3612 *
3613 * @remarks x86: Requires a 386 or later.
3614 */
3615#if RT_INLINE_ASM_EXTERNAL
3616DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3617#else
3618DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3619{
3620# if RT_INLINE_ASM_GNU_STYLE
3621 __asm__ __volatile__("andl %1, %0\n\t"
3622 : "=m" (*pu32)
3623 : "ir" (u32),
3624 "m" (*pu32));
3625# else
3626 __asm
3627 {
3628 mov eax, [u32]
3629# ifdef RT_ARCH_AMD64
3630 mov rdx, [pu32]
3631 and [rdx], eax
3632# else
3633 mov edx, [pu32]
3634 and [edx], eax
3635# endif
3636 }
3637# endif
3638}
3639#endif
3640
3641
3642/**
3643 * Atomically And a signed 32-bit value, unordered.
3644 *
3645 * @param pi32 Pointer to the pointer variable to AND i32 with.
3646 * @param i32 The value to AND *pi32 with.
3647 *
3648 * @remarks x86: Requires a 386 or later.
3649 */
3650DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3651{
3652 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3653}
3654
3655
3656/**
3657 * Atomically And an unsigned 64-bit value, unordered.
3658 *
3659 * @param pu64 Pointer to the pointer variable to AND u64 with.
3660 * @param u64 The value to AND *pu64 with.
3661 *
3662 * @remarks x86: Requires a Pentium or later.
3663 */
3664#if RT_INLINE_ASM_EXTERNAL
3665DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3666#else
3667DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3668{
3669# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3670 __asm__ __volatile__("andq %1, %0\n\t"
3671 : "=m" (*pu64)
3672 : "r" (u64),
3673 "m" (*pu64));
3674# else
3675 for (;;)
3676 {
3677 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3678 uint64_t u64New = u64Old & u64;
3679 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3680 break;
3681 ASMNopPause();
3682 }
3683# endif
3684}
3685#endif
3686
3687
3688/**
3689 * Atomically And a signed 64-bit value, unordered.
3690 *
3691 * @param pi64 Pointer to the pointer variable to AND i64 with.
3692 * @param i64 The value to AND *pi64 with.
3693 *
3694 * @remarks x86: Requires a Pentium or later.
3695 */
3696DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3697{
3698 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3699}
3700
3701
3702/**
3703 * Atomically increment an unsigned 32-bit value, unordered.
3704 *
3705 * @returns the new value.
3706 * @param pu32 Pointer to the variable to increment.
3707 *
3708 * @remarks x86: Requires a 486 or later.
3709 */
3710#if RT_INLINE_ASM_EXTERNAL
3711DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3712#else
3713DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3714{
3715 uint32_t u32;
3716# if RT_INLINE_ASM_GNU_STYLE
3717 __asm__ __volatile__("xaddl %0, %1\n\t"
3718 : "=r" (u32),
3719 "=m" (*pu32)
3720 : "0" (1),
3721 "m" (*pu32)
3722 : "memory");
3723 return u32 + 1;
3724# else
3725 __asm
3726 {
3727 mov eax, 1
3728# ifdef RT_ARCH_AMD64
3729 mov rdx, [pu32]
3730 xadd [rdx], eax
3731# else
3732 mov edx, [pu32]
3733 xadd [edx], eax
3734# endif
3735 mov u32, eax
3736 }
3737 return u32 + 1;
3738# endif
3739}
3740#endif
3741
3742
3743/**
3744 * Atomically decrement an unsigned 32-bit value, unordered.
3745 *
3746 * @returns the new value.
3747 * @param pu32 Pointer to the variable to decrement.
3748 *
3749 * @remarks x86: Requires a 486 or later.
3750 */
3751#if RT_INLINE_ASM_EXTERNAL
3752DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3753#else
3754DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3755{
3756 uint32_t u32;
3757# if RT_INLINE_ASM_GNU_STYLE
3758 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3759 : "=r" (u32),
3760 "=m" (*pu32)
3761 : "0" (-1),
3762 "m" (*pu32)
3763 : "memory");
3764 return u32 - 1;
3765# else
3766 __asm
3767 {
3768 mov eax, -1
3769# ifdef RT_ARCH_AMD64
3770 mov rdx, [pu32]
3771 xadd [rdx], eax
3772# else
3773 mov edx, [pu32]
3774 xadd [edx], eax
3775# endif
3776 mov u32, eax
3777 }
3778 return u32 - 1;
3779# endif
3780}
3781#endif
3782
3783
3784/** @def RT_ASM_PAGE_SIZE
3785 * The page size assumed by the page helpers in this header: 0x2000 when RT_ARCH_SPARC64 is defined, 0x1000 otherwise. Defined locally because we try to avoid dragging in iprt/param.h here. If PAGE_SIZE is already defined (and NT_INCLUDED is not), it must agree with this value or compilation fails.
3786 * @internal
3787 */
3788#if defined(RT_ARCH_SPARC64)
3789# define RT_ASM_PAGE_SIZE 0x2000
3790# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3791# if PAGE_SIZE != 0x2000
3792# error "PAGE_SIZE is not 0x2000!"
3793# endif
3794# endif
3795#else
3796# define RT_ASM_PAGE_SIZE 0x1000
3797# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3798# if PAGE_SIZE != 0x1000
3799# error "PAGE_SIZE is not 0x1000!"
3800# endif
3801# endif
3802#endif
3803
3804/**
3805 * Zeros a 4K memory page.
3806 *
3807 * @param pv Pointer to the memory block. This must be page aligned.
3808 */
3809#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3810DECLASM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3811# else
3812DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3813{
3814# if RT_INLINE_ASM_USES_INTRIN
3815# ifdef RT_ARCH_AMD64
3816 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3817# else
3818 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3819# endif
3820
3821# elif RT_INLINE_ASM_GNU_STYLE
3822 RTCCUINTREG uDummy;
3823# ifdef RT_ARCH_AMD64
3824 __asm__ __volatile__("rep stosq"
3825 : "=D" (pv),
3826 "=c" (uDummy)
3827 : "0" (pv),
3828 "c" (RT_ASM_PAGE_SIZE >> 3),
3829 "a" (0)
3830 : "memory");
3831# else
3832 __asm__ __volatile__("rep stosl"
3833 : "=D" (pv),
3834 "=c" (uDummy)
3835 : "0" (pv),
3836 "c" (RT_ASM_PAGE_SIZE >> 2),
3837 "a" (0)
3838 : "memory");
3839# endif
3840# else
3841 __asm
3842 {
3843# ifdef RT_ARCH_AMD64
3844 xor rax, rax
3845 mov ecx, 0200h
3846 mov rdi, [pv]
3847 rep stosq
3848# else
3849 xor eax, eax
3850 mov ecx, 0400h
3851 mov edi, [pv]
3852 rep stosd
3853# endif
3854 }
3855# endif
3856}
3857# endif
3858
3859
3860/**
3861 * Zeros a memory block with a 32-bit aligned size.
3862 *
3863 * @param pv Pointer to the memory block.
3864 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3865 */
3866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3867DECLASM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3868#else
3869DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3870{
3871# if RT_INLINE_ASM_USES_INTRIN
3872# ifdef RT_ARCH_AMD64
3873 if (!(cb & 7))
3874 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3875 else
3876# endif
3877 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3878
3879# elif RT_INLINE_ASM_GNU_STYLE
3880 __asm__ __volatile__("rep stosl"
3881 : "=D" (pv),
3882 "=c" (cb)
3883 : "0" (pv),
3884 "1" (cb >> 2),
3885 "a" (0)
3886 : "memory");
3887# else
3888 __asm
3889 {
3890 xor eax, eax
3891# ifdef RT_ARCH_AMD64
3892 mov rcx, [cb]
3893 shr rcx, 2
3894 mov rdi, [pv]
3895# else
3896 mov ecx, [cb]
3897 shr ecx, 2
3898 mov edi, [pv]
3899# endif
3900 rep stosd
3901 }
3902# endif
3903}
3904#endif
3905
3906
3907/**
3908 * Fills a memory block with a 32-bit aligned size.
3909 *
3910 * @param pv Pointer to the memory block.
3911 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3912 * @param u32 The value to fill with.
3913 */
3914#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3915DECLASM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3916#else
3917DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3918{
3919# if RT_INLINE_ASM_USES_INTRIN
3920# ifdef RT_ARCH_AMD64
3921 if (!(cb & 7))
3922 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3923 else
3924# endif
3925 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3926
3927# elif RT_INLINE_ASM_GNU_STYLE
3928 __asm__ __volatile__("rep stosl"
3929 : "=D" (pv),
3930 "=c" (cb)
3931 : "0" (pv),
3932 "1" (cb >> 2),
3933 "a" (u32)
3934 : "memory");
3935# else
3936 __asm
3937 {
3938# ifdef RT_ARCH_AMD64
3939 mov rcx, [cb]
3940 shr rcx, 2
3941 mov rdi, [pv]
3942# else
3943 mov ecx, [cb]
3944 shr ecx, 2
3945 mov edi, [pv]
3946# endif
3947 mov eax, [u32]
3948 rep stosd
3949 }
3950# endif
3951}
3952#endif
3953
3954
3955/**
3956 * Checks if a memory block is all zeros.
3957 *
3958 * @returns Pointer to the first non-zero byte.
3959 * @returns NULL if all zero.
3960 *
3961 * @param pv Pointer to the memory block.
3962 * @param cb Number of bytes in the block.
3963 *
3964 * @todo Fix name, it is a predicate function but it's not returning boolean!
3965 */
3966#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3967 && !defined(RT_ARCH_SPARC64) \
3968 && !defined(RT_ARCH_SPARC)
3969DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3970#else
3971DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3972{
3973 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3974 for (; cb; cb--, pb++)
3975 if (RT_LIKELY(*pb == 0))
3976 { /* likely */ }
3977 else
3978 return (void RT_FAR *)pb;
3979 return NULL;
3980}
3981#endif
3982
3983
3984/**
3985 * Checks if a memory block is all zeros.
3986 *
3987 * @returns true if zero, false if not.
3988 *
3989 * @param pv Pointer to the memory block.
3990 * @param cb Number of bytes in the block.
3991 *
3992 * @sa ASMMemFirstNonZero
3993 */
3994DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3995{
3996 return ASMMemFirstNonZero(pv, cb) == NULL;
3997}
3998
3999
4000/**
4001 * Checks if a memory page is all zeros.
4002 *
4003 * @returns true / false.
4004 *
4005 * @param pvPage Pointer to the page. Must be aligned on 16 byte
4006 * boundary
4007 */
4008DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
4009{
4010# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4011 union { RTCCUINTREG r; bool f; } uAX;
4012 RTCCUINTREG xCX, xDI;
4013 Assert(!((uintptr_t)pvPage & 15));
4014 __asm__ __volatile__("repe; "
4015# ifdef RT_ARCH_AMD64
4016 "scasq\n\t"
4017# else
4018 "scasl\n\t"
4019# endif
4020 "setnc %%al\n\t"
4021 : "=&c" (xCX),
4022 "=&D" (xDI),
4023 "=&a" (uAX.r)
4024 : "mr" (pvPage),
4025# ifdef RT_ARCH_AMD64
4026 "0" (RT_ASM_PAGE_SIZE/8),
4027# else
4028 "0" (RT_ASM_PAGE_SIZE/4),
4029# endif
4030 "1" (pvPage),
4031 "2" (0));
4032 return uAX.f;
4033# else
4034 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4035 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4036 Assert(!((uintptr_t)pvPage & 15));
4037 for (;;)
4038 {
4039 if (puPtr[0]) return false;
4040 if (puPtr[4]) return false;
4041
4042 if (puPtr[2]) return false;
4043 if (puPtr[6]) return false;
4044
4045 if (puPtr[1]) return false;
4046 if (puPtr[5]) return false;
4047
4048 if (puPtr[3]) return false;
4049 if (puPtr[7]) return false;
4050
4051 if (!--cLeft)
4052 return true;
4053 puPtr += 8;
4054 }
4055# endif
4056}
4057
4058
4059/**
4060 * Checks if a memory block is filled with the specified byte, returning the
4061 * first mismatch.
4062 *
4063 * This is sort of an inverted memchr.
4064 *
4065 * @returns Pointer to the byte which doesn't equal u8.
4066 * @returns NULL if all equal to u8.
4067 *
4068 * @param pv Pointer to the memory block.
4069 * @param cb Number of bytes in the block.
4070 * @param u8 The value it's supposed to be filled with.
4071 *
4072 * @remarks No alignment requirements.
4073 */
4074#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4075 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4076 && !defined(RT_ARCH_SPARC64) \
4077 && !defined(RT_ARCH_SPARC)
4078DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4079#else
4080DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4081{
4082 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4083 for (; cb; cb--, pb++)
4084 if (RT_LIKELY(*pb == u8))
4085 { /* likely */ }
4086 else
4087 return (void *)pb;
4088 return NULL;
4089}
4090#endif
4091
4092
4093/**
4094 * Checks if a memory block is filled with the specified byte.
4095 *
4096 * @returns true if all matching, false if not.
4097 *
4098 * @param pv Pointer to the memory block.
4099 * @param cb Number of bytes in the block.
4100 * @param u8 The value it's supposed to be filled with.
4101 *
4102 * @remarks No alignment requirements.
4103 */
4104DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4105{
4106 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4107}
4108
4109
4110/**
4111 * Checks if a memory block is filled with the specified 32-bit value.
4112 *
4113 * This is a sort of inverted memchr.
4114 *
4115 * @returns Pointer to the first value which doesn't equal u32.
4116 * @returns NULL if all equal to u32.
4117 *
4118 * @param pv Pointer to the memory block.
4119 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4120 * @param u32 The value it's supposed to be filled with.
4121 */
4122DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4123{
4124/** @todo rewrite this in inline assembly? */
4125 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4126 for (; cb; cb -= 4, pu32++)
4127 if (RT_LIKELY(*pu32 == u32))
4128 { /* likely */ }
4129 else
4130 return (uint32_t RT_FAR *)pu32;
4131 return NULL;
4132}
4133
4134
4135/**
4136 * Probes a byte pointer for read access.
4137 *
4138 * While the function will not fault if the byte is not read accessible,
4139 * the idea is to do this in a safe place like before acquiring locks
4140 * and such like.
4141 *
4142 * Also, this functions guarantees that an eager compiler is not going
4143 * to optimize the probing away.
4144 *
4145 * @param pvByte Pointer to the byte.
4146 */
4147#if RT_INLINE_ASM_EXTERNAL
4148DECLASM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4149#else
4150DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4151{
4152 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4153 uint8_t u8;
4154# if RT_INLINE_ASM_GNU_STYLE
4155 __asm__ __volatile__("movb (%1), %0\n\t"
4156 : "=r" (u8)
4157 : "r" (pvByte));
4158# else
4159 __asm
4160 {
4161# ifdef RT_ARCH_AMD64
4162 mov rax, [pvByte]
4163 mov al, [rax]
4164# else
4165 mov eax, [pvByte]
4166 mov al, [eax]
4167# endif
4168 mov [u8], al
4169 }
4170# endif
4171 return u8;
4172}
4173#endif
4174
4175/**
4176 * Probes a buffer for read access page by page.
4177 *
4178 * While the function will fault if the buffer is not fully read
4179 * accessible, the idea is to do this in a safe place like before
4180 * acquiring locks and such like.
4181 *
4182 * Also, this functions guarantees that an eager compiler is not going
4183 * to optimize the probing away.
4184 *
4185 * @param pvBuf Pointer to the buffer.
4186 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4187 */
4188DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4189{
4190 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4191 /* the first byte */
4192 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4193 ASMProbeReadByte(pu8);
4194
4195 /* the pages in between pages. */
4196 while (cbBuf > RT_ASM_PAGE_SIZE)
4197 {
4198 ASMProbeReadByte(pu8);
4199 cbBuf -= RT_ASM_PAGE_SIZE;
4200 pu8 += RT_ASM_PAGE_SIZE;
4201 }
4202
4203 /* the last byte */
4204 ASMProbeReadByte(pu8 + cbBuf - 1);
4205}
4206
4207
4208
4209/** @defgroup grp_inline_bits Bit Operations
4210 * @{
4211 */
4212
4213
4214/**
4215 * Sets a bit in a bitmap.
4216 *
4217 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4218 * @param iBit The bit to set.
4219 *
4220 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4221 * However, doing so will yield better performance as well as avoiding
4222 * traps accessing the last bits in the bitmap.
4223 */
4224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4225DECLASM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4226#else
4227DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4228{
4229# if RT_INLINE_ASM_USES_INTRIN
4230 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4231
4232# elif RT_INLINE_ASM_GNU_STYLE
4233 __asm__ __volatile__("btsl %1, %0"
4234 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4235 : "Ir" (iBit),
4236 "m" (*(volatile long RT_FAR *)pvBitmap)
4237 : "memory");
4238# else
4239 __asm
4240 {
4241# ifdef RT_ARCH_AMD64
4242 mov rax, [pvBitmap]
4243 mov edx, [iBit]
4244 bts [rax], edx
4245# else
4246 mov eax, [pvBitmap]
4247 mov edx, [iBit]
4248 bts [eax], edx
4249# endif
4250 }
4251# endif
4252}
4253#endif
4254
4255
4256/**
4257 * Atomically sets a bit in a bitmap, ordered.
4258 *
4259 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4260 * the memory access isn't atomic!
4261 * @param iBit The bit to set.
4262 *
4263 * @remarks x86: Requires a 386 or later.
4264 */
4265#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4266DECLASM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4267#else
4268DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4269{
4270 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4271# if RT_INLINE_ASM_USES_INTRIN
4272 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4273# elif RT_INLINE_ASM_GNU_STYLE
4274 __asm__ __volatile__("lock; btsl %1, %0"
4275 : "=m" (*(volatile long *)pvBitmap)
4276 : "Ir" (iBit),
4277 "m" (*(volatile long *)pvBitmap)
4278 : "memory");
4279# else
4280 __asm
4281 {
4282# ifdef RT_ARCH_AMD64
4283 mov rax, [pvBitmap]
4284 mov edx, [iBit]
4285 lock bts [rax], edx
4286# else
4287 mov eax, [pvBitmap]
4288 mov edx, [iBit]
4289 lock bts [eax], edx
4290# endif
4291 }
4292# endif
4293}
4294#endif
4295
4296
4297/**
4298 * Clears a bit in a bitmap.
4299 *
4300 * @param pvBitmap Pointer to the bitmap.
4301 * @param iBit The bit to clear.
4302 *
4303 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4304 * However, doing so will yield better performance as well as avoiding
4305 * traps accessing the last bits in the bitmap.
4306 */
4307#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4308DECLASM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4309#else
4310DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4311{
4312# if RT_INLINE_ASM_USES_INTRIN
4313 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4314
4315# elif RT_INLINE_ASM_GNU_STYLE
4316 __asm__ __volatile__("btrl %1, %0"
4317 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4318 : "Ir" (iBit),
4319 "m" (*(volatile long RT_FAR *)pvBitmap)
4320 : "memory");
4321# else
4322 __asm
4323 {
4324# ifdef RT_ARCH_AMD64
4325 mov rax, [pvBitmap]
4326 mov edx, [iBit]
4327 btr [rax], edx
4328# else
4329 mov eax, [pvBitmap]
4330 mov edx, [iBit]
4331 btr [eax], edx
4332# endif
4333 }
4334# endif
4335}
4336#endif
4337
4338
4339/**
4340 * Atomically clears a bit in a bitmap, ordered.
4341 *
4342 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4343 * the memory access isn't atomic!
4344 * @param iBit The bit to toggle set.
4345 *
4346 * @remarks No memory barrier, take care on smp.
4347 * @remarks x86: Requires a 386 or later.
4348 */
4349#if RT_INLINE_ASM_EXTERNAL
4350DECLASM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4351#else
4352DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4353{
4354 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4355# if RT_INLINE_ASM_GNU_STYLE
4356 __asm__ __volatile__("lock; btrl %1, %0"
4357 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4358 : "Ir" (iBit),
4359 "m" (*(volatile long RT_FAR *)pvBitmap)
4360 : "memory");
4361# else
4362 __asm
4363 {
4364# ifdef RT_ARCH_AMD64
4365 mov rax, [pvBitmap]
4366 mov edx, [iBit]
4367 lock btr [rax], edx
4368# else
4369 mov eax, [pvBitmap]
4370 mov edx, [iBit]
4371 lock btr [eax], edx
4372# endif
4373 }
4374# endif
4375}
4376#endif
4377
4378
4379/**
4380 * Toggles a bit in a bitmap.
4381 *
4382 * @param pvBitmap Pointer to the bitmap.
4383 * @param iBit The bit to toggle.
4384 *
4385 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4386 * However, doing so will yield better performance as well as avoiding
4387 * traps accessing the last bits in the bitmap.
4388 */
4389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4390DECLASM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4391#else
4392DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4393{
4394# if RT_INLINE_ASM_USES_INTRIN
4395 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4396# elif RT_INLINE_ASM_GNU_STYLE
4397 __asm__ __volatile__("btcl %1, %0"
4398 : "=m" (*(volatile long *)pvBitmap)
4399 : "Ir" (iBit),
4400 "m" (*(volatile long *)pvBitmap)
4401 : "memory");
4402# else
4403 __asm
4404 {
4405# ifdef RT_ARCH_AMD64
4406 mov rax, [pvBitmap]
4407 mov edx, [iBit]
4408 btc [rax], edx
4409# else
4410 mov eax, [pvBitmap]
4411 mov edx, [iBit]
4412 btc [eax], edx
4413# endif
4414 }
4415# endif
4416}
4417#endif
4418
4419
4420/**
4421 * Atomically toggles a bit in a bitmap, ordered.
4422 *
4423 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4424 * the memory access isn't atomic!
4425 * @param iBit The bit to test and set.
4426 *
4427 * @remarks x86: Requires a 386 or later.
4428 */
4429#if RT_INLINE_ASM_EXTERNAL
4430DECLASM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4431#else
4432DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4433{
4434 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4435# if RT_INLINE_ASM_GNU_STYLE
4436 __asm__ __volatile__("lock; btcl %1, %0"
4437 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4438 : "Ir" (iBit),
4439 "m" (*(volatile long RT_FAR *)pvBitmap)
4440 : "memory");
4441# else
4442 __asm
4443 {
4444# ifdef RT_ARCH_AMD64
4445 mov rax, [pvBitmap]
4446 mov edx, [iBit]
4447 lock btc [rax], edx
4448# else
4449 mov eax, [pvBitmap]
4450 mov edx, [iBit]
4451 lock btc [eax], edx
4452# endif
4453 }
4454# endif
4455}
4456#endif
4457
4458
4459/**
4460 * Tests and sets a bit in a bitmap.
4461 *
4462 * @returns true if the bit was set.
4463 * @returns false if the bit was clear.
4464 *
4465 * @param pvBitmap Pointer to the bitmap.
4466 * @param iBit The bit to test and set.
4467 *
4468 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4469 * However, doing so will yield better performance as well as avoiding
4470 * traps accessing the last bits in the bitmap.
4471 */
4472#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4473DECLASM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4474#else
4475DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4476{
4477 union { bool f; uint32_t u32; uint8_t u8; } rc;
4478# if RT_INLINE_ASM_USES_INTRIN
4479 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4480
4481# elif RT_INLINE_ASM_GNU_STYLE
4482 __asm__ __volatile__("btsl %2, %1\n\t"
4483 "setc %b0\n\t"
4484 "andl $1, %0\n\t"
4485 : "=q" (rc.u32),
4486 "=m" (*(volatile long RT_FAR *)pvBitmap)
4487 : "Ir" (iBit),
4488 "m" (*(volatile long RT_FAR *)pvBitmap)
4489 : "memory");
4490# else
4491 __asm
4492 {
4493 mov edx, [iBit]
4494# ifdef RT_ARCH_AMD64
4495 mov rax, [pvBitmap]
4496 bts [rax], edx
4497# else
4498 mov eax, [pvBitmap]
4499 bts [eax], edx
4500# endif
4501 setc al
4502 and eax, 1
4503 mov [rc.u32], eax
4504 }
4505# endif
4506 return rc.f;
4507}
4508#endif
4509
4510
4511/**
4512 * Atomically tests and sets a bit in a bitmap, ordered.
4513 *
4514 * @returns true if the bit was set.
4515 * @returns false if the bit was clear.
4516 *
4517 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4518 * the memory access isn't atomic!
4519 * @param iBit The bit to set.
4520 *
4521 * @remarks x86: Requires a 386 or later.
4522 */
4523#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4524DECLASM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4525#else
4526DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4527{
4528 union { bool f; uint32_t u32; uint8_t u8; } rc;
4529 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4530# if RT_INLINE_ASM_USES_INTRIN
4531 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4532# elif RT_INLINE_ASM_GNU_STYLE
4533 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4534 "setc %b0\n\t"
4535 "andl $1, %0\n\t"
4536 : "=q" (rc.u32),
4537 "=m" (*(volatile long RT_FAR *)pvBitmap)
4538 : "Ir" (iBit),
4539 "m" (*(volatile long RT_FAR *)pvBitmap)
4540 : "memory");
4541# else
4542 __asm
4543 {
4544 mov edx, [iBit]
4545# ifdef RT_ARCH_AMD64
4546 mov rax, [pvBitmap]
4547 lock bts [rax], edx
4548# else
4549 mov eax, [pvBitmap]
4550 lock bts [eax], edx
4551# endif
4552 setc al
4553 and eax, 1
4554 mov [rc.u32], eax
4555 }
4556# endif
4557 return rc.f;
4558}
4559#endif
4560
4561
4562/**
4563 * Tests and clears a bit in a bitmap.
4564 *
4565 * @returns true if the bit was set.
4566 * @returns false if the bit was clear.
4567 *
4568 * @param pvBitmap Pointer to the bitmap.
4569 * @param iBit The bit to test and clear.
4570 *
4571 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4572 * However, doing so will yield better performance as well as avoiding
4573 * traps accessing the last bits in the bitmap.
4574 */
4575#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4576DECLASM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4577#else
4578DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4579{
4580 union { bool f; uint32_t u32; uint8_t u8; } rc;
4581# if RT_INLINE_ASM_USES_INTRIN
4582 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4583
4584# elif RT_INLINE_ASM_GNU_STYLE
4585 __asm__ __volatile__("btrl %2, %1\n\t"
4586 "setc %b0\n\t"
4587 "andl $1, %0\n\t"
4588 : "=q" (rc.u32),
4589 "=m" (*(volatile long RT_FAR *)pvBitmap)
4590 : "Ir" (iBit),
4591 "m" (*(volatile long RT_FAR *)pvBitmap)
4592 : "memory");
4593# else
4594 __asm
4595 {
4596 mov edx, [iBit]
4597# ifdef RT_ARCH_AMD64
4598 mov rax, [pvBitmap]
4599 btr [rax], edx
4600# else
4601 mov eax, [pvBitmap]
4602 btr [eax], edx
4603# endif
4604 setc al
4605 and eax, 1
4606 mov [rc.u32], eax
4607 }
4608# endif
4609 return rc.f;
4610}
4611#endif
4612
4613
4614/**
4615 * Atomically tests and clears a bit in a bitmap, ordered.
4616 *
4617 * @returns true if the bit was set.
4618 * @returns false if the bit was clear.
4619 *
4620 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4621 * the memory access isn't atomic!
4622 * @param iBit The bit to test and clear.
4623 *
4624 * @remarks No memory barrier, take care on smp.
4625 * @remarks x86: Requires a 386 or later.
4626 */
4627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4628DECLASM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4629#else
4630DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4631{
4632 union { bool f; uint32_t u32; uint8_t u8; } rc;
4633 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4634# if RT_INLINE_ASM_USES_INTRIN
4635 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4636
4637# elif RT_INLINE_ASM_GNU_STYLE
4638 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4639 "setc %b0\n\t"
4640 "andl $1, %0\n\t"
4641 : "=q" (rc.u32),
4642 "=m" (*(volatile long RT_FAR *)pvBitmap)
4643 : "Ir" (iBit),
4644 "m" (*(volatile long RT_FAR *)pvBitmap)
4645 : "memory");
4646# else
4647 __asm
4648 {
4649 mov edx, [iBit]
4650# ifdef RT_ARCH_AMD64
4651 mov rax, [pvBitmap]
4652 lock btr [rax], edx
4653# else
4654 mov eax, [pvBitmap]
4655 lock btr [eax], edx
4656# endif
4657 setc al
4658 and eax, 1
4659 mov [rc.u32], eax
4660 }
4661# endif
4662 return rc.f;
4663}
4664#endif
4665
4666
4667/**
4668 * Tests and toggles a bit in a bitmap.
4669 *
4670 * @returns true if the bit was set.
4671 * @returns false if the bit was clear.
4672 *
4673 * @param pvBitmap Pointer to the bitmap.
4674 * @param iBit The bit to test and toggle.
4675 *
4676 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4677 * However, doing so will yield better performance as well as avoiding
4678 * traps accessing the last bits in the bitmap.
4679 */
4680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4681DECLASM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4682#else
4683DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4684{
4685 union { bool f; uint32_t u32; uint8_t u8; } rc;
4686# if RT_INLINE_ASM_USES_INTRIN
4687 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4688
4689# elif RT_INLINE_ASM_GNU_STYLE
4690 __asm__ __volatile__("btcl %2, %1\n\t"
4691 "setc %b0\n\t"
4692 "andl $1, %0\n\t"
4693 : "=q" (rc.u32),
4694 "=m" (*(volatile long RT_FAR *)pvBitmap)
4695 : "Ir" (iBit),
4696 "m" (*(volatile long RT_FAR *)pvBitmap)
4697 : "memory");
4698# else
4699 __asm
4700 {
4701 mov edx, [iBit]
4702# ifdef RT_ARCH_AMD64
4703 mov rax, [pvBitmap]
4704 btc [rax], edx
4705# else
4706 mov eax, [pvBitmap]
4707 btc [eax], edx
4708# endif
4709 setc al
4710 and eax, 1
4711 mov [rc.u32], eax
4712 }
4713# endif
4714 return rc.f;
4715}
4716#endif
4717
4718
4719/**
4720 * Atomically tests and toggles a bit in a bitmap, ordered.
4721 *
4722 * @returns true if the bit was set.
4723 * @returns false if the bit was clear.
4724 *
4725 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4726 * the memory access isn't atomic!
4727 * @param iBit The bit to test and toggle.
4728 *
4729 * @remarks x86: Requires a 386 or later.
4730 */
4731#if RT_INLINE_ASM_EXTERNAL
4732DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4733#else
4734DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4735{
4736 union { bool f; uint32_t u32; uint8_t u8; } rc;
4737 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4738# if RT_INLINE_ASM_GNU_STYLE
4739 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4740 "setc %b0\n\t"
4741 "andl $1, %0\n\t"
4742 : "=q" (rc.u32),
4743 "=m" (*(volatile long RT_FAR *)pvBitmap)
4744 : "Ir" (iBit),
4745 "m" (*(volatile long RT_FAR *)pvBitmap)
4746 : "memory");
4747# else
4748 __asm
4749 {
4750 mov edx, [iBit]
4751# ifdef RT_ARCH_AMD64
4752 mov rax, [pvBitmap]
4753 lock btc [rax], edx
4754# else
4755 mov eax, [pvBitmap]
4756 lock btc [eax], edx
4757# endif
4758 setc al
4759 and eax, 1
4760 mov [rc.u32], eax
4761 }
4762# endif
4763 return rc.f;
4764}
4765#endif
4766
4767
4768/**
4769 * Tests if a bit in a bitmap is set.
4770 *
4771 * @returns true if the bit is set.
4772 * @returns false if the bit is clear.
4773 *
4774 * @param pvBitmap Pointer to the bitmap.
4775 * @param iBit The bit to test.
4776 *
4777 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4778 * However, doing so will yield better performance as well as avoiding
4779 * traps accessing the last bits in the bitmap.
4780 */
4781#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4782DECLASM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4783#else
4784DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4785{
4786 union { bool f; uint32_t u32; uint8_t u8; } rc;
4787# if RT_INLINE_ASM_USES_INTRIN
4788 rc.u32 = _bittest((long *)pvBitmap, iBit);
4789# elif RT_INLINE_ASM_GNU_STYLE
4790
4791 __asm__ __volatile__("btl %2, %1\n\t"
4792 "setc %b0\n\t"
4793 "andl $1, %0\n\t"
4794 : "=q" (rc.u32)
4795 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4796 "Ir" (iBit)
4797 : "memory");
4798# else
4799 __asm
4800 {
4801 mov edx, [iBit]
4802# ifdef RT_ARCH_AMD64
4803 mov rax, [pvBitmap]
4804 bt [rax], edx
4805# else
4806 mov eax, [pvBitmap]
4807 bt [eax], edx
4808# endif
4809 setc al
4810 and eax, 1
4811 mov [rc.u32], eax
4812 }
4813# endif
4814 return rc.f;
4815}
4816#endif
4817
4818
4819/**
4820 * Clears a bit range within a bitmap.
4821 *
4822 * @param pvBitmap Pointer to the bitmap.
4823 * @param iBitStart The First bit to clear.
4824 * @param iBitEnd The first bit not to clear.
4825 */
4826DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4827{
4828 if (iBitStart < iBitEnd)
4829 {
4830 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4831 int32_t iStart = iBitStart & ~31;
4832 int32_t iEnd = iBitEnd & ~31;
4833 if (iStart == iEnd)
4834 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4835 else
4836 {
4837 /* bits in first dword. */
4838 if (iBitStart & 31)
4839 {
4840 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4841 pu32++;
4842 iBitStart = iStart + 32;
4843 }
4844
4845 /* whole dword. */
4846 if (iBitStart != iEnd)
4847 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4848
4849 /* bits in last dword. */
4850 if (iBitEnd & 31)
4851 {
4852 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4853 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4854 }
4855 }
4856 }
4857}
4858
4859
4860/**
4861 * Sets a bit range within a bitmap.
4862 *
4863 * @param pvBitmap Pointer to the bitmap.
4864 * @param iBitStart The First bit to set.
4865 * @param iBitEnd The first bit not to set.
4866 */
4867DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4868{
4869 if (iBitStart < iBitEnd)
4870 {
4871 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4872 int32_t iStart = iBitStart & ~31;
4873 int32_t iEnd = iBitEnd & ~31;
4874 if (iStart == iEnd)
4875 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4876 else
4877 {
4878 /* bits in first dword. */
4879 if (iBitStart & 31)
4880 {
4881 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4882 pu32++;
4883 iBitStart = iStart + 32;
4884 }
4885
4886 /* whole dword. */
4887 if (iBitStart != iEnd)
4888 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4889
4890 /* bits in last dword. */
4891 if (iBitEnd & 31)
4892 {
4893 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4894 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4895 }
4896 }
4897 }
4898}
4899
4900
4901/**
4902 * Finds the first clear bit in a bitmap.
4903 *
4904 * @returns Index of the first zero bit.
4905 * @returns -1 if no clear bit was found.
4906 * @param pvBitmap Pointer to the bitmap.
4907 * @param cBits The number of bits in the bitmap. Multiple of 32.
4908 */
4909#if RT_INLINE_ASM_EXTERNAL
4910DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4911#else
4912DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4913{
4914 if (cBits)
4915 {
4916 int32_t iBit;
4917# if RT_INLINE_ASM_GNU_STYLE
4918 RTCCUINTREG uEAX, uECX, uEDI;
4919 cBits = RT_ALIGN_32(cBits, 32);
4920 __asm__ __volatile__("repe; scasl\n\t"
4921 "je 1f\n\t"
4922# ifdef RT_ARCH_AMD64
4923 "lea -4(%%rdi), %%rdi\n\t"
4924 "xorl (%%rdi), %%eax\n\t"
4925 "subq %5, %%rdi\n\t"
4926# else
4927 "lea -4(%%edi), %%edi\n\t"
4928 "xorl (%%edi), %%eax\n\t"
4929 "subl %5, %%edi\n\t"
4930# endif
4931 "shll $3, %%edi\n\t"
4932 "bsfl %%eax, %%edx\n\t"
4933 "addl %%edi, %%edx\n\t"
4934 "1:\t\n"
4935 : "=d" (iBit),
4936 "=&c" (uECX),
4937 "=&D" (uEDI),
4938 "=&a" (uEAX)
4939 : "0" (0xffffffff),
4940 "mr" (pvBitmap),
4941 "1" (cBits >> 5),
4942 "2" (pvBitmap),
4943 "3" (0xffffffff));
4944# else
4945 cBits = RT_ALIGN_32(cBits, 32);
4946 __asm
4947 {
4948# ifdef RT_ARCH_AMD64
4949 mov rdi, [pvBitmap]
4950 mov rbx, rdi
4951# else
4952 mov edi, [pvBitmap]
4953 mov ebx, edi
4954# endif
4955 mov edx, 0ffffffffh
4956 mov eax, edx
4957 mov ecx, [cBits]
4958 shr ecx, 5
4959 repe scasd
4960 je done
4961
4962# ifdef RT_ARCH_AMD64
4963 lea rdi, [rdi - 4]
4964 xor eax, [rdi]
4965 sub rdi, rbx
4966# else
4967 lea edi, [edi - 4]
4968 xor eax, [edi]
4969 sub edi, ebx
4970# endif
4971 shl edi, 3
4972 bsf edx, eax
4973 add edx, edi
4974 done:
4975 mov [iBit], edx
4976 }
4977# endif
4978 return iBit;
4979 }
4980 return -1;
4981}
4982#endif
4983
4984
4985/**
4986 * Finds the next clear bit in a bitmap.
4987 *
4988 * @returns Index of the first zero bit.
4989 * @returns -1 if no clear bit was found.
4990 * @param pvBitmap Pointer to the bitmap.
4991 * @param cBits The number of bits in the bitmap. Multiple of 32.
4992 * @param iBitPrev The bit returned from the last search.
4993 * The search will start at iBitPrev + 1.
4994 */
4995#if RT_INLINE_ASM_EXTERNAL
4996DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4997#else
4998DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4999{
5000 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5001 int iBit = ++iBitPrev & 31;
5002 if (iBit)
5003 {
5004 /*
5005 * Inspect the 32-bit word containing the unaligned bit.
5006 */
5007 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5008
5009# if RT_INLINE_ASM_USES_INTRIN
5010 unsigned long ulBit = 0;
5011 if (_BitScanForward(&ulBit, u32))
5012 return ulBit + iBitPrev;
5013# else
5014# if RT_INLINE_ASM_GNU_STYLE
5015 __asm__ __volatile__("bsf %1, %0\n\t"
5016 "jnz 1f\n\t"
5017 "movl $-1, %0\n\t"
5018 "1:\n\t"
5019 : "=r" (iBit)
5020 : "r" (u32));
5021# else
5022 __asm
5023 {
5024 mov edx, [u32]
5025 bsf eax, edx
5026 jnz done
5027 mov eax, 0ffffffffh
5028 done:
5029 mov [iBit], eax
5030 }
5031# endif
5032 if (iBit >= 0)
5033 return iBit + iBitPrev;
5034# endif
5035
5036 /*
5037 * Skip ahead and see if there is anything left to search.
5038 */
5039 iBitPrev |= 31;
5040 iBitPrev++;
5041 if (cBits <= (uint32_t)iBitPrev)
5042 return -1;
5043 }
5044
5045 /*
5046 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5047 */
5048 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5049 if (iBit >= 0)
5050 iBit += iBitPrev;
5051 return iBit;
5052}
5053#endif
5054
5055
5056/**
5057 * Finds the first set bit in a bitmap.
5058 *
5059 * @returns Index of the first set bit.
5060 * @returns -1 if no clear bit was found.
5061 * @param pvBitmap Pointer to the bitmap.
5062 * @param cBits The number of bits in the bitmap. Multiple of 32.
5063 */
5064#if RT_INLINE_ASM_EXTERNAL
5065DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5066#else
5067DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5068{
5069 if (cBits)
5070 {
5071 int32_t iBit;
5072# if RT_INLINE_ASM_GNU_STYLE
5073 RTCCUINTREG uEAX, uECX, uEDI;
5074 cBits = RT_ALIGN_32(cBits, 32);
5075 __asm__ __volatile__("repe; scasl\n\t"
5076 "je 1f\n\t"
5077# ifdef RT_ARCH_AMD64
5078 "lea -4(%%rdi), %%rdi\n\t"
5079 "movl (%%rdi), %%eax\n\t"
5080 "subq %5, %%rdi\n\t"
5081# else
5082 "lea -4(%%edi), %%edi\n\t"
5083 "movl (%%edi), %%eax\n\t"
5084 "subl %5, %%edi\n\t"
5085# endif
5086 "shll $3, %%edi\n\t"
5087 "bsfl %%eax, %%edx\n\t"
5088 "addl %%edi, %%edx\n\t"
5089 "1:\t\n"
5090 : "=d" (iBit),
5091 "=&c" (uECX),
5092 "=&D" (uEDI),
5093 "=&a" (uEAX)
5094 : "0" (0xffffffff),
5095 "mr" (pvBitmap),
5096 "1" (cBits >> 5),
5097 "2" (pvBitmap),
5098 "3" (0));
5099# else
5100 cBits = RT_ALIGN_32(cBits, 32);
5101 __asm
5102 {
5103# ifdef RT_ARCH_AMD64
5104 mov rdi, [pvBitmap]
5105 mov rbx, rdi
5106# else
5107 mov edi, [pvBitmap]
5108 mov ebx, edi
5109# endif
5110 mov edx, 0ffffffffh
5111 xor eax, eax
5112 mov ecx, [cBits]
5113 shr ecx, 5
5114 repe scasd
5115 je done
5116# ifdef RT_ARCH_AMD64
5117 lea rdi, [rdi - 4]
5118 mov eax, [rdi]
5119 sub rdi, rbx
5120# else
5121 lea edi, [edi - 4]
5122 mov eax, [edi]
5123 sub edi, ebx
5124# endif
5125 shl edi, 3
5126 bsf edx, eax
5127 add edx, edi
5128 done:
5129 mov [iBit], edx
5130 }
5131# endif
5132 return iBit;
5133 }
5134 return -1;
5135}
5136#endif
5137
5138
5139/**
5140 * Finds the next set bit in a bitmap.
5141 *
5142 * @returns Index of the next set bit.
5143 * @returns -1 if no set bit was found.
5144 * @param pvBitmap Pointer to the bitmap.
5145 * @param cBits The number of bits in the bitmap. Multiple of 32.
5146 * @param iBitPrev The bit returned from the last search.
5147 * The search will start at iBitPrev + 1.
5148 */
5149#if RT_INLINE_ASM_EXTERNAL
5150DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5151#else
5152DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5153{
5154 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5155 int iBit = ++iBitPrev & 31;
5156 if (iBit)
5157 {
5158 /*
5159 * Inspect the 32-bit word containing the unaligned bit.
5160 */
5161 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5162
5163# if RT_INLINE_ASM_USES_INTRIN
5164 unsigned long ulBit = 0;
5165 if (_BitScanForward(&ulBit, u32))
5166 return ulBit + iBitPrev;
5167# else
5168# if RT_INLINE_ASM_GNU_STYLE
5169 __asm__ __volatile__("bsf %1, %0\n\t"
5170 "jnz 1f\n\t"
5171 "movl $-1, %0\n\t"
5172 "1:\n\t"
5173 : "=r" (iBit)
5174 : "r" (u32));
5175# else
5176 __asm
5177 {
5178 mov edx, [u32]
5179 bsf eax, edx
5180 jnz done
5181 mov eax, 0ffffffffh
5182 done:
5183 mov [iBit], eax
5184 }
5185# endif
5186 if (iBit >= 0)
5187 return iBit + iBitPrev;
5188# endif
5189
5190 /*
5191 * Skip ahead and see if there is anything left to search.
5192 */
5193 iBitPrev |= 31;
5194 iBitPrev++;
5195 if (cBits <= (uint32_t)iBitPrev)
5196 return -1;
5197 }
5198
5199 /*
5200 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5201 */
5202 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5203 if (iBit >= 0)
5204 iBit += iBitPrev;
5205 return iBit;
5206}
5207#endif
5208
5209
5210/**
5211 * Finds the first bit which is set in the given 32-bit integer.
5212 * Bits are numbered from 1 (least significant) to 32.
5213 *
5214 * @returns index [1..32] of the first set bit.
5215 * @returns 0 if all bits are cleared.
5216 * @param u32 Integer to search for set bits.
5217 * @remarks Similar to ffs() in BSD.
5218 */
5219#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5220DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5221#else
5222DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5223{
5224# if RT_INLINE_ASM_USES_INTRIN
5225 unsigned long iBit;
5226 if (_BitScanForward(&iBit, u32))
5227 iBit++;
5228 else
5229 iBit = 0;
5230# elif RT_INLINE_ASM_GNU_STYLE
5231 uint32_t iBit;
5232 __asm__ __volatile__("bsf %1, %0\n\t"
5233 "jnz 1f\n\t"
5234 "xorl %0, %0\n\t"
5235 "jmp 2f\n"
5236 "1:\n\t"
5237 "incl %0\n"
5238 "2:\n\t"
5239 : "=r" (iBit)
5240 : "rm" (u32));
5241# else
5242 uint32_t iBit;
5243 _asm
5244 {
5245 bsf eax, [u32]
5246 jnz found
5247 xor eax, eax
5248 jmp done
5249 found:
5250 inc eax
5251 done:
5252 mov [iBit], eax
5253 }
5254# endif
5255 return iBit;
5256}
5257#endif
5258
5259
5260/**
5261 * Finds the first bit which is set in the given 32-bit integer.
5262 * Bits are numbered from 1 (least significant) to 32.
5263 *
5264 * @returns index [1..32] of the first set bit.
5265 * @returns 0 if all bits are cleared.
5266 * @param i32 Integer to search for set bits.
5267 * @remark Similar to ffs() in BSD.
5268 */
5269DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5270{
5271 return ASMBitFirstSetU32((uint32_t)i32);
5272}
5273
5274
5275/**
5276 * Finds the first bit which is set in the given 64-bit integer.
5277 *
5278 * Bits are numbered from 1 (least significant) to 64.
5279 *
5280 * @returns index [1..64] of the first set bit.
5281 * @returns 0 if all bits are cleared.
5282 * @param u64 Integer to search for set bits.
5283 * @remarks Similar to ffs() in BSD.
5284 */
5285#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5286DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5287#else
5288DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5289{
5290# if RT_INLINE_ASM_USES_INTRIN
5291 unsigned long iBit;
5292# if ARCH_BITS == 64
5293 if (_BitScanForward64(&iBit, u64))
5294 iBit++;
5295 else
5296 iBit = 0;
5297# else
5298 if (_BitScanForward(&iBit, (uint32_t)u64))
5299 iBit++;
5300 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5301 iBit += 33;
5302 else
5303 iBit = 0;
5304# endif
5305# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5306 uint64_t iBit;
5307 __asm__ __volatile__("bsfq %1, %0\n\t"
5308 "jnz 1f\n\t"
5309 "xorl %k0, %k0\n\t"
5310 "jmp 2f\n"
5311 "1:\n\t"
5312 "incl %k0\n"
5313 "2:\n\t"
5314 : "=r" (iBit)
5315 : "rm" (u64));
5316# else
5317 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5318 if (!iBit)
5319 {
5320 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5321 if (iBit)
5322 iBit += 32;
5323 }
5324# endif
5325 return (unsigned)iBit;
5326}
5327#endif
5328
5329
5330/**
5331 * Finds the first bit which is set in the given 16-bit integer.
5332 *
5333 * Bits are numbered from 1 (least significant) to 16.
5334 *
5335 * @returns index [1..16] of the first set bit.
5336 * @returns 0 if all bits are cleared.
5337 * @param u16 Integer to search for set bits.
5338 * @remarks For 16-bit bs3kit code.
5339 */
5340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5341DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5342#else
5343DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5344{
5345 return ASMBitFirstSetU32((uint32_t)u16);
5346}
5347#endif
5348
5349
5350/**
5351 * Finds the last bit which is set in the given 32-bit integer.
5352 * Bits are numbered from 1 (least significant) to 32.
5353 *
5354 * @returns index [1..32] of the last set bit.
5355 * @returns 0 if all bits are cleared.
5356 * @param u32 Integer to search for set bits.
5357 * @remark Similar to fls() in BSD.
5358 */
5359#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5360DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5361#else
5362DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5363{
5364# if RT_INLINE_ASM_USES_INTRIN
5365 unsigned long iBit;
5366 if (_BitScanReverse(&iBit, u32))
5367 iBit++;
5368 else
5369 iBit = 0;
5370# elif RT_INLINE_ASM_GNU_STYLE
5371 uint32_t iBit;
5372 __asm__ __volatile__("bsrl %1, %0\n\t"
5373 "jnz 1f\n\t"
5374 "xorl %0, %0\n\t"
5375 "jmp 2f\n"
5376 "1:\n\t"
5377 "incl %0\n"
5378 "2:\n\t"
5379 : "=r" (iBit)
5380 : "rm" (u32));
5381# else
5382 uint32_t iBit;
5383 _asm
5384 {
5385 bsr eax, [u32]
5386 jnz found
5387 xor eax, eax
5388 jmp done
5389 found:
5390 inc eax
5391 done:
5392 mov [iBit], eax
5393 }
5394# endif
5395 return iBit;
5396}
5397#endif
5398
5399
5400/**
5401 * Finds the last bit which is set in the given 32-bit integer.
5402 * Bits are numbered from 1 (least significant) to 32.
5403 *
5404 * @returns index [1..32] of the last set bit.
5405 * @returns 0 if all bits are cleared.
5406 * @param i32 Integer to search for set bits.
5407 * @remark Similar to fls() in BSD.
5408 */
5409DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5410{
5411 return ASMBitLastSetU32((uint32_t)i32);
5412}
5413
5414
5415/**
5416 * Finds the last bit which is set in the given 64-bit integer.
5417 *
5418 * Bits are numbered from 1 (least significant) to 64.
5419 *
5420 * @returns index [1..64] of the last set bit.
5421 * @returns 0 if all bits are cleared.
5422 * @param u64 Integer to search for set bits.
5423 * @remark Similar to fls() in BSD.
5424 */
5425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5426DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5427#else
5428DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5429{
5430# if RT_INLINE_ASM_USES_INTRIN
5431 unsigned long iBit;
5432# if ARCH_BITS == 64
5433 if (_BitScanReverse64(&iBit, u64))
5434 iBit++;
5435 else
5436 iBit = 0;
5437# else
5438 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5439 iBit += 33;
5440 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5441 iBit++;
5442 else
5443 iBit = 0;
5444# endif
5445# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5446 uint64_t iBit;
5447 __asm__ __volatile__("bsrq %1, %0\n\t"
5448 "jnz 1f\n\t"
5449 "xorl %k0, %k0\n\t"
5450 "jmp 2f\n"
5451 "1:\n\t"
5452 "incl %k0\n"
5453 "2:\n\t"
5454 : "=r" (iBit)
5455 : "rm" (u64));
5456# else
5457 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5458 if (iBit)
5459 iBit += 32;
5460 else
5461 iBit = ASMBitLastSetU32((uint32_t)u64);
5462#endif
5463 return (unsigned)iBit;
5464}
5465#endif
5466
5467
5468/**
5469 * Finds the last bit which is set in the given 16-bit integer.
5470 *
5471 * Bits are numbered from 1 (least significant) to 16.
5472 *
5473 * @returns index [1..16] of the last set bit.
5474 * @returns 0 if all bits are cleared.
5475 * @param u16 Integer to search for set bits.
5476 * @remarks For 16-bit bs3kit code.
5477 */
5478#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5479DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5480#else
5481DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5482{
5483 return ASMBitLastSetU32((uint32_t)u16);
5484}
5485#endif
5486
5487
5488/**
5489 * Reverse the byte order of the given 16-bit integer.
5490 *
5491 * @returns Revert
5492 * @param u16 16-bit integer value.
5493 */
5494#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5495DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5496#else
5497DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5498{
5499# if RT_INLINE_ASM_USES_INTRIN
5500 u16 = _byteswap_ushort(u16);
5501# elif RT_INLINE_ASM_GNU_STYLE
5502 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5503# else
5504 _asm
5505 {
5506 mov ax, [u16]
5507 ror ax, 8
5508 mov [u16], ax
5509 }
5510# endif
5511 return u16;
5512}
5513#endif
5514
5515
5516/**
5517 * Reverse the byte order of the given 32-bit integer.
5518 *
5519 * @returns Revert
5520 * @param u32 32-bit integer value.
5521 */
5522#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5523DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5524#else
5525DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5526{
5527# if RT_INLINE_ASM_USES_INTRIN
5528 u32 = _byteswap_ulong(u32);
5529# elif RT_INLINE_ASM_GNU_STYLE
5530 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5531# else
5532 _asm
5533 {
5534 mov eax, [u32]
5535 bswap eax
5536 mov [u32], eax
5537 }
5538# endif
5539 return u32;
5540}
5541#endif
5542
5543
5544/**
5545 * Reverse the byte order of the given 64-bit integer.
5546 *
5547 * @returns Revert
5548 * @param u64 64-bit integer value.
5549 */
5550DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5551{
5552#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5553 u64 = _byteswap_uint64(u64);
5554#else
5555 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5556 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5557#endif
5558 return u64;
5559}
5560
5561
5562/**
5563 * Rotate 32-bit unsigned value to the left by @a cShift.
5564 *
5565 * @returns Rotated value.
5566 * @param u32 The value to rotate.
5567 * @param cShift How many bits to rotate by.
5568 */
5569#ifdef __WATCOMC__
5570DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5571#else
5572DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5573{
5574# if RT_INLINE_ASM_USES_INTRIN
5575 return _rotl(u32, cShift);
5576# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5577 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5578 return u32;
5579# else
5580 cShift &= 31;
5581 return (u32 << cShift) | (u32 >> (32 - cShift));
5582# endif
5583}
5584#endif
5585
5586
5587/**
5588 * Rotate 32-bit unsigned value to the right by @a cShift.
5589 *
5590 * @returns Rotated value.
5591 * @param u32 The value to rotate.
5592 * @param cShift How many bits to rotate by.
5593 */
5594#ifdef __WATCOMC__
5595DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5596#else
5597DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5598{
5599# if RT_INLINE_ASM_USES_INTRIN
5600 return _rotr(u32, cShift);
5601# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5602 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5603 return u32;
5604# else
5605 cShift &= 31;
5606 return (u32 >> cShift) | (u32 << (32 - cShift));
5607# endif
5608}
5609#endif
5610
5611
5612/**
5613 * Rotate 64-bit unsigned value to the left by @a cShift.
5614 *
5615 * @returns Rotated value.
5616 * @param u64 The value to rotate.
5617 * @param cShift How many bits to rotate by.
5618 */
5619DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5620{
5621#if RT_INLINE_ASM_USES_INTRIN
5622 return _rotl64(u64, cShift);
5623#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5624 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5625 return u64;
5626#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5627 uint32_t uSpill;
5628 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5629 "jz 1f\n\t"
5630 "xchgl %%eax, %%edx\n\t"
5631 "1:\n\t"
5632 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5633 "jz 2f\n\t"
5634 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5635 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5636 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5637 "2:\n\t" /* } */
5638 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5639 : "0" (u64),
5640 "1" (cShift));
5641 return u64;
5642#else
5643 cShift &= 63;
5644 return (u64 << cShift) | (u64 >> (64 - cShift));
5645#endif
5646}
5647
5648
5649/**
5650 * Rotate 64-bit unsigned value to the right by @a cShift.
5651 *
5652 * @returns Rotated value.
5653 * @param u64 The value to rotate.
5654 * @param cShift How many bits to rotate by.
5655 */
5656DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5657{
5658#if RT_INLINE_ASM_USES_INTRIN
5659 return _rotr64(u64, cShift);
5660#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5661 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5662 return u64;
5663#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5664 uint32_t uSpill;
5665 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5666 "jz 1f\n\t"
5667 "xchgl %%eax, %%edx\n\t"
5668 "1:\n\t"
5669 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5670 "jz 2f\n\t"
5671 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5672 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5673 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5674 "2:\n\t" /* } */
5675 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5676 : "0" (u64),
5677 "1" (cShift));
5678 return u64;
5679#else
5680 cShift &= 63;
5681 return (u64 >> cShift) | (u64 << (64 - cShift));
5682#endif
5683}
5684
5685/** @} */
5686
5687
5688/** @} */
5689
5690#endif
5691
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette