VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 93837

Last change on this file since 93837 was 93837, checked in by vboxsync, 3 years ago

iprt/asm.h: Added ASMAtomicCmpXchgU128 and friends for AMD64 and ARM64, implemented ASMAtomicCmpWriteU128 et al for ARM64. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 247.9 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400 or later compiler and its intrinsics.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange8)
72# pragma intrinsic(_InterlockedCompareExchange16)
73# pragma intrinsic(_InterlockedCompareExchange64)
74# pragma intrinsic(_rotl)
75# pragma intrinsic(_rotr)
76# pragma intrinsic(_rotl64)
77# pragma intrinsic(_rotr64)
78# ifdef RT_ARCH_AMD64
79# pragma intrinsic(__stosq)
80# pragma intrinsic(_byteswap_uint64)
81# pragma intrinsic(_InterlockedCompareExchange128)
82# pragma intrinsic(_InterlockedExchange64)
83# pragma intrinsic(_InterlockedExchangeAdd64)
84# pragma intrinsic(_InterlockedAnd64)
85# pragma intrinsic(_InterlockedOr64)
86# pragma intrinsic(_InterlockedIncrement64)
87# pragma intrinsic(_InterlockedDecrement64)
88# endif
89#endif
90
91/*
92 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
93 */
94#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-16.h"
96#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
97# include "asm-watcom-x86-32.h"
98#endif
99
100
101/** @defgroup grp_rt_asm ASM - Assembly Routines
102 * @ingroup grp_rt
103 *
104 * @remarks The difference between ordered and unordered atomic operations is
105 * that the former will complete outstanding reads and writes before
106 * continuing, while the latter don't make any promises about the
107 * order. Ordered operations don't, it seems, make any 100% promise
108 * with respect to whether the operation will complete before any
109 * subsequent memory access. (Please correct if wrong.)
110 *
111 * ASMAtomicSomething operations are all ordered, while
112 * ASMAtomicUoSomething are unordered (note the Uo).
113 *
114 * Please note that ordered operations do not necessarily imply a
115 * compiler (memory) barrier. The user has to use the
116 * ASMCompilerBarrier() macro when that is deemed necessary.
117 *
118 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
119 * to reorder or even optimize assembler instructions away. For
120 * instance, in the following code the second rdmsr instruction is
121 * optimized away because gcc treats that instruction as deterministic:
122 *
123 * @code
124 * static inline uint32_t rdmsr_low(int idx)
125 * {
126 * uint32_t low;
127 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
128 * return low;
 * }
129 * ...
130 * uint32_t msr1 = rdmsr_low(1);
131 * foo(msr1);
132 * msr1 = rdmsr_low(1);
133 * bar(msr1);
134 * @endcode
135 *
136 * The input parameter of rdmsr_low is the same for both calls and
137 * therefore gcc will use the result of the first call as input
138 * parameter for bar() as well. For rdmsr this is not acceptable as
139 * this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
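/* Editorial usage sketch, not part of the original header: the ordered vs.
 * unordered (Uo) naming convention described above, in practice.  The two
 * increment helpers are assumed to be declared further down in this file.
 *
 * @code
 *      static uint32_t volatile g_cRequests;
 *      ASMAtomicIncU32(&g_cRequests);      // ordered: completes outstanding accesses first
 *      ASMAtomicUoIncU32(&g_cRequests);    // unordered: atomic, but no ordering promise
 * @endcode
 */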
144
145
146/** @def RT_INLINE_ASM_GCC_4_3_X_X86
147 * Used to work around some 4.3.x register allocation issues in this version of
148 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
149 * definitely not for 5.x */
150#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
151# define RT_INLINE_ASM_GCC_4_3_X_X86 1
152#else
153# define RT_INLINE_ASM_GCC_4_3_X_X86 0
154#endif
155
156/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
157 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
158 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds. PIC
159 * mode, x86.
160 *
161 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
162 * when in PIC mode on x86.
163 */
164#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
165# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# elif ( (defined(PIC) || defined(__PIC__)) \
170 && defined(RT_ARCH_X86) \
171 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
172 || defined(RT_OS_DARWIN)) )
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
174# else
175# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
176# endif
177#endif
178
179
180/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
181 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
182#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
184#else
185# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
186#endif
187
188/*
189 * ARM is great fun.
190 */
191#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
192
193# define RTASM_ARM_NO_BARRIER
194# ifdef RT_ARCH_ARM64
195# define RTASM_ARM_NO_BARRIER_IN_REG
196# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
197# define RTASM_ARM_DSB_SY "dsb sy\n\t"
198# define RTASM_ARM_DSB_SY_IN_REG
199# define RTASM_ARM_DSB_SY_COMMA_IN_REG
200# define RTASM_ARM_DMB_SY "dmb sy\n\t"
201# define RTASM_ARM_DMB_SY_IN_REG
202# define RTASM_ARM_DMB_SY_COMMA_IN_REG
203# define RTASM_ARM_DMB_ST "dmb st\n\t"
204# define RTASM_ARM_DMB_ST_IN_REG
205# define RTASM_ARM_DMB_ST_COMMA_IN_REG
206# define RTASM_ARM_DMB_LD "dmb ld\n\t"
207# define RTASM_ARM_DMB_LD_IN_REG
208# define RTASM_ARM_DMB_LD_COMMA_IN_REG
209# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
210# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
211 uint32_t rcSpill; \
212 uint32_t u32NewRet; \
213 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
214 RTASM_ARM_##barrier_type /* before label? */ \
215 "ldaxr %w[uNew], %[pMem]\n\t" \
216 modify64 \
217 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
218 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
219 : [pMem] "+m" (*a_pu32Mem) \
220 , [uNew] "=&r" (u32NewRet) \
221 , [rc] "=&r" (rcSpill) \
222 : in_reg \
223 : "cc")
224# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
225 uint32_t rcSpill; \
226 uint32_t u32OldRet; \
227 uint32_t u32NewSpill; \
228 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
229 RTASM_ARM_##barrier_type /* before label? */ \
230 "ldaxr %w[uOld], %[pMem]\n\t" \
231 modify64 \
232 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
233 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
234 : [pMem] "+m" (*a_pu32Mem) \
235 , [uOld] "=&r" (u32OldRet) \
236 , [uNew] "=&r" (u32NewSpill) \
237 , [rc] "=&r" (rcSpill) \
238 : in_reg \
239 : "cc")
240# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
241 uint32_t rcSpill; \
242 uint64_t u64NewRet; \
243 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
244 RTASM_ARM_##barrier_type /* before label? */ \
245 "ldaxr %[uNew], %[pMem]\n\t" \
246 modify64 \
247 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
248 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
249 : [pMem] "+m" (*a_pu64Mem) \
250 , [uNew] "=&r" (u64NewRet) \
251 , [rc] "=&r" (rcSpill) \
252 : in_reg \
253 : "cc")
254# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
255 uint32_t rcSpill; \
256 uint64_t u64OldRet; \
257 uint64_t u64NewSpill; \
258 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
259 RTASM_ARM_##barrier_type /* before label? */ \
260 "ldaxr %[uOld], %[pMem]\n\t" \
261 modify64 \
262 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
263 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
264 : [pMem] "+m" (*a_pu64Mem) \
265 , [uOld] "=&r" (u64OldRet) \
266 , [uNew] "=&r" (u64NewSpill) \
267 , [rc] "=&r" (rcSpill) \
268 : in_reg \
269 : "cc")
270
271# else /* RT_ARCH_ARM32 */
272# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
273# if RT_ARCH_ARM32 >= 7
274# warning armv7
275# define RTASM_ARM_NO_BARRIER_IN_REG
276# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
277# define RTASM_ARM_DSB_SY "dsb sy\n\t"
278# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_SY "dmb sy\n\t"
280# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_ST "dmb st\n\t"
282# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
283# define RTASM_ARM_DMB_LD "dmb ld\n\t"
284# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
285
286# elif RT_ARCH_ARM32 >= 6
287# warning armv6
288# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
289# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
291# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
292# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
295# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
296# elif RT_ARCH_ARM32 >= 4
297# warning armv5 or older
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
306# else
307# error "huh? Odd RT_ARCH_ARM32 value!"
308# endif
309# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
310# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
311# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
312# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
313# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
314 uint32_t rcSpill; \
315 uint32_t u32NewRet; \
316 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
317 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
318 "ldrex %[uNew], %[pMem]\n\t" \
319 modify32 \
320 "strex %[rc], %[uNew], %[pMem]\n\t" \
321 "cmp %[rc], #0\n\t" \
322 "bne .Ltry_again_" #name "_%=\n\t" \
323 : [pMem] "+m" (*a_pu32Mem) \
324 , [uNew] "=&r" (u32NewRet) \
325 , [rc] "=&r" (rcSpill) \
326 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
327 , in_reg \
328 : "cc")
329# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
330 uint32_t rcSpill; \
331 uint32_t u32OldRet; \
332 uint32_t u32NewSpill; \
333 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
334 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
335 "ldrex %[uOld], %[pMem]\n\t" \
336 modify32 \
337 "strex %[rc], %[uNew], %[pMem]\n\t" \
338 "cmp %[rc], #0\n\t" \
339 "bne .Ltry_again_" #name "_%=\n\t" \
340 : [pMem] "+m" (*a_pu32Mem) \
341 , [uOld] "=&r" (u32OldRet) \
342 , [uNew] "=&r" (u32NewSpill) \
343 , [rc] "=&r" (rcSpill) \
344 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
345 , in_reg \
346 : "cc")
347# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
348 uint32_t rcSpill; \
349 uint64_t u64NewRet; \
350 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
351 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
352 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
353 modify32 \
354 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
355 "cmp %[rc], #0\n\t" \
356 "bne .Ltry_again_" #name "_%=\n\t" \
357 : [pMem] "+m" (*a_pu64Mem), \
358 [uNew] "=&r" (u64NewRet), \
359 [rc] "=&r" (rcSpill) \
360 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
361 , in_reg \
362 : "cc")
363# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
364 uint32_t rcSpill; \
365 uint64_t u64OldRet; \
366 uint64_t u64NewSpill; \
367 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
368 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
369 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
370 modify32 \
371 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
372 "cmp %[rc], #0\n\t" \
373 "bne .Ltry_again_" #name "_%=\n\t" \
374 : [pMem] "+m" (*a_pu64Mem), \
375 [uOld] "=&r" (u64OldRet), \
376 [uNew] "=&r" (u64NewSpill), \
377 [rc] "=&r" (rcSpill) \
378 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
379 , in_reg \
380 : "cc")
381# endif /* RT_ARCH_ARM32 */
382#endif
383
384
385/** @def ASMReturnAddress
386 * Gets the return address of the current (or calling if you like) function or method.
387 */
388#ifdef _MSC_VER
389# ifdef __cplusplus
390extern "C"
391# endif
392void * _ReturnAddress(void);
393# pragma intrinsic(_ReturnAddress)
394# define ASMReturnAddress() _ReturnAddress()
395#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
396# define ASMReturnAddress() __builtin_return_address(0)
397#elif defined(__WATCOMC__)
398# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
399#else
400# error "Unsupported compiler."
401#endif
402
403
404/**
405 * Compiler memory barrier.
406 *
407 * Ensure that the compiler does not use any cached (register/tmp stack) memory
408 * values or any outstanding writes when returning from this function.
409 *
410 * This function must be used if non-volatile data is modified by a
411 * device or the VMM. Typical cases are port access, MMIO access,
412 * trapping instructions, etc.
413 */
414#if RT_INLINE_ASM_GNU_STYLE
415# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
416#elif RT_INLINE_ASM_USES_INTRIN
417# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
418#elif defined(__WATCOMC__)
419void ASMCompilerBarrier(void);
420#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
421DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
422{
423 __asm
424 {
425 }
426}
427#endif
428
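/* Editorial sketch, not part of the original header: a typical spot where
 * ASMCompilerBarrier() is needed.  The doorbell/status names are hypothetical;
 * pStatus points at plain (non-volatile) memory that a device updates behind
 * the compiler's back.
 *
 * @code
 *      *pDoorbell = 1;                 // MMIO write that triggers a DMA status update
 *      ASMCompilerBarrier();           // drop cached reads and flush pending writes
 *      uint32_t uStatus = *pStatus;    // re-read from memory, not from a stale register
 * @endcode
 */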
429
430/** @def ASMBreakpoint
431 * Debugger Breakpoint.
432 * @deprecated Use RT_BREAKPOINT instead.
433 * @internal
434 */
435#define ASMBreakpoint() RT_BREAKPOINT()
436
437
438/**
439 * Spinloop hint for platforms that have these, empty function on the other
440 * platforms.
441 *
442 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
443 * spin locks.
444 */
445#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
446RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
447#else
448DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
449{
450# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
451# if RT_INLINE_ASM_GNU_STYLE
452 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
453# else
454 __asm {
455 _emit 0f3h
456 _emit 090h
457 }
458# endif
459
460# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
461 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
462
463# else
464 /* dummy */
465# endif
466}
467#endif
468
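/* Editorial sketch, not part of the original header: ASMNopPause() in a
 * busy-wait loop.  g_fDataReady is a hypothetical flag set by another thread;
 * ASMAtomicUoReadBool is assumed to be declared further down in this file.
 *
 * @code
 *      while (!ASMAtomicUoReadBool(&g_fDataReady))
 *          ASMNopPause();              // be friendly to the sibling hardware thread
 * @endcode
 */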
469
470/**
471 * Atomically Exchange an unsigned 8-bit value, ordered.
472 *
473 * @returns Current *pu8 value
474 * @param pu8 Pointer to the 8-bit variable to update.
475 * @param u8 The 8-bit value to assign to *pu8.
476 */
477#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
478RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
479#else
480DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
481{
482# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
483# if RT_INLINE_ASM_GNU_STYLE
484 __asm__ __volatile__("xchgb %0, %1\n\t"
485 : "=m" (*pu8)
486 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
487 : "1" (u8)
488 , "m" (*pu8));
489# else
490 __asm
491 {
492# ifdef RT_ARCH_AMD64
493 mov rdx, [pu8]
494 mov al, [u8]
495 xchg [rdx], al
496 mov [u8], al
497# else
498 mov edx, [pu8]
499 mov al, [u8]
500 xchg [edx], al
501 mov [u8], al
502# endif
503 }
504# endif
505 return u8;
506
507# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
508 uint32_t uOld;
509 uint32_t rcSpill;
510 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
511 RTASM_ARM_DMB_SY
512# if defined(RT_ARCH_ARM64)
513 "ldaxrb %w[uOld], %[pMem]\n\t"
514 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
515 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
516# else
517 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
518 "strexb %[rc], %[uNew], %[pMem]\n\t"
519 "cmp %[rc], #0\n\t"
520 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
521# endif
522 : [pMem] "+m" (*pu8)
523 , [uOld] "=&r" (uOld)
524 , [rc] "=&r" (rcSpill)
525 : [uNew] "r" ((uint32_t)u8)
526 RTASM_ARM_DMB_SY_COMMA_IN_REG
527 : "cc");
528 return (uint8_t)uOld;
529
530# else
531# error "Port me"
532# endif
533}
534#endif
535
536
537/**
538 * Atomically Exchange a signed 8-bit value, ordered.
539 *
540 * @returns Current *pi8 value
541 * @param pi8 Pointer to the 8-bit variable to update.
542 * @param i8 The 8-bit value to assign to *pi8.
543 */
544DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
545{
546 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
547}
548
549
550/**
551 * Atomically Exchange a bool value, ordered.
552 *
553 * @returns Current *pf value
554 * @param pf Pointer to the boolean variable to update.
555 * @param f The boolean value to assign to *pf.
556 */
557DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
558{
559#ifdef _MSC_VER
560 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#else
562 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
563#endif
564}
565
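/* Editorial sketch, not part of the original header: a common "do this only
 * once" pattern built on ASMAtomicXchgBool, which returns the previous value.
 * g_fShutdownSignalled and signalShutdown() are hypothetical.
 *
 * @code
 *      static bool volatile g_fShutdownSignalled = false;
 *      if (!ASMAtomicXchgBool(&g_fShutdownSignalled, true))
 *          signalShutdown();           // only the first caller gets here
 * @endcode
 */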
566
567/**
568 * Atomically Exchange an unsigned 16-bit value, ordered.
569 *
570 * @returns Current *pu16 value
571 * @param pu16 Pointer to the 16-bit variable to update.
572 * @param u16 The 16-bit value to assign to *pu16.
573 */
574#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
575RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
576#else
577DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
578{
579# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
580# if RT_INLINE_ASM_GNU_STYLE
581 __asm__ __volatile__("xchgw %0, %1\n\t"
582 : "=m" (*pu16)
583 , "=r" (u16)
584 : "1" (u16)
585 , "m" (*pu16));
586# else
587 __asm
588 {
589# ifdef RT_ARCH_AMD64
590 mov rdx, [pu16]
591 mov ax, [u16]
592 xchg [rdx], ax
593 mov [u16], ax
594# else
595 mov edx, [pu16]
596 mov ax, [u16]
597 xchg [edx], ax
598 mov [u16], ax
599# endif
600 }
601# endif
602 return u16;
603
604# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
605 uint32_t uOld;
606 uint32_t rcSpill;
607 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
608 RTASM_ARM_DMB_SY
609# if defined(RT_ARCH_ARM64)
610 "ldaxrh %w[uOld], %[pMem]\n\t"
611 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
612 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
613# else
614 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
615 "strexh %[rc], %[uNew], %[pMem]\n\t"
616 "cmp %[rc], #0\n\t"
617 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
618# endif
619 : [pMem] "+m" (*pu16)
620 , [uOld] "=&r" (uOld)
621 , [rc] "=&r" (rcSpill)
622 : [uNew] "r" ((uint32_t)u16)
623 RTASM_ARM_DMB_SY_COMMA_IN_REG
624 : "cc");
625 return (uint16_t)uOld;
626
627# else
628# error "Port me"
629# endif
630}
631#endif
632
633
634/**
635 * Atomically Exchange a signed 16-bit value, ordered.
636 *
637 * @returns Current *pi16 value
638 * @param pi16 Pointer to the 16-bit variable to update.
639 * @param i16 The 16-bit value to assign to *pi16.
640 */
641DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
642{
643 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
644}
645
646
647/**
648 * Atomically Exchange an unsigned 32-bit value, ordered.
649 *
650 * @returns Current *pu32 value
651 * @param pu32 Pointer to the 32-bit variable to update.
652 * @param u32 The 32-bit value to assign to *pu32.
653 *
654 * @remarks Does not work on 286 and earlier.
655 */
656#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
657RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
658#else
659DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
660{
661# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
662# if RT_INLINE_ASM_GNU_STYLE
663 __asm__ __volatile__("xchgl %0, %1\n\t"
664 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
665 , "=r" (u32)
666 : "1" (u32)
667 , "m" (*pu32));
668
669# elif RT_INLINE_ASM_USES_INTRIN
670 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
671
672# else
673 __asm
674 {
675# ifdef RT_ARCH_AMD64
676 mov rdx, [pu32]
677 mov eax, u32
678 xchg [rdx], eax
679 mov [u32], eax
680# else
681 mov edx, [pu32]
682 mov eax, u32
683 xchg [edx], eax
684 mov [u32], eax
685# endif
686 }
687# endif
688 return u32;
689
690# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
691 uint32_t uOld;
692 uint32_t rcSpill;
693 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
694 RTASM_ARM_DMB_SY
695# if defined(RT_ARCH_ARM64)
696 "ldaxr %w[uOld], %[pMem]\n\t"
697 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
698 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
699# else
700 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
701 "strex %[rc], %[uNew], %[pMem]\n\t"
702 "cmp %[rc], #0\n\t"
703 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
704# endif
705 : [pMem] "+m" (*pu32)
706 , [uOld] "=&r" (uOld)
707 , [rc] "=&r" (rcSpill)
708 : [uNew] "r" (u32)
709 RTASM_ARM_DMB_SY_COMMA_IN_REG
710 : "cc");
711 return uOld;
712
713# else
714# error "Port me"
715# endif
716}
717#endif
718
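/* Editorial sketch, not part of the original header: ASMAtomicXchgU32 used to
 * fetch and clear a pending-work mask in one atomic operation.  The structure
 * member and the handler function are hypothetical.
 *
 * @code
 *      uint32_t fPending = ASMAtomicXchgU32(&pThis->fPendingEvents, 0);
 *      if (fPending)
 *          processPendingEvents(pThis, fPending);
 * @endcode
 */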
719
720/**
721 * Atomically Exchange a signed 32-bit value, ordered.
722 *
723 * @returns Current *pi32 value
724 * @param pi32 Pointer to the 32-bit variable to update.
725 * @param i32 The 32-bit value to assign to *pi32.
726 */
727DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
728{
729 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
730}
731
732
733/**
734 * Atomically Exchange an unsigned 64-bit value, ordered.
735 *
736 * @returns Current *pu64 value
737 * @param pu64 Pointer to the 64-bit variable to update.
738 * @param u64 The 64-bit value to assign to *pu64.
739 *
740 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
741 */
742#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
743 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
744RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
745#else
746DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
747{
748# if defined(RT_ARCH_AMD64)
749# if RT_INLINE_ASM_USES_INTRIN
750 return _InterlockedExchange64((__int64 *)pu64, u64);
751
752# elif RT_INLINE_ASM_GNU_STYLE
753 __asm__ __volatile__("xchgq %0, %1\n\t"
754 : "=m" (*pu64)
755 , "=r" (u64)
756 : "1" (u64)
757 , "m" (*pu64));
758 return u64;
759# else
760 __asm
761 {
762 mov rdx, [pu64]
763 mov rax, [u64]
764 xchg [rdx], rax
765 mov [u64], rax
766 }
767 return u64;
768# endif
769
770# elif defined(RT_ARCH_X86)
771# if RT_INLINE_ASM_GNU_STYLE
772# if defined(PIC) || defined(__PIC__)
773 uint32_t u32EBX = (uint32_t)u64;
774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
775 "xchgl %%ebx, %3\n\t"
776 "1:\n\t"
777 "lock; cmpxchg8b (%5)\n\t"
778 "jnz 1b\n\t"
779 "movl %3, %%ebx\n\t"
780 /*"xchgl %%esi, %5\n\t"*/
781 : "=A" (u64)
782 , "=m" (*pu64)
783 : "0" (*pu64)
784 , "m" ( u32EBX )
785 , "c" ( (uint32_t)(u64 >> 32) )
786 , "S" (pu64)
787 : "cc");
788# else /* !PIC */
789 __asm__ __volatile__("1:\n\t"
790 "lock; cmpxchg8b %1\n\t"
791 "jnz 1b\n\t"
792 : "=A" (u64)
793 , "=m" (*pu64)
794 : "0" (*pu64)
795 , "b" ( (uint32_t)u64 )
796 , "c" ( (uint32_t)(u64 >> 32) )
797 : "cc");
798# endif
799# else
800 __asm
801 {
802 mov ebx, dword ptr [u64]
803 mov ecx, dword ptr [u64 + 4]
804 mov edi, pu64
805 mov eax, dword ptr [edi]
806 mov edx, dword ptr [edi + 4]
807 retry:
808 lock cmpxchg8b [edi]
809 jnz retry
810 mov dword ptr [u64], eax
811 mov dword ptr [u64 + 4], edx
812 }
813# endif
814 return u64;
815
816# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
817 uint32_t rcSpill;
818 uint64_t uOld;
819 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
820 RTASM_ARM_DMB_SY
821# if defined(RT_ARCH_ARM64)
822 "ldaxr %[uOld], %[pMem]\n\t"
823 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
824 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
825# else
826 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
827 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
828 "cmp %[rc], #0\n\t"
829 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
830# endif
831 : [pMem] "+m" (*pu64)
832 , [uOld] "=&r" (uOld)
833 , [rc] "=&r" (rcSpill)
834 : [uNew] "r" (u64)
835 RTASM_ARM_DMB_SY_COMMA_IN_REG
836 : "cc");
837 return uOld;
838
839# else
840# error "Port me"
841# endif
842}
843#endif
844
845
846/**
847 * Atomically Exchange a signed 64-bit value, ordered.
848 *
849 * @returns Current *pi64 value
850 * @param pi64 Pointer to the 64-bit variable to update.
851 * @param i64 The 64-bit value to assign to *pi64.
852 */
853DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
854{
855 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
856}
857
858
859/**
860 * Atomically Exchange a size_t value, ordered.
861 *
862 * @returns Current *puDst value
863 * @param puDst Pointer to the size_t variable to update.
864 * @param uNew The new value to assign to *puDst.
865 */
866DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
867{
868#if ARCH_BITS == 16
869 AssertCompile(sizeof(size_t) == 2);
870 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 32
872 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
873#elif ARCH_BITS == 64
874 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
875#else
876# error "ARCH_BITS is bogus"
877#endif
878}
879
880
881/**
882 * Atomically Exchange a pointer value, ordered.
883 *
884 * @returns Current *ppv value
885 * @param ppv Pointer to the pointer variable to update.
886 * @param pv The pointer value to assign to *ppv.
887 */
888DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
889{
890#if ARCH_BITS == 32 || ARCH_BITS == 16
891 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
892#elif ARCH_BITS == 64
893 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
894#else
895# error "ARCH_BITS is bogus"
896#endif
897}
898
899
900/**
901 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
902 *
903 * @returns Current *ppv value
904 * @param ppv Pointer to the pointer variable to update.
905 * @param pv The pointer value to assign to *ppv.
906 * @param Type The type of *ppv, sans volatile.
907 */
908#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
909# define ASMAtomicXchgPtrT(ppv, pv, Type) \
910 __extension__ \
911 ({\
912 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
913 Type const pvTypeChecked = (pv); \
914 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
915 pvTypeCheckedRet; \
916 })
917#else
918# define ASMAtomicXchgPtrT(ppv, pv, Type) \
919 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
920#endif
921
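/* Editorial sketch, not part of the original header: ASMAtomicXchgPtrT keeps
 * pointer types straight without manual casting.  PMYCONFIG and the variables
 * are hypothetical; RTMemFree is the regular IPRT heap free.
 *
 * @code
 *      static PMYCONFIG volatile g_pCfg;
 *      PMYCONFIG pOldCfg = ASMAtomicXchgPtrT(&g_pCfg, pNewCfg, PMYCONFIG);
 *      if (pOldCfg)
 *          RTMemFree(pOldCfg);         // the previous config now belongs to us
 * @endcode
 */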
922
923/**
924 * Atomically Exchange a raw-mode context pointer value, ordered.
925 *
926 * @returns Current *ppvRC value
927 * @param ppvRC Pointer to the pointer variable to update.
928 * @param pvRC The pointer value to assign to *ppvRC.
929 */
930DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
931{
932 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
933}
934
935
936/**
937 * Atomically Exchange a ring-0 pointer value, ordered.
938 *
939 * @returns Current *ppvR0 value
940 * @param ppvR0 Pointer to the pointer variable to update.
941 * @param pvR0 The pointer value to assign to *ppvR0.
942 */
943DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
944{
945#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
946 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
947#elif R0_ARCH_BITS == 64
948 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
949#else
950# error "R0_ARCH_BITS is bogus"
951#endif
952}
953
954
955/**
956 * Atomically Exchange a ring-3 pointer value, ordered.
957 *
958 * @returns Current *ppvR3 value
959 * @param ppvR3 Pointer to the pointer variable to update.
960 * @param pvR3 The pointer value to assign to *ppvR3.
961 */
962DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
963{
964#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
965 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
966#elif R3_ARCH_BITS == 64
967 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
968#else
969# error "R3_ARCH_BITS is bogus"
970#endif
971}
972
973
974/** @def ASMAtomicXchgHandle
975 * Atomically Exchange a typical IPRT handle value, ordered.
976 *
977 * @param ph Pointer to the value to update.
978 * @param hNew The new value to assign to *ph.
979 * @param phRes Where to store the current *ph value.
980 *
981 * @remarks This doesn't currently work for all handles (like RTFILE).
982 */
983#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
984# define ASMAtomicXchgHandle(ph, hNew, phRes) \
985 do { \
986 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
987 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
988 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
989 } while (0)
990#elif HC_ARCH_BITS == 64
991# define ASMAtomicXchgHandle(ph, hNew, phRes) \
992 do { \
993 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
994 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
995 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
996 } while (0)
997#else
998# error HC_ARCH_BITS
999#endif
1000
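/* Editorial sketch, not part of the original header: ASMAtomicXchgHandle used
 * so that exactly one caller takes ownership of (and destroys) an event
 * semaphore handle.  The hEvtWait member is hypothetical; RTSEMEVENT,
 * NIL_RTSEMEVENT and RTSemEventDestroy come from iprt/semaphore.h.
 *
 * @code
 *      RTSEMEVENT hEvt;
 *      ASMAtomicXchgHandle(&pThis->hEvtWait, NIL_RTSEMEVENT, &hEvt);
 *      if (hEvt != NIL_RTSEMEVENT)
 *          RTSemEventDestroy(hEvt);
 * @endcode
 */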
1001
1002/**
1003 * Atomically Exchange a value whose size might differ
1004 * between platforms or compilers, ordered.
1005 *
1006 * @param pu Pointer to the variable to update.
1007 * @param uNew The value to assign to *pu.
1008 * @todo This is busted as it's missing the result argument.
1009 */
1010#define ASMAtomicXchgSize(pu, uNew) \
1011 do { \
1012 switch (sizeof(*(pu))) { \
1013 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1014 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1015 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1016 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1017 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1018 } \
1019 } while (0)
1020
1021/**
1022 * Atomically Exchange a value whose size might differ
1023 * between platforms or compilers, ordered.
1024 *
1025 * @param pu Pointer to the variable to update.
1026 * @param uNew The value to assign to *pu.
1027 * @param puRes Where to store the current *pu value.
1028 */
1029#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1030 do { \
1031 switch (sizeof(*(pu))) { \
1032 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1033 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1034 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1035 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1036 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1037 } \
1038 } while (0)
1039
1040
1041
1042/**
1043 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1044 *
1045 * @returns true if xchg was done.
1046 * @returns false if xchg wasn't done.
1047 *
1048 * @param pu8 Pointer to the value to update.
1049 * @param u8New The new value to assign to *pu8.
1050 * @param u8Old The old value to compare *pu8 with.
1051 *
1052 * @remarks x86: Requires a 486 or later.
1053 * @todo Rename ASMAtomicCmpWriteU8
1054 */
1055#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1056RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1057#else
1058DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1059{
1060# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1061 uint8_t u8Ret;
1062 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1063 "setz %1\n\t"
1064 : "=m" (*pu8)
1065 , "=qm" (u8Ret)
1066 , "=a" (u8Old)
1067 : "q" (u8New)
1068 , "2" (u8Old)
1069 , "m" (*pu8)
1070 : "cc");
1071 return (bool)u8Ret;
1072
1073# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1074 union { uint32_t u; bool f; } fXchg;
1075 uint32_t u32Spill;
1076 uint32_t rcSpill;
1077 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1078 RTASM_ARM_DMB_SY
1079# if defined(RT_ARCH_ARM64)
1080 "ldaxrb %w[uOld], %[pMem]\n\t"
1081 "cmp %w[uOld], %w[uCmp]\n\t"
1082 "bne 1f\n\t" /* stop here if not equal */
1083 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1084 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1085 "mov %w[fXchg], #1\n\t"
1086# else
1087 "ldrexb %[uOld], %[pMem]\n\t"
1088 "teq %[uOld], %[uCmp]\n\t"
1089 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1090 "bne 1f\n\t" /* stop here if not equal */
1091 "cmp %[rc], #0\n\t"
1092 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1093 "mov %[fXchg], #1\n\t"
1094# endif
1095 "1:\n\t"
1096 : [pMem] "+m" (*pu8)
1097 , [uOld] "=&r" (u32Spill)
1098 , [rc] "=&r" (rcSpill)
1099 , [fXchg] "=&r" (fXchg.u)
1100 : [uCmp] "r" ((uint32_t)u8Old)
1101 , [uNew] "r" ((uint32_t)u8New)
1102 , "[fXchg]" (0)
1103 RTASM_ARM_DMB_SY_COMMA_IN_REG
1104 : "cc");
1105 return fXchg.f;
1106
1107# else
1108# error "Port me"
1109# endif
1110}
1111#endif
1112
1113
1114/**
1115 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1116 *
1117 * @returns true if xchg was done.
1118 * @returns false if xchg wasn't done.
1119 *
1120 * @param pi8 Pointer to the value to update.
1121 * @param i8New The new value to assign to *pi8.
1122 * @param i8Old The old value to compare *pi8 with.
1123 *
1124 * @remarks x86: Requires a 486 or later.
1125 * @todo Rename ASMAtomicCmpWriteS8
1126 */
1127DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1128{
1129 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1130}
1131
1132
1133/**
1134 * Atomically Compare and Exchange a bool value, ordered.
1135 *
1136 * @returns true if xchg was done.
1137 * @returns false if xchg wasn't done.
1138 *
1139 * @param pf Pointer to the value to update.
1140 * @param fNew The new value to assign to *pf.
1141 * @param fOld The old value to compare *pf with.
1142 *
1143 * @remarks x86: Requires a 486 or later.
1144 * @todo Rename ASMAtomicCmpWriteBool
1145 */
1146DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1147{
1148 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1149}
1150
1151
1152/**
1153 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1154 *
1155 * @returns true if xchg was done.
1156 * @returns false if xchg wasn't done.
1157 *
1158 * @param pu32 Pointer to the value to update.
1159 * @param u32New The new value to assign to *pu32.
1160 * @param u32Old The old value to compare *pu32 with.
1161 *
1162 * @remarks x86: Requires a 486 or later.
1163 * @todo Rename ASMAtomicCmpWriteU32
1164 */
1165#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1166RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1167#else
1168DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1169{
1170# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1171# if RT_INLINE_ASM_GNU_STYLE
1172 uint8_t u8Ret;
1173 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1174 "setz %1\n\t"
1175 : "=m" (*pu32)
1176 , "=qm" (u8Ret)
1177 , "=a" (u32Old)
1178 : "r" (u32New)
1179 , "2" (u32Old)
1180 , "m" (*pu32)
1181 : "cc");
1182 return (bool)u8Ret;
1183
1184# elif RT_INLINE_ASM_USES_INTRIN
1185 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1186
1187# else
1188 uint32_t u32Ret;
1189 __asm
1190 {
1191# ifdef RT_ARCH_AMD64
1192 mov rdx, [pu32]
1193# else
1194 mov edx, [pu32]
1195# endif
1196 mov eax, [u32Old]
1197 mov ecx, [u32New]
1198# ifdef RT_ARCH_AMD64
1199 lock cmpxchg [rdx], ecx
1200# else
1201 lock cmpxchg [edx], ecx
1202# endif
1203 setz al
1204 movzx eax, al
1205 mov [u32Ret], eax
1206 }
1207 return !!u32Ret;
1208# endif
1209
1210# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1211 union { uint32_t u; bool f; } fXchg;
1212 uint32_t u32Spill;
1213 uint32_t rcSpill;
1214 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1215 RTASM_ARM_DMB_SY
1216# if defined(RT_ARCH_ARM64)
1217 "ldaxr %w[uOld], %[pMem]\n\t"
1218 "cmp %w[uOld], %w[uCmp]\n\t"
1219 "bne 1f\n\t" /* stop here if not equal */
1220 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1221 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1222 "mov %w[fXchg], #1\n\t"
1223# else
1224 "ldrex %[uOld], %[pMem]\n\t"
1225 "teq %[uOld], %[uCmp]\n\t"
1226 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1227 "bne 1f\n\t" /* stop here if not equal */
1228 "cmp %[rc], #0\n\t"
1229 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1230 "mov %[fXchg], #1\n\t"
1231# endif
1232 "1:\n\t"
1233 : [pMem] "+m" (*pu32)
1234 , [uOld] "=&r" (u32Spill)
1235 , [rc] "=&r" (rcSpill)
1236 , [fXchg] "=&r" (fXchg.u)
1237 : [uCmp] "r" (u32Old)
1238 , [uNew] "r" (u32New)
1239 , "[fXchg]" (0)
1240 RTASM_ARM_DMB_SY_COMMA_IN_REG
1241 : "cc");
1242 return fXchg.f;
1243
1244# else
1245# error "Port me"
1246# endif
1247}
1248#endif
1249
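/* Editorial sketch, not part of the original header: a naive spinlock built on
 * ASMAtomicCmpXchgU32.  The pLock structure is hypothetical, and real IPRT code
 * would normally use RTCritSect or RTSpinlock instead.
 *
 * @code
 *      while (!ASMAtomicCmpXchgU32(&pLock->fLocked, 1, 0))     // new = 1, old = 0
 *          ASMNopPause();
 *      // ... critical section ...
 *      ASMAtomicXchgU32(&pLock->fLocked, 0);                   // release
 * @endcode
 */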
1250
1251/**
1252 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1253 *
1254 * @returns true if xchg was done.
1255 * @returns false if xchg wasn't done.
1256 *
1257 * @param pi32 Pointer to the value to update.
1258 * @param i32New The new value to assign to *pi32.
1259 * @param i32Old The old value to compare *pi32 with.
1260 *
1261 * @remarks x86: Requires a 486 or later.
1262 * @todo Rename ASMAtomicCmpWriteS32
1263 */
1264DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1265{
1266 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1267}
1268
1269
1270/**
1271 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1272 *
1273 * @returns true if xchg was done.
1274 * @returns false if xchg wasn't done.
1275 *
1276 * @param pu64 Pointer to the 64-bit variable to update.
1277 * @param u64New The 64-bit value to assign to *pu64.
1278 * @param u64Old The value to compare with.
1279 *
1280 * @remarks x86: Requires a Pentium or later.
1281 * @todo Rename ASMAtomicCmpWriteU64
1282 */
1283#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1284 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1285RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1286#else
1287DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1288{
1289# if RT_INLINE_ASM_USES_INTRIN
1290 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1291
1292# elif defined(RT_ARCH_AMD64)
1293# if RT_INLINE_ASM_GNU_STYLE
1294 uint8_t u8Ret;
1295 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1296 "setz %1\n\t"
1297 : "=m" (*pu64)
1298 , "=qm" (u8Ret)
1299 , "=a" (u64Old)
1300 : "r" (u64New)
1301 , "2" (u64Old)
1302 , "m" (*pu64)
1303 : "cc");
1304 return (bool)u8Ret;
1305# else
1306 bool fRet;
1307 __asm
1308 {
1309 mov rdx, [pu64]
1310 mov rax, [u64Old]
1311 mov rcx, [u64New]
1312 lock cmpxchg [rdx], rcx
1313 setz al
1314 mov [fRet], al
1315 }
1316 return fRet;
1317# endif
1318
1319# elif defined(RT_ARCH_X86)
1320 uint32_t u32Ret;
1321# if RT_INLINE_ASM_GNU_STYLE
1322# if defined(PIC) || defined(__PIC__)
1323 uint32_t u32EBX = (uint32_t)u64New;
1324 uint32_t u32Spill;
1325 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1326 "lock; cmpxchg8b (%6)\n\t"
1327 "setz %%al\n\t"
1328 "movl %4, %%ebx\n\t"
1329 "movzbl %%al, %%eax\n\t"
1330 : "=a" (u32Ret)
1331 , "=d" (u32Spill)
1332# if RT_GNUC_PREREQ(4, 3)
1333 , "+m" (*pu64)
1334# else
1335 , "=m" (*pu64)
1336# endif
1337 : "A" (u64Old)
1338 , "m" ( u32EBX )
1339 , "c" ( (uint32_t)(u64New >> 32) )
1340 , "S" (pu64)
1341 : "cc");
1342# else /* !PIC */
1343 uint32_t u32Spill;
1344 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1345 "setz %%al\n\t"
1346 "movzbl %%al, %%eax\n\t"
1347 : "=a" (u32Ret)
1348 , "=d" (u32Spill)
1349 , "+m" (*pu64)
1350 : "A" (u64Old)
1351 , "b" ( (uint32_t)u64New )
1352 , "c" ( (uint32_t)(u64New >> 32) )
1353 : "cc");
1354# endif
1355 return (bool)u32Ret;
1356# else
1357 __asm
1358 {
1359 mov ebx, dword ptr [u64New]
1360 mov ecx, dword ptr [u64New + 4]
1361 mov edi, [pu64]
1362 mov eax, dword ptr [u64Old]
1363 mov edx, dword ptr [u64Old + 4]
1364 lock cmpxchg8b [edi]
1365 setz al
1366 movzx eax, al
1367 mov dword ptr [u32Ret], eax
1368 }
1369 return !!u32Ret;
1370# endif
1371
1372# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1373 union { uint32_t u; bool f; } fXchg;
1374 uint64_t u64Spill;
1375 uint32_t rcSpill;
1376 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1377 RTASM_ARM_DMB_SY
1378# if defined(RT_ARCH_ARM64)
1379 "ldaxr %[uOld], %[pMem]\n\t"
1380 "cmp %[uOld], %[uCmp]\n\t"
1381 "bne 1f\n\t" /* stop here if not equal */
1382 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1383 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1384 "mov %w[fXchg], #1\n\t"
1385# else
1386 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1387 "teq %[uOld], %[uCmp]\n\t"
1388 "teqeq %H[uOld], %H[uCmp]\n\t"
1389 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1390 "bne 1f\n\t" /* stop here if not equal */
1391 "cmp %[rc], #0\n\t"
1392 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1393 "mov %[fXchg], #1\n\t"
1394# endif
1395 "1:\n\t"
1396 : [pMem] "+m" (*pu64)
1397 , [uOld] "=&r" (u64Spill)
1398 , [rc] "=&r" (rcSpill)
1399 , [fXchg] "=&r" (fXchg.u)
1400 : [uCmp] "r" (u64Old)
1401 , [uNew] "r" (u64New)
1402 , "[fXchg]" (0)
1403 RTASM_ARM_DMB_SY_COMMA_IN_REG
1404 : "cc");
1405 return fXchg.f;
1406
1407# else
1408# error "Port me"
1409# endif
1410}
1411#endif
1412
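/* Editorial sketch, not part of the original header: a compare-and-swap retry
 * loop that only ever moves a 64-bit timestamp forward.  pu64Last and u64New
 * are hypothetical; ASMAtomicReadU64 is assumed to be declared further down in
 * this file.
 *
 * @code
 *      uint64_t u64Old;
 *      do
 *          u64Old = ASMAtomicReadU64(pu64Last);
 *      while (   u64New > u64Old
 *             && !ASMAtomicCmpXchgU64(pu64Last, u64New, u64Old));
 * @endcode
 */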
1413
1414/**
1415 * Atomically Compare and exchange a signed 64-bit value, ordered.
1416 *
1417 * @returns true if xchg was done.
1418 * @returns false if xchg wasn't done.
1419 *
1420 * @param pi64 Pointer to the 64-bit variable to update.
1421 * @param i64 The 64-bit value to assign to *pi64.
1422 * @param i64Old The value to compare with.
1423 *
1424 * @remarks x86: Requires a Pentium or later.
1425 * @todo Rename ASMAtomicCmpWriteS64
1426 */
1427DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1428{
1429 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1430}
1431
1432#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
1433
1434/** @def RTASM_HAVE_CMP_WRITE_U128
1435 * Indicates that we've got ASMAtomicCmpWriteU128(), ASMAtomicCmpWriteU128v2()
1436 * and ASMAtomicCmpWriteExU128() available. */
1437# define RTASM_HAVE_CMP_WRITE_U128 1
1438
1439
1440/**
1441 * Atomically compare and write an unsigned 128-bit value, ordered.
1442 *
1443 * @returns true if write was done.
1444 * @returns false if write wasn't done.
1445 *
1446 * @param pu128 Pointer to the 128-bit variable to update.
1447 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1448 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1449 * @param u64OldHi The high 64 bits of the value to compare with.
1450 * @param u64OldLo The low 64 bits of the value to compare with.
1451 *
1452 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1453 */
1454# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1455DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1456 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1457# else
1458DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1459 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1460{
1461# if RT_INLINE_ASM_USES_INTRIN
1462 __int64 ai64Cmp[2];
1463 ai64Cmp[0] = u64OldLo;
1464 ai64Cmp[1] = u64OldHi;
1465 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1466
1467# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1468 return __sync_bool_compare_and_swap(pu128, ((uint128_t)u64OldHi << 64) | u64OldLo, ((uint128_t)u64NewHi << 64) | u64NewLo);
1469
1470# elif defined(RT_ARCH_AMD64)
1471# if RT_INLINE_ASM_GNU_STYLE
1472 uint64_t u64Ret;
1473 uint64_t u64Spill;
1474 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1475 "setz %%al\n\t"
1476 "movzbl %%al, %%eax\n\t"
1477 : "=a" (u64Ret)
1478 , "=d" (u64Spill)
1479 , "+m" (*pu128)
1480 : "a" (u64OldLo)
1481 , "d" (u64OldHi)
1482 , "b" (u64NewLo)
1483 , "c" (u64NewHi)
1484 : "cc");
1485
1486 return (bool)u64Ret;
1487# else
1488# error "Port me"
1489# endif
1490# else
1491# error "Port me"
1492# endif
1493}
1494# endif
1495
1496
1497/**
1498 * Atomically compare and write an unsigned 128-bit value, ordered.
1499 *
1500 * @returns true if write was done.
1501 * @returns false if write wasn't done.
1502 *
1503 * @param pu128 Pointer to the 128-bit variable to update.
1504 * @param u128New The 128-bit value to assign to *pu128.
1505 * @param u128Old The value to compare with.
1506 *
1507 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1508 */
1509DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1510{
1511# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1512# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1513 return __sync_bool_compare_and_swap(pu128, u128Old, u128New);
1514# else
1515 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1516 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1517# endif
1518# else
1519 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1520# endif
1521}
1522
1523
1524/**
1525 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1526 */
1527DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1528 const RTUINT128U u128Old) RT_NOTHROW_DEF
1529{
1530# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1531 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1532# else
1533 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1534# endif
1535}
1536
1537#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
1538
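/* Editorial sketch, not part of the original header: updating a 16-byte
 * pointer + generation pair in one shot with the RTUINT128U wrapper above.
 * Only valid where RTASM_HAVE_CMP_WRITE_U128 is defined; the descriptor layout
 * and variables are hypothetical.
 *
 * @code
 *  #ifdef RTASM_HAVE_CMP_WRITE_U128
 *      RTUINT128U uOld, uNew;
 *      uOld.s.Lo = uCurPtr;    uOld.s.Hi = uCurGen;
 *      uNew.s.Lo = uNextPtr;   uNew.s.Hi = uCurGen + 1;
 *      bool const fUpdated = ASMAtomicCmpWriteU128U(&pDesc->u128, uNew, uOld);
 *                              // true: both halves were written in one atomic op
 *  #endif
 * @endcode
 */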
1539/**
1540 * Atomically Compare and Exchange a pointer value, ordered.
1541 *
1542 * @returns true if xchg was done.
1543 * @returns false if xchg wasn't done.
1544 *
1545 * @param ppv Pointer to the value to update.
1546 * @param pvNew The new value to assign to *ppv.
1547 * @param pvOld The old value to compare *ppv with.
1548 *
1549 * @remarks x86: Requires a 486 or later.
1550 * @todo Rename ASMAtomicCmpWritePtrVoid
1551 */
1552DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1553{
1554#if ARCH_BITS == 32 || ARCH_BITS == 16
1555 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1556#elif ARCH_BITS == 64
1557 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1558#else
1559# error "ARCH_BITS is bogus"
1560#endif
1561}
1562
1563
1564/**
1565 * Atomically Compare and Exchange a pointer value, ordered.
1566 *
1567 * @returns true if xchg was done.
1568 * @returns false if xchg wasn't done.
1569 *
1570 * @param ppv Pointer to the value to update.
1571 * @param pvNew The new value to assign to *ppv.
1572 * @param pvOld The old value to compare *ppv with.
1573 *
1574 * @remarks This is relatively type safe on GCC platforms.
1575 * @remarks x86: Requires a 486 or later.
1576 * @todo Rename ASMAtomicCmpWritePtr
1577 */
1578#ifdef __GNUC__
1579# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1580 __extension__ \
1581 ({\
1582 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1583 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1584 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1585 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1586 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1587 fMacroRet; \
1588 })
1589#else
1590# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1591 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1592#endif
1593
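/* Editorial sketch, not part of the original header: lazily publishing a
 * singleton with ASMAtomicCmpXchgPtr; only the thread whose compare-and-exchange
 * succeeds keeps its allocation.  PMYSTATE and g_pState are hypothetical;
 * RTMemAllocZ/RTMemFree are the regular IPRT heap APIs.
 *
 * @code
 *      PMYSTATE pNew = (PMYSTATE)RTMemAllocZ(sizeof(*pNew));
 *      if (!ASMAtomicCmpXchgPtr(&g_pState, pNew, NULL))
 *      {
 *          RTMemFree(pNew);            // somebody else won the race; use theirs
 *          pNew = g_pState;
 *      }
 * @endcode
 */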
1594
1595/** @def ASMAtomicCmpXchgHandle
1596 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1597 *
1598 * @param ph Pointer to the value to update.
1599 * @param hNew The new value to assign to *ph.
1600 * @param hOld The old value to compare *ph with.
1601 * @param fRc Where to store the result.
1602 *
1603 * @remarks This doesn't currently work for all handles (like RTFILE).
1604 * @remarks x86: Requires a 486 or later.
1605 * @todo Rename ASMAtomicCmpWriteHandle
1606 */
1607#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1608# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1609 do { \
1610 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1611 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1612 } while (0)
1613#elif HC_ARCH_BITS == 64
1614# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1615 do { \
1616 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1617 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1618 } while (0)
1619#else
1620# error HC_ARCH_BITS
1621#endif
1622
1623
1624/** @def ASMAtomicCmpXchgSize
1625 * Atomically Compare and Exchange a value which size might differ
1626 * between platforms or compilers, ordered.
1627 *
1628 * @param pu Pointer to the value to update.
1629 * @param uNew The new value to assign to *pu.
1630 * @param uOld The old value to compare *pu with.
1631 * @param fRc Where to store the result.
1632 *
1633 * @remarks x86: Requires a 486 or later.
1634 * @todo Rename ASMAtomicCmpWriteSize
1635 */
1636#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1637 do { \
1638 switch (sizeof(*(pu))) { \
1639 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1640 break; \
1641 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1642 break; \
1643 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1644 (fRc) = false; \
1645 break; \
1646 } \
1647 } while (0)
1648
1649
1650/**
1651 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1652 * back old value, ordered.
1653 *
1654 * @returns true if xchg was done.
1655 * @returns false if xchg wasn't done.
1656 *
1657 * @param pu8 Pointer to the value to update.
1658 * @param u8New The new value to assign to *pu8.
1659 * @param u8Old The old value to compare *pu8 with.
1660 * @param pu8Old Pointer to store the old value at.
1661 *
1662 * @remarks x86: Requires a 486 or later.
1663 */
1664#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1665RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1666#else
1667DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1668{
1669# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1670# if RT_INLINE_ASM_GNU_STYLE
1671 uint8_t u8Ret;
1672 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1673 "setz %1\n\t"
1674 : "=m" (*pu8)
1675 , "=qm" (u8Ret)
1676 , "=a" (*pu8Old)
1677# if defined(RT_ARCH_X86)
1678 : "q" (u8New)
1679# else
1680 : "r" (u8New)
1681# endif
1682 , "a" (u8Old)
1683 , "m" (*pu8)
1684 : "cc");
1685 return (bool)u8Ret;
1686
1687# elif RT_INLINE_ASM_USES_INTRIN
1688 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1689
1690# else
1691 uint8_t u8Ret;
1692 __asm
1693 {
1694# ifdef RT_ARCH_AMD64
1695 mov rdx, [pu8]
1696# else
1697 mov edx, [pu8]
1698# endif
1699            mov     al, [u8Old]
1700            mov     cl, [u8New]
1701# ifdef RT_ARCH_AMD64
1702            lock cmpxchg [rdx], cl
1703            mov     rdx, [pu8Old]
1704            mov     [rdx], al
1705# else
1706            lock cmpxchg [edx], cl
1707            mov     edx, [pu8Old]
1708            mov     [edx], al
1709# endif
1710            setz    al
1711            movzx   eax, al
1712            mov     [u8Ret], al
1713 }
1714 return !!u8Ret;
1715# endif
1716
1717# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1718 union { uint8_t u; bool f; } fXchg;
1719 uint8_t u8ActualOld;
1720 uint8_t rcSpill;
1721 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1722 RTASM_ARM_DMB_SY
1723# if defined(RT_ARCH_ARM64)
1724 "ldaxrb %w[uOld], %[pMem]\n\t"
1725 "cmp %w[uOld], %w[uCmp]\n\t"
1726 "bne 1f\n\t" /* stop here if not equal */
1727 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1728 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1729 "mov %w[fXchg], #1\n\t"
1730# else
1731 "ldrexb %[uOld], %[pMem]\n\t"
1732 "teq %[uOld], %[uCmp]\n\t"
1733 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1734 "bne 1f\n\t" /* stop here if not equal */
1735 "cmp %[rc], #0\n\t"
1736 "bne .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1737 "mov %[fXchg], #1\n\t"
1738# endif
1739 "1:\n\t"
1740 : [pMem] "+m" (*pu8)
1741 , [uOld] "=&r" (u8ActualOld)
1742 , [rc] "=&r" (rcSpill)
1743 , [fXchg] "=&r" (fXchg.u)
1744 : [uCmp] "r" (u8Old)
1745 , [uNew] "r" (u8New)
1746 , "[fXchg]" (0)
1747 RTASM_ARM_DMB_SY_COMMA_IN_REG
1748 : "cc");
1749 *pu8Old = u8ActualOld;
1750 return fXchg.f;
1751
1752# else
1753# error "Port me"
1754# endif
1755}
1756#endif
1757
1758
1759/**
1760 * Atomically Compare and Exchange a signed 8-bit value, additionally
1761 * passes back old value, ordered.
1762 *
1763 * @returns true if xchg was done.
1764 * @returns false if xchg wasn't done.
1765 *
1766 * @param pi8 Pointer to the value to update.
1767 * @param   i8New       The new value to assign to *pi8.
1768 * @param   i8Old       The old value to compare *pi8 with.
1769 * @param   pi8Old      Pointer to where to store the old value.
1770 *
1771 * @remarks x86: Requires a 486 or later.
1772 */
1773DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1774{
1775 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1776}
1777
1778
1779/**
1780 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1781 * back old value, ordered.
1782 *
1783 * @returns true if xchg was done.
1784 * @returns false if xchg wasn't done.
1785 *
1786 * @param pu16 Pointer to the value to update.
1787 * @param   u16New      The new value to assign to *pu16.
1788 * @param   u16Old      The old value to compare *pu16 with.
1789 * @param   pu16Old     Pointer to where to store the old value.
1790 *
1791 * @remarks x86: Requires a 486 or later.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1794RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1795#else
1796DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1797{
1798# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1799# if RT_INLINE_ASM_GNU_STYLE
1800 uint8_t u8Ret;
1801 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1802 "setz %1\n\t"
1803 : "=m" (*pu16)
1804 , "=qm" (u8Ret)
1805 , "=a" (*pu16Old)
1806 : "r" (u16New)
1807 , "a" (u16Old)
1808 , "m" (*pu16)
1809 : "cc");
1810 return (bool)u8Ret;
1811
1812# elif RT_INLINE_ASM_USES_INTRIN
1813 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1814
1815# else
1816 uint16_t u16Ret;
1817 __asm
1818 {
1819# ifdef RT_ARCH_AMD64
1820 mov rdx, [pu16]
1821# else
1822 mov edx, [pu16]
1823# endif
1824            mov     ax, [u16Old]
1825            mov     cx, [u16New]
1826# ifdef RT_ARCH_AMD64
1827            lock cmpxchg [rdx], cx
1828            mov     rdx, [pu16Old]
1829            mov     [rdx], ax
1830# else
1831            lock cmpxchg [edx], cx
1832            mov     edx, [pu16Old]
1833            mov     [edx], ax
1834# endif
1835            setz    al
1836            movzx   eax, al
1837            mov     [u16Ret], ax
1838 }
1839 return !!u16Ret;
1840# endif
1841
1842# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1843 union { uint16_t u; bool f; } fXchg;
1844 uint16_t u16ActualOld;
1845 uint16_t rcSpill;
1846 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1847 RTASM_ARM_DMB_SY
1848# if defined(RT_ARCH_ARM64)
1849 "ldaxrh %w[uOld], %[pMem]\n\t"
1850 "cmp %w[uOld], %w[uCmp]\n\t"
1851 "bne 1f\n\t" /* stop here if not equal */
1852 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1853 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1854 "mov %w[fXchg], #1\n\t"
1855# else
1856 "ldrexh %[uOld], %[pMem]\n\t"
1857 "teq %[uOld], %[uCmp]\n\t"
1858 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1859 "bne 1f\n\t" /* stop here if not equal */
1860 "cmp %[rc], #0\n\t"
1861 "bne .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1862 "mov %[fXchg], #1\n\t"
1863# endif
1864 "1:\n\t"
1865 : [pMem] "+m" (*pu16)
1866 , [uOld] "=&r" (u16ActualOld)
1867 , [rc] "=&r" (rcSpill)
1868 , [fXchg] "=&r" (fXchg.u)
1869 : [uCmp] "r" (u16Old)
1870 , [uNew] "r" (u16New)
1871 , "[fXchg]" (0)
1872 RTASM_ARM_DMB_SY_COMMA_IN_REG
1873 : "cc");
1874 *pu16Old = u16ActualOld;
1875 return fXchg.f;
1876
1877# else
1878# error "Port me"
1879# endif
1880}
1881#endif
1882
1883
1884/**
1885 * Atomically Compare and Exchange a signed 16-bit value, additionally
1886 * passes back old value, ordered.
1887 *
1888 * @returns true if xchg was done.
1889 * @returns false if xchg wasn't done.
1890 *
1891 * @param pi16 Pointer to the value to update.
1892 * @param   i16New      The new value to assign to *pi16.
1893 * @param   i16Old      The old value to compare *pi16 with.
1894 * @param   pi16Old     Pointer to where to store the old value.
1895 *
1896 * @remarks x86: Requires a 486 or later.
1897 */
1898DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1899{
1900 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1901}
1902
1903
1904/**
1905 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1906 * passes back old value, ordered.
1907 *
1908 * @returns true if xchg was done.
1909 * @returns false if xchg wasn't done.
1910 *
1911 * @param pu32 Pointer to the value to update.
1912 * @param   u32New      The new value to assign to *pu32.
1913 * @param   u32Old      The old value to compare *pu32 with.
1914 * @param   pu32Old     Pointer to where to store the old value.
1915 *
1916 * @remarks x86: Requires a 486 or later.
1917 */
1918#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1919RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1920#else
1921DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1922{
1923# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1924# if RT_INLINE_ASM_GNU_STYLE
1925 uint8_t u8Ret;
1926 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1927 "setz %1\n\t"
1928 : "=m" (*pu32)
1929 , "=qm" (u8Ret)
1930 , "=a" (*pu32Old)
1931 : "r" (u32New)
1932 , "a" (u32Old)
1933 , "m" (*pu32)
1934 : "cc");
1935 return (bool)u8Ret;
1936
1937# elif RT_INLINE_ASM_USES_INTRIN
1938 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1939
1940# else
1941 uint32_t u32Ret;
1942 __asm
1943 {
1944# ifdef RT_ARCH_AMD64
1945 mov rdx, [pu32]
1946# else
1947 mov edx, [pu32]
1948# endif
1949 mov eax, [u32Old]
1950 mov ecx, [u32New]
1951# ifdef RT_ARCH_AMD64
1952 lock cmpxchg [rdx], ecx
1953 mov rdx, [pu32Old]
1954 mov [rdx], eax
1955# else
1956 lock cmpxchg [edx], ecx
1957 mov edx, [pu32Old]
1958 mov [edx], eax
1959# endif
1960 setz al
1961 movzx eax, al
1962 mov [u32Ret], eax
1963 }
1964 return !!u32Ret;
1965# endif
1966
1967# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1968 union { uint32_t u; bool f; } fXchg;
1969 uint32_t u32ActualOld;
1970 uint32_t rcSpill;
1971 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1972 RTASM_ARM_DMB_SY
1973# if defined(RT_ARCH_ARM64)
1974 "ldaxr %w[uOld], %[pMem]\n\t"
1975 "cmp %w[uOld], %w[uCmp]\n\t"
1976 "bne 1f\n\t" /* stop here if not equal */
1977 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1978 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1979 "mov %w[fXchg], #1\n\t"
1980# else
1981 "ldrex %[uOld], %[pMem]\n\t"
1982 "teq %[uOld], %[uCmp]\n\t"
1983 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1984 "bne 1f\n\t" /* stop here if not equal */
1985 "cmp %[rc], #0\n\t"
1986 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1987 "mov %[fXchg], #1\n\t"
1988# endif
1989 "1:\n\t"
1990 : [pMem] "+m" (*pu32)
1991 , [uOld] "=&r" (u32ActualOld)
1992 , [rc] "=&r" (rcSpill)
1993 , [fXchg] "=&r" (fXchg.u)
1994 : [uCmp] "r" (u32Old)
1995 , [uNew] "r" (u32New)
1996 , "[fXchg]" (0)
1997 RTASM_ARM_DMB_SY_COMMA_IN_REG
1998 : "cc");
1999 *pu32Old = u32ActualOld;
2000 return fXchg.f;
2001
2002# else
2003# error "Port me"
2004# endif
2005}
2006#endif
2007
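/* Usage sketch:  a typical compare-and-exchange retry loop that reuses the
 * returned old value instead of re-reading the variable each iteration.  The
 * counter and the cap of 100 are hypothetical.
 *
 *      static uint32_t volatile g_cExampleUsers = 0;
 *
 *      static bool exampleAddUserCapped(void)
 *      {
 *          uint32_t uOld = ASMAtomicReadU32(&g_cExampleUsers);
 *          while (uOld < 100)
 *          {
 *              if (ASMAtomicCmpXchgExU32(&g_cExampleUsers, uOld + 1, uOld, &uOld))
 *                  return true;    // uOld + 1 is now the current value
 *              // on failure uOld was refreshed with the winning value; re-check the cap
 *          }
 *          return false;           // cap reached
 *      }
 */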
2008
2009/**
2010 * Atomically Compare and Exchange a signed 32-bit value, additionally
2011 * passes back old value, ordered.
2012 *
2013 * @returns true if xchg was done.
2014 * @returns false if xchg wasn't done.
2015 *
2016 * @param pi32 Pointer to the value to update.
2017 * @param   i32New      The new value to assign to *pi32.
2018 * @param   i32Old      The old value to compare *pi32 with.
2019 * @param   pi32Old     Pointer to where to store the old value.
2020 *
2021 * @remarks x86: Requires a 486 or later.
2022 */
2023DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2024{
2025 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2026}
2027
2028
2029/**
2030 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2031 * passing back old value, ordered.
2032 *
2033 * @returns true if xchg was done.
2034 * @returns false if xchg wasn't done.
2035 *
2036 * @param pu64 Pointer to the 64-bit variable to update.
2037 * @param u64New The 64-bit value to assign to *pu64.
2038 * @param u64Old The value to compare with.
2039 * @param   pu64Old     Pointer to where to store the old value.
2040 *
2041 * @remarks x86: Requires a Pentium or later.
2042 */
2043#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2044 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2045RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2046#else
2047DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2048{
2049# if RT_INLINE_ASM_USES_INTRIN
2050    return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2051
2052# elif defined(RT_ARCH_AMD64)
2053# if RT_INLINE_ASM_GNU_STYLE
2054 uint8_t u8Ret;
2055 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2056 "setz %1\n\t"
2057 : "=m" (*pu64)
2058 , "=qm" (u8Ret)
2059 , "=a" (*pu64Old)
2060 : "r" (u64New)
2061 , "a" (u64Old)
2062 , "m" (*pu64)
2063 : "cc");
2064 return (bool)u8Ret;
2065# else
2066 bool fRet;
2067 __asm
2068 {
2069            mov     rdx, [pu64]
2070 mov rax, [u64Old]
2071 mov rcx, [u64New]
2072 lock cmpxchg [rdx], rcx
2073 mov rdx, [pu64Old]
2074 mov [rdx], rax
2075 setz al
2076 mov [fRet], al
2077 }
2078 return fRet;
2079# endif
2080
2081# elif defined(RT_ARCH_X86)
2082# if RT_INLINE_ASM_GNU_STYLE
2083 uint64_t u64Ret;
2084# if defined(PIC) || defined(__PIC__)
2085 /* NB: this code uses a memory clobber description, because the clean
2086 * solution with an output value for *pu64 makes gcc run out of registers.
2087 * This will cause suboptimal code, and anyone with a better solution is
2088 * welcome to improve this. */
2089 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2090 "lock; cmpxchg8b %3\n\t"
2091 "xchgl %%ebx, %1\n\t"
2092 : "=A" (u64Ret)
2093 : "DS" ((uint32_t)u64New)
2094 , "c" ((uint32_t)(u64New >> 32))
2095 , "m" (*pu64)
2096 , "0" (u64Old)
2097 : "memory"
2098 , "cc" );
2099# else /* !PIC */
2100 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2101 : "=A" (u64Ret)
2102 , "=m" (*pu64)
2103 : "b" ((uint32_t)u64New)
2104 , "c" ((uint32_t)(u64New >> 32))
2105 , "m" (*pu64)
2106 , "0" (u64Old)
2107 : "cc");
2108# endif
2109 *pu64Old = u64Ret;
2110 return u64Ret == u64Old;
2111# else
2112 uint32_t u32Ret;
2113 __asm
2114 {
2115 mov ebx, dword ptr [u64New]
2116 mov ecx, dword ptr [u64New + 4]
2117 mov edi, [pu64]
2118 mov eax, dword ptr [u64Old]
2119 mov edx, dword ptr [u64Old + 4]
2120 lock cmpxchg8b [edi]
2121 mov ebx, [pu64Old]
2122 mov [ebx], eax
2123 setz al
2124 movzx eax, al
2125 add ebx, 4
2126 mov [ebx], edx
2127 mov dword ptr [u32Ret], eax
2128 }
2129 return !!u32Ret;
2130# endif
2131
2132# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2133 union { uint32_t u; bool f; } fXchg;
2134 uint64_t u64ActualOld;
2135 uint32_t rcSpill;
2136 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2137 RTASM_ARM_DMB_SY
2138# if defined(RT_ARCH_ARM64)
2139 "ldaxr %[uOld], %[pMem]\n\t"
2140 "cmp %[uOld], %[uCmp]\n\t"
2141 "bne 1f\n\t" /* stop here if not equal */
2142 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2143 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2144 "mov %w[fXchg], #1\n\t"
2145# else
2146 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2147 "teq %[uOld], %[uCmp]\n\t"
2148 "teqeq %H[uOld], %H[uCmp]\n\t"
2149 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2150 "bne 1f\n\t" /* stop here if not equal */
2151 "cmp %[rc], #0\n\t"
2152 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2153 "mov %[fXchg], #1\n\t"
2154# endif
2155 "1:\n\t"
2156 : [pMem] "+m" (*pu64)
2157 , [uOld] "=&r" (u64ActualOld)
2158 , [rc] "=&r" (rcSpill)
2159 , [fXchg] "=&r" (fXchg.u)
2160 : [uCmp] "r" (u64Old)
2161 , [uNew] "r" (u64New)
2162 , "[fXchg]" (0)
2163 RTASM_ARM_DMB_SY_COMMA_IN_REG
2164 : "cc");
2165 *pu64Old = u64ActualOld;
2166 return fXchg.f;
2167
2168# else
2169# error "Port me"
2170# endif
2171}
2172#endif
2173
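/* Usage sketch:  a lock-free 64-bit high-water mark, retrying with the value
 * handed back on failure.  g_u64ExampleMax and the helper are hypothetical.
 *
 *      static uint64_t volatile g_u64ExampleMax = 0;
 *
 *      static void exampleUpdateMax(uint64_t u64Candidate)
 *      {
 *          uint64_t u64Cur = ASMAtomicReadU64(&g_u64ExampleMax);
 *          while (   u64Candidate > u64Cur
 *                 && !ASMAtomicCmpXchgExU64(&g_u64ExampleMax, u64Candidate, u64Cur, &u64Cur))
 *              ;   // on failure u64Cur is refreshed, so just re-test the condition
 *      }
 */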
2174
2175/**
2176 * Atomically Compare and exchange a signed 64-bit value, additionally
2177 * passing back old value, ordered.
2178 *
2179 * @returns true if xchg was done.
2180 * @returns false if xchg wasn't done.
2181 *
2182 * @param pi64 Pointer to the 64-bit variable to update.
2183 * @param   i64         The 64-bit value to assign to *pi64.
2184 * @param   i64Old      The value to compare with.
2185 * @param   pi64Old     Pointer to where to store the old value.
2186 *
2187 * @remarks x86: Requires a Pentium or later.
2188 */
2189DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2190{
2191 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2192}
2193
2194#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
2195
2196/** @def RTASM_HAVE_CMP_XCHG_U128
2197 * Indicates that we've got ASMAtomicCmpXchgU128(), ASMAtomicCmpXchgU128v2()
2198 * and ASMAtomicCmpXchgU128U() available. */
2199# define RTASM_HAVE_CMP_XCHG_U128 1
2200
2201
2202/**
2203 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2204 *
2205 * @returns true if exchange was done.
2206 * @returns false if exchange wasn't done.
2207 *
2208 * @param pu128 Pointer to the 128-bit variable to update.
2209 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
2210 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
2211 * @param   u64OldHi    The high 64 bits of the value to compare with.
2212 * @param   u64OldLo    The low 64 bits of the value to compare with.
2213 * @param pu128Old Where to return the old value.
2214 *
2215 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2216 */
2217# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
2218DECLASM(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2219 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_PROTO;
2220# else
2221DECLINLINE(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2222 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_DEF
2223{
2224# if RT_INLINE_ASM_USES_INTRIN
2225 pu128Old->Hi = u64OldHi;
2226 pu128Old->Lo = u64OldLo;
2227 AssertCompileMemberOffset(uint128_t, Lo, 0);
2228 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, (__int64 *)&pu128Old->Lo) != 0;
2229
2230# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2231 uint128_t const uCmp = ((uint128_t)u64OldHi << 64) | u64OldLo;
2232 uint128_t const uOld = __sync_val_compare_and_swap(pu128, uCmp, ((uint128_t)u64NewHi << 64) | u64NewLo);
2233 *pu128Old = uOld;
2234 return uCmp == uOld;
2235
2236# elif defined(RT_ARCH_AMD64)
2237# if RT_INLINE_ASM_GNU_STYLE
2238 uint8_t bRet;
2239 uint64_t u64RetHi, u64RetLo;
2240 __asm__ __volatile__("lock; cmpxchg16b %3\n\t"
2241 "setz %b0\n\t"
2242 : "=r" (bRet)
2243 , "=a" (u64RetLo)
2244 , "=d" (u64RetHi)
2245 , "+m" (*pu128)
2246 : "a" (u64OldLo)
2247 , "d" (u64OldHi)
2248 , "b" (u64NewLo)
2249 , "c" (u64NewHi)
2250 : "cc");
2251 *pu128Old = ((uint128_t)u64RetHi << 64) | u64RetLo;
2252 return (bool)bRet;
2253# else
2254# error "Port me"
2255# endif
2256# else
2257# error "Port me"
2258# endif
2259}
2260# endif
2261
2262
2263/**
2264 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2265 *
2266 * @returns true if exchange was done.
2267 * @returns false if exchange wasn't done.
2268 *
2269 * @param pu128 Pointer to the 128-bit variable to update.
2270 * @param u128New The 128-bit value to assign to *pu128.
2271 * @param u128Old The value to compare with.
2272 * @param pu128Old Where to return the old value.
2273 *
2274 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2275 */
2276DECLINLINE(bool) ASMAtomicCmpXchgU128(volatile uint128_t *pu128, const uint128_t u128New,
2277 const uint128_t u128Old, uint128_t *pu128Old) RT_NOTHROW_DEF
2278{
2279# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
2280# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2281 uint128_t const uSwapped = __sync_val_compare_and_swap(pu128, u128Old, u128New);
2282 *pu128Old = uSwapped;
2283 return uSwapped == u128Old;
2284# else
2285 return ASMAtomicCmpXchgU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
2286 (uint64_t)(u128Old >> 64), (uint64_t)u128Old, pu128Old);
2287# endif
2288# else
2289 return ASMAtomicCmpXchgU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo, pu128Old);
2290# endif
2291}
2292
2293
2294/**
2295 * RTUINT128U wrapper for ASMAtomicCmpXchgU128.
2296 */
2297DECLINLINE(bool) ASMAtomicCmpXchgU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
2298 const RTUINT128U u128Old, PRTUINT128U pu128Old) RT_NOTHROW_DEF
2299{
2300# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2301 return ASMAtomicCmpXchgU128(&pu128->u, u128New.u, u128Old.u, &pu128Old->u);
2302# else
2303 return ASMAtomicCmpXchgU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo, &pu128Old->u);
2304# endif
2305}
2306
2307#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
2308
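/* Usage sketch:  replacing the low half of a 128-bit state while preserving
 * the high half, only where RTASM_HAVE_CMP_XCHG_U128 is defined.  The
 * variable g_ExampleState is hypothetical; static storage zero-initializes
 * it, and it is assumed to be 16-byte aligned as the instruction requires.
 *
 *      static RTUINT128U volatile g_ExampleState;
 *
 *      static void exampleSetLowHalf(uint64_t u64NewLo)
 *      {
 *          RTUINT128U uOld, uNew;
 *          uOld.s.Lo = ASMAtomicReadU64(&g_ExampleState.s.Lo);
 *          uOld.s.Hi = ASMAtomicReadU64(&g_ExampleState.s.Hi);
 *          do
 *          {
 *              uNew.s.Hi = uOld.s.Hi;      // keep the high half
 *              uNew.s.Lo = u64NewLo;       // replace the low half
 *          } while (!ASMAtomicCmpXchgU128U(&g_ExampleState, uNew, uOld, &uOld));
 *      }
 */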
2309
2310
2311/** @def ASMAtomicCmpXchgExHandle
2312 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2313 *
2314 * @param ph Pointer to the value to update.
2315 * @param   hNew        The new value to assign to *ph.
2316 * @param   hOld        The old value to compare *ph with.
2317 * @param fRc Where to store the result.
2318 * @param phOldVal Pointer to where to store the old value.
2319 *
2320 * @remarks This doesn't currently work for all handles (like RTFILE).
2321 */
2322#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2323# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2324 do { \
2325        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2326        AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
2327 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2328 } while (0)
2329#elif HC_ARCH_BITS == 64
2330# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2331 do { \
2332 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2333 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2334 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2335 } while (0)
2336#else
2337# error HC_ARCH_BITS
2338#endif
2339
2340
2341/** @def ASMAtomicCmpXchgExSize
2342 * Atomically Compare and Exchange a value whose size might differ
2343 * between platforms or compilers. Additionally passes back old value.
2344 *
2345 * @param pu Pointer to the value to update.
2346 * @param   uNew        The new value to assign to *pu.
2347 * @param   uOld        The old value to compare *pu with.
2348 * @param fRc Where to store the result.
2349 * @param puOldVal Pointer to where to store the old value.
2350 *
2351 * @remarks x86: Requires a 486 or later.
2352 */
2353#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2354 do { \
2355 switch (sizeof(*(pu))) { \
2356            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
2357                break; \
2358            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
2359                break; \
2360            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2361                (fRc) = false; \
2362                *(puOldVal) = 0; \
2363 break; \
2364 } \
2365 } while (0)
2366
2367
2368/**
2369 * Atomically Compare and Exchange a pointer value, additionally
2370 * passing back old value, ordered.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param ppv Pointer to the value to update.
2376 * @param   pvNew       The new value to assign to *ppv.
2377 * @param   pvOld       The old value to compare *ppv with.
2378 * @param   ppvOld      Pointer to where to store the old value.
2379 *
2380 * @remarks x86: Requires a 486 or later.
2381 */
2382DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2383 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2384{
2385#if ARCH_BITS == 32 || ARCH_BITS == 16
2386 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2387#elif ARCH_BITS == 64
2388 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2389#else
2390# error "ARCH_BITS is bogus"
2391#endif
2392}
2393
2394
2395/**
2396 * Atomically Compare and Exchange a pointer value, additionally
2397 * passing back old value, ordered.
2398 *
2399 * @returns true if xchg was done.
2400 * @returns false if xchg wasn't done.
2401 *
2402 * @param ppv Pointer to the value to update.
2403 * @param   pvNew       The new value to assign to *ppv.
2404 * @param   pvOld       The old value to compare *ppv with.
2405 * @param   ppvOld      Pointer to where to store the old value.
2406 *
2407 * @remarks This is relatively type safe on GCC platforms.
2408 * @remarks x86: Requires a 486 or later.
2409 */
2410#ifdef __GNUC__
2411# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2412 __extension__ \
2413 ({\
2414 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2415 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2416 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2417 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2418 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2419 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2420 (void **)ppvOldTypeChecked); \
2421 fMacroRet; \
2422 })
2423#else
2424# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2425 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2426#endif
2427
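/* Usage sketch:  lock-free LIFO push using the type-checked variant.  The
 * node type, list head and function are hypothetical.
 *
 *      typedef struct EXAMPLENODE
 *      {
 *          struct EXAMPLENODE *pNext;
 *          uint32_t            uPayload;
 *      } EXAMPLENODE;
 *      static EXAMPLENODE * volatile g_pExampleHead = NULL;
 *
 *      static void examplePush(EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOldHead = ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *          do
 *              pNode->pNext = pOldHead;    // link to what we believe is the head
 *          while (!ASMAtomicCmpXchgExPtr(&g_pExampleHead, pNode, pOldHead, &pOldHead));
 *      }
 */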
2428
2429/**
2430 * Virtualization unfriendly serializing instruction, always exits.
2431 */
2432#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2433RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2434#else
2435DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2436{
2437# if RT_INLINE_ASM_GNU_STYLE
2438 RTCCUINTREG xAX = 0;
2439# ifdef RT_ARCH_AMD64
2440 __asm__ __volatile__ ("cpuid"
2441 : "=a" (xAX)
2442 : "0" (xAX)
2443 : "rbx", "rcx", "rdx", "memory");
2444# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2445 __asm__ __volatile__ ("push %%ebx\n\t"
2446 "cpuid\n\t"
2447 "pop %%ebx\n\t"
2448 : "=a" (xAX)
2449 : "0" (xAX)
2450 : "ecx", "edx", "memory");
2451# else
2452 __asm__ __volatile__ ("cpuid"
2453 : "=a" (xAX)
2454 : "0" (xAX)
2455 : "ebx", "ecx", "edx", "memory");
2456# endif
2457
2458# elif RT_INLINE_ASM_USES_INTRIN
2459 int aInfo[4];
2460 _ReadWriteBarrier();
2461 __cpuid(aInfo, 0);
2462
2463# else
2464 __asm
2465 {
2466 push ebx
2467 xor eax, eax
2468 cpuid
2469 pop ebx
2470 }
2471# endif
2472}
2473#endif
2474
2475/**
2476 * Virtualization friendly serializing instruction, though more expensive.
2477 */
2478#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2479RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2480#else
2481DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2482{
2483# if RT_INLINE_ASM_GNU_STYLE
2484# ifdef RT_ARCH_AMD64
2485 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2486 "subq $128, %%rsp\n\t" /*redzone*/
2487 "mov %%ss, %%eax\n\t"
2488 "pushq %%rax\n\t"
2489 "pushq %%r10\n\t"
2490 "pushfq\n\t"
2491 "movl %%cs, %%eax\n\t"
2492 "pushq %%rax\n\t"
2493 "leaq 1f(%%rip), %%rax\n\t"
2494 "pushq %%rax\n\t"
2495 "iretq\n\t"
2496 "1:\n\t"
2497 ::: "rax", "r10", "memory", "cc");
2498# else
2499 __asm__ __volatile__ ("pushfl\n\t"
2500 "pushl %%cs\n\t"
2501 "pushl $1f\n\t"
2502 "iretl\n\t"
2503 "1:\n\t"
2504 ::: "memory");
2505# endif
2506
2507# else
2508 __asm
2509 {
2510 pushfd
2511 push cs
2512 push la_ret
2513 iretd
2514 la_ret:
2515 }
2516# endif
2517}
2518#endif
2519
2520/**
2521 * Virtualization friendlier serializing instruction, may still cause exits.
2522 */
2523#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2524RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2525#else
2526DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2527{
2528# if RT_INLINE_ASM_GNU_STYLE
2529 /* rdtscp is not supported by ancient linux build VM of course :-( */
2530# ifdef RT_ARCH_AMD64
2531 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2532 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2533# else
2534 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2535 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2536# endif
2537# else
2538# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2539 uint32_t uIgnore;
2540 _ReadWriteBarrier();
2541 (void)__rdtscp(&uIgnore);
2542 (void)uIgnore;
2543# else
2544 __asm
2545 {
2546 rdtscp
2547 }
2548# endif
2549# endif
2550}
2551#endif
2552
2553
2554/**
2555 * Serialize Instruction (both data store and instruction flush).
2556 */
2557#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2558# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2559#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2560# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2561#elif defined(RT_ARCH_SPARC64)
2562RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2563#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2564DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2565{
2566 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2567}
2568#else
2569# error "Port me"
2570#endif
2571
2572
2573/**
2574 * Memory fence, waits for any pending writes and reads to complete.
2575 * @note No implicit compiler barrier (which is probably stupid).
2576 */
2577DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2578{
2579#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2580# if RT_INLINE_ASM_GNU_STYLE
2581 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2582# elif RT_INLINE_ASM_USES_INTRIN
2583 _mm_mfence();
2584# else
2585 __asm
2586 {
2587 _emit 0x0f
2588 _emit 0xae
2589 _emit 0xf0
2590 }
2591# endif
2592#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2593 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2594#elif ARCH_BITS == 16
2595 uint16_t volatile u16;
2596 ASMAtomicXchgU16(&u16, 0);
2597#else
2598 uint32_t volatile u32;
2599 ASMAtomicXchgU32(&u32, 0);
2600#endif
2601}
2602
2603
2604/**
2605 * Write fence, waits for any pending writes to complete.
2606 * @note No implicit compiler barrier (which is probably stupid).
2607 */
2608DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2609{
2610#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2611# if RT_INLINE_ASM_GNU_STYLE
2612 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2613# elif RT_INLINE_ASM_USES_INTRIN
2614 _mm_sfence();
2615# else
2616 __asm
2617 {
2618 _emit 0x0f
2619 _emit 0xae
2620 _emit 0xf8
2621 }
2622# endif
2623#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2624 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2625#else
2626 ASMMemoryFence();
2627#endif
2628}
2629
2630
2631/**
2632 * Read fence, waits for any pending reads to complete.
2633 * @note No implicit compiler barrier (which is probably stupid).
2634 */
2635DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2636{
2637#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2638# if RT_INLINE_ASM_GNU_STYLE
2639 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2640# elif RT_INLINE_ASM_USES_INTRIN
2641 _mm_lfence();
2642# else
2643 __asm
2644 {
2645 _emit 0x0f
2646 _emit 0xae
2647 _emit 0xe8
2648 }
2649# endif
2650#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2651 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2652#else
2653 ASMMemoryFence();
2654#endif
2655}
2656
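/* Usage sketch:  pairing the write and read fences to publish data guarded by
 * a separate ready flag.  The structure and field names are hypothetical, and
 * per the notes above an additional compiler barrier may be wanted since these
 * fences do not imply one.
 *
 *      // producer
 *      g_ExampleBuf.u64Payload = u64Value;             // plain store
 *      ASMWriteFence();                                // payload before flag
 *      ASMAtomicUoWriteU32(&g_ExampleBuf.fReady, 1);
 *
 *      // consumer
 *      if (ASMAtomicUoReadU32(&g_ExampleBuf.fReady))
 *      {
 *          ASMReadFence();                             // flag before payload
 *          u64Value = g_ExampleBuf.u64Payload;
 *      }
 */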
2657
2658/**
2659 * Atomically reads an unsigned 8-bit value, ordered.
2660 *
2661 * @returns Current *pu8 value
2662 * @param pu8 Pointer to the 8-bit variable to read.
2663 */
2664DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2665{
2666#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2667 uint32_t u32;
2668 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2669 RTASM_ARM_DMB_SY
2670# if defined(RT_ARCH_ARM64)
2671 "ldxrb %w[uDst], %[pMem]\n\t"
2672# else
2673 "ldrexb %[uDst], %[pMem]\n\t"
2674# endif
2675 : [uDst] "=&r" (u32)
2676 : [pMem] "m" (*pu8)
2677 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2678 return (uint8_t)u32;
2679#else
2680 ASMMemoryFence();
2681 return *pu8; /* byte reads are atomic on x86 */
2682#endif
2683}
2684
2685
2686/**
2687 * Atomically reads an unsigned 8-bit value, unordered.
2688 *
2689 * @returns Current *pu8 value
2690 * @param pu8 Pointer to the 8-bit variable to read.
2691 */
2692DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2693{
2694#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2695 uint32_t u32;
2696 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2697# if defined(RT_ARCH_ARM64)
2698 "ldxrb %w[uDst], %[pMem]\n\t"
2699# else
2700 "ldrexb %[uDst], %[pMem]\n\t"
2701# endif
2702 : [uDst] "=&r" (u32)
2703 : [pMem] "m" (*pu8));
2704 return (uint8_t)u32;
2705#else
2706 return *pu8; /* byte reads are atomic on x86 */
2707#endif
2708}
2709
2710
2711/**
2712 * Atomically reads a signed 8-bit value, ordered.
2713 *
2714 * @returns Current *pi8 value
2715 * @param pi8 Pointer to the 8-bit variable to read.
2716 */
2717DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2718{
2719 ASMMemoryFence();
2720#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2721 int32_t i32;
2722 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2723 RTASM_ARM_DMB_SY
2724# if defined(RT_ARCH_ARM64)
2725 "ldxrb %w[iDst], %[pMem]\n\t"
2726# else
2727 "ldrexb %[iDst], %[pMem]\n\t"
2728# endif
2729 : [iDst] "=&r" (i32)
2730 : [pMem] "m" (*pi8)
2731 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2732 return (int8_t)i32;
2733#else
2734 return *pi8; /* byte reads are atomic on x86 */
2735#endif
2736}
2737
2738
2739/**
2740 * Atomically reads a signed 8-bit value, unordered.
2741 *
2742 * @returns Current *pi8 value
2743 * @param pi8 Pointer to the 8-bit variable to read.
2744 */
2745DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2746{
2747#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2748 int32_t i32;
2749 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2750# if defined(RT_ARCH_ARM64)
2751 "ldxrb %w[iDst], %[pMem]\n\t"
2752# else
2753 "ldrexb %[iDst], %[pMem]\n\t"
2754# endif
2755 : [iDst] "=&r" (i32)
2756 : [pMem] "m" (*pi8));
2757 return (int8_t)i32;
2758#else
2759 return *pi8; /* byte reads are atomic on x86 */
2760#endif
2761}
2762
2763
2764/**
2765 * Atomically reads an unsigned 16-bit value, ordered.
2766 *
2767 * @returns Current *pu16 value
2768 * @param pu16 Pointer to the 16-bit variable to read.
2769 */
2770DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2771{
2772 Assert(!((uintptr_t)pu16 & 1));
2773#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2774 uint32_t u32;
2775 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2776 RTASM_ARM_DMB_SY
2777# if defined(RT_ARCH_ARM64)
2778 "ldxrh %w[uDst], %[pMem]\n\t"
2779# else
2780 "ldrexh %[uDst], %[pMem]\n\t"
2781# endif
2782 : [uDst] "=&r" (u32)
2783 : [pMem] "m" (*pu16)
2784 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2785 return (uint16_t)u32;
2786#else
2787 ASMMemoryFence();
2788 return *pu16;
2789#endif
2790}
2791
2792
2793/**
2794 * Atomically reads an unsigned 16-bit value, unordered.
2795 *
2796 * @returns Current *pu16 value
2797 * @param pu16 Pointer to the 16-bit variable to read.
2798 */
2799DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2800{
2801 Assert(!((uintptr_t)pu16 & 1));
2802#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2803 uint32_t u32;
2804 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2805# if defined(RT_ARCH_ARM64)
2806 "ldxrh %w[uDst], %[pMem]\n\t"
2807# else
2808 "ldrexh %[uDst], %[pMem]\n\t"
2809# endif
2810 : [uDst] "=&r" (u32)
2811 : [pMem] "m" (*pu16));
2812 return (uint16_t)u32;
2813#else
2814 return *pu16;
2815#endif
2816}
2817
2818
2819/**
2820 * Atomically reads a signed 16-bit value, ordered.
2821 *
2822 * @returns Current *pi16 value
2823 * @param pi16 Pointer to the 16-bit variable to read.
2824 */
2825DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2826{
2827 Assert(!((uintptr_t)pi16 & 1));
2828#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2829 int32_t i32;
2830 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2831 RTASM_ARM_DMB_SY
2832# if defined(RT_ARCH_ARM64)
2833 "ldxrh %w[iDst], %[pMem]\n\t"
2834# else
2835 "ldrexh %[iDst], %[pMem]\n\t"
2836# endif
2837 : [iDst] "=&r" (i32)
2838 : [pMem] "m" (*pi16)
2839 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2840 return (int16_t)i32;
2841#else
2842 ASMMemoryFence();
2843 return *pi16;
2844#endif
2845}
2846
2847
2848/**
2849 * Atomically reads a signed 16-bit value, unordered.
2850 *
2851 * @returns Current *pi16 value
2852 * @param pi16 Pointer to the 16-bit variable to read.
2853 */
2854DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2855{
2856 Assert(!((uintptr_t)pi16 & 1));
2857#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2858 int32_t i32;
2859 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2860# if defined(RT_ARCH_ARM64)
2861 "ldxrh %w[iDst], %[pMem]\n\t"
2862# else
2863 "ldrexh %[iDst], %[pMem]\n\t"
2864# endif
2865 : [iDst] "=&r" (i32)
2866 : [pMem] "m" (*pi16));
2867 return (int16_t)i32;
2868#else
2869 return *pi16;
2870#endif
2871}
2872
2873
2874/**
2875 * Atomically reads an unsigned 32-bit value, ordered.
2876 *
2877 * @returns Current *pu32 value
2878 * @param pu32 Pointer to the 32-bit variable to read.
2879 */
2880DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2881{
2882 Assert(!((uintptr_t)pu32 & 3));
2883#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2884 uint32_t u32;
2885 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2886 RTASM_ARM_DMB_SY
2887# if defined(RT_ARCH_ARM64)
2888 "ldxr %w[uDst], %[pMem]\n\t"
2889# else
2890 "ldrex %[uDst], %[pMem]\n\t"
2891# endif
2892 : [uDst] "=&r" (u32)
2893 : [pMem] "m" (*pu32)
2894 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2895 return u32;
2896#else
2897 ASMMemoryFence();
2898# if ARCH_BITS == 16
2899 AssertFailed(); /** @todo 16-bit */
2900# endif
2901 return *pu32;
2902#endif
2903}
2904
2905
2906/**
2907 * Atomically reads an unsigned 32-bit value, unordered.
2908 *
2909 * @returns Current *pu32 value
2910 * @param pu32 Pointer to the 32-bit variable to read.
2911 */
2912DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2913{
2914 Assert(!((uintptr_t)pu32 & 3));
2915#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2916 uint32_t u32;
2917 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2918# if defined(RT_ARCH_ARM64)
2919 "ldxr %w[uDst], %[pMem]\n\t"
2920# else
2921 "ldrex %[uDst], %[pMem]\n\t"
2922# endif
2923 : [uDst] "=&r" (u32)
2924 : [pMem] "m" (*pu32));
2925 return u32;
2926#else
2927# if ARCH_BITS == 16
2928 AssertFailed(); /** @todo 16-bit */
2929# endif
2930 return *pu32;
2931#endif
2932}
2933
2934
2935/**
2936 * Atomically reads a signed 32-bit value, ordered.
2937 *
2938 * @returns Current *pi32 value
2939 * @param pi32 Pointer to the 32-bit variable to read.
2940 */
2941DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2942{
2943 Assert(!((uintptr_t)pi32 & 3));
2944#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2945 int32_t i32;
2946 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2947 RTASM_ARM_DMB_SY
2948# if defined(RT_ARCH_ARM64)
2949 "ldxr %w[iDst], %[pMem]\n\t"
2950# else
2951 "ldrex %[iDst], %[pMem]\n\t"
2952# endif
2953 : [iDst] "=&r" (i32)
2954 : [pMem] "m" (*pi32)
2955 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2956 return i32;
2957#else
2958 ASMMemoryFence();
2959# if ARCH_BITS == 16
2960 AssertFailed(); /** @todo 16-bit */
2961# endif
2962 return *pi32;
2963#endif
2964}
2965
2966
2967/**
2968 * Atomically reads a signed 32-bit value, unordered.
2969 *
2970 * @returns Current *pi32 value
2971 * @param pi32 Pointer to the 32-bit variable to read.
2972 */
2973DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2974{
2975 Assert(!((uintptr_t)pi32 & 3));
2976#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2977 int32_t i32;
2978 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2979# if defined(RT_ARCH_ARM64)
2980 "ldxr %w[iDst], %[pMem]\n\t"
2981# else
2982 "ldrex %[iDst], %[pMem]\n\t"
2983# endif
2984 : [iDst] "=&r" (i32)
2985 : [pMem] "m" (*pi32));
2986 return i32;
2987
2988#else
2989# if ARCH_BITS == 16
2990 AssertFailed(); /** @todo 16-bit */
2991# endif
2992 return *pi32;
2993#endif
2994}
2995
2996
2997/**
2998 * Atomically reads an unsigned 64-bit value, ordered.
2999 *
3000 * @returns Current *pu64 value
3001 * @param pu64 Pointer to the 64-bit variable to read.
3002 * The memory pointed to must be writable.
3003 *
3004 * @remarks This may fault if the memory is read-only!
3005 * @remarks x86: Requires a Pentium or later.
3006 */
3007#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
3008 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3009RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3010#else
3011DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3012{
3013 uint64_t u64;
3014# ifdef RT_ARCH_AMD64
3015 Assert(!((uintptr_t)pu64 & 7));
3016/*# if RT_INLINE_ASM_GNU_STYLE
3017 __asm__ __volatile__( "mfence\n\t"
3018 "movq %1, %0\n\t"
3019 : "=r" (u64)
3020 : "m" (*pu64));
3021# else
3022 __asm
3023 {
3024 mfence
3025 mov rdx, [pu64]
3026 mov rax, [rdx]
3027 mov [u64], rax
3028 }
3029# endif*/
3030 ASMMemoryFence();
3031 u64 = *pu64;
3032
3033# elif defined(RT_ARCH_X86)
3034# if RT_INLINE_ASM_GNU_STYLE
3035# if defined(PIC) || defined(__PIC__)
3036 uint32_t u32EBX = 0;
3037 Assert(!((uintptr_t)pu64 & 7));
3038 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3039 "lock; cmpxchg8b (%5)\n\t"
3040 "movl %3, %%ebx\n\t"
3041 : "=A" (u64)
3042# if RT_GNUC_PREREQ(4, 3)
3043 , "+m" (*pu64)
3044# else
3045 , "=m" (*pu64)
3046# endif
3047 : "0" (0ULL)
3048 , "m" (u32EBX)
3049 , "c" (0)
3050 , "S" (pu64)
3051 : "cc");
3052# else /* !PIC */
3053 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3054 : "=A" (u64)
3055 , "+m" (*pu64)
3056 : "0" (0ULL)
3057 , "b" (0)
3058 , "c" (0)
3059 : "cc");
3060# endif
3061# else
3062 Assert(!((uintptr_t)pu64 & 7));
3063 __asm
3064 {
3065 xor eax, eax
3066 xor edx, edx
3067 mov edi, pu64
3068 xor ecx, ecx
3069 xor ebx, ebx
3070 lock cmpxchg8b [edi]
3071 mov dword ptr [u64], eax
3072 mov dword ptr [u64 + 4], edx
3073 }
3074# endif
3075
3076# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3077 Assert(!((uintptr_t)pu64 & 7));
3078 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
3079 RTASM_ARM_DMB_SY
3080# if defined(RT_ARCH_ARM64)
3081 "ldxr %[uDst], %[pMem]\n\t"
3082# else
3083 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3084# endif
3085 : [uDst] "=&r" (u64)
3086 : [pMem] "m" (*pu64)
3087 RTASM_ARM_DMB_SY_COMMA_IN_REG);
3088
3089# else
3090# error "Port me"
3091# endif
3092 return u64;
3093}
3094#endif
3095
3096
3097/**
3098 * Atomically reads an unsigned 64-bit value, unordered.
3099 *
3100 * @returns Current *pu64 value
3101 * @param pu64 Pointer to the 64-bit variable to read.
3102 * The memory pointed to must be writable.
3103 *
3104 * @remarks This may fault if the memory is read-only!
3105 * @remarks x86: Requires a Pentium or later.
3106 */
3107#if !defined(RT_ARCH_AMD64) \
3108 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
3109 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
3110RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3111#else
3112DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3113{
3114 uint64_t u64;
3115# ifdef RT_ARCH_AMD64
3116 Assert(!((uintptr_t)pu64 & 7));
3117/*# if RT_INLINE_ASM_GNU_STYLE
3118 Assert(!((uintptr_t)pu64 & 7));
3119 __asm__ __volatile__("movq %1, %0\n\t"
3120 : "=r" (u64)
3121 : "m" (*pu64));
3122# else
3123 __asm
3124 {
3125 mov rdx, [pu64]
3126 mov rax, [rdx]
3127 mov [u64], rax
3128 }
3129# endif */
3130 u64 = *pu64;
3131
3132# elif defined(RT_ARCH_X86)
3133# if RT_INLINE_ASM_GNU_STYLE
3134# if defined(PIC) || defined(__PIC__)
3135 uint32_t u32EBX = 0;
3136 uint32_t u32Spill;
3137 Assert(!((uintptr_t)pu64 & 7));
3138 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3139 "xor %%ecx,%%ecx\n\t"
3140 "xor %%edx,%%edx\n\t"
3141 "xchgl %%ebx, %3\n\t"
3142 "lock; cmpxchg8b (%4)\n\t"
3143 "movl %3, %%ebx\n\t"
3144 : "=A" (u64)
3145# if RT_GNUC_PREREQ(4, 3)
3146 , "+m" (*pu64)
3147# else
3148 , "=m" (*pu64)
3149# endif
3150 , "=c" (u32Spill)
3151 : "m" (u32EBX)
3152 , "S" (pu64)
3153 : "cc");
3154# else /* !PIC */
3155 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3156 : "=A" (u64)
3157 , "+m" (*pu64)
3158 : "0" (0ULL)
3159 , "b" (0)
3160 , "c" (0)
3161 : "cc");
3162# endif
3163# else
3164 Assert(!((uintptr_t)pu64 & 7));
3165 __asm
3166 {
3167 xor eax, eax
3168 xor edx, edx
3169 mov edi, pu64
3170 xor ecx, ecx
3171 xor ebx, ebx
3172 lock cmpxchg8b [edi]
3173 mov dword ptr [u64], eax
3174 mov dword ptr [u64 + 4], edx
3175 }
3176# endif
3177
3178# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3179 Assert(!((uintptr_t)pu64 & 7));
3180 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
3181# if defined(RT_ARCH_ARM64)
3182 "ldxr %[uDst], %[pMem]\n\t"
3183# else
3184 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3185# endif
3186 : [uDst] "=&r" (u64)
3187 : [pMem] "m" (*pu64));
3188
3189# else
3190# error "Port me"
3191# endif
3192 return u64;
3193}
3194#endif
3195
3196
3197/**
3198 * Atomically reads a signed 64-bit value, ordered.
3199 *
3200 * @returns Current *pi64 value
3201 * @param pi64 Pointer to the 64-bit variable to read.
3202 * The memory pointed to must be writable.
3203 *
3204 * @remarks This may fault if the memory is read-only!
3205 * @remarks x86: Requires a Pentium or later.
3206 */
3207DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3208{
3209 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3210}
3211
3212
3213/**
3214 * Atomically reads a signed 64-bit value, unordered.
3215 *
3216 * @returns Current *pi64 value
3217 * @param pi64 Pointer to the 64-bit variable to read.
3218 * The memory pointed to must be writable.
3219 *
3220 * @remarks This will fault if the memory is read-only!
3221 * @remarks x86: Requires a Pentium or later.
3222 */
3223DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3224{
3225 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3226}
3227
3228
3229/**
3230 * Atomically reads a size_t value, ordered.
3231 *
3232 * @returns Current *pcb value
3233 * @param pcb Pointer to the size_t variable to read.
3234 */
3235DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3236{
3237#if ARCH_BITS == 64
3238 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3239#elif ARCH_BITS == 32
3240 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3241#elif ARCH_BITS == 16
3242 AssertCompileSize(size_t, 2);
3243 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3244#else
3245# error "Unsupported ARCH_BITS value"
3246#endif
3247}
3248
3249
3250/**
3251 * Atomically reads a size_t value, unordered.
3252 *
3253 * @returns Current *pcb value
3254 * @param pcb Pointer to the size_t variable to read.
3255 */
3256DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3257{
3258#if ARCH_BITS == 64
3259 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3260#elif ARCH_BITS == 32
3261 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3262#elif ARCH_BITS == 16
3263 AssertCompileSize(size_t, 2);
3264 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3265#else
3266# error "Unsupported ARCH_BITS value"
3267#endif
3268}
3269
3270
3271/**
3272 * Atomically reads a pointer value, ordered.
3273 *
3274 * @returns Current *pv value
3275 * @param ppv Pointer to the pointer variable to read.
3276 *
3277 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
3278 * requires less typing (no casts).
3279 */
3280DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3281{
3282#if ARCH_BITS == 32 || ARCH_BITS == 16
3283 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3284#elif ARCH_BITS == 64
3285 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3286#else
3287# error "ARCH_BITS is bogus"
3288#endif
3289}
3290
3291/**
3292 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
3293 *
3294 * @returns Current *pv value
3295 * @param ppv Pointer to the pointer variable to read.
3296 * @param Type The type of *ppv, sans volatile.
3297 */
3298#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3299# define ASMAtomicReadPtrT(ppv, Type) \
3300 __extension__ \
3301 ({\
3302 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
3303 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
3304 pvTypeChecked; \
3305 })
3306#else
3307# define ASMAtomicReadPtrT(ppv, Type) \
3308 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3309#endif
3310
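/* Usage sketch:  the typed variant avoids the casting noise of the raw API.
 * EXAMPLENODE and g_pExampleHead are hypothetical (see the push sketch further
 * up); both lines below perform the same ordered read of the head pointer.
 *
 *      EXAMPLENODE *pHead1 = ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *      EXAMPLENODE *pHead2 = (EXAMPLENODE *)ASMAtomicReadPtr((void * volatile *)&g_pExampleHead);
 */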
3311
3312/**
3313 * Atomically reads a pointer value, unordered.
3314 *
3315 * @returns Current *pv value
3316 * @param ppv Pointer to the pointer variable to read.
3317 *
3318 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
3319 * requires less typing (no casts).
3320 */
3321DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3322{
3323#if ARCH_BITS == 32 || ARCH_BITS == 16
3324 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3325#elif ARCH_BITS == 64
3326 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3327#else
3328# error "ARCH_BITS is bogus"
3329#endif
3330}
3331
3332
3333/**
3334 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
3335 *
3336 * @returns Current *pv value
3337 * @param ppv Pointer to the pointer variable to read.
3338 * @param Type The type of *ppv, sans volatile.
3339 */
3340#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3341# define ASMAtomicUoReadPtrT(ppv, Type) \
3342 __extension__ \
3343 ({\
3344 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3345 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
3346 pvTypeChecked; \
3347 })
3348#else
3349# define ASMAtomicUoReadPtrT(ppv, Type) \
3350 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3351#endif
3352
3353
3354/**
3355 * Atomically reads a boolean value, ordered.
3356 *
3357 * @returns Current *pf value
3358 * @param pf Pointer to the boolean variable to read.
3359 */
3360DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3361{
3362 ASMMemoryFence();
3363 return *pf; /* byte reads are atomic on x86 */
3364}
3365
3366
3367/**
3368 * Atomically reads a boolean value, unordered.
3369 *
3370 * @returns Current *pf value
3371 * @param pf Pointer to the boolean variable to read.
3372 */
3373DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3374{
3375 return *pf; /* byte reads are atomic on x86 */
3376}
3377
3378
3379/**
3380 * Atomically read a typical IPRT handle value, ordered.
3381 *
3382 * @param ph Pointer to the handle variable to read.
3383 * @param phRes Where to store the result.
3384 *
3385 * @remarks This doesn't currently work for all handles (like RTFILE).
3386 */
3387#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3388# define ASMAtomicReadHandle(ph, phRes) \
3389 do { \
3390 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3391 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3392 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3393 } while (0)
3394#elif HC_ARCH_BITS == 64
3395# define ASMAtomicReadHandle(ph, phRes) \
3396 do { \
3397 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3398 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3399 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3400 } while (0)
3401#else
3402# error HC_ARCH_BITS
3403#endif
3404
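/* Usage sketch:  snapshotting a shared event-semaphore handle before using it.
 * g_hExampleEvent is the hypothetical handle from the earlier sketch and
 * iprt/semaphore.h is assumed.
 *
 *      RTSEMEVENT hEvent;
 *      ASMAtomicReadHandle(&g_hExampleEvent, &hEvent);
 *      if (hEvent != NIL_RTSEMEVENT)
 *          RTSemEventSignal(hEvent);
 */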
3405
3406/**
3407 * Atomically read a typical IPRT handle value, unordered.
3408 *
3409 * @param ph Pointer to the handle variable to read.
3410 * @param phRes Where to store the result.
3411 *
3412 * @remarks This doesn't currently work for all handles (like RTFILE).
3413 */
3414#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3415# define ASMAtomicUoReadHandle(ph, phRes) \
3416 do { \
3417 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3418 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3419 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3420 } while (0)
3421#elif HC_ARCH_BITS == 64
3422# define ASMAtomicUoReadHandle(ph, phRes) \
3423 do { \
3424 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3425 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3426 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3427 } while (0)
3428#else
3429# error HC_ARCH_BITS
3430#endif
3431
3432
3433/**
3434 * Atomically read a value whose size might differ
3435 * between platforms or compilers, ordered.
3436 *
3437 * @param pu Pointer to the variable to read.
3438 * @param puRes Where to store the result.
3439 */
3440#define ASMAtomicReadSize(pu, puRes) \
3441 do { \
3442 switch (sizeof(*(pu))) { \
3443 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3444 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3445 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3446 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3447 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3448 } \
3449 } while (0)
3450
3451
3452/**
3453 * Atomically read a value whose size might differ
3454 * between platforms or compilers, unordered.
3455 *
3456 * @param pu Pointer to the variable to read.
3457 * @param puRes Where to store the result.
3458 */
3459#define ASMAtomicUoReadSize(pu, puRes) \
3460 do { \
3461 switch (sizeof(*(pu))) { \
3462 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3463 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3464 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3465 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3466            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3467 } \
3468 } while (0)
3469
3470
3471/**
3472 * Atomically writes an unsigned 8-bit value, ordered.
3473 *
3474 * @param pu8 Pointer to the 8-bit variable.
3475 * @param u8 The 8-bit value to assign to *pu8.
3476 */
3477DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3478{
3479 /** @todo Any possible ARM32/ARM64 optimizations here? */
3480 ASMAtomicXchgU8(pu8, u8);
3481}
3482
3483
3484/**
3485 * Atomically writes an unsigned 8-bit value, unordered.
3486 *
3487 * @param pu8 Pointer to the 8-bit variable.
3488 * @param u8 The 8-bit value to assign to *pu8.
3489 */
3490DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3491{
3492 /** @todo Any possible ARM32/ARM64 improvements here? */
3493 *pu8 = u8; /* byte writes are atomic on x86 */
3494}
3495
3496
3497/**
3498 * Atomically writes a signed 8-bit value, ordered.
3499 *
3500 * @param pi8 Pointer to the 8-bit variable to read.
3501 * @param i8 The 8-bit value to assign to *pi8.
3502 */
3503DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3504{
3505 /** @todo Any possible ARM32/ARM64 optimizations here? */
3506 ASMAtomicXchgS8(pi8, i8);
3507}
3508
3509
3510/**
3511 * Atomically writes a signed 8-bit value, unordered.
3512 *
3513 * @param pi8 Pointer to the 8-bit variable to write.
3514 * @param i8 The 8-bit value to assign to *pi8.
3515 */
3516DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3517{
3518 *pi8 = i8; /* byte writes are atomic on x86 */
3519}
3520
3521
3522/**
3523 * Atomically writes an unsigned 16-bit value, ordered.
3524 *
3525 * @param pu16 Pointer to the 16-bit variable to write.
3526 * @param u16 The 16-bit value to assign to *pu16.
3527 */
3528DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3529{
3530 /** @todo Any possible ARM32/ARM64 optimizations here? */
3531 ASMAtomicXchgU16(pu16, u16);
3532}
3533
3534
3535/**
3536 * Atomically writes an unsigned 16-bit value, unordered.
3537 *
3538 * @param pu16 Pointer to the 16-bit variable to write.
3539 * @param u16 The 16-bit value to assign to *pu16.
3540 */
3541DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3542{
3543 Assert(!((uintptr_t)pu16 & 1));
3544 *pu16 = u16;
3545}
3546
3547
3548/**
3549 * Atomically writes a signed 16-bit value, ordered.
3550 *
3551 * @param pi16 Pointer to the 16-bit variable to write.
3552 * @param i16 The 16-bit value to assign to *pi16.
3553 */
3554DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3555{
3556 /** @todo Any possible ARM32/ARM64 optimizations here? */
3557 ASMAtomicXchgS16(pi16, i16);
3558}
3559
3560
3561/**
3562 * Atomically writes a signed 16-bit value, unordered.
3563 *
3564 * @param pi16 Pointer to the 16-bit variable to write.
3565 * @param i16 The 16-bit value to assign to *pi16.
3566 */
3567DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3568{
3569 Assert(!((uintptr_t)pi16 & 1));
3570 *pi16 = i16;
3571}
3572
3573
3574/**
3575 * Atomically writes an unsigned 32-bit value, ordered.
3576 *
3577 * @param pu32 Pointer to the 32-bit variable to write.
3578 * @param u32 The 32-bit value to assign to *pu32.
3579 */
3580DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3581{
3582 /** @todo Any possible ARM32/ARM64 optimizations here? */
3583 ASMAtomicXchgU32(pu32, u32);
3584}
3585
3586
3587/**
3588 * Atomically writes an unsigned 32-bit value, unordered.
3589 *
3590 * @param pu32 Pointer to the 32-bit variable to write.
3591 * @param u32 The 32-bit value to assign to *pu32.
3592 */
3593DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3594{
3595 Assert(!((uintptr_t)pu32 & 3));
3596#if ARCH_BITS >= 32
3597 *pu32 = u32;
3598#else
3599 ASMAtomicXchgU32(pu32, u32);
3600#endif
3601}
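
/* Usage sketch (editorial addition): the ordered write is the variant to use
 * when publishing data to other CPUs; the unordered one only guarantees that
 * the store itself is a single atomic access.  All names are hypothetical.
 *
 *     static uint32_t volatile g_fDataReady = 0;
 *     static uint32_t          g_uPayload;
 *
 *     void Publish(uint32_t uValue)
 *     {
 *         g_uPayload = uValue;                    // plain store
 *         ASMAtomicWriteU32(&g_fDataReady, 1);    // ordered: payload is visible
 *     }                                           // before the flag reads as set
 *
 *     bool TryConsume(uint32_t *puValue)
 *     {
 *         if (!ASMAtomicReadU32(&g_fDataReady))   // ordered read on the consumer
 *             return false;
 *         *puValue = g_uPayload;
 *         return true;
 *     }
 */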
3602
3603
3604/**
3605 * Atomically writes a signed 32-bit value, ordered.
3606 *
3607 * @param pi32 Pointer to the 32-bit variable to write.
3608 * @param i32 The 32-bit value to assign to *pi32.
3609 */
3610DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3611{
3612 ASMAtomicXchgS32(pi32, i32);
3613}
3614
3615
3616/**
3617 * Atomically writes a signed 32-bit value, unordered.
3618 *
3619 * @param pi32 Pointer to the 32-bit variable to write.
3620 * @param i32 The 32-bit value to assign to *pi32.
3621 */
3622DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3623{
3624 Assert(!((uintptr_t)pi32 & 3));
3625#if ARCH_BITS >= 32
3626 *pi32 = i32;
3627#else
3628 ASMAtomicXchgS32(pi32, i32);
3629#endif
3630}
3631
3632
3633/**
3634 * Atomically writes an unsigned 64-bit value, ordered.
3635 *
3636 * @param pu64 Pointer to the 64-bit variable to write.
3637 * @param u64 The 64-bit value to assign to *pu64.
3638 */
3639DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3640{
3641 /** @todo Any possible ARM32/ARM64 optimizations here? */
3642 ASMAtomicXchgU64(pu64, u64);
3643}
3644
3645
3646/**
3647 * Atomically writes an unsigned 64-bit value, unordered.
3648 *
3649 * @param pu64 Pointer to the 64-bit variable to write.
3650 * @param u64 The 64-bit value to assign to *pu64.
3651 */
3652DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3653{
3654 Assert(!((uintptr_t)pu64 & 7));
3655#if ARCH_BITS == 64
3656 *pu64 = u64;
3657#else
3658 ASMAtomicXchgU64(pu64, u64);
3659#endif
3660}
3661
3662
3663/**
3664 * Atomically writes a signed 64-bit value, ordered.
3665 *
3666 * @param pi64 Pointer to the 64-bit variable to write.
3667 * @param i64 The 64-bit value to assign to *pi64.
3668 */
3669DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3670{
3671 /** @todo Any possible ARM32/ARM64 optimizations here? */
3672 ASMAtomicXchgS64(pi64, i64);
3673}
3674
3675
3676/**
3677 * Atomically writes a signed 64-bit value, unordered.
3678 *
3679 * @param pi64 Pointer to the 64-bit variable to write.
3680 * @param i64 The 64-bit value to assign to *pi64.
3681 */
3682DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3683{
3684 Assert(!((uintptr_t)pi64 & 7));
3685#if ARCH_BITS == 64
3686 *pi64 = i64;
3687#else
3688 ASMAtomicXchgS64(pi64, i64);
3689#endif
3690}
3691
3692
3693/**
3694 * Atomically writes a size_t value, ordered.
3695 *
3696 * @returns nothing.
3697 * @param pcb Pointer to the size_t variable to write.
3698 * @param cb The value to assign to *pcb.
3699 */
3700DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3701{
3702#if ARCH_BITS == 64
3703 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3704#elif ARCH_BITS == 32
3705 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3706#elif ARCH_BITS == 16
3707 AssertCompileSize(size_t, 2);
3708 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3709#else
3710# error "Unsupported ARCH_BITS value"
3711#endif
3712}
3713
3714
3715/**
3716 * Atomically writes a size_t value, unordered.
3717 *
3718 * @returns nothing.
3719 * @param pcb Pointer to the size_t variable to write.
3720 * @param cb The value to assign to *pcb.
3721 */
3722DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3723{
3724#if ARCH_BITS == 64
3725 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3726#elif ARCH_BITS == 32
3727 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3728#elif ARCH_BITS == 16
3729 AssertCompileSize(size_t, 2);
3730 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3731#else
3732# error "Unsupported ARCH_BITS value"
3733#endif
3734}
3735
3736
3737/**
3738 * Atomically writes a boolean value, ordered.
3739 *
3740 * @param pf Pointer to the boolean variable to write.
3741 * @param f The boolean value to assign to *pf.
3742 */
3743DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3744{
3745 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3746}
3747
3748
3749/**
3750 * Atomically writes a boolean value, unordered.
3751 *
3752 * @param pf Pointer to the boolean variable to write.
3753 * @param f The boolean value to assign to *pf.
3754 */
3755DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3756{
3757 *pf = f; /* byte writes are atomic on x86 */
3758}
3759
3760
3761/**
3762 * Atomically writes a pointer value, ordered.
3763 *
3764 * @param ppv Pointer to the pointer variable to write.
3765 * @param pv The pointer value to assign to *ppv.
3766 */
3767DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3768{
3769#if ARCH_BITS == 32 || ARCH_BITS == 16
3770 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3771#elif ARCH_BITS == 64
3772 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3773#else
3774# error "ARCH_BITS is bogus"
3775#endif
3776}
3777
3778
3779/**
3780 * Atomically writes a pointer value, unordered.
3781 *
3782 * @param ppv Pointer to the pointer variable to write.
3783 * @param pv The pointer value to assign to *ppv.
3784 */
3785DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3786{
3787#if ARCH_BITS == 32 || ARCH_BITS == 16
3788 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3789#elif ARCH_BITS == 64
3790 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3791#else
3792# error "ARCH_BITS is bogus"
3793#endif
3794}
3795
3796
3797/**
3798 * Atomically writes a pointer value, ordered.
3799 *
3800 * @param ppv Pointer to the pointer variable to write.
3801 * @param pv The pointer value to assign to *ppv. If NULL use
3802 * ASMAtomicWriteNullPtr or you'll land in trouble.
3803 *
3804 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3805 * NULL.
3806 */
3807#ifdef __GNUC__
3808# define ASMAtomicWritePtr(ppv, pv) \
3809 do \
3810 { \
3811 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3812 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3813 \
3814 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3815 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3816 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3817 \
3818 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3819 } while (0)
3820#else
3821# define ASMAtomicWritePtr(ppv, pv) \
3822 do \
3823 { \
3824 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3825 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3826 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3827 \
3828 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3829 } while (0)
3830#endif
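
/* Usage sketch (editorial addition): publishing a fully initialised structure
 * through a shared pointer.  MYNODE, PublishNode and g_pHead are hypothetical;
 * the reader side is assumed to use one of the atomic pointer read macros
 * defined earlier in this header.
 *
 *     typedef struct MYNODE { uint32_t uValue; } MYNODE;
 *     static MYNODE * volatile g_pHead = NULL;
 *
 *     void PublishNode(MYNODE *pNode)
 *     {
 *         pNode->uValue = 42;                  // initialise before publishing
 *         ASMAtomicWritePtr(&g_pHead, pNode);  // ordered: init visible first
 *     }
 */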
3831
3832
3833/**
3834 * Atomically sets a pointer to NULL, ordered.
3835 *
3836 * @param ppv Pointer to the pointer variable that should be set to NULL.
3837 *
3838 * @remarks This is relatively type safe on GCC platforms.
3839 */
3840#if RT_GNUC_PREREQ(4, 2)
3841# define ASMAtomicWriteNullPtr(ppv) \
3842 do \
3843 { \
3844 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3845 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3846 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3847 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3848 } while (0)
3849#else
3850# define ASMAtomicWriteNullPtr(ppv) \
3851 do \
3852 { \
3853 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3854 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3855 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3856 } while (0)
3857#endif
3858
3859
3860/**
3861 * Atomically writes a pointer value, unordered.
3862 *
3864 * @param ppv Pointer to the pointer variable.
3865 * @param pv The pointer value to assign to *ppv. If NULL use
3866 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3867 *
3868 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3869 * NULL.
3870 */
3871#if RT_GNUC_PREREQ(4, 2)
3872# define ASMAtomicUoWritePtr(ppv, pv) \
3873 do \
3874 { \
3875 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3876 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3877 \
3878 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3879 AssertCompile(sizeof(pv) == sizeof(void *)); \
3880 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3881 \
3882 *(ppvTypeChecked) = pvTypeChecked; \
3883 } while (0)
3884#else
3885# define ASMAtomicUoWritePtr(ppv, pv) \
3886 do \
3887 { \
3888 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3889 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3890 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3891 *(ppv) = pv; \
3892 } while (0)
3893#endif
3894
3895
3896/**
3897 * Atomically sets a pointer to NULL, unordered.
3898 *
3899 * @param ppv Pointer to the pointer variable that should be set to NULL.
3900 *
3901 * @remarks This is relatively type safe on GCC platforms.
3902 */
3903#ifdef __GNUC__
3904# define ASMAtomicUoWriteNullPtr(ppv) \
3905 do \
3906 { \
3907 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3908 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3909 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3910 *(ppvTypeChecked) = NULL; \
3911 } while (0)
3912#else
3913# define ASMAtomicUoWriteNullPtr(ppv) \
3914 do \
3915 { \
3916 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3917 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3918 *(ppv) = NULL; \
3919 } while (0)
3920#endif
3921
3922
3923/**
3924 * Atomically write a typical IPRT handle value, ordered.
3925 *
3926 * @param ph Pointer to the variable to update.
3927 * @param hNew The value to assign to *ph.
3928 *
3929 * @remarks This doesn't currently work for all handles (like RTFILE).
3930 */
3931#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3932# define ASMAtomicWriteHandle(ph, hNew) \
3933 do { \
3934 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3935 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3936 } while (0)
3937#elif HC_ARCH_BITS == 64
3938# define ASMAtomicWriteHandle(ph, hNew) \
3939 do { \
3940 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3941 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3942 } while (0)
3943#else
3944# error HC_ARCH_BITS
3945#endif
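
/* Usage sketch (editorial addition): the handle write macros just forward to
 * the 32-bit or 64-bit write of matching width.  RTSEMEVENT merely stands in
 * for a typical pointer-sized IPRT handle here; g_hEvent is hypothetical.
 *
 *     static RTSEMEVENT volatile g_hEvent = NIL_RTSEMEVENT;
 *
 *     void SetEventHandle(RTSEMEVENT hNew)
 *     {
 *         ASMAtomicWriteHandle(&g_hEvent, hNew);
 *     }
 */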
3946
3947
3948/**
3949 * Atomically write a typical IPRT handle value, unordered.
3950 *
3951 * @param ph Pointer to the variable to update.
3952 * @param hNew The value to assign to *ph.
3953 *
3954 * @remarks This doesn't currently work for all handles (like RTFILE).
3955 */
3956#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3957# define ASMAtomicUoWriteHandle(ph, hNew) \
3958 do { \
3959 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3960 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3961 } while (0)
3962#elif HC_ARCH_BITS == 64
3963# define ASMAtomicUoWriteHandle(ph, hNew) \
3964 do { \
3965 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3966 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3967 } while (0)
3968#else
3969# error HC_ARCH_BITS
3970#endif
3971
3972
3973/**
3974 * Atomically writes a value whose size might differ
3975 * between platforms or compilers, ordered.
3976 *
3977 * @param pu Pointer to the variable to update.
3978 * @param uNew The value to assign to *pu.
3979 */
3980#define ASMAtomicWriteSize(pu, uNew) \
3981 do { \
3982 switch (sizeof(*(pu))) { \
3983 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3984 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3985 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3986 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3987 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3988 } \
3989 } while (0)
3990
3991/**
3992 * Atomically writes a value whose size might differ
3993 * between platforms or compilers, unordered.
3994 *
3995 * @param pu Pointer to the variable to update.
3996 * @param uNew The value to assign to *pu.
3997 */
3998#define ASMAtomicUoWriteSize(pu, uNew) \
3999 do { \
4000 switch (sizeof(*(pu))) { \
4001 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
4002 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
4003 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4004 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4005 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4006 } \
4007 } while (0)
4008
4009
4010
4011/**
4012 * Atomically exchanges and adds to a 16-bit value, ordered.
4013 *
4014 * @returns The old value.
4015 * @param pu16 Pointer to the value.
4016 * @param u16 Number to add.
4017 *
4018 * @remarks Currently not implemented, just to make 16-bit code happy.
4019 * @remarks x86: Requires a 486 or later.
4020 */
4021RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
4022
4023
4024/**
4025 * Atomically exchanges and adds to a 32-bit value, ordered.
4026 *
4027 * @returns The old value.
4028 * @param pu32 Pointer to the value.
4029 * @param u32 Number to add.
4030 *
4031 * @remarks x86: Requires a 486 or later.
4032 */
4033#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4034RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4035#else
4036DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4037{
4038# if RT_INLINE_ASM_USES_INTRIN
4039 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
4040 return u32;
4041
4042# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4043# if RT_INLINE_ASM_GNU_STYLE
4044 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4045 : "=r" (u32)
4046 , "=m" (*pu32)
4047 : "0" (u32)
4048 , "m" (*pu32)
4049 : "memory"
4050 , "cc");
4051 return u32;
4052# else
4053 __asm
4054 {
4055 mov eax, [u32]
4056# ifdef RT_ARCH_AMD64
4057 mov rdx, [pu32]
4058 lock xadd [rdx], eax
4059# else
4060 mov edx, [pu32]
4061 lock xadd [edx], eax
4062# endif
4063 mov [u32], eax
4064 }
4065 return u32;
4066# endif
4067
4068# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4069 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
4070 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
4071 "add %[uNew], %[uOld], %[uVal]\n\t",
4072 [uVal] "r" (u32));
4073 return u32OldRet;
4074
4075# else
4076# error "Port me"
4077# endif
4078}
4079#endif
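
/* Usage sketch (editorial addition): ASMAtomicAddU32 returns the value as it
 * was *before* the addition, which makes it a convenient way of handing out
 * unique, monotonically increasing indexes.  g_idxNext is hypothetical.
 *
 *     static uint32_t volatile g_idxNext = 0;
 *
 *     uint32_t AllocSlotIndex(void)
 *     {
 *         return ASMAtomicAddU32(&g_idxNext, 1);   // old value == our slot
 *     }
 */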
4080
4081
4082/**
4083 * Atomically exchanges and adds to a signed 32-bit value, ordered.
4084 *
4085 * @returns The old value.
4086 * @param pi32 Pointer to the value.
4087 * @param i32 Number to add.
4088 *
4089 * @remarks x86: Requires a 486 or later.
4090 */
4091DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4092{
4093 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4094}
4095
4096
4097/**
4098 * Atomically exchanges and adds to a 64-bit value, ordered.
4099 *
4100 * @returns The old value.
4101 * @param pu64 Pointer to the value.
4102 * @param u64 Number to add.
4103 *
4104 * @remarks x86: Requires a Pentium or later.
4105 */
4106#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4107DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4108#else
4109DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4110{
4111# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4112 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
4113 return u64;
4114
4115# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4116 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4117 : "=r" (u64)
4118 , "=m" (*pu64)
4119 : "0" (u64)
4120 , "m" (*pu64)
4121 : "memory"
4122 , "cc");
4123 return u64;
4124
4125# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4126 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
4127 "add %[uNew], %[uOld], %[uVal]\n\t"
4128 ,
4129 "add %[uNew], %[uOld], %[uVal]\n\t"
4130 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
4131 [uVal] "r" (u64));
4132 return u64OldRet;
4133
4134# else
4135 uint64_t u64Old;
4136 for (;;)
4137 {
4138 uint64_t u64New;
4139 u64Old = ASMAtomicUoReadU64(pu64);
4140 u64New = u64Old + u64;
4141 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4142 break;
4143 ASMNopPause();
4144 }
4145 return u64Old;
4146# endif
4147}
4148#endif
4149
4150
4151/**
4152 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4153 *
4154 * @returns The old value.
4155 * @param pi64 Pointer to the value.
4156 * @param i64 Number to add.
4157 *
4158 * @remarks x86: Requires a Pentium or later.
4159 */
4160DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4161{
4162 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4163}
4164
4165
4166/**
4167 * Atomically exchanges and adds to a size_t value, ordered.
4168 *
4169 * @returns The old value.
4170 * @param pcb Pointer to the size_t value.
4171 * @param cb Number to add.
4172 */
4173DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4174{
4175#if ARCH_BITS == 64
4176 AssertCompileSize(size_t, 8);
4177 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4178#elif ARCH_BITS == 32
4179 AssertCompileSize(size_t, 4);
4180 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4181#elif ARCH_BITS == 16
4182 AssertCompileSize(size_t, 2);
4183 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4184#else
4185# error "Unsupported ARCH_BITS value"
4186#endif
4187}
4188
4189
4190/**
4191 * Atomically exchanges and adds a value whose size might differ between
4192 * platforms or compilers, ordered.
4193 *
4194 * @param pu Pointer to the variable to update.
4195 * @param uNew The value to add to *pu.
4196 * @param puOld Where to store the old value.
4197 */
4198#define ASMAtomicAddSize(pu, uNew, puOld) \
4199 do { \
4200 switch (sizeof(*(pu))) { \
4201 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4202 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4203 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
4204 } \
4205 } while (0)
4206
4207
4208
4209/**
4210 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
4211 *
4212 * @returns The old value.
4213 * @param pu16 Pointer to the value.
4214 * @param u16 Number to subtract.
4215 *
4216 * @remarks x86: Requires a 486 or later.
4217 */
4218DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4219{
4220 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4221}
4222
4223
4224/**
4225 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
4226 *
4227 * @returns The old value.
4228 * @param pi16 Pointer to the value.
4229 * @param i16 Number to subtract.
4230 *
4231 * @remarks x86: Requires a 486 or later.
4232 */
4233DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4234{
4235 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4236}
4237
4238
4239/**
4240 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
4241 *
4242 * @returns The old value.
4243 * @param pu32 Pointer to the value.
4244 * @param u32 Number to subtract.
4245 *
4246 * @remarks x86: Requires a 486 or later.
4247 */
4248DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4249{
4250 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4251}
4252
4253
4254/**
4255 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
4256 *
4257 * @returns The old value.
4258 * @param pi32 Pointer to the value.
4259 * @param i32 Number to subtract.
4260 *
4261 * @remarks x86: Requires a 486 or later.
4262 */
4263DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4264{
4265 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4266}
4267
4268
4269/**
4270 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
4271 *
4272 * @returns The old value.
4273 * @param pu64 Pointer to the value.
4274 * @param u64 Number to subtract.
4275 *
4276 * @remarks x86: Requires a Pentium or later.
4277 */
4278DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4279{
4280 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4281}
4282
4283
4284/**
4285 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
4286 *
4287 * @returns The old value.
4288 * @param pi64 Pointer to the value.
4289 * @param i64 Number to subtract.
4290 *
4291 * @remarks x86: Requires a Pentium or later.
4292 */
4293DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4294{
4295 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4296}
4297
4298
4299/**
4300 * Atomically exchanges and subtracts from a size_t value, ordered.
4301 *
4302 * @returns The old value.
4303 * @param pcb Pointer to the size_t value.
4304 * @param cb Number to subtract.
4305 *
4306 * @remarks x86: Requires a 486 or later.
4307 */
4308DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4309{
4310#if ARCH_BITS == 64
4311 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4312#elif ARCH_BITS == 32
4313 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4314#elif ARCH_BITS == 16
4315 AssertCompileSize(size_t, 2);
4316 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4317#else
4318# error "Unsupported ARCH_BITS value"
4319#endif
4320}
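
/* Usage sketch (editorial addition): the add/subtract pairs are often used to
 * track an outstanding amount, e.g. bytes queued for I/O.  g_cbQueued and the
 * two helpers are hypothetical.
 *
 *     static size_t volatile g_cbQueued = 0;
 *
 *     void OnQueued(size_t cb)    { ASMAtomicAddZ(&g_cbQueued, cb); }
 *     void OnCompleted(size_t cb) { ASMAtomicSubZ(&g_cbQueued, cb); }
 */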
4321
4322
4323/**
4324 * Atomically exchanges and subtracts a value whose size might differ between
4325 * platforms or compilers, ordered.
4326 *
4327 * @param pu Pointer to the variable to update.
4328 * @param uNew The value to subtract from *pu.
4329 * @param puOld Where to store the old value.
4330 *
4331 * @remarks x86: Requires a 486 or later.
4332 */
4333#define ASMAtomicSubSize(pu, uNew, puOld) \
4334 do { \
4335 switch (sizeof(*(pu))) { \
4336 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4337 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4338 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
4339 } \
4340 } while (0)
4341
4342
4343
4344/**
4345 * Atomically increment a 16-bit value, ordered.
4346 *
4347 * @returns The new value.
4348 * @param pu16 Pointer to the value to increment.
4349 * @remarks Not implemented. Just to make 16-bit code happy.
4350 *
4351 * @remarks x86: Requires a 486 or later.
4352 */
4353RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4354
4355
4356/**
4357 * Atomically increment a 32-bit value, ordered.
4358 *
4359 * @returns The new value.
4360 * @param pu32 Pointer to the value to increment.
4361 *
4362 * @remarks x86: Requires a 486 or later.
4363 */
4364#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4365RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4366#else
4367DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4368{
4369# if RT_INLINE_ASM_USES_INTRIN
4370 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4371
4372# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4373# if RT_INLINE_ASM_GNU_STYLE
4374 uint32_t u32;
4375 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4376 : "=r" (u32)
4377 , "=m" (*pu32)
4378 : "0" (1)
4379 , "m" (*pu32)
4380 : "memory"
4381 , "cc");
4382 return u32+1;
4383# else
4384 __asm
4385 {
4386 mov eax, 1
4387# ifdef RT_ARCH_AMD64
4388 mov rdx, [pu32]
4389 lock xadd [rdx], eax
4390# else
4391 mov edx, [pu32]
4392 lock xadd [edx], eax
4393# endif
4394 mov u32, eax
4395 }
4396 return u32+1;
4397# endif
4398
4399# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4400 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4401 "add %w[uNew], %w[uNew], #1\n\t",
4402 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4403 "X" (0) /* dummy */);
4404 return u32NewRet;
4405
4406# else
4407 return ASMAtomicAddU32(pu32, 1) + 1;
4408# endif
4409}
4410#endif
4411
4412
4413/**
4414 * Atomically increment a signed 32-bit value, ordered.
4415 *
4416 * @returns The new value.
4417 * @param pi32 Pointer to the value to increment.
4418 *
4419 * @remarks x86: Requires a 486 or later.
4420 */
4421DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4422{
4423 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4424}
4425
4426
4427/**
4428 * Atomically increment a 64-bit value, ordered.
4429 *
4430 * @returns The new value.
4431 * @param pu64 Pointer to the value to increment.
4432 *
4433 * @remarks x86: Requires a Pentium or later.
4434 */
4435#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4436DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4437#else
4438DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4439{
4440# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4441 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4442
4443# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4444 uint64_t u64;
4445 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4446 : "=r" (u64)
4447 , "=m" (*pu64)
4448 : "0" (1)
4449 , "m" (*pu64)
4450 : "memory"
4451 , "cc");
4452 return u64 + 1;
4453
4454# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4455 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4456 "add %[uNew], %[uNew], #1\n\t"
4457 ,
4458 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4459 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4460 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4461 return u64NewRet;
4462
4463# else
4464 return ASMAtomicAddU64(pu64, 1) + 1;
4465# endif
4466}
4467#endif
4468
4469
4470/**
4471 * Atomically increment a signed 64-bit value, ordered.
4472 *
4473 * @returns The new value.
4474 * @param pi64 Pointer to the value to increment.
4475 *
4476 * @remarks x86: Requires a Pentium or later.
4477 */
4478DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4479{
4480 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4481}
4482
4483
4484/**
4485 * Atomically increment a size_t value, ordered.
4486 *
4487 * @returns The new value.
4488 * @param pcb Pointer to the value to increment.
4489 *
4490 * @remarks x86: Requires a 486 or later.
4491 */
4492DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4493{
4494#if ARCH_BITS == 64
4495 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4496#elif ARCH_BITS == 32
4497 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4498#elif ARCH_BITS == 16
4499 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4500#else
4501# error "Unsupported ARCH_BITS value"
4502#endif
4503}
4504
4505
4506
4507/**
4508 * Atomically decrement an unsigned 16-bit value, ordered.
4509 *
4510 * @returns The new value.
4511 * @param pu16 Pointer to the value to decrement.
4512 * @remarks Not implemented. Just to make 16-bit code happy.
4513 *
4514 * @remarks x86: Requires a 486 or later.
4515 */
4516RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4517
4518
4519/**
4520 * Atomically decrement an unsigned 32-bit value, ordered.
4521 *
4522 * @returns The new value.
4523 * @param pu32 Pointer to the value to decrement.
4524 *
4525 * @remarks x86: Requires a 486 or later.
4526 */
4527#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4528RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4529#else
4530DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4531{
4532# if RT_INLINE_ASM_USES_INTRIN
4533 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4534
4535 # elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4536# if RT_INLINE_ASM_GNU_STYLE
4537 uint32_t u32;
4538 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4539 : "=r" (u32)
4540 , "=m" (*pu32)
4541 : "0" (-1)
4542 , "m" (*pu32)
4543 : "memory"
4544 , "cc");
4545 return u32-1;
4546# else
4547 uint32_t u32;
4548 __asm
4549 {
4550 mov eax, -1
4551# ifdef RT_ARCH_AMD64
4552 mov rdx, [pu32]
4553 lock xadd [rdx], eax
4554# else
4555 mov edx, [pu32]
4556 lock xadd [edx], eax
4557# endif
4558 mov u32, eax
4559 }
4560 return u32-1;
4561# endif
4562
4563# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4564 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4565 "sub %w[uNew], %w[uNew], #1\n\t",
4566 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4567 "X" (0) /* dummy */);
4568 return u32NewRet;
4569
4570# else
4571 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4572# endif
4573}
4574#endif
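
/* Usage sketch (editorial addition): the increment/decrement pair is the
 * classic reference counting primitive; since the decrement returns the new
 * value, the caller that sees zero performs the cleanup.  MYOBJ and
 * myObjDestroy are hypothetical.
 *
 *     typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *     void myObjDestroy(MYOBJ *pObj);              // hypothetical destructor
 *
 *     void myObjRetain(MYOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     void myObjRelease(MYOBJ *pObj)
 *     {
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *             myObjDestroy(pObj);
 *     }
 */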
4575
4576
4577/**
4578 * Atomically decrement a signed 32-bit value, ordered.
4579 *
4580 * @returns The new value.
4581 * @param pi32 Pointer to the value to decrement.
4582 *
4583 * @remarks x86: Requires a 486 or later.
4584 */
4585DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4586{
4587 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4588}
4589
4590
4591/**
4592 * Atomically decrement an unsigned 64-bit value, ordered.
4593 *
4594 * @returns The new value.
4595 * @param pu64 Pointer to the value to decrement.
4596 *
4597 * @remarks x86: Requires a Pentium or later.
4598 */
4599#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4600RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4601#else
4602DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4603{
4604# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4605 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4606
4607# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4608 uint64_t u64;
4609 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4610 : "=r" (u64)
4611 , "=m" (*pu64)
4612 : "0" (~(uint64_t)0)
4613 , "m" (*pu64)
4614 : "memory"
4615 , "cc");
4616 return u64-1;
4617
4618# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4619 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4620 "sub %[uNew], %[uNew], #1\n\t"
4621 ,
4622 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4623 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4624 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4625 return u64NewRet;
4626
4627# else
4628 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4629# endif
4630}
4631#endif
4632
4633
4634/**
4635 * Atomically decrement a signed 64-bit value, ordered.
4636 *
4637 * @returns The new value.
4638 * @param pi64 Pointer to the value to decrement.
4639 *
4640 * @remarks x86: Requires a Pentium or later.
4641 */
4642DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4643{
4644 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4645}
4646
4647
4648/**
4649 * Atomically decrement a size_t value, ordered.
4650 *
4651 * @returns The new value.
4652 * @param pcb Pointer to the value to decrement.
4653 *
4654 * @remarks x86: Requires a 486 or later.
4655 */
4656DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4657{
4658#if ARCH_BITS == 64
4659 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4660#elif ARCH_BITS == 32
4661 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4662#elif ARCH_BITS == 16
4663 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4664#else
4665# error "Unsupported ARCH_BITS value"
4666#endif
4667}
4668
4669
4670/**
4671 * Atomically Or an unsigned 32-bit value, ordered.
4672 *
4673 * @param pu32 Pointer to the variable to OR u32 with.
4674 * @param u32 The value to OR *pu32 with.
4675 *
4676 * @remarks x86: Requires a 386 or later.
4677 */
4678#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4679RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4680#else
4681DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4682{
4683# if RT_INLINE_ASM_USES_INTRIN
4684 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4685
4686# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4687# if RT_INLINE_ASM_GNU_STYLE
4688 __asm__ __volatile__("lock; orl %1, %0\n\t"
4689 : "=m" (*pu32)
4690 : "ir" (u32)
4691 , "m" (*pu32)
4692 : "cc");
4693# else
4694 __asm
4695 {
4696 mov eax, [u32]
4697# ifdef RT_ARCH_AMD64
4698 mov rdx, [pu32]
4699 lock or [rdx], eax
4700# else
4701 mov edx, [pu32]
4702 lock or [edx], eax
4703# endif
4704 }
4705# endif
4706
4707# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4708 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4709 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4710 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4711 "orr %[uNew], %[uNew], %[uVal]\n\t",
4712 [uVal] "r" (u32));
4713
4714# else
4715# error "Port me"
4716# endif
4717}
4718#endif
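
/* Usage sketch (editorial addition): OR and AND are typically paired to set
 * and clear bits in a shared flag word without disturbing the other bits.
 * The flag value and helpers are hypothetical.
 *
 *     #define MYFLAG_PENDING  RT_BIT_32(0)
 *     static uint32_t volatile g_fFlags = 0;
 *
 *     void MarkPending(void)  { ASMAtomicOrU32(&g_fFlags, MYFLAG_PENDING); }
 *     void ClearPending(void) { ASMAtomicAndU32(&g_fFlags, ~MYFLAG_PENDING); }
 */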
4719
4720
4721/**
4722 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4723 * fallback).
4724 *
4725 * @returns Old value.
4726 * @param pu32 Pointer to the variable to OR @a u32 with.
4727 * @param u32 The value to OR @a *pu32 with.
4728 */
4729DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4730{
4731#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4732 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4733 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4734 "orr %[uNew], %[uOld], %[uVal]\n\t",
4735 [uVal] "r" (u32));
4736 return u32OldRet;
4737
4738#else
4739 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4740 uint32_t u32New;
4741 do
4742 u32New = u32RetOld | u32;
4743 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4744 return u32RetOld;
4745#endif
4746}
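
/* Editorial note with sketch: the non-ARM fallback above is the generic
 * compare-and-exchange retry loop used throughout this header whenever no
 * single-instruction form is available: read the current value, compute the
 * new one, and retry if another CPU changed the variable in the meantime.
 * The same recipe covers any read-modify-write, e.g. this hypothetical
 * saturating add (not part of IPRT):
 *
 *     uint32_t MySaturatingAddU32(uint32_t volatile *pu32, uint32_t uAdd)
 *     {
 *         uint32_t uOld = ASMAtomicUoReadU32(pu32);
 *         uint32_t uNew;
 *         do
 *             uNew = uOld > UINT32_MAX - uAdd ? UINT32_MAX : uOld + uAdd;
 *         while (!ASMAtomicCmpXchgExU32(pu32, uNew, uOld, &uOld));
 *         return uOld;                           // value before the update
 *     }
 */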
4747
4748
4749/**
4750 * Atomically Or a signed 32-bit value, ordered.
4751 *
4752 * @param pi32 Pointer to the variable to OR i32 with.
4753 * @param i32 The value to OR *pi32 with.
4754 *
4755 * @remarks x86: Requires a 386 or later.
4756 */
4757DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4758{
4759 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4760}
4761
4762
4763/**
4764 * Atomically Or an unsigned 64-bit value, ordered.
4765 *
4766 * @param pu64 Pointer to the variable to OR u64 with.
4767 * @param u64 The value to OR *pu64 with.
4768 *
4769 * @remarks x86: Requires a Pentium or later.
4770 */
4771#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4772DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4773#else
4774DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4775{
4776# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4777 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4778
4779# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4780 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4781 : "=m" (*pu64)
4782 : "r" (u64)
4783 , "m" (*pu64)
4784 : "cc");
4785
4786# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4787 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4788 "orr %[uNew], %[uNew], %[uVal]\n\t"
4789 ,
4790 "orr %[uNew], %[uNew], %[uVal]\n\t"
4791 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4792 [uVal] "r" (u64));
4793
4794# else
4795 for (;;)
4796 {
4797 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4798 uint64_t u64New = u64Old | u64;
4799 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4800 break;
4801 ASMNopPause();
4802 }
4803# endif
4804}
4805#endif
4806
4807
4808/**
4809 * Atomically Or a signed 64-bit value, ordered.
4810 *
4811 * @param pi64 Pointer to the variable to OR i64 with.
4812 * @param i64 The value to OR *pi64 with.
4813 *
4814 * @remarks x86: Requires a Pentium or later.
4815 */
4816DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4817{
4818 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4819}
4820
4821
4822/**
4823 * Atomically And an unsigned 32-bit value, ordered.
4824 *
4825 * @param pu32 Pointer to the variable to AND u32 with.
4826 * @param u32 The value to AND *pu32 with.
4827 *
4828 * @remarks x86: Requires a 386 or later.
4829 */
4830#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4831RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4832#else
4833DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4834{
4835# if RT_INLINE_ASM_USES_INTRIN
4836 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4837
4838# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4839# if RT_INLINE_ASM_GNU_STYLE
4840 __asm__ __volatile__("lock; andl %1, %0\n\t"
4841 : "=m" (*pu32)
4842 : "ir" (u32)
4843 , "m" (*pu32)
4844 : "cc");
4845# else
4846 __asm
4847 {
4848 mov eax, [u32]
4849# ifdef RT_ARCH_AMD64
4850 mov rdx, [pu32]
4851 lock and [rdx], eax
4852# else
4853 mov edx, [pu32]
4854 lock and [edx], eax
4855# endif
4856 }
4857# endif
4858
4859# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4860 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4861 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4862 "and %[uNew], %[uNew], %[uVal]\n\t",
4863 [uVal] "r" (u32));
4864
4865# else
4866# error "Port me"
4867# endif
4868}
4869#endif
4870
4871
4872/**
4873 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4874 *
4875 * @returns Old value.
4876 * @param pu32 Pointer to the variable to AND @a u32 with.
4877 * @param u32 The value to AND @a *pu32 with.
4878 */
4879DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4880{
4881#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4882 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4883 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4884 "and %[uNew], %[uOld], %[uVal]\n\t",
4885 [uVal] "r" (u32));
4886 return u32OldRet;
4887
4888#else
4889 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4890 uint32_t u32New;
4891 do
4892 u32New = u32RetOld & u32;
4893 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4894 return u32RetOld;
4895#endif
4896}
4897
4898
4899/**
4900 * Atomically And a signed 32-bit value, ordered.
4901 *
4902 * @param pi32 Pointer to the variable to AND i32 with.
4903 * @param i32 The value to AND *pi32 with.
4904 *
4905 * @remarks x86: Requires a 386 or later.
4906 */
4907DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4908{
4909 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4910}
4911
4912
4913/**
4914 * Atomically And an unsigned 64-bit value, ordered.
4915 *
4916 * @param pu64 Pointer to the variable to AND u64 with.
4917 * @param u64 The value to AND *pu64 with.
4918 *
4919 * @remarks x86: Requires a Pentium or later.
4920 */
4921#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4922DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4923#else
4924DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4925{
4926# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4927 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4928
4929# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4930 __asm__ __volatile__("lock; andq %1, %0\n\t"
4931 : "=m" (*pu64)
4932 : "r" (u64)
4933 , "m" (*pu64)
4934 : "cc");
4935
4936# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4937 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4938 "and %[uNew], %[uNew], %[uVal]\n\t"
4939 ,
4940 "and %[uNew], %[uNew], %[uVal]\n\t"
4941 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4942 [uVal] "r" (u64));
4943
4944# else
4945 for (;;)
4946 {
4947 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4948 uint64_t u64New = u64Old & u64;
4949 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4950 break;
4951 ASMNopPause();
4952 }
4953# endif
4954}
4955#endif
4956
4957
4958/**
4959 * Atomically And a signed 64-bit value, ordered.
4960 *
4961 * @param pi64 Pointer to the variable to AND i64 with.
4962 * @param i64 The value to AND *pi64 with.
4963 *
4964 * @remarks x86: Requires a Pentium or later.
4965 */
4966DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4967{
4968 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4969}
4970
4971
4972/**
4973 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4974 *
4975 * @param pu32 Pointer to the variable to XOR @a u32 with.
4976 * @param u32 The value to XOR @a *pu32 with.
4977 *
4978 * @remarks x86: Requires a 386 or later.
4979 */
4980#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4981RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4982#else
4983DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4984{
4985# if RT_INLINE_ASM_USES_INTRIN
4986 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4987
4988# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4989# if RT_INLINE_ASM_GNU_STYLE
4990 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4991 : "=m" (*pu32)
4992 : "ir" (u32)
4993 , "m" (*pu32)
4994 : "cc");
4995# else
4996 __asm
4997 {
4998 mov eax, [u32]
4999# ifdef RT_ARCH_AMD64
5000 mov rdx, [pu32]
5001 lock xor [rdx], eax
5002# else
5003 mov edx, [pu32]
5004 lock xor [edx], eax
5005# endif
5006 }
5007# endif
5008
5009# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5010 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
5011 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5012 "eor %[uNew], %[uNew], %[uVal]\n\t",
5013 [uVal] "r" (u32));
5014
5015# else
5016# error "Port me"
5017# endif
5018}
5019#endif
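
/* Usage sketch (editorial addition): XOR toggles bits; a typical use is
 * flipping a double-buffer selector bit.  The names are hypothetical.
 *
 *     static uint32_t volatile g_fSelector = 0;
 *
 *     void SwitchBuffer(void)
 *     {
 *         ASMAtomicXorU32(&g_fSelector, RT_BIT_32(0));   // flip bit 0
 *     }
 */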
5020
5021
5022/**
5023 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
5024 * extended version (for bitmaps).
5025 *
5026 * @returns Old value.
5027 * @param pu32 Pointer to the variable to XOR @a u32 with.
5028 * @param u32 The value to XOR @a *pu32 with.
5029 */
5030DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5031{
5032#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5033 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
5034 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5035 "eor %[uNew], %[uOld], %[uVal]\n\t",
5036 [uVal] "r" (u32));
5037 return u32OldRet;
5038
5039#else
5040 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
5041 uint32_t u32New;
5042 do
5043 u32New = u32RetOld ^ u32;
5044 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5045 return u32RetOld;
5046#endif
5047}
5048
5049
5050/**
5051 * Atomically XOR a signed 32-bit value, ordered.
5052 *
5053 * @param pi32 Pointer to the variable to XOR i32 with.
5054 * @param i32 The value to XOR *pi32 with.
5055 *
5056 * @remarks x86: Requires a 386 or later.
5057 */
5058DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5059{
5060 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5061}
5062
5063
5064/**
5065 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
5066 *
5067 * @param pu32 Pointer to the variable to OR u32 with.
5068 * @param u32 The value to OR *pu32 with.
5069 *
5070 * @remarks x86: Requires a 386 or later.
5071 */
5072#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5073RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5074#else
5075DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5076{
5077# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5078# if RT_INLINE_ASM_GNU_STYLE
5079 __asm__ __volatile__("orl %1, %0\n\t"
5080 : "=m" (*pu32)
5081 : "ir" (u32)
5082 , "m" (*pu32)
5083 : "cc");
5084# else
5085 __asm
5086 {
5087 mov eax, [u32]
5088# ifdef RT_ARCH_AMD64
5089 mov rdx, [pu32]
5090 or [rdx], eax
5091# else
5092 mov edx, [pu32]
5093 or [edx], eax
5094# endif
5095 }
5096# endif
5097
5098# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5099 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
5100 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
5101 "orr %[uNew], %[uNew], %[uVal]\n\t",
5102 [uVal] "r" (u32));
5103
5104# else
5105# error "Port me"
5106# endif
5107}
5108#endif
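
/* Usage sketch (editorial addition): the unordered variants are plain,
 * non-LOCKed operations on x86 and are therefore only "interrupt safe", i.e.
 * suitable for a flag word that is modified from a single CPU, such as a
 * driver body racing its own interrupt handler.  Names are hypothetical.
 *
 *     static uint32_t volatile g_fLocalEvents = 0;   // touched by one CPU only
 *
 *     void NoteEventFromIrqHandler(uint32_t fEvent)
 *     {
 *         ASMAtomicUoOrU32(&g_fLocalEvents, fEvent);
 *     }
 */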
5109
5110
5111/**
5112 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
5113 * extended version (for bitmap fallback).
5114 *
5115 * @returns Old value.
5116 * @param pu32 Pointer to the variable to OR @a u32 with.
5117 * @param u32 The value to OR @a *pu32 with.
5118 */
5119DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5120{
5121#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5122 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
5123 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
5124 "orr %[uNew], %[uOld], %[uVal]\n\t",
5125 [uVal] "r" (u32));
5126 return u32OldRet;
5127
5128#else
5129 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5130#endif
5131}
5132
5133
5134/**
5135 * Atomically OR a signed 32-bit value, unordered.
5136 *
5137 * @param pi32 Pointer to the variable to OR i32 with.
5138 * @param i32 The value to OR *pi32 with.
5139 *
5140 * @remarks x86: Requires a 386 or later.
5141 */
5142DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5143{
5144 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5145}
5146
5147
5148/**
5149 * Atomically OR an unsigned 64-bit value, unordered.
5150 *
5151 * @param pu64 Pointer to the variable to OR u64 with.
5152 * @param u64 The value to OR *pu64 with.
5153 *
5154 * @remarks x86: Requires a Pentium or later.
5155 */
5156#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5157DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5158#else
5159DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5160{
5161# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5162 __asm__ __volatile__("orq %1, %q0\n\t"
5163 : "=m" (*pu64)
5164 : "r" (u64)
5165 , "m" (*pu64)
5166 : "cc");
5167
5168# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5169 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5170 "orr %[uNew], %[uNew], %[uVal]\n\t"
5171 ,
5172 "orr %[uNew], %[uNew], %[uVal]\n\t"
5173 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5174 [uVal] "r" (u64));
5175
5176# else
5177 for (;;)
5178 {
5179 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5180 uint64_t u64New = u64Old | u64;
5181 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5182 break;
5183 ASMNopPause();
5184 }
5185# endif
5186}
5187#endif
5188
5189
5190/**
5191 * Atomically Or a signed 64-bit value, unordered.
5192 *
5193 * @param pi64 Pointer to the variable to OR i64 with.
5194 * @param i64 The value to OR *pi64 with.
5195 *
5196 * @remarks x86: Requires a Pentium or later.
5197 */
5198DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5199{
5200 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5201}
5202
5203
5204/**
5205 * Atomically And an unsigned 32-bit value, unordered.
5206 *
5207 * @param pu32 Pointer to the variable to AND u32 with.
5208 * @param u32 The value to AND *pu32 with.
5209 *
5210 * @remarks x86: Requires a 386 or later.
5211 */
5212#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5213RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5214#else
5215DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5216{
5217# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5218# if RT_INLINE_ASM_GNU_STYLE
5219 __asm__ __volatile__("andl %1, %0\n\t"
5220 : "=m" (*pu32)
5221 : "ir" (u32)
5222 , "m" (*pu32)
5223 : "cc");
5224# else
5225 __asm
5226 {
5227 mov eax, [u32]
5228# ifdef RT_ARCH_AMD64
5229 mov rdx, [pu32]
5230 and [rdx], eax
5231# else
5232 mov edx, [pu32]
5233 and [edx], eax
5234# endif
5235 }
5236# endif
5237
5238# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5239 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5240 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5241 "and %[uNew], %[uNew], %[uVal]\n\t",
5242 [uVal] "r" (u32));
5243
5244# else
5245# error "Port me"
5246# endif
5247}
5248#endif
5249
5250
5251/**
5252 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5253 * bitmap fallback).
5254 *
5255 * @returns Old value.
5256 * @param pu32 Pointer to the variable to AND @a u32 with.
5257 * @param u32 The value to AND @a *pu32 with.
5258 */
5259DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5260{
5261#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5262 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5263 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5264 "and %[uNew], %[uOld], %[uVal]\n\t",
5265 [uVal] "r" (u32));
5266 return u32OldRet;
5267
5268#else
5269 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5270#endif
5271}
5272
5273
5274/**
5275 * Atomically And a signed 32-bit value, unordered.
5276 *
5277 * @param pi32 Pointer to the variable to AND i32 with.
5278 * @param i32 The value to AND *pi32 with.
5279 *
5280 * @remarks x86: Requires a 386 or later.
5281 */
5282DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5283{
5284 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5285}
5286
5287
5288/**
5289 * Atomically And an unsigned 64-bit value, unordered.
5290 *
5291 * @param pu64 Pointer to the variable to AND u64 with.
5292 * @param u64 The value to AND *pu64 with.
5293 *
5294 * @remarks x86: Requires a Pentium or later.
5295 */
5296#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5297DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5298#else
5299DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5300{
5301# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5302 __asm__ __volatile__("andq %1, %0\n\t"
5303 : "=m" (*pu64)
5304 : "r" (u64)
5305 , "m" (*pu64)
5306 : "cc");
5307
5308# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5309 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5310 "and %[uNew], %[uNew], %[uVal]\n\t"
5311 ,
5312 "and %[uNew], %[uNew], %[uVal]\n\t"
5313 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5314 [uVal] "r" (u64));
5315
5316# else
5317 for (;;)
5318 {
5319 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5320 uint64_t u64New = u64Old & u64;
5321 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5322 break;
5323 ASMNopPause();
5324 }
5325# endif
5326}
5327#endif
5328
5329
5330/**
5331 * Atomically And a signed 64-bit value, unordered.
5332 *
5333 * @param pi64 Pointer to the variable to AND i64 with.
5334 * @param i64 The value to AND *pi64 with.
5335 *
5336 * @remarks x86: Requires a Pentium or later.
5337 */
5338DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5339{
5340 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5341}
5342
5343
5344/**
5345 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5346 *
5347 * @param pu32 Pointer to the variable to XOR @a u32 with.
5348 * @param u32 The value to XOR @a *pu32 with.
5349 *
5350 * @remarks x86: Requires a 386 or later.
5351 */
5352#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5353RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5354#else
5355DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5356{
5357# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5358# if RT_INLINE_ASM_GNU_STYLE
5359 __asm__ __volatile__("xorl %1, %0\n\t"
5360 : "=m" (*pu32)
5361 : "ir" (u32)
5362 , "m" (*pu32)
5363 : "cc");
5364# else
5365 __asm
5366 {
5367 mov eax, [u32]
5368# ifdef RT_ARCH_AMD64
5369 mov rdx, [pu32]
5370 xor [rdx], eax
5371# else
5372 mov edx, [pu32]
5373 xor [edx], eax
5374# endif
5375 }
5376# endif
5377
5378# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5379 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5380 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5381 "eor %[uNew], %[uNew], %[uVal]\n\t",
5382 [uVal] "r" (u32));
5383
5384# else
5385# error "Port me"
5386# endif
5387}
5388#endif
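
/*
 * Illustrative usage sketch, not part of the IPRT API (the flag variable is
 * made up for the example): toggling a diagnostic flag with the unordered
 * XOR when the caller does not depend on ordering against other accesses.
 */
#if 0 /* example only */
static uint32_t volatile g_fExampleDiagFlags = 0;

DECLINLINE(void) ExampleToggleVerboseFlag(void)
{
    ASMAtomicUoXorU32(&g_fExampleDiagFlags, RT_BIT_32(0)); /* flip bit 0 */
}
#endif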
5389
5390
5391/**
5392 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5393 * extended version (for bitmap fallback).
5394 *
5395 * @returns Old value.
5396 * @param pu32 Pointer to the variable to XOR @a u32 with.
5397 * @param u32 The value to XOR @a *pu32 with.
5398 */
5399DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5400{
5401#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5402 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5403 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5404 "eor %[uNew], %[uOld], %[uVal]\n\t",
5405 [uVal] "r" (u32));
5406 return u32OldRet;
5407
5408#else
5409 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5410#endif
5411}
5412
5413
5414/**
5415 * Atomically XOR a signed 32-bit value, unordered.
5416 *
5417 * @param pi32 Pointer to the variable to XOR @a i32 with.
5418 * @param i32 The value to XOR @a *pi32 with.
5419 *
5420 * @remarks x86: Requires a 386 or later.
5421 */
5422DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5423{
5424 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5425}
5426
5427
5428/**
5429 * Atomically increment an unsigned 32-bit value, unordered.
5430 *
5431 * @returns the new value.
5432 * @param pu32 Pointer to the variable to increment.
5433 *
5434 * @remarks x86: Requires a 486 or later.
5435 */
5436#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5437RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5438#else
5439DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5440{
5441# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5442 uint32_t u32;
5443# if RT_INLINE_ASM_GNU_STYLE
5444 __asm__ __volatile__("xaddl %0, %1\n\t"
5445 : "=r" (u32)
5446 , "=m" (*pu32)
5447 : "0" (1)
5448 , "m" (*pu32)
5449 : "memory" /** @todo why 'memory'? */
5450 , "cc");
5451 return u32 + 1;
5452# else
5453 __asm
5454 {
5455 mov eax, 1
5456# ifdef RT_ARCH_AMD64
5457 mov rdx, [pu32]
5458 xadd [rdx], eax
5459# else
5460 mov edx, [pu32]
5461 xadd [edx], eax
5462# endif
5463 mov u32, eax
5464 }
5465 return u32 + 1;
5466# endif
5467
5468# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5469 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5470 "add %w[uNew], %w[uNew], #1\n\t",
5471 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5472 "X" (0) /* dummy */);
5473 return u32NewRet;
5474
5475# else
5476# error "Port me"
5477# endif
5478}
5479#endif
5480
5481
5482/**
5483 * Atomically decrement an unsigned 32-bit value, unordered.
5484 *
5485 * @returns the new value.
5486 * @param pu32 Pointer to the variable to decrement.
5487 *
5488 * @remarks x86: Requires a 486 or later.
5489 */
5490#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5491RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5492#else
5493DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5494{
5495# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5496 uint32_t u32;
5497# if RT_INLINE_ASM_GNU_STYLE
5498 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5499 : "=r" (u32)
5500 , "=m" (*pu32)
5501 : "0" (-1)
5502 , "m" (*pu32)
5503 : "memory"
5504 , "cc");
5505 return u32 - 1;
5506# else
5507 __asm
5508 {
5509 mov eax, -1
5510# ifdef RT_ARCH_AMD64
5511 mov rdx, [pu32]
5512 xadd [rdx], eax
5513# else
5514 mov edx, [pu32]
5515 xadd [edx], eax
5516# endif
5517 mov u32, eax
5518 }
5519 return u32 - 1;
5520# endif
5521
5522# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5523 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5524 "sub %w[uNew], %w[uNew], #1\n\t",
5525 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5526 "X" (0) /* dummy */);
5527 return u32NewRet;
5528
5529# else
5530# error "Port me"
5531# endif
5532}
5533#endif
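
/*
 * Illustrative usage sketch, not part of the IPRT API (the counter name is
 * made up for the example): a simple in-flight request counter kept with the
 * unordered increment and decrement; both return the new count.
 */
#if 0 /* example only */
static uint32_t volatile g_cExampleRequests = 0;

DECLINLINE(uint32_t) ExampleRequestEnter(void)
{
    return ASMAtomicUoIncU32(&g_cExampleRequests);  /* new count after +1 */
}

DECLINLINE(uint32_t) ExampleRequestLeave(void)
{
    return ASMAtomicUoDecU32(&g_cExampleRequests);  /* new count after -1 */
}
#endif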
5534
5535
5536/** @def RT_ASM_PAGE_SIZE
5537 * We try to avoid dragging in iprt/param.h here.
5538 * @internal
5539 */
5540#if defined(RT_ARCH_SPARC64)
5541# define RT_ASM_PAGE_SIZE 0x2000
5542# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5543# if PAGE_SIZE != 0x2000
5544# error "PAGE_SIZE is not 0x2000!"
5545# endif
5546# endif
5547#elif defined(RT_ARCH_ARM64)
5548# define RT_ASM_PAGE_SIZE 0x4000
5549# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5550# if PAGE_SIZE != 0x4000
5551# error "PAGE_SIZE is not 0x4000!"
5552# endif
5553# endif
5554#else
5555# define RT_ASM_PAGE_SIZE 0x1000
5556# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5557# if PAGE_SIZE != 0x1000
5558# error "PAGE_SIZE is not 0x1000!"
5559# endif
5560# endif
5561#endif
5562
5563/**
5564 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5565 *
5566 * @param pv Pointer to the memory block. This must be page aligned.
5567 */
5568#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5569RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5570# else
5571DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5572{
5573# if RT_INLINE_ASM_USES_INTRIN
5574# ifdef RT_ARCH_AMD64
5575 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5576# else
5577 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5578# endif
5579
5580# elif RT_INLINE_ASM_GNU_STYLE
5581 RTCCUINTREG uDummy;
5582# ifdef RT_ARCH_AMD64
5583 __asm__ __volatile__("rep stosq"
5584 : "=D" (pv),
5585 "=c" (uDummy)
5586 : "0" (pv),
5587 "c" (RT_ASM_PAGE_SIZE >> 3),
5588 "a" (0)
5589 : "memory");
5590# else
5591 __asm__ __volatile__("rep stosl"
5592 : "=D" (pv),
5593 "=c" (uDummy)
5594 : "0" (pv),
5595 "c" (RT_ASM_PAGE_SIZE >> 2),
5596 "a" (0)
5597 : "memory");
5598# endif
5599# else
5600 __asm
5601 {
5602# ifdef RT_ARCH_AMD64
5603 xor rax, rax
5604 mov ecx, 0200h
5605 mov rdi, [pv]
5606 rep stosq
5607# else
5608 xor eax, eax
5609 mov ecx, 0400h
5610 mov edi, [pv]
5611 rep stosd
5612# endif
5613 }
5614# endif
5615}
5616# endif
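
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): zeroing a statically allocated, page aligned scratch page.  The
 * alignment attribute syntax shown is GCC/Clang specific and used here only
 * to keep the sketch self-contained.
 */
#if 0 /* example only */
static uint8_t g_abExampleScratchPage[RT_ASM_PAGE_SIZE] __attribute__((aligned(RT_ASM_PAGE_SIZE)));

DECLINLINE(void) ExampleResetScratchPage(void)
{
    ASMMemZeroPage(&g_abExampleScratchPage[0]);
}
#endif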
5617
5618
5619/**
5620 * Zeros a memory block whose size is a multiple of 32 bits.
5621 *
5622 * @param pv Pointer to the memory block.
5623 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5624 */
5625#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5626RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5627#else
5628DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5629{
5630# if RT_INLINE_ASM_USES_INTRIN
5631# ifdef RT_ARCH_AMD64
5632 if (!(cb & 7))
5633 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5634 else
5635# endif
5636 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5637
5638# elif RT_INLINE_ASM_GNU_STYLE
5639 __asm__ __volatile__("rep stosl"
5640 : "=D" (pv),
5641 "=c" (cb)
5642 : "0" (pv),
5643 "1" (cb >> 2),
5644 "a" (0)
5645 : "memory");
5646# else
5647 __asm
5648 {
5649 xor eax, eax
5650# ifdef RT_ARCH_AMD64
5651 mov rcx, [cb]
5652 shr rcx, 2
5653 mov rdi, [pv]
5654# else
5655 mov ecx, [cb]
5656 shr ecx, 2
5657 mov edi, [pv]
5658# endif
5659 rep stosd
5660 }
5661# endif
5662}
5663#endif
5664
5665
5666/**
5667 * Fills a memory block of a 32-bit aligned size with a 32-bit value.
5668 *
5669 * @param pv Pointer to the memory block.
5670 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5671 * @param u32 The value to fill with.
5672 */
5673#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5674RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5675#else
5676DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5677{
5678# if RT_INLINE_ASM_USES_INTRIN
5679# ifdef RT_ARCH_AMD64
5680 if (!(cb & 7))
5681 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5682 else
5683# endif
5684 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5685
5686# elif RT_INLINE_ASM_GNU_STYLE
5687 __asm__ __volatile__("rep stosl"
5688 : "=D" (pv),
5689 "=c" (cb)
5690 : "0" (pv),
5691 "1" (cb >> 2),
5692 "a" (u32)
5693 : "memory");
5694# else
5695 __asm
5696 {
5697# ifdef RT_ARCH_AMD64
5698 mov rcx, [cb]
5699 shr rcx, 2
5700 mov rdi, [pv]
5701# else
5702 mov ecx, [cb]
5703 shr ecx, 2
5704 mov edi, [pv]
5705# endif
5706 mov eax, [u32]
5707 rep stosd
5708 }
5709# endif
5710}
5711#endif
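
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): filling a dword table with a recognizable poison pattern.  Note
 * that the byte count must be a multiple of 4.
 */
#if 0 /* example only */
static uint32_t g_au32ExampleTable[64];

DECLINLINE(void) ExamplePoisonTable(void)
{
    ASMMemFill32(&g_au32ExampleTable[0], sizeof(g_au32ExampleTable), UINT32_C(0xdeadbeef));
}
#endif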
5712
5713
5714/**
5715 * Checks if a memory block is all zeros.
5716 *
5717 * @returns Pointer to the first non-zero byte.
5718 * @returns NULL if all zero.
5719 *
5720 * @param pv Pointer to the memory block.
5721 * @param cb Number of bytes in the block.
5722 */
5723#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5724DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5725#else
5726DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5727{
5728/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5729 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5730 for (; cb; cb--, pb++)
5731 if (RT_LIKELY(*pb == 0))
5732 { /* likely */ }
5733 else
5734 return (void RT_FAR *)pb;
5735 return NULL;
5736}
5737#endif
5738
5739
5740/**
5741 * Checks if a memory block is all zeros.
5742 *
5743 * @returns true if zero, false if not.
5744 *
5745 * @param pv Pointer to the memory block.
5746 * @param cb Number of bytes in the block.
5747 *
5748 * @sa ASMMemFirstNonZero
5749 */
5750DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5751{
5752 return ASMMemFirstNonZero(pv, cb) == NULL;
5753}
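
/*
 * Illustrative usage sketch, not part of the IPRT API (the structure and
 * field names are made up for the example): verifying that a reserved area
 * is still all zeros before accepting input.
 */
#if 0 /* example only */
typedef struct EXAMPLEHDR
{
    uint32_t u32Magic;
    uint32_t u32Version;
    uint8_t  abReserved[24];
} EXAMPLEHDR;

DECLINLINE(bool) ExampleIsHdrReservedZero(EXAMPLEHDR const *pHdr)
{
    return ASMMemIsZero(&pHdr->abReserved[0], sizeof(pHdr->abReserved));
}
#endif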
5754
5755
5756/**
5757 * Checks if a memory page is all zeros.
5758 *
5759 * @returns true / false.
5760 *
5761 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5762 * boundary.
5763 */
5764DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5765{
5766# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5767 union { RTCCUINTREG r; bool f; } uAX;
5768 RTCCUINTREG xCX, xDI;
5769 Assert(!((uintptr_t)pvPage & 15));
5770 __asm__ __volatile__("repe; "
5771# ifdef RT_ARCH_AMD64
5772 "scasq\n\t"
5773# else
5774 "scasl\n\t"
5775# endif
5776 "setnc %%al\n\t"
5777 : "=&c" (xCX)
5778 , "=&D" (xDI)
5779 , "=&a" (uAX.r)
5780 : "mr" (pvPage)
5781# ifdef RT_ARCH_AMD64
5782 , "0" (RT_ASM_PAGE_SIZE/8)
5783# else
5784 , "0" (RT_ASM_PAGE_SIZE/4)
5785# endif
5786 , "1" (pvPage)
5787 , "2" (0)
5788 : "cc");
5789 return uAX.f;
5790# else
5791 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5792 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5793 Assert(!((uintptr_t)pvPage & 15));
5794 for (;;)
5795 {
5796 if (puPtr[0]) return false;
5797 if (puPtr[4]) return false;
5798
5799 if (puPtr[2]) return false;
5800 if (puPtr[6]) return false;
5801
5802 if (puPtr[1]) return false;
5803 if (puPtr[5]) return false;
5804
5805 if (puPtr[3]) return false;
5806 if (puPtr[7]) return false;
5807
5808 if (!--cLeft)
5809 return true;
5810 puPtr += 8;
5811 }
5812# endif
5813}
5814
5815
5816/**
5817 * Checks if a memory block is filled with the specified byte, returning the
5818 * first mismatch.
5819 *
5820 * This is sort of an inverted memchr.
5821 *
5822 * @returns Pointer to the byte which doesn't equal u8.
5823 * @returns NULL if all equal to u8.
5824 *
5825 * @param pv Pointer to the memory block.
5826 * @param cb Number of bytes in the block.
5827 * @param u8 The value it's supposed to be filled with.
5828 *
5829 * @remarks No alignment requirements.
5830 */
5831#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5832 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5833DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5834#else
5835DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5836{
5837/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5838 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5839 for (; cb; cb--, pb++)
5840 if (RT_LIKELY(*pb == u8))
5841 { /* likely */ }
5842 else
5843 return (void *)pb;
5844 return NULL;
5845}
5846#endif
5847
5848
5849/**
5850 * Checks if a memory block is filled with the specified byte.
5851 *
5852 * @returns true if all matching, false if not.
5853 *
5854 * @param pv Pointer to the memory block.
5855 * @param cb Number of bytes in the block.
5856 * @param u8 The value it's supposed to be filled with.
5857 *
5858 * @remarks No alignment requirements.
5859 */
5860DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5861{
5862 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5863}
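
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): locating the first byte that no longer carries a 0xcc poison
 * value, e.g. when estimating how much of a pre-poisoned buffer was touched.
 */
#if 0 /* example only */
DECLINLINE(size_t) ExampleFindFirstTouchedByte(uint8_t const *pbPoisoned, size_t cb)
{
    uint8_t const *pbHit = (uint8_t const *)ASMMemFirstMismatchingU8(pbPoisoned, cb, 0xcc);
    return pbHit ? (size_t)(pbHit - pbPoisoned) : cb; /* cb means nothing was touched */
}
#endif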
5864
5865
5866/**
5867 * Checks if a memory block is filled with the specified 32-bit value.
5868 *
5869 * This is a sort of inverted memchr.
5870 *
5871 * @returns Pointer to the first value which doesn't equal u32.
5872 * @returns NULL if all equal to u32.
5873 *
5874 * @param pv Pointer to the memory block.
5875 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5876 * @param u32 The value it's supposed to be filled with.
5877 */
5878DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5879{
5880/** @todo rewrite this in inline assembly? */
5881 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5882 for (; cb; cb -= 4, pu32++)
5883 if (RT_LIKELY(*pu32 == u32))
5884 { /* likely */ }
5885 else
5886 return (uint32_t RT_FAR *)pu32;
5887 return NULL;
5888}
5889
5890
5891/**
5892 * Probes a byte pointer for read access.
5893 *
5894 * While the function will fault if the byte is not read accessible,
5895 * the idea is to do this in a safe place like before acquiring locks
5896 * and such like.
5897 *
5898 * Also, this function guarantees that an eager compiler is not going
5899 * to optimize the probing away.
5900 *
5901 * @param pvByte Pointer to the byte.
5902 */
5903#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5904RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5905#else
5906DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5907{
5908# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5909 uint8_t u8;
5910# if RT_INLINE_ASM_GNU_STYLE
5911 __asm__ __volatile__("movb %1, %0\n\t"
5912 : "=q" (u8)
5913 : "m" (*(const uint8_t *)pvByte));
5914# else
5915 __asm
5916 {
5917# ifdef RT_ARCH_AMD64
5918 mov rax, [pvByte]
5919 mov al, [rax]
5920# else
5921 mov eax, [pvByte]
5922 mov al, [eax]
5923# endif
5924 mov [u8], al
5925 }
5926# endif
5927 return u8;
5928
5929# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5930 uint32_t u32;
5931 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5932# if defined(RT_ARCH_ARM64)
5933 "ldxrb %w[uDst], %[pMem]\n\t"
5934# else
5935 "ldrexb %[uDst], %[pMem]\n\t"
5936# endif
5937 : [uDst] "=&r" (u32)
5938 : [pMem] "m" (*(uint8_t const *)pvByte));
5939 return (uint8_t)u32;
5940
5941# else
5942# error "Port me"
5943# endif
5944}
5945#endif
5946
5947/**
5948 * Probes a buffer for read access page by page.
5949 *
5950 * While the function will fault if the buffer is not fully read
5951 * accessible, the idea is to do this in a safe place like before
5952 * acquiring locks and such like.
5953 *
5954 * Also, this function guarantees that an eager compiler is not going
5955 * to optimize the probing away.
5956 *
5957 * @param pvBuf Pointer to the buffer.
5958 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5959 */
5960DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5961{
5962 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5963 /* the first byte */
5964 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5965 ASMProbeReadByte(pu8);
5966
5967 /* the pages in between. */
5968 while (cbBuf > RT_ASM_PAGE_SIZE)
5969 {
5970 ASMProbeReadByte(pu8);
5971 cbBuf -= RT_ASM_PAGE_SIZE;
5972 pu8 += RT_ASM_PAGE_SIZE;
5973 }
5974
5975 /* the last byte */
5976 ASMProbeReadByte(pu8 + cbBuf - 1);
5977}
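
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): touching every page of a caller supplied buffer up front, so any
 * access fault is taken here rather than later while a lock is held.
 */
#if 0 /* example only */
DECLINLINE(void) ExampleProbeThenProcess(const void *pvUserBuf, size_t cbUserBuf)
{
    ASMProbeReadBuffer(pvUserBuf, cbUserBuf);   /* faults here if unreadable */
    /* ... enter critical section and process the buffer ... */
}
#endif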
5978
5979
5980/**
5981 * Reverse the byte order of the given 16-bit integer.
5982 *
5983 * @returns The byte-swapped value.
5984 * @param u16 16-bit integer value.
5985 */
5986#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5987RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
5988#else
5989DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
5990{
5991# if RT_INLINE_ASM_USES_INTRIN
5992 return _byteswap_ushort(u16);
5993
5994# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5995# if RT_INLINE_ASM_GNU_STYLE
5996 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
5997# else
5998 _asm
5999 {
6000 mov ax, [u16]
6001 ror ax, 8
6002 mov [u16], ax
6003 }
6004# endif
6005 return u16;
6006
6007# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6008 uint32_t u32Ret;
6009 __asm__ __volatile__(
6010# if defined(RT_ARCH_ARM64)
6011 "rev16 %w[uRet], %w[uVal]\n\t"
6012# else
6013 "rev16 %[uRet], %[uVal]\n\t"
6014# endif
6015 : [uRet] "=r" (u32Ret)
6016 : [uVal] "r" (u16));
6017 return (uint16_t)u32Ret;
6018
6019# else
6020# error "Port me"
6021# endif
6022}
6023#endif
6024
6025
6026/**
6027 * Reverse the byte order of the given 32-bit integer.
6028 *
6029 * @returns The byte-swapped value.
6030 * @param u32 32-bit integer value.
6031 */
6032#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6033RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6034#else
6035DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6036{
6037# if RT_INLINE_ASM_USES_INTRIN
6038 return _byteswap_ulong(u32);
6039
6040# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6041# if RT_INLINE_ASM_GNU_STYLE
6042 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6043# else
6044 _asm
6045 {
6046 mov eax, [u32]
6047 bswap eax
6048 mov [u32], eax
6049 }
6050# endif
6051 return u32;
6052
6053# elif defined(RT_ARCH_ARM64)
6054 uint64_t u64Ret;
6055 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
6056 : [uRet] "=r" (u64Ret)
6057 : [uVal] "r" ((uint64_t)u32));
6058 return (uint32_t)u64Ret;
6059
6060# elif defined(RT_ARCH_ARM32)
6061 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6062 : [uRet] "=r" (u32)
6063 : [uVal] "[uRet]" (u32));
6064 return u32;
6065
6066# else
6067# error "Port me"
6068# endif
6069}
6070#endif
6071
6072
6073/**
6074 * Reverse the byte order of the given 64-bit integer.
6075 *
6076 * @returns The byte-swapped value.
6077 * @param u64 64-bit integer value.
6078 */
6079DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6080{
6081#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6082 return _byteswap_uint64(u64);
6083
6084# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6085 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
6086 return u64;
6087
6088# elif defined(RT_ARCH_ARM64)
6089 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6090 : [uRet] "=r" (u64)
6091 : [uVal] "[uRet]" (u64));
6092 return u64;
6093
6094#else
6095 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6096 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6097#endif
6098}
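
/*
 * Illustrative sketch, not part of the IPRT API: the byte swappers simply
 * reverse the byte order of the value, which is what is needed to convert a
 * big endian wire value to host order on a little endian machine.  The
 * constants below only demonstrate the transformation.
 */
#if 0 /* example only */
DECLINLINE(void) ExampleByteSwaps(void)
{
    uint32_t const u32Swapped = ASMByteSwapU32(UINT32_C(0x12345678));         /* 0x78563412 */
    uint64_t const u64Swapped = ASMByteSwapU64(UINT64_C(0x1122334455667788)); /* 0x8877665544332211 */
    (void)u32Swapped; (void)u64Swapped;
}
#endif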
6099
6100
6101
6102/** @defgroup grp_inline_bits Bit Operations
6103 * @{
6104 */
6105
6106
6107/**
6108 * Sets a bit in a bitmap.
6109 *
6110 * @param pvBitmap Pointer to the bitmap (little endian). This should be
6111 * 32-bit aligned.
6112 * @param iBit The bit to set.
6113 *
6114 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6115 * aligning it will yield better performance and avoid traps when
6116 * accessing the last bits in the bitmap.
6117 */
6118#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6119RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6120#else
6121DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6122{
6123# if RT_INLINE_ASM_USES_INTRIN
6124 _bittestandset((long RT_FAR *)pvBitmap, iBit);
6125
6126# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6127# if RT_INLINE_ASM_GNU_STYLE
6128 __asm__ __volatile__("btsl %1, %0"
6129 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6130 : "Ir" (iBit)
6131 , "m" (*(volatile long RT_FAR *)pvBitmap)
6132 : "memory"
6133 , "cc");
6134# else
6135 __asm
6136 {
6137# ifdef RT_ARCH_AMD64
6138 mov rax, [pvBitmap]
6139 mov edx, [iBit]
6140 bts [rax], edx
6141# else
6142 mov eax, [pvBitmap]
6143 mov edx, [iBit]
6144 bts [eax], edx
6145# endif
6146 }
6147# endif
6148
6149# else
6150 int32_t offBitmap = iBit / 32;
6151 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6152 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6153# endif
6154}
6155#endif
6156
6157
6158/**
6159 * Atomically sets a bit in a bitmap, ordered.
6160 *
6161 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6162 * aligned, otherwise the memory access isn't atomic!
6163 * @param iBit The bit to set.
6164 *
6165 * @remarks x86: Requires a 386 or later.
6166 */
6167#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6168RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6169#else
6170DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6171{
6172 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6173# if RT_INLINE_ASM_USES_INTRIN
6174 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6175# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6176# if RT_INLINE_ASM_GNU_STYLE
6177 __asm__ __volatile__("lock; btsl %1, %0"
6178 : "=m" (*(volatile long *)pvBitmap)
6179 : "Ir" (iBit)
6180 , "m" (*(volatile long *)pvBitmap)
6181 : "memory"
6182 , "cc");
6183# else
6184 __asm
6185 {
6186# ifdef RT_ARCH_AMD64
6187 mov rax, [pvBitmap]
6188 mov edx, [iBit]
6189 lock bts [rax], edx
6190# else
6191 mov eax, [pvBitmap]
6192 mov edx, [iBit]
6193 lock bts [eax], edx
6194# endif
6195 }
6196# endif
6197
6198# else
6199 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6200# endif
6201}
6202#endif
6203
6204
6205/**
6206 * Clears a bit in a bitmap.
6207 *
6208 * @param pvBitmap Pointer to the bitmap (little endian).
6209 * @param iBit The bit to clear.
6210 *
6211 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6212 * aligning it will yield better performance and avoid traps when
6213 * accessing the last bits in the bitmap.
6214 */
6215#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6216RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6217#else
6218DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6219{
6220# if RT_INLINE_ASM_USES_INTRIN
6221 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6222
6223# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6224# if RT_INLINE_ASM_GNU_STYLE
6225 __asm__ __volatile__("btrl %1, %0"
6226 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6227 : "Ir" (iBit)
6228 , "m" (*(volatile long RT_FAR *)pvBitmap)
6229 : "memory"
6230 , "cc");
6231# else
6232 __asm
6233 {
6234# ifdef RT_ARCH_AMD64
6235 mov rax, [pvBitmap]
6236 mov edx, [iBit]
6237 btr [rax], edx
6238# else
6239 mov eax, [pvBitmap]
6240 mov edx, [iBit]
6241 btr [eax], edx
6242# endif
6243 }
6244# endif
6245
6246# else
6247 int32_t offBitmap = iBit / 32;
6248 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6249 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6250# endif
6251}
6252#endif
6253
6254
6255/**
6256 * Atomically clears a bit in a bitmap, ordered.
6257 *
6258 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6259 * aligned, otherwise the memory access isn't atomic!
6260 * @param iBit The bit to clear.
6261 *
6262 * @remarks No memory barrier, take care on smp.
6263 * @remarks x86: Requires a 386 or later.
6264 */
6265#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6266RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6267#else
6268DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6269{
6270 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6271# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6272# if RT_INLINE_ASM_GNU_STYLE
6273 __asm__ __volatile__("lock; btrl %1, %0"
6274 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6275 : "Ir" (iBit)
6276 , "m" (*(volatile long RT_FAR *)pvBitmap)
6277 : "memory"
6278 , "cc");
6279# else
6280 __asm
6281 {
6282# ifdef RT_ARCH_AMD64
6283 mov rax, [pvBitmap]
6284 mov edx, [iBit]
6285 lock btr [rax], edx
6286# else
6287 mov eax, [pvBitmap]
6288 mov edx, [iBit]
6289 lock btr [eax], edx
6290# endif
6291 }
6292# endif
6293# else
6294 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6295# endif
6296}
6297#endif
6298
6299
6300/**
6301 * Toggles a bit in a bitmap.
6302 *
6303 * @param pvBitmap Pointer to the bitmap (little endian).
6304 * @param iBit The bit to toggle.
6305 *
6306 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6307 * aligning it will yield better performance and avoid traps when
6308 * accessing the last bits in the bitmap.
6309 */
6310#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6311RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6312#else
6313DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6314{
6315# if RT_INLINE_ASM_USES_INTRIN
6316 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6317# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6318# if RT_INLINE_ASM_GNU_STYLE
6319 __asm__ __volatile__("btcl %1, %0"
6320 : "=m" (*(volatile long *)pvBitmap)
6321 : "Ir" (iBit)
6322 , "m" (*(volatile long *)pvBitmap)
6323 : "memory"
6324 , "cc");
6325# else
6326 __asm
6327 {
6328# ifdef RT_ARCH_AMD64
6329 mov rax, [pvBitmap]
6330 mov edx, [iBit]
6331 btc [rax], edx
6332# else
6333 mov eax, [pvBitmap]
6334 mov edx, [iBit]
6335 btc [eax], edx
6336# endif
6337 }
6338# endif
6339# else
6340 int32_t offBitmap = iBit / 32;
6341 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6342 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6343# endif
6344}
6345#endif
6346
6347
6348/**
6349 * Atomically toggles a bit in a bitmap, ordered.
6350 *
6351 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6352 * aligned, otherwise the memory access isn't atomic!
6353 * @param iBit The bit to toggle.
6354 *
6355 * @remarks x86: Requires a 386 or later.
6356 */
6357#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6358RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6359#else
6360DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6361{
6362 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6363# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6364# if RT_INLINE_ASM_GNU_STYLE
6365 __asm__ __volatile__("lock; btcl %1, %0"
6366 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6367 : "Ir" (iBit)
6368 , "m" (*(volatile long RT_FAR *)pvBitmap)
6369 : "memory"
6370 , "cc");
6371# else
6372 __asm
6373 {
6374# ifdef RT_ARCH_AMD64
6375 mov rax, [pvBitmap]
6376 mov edx, [iBit]
6377 lock btc [rax], edx
6378# else
6379 mov eax, [pvBitmap]
6380 mov edx, [iBit]
6381 lock btc [eax], edx
6382# endif
6383 }
6384# endif
6385# else
6386 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6387# endif
6388}
6389#endif
6390
6391
6392/**
6393 * Tests and sets a bit in a bitmap.
6394 *
6395 * @returns true if the bit was set.
6396 * @returns false if the bit was clear.
6397 *
6398 * @param pvBitmap Pointer to the bitmap (little endian).
6399 * @param iBit The bit to test and set.
6400 *
6401 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6402 * aligning it will yield better performance and avoid traps when
6403 * accessing the last bits in the bitmap.
6404 */
6405#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6406RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6407#else
6408DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6409{
6410 union { bool f; uint32_t u32; uint8_t u8; } rc;
6411# if RT_INLINE_ASM_USES_INTRIN
6412 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6413
6414# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6415# if RT_INLINE_ASM_GNU_STYLE
6416 __asm__ __volatile__("btsl %2, %1\n\t"
6417 "setc %b0\n\t"
6418 "andl $1, %0\n\t"
6419 : "=q" (rc.u32)
6420 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6421 : "Ir" (iBit)
6422 , "m" (*(volatile long RT_FAR *)pvBitmap)
6423 : "memory"
6424 , "cc");
6425# else
6426 __asm
6427 {
6428 mov edx, [iBit]
6429# ifdef RT_ARCH_AMD64
6430 mov rax, [pvBitmap]
6431 bts [rax], edx
6432# else
6433 mov eax, [pvBitmap]
6434 bts [eax], edx
6435# endif
6436 setc al
6437 and eax, 1
6438 mov [rc.u32], eax
6439 }
6440# endif
6441
6442# else
6443 int32_t offBitmap = iBit / 32;
6444 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6445 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6446 >> (iBit & 31);
6447 rc.u32 &= 1;
6448# endif
6449 return rc.f;
6450}
6451#endif
6452
6453
6454/**
6455 * Atomically tests and sets a bit in a bitmap, ordered.
6456 *
6457 * @returns true if the bit was set.
6458 * @returns false if the bit was clear.
6459 *
6460 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6461 * aligned, otherwise the memory access isn't atomic!
6462 * @param iBit The bit to test and set.
6463 *
6464 * @remarks x86: Requires a 386 or later.
6465 */
6466#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6467RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6468#else
6469DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6470{
6471 union { bool f; uint32_t u32; uint8_t u8; } rc;
6472 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6473# if RT_INLINE_ASM_USES_INTRIN
6474 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6475# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6476# if RT_INLINE_ASM_GNU_STYLE
6477 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6478 "setc %b0\n\t"
6479 "andl $1, %0\n\t"
6480 : "=q" (rc.u32)
6481 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6482 : "Ir" (iBit)
6483 , "m" (*(volatile long RT_FAR *)pvBitmap)
6484 : "memory"
6485 , "cc");
6486# else
6487 __asm
6488 {
6489 mov edx, [iBit]
6490# ifdef RT_ARCH_AMD64
6491 mov rax, [pvBitmap]
6492 lock bts [rax], edx
6493# else
6494 mov eax, [pvBitmap]
6495 lock bts [eax], edx
6496# endif
6497 setc al
6498 and eax, 1
6499 mov [rc.u32], eax
6500 }
6501# endif
6502
6503# else
6504 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6505 >> (iBit & 31);
6506 rc.u32 &= 1;
6507# endif
6508 return rc.f;
6509}
6510#endif
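
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): claiming a slot in a shared bitmap.  ASMAtomicBitTestAndSet
 * returns the previous bit value, so a false return means this caller won
 * the slot.
 */
#if 0 /* example only */
static uint32_t volatile g_bmExampleSlots[4];   /* 128 slots, 32-bit aligned */

DECLINLINE(bool) ExampleTryClaimSlot(uint32_t iSlot)
{
    return !ASMAtomicBitTestAndSet(&g_bmExampleSlots[0], (int32_t)iSlot);
}

DECLINLINE(void) ExampleReleaseSlot(uint32_t iSlot)
{
    ASMAtomicBitClear(&g_bmExampleSlots[0], (int32_t)iSlot);
}
#endif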
6511
6512
6513/**
6514 * Tests and clears a bit in a bitmap.
6515 *
6516 * @returns true if the bit was set.
6517 * @returns false if the bit was clear.
6518 *
6519 * @param pvBitmap Pointer to the bitmap (little endian).
6520 * @param iBit The bit to test and clear.
6521 *
6522 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6523 * aligning it will yield better performance and avoid traps when
6524 * accessing the last bits in the bitmap.
6525 */
6526#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6527RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6528#else
6529DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6530{
6531 union { bool f; uint32_t u32; uint8_t u8; } rc;
6532# if RT_INLINE_ASM_USES_INTRIN
6533 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6534
6535# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6536# if RT_INLINE_ASM_GNU_STYLE
6537 __asm__ __volatile__("btrl %2, %1\n\t"
6538 "setc %b0\n\t"
6539 "andl $1, %0\n\t"
6540 : "=q" (rc.u32)
6541 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6542 : "Ir" (iBit)
6543 , "m" (*(volatile long RT_FAR *)pvBitmap)
6544 : "memory"
6545 , "cc");
6546# else
6547 __asm
6548 {
6549 mov edx, [iBit]
6550# ifdef RT_ARCH_AMD64
6551 mov rax, [pvBitmap]
6552 btr [rax], edx
6553# else
6554 mov eax, [pvBitmap]
6555 btr [eax], edx
6556# endif
6557 setc al
6558 and eax, 1
6559 mov [rc.u32], eax
6560 }
6561# endif
6562
6563# else
6564 int32_t offBitmap = iBit / 32;
6565 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6566 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6567 >> (iBit & 31);
6568 rc.u32 &= 1;
6569# endif
6570 return rc.f;
6571}
6572#endif
6573
6574
6575/**
6576 * Atomically tests and clears a bit in a bitmap, ordered.
6577 *
6578 * @returns true if the bit was set.
6579 * @returns false if the bit was clear.
6580 *
6581 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6582 * aligned, otherwise the memory access isn't atomic!
6583 * @param iBit The bit to test and clear.
6584 *
6585 * @remarks No memory barrier, take care on smp.
6586 * @remarks x86: Requires a 386 or later.
6587 */
6588#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6589RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6590#else
6591DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6592{
6593 union { bool f; uint32_t u32; uint8_t u8; } rc;
6594 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6595# if RT_INLINE_ASM_USES_INTRIN
6596 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6597
6598# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6599# if RT_INLINE_ASM_GNU_STYLE
6600 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6601 "setc %b0\n\t"
6602 "andl $1, %0\n\t"
6603 : "=q" (rc.u32)
6604 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6605 : "Ir" (iBit)
6606 , "m" (*(volatile long RT_FAR *)pvBitmap)
6607 : "memory"
6608 , "cc");
6609# else
6610 __asm
6611 {
6612 mov edx, [iBit]
6613# ifdef RT_ARCH_AMD64
6614 mov rax, [pvBitmap]
6615 lock btr [rax], edx
6616# else
6617 mov eax, [pvBitmap]
6618 lock btr [eax], edx
6619# endif
6620 setc al
6621 and eax, 1
6622 mov [rc.u32], eax
6623 }
6624# endif
6625
6626# else
6627 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6628 >> (iBit & 31);
6629 rc.u32 &= 1;
6630# endif
6631 return rc.f;
6632}
6633#endif
6634
6635
6636/**
6637 * Tests and toggles a bit in a bitmap.
6638 *
6639 * @returns true if the bit was set.
6640 * @returns false if the bit was clear.
6641 *
6642 * @param pvBitmap Pointer to the bitmap (little endian).
6643 * @param iBit The bit to test and toggle.
6644 *
6645 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6646 * aligning it will yield better performance and avoid traps when
6647 * accessing the last bits in the bitmap.
6648 */
6649#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6650RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6651#else
6652DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6653{
6654 union { bool f; uint32_t u32; uint8_t u8; } rc;
6655# if RT_INLINE_ASM_USES_INTRIN
6656 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6657
6658# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6659# if RT_INLINE_ASM_GNU_STYLE
6660 __asm__ __volatile__("btcl %2, %1\n\t"
6661 "setc %b0\n\t"
6662 "andl $1, %0\n\t"
6663 : "=q" (rc.u32)
6664 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6665 : "Ir" (iBit)
6666 , "m" (*(volatile long RT_FAR *)pvBitmap)
6667 : "memory"
6668 , "cc");
6669# else
6670 __asm
6671 {
6672 mov edx, [iBit]
6673# ifdef RT_ARCH_AMD64
6674 mov rax, [pvBitmap]
6675 btc [rax], edx
6676# else
6677 mov eax, [pvBitmap]
6678 btc [eax], edx
6679# endif
6680 setc al
6681 and eax, 1
6682 mov [rc.u32], eax
6683 }
6684# endif
6685
6686# else
6687 int32_t offBitmap = iBit / 32;
6688 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6689 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6690 >> (iBit & 31);
6691 rc.u32 &= 1;
6692# endif
6693 return rc.f;
6694}
6695#endif
6696
6697
6698/**
6699 * Atomically tests and toggles a bit in a bitmap, ordered.
6700 *
6701 * @returns true if the bit was set.
6702 * @returns false if the bit was clear.
6703 *
6704 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6705 * aligned, otherwise the memory access isn't atomic!
6706 * @param iBit The bit to test and toggle.
6707 *
6708 * @remarks x86: Requires a 386 or later.
6709 */
6710#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6711RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6712#else
6713DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6714{
6715 union { bool f; uint32_t u32; uint8_t u8; } rc;
6716 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6717# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6718# if RT_INLINE_ASM_GNU_STYLE
6719 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6720 "setc %b0\n\t"
6721 "andl $1, %0\n\t"
6722 : "=q" (rc.u32)
6723 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6724 : "Ir" (iBit)
6725 , "m" (*(volatile long RT_FAR *)pvBitmap)
6726 : "memory"
6727 , "cc");
6728# else
6729 __asm
6730 {
6731 mov edx, [iBit]
6732# ifdef RT_ARCH_AMD64
6733 mov rax, [pvBitmap]
6734 lock btc [rax], edx
6735# else
6736 mov eax, [pvBitmap]
6737 lock btc [eax], edx
6738# endif
6739 setc al
6740 and eax, 1
6741 mov [rc.u32], eax
6742 }
6743# endif
6744
6745# else
6746 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6747 >> (iBit & 31);
6748 rc.u32 &= 1;
6749# endif
6750 return rc.f;
6751}
6752#endif
6753
6754
6755/**
6756 * Tests if a bit in a bitmap is set.
6757 *
6758 * @returns true if the bit is set.
6759 * @returns false if the bit is clear.
6760 *
6761 * @param pvBitmap Pointer to the bitmap (little endian).
6762 * @param iBit The bit to test.
6763 *
6764 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but
6765 * aligning it will yield better performance and avoid traps when
6766 * accessing the last bits in the bitmap.
6767 */
6768#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6769RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6770#else
6771DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6772{
6773 union { bool f; uint32_t u32; uint8_t u8; } rc;
6774# if RT_INLINE_ASM_USES_INTRIN
6775 rc.u32 = _bittest((long *)pvBitmap, iBit);
6776
6777# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6778# if RT_INLINE_ASM_GNU_STYLE
6779
6780 __asm__ __volatile__("btl %2, %1\n\t"
6781 "setc %b0\n\t"
6782 "andl $1, %0\n\t"
6783 : "=q" (rc.u32)
6784 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6785 , "Ir" (iBit)
6786 : "memory"
6787 , "cc");
6788# else
6789 __asm
6790 {
6791 mov edx, [iBit]
6792# ifdef RT_ARCH_AMD64
6793 mov rax, [pvBitmap]
6794 bt [rax], edx
6795# else
6796 mov eax, [pvBitmap]
6797 bt [eax], edx
6798# endif
6799 setc al
6800 and eax, 1
6801 mov [rc.u32], eax
6802 }
6803# endif
6804
6805# else
6806 int32_t offBitmap = iBit / 32;
6807 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6808 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6809 rc.u32 &= 1;
6810# endif
6811 return rc.f;
6812}
6813#endif
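
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): basic non-atomic bitmap bookkeeping with ASMBitSet, ASMBitClear
 * and ASMBitTest; the bitmap is 32-bit aligned as recommended above.
 */
#if 0 /* example only */
static uint32_t g_bmExampleUsed[2];             /* 64 bits */

DECLINLINE(void) ExampleMarkUsed(int32_t iBit)
{
    ASMBitSet(&g_bmExampleUsed[0], iBit);
}

DECLINLINE(bool) ExampleIsUsed(int32_t iBit)
{
    return ASMBitTest(&g_bmExampleUsed[0], iBit);
}

DECLINLINE(void) ExampleMarkFree(int32_t iBit)
{
    ASMBitClear(&g_bmExampleUsed[0], iBit);
}
#endif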
6814
6815
6816/**
6817 * Clears a bit range within a bitmap.
6818 *
6819 * @param pvBitmap Pointer to the bitmap (little endian).
6820 * @param iBitStart The first bit to clear.
6821 * @param iBitEnd The first bit not to clear.
6822 */
6823DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6824{
6825 if (iBitStart < iBitEnd)
6826 {
6827 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6828 int32_t iStart = iBitStart & ~31;
6829 int32_t iEnd = iBitEnd & ~31;
6830 if (iStart == iEnd)
6831 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6832 else
6833 {
6834 /* bits in first dword. */
6835 if (iBitStart & 31)
6836 {
6837 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6838 pu32++;
6839 iBitStart = iStart + 32;
6840 }
6841
6842 /* whole dwords. */
6843 if (iBitStart != iEnd)
6844 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6845
6846 /* bits in last dword. */
6847 if (iBitEnd & 31)
6848 {
6849 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6850 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6851 }
6852 }
6853 }
6854}
6855
6856
6857/**
6858 * Sets a bit range within a bitmap.
6859 *
6860 * @param pvBitmap Pointer to the bitmap (little endian).
6861 * @param iBitStart The first bit to set.
6862 * @param iBitEnd The first bit not to set.
6863 */
6864DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6865{
6866 if (iBitStart < iBitEnd)
6867 {
6868 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6869 int32_t iStart = iBitStart & ~31;
6870 int32_t iEnd = iBitEnd & ~31;
6871 if (iStart == iEnd)
6872 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6873 else
6874 {
6875 /* bits in first dword. */
6876 if (iBitStart & 31)
6877 {
6878 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6879 pu32++;
6880 iBitStart = iStart + 32;
6881 }
6882
6883 /* whole dwords. */
6884 if (iBitStart != iEnd)
6885 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6886
6887 /* bits in last dword. */
6888 if (iBitEnd & 31)
6889 {
6890 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6891 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6892 }
6893 }
6894 }
6895}
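
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): marking a run of pages as allocated or free with the range
 * helpers.  Remember that iBitEnd is exclusive, i.e. the first bit NOT to
 * touch.
 */
#if 0 /* example only */
static uint32_t g_bmExamplePages[8];            /* 256 page bits */

DECLINLINE(void) ExampleMarkPageRange(int32_t iFirstPage, int32_t cPages, bool fAllocated)
{
    if (fAllocated)
        ASMBitSetRange(&g_bmExamplePages[0], iFirstPage, iFirstPage + cPages);
    else
        ASMBitClearRange(&g_bmExamplePages[0], iFirstPage, iFirstPage + cPages);
}
#endif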
6896
6897
6898/**
6899 * Finds the first clear bit in a bitmap.
6900 *
6901 * @returns Index of the first zero bit.
6902 * @returns -1 if no clear bit was found.
6903 * @param pvBitmap Pointer to the bitmap (little endian).
6904 * @param cBits The number of bits in the bitmap. Multiple of 32.
6905 */
6906#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6907DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6908#else
6909DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6910{
6911 if (cBits)
6912 {
6913 int32_t iBit;
6914# if RT_INLINE_ASM_GNU_STYLE
6915 RTCCUINTREG uEAX, uECX, uEDI;
6916 cBits = RT_ALIGN_32(cBits, 32);
6917 __asm__ __volatile__("repe; scasl\n\t"
6918 "je 1f\n\t"
6919# ifdef RT_ARCH_AMD64
6920 "lea -4(%%rdi), %%rdi\n\t"
6921 "xorl (%%rdi), %%eax\n\t"
6922 "subq %5, %%rdi\n\t"
6923# else
6924 "lea -4(%%edi), %%edi\n\t"
6925 "xorl (%%edi), %%eax\n\t"
6926 "subl %5, %%edi\n\t"
6927# endif
6928 "shll $3, %%edi\n\t"
6929 "bsfl %%eax, %%edx\n\t"
6930 "addl %%edi, %%edx\n\t"
6931 "1:\t\n"
6932 : "=d" (iBit)
6933 , "=&c" (uECX)
6934 , "=&D" (uEDI)
6935 , "=&a" (uEAX)
6936 : "0" (0xffffffff)
6937 , "mr" (pvBitmap)
6938 , "1" (cBits >> 5)
6939 , "2" (pvBitmap)
6940 , "3" (0xffffffff)
6941 : "cc");
6942# else
6943 cBits = RT_ALIGN_32(cBits, 32);
6944 __asm
6945 {
6946# ifdef RT_ARCH_AMD64
6947 mov rdi, [pvBitmap]
6948 mov rbx, rdi
6949# else
6950 mov edi, [pvBitmap]
6951 mov ebx, edi
6952# endif
6953 mov edx, 0ffffffffh
6954 mov eax, edx
6955 mov ecx, [cBits]
6956 shr ecx, 5
6957 repe scasd
6958 je done
6959
6960# ifdef RT_ARCH_AMD64
6961 lea rdi, [rdi - 4]
6962 xor eax, [rdi]
6963 sub rdi, rbx
6964# else
6965 lea edi, [edi - 4]
6966 xor eax, [edi]
6967 sub edi, ebx
6968# endif
6969 shl edi, 3
6970 bsf edx, eax
6971 add edx, edi
6972 done:
6973 mov [iBit], edx
6974 }
6975# endif
6976 return iBit;
6977 }
6978 return -1;
6979}
6980#endif
6981
6982
6983/**
6984 * Finds the next clear bit in a bitmap.
6985 *
6986 * @returns Index of the first zero bit.
6987 * @returns -1 if no clear bit was found.
6988 * @param pvBitmap Pointer to the bitmap (little endian).
6989 * @param cBits The number of bits in the bitmap. Multiple of 32.
6990 * @param iBitPrev The bit returned from the last search.
6991 * The search will start at iBitPrev + 1.
6992 */
6993#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6994DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6995#else
6996DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6997{
6998 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6999 int iBit = ++iBitPrev & 31;
7000 if (iBit)
7001 {
7002 /*
7003 * Inspect the 32-bit word containing the unaligned bit.
7004 */
7005 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
7006
7007# if RT_INLINE_ASM_USES_INTRIN
7008 unsigned long ulBit = 0;
7009 if (_BitScanForward(&ulBit, u32))
7010 return ulBit + iBitPrev;
7011# else
7012# if RT_INLINE_ASM_GNU_STYLE
7013 __asm__ __volatile__("bsf %1, %0\n\t"
7014 "jnz 1f\n\t"
7015 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
7016 "1:\n\t"
7017 : "=r" (iBit)
7018 : "r" (u32)
7019 : "cc");
7020# else
7021 __asm
7022 {
7023 mov edx, [u32]
7024 bsf eax, edx
7025 jnz done
7026 mov eax, 0ffffffffh
7027 done:
7028 mov [iBit], eax
7029 }
7030# endif
7031 if (iBit >= 0)
7032 return iBit + (int)iBitPrev;
7033# endif
7034
7035 /*
7036 * Skip ahead and see if there is anything left to search.
7037 */
7038 iBitPrev |= 31;
7039 iBitPrev++;
7040 if (cBits <= (uint32_t)iBitPrev)
7041 return -1;
7042 }
7043
7044 /*
7045 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
7046 */
7047 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7048 if (iBit >= 0)
7049 iBit += iBitPrev;
7050 return iBit;
7051}
7052#endif
7053
7054
7055/**
7056 * Finds the first set bit in a bitmap.
7057 *
7058 * @returns Index of the first set bit.
7059 * @returns -1 if no set bit was found.
7060 * @param pvBitmap Pointer to the bitmap (little endian).
7061 * @param cBits The number of bits in the bitmap. Multiple of 32.
7062 */
7063#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7064DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
7065#else
7066DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
7067{
7068 if (cBits)
7069 {
7070 int32_t iBit;
7071# if RT_INLINE_ASM_GNU_STYLE
7072 RTCCUINTREG uEAX, uECX, uEDI;
7073 cBits = RT_ALIGN_32(cBits, 32);
7074 __asm__ __volatile__("repe; scasl\n\t"
7075 "je 1f\n\t"
7076# ifdef RT_ARCH_AMD64
7077 "lea -4(%%rdi), %%rdi\n\t"
7078 "movl (%%rdi), %%eax\n\t"
7079 "subq %5, %%rdi\n\t"
7080# else
7081 "lea -4(%%edi), %%edi\n\t"
7082 "movl (%%edi), %%eax\n\t"
7083 "subl %5, %%edi\n\t"
7084# endif
7085 "shll $3, %%edi\n\t"
7086 "bsfl %%eax, %%edx\n\t"
7087 "addl %%edi, %%edx\n\t"
7088 "1:\t\n"
7089 : "=d" (iBit)
7090 , "=&c" (uECX)
7091 , "=&D" (uEDI)
7092 , "=&a" (uEAX)
7093 : "0" (0xffffffff)
7094 , "mr" (pvBitmap)
7095 , "1" (cBits >> 5)
7096 , "2" (pvBitmap)
7097 , "3" (0)
7098 : "cc");
7099# else
7100 cBits = RT_ALIGN_32(cBits, 32);
7101 __asm
7102 {
7103# ifdef RT_ARCH_AMD64
7104 mov rdi, [pvBitmap]
7105 mov rbx, rdi
7106# else
7107 mov edi, [pvBitmap]
7108 mov ebx, edi
7109# endif
7110 mov edx, 0ffffffffh
7111 xor eax, eax
7112 mov ecx, [cBits]
7113 shr ecx, 5
7114 repe scasd
7115 je done
7116# ifdef RT_ARCH_AMD64
7117 lea rdi, [rdi - 4]
7118 mov eax, [rdi]
7119 sub rdi, rbx
7120# else
7121 lea edi, [edi - 4]
7122 mov eax, [edi]
7123 sub edi, ebx
7124# endif
7125 shl edi, 3
7126 bsf edx, eax
7127 add edx, edi
7128 done:
7129 mov [iBit], edx
7130 }
7131# endif
7132 return iBit;
7133 }
7134 return -1;
7135}
7136#endif
7137
7138
7139/**
7140 * Finds the next set bit in a bitmap.
7141 *
7142 * @returns Index of the next set bit.
7143 * @returns -1 if no set bit was found.
7144 * @param pvBitmap Pointer to the bitmap (little endian).
7145 * @param cBits The number of bits in the bitmap. Multiple of 32.
7146 * @param iBitPrev The bit returned from the last search.
7147 * The search will start at iBitPrev + 1.
7148 */
7149#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7150DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7151#else
7152DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7153{
7154 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7155 int iBit = ++iBitPrev & 31;
7156 if (iBit)
7157 {
7158 /*
7159 * Inspect the 32-bit word containing the unaligned bit.
7160 */
7161 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7162
7163# if RT_INLINE_ASM_USES_INTRIN
7164 unsigned long ulBit = 0;
7165 if (_BitScanForward(&ulBit, u32))
7166 return ulBit + iBitPrev;
7167# else
7168# if RT_INLINE_ASM_GNU_STYLE
7169 __asm__ __volatile__("bsf %1, %0\n\t"
7170 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7171 "movl $-1, %0\n\t"
7172 "1:\n\t"
7173 : "=r" (iBit)
7174 : "r" (u32)
7175 : "cc");
7176# else
7177 __asm
7178 {
7179 mov edx, [u32]
7180 bsf eax, edx
7181 jnz done
7182 mov eax, 0ffffffffh
7183 done:
7184 mov [iBit], eax
7185 }
7186# endif
7187 if (iBit >= 0)
7188 return iBit + (int)iBitPrev;
7189# endif
7190
7191 /*
7192 * Skip ahead and see if there is anything left to search.
7193 */
7194 iBitPrev |= 31;
7195 iBitPrev++;
7196 if (cBits <= (uint32_t)iBitPrev)
7197 return -1;
7198 }
7199
7200 /*
7201 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
7202 */
7203 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7204 if (iBit >= 0)
7205 iBit += iBitPrev;
7206 return iBit;
7207}
7208#endif
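
/*
 * Illustrative usage sketch, not part of the IPRT API (names made up for the
 * example): walking all set bits in a bitmap with ASMBitFirstSet and
 * ASMBitNextSet.  cBits must be a multiple of 32 for these scanners.
 */
#if 0 /* example only */
DECLINLINE(uint32_t) ExampleCountSetBits(uint32_t const *pbmBitmap, uint32_t cBits)
{
    uint32_t cSet = 0;
    int32_t  iBit = ASMBitFirstSet(pbmBitmap, cBits);
    while (iBit >= 0)
    {
        cSet++;
        iBit = ASMBitNextSet(pbmBitmap, cBits, (uint32_t)iBit);
    }
    return cSet;
}
#endif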
7209
7210
7211/**
7212 * Finds the first bit which is set in the given 32-bit integer.
7213 * Bits are numbered from 1 (least significant) to 32.
7214 *
7215 * @returns index [1..32] of the first set bit.
7216 * @returns 0 if all bits are cleared.
7217 * @param u32 Integer to search for set bits.
7218 * @remarks Similar to ffs() in BSD.
7219 */
7220#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7221RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7222#else
7223DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7224{
7225# if RT_INLINE_ASM_USES_INTRIN
7226 unsigned long iBit;
7227 if (_BitScanForward(&iBit, u32))
7228 iBit++;
7229 else
7230 iBit = 0;
7231
7232# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7233# if RT_INLINE_ASM_GNU_STYLE
7234 uint32_t iBit;
7235 __asm__ __volatile__("bsf %1, %0\n\t"
7236 "jnz 1f\n\t"
7237 "xorl %0, %0\n\t"
7238 "jmp 2f\n"
7239 "1:\n\t"
7240 "incl %0\n"
7241 "2:\n\t"
7242 : "=r" (iBit)
7243 : "rm" (u32)
7244 : "cc");
7245# else
7246 uint32_t iBit;
7247 _asm
7248 {
7249 bsf eax, [u32]
7250 jnz found
7251 xor eax, eax
7252 jmp done
7253 found:
7254 inc eax
7255 done:
7256 mov [iBit], eax
7257 }
7258# endif
7259
7260# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7261 /*
7262 * Using the "count leading zeros (clz)" instruction here because there
7263 * is no dedicated instruction to get the first set bit.
7264 * Need to reverse the bits in the value with "rbit" first because
7265 * "clz" starts counting from the most significant bit.
7266 */
7267 uint32_t iBit;
7268 __asm__ __volatile__(
7269# if defined(RT_ARCH_ARM64)
7270 "rbit %w[uVal], %w[uVal]\n\t"
7271 "clz %w[iBit], %w[uVal]\n\t"
7272# else
7273 "rbit %[uVal], %[uVal]\n\t"
7274 "clz %[iBit], %[uVal]\n\t"
7275# endif
7276 : [uVal] "=r" (u32)
7277 , [iBit] "=r" (iBit)
7278 : "[uVal]" (u32));
7279 if (iBit != 32)
7280 iBit++;
7281 else
7282 iBit = 0; /* No bit set. */
7283
7284# else
7285# error "Port me"
7286# endif
7287 return iBit;
7288}
7289#endif
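
/*
 * Illustrative sketch, not part of the IPRT API: ASMBitFirstSetU32 follows
 * ffs() conventions, i.e. the result is 1-based and 0 means no bit is set.
 */
#if 0 /* example only */
DECLINLINE(void) ExampleBitFirstSetU32(void)
{
    unsigned const iLowest  = ASMBitFirstSetU32(UINT32_C(0x00008000));  /* 16 */
    unsigned const iNothing = ASMBitFirstSetU32(0);                     /*  0 */
    (void)iLowest; (void)iNothing;
}
#endif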
7290
7291
7292/**
7293 * Finds the first bit which is set in the given 32-bit integer.
7294 * Bits are numbered from 1 (least significant) to 32.
7295 *
7296 * @returns index [1..32] of the first set bit.
7297 * @returns 0 if all bits are cleared.
7298 * @param i32 Integer to search for set bits.
7299 * @remark Similar to ffs() in BSD.
7300 */
7301DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7302{
7303 return ASMBitFirstSetU32((uint32_t)i32);
7304}
7305
7306
7307/**
7308 * Finds the first bit which is set in the given 64-bit integer.
7309 *
7310 * Bits are numbered from 1 (least significant) to 64.
7311 *
7312 * @returns index [1..64] of the first set bit.
7313 * @returns 0 if all bits are cleared.
7314 * @param u64 Integer to search for set bits.
7315 * @remarks Similar to ffs() in BSD.
7316 */
7317#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7318RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7319#else
7320DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7321{
7322# if RT_INLINE_ASM_USES_INTRIN
7323 unsigned long iBit;
7324# if ARCH_BITS == 64
7325 if (_BitScanForward64(&iBit, u64))
7326 iBit++;
7327 else
7328 iBit = 0;
7329# else
7330 if (_BitScanForward(&iBit, (uint32_t)u64))
7331 iBit++;
7332 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7333 iBit += 33;
7334 else
7335 iBit = 0;
7336# endif
7337
7338# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7339 uint64_t iBit;
7340 __asm__ __volatile__("bsfq %1, %0\n\t"
7341 "jnz 1f\n\t"
7342 "xorl %k0, %k0\n\t"
7343 "jmp 2f\n"
7344 "1:\n\t"
7345 "incl %k0\n"
7346 "2:\n\t"
7347 : "=r" (iBit)
7348 : "rm" (u64)
7349 : "cc");
7350
7351# elif defined(RT_ARCH_ARM64)
7352 uint64_t iBit;
7353 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7354 "clz %[iBit], %[uVal]\n\t"
7355 : [uVal] "=r" (u64)
7356 , [iBit] "=r" (iBit)
7357 : "[uVal]" (u64));
7358 if (iBit != 64)
7359 iBit++;
7360 else
7361 iBit = 0; /* No bit set. */
7362
7363# else
7364 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7365 if (!iBit)
7366 {
7367 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7368 if (iBit)
7369 iBit += 32;
7370 }
7371# endif
7372 return (unsigned)iBit;
7373}
7374#endif
7375
7376
7377/**
7378 * Finds the first bit which is set in the given 16-bit integer.
7379 *
7380 * Bits are numbered from 1 (least significant) to 16.
7381 *
7382 * @returns index [1..16] of the first set bit.
7383 * @returns 0 if all bits are cleared.
7384 * @param u16 Integer to search for set bits.
7385 * @remarks For 16-bit bs3kit code.
7386 */
7387#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7388RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7389#else
7390DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7391{
7392 return ASMBitFirstSetU32((uint32_t)u16);
7393}
7394#endif
7395
7396
7397/**
7398 * Finds the last bit which is set in the given 32-bit integer.
7399 * Bits are numbered from 1 (least significant) to 32.
7400 *
7401 * @returns index [1..32] of the last set bit.
7402 * @returns 0 if all bits are cleared.
7403 * @param u32 Integer to search for set bits.
7404 * @remark Similar to fls() in BSD.
7405 */
7406#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7407RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7408#else
7409DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7410{
7411# if RT_INLINE_ASM_USES_INTRIN
7412 unsigned long iBit;
7413 if (_BitScanReverse(&iBit, u32))
7414 iBit++;
7415 else
7416 iBit = 0;
7417
7418# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7419# if RT_INLINE_ASM_GNU_STYLE
7420 uint32_t iBit;
7421 __asm__ __volatile__("bsrl %1, %0\n\t"
7422 "jnz 1f\n\t"
7423 "xorl %0, %0\n\t"
7424 "jmp 2f\n"
7425 "1:\n\t"
7426 "incl %0\n"
7427 "2:\n\t"
7428 : "=r" (iBit)
7429 : "rm" (u32)
7430 : "cc");
7431# else
7432 uint32_t iBit;
7433 _asm
7434 {
7435 bsr eax, [u32]
7436 jnz found
7437 xor eax, eax
7438 jmp done
7439 found:
7440 inc eax
7441 done:
7442 mov [iBit], eax
7443 }
7444# endif
7445
7446# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7447 uint32_t iBit;
7448 __asm__ __volatile__(
7449# if defined(RT_ARCH_ARM64)
7450 "clz %w[iBit], %w[uVal]\n\t"
7451# else
7452 "clz %[iBit], %[uVal]\n\t"
7453# endif
7454 : [iBit] "=r" (iBit)
7455 : [uVal] "r" (u32));
7456 iBit = 32 - iBit;
7457
7458# else
7459# error "Port me"
7460# endif
7461 return iBit;
7462}
7463#endif
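
/*
 * Usage sketch for ASMBitLastSetU32 (illustrative only and kept compiled out;
 * the example function name is arbitrary). The ARM path above computes
 * 32 - clz(u32), which maps an all-zero input (clz == 32) to 0 and otherwise
 * yields the 1-based index of the most significant set bit, i.e.
 * floor(log2(u32)) + 1.
 */
#if 0
static void asmBitLastSetU32UsageSketch(void)
{
    unsigned iBit;
    iBit = ASMBitLastSetU32(UINT32_C(0x00000000)); /* 0  - no bit set */
    iBit = ASMBitLastSetU32(UINT32_C(0x00000001)); /* 1  - bit 0 is the highest set bit */
    iBit = ASMBitLastSetU32(UINT32_C(0x00a00000)); /* 24 - bit 23 is the highest set bit */
    iBit = ASMBitLastSetU32(UINT32_C(0xffffffff)); /* 32 - bit 31 is the highest set bit */
    (void)iBit;
}
#endif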
7464
7465
7466/**
7467 * Finds the last bit which is set in the given 32-bit integer.
7468 * Bits are numbered from 1 (least significant) to 32.
7469 *
7470 * @returns index [1..32] of the last set bit.
7471 * @returns 0 if all bits are cleared.
7472 * @param i32 Integer to search for set bits.
7473 * @remark Similar to fls() in BSD.
7474 */
7475DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7476{
7477 return ASMBitLastSetU32((uint32_t)i32);
7478}
7479
7480
7481/**
7482 * Finds the last bit which is set in the given 64-bit integer.
7483 *
7484 * Bits are numbered from 1 (least significant) to 64.
7485 *
7486 * @returns index [1..64] of the last set bit.
7487 * @returns 0 if all bits are cleared.
7488 * @param u64 Integer to search for set bits.
7489 * @remark Similar to fls() in BSD.
7490 */
7491#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7492RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7493#else
7494DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7495{
7496# if RT_INLINE_ASM_USES_INTRIN
7497 unsigned long iBit;
7498# if ARCH_BITS == 64
7499 if (_BitScanReverse64(&iBit, u64))
7500 iBit++;
7501 else
7502 iBit = 0;
7503# else
7504 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7505 iBit += 33;
7506 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7507 iBit++;
7508 else
7509 iBit = 0;
7510# endif
7511
7512# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7513 uint64_t iBit;
7514 __asm__ __volatile__("bsrq %1, %0\n\t"
7515 "jnz 1f\n\t"
7516 "xorl %k0, %k0\n\t"
7517 "jmp 2f\n"
7518 "1:\n\t"
7519 "incl %k0\n"
7520 "2:\n\t"
7521 : "=r" (iBit)
7522 : "rm" (u64)
7523 : "cc");
7524
7525# elif defined(RT_ARCH_ARM64)
7526 uint64_t iBit;
7527 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7528 : [iBit] "=r" (iBit)
7529 : [uVal] "r" (u64));
7530 iBit = 64 - iBit;
7531
7532# else
7533 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7534 if (iBit)
7535 iBit += 32;
7536 else
7537 iBit = ASMBitLastSetU32((uint32_t)u64);
7538# endif
7539 return (unsigned)iBit;
7540}
7541#endif
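
/*
 * Usage sketch for ASMBitLastSetU64 (illustrative only and kept compiled out;
 * the example function name is arbitrary). Note that the generic fallback
 * above scans the high 32-bit half first, since any bit set there dominates
 * the result.
 */
#if 0
static void asmBitLastSetU64UsageSketch(void)
{
    unsigned iBit;
    iBit = ASMBitLastSetU64(UINT64_C(0x0000000000000000)); /* 0  - no bit set */
    iBit = ASMBitLastSetU64(UINT64_C(0x00000000ffffffff)); /* 32 - highest set bit is bit 31 in the low half */
    iBit = ASMBitLastSetU64(UINT64_C(0x0000000100000001)); /* 33 - bit 32 in the high half dominates bit 0 */
    iBit = ASMBitLastSetU64(UINT64_C(0x8000000000000000)); /* 64 - only the most significant bit is set */
    (void)iBit;
}
#endif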
7542
7543
7544/**
7545 * Finds the last bit which is set in the given 16-bit integer.
7546 *
7547 * Bits are numbered from 1 (least significant) to 16.
7548 *
7549 * @returns index [1..16] of the last set bit.
7550 * @returns 0 if all bits are cleared.
7551 * @param u16 Integer to search for set bits.
7552 * @remarks For 16-bit bs3kit code.
7553 */
7554#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7555RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7556#else
7557DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7558{
7559 return ASMBitLastSetU32((uint32_t)u16);
7560}
7561#endif
7562
7563
7564/**
7565 * Rotate 32-bit unsigned value to the left by @a cShift.
7566 *
7567 * @returns Rotated value.
7568 * @param u32 The value to rotate.
7569 * @param cShift How many bits to rotate by.
7570 */
7571#ifdef __WATCOMC__
7572RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7573#else
7574DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7575{
7576# if RT_INLINE_ASM_USES_INTRIN
7577 return _rotl(u32, cShift);
7578
7579# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7580 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7581 return u32;
7582
7583# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7584 __asm__ __volatile__(
7585# if defined(RT_ARCH_ARM64)
7586 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7587# else
7588 "ror %[uRet], %[uVal], %[cShift]\n\t"
7589# endif
7590 : [uRet] "=r" (u32)
7591 : [uVal] "[uRet]" (u32)
7592 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7593 return u32;
7594
7595# else
7596 cShift &= 31;
7597 return (u32 << cShift) | (u32 >> ((32 - cShift) & 31)); /* '& 31' avoids an undefined 32-bit shift when cShift is 0 */
7598# endif
7599}
7600#endif
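
/*
 * Usage sketch for ASMRotateLeftU32 (illustrative only and kept compiled out;
 * the example function name is arbitrary). A left rotate by cShift equals a
 * right rotate by (32 - cShift) & 31, which is why the ARM path above feeds
 * 32 - (cShift & 31) to the ror instruction.
 */
#if 0
static void asmRotateLeftU32UsageSketch(void)
{
    uint32_t u32;
    u32 = ASMRotateLeftU32(UINT32_C(0x80000001), 1);  /* 0x00000003 - the MSB wraps around to bit 0 */
    u32 = ASMRotateLeftU32(UINT32_C(0x12345678), 8);  /* 0x34567812 */
    u32 = ASMRotateLeftU32(UINT32_C(0x12345678), 31); /* 0x091a2b3c - same as rotating right by 1 */
    (void)u32;
}
#endif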
7601
7602
7603/**
7604 * Rotate 32-bit unsigned value to the right by @a cShift.
7605 *
7606 * @returns Rotated value.
7607 * @param u32 The value to rotate.
7608 * @param cShift How many bits to rotate by.
7609 */
7610#ifdef __WATCOMC__
7611RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7612#else
7613DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7614{
7615# if RT_INLINE_ASM_USES_INTRIN
7616 return _rotr(u32, cShift);
7617
7618# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7619 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7620 return u32;
7621
7622# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7623 __asm__ __volatile__(
7624# if defined(RT_ARCH_ARM64)
7625 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7626# else
7627 "ror %[uRet], %[uVal], %[cShift]\n\t"
7628# endif
7629 : [uRet] "=r" (u32)
7630 : [uVal] "[uRet]" (u32)
7631 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7632 return u32;
7633
7634# else
7635 cShift &= 31;
7636 return (u32 >> cShift) | (u32 << ((32 - cShift) & 31)); /* '& 31' avoids an undefined 32-bit shift when cShift is 0 */
7637# endif
7638}
7639#endif
7640
7641
7642/**
7643 * Rotate 64-bit unsigned value to the left by @a cShift.
7644 *
7645 * @returns Rotated value.
7646 * @param u64 The value to rotate.
7647 * @param cShift How many bits to rotate by.
7648 */
7649DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7650{
7651#if RT_INLINE_ASM_USES_INTRIN
7652 return _rotl64(u64, cShift);
7653
7654#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7655 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7656 return u64;
7657
7658#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7659 uint32_t uSpill;
7660 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7661 "jz 1f\n\t"
7662 "xchgl %%eax, %%edx\n\t"
7663 "1:\n\t"
7664 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7665 "jz 2f\n\t"
7666 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7667 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7668 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7669 "2:\n\t" /* } */
7670 : "=A" (u64)
7671 , "=c" (cShift)
7672 , "=r" (uSpill)
7673 : "0" (u64)
7674 , "1" (cShift)
7675 : "cc");
7676 return u64;
7677
7678 #elif defined(RT_ARCH_ARM64)
7679 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7680 : [uRet] "=r" (u64)
7681 : [uVal] "[uRet]" (u64)
7682 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7683 return u64;
7684
7685#else
7686 cShift &= 63;
7687 return (u64 << cShift) | (u64 >> ((64 - cShift) & 63)); /* '& 63' avoids an undefined 64-bit shift when cShift is 0 */
7688#endif
7689}
7690
7691
7692/**
7693 * Rotate 64-bit unsigned value to the right by @a cShift.
7694 *
7695 * @returns Rotated value.
7696 * @param u64 The value to rotate.
7697 * @param cShift How many bits to rotate by.
7698 */
7699DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7700{
7701#if RT_INLINE_ASM_USES_INTRIN
7702 return _rotr64(u64, cShift);
7703
7704#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7705 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7706 return u64;
7707
7708#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7709 uint32_t uSpill;
7710 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7711 "jz 1f\n\t"
7712 "xchgl %%eax, %%edx\n\t"
7713 "1:\n\t"
7714 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7715 "jz 2f\n\t"
7716 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7717 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7718 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7719 "2:\n\t" /* } */
7720 : "=A" (u64)
7721 , "=c" (cShift)
7722 , "=r" (uSpill)
7723 : "0" (u64)
7724 , "1" (cShift)
7725 : "cc");
7726 return u64;
7727
7728 #elif defined(RT_ARCH_ARM64)
7729 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7730 : [uRet] "=r" (u64)
7731 : [uVal] "[uRet]" (u64)
7732 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7733 return u64;
7734
7735#else
7736 cShift &= 63;
7737 return (u64 >> cShift) | (u64 << ((64 - cShift) & 63)); /* '& 63' avoids an undefined 64-bit shift when cShift is 0 */
7738#endif
7739}
7740
7741/** @} */
7742
7743
7744/** @} */
7745
7746/*
7747 * Include #pragma aux definitions for Watcom C/C++.
7748 */
7749#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7750# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7751# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7752# include "asm-watcom-x86-16.h"
7753#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7754# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7755# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7756# include "asm-watcom-x86-32.h"
7757#endif
7758
7759#endif /* !IPRT_INCLUDED_asm_h */
7760