VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 93759

Last change on this file since 93759 was 93759, checked in by vboxsync, 3 years ago

IPRT/asm.h,tstRTInlineAsm: Added 8-bit and 16-bit extended cmpxchg functions (needed for IEM). [build fix attempt] bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 242.8 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400 or later compiler.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange8)
72# pragma intrinsic(_InterlockedCompareExchange16)
73# pragma intrinsic(_InterlockedCompareExchange64)
74# pragma intrinsic(_rotl)
75# pragma intrinsic(_rotr)
76# pragma intrinsic(_rotl64)
77# pragma intrinsic(_rotr64)
78# ifdef RT_ARCH_AMD64
79# pragma intrinsic(__stosq)
80# pragma intrinsic(_byteswap_uint64)
81# pragma intrinsic(_InterlockedCompareExchange128)
82# pragma intrinsic(_InterlockedExchange64)
83# pragma intrinsic(_InterlockedExchangeAdd64)
84# pragma intrinsic(_InterlockedAnd64)
85# pragma intrinsic(_InterlockedOr64)
86# pragma intrinsic(_InterlockedIncrement64)
87# pragma intrinsic(_InterlockedDecrement64)
88# endif
89#endif
90
91/*
92 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
93 */
94#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-16.h"
96#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
97# include "asm-watcom-x86-32.h"
98#endif
99
100
101/** @defgroup grp_rt_asm ASM - Assembly Routines
102 * @ingroup grp_rt
103 *
104 * @remarks The difference between ordered and unordered atomic operations is
105 * that the former will complete outstanding reads and writes before
106 * continuing, while the latter make no promises about the order.
107 * Ordered operations do not, it seems, make any 100% promise about
108 * whether the operation will complete before any subsequent
109 * memory access. (Please correct if wrong.)
110 *
111 * ASMAtomicSomething operations are all ordered, while
112 * ASMAtomicUoSomething operations are unordered (note the Uo).
113 *
114 * Please note that ordered operations do not necessarily imply a
115 * compiler (memory) barrier. The user has to use the
116 * ASMCompilerBarrier() macro when that is deemed necessary.
117 *
118 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
119 * to reorder or even optimize assembler instructions away. For
120 * instance, in the following code the second rdmsr instruction is
121 * optimized away because gcc treats that instruction as deterministic:
122 *
123 * @code
124 * static inline uint32_t rdmsr_low(int idx)
125 * {
126 *     uint32_t low;
127 *     __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
128 *     return low;
129 * }
130 * ...
131 * uint32_t msr1 = rdmsr_low(1); foo(msr1);
132 * msr1 = rdmsr_low(1);
133 * bar(msr1);
134 * @endcode
135 *
136 * The input parameter of rdmsr_low is the same for both calls and
137 * therefore gcc will use the result of the first call as input
138 * parameter for bar() as well. For rdmsr this is not acceptable as
139 * this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
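
/*
 * Illustrative sketch of the remarks above; the structure and field names are
 * made up.  A plain store needs an explicit ASMCompilerBarrier() when the data
 * may be inspected or modified by a device or the VMM, while the ordered
 * ASMAtomic* operations are atomic but do not necessarily imply a compiler
 * barrier:
 *
 *      pShared->u32Request = u32Req;       // ordinary store, compiler may reorder or defer it
 *      ASMCompilerBarrier();               // force the compiler to emit outstanding writes now
 *      uOldOwner = ASMAtomicXchgU32(&pShared->u32Owner, idSelf); // ordered atomic exchange
 */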
144
145
146/** @def RT_INLINE_ASM_GCC_4_3_X_X86
147 * Used to work around some 4.3.x register allocation issues in this version of
148 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
149 * definitely not for 5.x */
150#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
151# define RT_INLINE_ASM_GCC_4_3_X_X86 1
152#else
153# define RT_INLINE_ASM_GCC_4_3_X_X86 0
154#endif
155
156/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
157 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
158 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
159 * (PIC mode, x86).
160 *
161 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
162 * when in PIC mode on x86.
163 */
164#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
165# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# elif ( (defined(PIC) || defined(__PIC__)) \
170 && defined(RT_ARCH_X86) \
171 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
172 || defined(RT_OS_DARWIN)) )
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
174# else
175# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
176# endif
177#endif
178
179
180/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
181 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
182#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
184#else
185# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
186#endif
187
188/*
189 * ARM is great fun.
190 */
191#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
192
193# define RTASM_ARM_NO_BARRIER
194# ifdef RT_ARCH_ARM64
195# define RTASM_ARM_NO_BARRIER_IN_REG
196# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
197# define RTASM_ARM_DSB_SY "dsb sy\n\t"
198# define RTASM_ARM_DSB_SY_IN_REG
199# define RTASM_ARM_DSB_SY_COMMA_IN_REG
200# define RTASM_ARM_DMB_SY "dmb sy\n\t"
201# define RTASM_ARM_DMB_SY_IN_REG
202# define RTASM_ARM_DMB_SY_COMMA_IN_REG
203# define RTASM_ARM_DMB_ST "dmb st\n\t"
204# define RTASM_ARM_DMB_ST_IN_REG
205# define RTASM_ARM_DMB_ST_COMMA_IN_REG
206# define RTASM_ARM_DMB_LD "dmb ld\n\t"
207# define RTASM_ARM_DMB_LD_IN_REG
208# define RTASM_ARM_DMB_LD_COMMA_IN_REG
209# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
210# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
211 uint32_t rcSpill; \
212 uint32_t u32NewRet; \
213 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
214 RTASM_ARM_##barrier_type /* before label? */ \
215 "ldaxr %w[uNew], %[pMem]\n\t" \
216 modify64 \
217 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
218 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
219 : [pMem] "+m" (*a_pu32Mem) \
220 , [uNew] "=&r" (u32NewRet) \
221 , [rc] "=&r" (rcSpill) \
222 : in_reg \
223 : "cc")
224# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
225 uint32_t rcSpill; \
226 uint32_t u32OldRet; \
227 uint32_t u32NewSpill; \
228 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
229 RTASM_ARM_##barrier_type /* before label? */ \
230 "ldaxr %w[uOld], %[pMem]\n\t" \
231 modify64 \
232 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
233 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
234 : [pMem] "+m" (*a_pu32Mem) \
235 , [uOld] "=&r" (u32OldRet) \
236 , [uNew] "=&r" (u32NewSpill) \
237 , [rc] "=&r" (rcSpill) \
238 : in_reg \
239 : "cc")
240# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
241 uint32_t rcSpill; \
242 uint64_t u64NewRet; \
243 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
244 RTASM_ARM_##barrier_type /* before label? */ \
245 "ldaxr %[uNew], %[pMem]\n\t" \
246 modify64 \
247 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
248 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
249 : [pMem] "+m" (*a_pu64Mem) \
250 , [uNew] "=&r" (u64NewRet) \
251 , [rc] "=&r" (rcSpill) \
252 : in_reg \
253 : "cc")
254# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
255 uint32_t rcSpill; \
256 uint64_t u64OldRet; \
257 uint64_t u64NewSpill; \
258 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
259 RTASM_ARM_##barrier_type /* before label? */ \
260 "ldaxr %[uOld], %[pMem]\n\t" \
261 modify64 \
262 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
263 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
264 : [pMem] "+m" (*a_pu64Mem) \
265 , [uOld] "=&r" (u64OldRet) \
266 , [uNew] "=&r" (u64NewSpill) \
267 , [rc] "=&r" (rcSpill) \
268 : in_reg \
269 : "cc")
270
271# else /* RT_ARCH_ARM32 */
272# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
273# if RT_ARCH_ARM32 >= 7
274# warning armv7
275# define RTASM_ARM_NO_BARRIER_IN_REG
276# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
277# define RTASM_ARM_DSB_SY "dsb sy\n\t"
278# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_SY "dmb sy\n\t"
280# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_ST "dmb st\n\t"
282# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
283# define RTASM_ARM_DMB_LD "dmb ld\n\t"
284# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
285
286# elif RT_ARCH_ARM32 >= 6
287# warning armv6
288# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
289# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
291# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
292# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
295# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
296# elif RT_ARCH_ARM32 >= 4
297# warning armv5 or older
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
306# else
307# error "huh? Odd RT_ARCH_ARM32 value!"
308# endif
309# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
310# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
311# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
312# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
313# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
314 uint32_t rcSpill; \
315 uint32_t u32NewRet; \
316 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
317 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
318 "ldrex %[uNew], %[pMem]\n\t" \
319 modify32 \
320 "strex %[rc], %[uNew], %[pMem]\n\t" \
321 "cmp %[rc], #0\n\t" \
322 "bne .Ltry_again_" #name "_%=\n\t" \
323 : [pMem] "+m" (*a_pu32Mem) \
324 , [uNew] "=&r" (u32NewRet) \
325 , [rc] "=&r" (rcSpill) \
326 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
327 , in_reg \
328 : "cc")
329# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
330 uint32_t rcSpill; \
331 uint32_t u32OldRet; \
332 uint32_t u32NewSpill; \
333 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
334 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
335 "ldrex %[uOld], %[pMem]\n\t" \
336 modify32 \
337 "strex %[rc], %[uNew], %[pMem]\n\t" \
338 "cmp %[rc], #0\n\t" \
339 "bne .Ltry_again_" #name "_%=\n\t" \
340 : [pMem] "+m" (*a_pu32Mem) \
341 , [uOld] "=&r" (u32OldRet) \
342 , [uNew] "=&r" (u32NewSpill) \
343 , [rc] "=&r" (rcSpill) \
344 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
345 , in_reg \
346 : "cc")
347# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
348 uint32_t rcSpill; \
349 uint64_t u64NewRet; \
350 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
351 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
352 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
353 modify32 \
354 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
355 "cmp %[rc], #0\n\t" \
356 "bne .Ltry_again_" #name "_%=\n\t" \
357 : [pMem] "+m" (*a_pu64Mem), \
358 [uNew] "=&r" (u64NewRet), \
359 [rc] "=&r" (rcSpill) \
360 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
361 , in_reg \
362 : "cc")
363# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
364 uint32_t rcSpill; \
365 uint64_t u64OldRet; \
366 uint64_t u64NewSpill; \
367 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
368 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
369 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
370 modify32 \
371 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
372 "cmp %[rc], #0\n\t" \
373 "bne .Ltry_again_" #name "_%=\n\t" \
374 : [pMem] "+m" (*a_pu64Mem), \
375 [uOld] "=&r" (u64OldRet), \
376 [uNew] "=&r" (u64NewSpill), \
377 [rc] "=&r" (rcSpill) \
378 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
379 , in_reg \
380 : "cc")
381# endif /* RT_ARCH_ARM32 */
382#endif
383
384
385/** @def ASMReturnAddress
386 * Gets the return address of the current (or calling if you like) function or method.
387 */
388#ifdef _MSC_VER
389# ifdef __cplusplus
390extern "C"
391# endif
392void * _ReturnAddress(void);
393# pragma intrinsic(_ReturnAddress)
394# define ASMReturnAddress() _ReturnAddress()
395#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
396# define ASMReturnAddress() __builtin_return_address(0)
397#elif defined(__WATCOMC__)
398# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
399#else
400# error "Unsupported compiler."
401#endif
402
403
404/**
405 * Compiler memory barrier.
406 *
407 * Ensure that the compiler does not use any cached (register/tmp stack) memory
408 * values or any outstanding writes when returning from this function.
409 *
410 * This function must be used if non-volatile data is modified by a
411 * device or the VMM. Typical cases are port access, MMIO access,
412 * trapping instructions, etc.
413 */
414#if RT_INLINE_ASM_GNU_STYLE
415# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
416#elif RT_INLINE_ASM_USES_INTRIN
417# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
418#elif defined(__WATCOMC__)
419void ASMCompilerBarrier(void);
420#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
421DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
422{
423 __asm
424 {
425 }
426}
427#endif
428
429
430/** @def ASMBreakpoint
431 * Debugger Breakpoint.
432 * @deprecated Use RT_BREAKPOINT instead.
433 * @internal
434 */
435#define ASMBreakpoint() RT_BREAKPOINT()
436
437
438/**
439 * Spinloop hint for platforms that have these; empty function on the other
440 * platforms.
441 *
442 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
443 * spin locks.
444 */
445#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
446RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
447#else
448DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
449{
450# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
451# if RT_INLINE_ASM_GNU_STYLE
452 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
453# else
454 __asm {
455 _emit 0f3h
456 _emit 090h
457 }
458# endif
459
460# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
461 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
462
463# else
464 /* dummy */
465# endif
466}
467#endif
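
/*
 * Illustrative spin-wait sketch; the variable name is made up and
 * ASMAtomicCmpXchgU32() is defined further down in this file:
 *
 *      static uint32_t volatile s_uSpinlock = 0;
 *
 *      while (!ASMAtomicCmpXchgU32(&s_uSpinlock, 1, 0))
 *          ASMNopPause();              // be nice to the sibling hardware thread while spinning
 */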
468
469
470/**
471 * Atomically Exchange an unsigned 8-bit value, ordered.
472 *
473 * @returns Current *pu8 value
474 * @param pu8 Pointer to the 8-bit variable to update.
475 * @param u8 The 8-bit value to assign to *pu8.
476 */
477#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
478RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
479#else
480DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
481{
482# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
483# if RT_INLINE_ASM_GNU_STYLE
484 __asm__ __volatile__("xchgb %0, %1\n\t"
485 : "=m" (*pu8)
486 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
487 : "1" (u8)
488 , "m" (*pu8));
489# else
490 __asm
491 {
492# ifdef RT_ARCH_AMD64
493 mov rdx, [pu8]
494 mov al, [u8]
495 xchg [rdx], al
496 mov [u8], al
497# else
498 mov edx, [pu8]
499 mov al, [u8]
500 xchg [edx], al
501 mov [u8], al
502# endif
503 }
504# endif
505 return u8;
506
507# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
508 uint32_t uOld;
509 uint32_t rcSpill;
510 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
511 RTASM_ARM_DMB_SY
512# if defined(RT_ARCH_ARM64)
513 "ldaxrb %w[uOld], %[pMem]\n\t"
514 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
515 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
516# else
517 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
518 "strexb %[rc], %[uNew], %[pMem]\n\t"
519 "cmp %[rc], #0\n\t"
520 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
521# endif
522 : [pMem] "+m" (*pu8)
523 , [uOld] "=&r" (uOld)
524 , [rc] "=&r" (rcSpill)
525 : [uNew] "r" ((uint32_t)u8)
526 RTASM_ARM_DMB_SY_COMMA_IN_REG
527 : "cc");
528 return (uint8_t)uOld;
529
530# else
531# error "Port me"
532# endif
533}
534#endif
535
536
537/**
538 * Atomically Exchange a signed 8-bit value, ordered.
539 *
540 * @returns Current *pi8 value
541 * @param pi8 Pointer to the 8-bit variable to update.
542 * @param i8 The 8-bit value to assign to *pi8.
543 */
544DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
545{
546 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
547}
548
549
550/**
551 * Atomically Exchange a bool value, ordered.
552 *
553 * @returns Current *pf value
554 * @param pf Pointer to the 8-bit variable to update.
555 * @param f The 8-bit value to assign to *pf.
556 */
557DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
558{
559#ifdef _MSC_VER
560 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#else
562 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
563#endif
564}
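
/*
 * Illustrative run-once sketch; doOneTimeInit() and the flag are made up.
 * ASMAtomicXchgBool() returns the previous value, so only the first caller
 * that flips the flag sees false:
 *
 *      static bool volatile s_fInitialized = false;
 *
 *      if (!ASMAtomicXchgBool(&s_fInitialized, true))
 *          doOneTimeInit();
 */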
565
566
567/**
568 * Atomically Exchange an unsigned 16-bit value, ordered.
569 *
570 * @returns Current *pu16 value
571 * @param pu16 Pointer to the 16-bit variable to update.
572 * @param u16 The 16-bit value to assign to *pu16.
573 */
574#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
575RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
576#else
577DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
578{
579# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
580# if RT_INLINE_ASM_GNU_STYLE
581 __asm__ __volatile__("xchgw %0, %1\n\t"
582 : "=m" (*pu16)
583 , "=r" (u16)
584 : "1" (u16)
585 , "m" (*pu16));
586# else
587 __asm
588 {
589# ifdef RT_ARCH_AMD64
590 mov rdx, [pu16]
591 mov ax, [u16]
592 xchg [rdx], ax
593 mov [u16], ax
594# else
595 mov edx, [pu16]
596 mov ax, [u16]
597 xchg [edx], ax
598 mov [u16], ax
599# endif
600 }
601# endif
602 return u16;
603
604# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
605 uint32_t uOld;
606 uint32_t rcSpill;
607 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
608 RTASM_ARM_DMB_SY
609# if defined(RT_ARCH_ARM64)
610 "ldaxrh %w[uOld], %[pMem]\n\t"
611 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
612 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
613# else
614 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
615 "strexh %[rc], %[uNew], %[pMem]\n\t"
616 "cmp %[rc], #0\n\t"
617 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
618# endif
619 : [pMem] "+m" (*pu16)
620 , [uOld] "=&r" (uOld)
621 , [rc] "=&r" (rcSpill)
622 : [uNew] "r" ((uint32_t)u16)
623 RTASM_ARM_DMB_SY_COMMA_IN_REG
624 : "cc");
625 return (uint16_t)uOld;
626
627# else
628# error "Port me"
629# endif
630}
631#endif
632
633
634/**
635 * Atomically Exchange a signed 16-bit value, ordered.
636 *
637 * @returns Current *pi16 value
638 * @param pi16 Pointer to the 16-bit variable to update.
639 * @param i16 The 16-bit value to assign to *pi16.
640 */
641DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
642{
643 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
644}
645
646
647/**
648 * Atomically Exchange an unsigned 32-bit value, ordered.
649 *
650 * @returns Current *pu32 value
651 * @param pu32 Pointer to the 32-bit variable to update.
652 * @param u32 The 32-bit value to assign to *pu32.
653 *
654 * @remarks Does not work on 286 and earlier.
655 */
656#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
657RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
658#else
659DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
660{
661# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
662# if RT_INLINE_ASM_GNU_STYLE
663 __asm__ __volatile__("xchgl %0, %1\n\t"
664 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
665 , "=r" (u32)
666 : "1" (u32)
667 , "m" (*pu32));
668
669# elif RT_INLINE_ASM_USES_INTRIN
670 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
671
672# else
673 __asm
674 {
675# ifdef RT_ARCH_AMD64
676 mov rdx, [pu32]
677 mov eax, u32
678 xchg [rdx], eax
679 mov [u32], eax
680# else
681 mov edx, [pu32]
682 mov eax, u32
683 xchg [edx], eax
684 mov [u32], eax
685# endif
686 }
687# endif
688 return u32;
689
690# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
691 uint32_t uOld;
692 uint32_t rcSpill;
693 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
694 RTASM_ARM_DMB_SY
695# if defined(RT_ARCH_ARM64)
696 "ldaxr %w[uOld], %[pMem]\n\t"
697 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
698 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
699# else
700 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
701 "strex %[rc], %[uNew], %[pMem]\n\t"
702 "cmp %[rc], #0\n\t"
703 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
704# endif
705 : [pMem] "+m" (*pu32)
706 , [uOld] "=&r" (uOld)
707 , [rc] "=&r" (rcSpill)
708 : [uNew] "r" (u32)
709 RTASM_ARM_DMB_SY_COMMA_IN_REG
710 : "cc");
711 return uOld;
712
713# else
714# error "Port me"
715# endif
716}
717#endif
718
719
720/**
721 * Atomically Exchange a signed 32-bit value, ordered.
722 *
723 * @returns Current *pi32 value
724 * @param pi32 Pointer to the 32-bit variable to update.
725 * @param i32 The 32-bit value to assign to *pi32.
726 */
727DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
728{
729 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
730}
731
732
733/**
734 * Atomically Exchange an unsigned 64-bit value, ordered.
735 *
736 * @returns Current *pu64 value
737 * @param pu64 Pointer to the 64-bit variable to update.
738 * @param u64 The 64-bit value to assign to *pu64.
739 *
740 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
741 */
742#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
743 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
744RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
745#else
746DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
747{
748# if defined(RT_ARCH_AMD64)
749# if RT_INLINE_ASM_USES_INTRIN
750 return _InterlockedExchange64((__int64 *)pu64, u64);
751
752# elif RT_INLINE_ASM_GNU_STYLE
753 __asm__ __volatile__("xchgq %0, %1\n\t"
754 : "=m" (*pu64)
755 , "=r" (u64)
756 : "1" (u64)
757 , "m" (*pu64));
758 return u64;
759# else
760 __asm
761 {
762 mov rdx, [pu64]
763 mov rax, [u64]
764 xchg [rdx], rax
765 mov [u64], rax
766 }
767 return u64;
768# endif
769
770# elif defined(RT_ARCH_X86)
771# if RT_INLINE_ASM_GNU_STYLE
772# if defined(PIC) || defined(__PIC__)
773 uint32_t u32EBX = (uint32_t)u64;
774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
775 "xchgl %%ebx, %3\n\t"
776 "1:\n\t"
777 "lock; cmpxchg8b (%5)\n\t"
778 "jnz 1b\n\t"
779 "movl %3, %%ebx\n\t"
780 /*"xchgl %%esi, %5\n\t"*/
781 : "=A" (u64)
782 , "=m" (*pu64)
783 : "0" (*pu64)
784 , "m" ( u32EBX )
785 , "c" ( (uint32_t)(u64 >> 32) )
786 , "S" (pu64)
787 : "cc");
788# else /* !PIC */
789 __asm__ __volatile__("1:\n\t"
790 "lock; cmpxchg8b %1\n\t"
791 "jnz 1b\n\t"
792 : "=A" (u64)
793 , "=m" (*pu64)
794 : "0" (*pu64)
795 , "b" ( (uint32_t)u64 )
796 , "c" ( (uint32_t)(u64 >> 32) )
797 : "cc");
798# endif
799# else
800 __asm
801 {
802 mov ebx, dword ptr [u64]
803 mov ecx, dword ptr [u64 + 4]
804 mov edi, pu64
805 mov eax, dword ptr [edi]
806 mov edx, dword ptr [edi + 4]
807 retry:
808 lock cmpxchg8b [edi]
809 jnz retry
810 mov dword ptr [u64], eax
811 mov dword ptr [u64 + 4], edx
812 }
813# endif
814 return u64;
815
816# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
817 uint32_t rcSpill;
818 uint64_t uOld;
819 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
820 RTASM_ARM_DMB_SY
821# if defined(RT_ARCH_ARM64)
822 "ldaxr %[uOld], %[pMem]\n\t"
823 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
824 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
825# else
826 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
827 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
828 "cmp %[rc], #0\n\t"
829 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
830# endif
831 : [pMem] "+m" (*pu64)
832 , [uOld] "=&r" (uOld)
833 , [rc] "=&r" (rcSpill)
834 : [uNew] "r" (u64)
835 RTASM_ARM_DMB_SY_COMMA_IN_REG
836 : "cc");
837 return uOld;
838
839# else
840# error "Port me"
841# endif
842}
843#endif
844
845
846/**
847 * Atomically Exchange a signed 64-bit value, ordered.
848 *
849 * @returns Current *pi64 value
850 * @param pi64 Pointer to the 64-bit variable to update.
851 * @param i64 The 64-bit value to assign to *pi64.
852 */
853DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
854{
855 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
856}
857
858
859/**
860 * Atomically Exchange a size_t value, ordered.
861 *
862 * @returns Current *puDst value
863 * @param puDst Pointer to the size_t variable to update.
864 * @param uNew The new value to assign to *puDst.
865 */
866DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
867{
868#if ARCH_BITS == 16
869 AssertCompile(sizeof(size_t) == 2);
870 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 32
872 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
873#elif ARCH_BITS == 64
874 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
875#else
876# error "ARCH_BITS is bogus"
877#endif
878}
879
880
881/**
882 * Atomically Exchange a pointer value, ordered.
883 *
884 * @returns Current *ppv value
885 * @param ppv Pointer to the pointer variable to update.
886 * @param pv The pointer value to assign to *ppv.
887 */
888DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
889{
890#if ARCH_BITS == 32 || ARCH_BITS == 16
891 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
892#elif ARCH_BITS == 64
893 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
894#else
895# error "ARCH_BITS is bogus"
896#endif
897}
898
899
900/**
901 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
902 *
903 * @returns Current *ppv value
904 * @param ppv Pointer to the pointer variable to update.
905 * @param pv The pointer value to assign to *ppv.
906 * @param Type The type of *ppv, sans volatile.
907 */
908#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
909# define ASMAtomicXchgPtrT(ppv, pv, Type) \
910 __extension__ \
911 ({\
912 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
913 Type const pvTypeChecked = (pv); \
914 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
915 pvTypeCheckedRet; \
916 })
917#else
918# define ASMAtomicXchgPtrT(ppv, pv, Type) \
919 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
920#endif
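
/*
 * Illustrative sketch contrasting the raw and the typed pointer exchange; the
 * MYNODE type and the variables are made up:
 *
 *      MYNODE * volatile g_pHead;
 *
 *      pOld = (MYNODE *)ASMAtomicXchgPtr((void * volatile *)&g_pHead, pNew);  // manual casting
 *      pOld = ASMAtomicXchgPtrT(&g_pHead, pNew, MYNODE *);                    // macro does the casting
 */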
921
922
923/**
924 * Atomically Exchange a raw-mode context pointer value, ordered.
925 *
926 * @returns Current *ppv value
927 * @param ppvRC Pointer to the pointer variable to update.
928 * @param pvRC The pointer value to assign to *ppvRC.
929 */
930DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
931{
932 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
933}
934
935
936/**
937 * Atomically Exchange a ring-0 pointer value, ordered.
938 *
939 * @returns Current *ppv value
940 * @param ppvR0 Pointer to the pointer variable to update.
941 * @param pvR0 The pointer value to assign to *ppvR0.
942 */
943DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
944{
945#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
946 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
947#elif R0_ARCH_BITS == 64
948 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
949#else
950# error "R0_ARCH_BITS is bogus"
951#endif
952}
953
954
955/**
956 * Atomically Exchange a ring-3 pointer value, ordered.
957 *
958 * @returns Current *ppv value
959 * @param ppvR3 Pointer to the pointer variable to update.
960 * @param pvR3 The pointer value to assign to *ppvR3.
961 */
962DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
963{
964#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
965 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
966#elif R3_ARCH_BITS == 64
967 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
968#else
969# error "R3_ARCH_BITS is bogus"
970#endif
971}
972
973
974/** @def ASMAtomicXchgHandle
975 * Atomically Exchange a typical IPRT handle value, ordered.
976 *
977 * @param ph Pointer to the value to update.
978 * @param hNew The new value to assign to *ph.
979 * @param phRes Where to store the current *ph value.
980 *
981 * @remarks This doesn't currently work for all handles (like RTFILE).
982 */
983#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
984# define ASMAtomicXchgHandle(ph, hNew, phRes) \
985 do { \
986 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
987 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
988 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
989 } while (0)
990#elif HC_ARCH_BITS == 64
991# define ASMAtomicXchgHandle(ph, hNew, phRes) \
992 do { \
993 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
994 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
995 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
996 } while (0)
997#else
998# error HC_ARCH_BITS
999#endif
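
/*
 * Illustrative teardown sketch for ASMAtomicXchgHandle(): atomically take over
 * a handle member before destroying it so only one caller does the cleanup.
 * The pThis->hEvt member is made up; RTSEMEVENT, NIL_RTSEMEVENT and
 * RTSemEventDestroy() are the usual IPRT event semaphore APIs:
 *
 *      RTSEMEVENT hEvtToFree;
 *      ASMAtomicXchgHandle(&pThis->hEvt, NIL_RTSEMEVENT, &hEvtToFree);
 *      if (hEvtToFree != NIL_RTSEMEVENT)
 *          RTSemEventDestroy(hEvtToFree);
 */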
1000
1001
1002/**
1003 * Atomically Exchange a value whose size might differ
1004 * between platforms or compilers, ordered.
1005 *
1006 * @param pu Pointer to the variable to update.
1007 * @param uNew The value to assign to *pu.
1008 * @todo This is busted as it's missing the result argument.
1009 */
1010#define ASMAtomicXchgSize(pu, uNew) \
1011 do { \
1012 switch (sizeof(*(pu))) { \
1013 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1014 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1015 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1016 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1017 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1018 } \
1019 } while (0)
1020
1021/**
1022 * Atomically Exchange a value whose size might differ
1023 * between platforms or compilers, ordered.
1024 *
1025 * @param pu Pointer to the variable to update.
1026 * @param uNew The value to assign to *pu.
1027 * @param puRes Where to store the current *pu value.
1028 */
1029#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1030 do { \
1031 switch (sizeof(*(pu))) { \
1032 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1033 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1034 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1035 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1036 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1037 } \
1038 } while (0)
1039
1040
1041
1042/**
1043 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1044 *
1045 * @returns true if xchg was done.
1046 * @returns false if xchg wasn't done.
1047 *
1048 * @param pu8 Pointer to the value to update.
1049 * @param u8New The new value to assign to *pu8.
1050 * @param u8Old The old value to compare *pu8 with.
1051 *
1052 * @remarks x86: Requires a 486 or later.
1053 * @todo Rename ASMAtomicCmpWriteU8
1054 */
1055#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1056RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1057#else
1058DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1059{
1060# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1061 uint8_t u8Ret;
1062 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1063 "setz %1\n\t"
1064 : "=m" (*pu8)
1065 , "=qm" (u8Ret)
1066 , "=a" (u8Old)
1067 : "q" (u8New)
1068 , "2" (u8Old)
1069 , "m" (*pu8)
1070 : "cc");
1071 return (bool)u8Ret;
1072
1073# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1074 union { uint32_t u; bool f; } fXchg;
1075 uint32_t u32Spill;
1076 uint32_t rcSpill;
1077 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1078 RTASM_ARM_DMB_SY
1079# if defined(RT_ARCH_ARM64)
1080 "ldaxrb %w[uOld], %[pMem]\n\t"
1081 "cmp %w[uOld], %w[uCmp]\n\t"
1082 "bne 1f\n\t" /* stop here if not equal */
1083 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1084 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1085 "mov %w[fXchg], #1\n\t"
1086# else
1087 "ldrexb %[uOld], %[pMem]\n\t"
1088 "teq %[uOld], %[uCmp]\n\t"
1089 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1090 "bne 1f\n\t" /* stop here if not equal */
1091 "cmp %[rc], #0\n\t"
1092 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1093 "mov %[fXchg], #1\n\t"
1094# endif
1095 "1:\n\t"
1096 : [pMem] "+m" (*pu8)
1097 , [uOld] "=&r" (u32Spill)
1098 , [rc] "=&r" (rcSpill)
1099 , [fXchg] "=&r" (fXchg.u)
1100 : [uCmp] "r" ((uint32_t)u8Old)
1101 , [uNew] "r" ((uint32_t)u8New)
1102 , "[fXchg]" (0)
1103 RTASM_ARM_DMB_SY_COMMA_IN_REG
1104 : "cc");
1105 return fXchg.f;
1106
1107# else
1108# error "Port me"
1109# endif
1110}
1111#endif
1112
1113
1114/**
1115 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1116 *
1117 * @returns true if xchg was done.
1118 * @returns false if xchg wasn't done.
1119 *
1120 * @param pi8 Pointer to the value to update.
1121 * @param i8New The new value to assign to *pi8.
1122 * @param i8Old The old value to compare *pi8 with.
1123 *
1124 * @remarks x86: Requires a 486 or later.
1125 * @todo Rename ASMAtomicCmpWriteS8
1126 */
1127DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1128{
1129 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1130}
1131
1132
1133/**
1134 * Atomically Compare and Exchange a bool value, ordered.
1135 *
1136 * @returns true if xchg was done.
1137 * @returns false if xchg wasn't done.
1138 *
1139 * @param pf Pointer to the value to update.
1140 * @param fNew The new value to assign to *pf.
1141 * @param fOld The old value to compare *pf with.
1142 *
1143 * @remarks x86: Requires a 486 or later.
1144 * @todo Rename ASMAtomicCmpWriteBool
1145 */
1146DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1147{
1148 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1149}
1150
1151
1152/**
1153 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1154 *
1155 * @returns true if xchg was done.
1156 * @returns false if xchg wasn't done.
1157 *
1158 * @param pu32 Pointer to the value to update.
1159 * @param u32New The new value to assign to *pu32.
1160 * @param u32Old The old value to compare *pu32 with.
1161 *
1162 * @remarks x86: Requires a 486 or later.
1163 * @todo Rename ASMAtomicCmpWriteU32
1164 */
1165#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1166RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1167#else
1168DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1169{
1170# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1171# if RT_INLINE_ASM_GNU_STYLE
1172 uint8_t u8Ret;
1173 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1174 "setz %1\n\t"
1175 : "=m" (*pu32)
1176 , "=qm" (u8Ret)
1177 , "=a" (u32Old)
1178 : "r" (u32New)
1179 , "2" (u32Old)
1180 , "m" (*pu32)
1181 : "cc");
1182 return (bool)u8Ret;
1183
1184# elif RT_INLINE_ASM_USES_INTRIN
1185 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1186
1187# else
1188 uint32_t u32Ret;
1189 __asm
1190 {
1191# ifdef RT_ARCH_AMD64
1192 mov rdx, [pu32]
1193# else
1194 mov edx, [pu32]
1195# endif
1196 mov eax, [u32Old]
1197 mov ecx, [u32New]
1198# ifdef RT_ARCH_AMD64
1199 lock cmpxchg [rdx], ecx
1200# else
1201 lock cmpxchg [edx], ecx
1202# endif
1203 setz al
1204 movzx eax, al
1205 mov [u32Ret], eax
1206 }
1207 return !!u32Ret;
1208# endif
1209
1210# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1211 union { uint32_t u; bool f; } fXchg;
1212 uint32_t u32Spill;
1213 uint32_t rcSpill;
1214 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1215 RTASM_ARM_DMB_SY
1216# if defined(RT_ARCH_ARM64)
1217 "ldaxr %w[uOld], %[pMem]\n\t"
1218 "cmp %w[uOld], %w[uCmp]\n\t"
1219 "bne 1f\n\t" /* stop here if not equal */
1220 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1221 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1222 "mov %w[fXchg], #1\n\t"
1223# else
1224 "ldrex %[uOld], %[pMem]\n\t"
1225 "teq %[uOld], %[uCmp]\n\t"
1226 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1227 "bne 1f\n\t" /* stop here if not equal */
1228 "cmp %[rc], #0\n\t"
1229 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1230 "mov %[fXchg], #1\n\t"
1231# endif
1232 "1:\n\t"
1233 : [pMem] "+m" (*pu32)
1234 , [uOld] "=&r" (u32Spill)
1235 , [rc] "=&r" (rcSpill)
1236 , [fXchg] "=&r" (fXchg.u)
1237 : [uCmp] "r" (u32Old)
1238 , [uNew] "r" (u32New)
1239 , "[fXchg]" (0)
1240 RTASM_ARM_DMB_SY_COMMA_IN_REG
1241 : "cc");
1242 return fXchg.f;
1243
1244# else
1245# error "Port me"
1246# endif
1247}
1248#endif
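
/*
 * Illustrative compare-and-swap loop for ASMAtomicCmpXchgU32(); a sketch only,
 * the function and variable names are made up.  It atomically raises *pu32Max
 * to u32New unless the current value is already larger:
 *
 *      static void atomicSetMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur;
 *          do
 *              u32Cur = *pu32Max;      // volatile read, re-sampled on every retry
 *          while (   u32New > u32Cur
 *                 && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
 *      }
 */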
1249
1250
1251/**
1252 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1253 *
1254 * @returns true if xchg was done.
1255 * @returns false if xchg wasn't done.
1256 *
1257 * @param pi32 Pointer to the value to update.
1258 * @param i32New The new value to assign to *pi32.
1259 * @param i32Old The old value to compare *pi32 with.
1260 *
1261 * @remarks x86: Requires a 486 or later.
1262 * @todo Rename ASMAtomicCmpWriteS32
1263 */
1264DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1265{
1266 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1267}
1268
1269
1270/**
1271 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1272 *
1273 * @returns true if xchg was done.
1274 * @returns false if xchg wasn't done.
1275 *
1276 * @param pu64 Pointer to the 64-bit variable to update.
1277 * @param u64New The 64-bit value to assign to *pu64.
1278 * @param u64Old The value to compare with.
1279 *
1280 * @remarks x86: Requires a Pentium or later.
1281 * @todo Rename ASMAtomicCmpWriteU64
1282 */
1283#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1284 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1285RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1286#else
1287DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1288{
1289# if RT_INLINE_ASM_USES_INTRIN
1290 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1291
1292# elif defined(RT_ARCH_AMD64)
1293# if RT_INLINE_ASM_GNU_STYLE
1294 uint8_t u8Ret;
1295 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1296 "setz %1\n\t"
1297 : "=m" (*pu64)
1298 , "=qm" (u8Ret)
1299 , "=a" (u64Old)
1300 : "r" (u64New)
1301 , "2" (u64Old)
1302 , "m" (*pu64)
1303 : "cc");
1304 return (bool)u8Ret;
1305# else
1306 bool fRet;
1307 __asm
1308 {
1309 mov rdx, [pu64]
1310 mov rax, [u64Old]
1311 mov rcx, [u64New]
1312 lock cmpxchg [rdx], rcx
1313 setz al
1314 mov [fRet], al
1315 }
1316 return fRet;
1317# endif
1318
1319# elif defined(RT_ARCH_X86)
1320 uint32_t u32Ret;
1321# if RT_INLINE_ASM_GNU_STYLE
1322# if defined(PIC) || defined(__PIC__)
1323 uint32_t u32EBX = (uint32_t)u64New;
1324 uint32_t u32Spill;
1325 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1326 "lock; cmpxchg8b (%6)\n\t"
1327 "setz %%al\n\t"
1328 "movl %4, %%ebx\n\t"
1329 "movzbl %%al, %%eax\n\t"
1330 : "=a" (u32Ret)
1331 , "=d" (u32Spill)
1332# if RT_GNUC_PREREQ(4, 3)
1333 , "+m" (*pu64)
1334# else
1335 , "=m" (*pu64)
1336# endif
1337 : "A" (u64Old)
1338 , "m" ( u32EBX )
1339 , "c" ( (uint32_t)(u64New >> 32) )
1340 , "S" (pu64)
1341 : "cc");
1342# else /* !PIC */
1343 uint32_t u32Spill;
1344 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1345 "setz %%al\n\t"
1346 "movzbl %%al, %%eax\n\t"
1347 : "=a" (u32Ret)
1348 , "=d" (u32Spill)
1349 , "+m" (*pu64)
1350 : "A" (u64Old)
1351 , "b" ( (uint32_t)u64New )
1352 , "c" ( (uint32_t)(u64New >> 32) )
1353 : "cc");
1354# endif
1355 return (bool)u32Ret;
1356# else
1357 __asm
1358 {
1359 mov ebx, dword ptr [u64New]
1360 mov ecx, dword ptr [u64New + 4]
1361 mov edi, [pu64]
1362 mov eax, dword ptr [u64Old]
1363 mov edx, dword ptr [u64Old + 4]
1364 lock cmpxchg8b [edi]
1365 setz al
1366 movzx eax, al
1367 mov dword ptr [u32Ret], eax
1368 }
1369 return !!u32Ret;
1370# endif
1371
1372# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1373 union { uint32_t u; bool f; } fXchg;
1374 uint64_t u64Spill;
1375 uint32_t rcSpill;
1376 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1377 RTASM_ARM_DMB_SY
1378# if defined(RT_ARCH_ARM64)
1379 "ldaxr %[uOld], %[pMem]\n\t"
1380 "cmp %[uOld], %[uCmp]\n\t"
1381 "bne 1f\n\t" /* stop here if not equal */
1382 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1383 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1384 "mov %w[fXchg], #1\n\t"
1385# else
1386 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1387 "teq %[uOld], %[uCmp]\n\t"
1388 "teqeq %H[uOld], %H[uCmp]\n\t"
1389 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1390 "bne 1f\n\t" /* stop here if not equal */
1391 "cmp %[rc], #0\n\t"
1392 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1393 "mov %[fXchg], #1\n\t"
1394# endif
1395 "1:\n\t"
1396 : [pMem] "+m" (*pu64)
1397 , [uOld] "=&r" (u64Spill)
1398 , [rc] "=&r" (rcSpill)
1399 , [fXchg] "=&r" (fXchg.u)
1400 : [uCmp] "r" (u64Old)
1401 , [uNew] "r" (u64New)
1402 , "[fXchg]" (0)
1403 RTASM_ARM_DMB_SY_COMMA_IN_REG
1404 : "cc");
1405 return fXchg.f;
1406
1407# else
1408# error "Port me"
1409# endif
1410}
1411#endif
1412
1413
1414/**
1415 * Atomically Compare and exchange a signed 64-bit value, ordered.
1416 *
1417 * @returns true if xchg was done.
1418 * @returns false if xchg wasn't done.
1419 *
1420 * @param pi64 Pointer to the 64-bit variable to update.
1421 * @param i64 The 64-bit value to assign to *pi64.
1422 * @param i64Old The value to compare with.
1423 *
1424 * @remarks x86: Requires a Pentium or later.
1425 * @todo Rename ASMAtomicCmpWriteS64
1426 */
1427DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1428{
1429 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1430}
1431
1432#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
1433
1434/** @def RTASM_HAVE_CMP_WRITE_U128
1435 * Indicates that we've got ASMAtomicCmpWriteU128() available. */
1436# define RTASM_HAVE_CMP_WRITE_U128 1
1437
1438
1439/**
1440 * Atomically compare and write an unsigned 128-bit value, ordered.
1441 *
1442 * @returns true if write was done.
1443 * @returns false if write wasn't done.
1444 *
1445 * @param pu128 Pointer to the 128-bit variable to update.
1446 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1447 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1448 * @param u64OldHi The high 64 bits of the value to compare with.
1449 * @param u64OldLo The low 64 bits of the value to compare with.
1450 *
1451 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1452 */
1453# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1454DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1455 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1456# else
1457DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1458 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1459{
1460# if RT_INLINE_ASM_USES_INTRIN
1461 __int64 ai64Cmp[2];
1462 ai64Cmp[0] = u64OldLo;
1463 ai64Cmp[1] = u64OldHi;
1464 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1465
1466# elif defined(RT_ARCH_AMD64)
1467# if RT_INLINE_ASM_GNU_STYLE
1468 uint64_t u64Ret;
1469 uint64_t u64Spill;
1470 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1471 "setz %%al\n\t"
1472 "movzbl %%al, %%eax\n\t"
1473 : "=a" (u64Ret)
1474 , "=d" (u64Spill)
1475 , "+m" (*pu128)
1476 : "a" (u64OldLo)
1477 , "d" (u64OldHi)
1478 , "b" (u64NewLo)
1479 , "c" (u64NewHi)
1480 : "cc");
1481
1482 return (bool)u64Ret;
1483# else
1484# error "Port me"
1485# endif
1486# else
1487# error "Port me"
1488# endif
1489}
1490# endif
1491
1492
1493/**
1494 * Atomically compare and write an unsigned 128-bit value, ordered.
1495 *
1496 * @returns true if write was done.
1497 * @returns false if write wasn't done.
1498 *
1499 * @param pu128 Pointer to the 128-bit variable to update.
1500 * @param u128New The 128-bit value to assign to *pu128.
1501 * @param u128Old The value to compare with.
1502 *
1503 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1504 */
1505DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1506{
1507# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1508 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1509 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1510# else
1511 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1512# endif
1513}
1514
1515
1516/**
1517 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1518 */
1519DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1520 const RTUINT128U u128Old) RT_NOTHROW_DEF
1521{
1522 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1523}
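
/*
 * Illustrative sketch for the RTUINT128U variant; the variables are made up.
 * Per the remarks above, cmpxchg16b support should be checked via CPUID on the
 * earliest AMD64 CPUs before relying on this:
 *
 *      RTUINT128U uOld, uNew;
 *      uOld.s.Lo = 0;                      uOld.s.Hi = 0;
 *      uNew.s.Lo = UINT64_C(0xdeadbeef);   uNew.s.Hi = uTag;
 *      if (ASMAtomicCmpWriteU128U(&pState->u128, uNew, uOld))
 *          fClaimed = true;    // we observed the all-zero state and wrote the new value
 */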
1524
1525#endif /* RT_ARCH_AMD64 */
1526
1527
1528/**
1529 * Atomically Compare and Exchange a pointer value, ordered.
1530 *
1531 * @returns true if xchg was done.
1532 * @returns false if xchg wasn't done.
1533 *
1534 * @param ppv Pointer to the value to update.
1535 * @param pvNew The new value to assign to *ppv.
1536 * @param pvOld The old value to compare *ppv with.
1537 *
1538 * @remarks x86: Requires a 486 or later.
1539 * @todo Rename ASMAtomicCmpWritePtrVoid
1540 */
1541DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1542{
1543#if ARCH_BITS == 32 || ARCH_BITS == 16
1544 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1545#elif ARCH_BITS == 64
1546 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1547#else
1548# error "ARCH_BITS is bogus"
1549#endif
1550}
1551
1552
1553/**
1554 * Atomically Compare and Exchange a pointer value, ordered.
1555 *
1556 * @returns true if xchg was done.
1557 * @returns false if xchg wasn't done.
1558 *
1559 * @param ppv Pointer to the value to update.
1560 * @param pvNew The new value to assign to *ppv.
1561 * @param pvOld The old value to compare *ppv with.
1562 *
1563 * @remarks This is relatively type safe on GCC platforms.
1564 * @remarks x86: Requires a 486 or later.
1565 * @todo Rename ASMAtomicCmpWritePtr
1566 */
1567#ifdef __GNUC__
1568# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1569 __extension__ \
1570 ({\
1571 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1572 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1573 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1574 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1575 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1576 fMacroRet; \
1577 })
1578#else
1579# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1580 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1581#endif
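
/*
 * Illustrative lazy-instantiation sketch for ASMAtomicCmpXchgPtr(); MYTYPE and
 * the helpers are made up.  The new object is only published if nobody beat us
 * to it:
 *
 *      static MYTYPE * volatile g_pInstance = NULL;
 *
 *      MYTYPE *pNew = myTypeCreate();
 *      if (!ASMAtomicCmpXchgPtr(&g_pInstance, pNew, NULL))
 *          myTypeDestroy(pNew);    // lost the race, g_pInstance was already set
 */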
1582
1583
1584/** @def ASMAtomicCmpXchgHandle
1585 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1586 *
1587 * @param ph Pointer to the value to update.
1588 * @param hNew The new value to assign to *ph.
1589 * @param hOld The old value to compare *ph with.
1590 * @param fRc Where to store the result.
1591 *
1592 * @remarks This doesn't currently work for all handles (like RTFILE).
1593 * @remarks x86: Requires a 486 or later.
1594 * @todo Rename ASMAtomicCmpWriteHandle
1595 */
1596#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1597# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1598 do { \
1599 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1600 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1601 } while (0)
1602#elif HC_ARCH_BITS == 64
1603# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1604 do { \
1605 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1606 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1607 } while (0)
1608#else
1609# error HC_ARCH_BITS
1610#endif
1611
1612
1613/** @def ASMAtomicCmpXchgSize
1614 * Atomically Compare and Exchange a value whose size might differ
1615 * between platforms or compilers, ordered.
1616 *
1617 * @param pu Pointer to the value to update.
1618 * @param uNew The new value to assign to *pu.
1619 * @param uOld The old value to compare *pu with.
1620 * @param fRc Where to store the result.
1621 *
1622 * @remarks x86: Requires a 486 or later.
1623 * @todo Rename ASMAtomicCmpWriteSize
1624 */
1625#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1626 do { \
1627 switch (sizeof(*(pu))) { \
1628 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1629 break; \
1630 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1631 break; \
1632 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1633 (fRc) = false; \
1634 break; \
1635 } \
1636 } while (0)
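
/*
 * Illustrative sketch for ASMAtomicCmpXchgSize() on a type whose width depends
 * on the target (RTHCUINTPTR is 32 or 64 bits depending on the host); the
 * variable names are made up:
 *
 *      static RTHCUINTPTR volatile s_uState = 0;
 *      bool fRc;
 *
 *      ASMAtomicCmpXchgSize(&s_uState, (RTHCUINTPTR)1, (RTHCUINTPTR)0, fRc);
 *      if (fRc)
 *          fWeOwnIt = true;    // the 0 -> 1 transition succeeded
 */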
1637
1638
1639/**
1640 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1641 * back old value, ordered.
1642 *
1643 * @returns true if xchg was done.
1644 * @returns false if xchg wasn't done.
1645 *
1646 * @param pu8 Pointer to the value to update.
1647 * @param u8New The new value to assign to *pu8.
1648 * @param u8Old The old value to compare *pu8 with.
1649 * @param pu8Old Pointer to store the old value at.
1650 *
1651 * @remarks x86: Requires a 486 or later.
1652 */
1653#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1654RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1655#else
1656DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1657{
1658# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1659# if RT_INLINE_ASM_GNU_STYLE
1660 uint8_t u8Ret;
1661 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1662 "setz %1\n\t"
1663 : "=m" (*pu8)
1664 , "=qm" (u8Ret)
1665 , "=a" (*pu8Old)
1666# if defined(RT_ARCH_X86)
1667 : "q" (u8New)
1668# else
1669 : "r" (u8New)
1670# endif
1671 , "a" (u8Old)
1672 , "m" (*pu8)
1673 : "cc");
1674 return (bool)u8Ret;
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1678
1679# else
1680 uint8_t u8Ret;
1681 __asm
1682 {
1683# ifdef RT_ARCH_AMD64
1684 mov rdx, [pu8]
1685# else
1686 mov edx, [pu8]
1687# endif
1688 mov eax, [u8Old]
1689 mov ecx, [u8New]
1690# ifdef RT_ARCH_AMD64
1691 lock cmpxchg [rdx], ecx
1692 mov rdx, [pu8Old]
1693 mov [rdx], eax
1694# else
1695 lock cmpxchg [edx], ecx
1696 mov edx, [pu8Old]
1697 mov [edx], eax
1698# endif
1699 setz al
1700 movzx eax, al
1701 mov [u8Ret], eax
1702 }
1703 return !!u8Ret;
1704# endif
1705
1706# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1707 union { uint8_t u; bool f; } fXchg;
1708 uint8_t u8ActualOld;
1709 uint8_t rcSpill;
1710 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1711 RTASM_ARM_DMB_SY
1712# if defined(RT_ARCH_ARM64)
1713 "ldaxrb %w[uOld], %[pMem]\n\t"
1714 "cmp %w[uOld], %w[uCmp]\n\t"
1715 "bne 1f\n\t" /* stop here if not equal */
1716 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1717 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1718 "mov %w[fXchg], #1\n\t"
1719# else
1720 "ldrexb %[uOld], %[pMem]\n\t"
1721 "teq %[uOld], %[uCmp]\n\t"
1722 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1723 "bne 1f\n\t" /* stop here if not equal */
1724 "cmp %[rc], #0\n\t"
1725 "bne .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1726 "mov %[fXchg], #1\n\t"
1727# endif
1728 "1:\n\t"
1729 : [pMem] "+m" (*pu8)
1730 , [uOld] "=&r" (u8ActualOld)
1731 , [rc] "=&r" (rcSpill)
1732 , [fXchg] "=&r" (fXchg.u)
1733 : [uCmp] "r" (u8Old)
1734 , [uNew] "r" (u8New)
1735 , "[fXchg]" (0)
1736 RTASM_ARM_DMB_SY_COMMA_IN_REG
1737 : "cc");
1738 *pu8Old = u8ActualOld;
1739 return fXchg.f;
1740
1741# else
1742# error "Port me"
1743# endif
1744}
1745#endif
1746
1747
1748/**
1749 * Atomically Compare and Exchange a signed 8-bit value, additionally
1750 * passes back old value, ordered.
1751 *
1752 * @returns true if xchg was done.
1753 * @returns false if xchg wasn't done.
1754 *
1755 * @param pi8 Pointer to the value to update.
1756 * @param i8New The new value to assign to *pi8.
1757 * @param i8Old The old value to compare *pi8 with.
1758 * @param pi8Old Pointer to store the old value at.
1759 *
1760 * @remarks x86: Requires a 486 or later.
1761 */
1762DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1763{
1764 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1765}
1766
1767
1768/**
1769 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1770 * back old value, ordered.
1771 *
1772 * @returns true if xchg was done.
1773 * @returns false if xchg wasn't done.
1774 *
1775 * @param pu16 Pointer to the value to update.
1776 * @param u16New The new value to assign to *pu16.
1777 * @param u16Old The old value to compare *pu16 with.
1778 * @param pu16Old Pointer to store the old value at.
1779 *
1780 * @remarks x86: Requires a 486 or later.
1781 */
1782#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1783RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1784#else
1785DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1786{
1787# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1788# if RT_INLINE_ASM_GNU_STYLE
1789 uint8_t u8Ret;
1790 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1791 "setz %1\n\t"
1792 : "=m" (*pu16)
1793 , "=qm" (u8Ret)
1794 , "=a" (*pu16Old)
1795 : "r" (u16New)
1796 , "a" (u16Old)
1797 , "m" (*pu16)
1798 : "cc");
1799 return (bool)u8Ret;
1800
1801# elif RT_INLINE_ASM_USES_INTRIN
1802 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1803
1804# else
1805 uint16_t u16Ret;
1806 __asm
1807 {
1808# ifdef RT_ARCH_AMD64
1809 mov rdx, [pu16]
1810# else
1811 mov edx, [pu16]
1812# endif
1813 mov ax, [u16Old]
1814 mov cx, [u16New]
1815# ifdef RT_ARCH_AMD64
1816 lock cmpxchg [rdx], cx
1817 mov rdx, [pu16Old]
1818 mov [rdx], ax
1819# else
1820 lock cmpxchg [edx], cx
1821 mov edx, [pu16Old]
1822 mov [edx], ax
1823# endif
1824 setz al
1825 movzx eax, al
1826 mov [u16Ret], ax
1827 }
1828 return !!u16Ret;
1829# endif
1830
1831# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1832 union { uint16_t u; bool f; } fXchg;
1833 uint16_t u16ActualOld;
1834 uint16_t rcSpill;
1835 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1836 RTASM_ARM_DMB_SY
1837# if defined(RT_ARCH_ARM64)
1838 "ldaxrh %w[uOld], %[pMem]\n\t"
1839 "cmp %w[uOld], %w[uCmp]\n\t"
1840 "bne 1f\n\t" /* stop here if not equal */
1841 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1842 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1843 "mov %w[fXchg], #1\n\t"
1844# else
1845 "ldrexh %[uOld], %[pMem]\n\t"
1846 "teq %[uOld], %[uCmp]\n\t"
1847 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1848 "bne 1f\n\t" /* stop here if not equal */
1849 "cmp %[rc], #0\n\t"
1850 "bne .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1851 "mov %[fXchg], #1\n\t"
1852# endif
1853 "1:\n\t"
1854 : [pMem] "+m" (*pu16)
1855 , [uOld] "=&r" (u16ActualOld)
1856 , [rc] "=&r" (rcSpill)
1857 , [fXchg] "=&r" (fXchg.u)
1858 : [uCmp] "r" (u16Old)
1859 , [uNew] "r" (u16New)
1860 , "[fXchg]" (0)
1861 RTASM_ARM_DMB_SY_COMMA_IN_REG
1862 : "cc");
1863 *pu16Old = u16ActualOld;
1864 return fXchg.f;
1865
1866# else
1867# error "Port me"
1868# endif
1869}
1870#endif
1871
1872
1873/**
1874 * Atomically Compare and Exchange a signed 16-bit value, additionally
1875 * passes back old value, ordered.
1876 *
1877 * @returns true if xchg was done.
1878 * @returns false if xchg wasn't done.
1879 *
1880 * @param pi16 Pointer to the value to update.
1881 * @param i16New The new value to assign to *pi16.
1882 * @param i16Old The old value to compare *pi16 with.
1883 * @param pi16Old Pointer to store the old value at.
1884 *
1885 * @remarks x86: Requires a 486 or later.
1886 */
1887DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1888{
1889 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1890}
1891
1892
1893/**
1894 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1895 * passes back old value, ordered.
1896 *
1897 * @returns true if xchg was done.
1898 * @returns false if xchg wasn't done.
1899 *
1900 * @param pu32 Pointer to the value to update.
1901 * @param u32New The new value to assign to *pu32.
1902 * @param u32Old The old value to compare *pu32 with.
1903 * @param pu32Old Pointer to store the old value at.
1904 *
1905 * @remarks x86: Requires a 486 or later.
1906 */
1907#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1908RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1909#else
1910DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1911{
1912# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1913# if RT_INLINE_ASM_GNU_STYLE
1914 uint8_t u8Ret;
1915 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1916 "setz %1\n\t"
1917 : "=m" (*pu32)
1918 , "=qm" (u8Ret)
1919 , "=a" (*pu32Old)
1920 : "r" (u32New)
1921 , "a" (u32Old)
1922 , "m" (*pu32)
1923 : "cc");
1924 return (bool)u8Ret;
1925
1926# elif RT_INLINE_ASM_USES_INTRIN
1927 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1928
1929# else
1930 uint32_t u32Ret;
1931 __asm
1932 {
1933# ifdef RT_ARCH_AMD64
1934 mov rdx, [pu32]
1935# else
1936 mov edx, [pu32]
1937# endif
1938 mov eax, [u32Old]
1939 mov ecx, [u32New]
1940# ifdef RT_ARCH_AMD64
1941 lock cmpxchg [rdx], ecx
1942 mov rdx, [pu32Old]
1943 mov [rdx], eax
1944# else
1945 lock cmpxchg [edx], ecx
1946 mov edx, [pu32Old]
1947 mov [edx], eax
1948# endif
1949 setz al
1950 movzx eax, al
1951 mov [u32Ret], eax
1952 }
1953 return !!u32Ret;
1954# endif
1955
1956# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1957 union { uint32_t u; bool f; } fXchg;
1958 uint32_t u32ActualOld;
1959 uint32_t rcSpill;
1960 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1961 RTASM_ARM_DMB_SY
1962# if defined(RT_ARCH_ARM64)
1963 "ldaxr %w[uOld], %[pMem]\n\t"
1964 "cmp %w[uOld], %w[uCmp]\n\t"
1965 "bne 1f\n\t" /* stop here if not equal */
1966 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1967 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1968 "mov %w[fXchg], #1\n\t"
1969# else
1970 "ldrex %[uOld], %[pMem]\n\t"
1971 "teq %[uOld], %[uCmp]\n\t"
1972 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1973 "bne 1f\n\t" /* stop here if not equal */
1974 "cmp %[rc], #0\n\t"
1975 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1976 "mov %[fXchg], #1\n\t"
1977# endif
1978 "1:\n\t"
1979 : [pMem] "+m" (*pu32)
1980 , [uOld] "=&r" (u32ActualOld)
1981 , [rc] "=&r" (rcSpill)
1982 , [fXchg] "=&r" (fXchg.u)
1983 : [uCmp] "r" (u32Old)
1984 , [uNew] "r" (u32New)
1985 , "[fXchg]" (0)
1986 RTASM_ARM_DMB_SY_COMMA_IN_REG
1987 : "cc");
1988 *pu32Old = u32ActualOld;
1989 return fXchg.f;
1990
1991# else
1992# error "Port me"
1993# endif
1994}
1995#endif
1996
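/*
 * A minimal usage sketch for ASMAtomicCmpXchgExU32 (illustrative only;
 * g_fFlags and the function name are hypothetical): because the Ex variant
 * passes back the value actually found in memory, a failed attempt can retry
 * without issuing a separate re-read.
 */
#if 0 /* usage sketch, not compiled */
static volatile uint32_t g_fFlags; /* hypothetical flag word */

DECLINLINE(void) exampleSetFlagBit0(void)
{
    uint32_t u32Seen = 0; /* first failure fills in the real current value */
    while (!ASMAtomicCmpXchgExU32(&g_fFlags, u32Seen | RT_BIT_32(0), u32Seen, &u32Seen))
    { /* u32Seen now holds the value actually observed; try again with it. */ }
}
#endif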
1997
1998/**
1999 * Atomically Compare and Exchange a signed 32-bit value, additionally
2000 * passes back old value, ordered.
2001 *
2002 * @returns true if xchg was done.
2003 * @returns false if xchg wasn't done.
2004 *
2005 * @param pi32 Pointer to the value to update.
2006 * @param i32New The new value to assign to *pi32.
2007 * @param i32Old The old value to compare *pi32 with.
2008 * @param pi32Old Pointer to store the old value at.
2009 *
2010 * @remarks x86: Requires a 486 or later.
2011 */
2012DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2013{
2014 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2015}
2016
2017
2018/**
2019 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2020 * passing back old value, ordered.
2021 *
2022 * @returns true if xchg was done.
2023 * @returns false if xchg wasn't done.
2024 *
2025 * @param pu64 Pointer to the 64-bit variable to update.
2026 * @param u64New The 64-bit value to assign to *pu64.
2027 * @param u64Old The value to compare with.
2028 * @param pu64Old Pointer to store the old value at.
2029 *
2030 * @remarks x86: Requires a Pentium or later.
2031 */
2032#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2033 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2034RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2035#else
2036DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2037{
2038# if RT_INLINE_ASM_USES_INTRIN
2039 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2040
2041# elif defined(RT_ARCH_AMD64)
2042# if RT_INLINE_ASM_GNU_STYLE
2043 uint8_t u8Ret;
2044 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2045 "setz %1\n\t"
2046 : "=m" (*pu64)
2047 , "=qm" (u8Ret)
2048 , "=a" (*pu64Old)
2049 : "r" (u64New)
2050 , "a" (u64Old)
2051 , "m" (*pu64)
2052 : "cc");
2053 return (bool)u8Ret;
2054# else
2055 bool fRet;
2056 __asm
2057 {
2058 mov rdx, [pu64]
2059 mov rax, [u64Old]
2060 mov rcx, [u64New]
2061 lock cmpxchg [rdx], rcx
2062 mov rdx, [pu64Old]
2063 mov [rdx], rax
2064 setz al
2065 mov [fRet], al
2066 }
2067 return fRet;
2068# endif
2069
2070# elif defined(RT_ARCH_X86)
2071# if RT_INLINE_ASM_GNU_STYLE
2072 uint64_t u64Ret;
2073# if defined(PIC) || defined(__PIC__)
2074 /* NB: this code uses a memory clobber description, because the clean
2075 * solution with an output value for *pu64 makes gcc run out of registers.
2076 * This will cause suboptimal code, and anyone with a better solution is
2077 * welcome to improve this. */
2078 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2079 "lock; cmpxchg8b %3\n\t"
2080 "xchgl %%ebx, %1\n\t"
2081 : "=A" (u64Ret)
2082 : "DS" ((uint32_t)u64New)
2083 , "c" ((uint32_t)(u64New >> 32))
2084 , "m" (*pu64)
2085 , "0" (u64Old)
2086 : "memory"
2087 , "cc" );
2088# else /* !PIC */
2089 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2090 : "=A" (u64Ret)
2091 , "=m" (*pu64)
2092 : "b" ((uint32_t)u64New)
2093 , "c" ((uint32_t)(u64New >> 32))
2094 , "m" (*pu64)
2095 , "0" (u64Old)
2096 : "cc");
2097# endif
2098 *pu64Old = u64Ret;
2099 return u64Ret == u64Old;
2100# else
2101 uint32_t u32Ret;
2102 __asm
2103 {
2104 mov ebx, dword ptr [u64New]
2105 mov ecx, dword ptr [u64New + 4]
2106 mov edi, [pu64]
2107 mov eax, dword ptr [u64Old]
2108 mov edx, dword ptr [u64Old + 4]
2109 lock cmpxchg8b [edi]
2110 mov ebx, [pu64Old]
2111 mov [ebx], eax
2112 setz al
2113 movzx eax, al
2114 add ebx, 4
2115 mov [ebx], edx
2116 mov dword ptr [u32Ret], eax
2117 }
2118 return !!u32Ret;
2119# endif
2120
2121# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2122 union { uint32_t u; bool f; } fXchg;
2123 uint64_t u64ActualOld;
2124 uint32_t rcSpill;
2125 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2126 RTASM_ARM_DMB_SY
2127# if defined(RT_ARCH_ARM64)
2128 "ldaxr %[uOld], %[pMem]\n\t"
2129 "cmp %[uOld], %[uCmp]\n\t"
2130 "bne 1f\n\t" /* stop here if not equal */
2131 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2132 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2133 "mov %w[fXchg], #1\n\t"
2134# else
2135 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2136 "teq %[uOld], %[uCmp]\n\t"
2137 "teqeq %H[uOld], %H[uCmp]\n\t"
2138 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2139 "bne 1f\n\t" /* stop here if not equal */
2140 "cmp %[rc], #0\n\t"
2141 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2142 "mov %[fXchg], #1\n\t"
2143# endif
2144 "1:\n\t"
2145 : [pMem] "+m" (*pu64)
2146 , [uOld] "=&r" (u64ActualOld)
2147 , [rc] "=&r" (rcSpill)
2148 , [fXchg] "=&r" (fXchg.u)
2149 : [uCmp] "r" (u64Old)
2150 , [uNew] "r" (u64New)
2151 , "[fXchg]" (0)
2152 RTASM_ARM_DMB_SY_COMMA_IN_REG
2153 : "cc");
2154 *pu64Old = u64ActualOld;
2155 return fXchg.f;
2156
2157# else
2158# error "Port me"
2159# endif
2160}
2161#endif
2162
2163
2164/**
2165 * Atomically Compare and exchange a signed 64-bit value, additionally
2166 * passing back old value, ordered.
2167 *
2168 * @returns true if xchg was done.
2169 * @returns false if xchg wasn't done.
2170 *
2171 * @param pi64 Pointer to the 64-bit variable to update.
2172 * @param i64 The 64-bit value to assign to *pi64.
2173 * @param i64Old The value to compare with.
2174 * @param pi64Old Pointer to store the old value at.
2175 *
2176 * @remarks x86: Requires a Pentium or later.
2177 */
2178DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2179{
2180 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2181}
2182
2183/** @def ASMAtomicCmpXchgExHandle
2184 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2185 *
2186 * @param ph Pointer to the value to update.
2187 * @param hNew The new value to assign to *ph.
2188 * @param hOld The old value to compare *ph with.
2189 * @param fRc Where to store the result.
2190 * @param phOldVal Pointer to where to store the old value.
2191 *
2192 * @remarks This doesn't currently work for all handles (like RTFILE).
2193 */
2194#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2195# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2196 do { \
2197 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2198 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
2199 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2200 } while (0)
2201#elif HC_ARCH_BITS == 64
2202# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2203 do { \
2204 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2205 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2206 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2207 } while (0)
2208#else
2209# error HC_ARCH_BITS
2210#endif
2211
2212
2213/** @def ASMAtomicCmpXchgExSize
2214 * Atomically Compare and Exchange a value whose size might differ
2215 * between platforms or compilers. Additionally passes back the old value.
2216 *
2217 * @param pu Pointer to the value to update.
2218 * @param uNew The new value to assign to *pu.
2219 * @param uOld The old value to compare *pu with.
2220 * @param fRc Where to store the result.
2221 * @param puOldVal Pointer to where to store the old value.
2222 *
2223 * @remarks x86: Requires a 486 or later.
2224 */
2225#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2226 do { \
2227 switch (sizeof(*(pu))) { \
2228 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
2229 break; \
2230 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
2231 break; \
2232 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2233 (fRc) = false; \
2234 *(puOldVal) = 0; \
2235 break; \
2236 } \
2237 } while (0)
2238
2239
2240/**
2241 * Atomically Compare and Exchange a pointer value, additionally
2242 * passing back old value, ordered.
2243 *
2244 * @returns true if xchg was done.
2245 * @returns false if xchg wasn't done.
2246 *
2247 * @param ppv Pointer to the value to update.
2248 * @param pvNew The new value to assign to *ppv.
2249 * @param pvOld The old value to compare *ppv with.
2250 * @param ppvOld Pointer to store the old value at.
2251 *
2252 * @remarks x86: Requires a 486 or later.
2253 */
2254DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2255 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2256{
2257#if ARCH_BITS == 32 || ARCH_BITS == 16
2258 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2259#elif ARCH_BITS == 64
2260 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2261#else
2262# error "ARCH_BITS is bogus"
2263#endif
2264}
2265
2266
2267/**
2268 * Atomically Compare and Exchange a pointer value, additionally
2269 * passing back old value, ordered.
2270 *
2271 * @returns true if xchg was done.
2272 * @returns false if xchg wasn't done.
2273 *
2274 * @param ppv Pointer to the value to update.
2275 * @param pvNew The new value to assign to *ppv.
2276 * @param pvOld The old value to compare *ppv with.
2277 * @param ppvOld Pointer to store the old value at.
2278 *
2279 * @remarks This is relatively type safe on GCC platforms.
2280 * @remarks x86: Requires a 486 or later.
2281 */
2282#ifdef __GNUC__
2283# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2284 __extension__ \
2285 ({\
2286 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2287 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2288 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2289 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2290 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2291 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2292 (void **)ppvOldTypeChecked); \
2293 fMacroRet; \
2294 })
2295#else
2296# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2297 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2298#endif
2299
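/*
 * A minimal usage sketch for ASMAtomicCmpXchgExPtr (illustrative only; the
 * EXAMPLENODE type, g_pHead and the function name are hypothetical): a push
 * onto a simple lock-free stack, where the Ex form refreshes the observed
 * head pointer on every failed attempt.  A matching pop would additionally
 * have to deal with the ABA problem, which is out of scope here.
 */
#if 0 /* usage sketch, not compiled */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    uint32_t            uValue;
} EXAMPLENODE;

static EXAMPLENODE * volatile g_pHead; /* hypothetical stack head */

DECLINLINE(void) examplePushNode(EXAMPLENODE *pNode)
{
    EXAMPLENODE *pOldHead = NULL; /* a failed exchange fills in the real head */
    do
    {
        pNode->pNext = pOldHead;  /* link to the head we believe is current */
    } while (!ASMAtomicCmpXchgExPtr(&g_pHead, pNode, pOldHead, &pOldHead));
}
#endif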
2300
2301/**
2302 * Virtualization unfriendly serializing instruction, always exits.
2303 */
2304#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2305RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2306#else
2307DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2308{
2309# if RT_INLINE_ASM_GNU_STYLE
2310 RTCCUINTREG xAX = 0;
2311# ifdef RT_ARCH_AMD64
2312 __asm__ __volatile__ ("cpuid"
2313 : "=a" (xAX)
2314 : "0" (xAX)
2315 : "rbx", "rcx", "rdx", "memory");
2316# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2317 __asm__ __volatile__ ("push %%ebx\n\t"
2318 "cpuid\n\t"
2319 "pop %%ebx\n\t"
2320 : "=a" (xAX)
2321 : "0" (xAX)
2322 : "ecx", "edx", "memory");
2323# else
2324 __asm__ __volatile__ ("cpuid"
2325 : "=a" (xAX)
2326 : "0" (xAX)
2327 : "ebx", "ecx", "edx", "memory");
2328# endif
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 int aInfo[4];
2332 _ReadWriteBarrier();
2333 __cpuid(aInfo, 0);
2334
2335# else
2336 __asm
2337 {
2338 push ebx
2339 xor eax, eax
2340 cpuid
2341 pop ebx
2342 }
2343# endif
2344}
2345#endif
2346
2347/**
2348 * Virtualization friendly serializing instruction, though more expensive.
2349 */
2350#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2351RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2352#else
2353DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2354{
2355# if RT_INLINE_ASM_GNU_STYLE
2356# ifdef RT_ARCH_AMD64
2357 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2358 "subq $128, %%rsp\n\t" /*redzone*/
2359 "mov %%ss, %%eax\n\t"
2360 "pushq %%rax\n\t"
2361 "pushq %%r10\n\t"
2362 "pushfq\n\t"
2363 "movl %%cs, %%eax\n\t"
2364 "pushq %%rax\n\t"
2365 "leaq 1f(%%rip), %%rax\n\t"
2366 "pushq %%rax\n\t"
2367 "iretq\n\t"
2368 "1:\n\t"
2369 ::: "rax", "r10", "memory", "cc");
2370# else
2371 __asm__ __volatile__ ("pushfl\n\t"
2372 "pushl %%cs\n\t"
2373 "pushl $1f\n\t"
2374 "iretl\n\t"
2375 "1:\n\t"
2376 ::: "memory");
2377# endif
2378
2379# else
2380 __asm
2381 {
2382 pushfd
2383 push cs
2384 push la_ret
2385 iretd
2386 la_ret:
2387 }
2388# endif
2389}
2390#endif
2391
2392/**
2393 * Virtualization friendlier serializing instruction, may still cause exits.
2394 */
2395#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2396RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2397#else
2398DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2399{
2400# if RT_INLINE_ASM_GNU_STYLE
2401 /* rdtscp is not supported by ancient linux build VM of course :-( */
2402# ifdef RT_ARCH_AMD64
2403 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2404 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2405# else
2406 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2407 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2408# endif
2409# else
2410# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2411 uint32_t uIgnore;
2412 _ReadWriteBarrier();
2413 (void)__rdtscp(&uIgnore);
2414 (void)uIgnore;
2415# else
2416 __asm
2417 {
2418 rdtscp
2419 }
2420# endif
2421# endif
2422}
2423#endif
2424
2425
2426/**
2427 * Serialize Instruction (both data store and instruction flush).
2428 */
2429#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2430# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2431#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2432# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2433#elif defined(RT_ARCH_SPARC64)
2434RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2435#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2436DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2437{
2438 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2439}
2440#else
2441# error "Port me"
2442#endif
2443
2444
2445/**
2446 * Memory fence, waits for any pending writes and reads to complete.
2447 * @note No implicit compiler barrier (which is probably stupid).
2448 */
2449DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2450{
2451#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2452# if RT_INLINE_ASM_GNU_STYLE
2453 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2454# elif RT_INLINE_ASM_USES_INTRIN
2455 _mm_mfence();
2456# else
2457 __asm
2458 {
2459 _emit 0x0f
2460 _emit 0xae
2461 _emit 0xf0
2462 }
2463# endif
2464#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2465 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2466#elif ARCH_BITS == 16
2467 uint16_t volatile u16;
2468 ASMAtomicXchgU16(&u16, 0);
2469#else
2470 uint32_t volatile u32;
2471 ASMAtomicXchgU32(&u32, 0);
2472#endif
2473}
2474
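/*
 * A minimal usage sketch for ASMMemoryFence (illustrative only; both flag
 * variables and the function name are hypothetical): the full fence is what
 * keeps a later load from being reordered ahead of an earlier store, e.g. in
 * a Dekker/Peterson style "announce, then check the peer" handshake.  As the
 * note above says, there is no implied compiler barrier.
 */
#if 0 /* usage sketch, not compiled */
static volatile uint32_t g_fMeWaiting;   /* hypothetical */
static volatile uint32_t g_fPeerWaiting; /* hypothetical */

DECLINLINE(bool) exampleAnnounceAndCheckPeer(void)
{
    g_fMeWaiting = 1;           /* store my flag ... */
    ASMMemoryFence();           /* ... and order it before the load below */
    return g_fPeerWaiting != 0; /* only now look at the peer's flag */
}
#endif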
2475
2476/**
2477 * Write fence, waits for any pending writes to complete.
2478 * @note No implicit compiler barrier (which is probably stupid).
2479 */
2480DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2481{
2482#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2483# if RT_INLINE_ASM_GNU_STYLE
2484 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2485# elif RT_INLINE_ASM_USES_INTRIN
2486 _mm_sfence();
2487# else
2488 __asm
2489 {
2490 _emit 0x0f
2491 _emit 0xae
2492 _emit 0xf8
2493 }
2494# endif
2495#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2496 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2497#else
2498 ASMMemoryFence();
2499#endif
2500}
2501
2502
2503/**
2504 * Read fence, waits for any pending reads to complete.
2505 * @note No implicit compiler barrier (which is probably stupid).
2506 */
2507DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2508{
2509#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2510# if RT_INLINE_ASM_GNU_STYLE
2511 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2512# elif RT_INLINE_ASM_USES_INTRIN
2513 _mm_lfence();
2514# else
2515 __asm
2516 {
2517 _emit 0x0f
2518 _emit 0xae
2519 _emit 0xe8
2520 }
2521# endif
2522#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2523 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2524#else
2525 ASMMemoryFence();
2526#endif
2527}
2528
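/*
 * A minimal usage sketch for ASMWriteFence / ASMReadFence (illustrative only;
 * the payload, flag and function names are hypothetical): a publish/consume
 * pair where the producer orders the payload store before the ready flag and
 * the consumer orders the flag check before the payload load.  As noted
 * above, neither fence implies a compiler barrier.
 */
#if 0 /* usage sketch, not compiled */
static uint32_t          g_u32Payload;    /* hypothetical */
static volatile uint32_t g_fPayloadReady; /* hypothetical */

DECLINLINE(void) examplePublish(uint32_t u32Value)
{
    g_u32Payload = u32Value; /* prepare the data ... */
    ASMWriteFence();         /* ... and only then raise the flag */
    g_fPayloadReady = 1;
}

DECLINLINE(uint32_t) exampleConsume(void)
{
    while (!g_fPayloadReady)
    { /* spin */ }
    ASMReadFence();          /* don't let the payload load pass the flag load */
    return g_u32Payload;
}
#endif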
2529
2530/**
2531 * Atomically reads an unsigned 8-bit value, ordered.
2532 *
2533 * @returns Current *pu8 value
2534 * @param pu8 Pointer to the 8-bit variable to read.
2535 */
2536DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2537{
2538#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2539 uint32_t u32;
2540 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2541 RTASM_ARM_DMB_SY
2542# if defined(RT_ARCH_ARM64)
2543 "ldxrb %w[uDst], %[pMem]\n\t"
2544# else
2545 "ldrexb %[uDst], %[pMem]\n\t"
2546# endif
2547 : [uDst] "=&r" (u32)
2548 : [pMem] "m" (*pu8)
2549 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2550 return (uint8_t)u32;
2551#else
2552 ASMMemoryFence();
2553 return *pu8; /* byte reads are atomic on x86 */
2554#endif
2555}
2556
2557
2558/**
2559 * Atomically reads an unsigned 8-bit value, unordered.
2560 *
2561 * @returns Current *pu8 value
2562 * @param pu8 Pointer to the 8-bit variable to read.
2563 */
2564DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2565{
2566#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2567 uint32_t u32;
2568 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2569# if defined(RT_ARCH_ARM64)
2570 "ldxrb %w[uDst], %[pMem]\n\t"
2571# else
2572 "ldrexb %[uDst], %[pMem]\n\t"
2573# endif
2574 : [uDst] "=&r" (u32)
2575 : [pMem] "m" (*pu8));
2576 return (uint8_t)u32;
2577#else
2578 return *pu8; /* byte reads are atomic on x86 */
2579#endif
2580}
2581
2582
2583/**
2584 * Atomically reads a signed 8-bit value, ordered.
2585 *
2586 * @returns Current *pi8 value
2587 * @param pi8 Pointer to the 8-bit variable to read.
2588 */
2589DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2590{
2591 ASMMemoryFence();
2592#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2593 int32_t i32;
2594 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2595 RTASM_ARM_DMB_SY
2596# if defined(RT_ARCH_ARM64)
2597 "ldxrb %w[iDst], %[pMem]\n\t"
2598# else
2599 "ldrexb %[iDst], %[pMem]\n\t"
2600# endif
2601 : [iDst] "=&r" (i32)
2602 : [pMem] "m" (*pi8)
2603 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2604 return (int8_t)i32;
2605#else
2606 return *pi8; /* byte reads are atomic on x86 */
2607#endif
2608}
2609
2610
2611/**
2612 * Atomically reads a signed 8-bit value, unordered.
2613 *
2614 * @returns Current *pi8 value
2615 * @param pi8 Pointer to the 8-bit variable to read.
2616 */
2617DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2618{
2619#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2620 int32_t i32;
2621 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2622# if defined(RT_ARCH_ARM64)
2623 "ldxrb %w[iDst], %[pMem]\n\t"
2624# else
2625 "ldrexb %[iDst], %[pMem]\n\t"
2626# endif
2627 : [iDst] "=&r" (i32)
2628 : [pMem] "m" (*pi8));
2629 return (int8_t)i32;
2630#else
2631 return *pi8; /* byte reads are atomic on x86 */
2632#endif
2633}
2634
2635
2636/**
2637 * Atomically reads an unsigned 16-bit value, ordered.
2638 *
2639 * @returns Current *pu16 value
2640 * @param pu16 Pointer to the 16-bit variable to read.
2641 */
2642DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2643{
2644 Assert(!((uintptr_t)pu16 & 1));
2645#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2646 uint32_t u32;
2647 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2648 RTASM_ARM_DMB_SY
2649# if defined(RT_ARCH_ARM64)
2650 "ldxrh %w[uDst], %[pMem]\n\t"
2651# else
2652 "ldrexh %[uDst], %[pMem]\n\t"
2653# endif
2654 : [uDst] "=&r" (u32)
2655 : [pMem] "m" (*pu16)
2656 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2657 return (uint16_t)u32;
2658#else
2659 ASMMemoryFence();
2660 return *pu16;
2661#endif
2662}
2663
2664
2665/**
2666 * Atomically reads an unsigned 16-bit value, unordered.
2667 *
2668 * @returns Current *pu16 value
2669 * @param pu16 Pointer to the 16-bit variable to read.
2670 */
2671DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2672{
2673 Assert(!((uintptr_t)pu16 & 1));
2674#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2675 uint32_t u32;
2676 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2677# if defined(RT_ARCH_ARM64)
2678 "ldxrh %w[uDst], %[pMem]\n\t"
2679# else
2680 "ldrexh %[uDst], %[pMem]\n\t"
2681# endif
2682 : [uDst] "=&r" (u32)
2683 : [pMem] "m" (*pu16));
2684 return (uint16_t)u32;
2685#else
2686 return *pu16;
2687#endif
2688}
2689
2690
2691/**
2692 * Atomically reads a signed 16-bit value, ordered.
2693 *
2694 * @returns Current *pi16 value
2695 * @param pi16 Pointer to the 16-bit variable to read.
2696 */
2697DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2698{
2699 Assert(!((uintptr_t)pi16 & 1));
2700#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2701 int32_t i32;
2702 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2703 RTASM_ARM_DMB_SY
2704# if defined(RT_ARCH_ARM64)
2705 "ldxrh %w[iDst], %[pMem]\n\t"
2706# else
2707 "ldrexh %[iDst], %[pMem]\n\t"
2708# endif
2709 : [iDst] "=&r" (i32)
2710 : [pMem] "m" (*pi16)
2711 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2712 return (int16_t)i32;
2713#else
2714 ASMMemoryFence();
2715 return *pi16;
2716#endif
2717}
2718
2719
2720/**
2721 * Atomically reads a signed 16-bit value, unordered.
2722 *
2723 * @returns Current *pi16 value
2724 * @param pi16 Pointer to the 16-bit variable to read.
2725 */
2726DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2727{
2728 Assert(!((uintptr_t)pi16 & 1));
2729#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2730 int32_t i32;
2731 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2732# if defined(RT_ARCH_ARM64)
2733 "ldxrh %w[iDst], %[pMem]\n\t"
2734# else
2735 "ldrexh %[iDst], %[pMem]\n\t"
2736# endif
2737 : [iDst] "=&r" (i32)
2738 : [pMem] "m" (*pi16));
2739 return (int16_t)i32;
2740#else
2741 return *pi16;
2742#endif
2743}
2744
2745
2746/**
2747 * Atomically reads an unsigned 32-bit value, ordered.
2748 *
2749 * @returns Current *pu32 value
2750 * @param pu32 Pointer to the 32-bit variable to read.
2751 */
2752DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2753{
2754 Assert(!((uintptr_t)pu32 & 3));
2755#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2756 uint32_t u32;
2757 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2758 RTASM_ARM_DMB_SY
2759# if defined(RT_ARCH_ARM64)
2760 "ldxr %w[uDst], %[pMem]\n\t"
2761# else
2762 "ldrex %[uDst], %[pMem]\n\t"
2763# endif
2764 : [uDst] "=&r" (u32)
2765 : [pMem] "m" (*pu32)
2766 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2767 return u32;
2768#else
2769 ASMMemoryFence();
2770# if ARCH_BITS == 16
2771 AssertFailed(); /** @todo 16-bit */
2772# endif
2773 return *pu32;
2774#endif
2775}
2776
2777
2778/**
2779 * Atomically reads an unsigned 32-bit value, unordered.
2780 *
2781 * @returns Current *pu32 value
2782 * @param pu32 Pointer to the 32-bit variable to read.
2783 */
2784DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2785{
2786 Assert(!((uintptr_t)pu32 & 3));
2787#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2788 uint32_t u32;
2789 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2790# if defined(RT_ARCH_ARM64)
2791 "ldxr %w[uDst], %[pMem]\n\t"
2792# else
2793 "ldrex %[uDst], %[pMem]\n\t"
2794# endif
2795 : [uDst] "=&r" (u32)
2796 : [pMem] "m" (*pu32));
2797 return u32;
2798#else
2799# if ARCH_BITS == 16
2800 AssertFailed(); /** @todo 16-bit */
2801# endif
2802 return *pu32;
2803#endif
2804}
2805
2806
2807/**
2808 * Atomically reads a signed 32-bit value, ordered.
2809 *
2810 * @returns Current *pi32 value
2811 * @param pi32 Pointer to the 32-bit variable to read.
2812 */
2813DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2814{
2815 Assert(!((uintptr_t)pi32 & 3));
2816#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2817 int32_t i32;
2818 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2819 RTASM_ARM_DMB_SY
2820# if defined(RT_ARCH_ARM64)
2821 "ldxr %w[iDst], %[pMem]\n\t"
2822# else
2823 "ldrex %[iDst], %[pMem]\n\t"
2824# endif
2825 : [iDst] "=&r" (i32)
2826 : [pMem] "m" (*pi32)
2827 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2828 return i32;
2829#else
2830 ASMMemoryFence();
2831# if ARCH_BITS == 16
2832 AssertFailed(); /** @todo 16-bit */
2833# endif
2834 return *pi32;
2835#endif
2836}
2837
2838
2839/**
2840 * Atomically reads a signed 32-bit value, unordered.
2841 *
2842 * @returns Current *pi32 value
2843 * @param pi32 Pointer to the 32-bit variable to read.
2844 */
2845DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2846{
2847 Assert(!((uintptr_t)pi32 & 3));
2848#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2849 int32_t i32;
2850 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2851# if defined(RT_ARCH_ARM64)
2852 "ldxr %w[iDst], %[pMem]\n\t"
2853# else
2854 "ldrex %[iDst], %[pMem]\n\t"
2855# endif
2856 : [iDst] "=&r" (i32)
2857 : [pMem] "m" (*pi32));
2858 return i32;
2859
2860#else
2861# if ARCH_BITS == 16
2862 AssertFailed(); /** @todo 16-bit */
2863# endif
2864 return *pi32;
2865#endif
2866}
2867
2868
2869/**
2870 * Atomically reads an unsigned 64-bit value, ordered.
2871 *
2872 * @returns Current *pu64 value
2873 * @param pu64 Pointer to the 64-bit variable to read.
2874 * The memory pointed to must be writable.
2875 *
2876 * @remarks This may fault if the memory is read-only!
2877 * @remarks x86: Requires a Pentium or later.
2878 */
2879#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2880 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2881RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2882#else
2883DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2884{
2885 uint64_t u64;
2886# ifdef RT_ARCH_AMD64
2887 Assert(!((uintptr_t)pu64 & 7));
2888/*# if RT_INLINE_ASM_GNU_STYLE
2889 __asm__ __volatile__( "mfence\n\t"
2890 "movq %1, %0\n\t"
2891 : "=r" (u64)
2892 : "m" (*pu64));
2893# else
2894 __asm
2895 {
2896 mfence
2897 mov rdx, [pu64]
2898 mov rax, [rdx]
2899 mov [u64], rax
2900 }
2901# endif*/
2902 ASMMemoryFence();
2903 u64 = *pu64;
2904
2905# elif defined(RT_ARCH_X86)
2906# if RT_INLINE_ASM_GNU_STYLE
2907# if defined(PIC) || defined(__PIC__)
2908 uint32_t u32EBX = 0;
2909 Assert(!((uintptr_t)pu64 & 7));
2910 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2911 "lock; cmpxchg8b (%5)\n\t"
2912 "movl %3, %%ebx\n\t"
2913 : "=A" (u64)
2914# if RT_GNUC_PREREQ(4, 3)
2915 , "+m" (*pu64)
2916# else
2917 , "=m" (*pu64)
2918# endif
2919 : "0" (0ULL)
2920 , "m" (u32EBX)
2921 , "c" (0)
2922 , "S" (pu64)
2923 : "cc");
2924# else /* !PIC */
2925 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2926 : "=A" (u64)
2927 , "+m" (*pu64)
2928 : "0" (0ULL)
2929 , "b" (0)
2930 , "c" (0)
2931 : "cc");
2932# endif
2933# else
2934 Assert(!((uintptr_t)pu64 & 7));
2935 __asm
2936 {
2937 xor eax, eax
2938 xor edx, edx
2939 mov edi, pu64
2940 xor ecx, ecx
2941 xor ebx, ebx
2942 lock cmpxchg8b [edi]
2943 mov dword ptr [u64], eax
2944 mov dword ptr [u64 + 4], edx
2945 }
2946# endif
2947
2948# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2949 Assert(!((uintptr_t)pu64 & 7));
2950 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2951 RTASM_ARM_DMB_SY
2952# if defined(RT_ARCH_ARM64)
2953 "ldxr %[uDst], %[pMem]\n\t"
2954# else
2955 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2956# endif
2957 : [uDst] "=&r" (u64)
2958 : [pMem] "m" (*pu64)
2959 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2960
2961# else
2962# error "Port me"
2963# endif
2964 return u64;
2965}
2966#endif
2967
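/*
 * A minimal usage sketch for ASMAtomicReadU64 (illustrative only; the shared
 * timestamp and the function name are hypothetical): on 32-bit x86 a plain
 * 64-bit load can tear into two 32-bit loads, which is why the function falls
 * back to lock cmpxchg8b there -- and also why the memory must be writable.
 */
#if 0 /* usage sketch, not compiled */
static volatile uint64_t g_u64LastTimestamp; /* hypothetical, shared between threads */

DECLINLINE(uint64_t) exampleGetTimestamp(void)
{
    return ASMAtomicReadU64(&g_u64LastTimestamp); /* all 64 bits in one shot */
}
#endif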
2968
2969/**
2970 * Atomically reads an unsigned 64-bit value, unordered.
2971 *
2972 * @returns Current *pu64 value
2973 * @param pu64 Pointer to the 64-bit variable to read.
2974 * The memory pointed to must be writable.
2975 *
2976 * @remarks This may fault if the memory is read-only!
2977 * @remarks x86: Requires a Pentium or later.
2978 */
2979#if !defined(RT_ARCH_AMD64) \
2980 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2981 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2982RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2983#else
2984DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2985{
2986 uint64_t u64;
2987# ifdef RT_ARCH_AMD64
2988 Assert(!((uintptr_t)pu64 & 7));
2989/*# if RT_INLINE_ASM_GNU_STYLE
2990 Assert(!((uintptr_t)pu64 & 7));
2991 __asm__ __volatile__("movq %1, %0\n\t"
2992 : "=r" (u64)
2993 : "m" (*pu64));
2994# else
2995 __asm
2996 {
2997 mov rdx, [pu64]
2998 mov rax, [rdx]
2999 mov [u64], rax
3000 }
3001# endif */
3002 u64 = *pu64;
3003
3004# elif defined(RT_ARCH_X86)
3005# if RT_INLINE_ASM_GNU_STYLE
3006# if defined(PIC) || defined(__PIC__)
3007 uint32_t u32EBX = 0;
3008 uint32_t u32Spill;
3009 Assert(!((uintptr_t)pu64 & 7));
3010 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3011 "xor %%ecx,%%ecx\n\t"
3012 "xor %%edx,%%edx\n\t"
3013 "xchgl %%ebx, %3\n\t"
3014 "lock; cmpxchg8b (%4)\n\t"
3015 "movl %3, %%ebx\n\t"
3016 : "=A" (u64)
3017# if RT_GNUC_PREREQ(4, 3)
3018 , "+m" (*pu64)
3019# else
3020 , "=m" (*pu64)
3021# endif
3022 , "=c" (u32Spill)
3023 : "m" (u32EBX)
3024 , "S" (pu64)
3025 : "cc");
3026# else /* !PIC */
3027 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3028 : "=A" (u64)
3029 , "+m" (*pu64)
3030 : "0" (0ULL)
3031 , "b" (0)
3032 , "c" (0)
3033 : "cc");
3034# endif
3035# else
3036 Assert(!((uintptr_t)pu64 & 7));
3037 __asm
3038 {
3039 xor eax, eax
3040 xor edx, edx
3041 mov edi, pu64
3042 xor ecx, ecx
3043 xor ebx, ebx
3044 lock cmpxchg8b [edi]
3045 mov dword ptr [u64], eax
3046 mov dword ptr [u64 + 4], edx
3047 }
3048# endif
3049
3050# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3051 Assert(!((uintptr_t)pu64 & 7));
3052 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
3053# if defined(RT_ARCH_ARM64)
3054 "ldxr %[uDst], %[pMem]\n\t"
3055# else
3056 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3057# endif
3058 : [uDst] "=&r" (u64)
3059 : [pMem] "m" (*pu64));
3060
3061# else
3062# error "Port me"
3063# endif
3064 return u64;
3065}
3066#endif
3067
3068
3069/**
3070 * Atomically reads a signed 64-bit value, ordered.
3071 *
3072 * @returns Current *pi64 value
3073 * @param pi64 Pointer to the 64-bit variable to read.
3074 * The memory pointed to must be writable.
3075 *
3076 * @remarks This may fault if the memory is read-only!
3077 * @remarks x86: Requires a Pentium or later.
3078 */
3079DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3080{
3081 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3082}
3083
3084
3085/**
3086 * Atomically reads a signed 64-bit value, unordered.
3087 *
3088 * @returns Current *pi64 value
3089 * @param pi64 Pointer to the 64-bit variable to read.
3090 * The memory pointed to must be writable.
3091 *
3092 * @remarks This will fault if the memory is read-only!
3093 * @remarks x86: Requires a Pentium or later.
3094 */
3095DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3096{
3097 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3098}
3099
3100
3101/**
3102 * Atomically reads a size_t value, ordered.
3103 *
3104 * @returns Current *pcb value
3105 * @param pcb Pointer to the size_t variable to read.
3106 */
3107DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3108{
3109#if ARCH_BITS == 64
3110 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3111#elif ARCH_BITS == 32
3112 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3113#elif ARCH_BITS == 16
3114 AssertCompileSize(size_t, 2);
3115 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3116#else
3117# error "Unsupported ARCH_BITS value"
3118#endif
3119}
3120
3121
3122/**
3123 * Atomically reads a size_t value, unordered.
3124 *
3125 * @returns Current *pcb value
3126 * @param pcb Pointer to the size_t variable to read.
3127 */
3128DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3129{
3130#if ARCH_BITS == 64
3131 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3132#elif ARCH_BITS == 32
3133 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3134#elif ARCH_BITS == 16
3135 AssertCompileSize(size_t, 2);
3136 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3137#else
3138# error "Unsupported ARCH_BITS value"
3139#endif
3140}
3141
3142
3143/**
3144 * Atomically reads a pointer value, ordered.
3145 *
3146 * @returns Current *pv value
3147 * @param ppv Pointer to the pointer variable to read.
3148 *
3149 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
3150 * requires less typing (no casts).
3151 */
3152DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3153{
3154#if ARCH_BITS == 32 || ARCH_BITS == 16
3155 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3156#elif ARCH_BITS == 64
3157 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3158#else
3159# error "ARCH_BITS is bogus"
3160#endif
3161}
3162
3163/**
3164 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
3165 *
3166 * @returns Current *pv value
3167 * @param ppv Pointer to the pointer variable to read.
3168 * @param Type The type of *ppv, sans volatile.
3169 */
3170#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3171# define ASMAtomicReadPtrT(ppv, Type) \
3172 __extension__ \
3173 ({\
3174 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
3175 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
3176 pvTypeChecked; \
3177 })
3178#else
3179# define ASMAtomicReadPtrT(ppv, Type) \
3180 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3181#endif
3182
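/*
 * A minimal usage sketch for ASMAtomicReadPtrT (illustrative only; the
 * EXAMPLECFG type, g_pExampleCfg and the function name are hypothetical):
 * the typed variant avoids the void-pointer casts and, on GCC, checks that
 * the pointer types actually match.
 */
#if 0 /* usage sketch, not compiled */
typedef struct EXAMPLECFG
{
    uint32_t cItems;
} EXAMPLECFG;

static EXAMPLECFG * volatile g_pExampleCfg; /* hypothetical, set by another thread */

DECLINLINE(uint32_t) exampleGetItemCount(void)
{
    EXAMPLECFG *pCfg = ASMAtomicReadPtrT(&g_pExampleCfg, EXAMPLECFG *);
    return pCfg ? pCfg->cItems : 0;
}
#endif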
3183
3184/**
3185 * Atomically reads a pointer value, unordered.
3186 *
3187 * @returns Current *pv value
3188 * @param ppv Pointer to the pointer variable to read.
3189 *
3190 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
3191 * requires less typing (no casts).
3192 */
3193DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3194{
3195#if ARCH_BITS == 32 || ARCH_BITS == 16
3196 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3197#elif ARCH_BITS == 64
3198 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3199#else
3200# error "ARCH_BITS is bogus"
3201#endif
3202}
3203
3204
3205/**
3206 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
3207 *
3208 * @returns Current *pv value
3209 * @param ppv Pointer to the pointer variable to read.
3210 * @param Type The type of *ppv, sans volatile.
3211 */
3212#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3213# define ASMAtomicUoReadPtrT(ppv, Type) \
3214 __extension__ \
3215 ({\
3216 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3217 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
3218 pvTypeChecked; \
3219 })
3220#else
3221# define ASMAtomicUoReadPtrT(ppv, Type) \
3222 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3223#endif
3224
3225
3226/**
3227 * Atomically reads a boolean value, ordered.
3228 *
3229 * @returns Current *pf value
3230 * @param pf Pointer to the boolean variable to read.
3231 */
3232DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3233{
3234 ASMMemoryFence();
3235 return *pf; /* byte reads are atomic on x86 */
3236}
3237
3238
3239/**
3240 * Atomically reads a boolean value, unordered.
3241 *
3242 * @returns Current *pf value
3243 * @param pf Pointer to the boolean variable to read.
3244 */
3245DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3246{
3247 return *pf; /* byte reads are atomic on x86 */
3248}
3249
3250
3251/**
3252 * Atomically read a typical IPRT handle value, ordered.
3253 *
3254 * @param ph Pointer to the handle variable to read.
3255 * @param phRes Where to store the result.
3256 *
3257 * @remarks This doesn't currently work for all handles (like RTFILE).
3258 */
3259#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3260# define ASMAtomicReadHandle(ph, phRes) \
3261 do { \
3262 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3263 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3264 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3265 } while (0)
3266#elif HC_ARCH_BITS == 64
3267# define ASMAtomicReadHandle(ph, phRes) \
3268 do { \
3269 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3270 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3271 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3272 } while (0)
3273#else
3274# error HC_ARCH_BITS
3275#endif
3276
3277
3278/**
3279 * Atomically read a typical IPRT handle value, unordered.
3280 *
3281 * @param ph Pointer to the handle variable to read.
3282 * @param phRes Where to store the result.
3283 *
3284 * @remarks This doesn't currently work for all handles (like RTFILE).
3285 */
3286#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3287# define ASMAtomicUoReadHandle(ph, phRes) \
3288 do { \
3289 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3290 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3291 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3292 } while (0)
3293#elif HC_ARCH_BITS == 64
3294# define ASMAtomicUoReadHandle(ph, phRes) \
3295 do { \
3296 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3297 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3298 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3299 } while (0)
3300#else
3301# error HC_ARCH_BITS
3302#endif
3303
3304
3305/**
3306 * Atomically read a value which size might differ
3307 * between platforms or compilers, ordered.
3308 *
3309 * @param pu Pointer to the variable to read.
3310 * @param puRes Where to store the result.
3311 */
3312#define ASMAtomicReadSize(pu, puRes) \
3313 do { \
3314 switch (sizeof(*(pu))) { \
3315 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3316 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3317 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3318 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3319 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3320 } \
3321 } while (0)
3322
3323
3324/**
3325 * Atomically read a value which size might differ
3326 * between platforms or compilers, unordered.
3327 *
3328 * @param pu Pointer to the variable to read.
3329 * @param puRes Where to store the result.
3330 */
3331#define ASMAtomicUoReadSize(pu, puRes) \
3332 do { \
3333 switch (sizeof(*(pu))) { \
3334 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3335 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3336 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3337 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3338 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3339 } \
3340 } while (0)
3341
3342
3343/**
3344 * Atomically writes an unsigned 8-bit value, ordered.
3345 *
3346 * @param pu8 Pointer to the 8-bit variable.
3347 * @param u8 The 8-bit value to assign to *pu8.
3348 */
3349DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3350{
3351 /** @todo Any possible ARM32/ARM64 optimizations here? */
3352 ASMAtomicXchgU8(pu8, u8);
3353}
3354
3355
3356/**
3357 * Atomically writes an unsigned 8-bit value, unordered.
3358 *
3359 * @param pu8 Pointer to the 8-bit variable.
3360 * @param u8 The 8-bit value to assign to *pu8.
3361 */
3362DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3363{
3364 /** @todo Any possible ARM32/ARM64 improvements here? */
3365 *pu8 = u8; /* byte writes are atomic on x86 */
3366}
3367
3368
3369/**
3370 * Atomically writes a signed 8-bit value, ordered.
3371 *
3372 * @param pi8 Pointer to the 8-bit variable to read.
3373 * @param i8 The 8-bit value to assign to *pi8.
3374 */
3375DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3376{
3377 /** @todo Any possible ARM32/ARM64 optimizations here? */
3378 ASMAtomicXchgS8(pi8, i8);
3379}
3380
3381
3382/**
3383 * Atomically writes a signed 8-bit value, unordered.
3384 *
3385 * @param pi8 Pointer to the 8-bit variable to write.
3386 * @param i8 The 8-bit value to assign to *pi8.
3387 */
3388DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3389{
3390 *pi8 = i8; /* byte writes are atomic on x86 */
3391}
3392
3393
3394/**
3395 * Atomically writes an unsigned 16-bit value, ordered.
3396 *
3397 * @param pu16 Pointer to the 16-bit variable to write.
3398 * @param u16 The 16-bit value to assign to *pu16.
3399 */
3400DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3401{
3402 /** @todo Any possible ARM32/ARM64 optimizations here? */
3403 ASMAtomicXchgU16(pu16, u16);
3404}
3405
3406
3407/**
3408 * Atomically writes an unsigned 16-bit value, unordered.
3409 *
3410 * @param pu16 Pointer to the 16-bit variable to write.
3411 * @param u16 The 16-bit value to assign to *pu16.
3412 */
3413DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3414{
3415 Assert(!((uintptr_t)pu16 & 1));
3416 *pu16 = u16;
3417}
3418
3419
3420/**
3421 * Atomically writes a signed 16-bit value, ordered.
3422 *
3423 * @param pi16 Pointer to the 16-bit variable to write.
3424 * @param i16 The 16-bit value to assign to *pi16.
3425 */
3426DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3427{
3428 /** @todo Any possible ARM32/ARM64 optimizations here? */
3429 ASMAtomicXchgS16(pi16, i16);
3430}
3431
3432
3433/**
3434 * Atomically writes a signed 16-bit value, unordered.
3435 *
3436 * @param pi16 Pointer to the 16-bit variable to write.
3437 * @param i16 The 16-bit value to assign to *pi16.
3438 */
3439DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3440{
3441 Assert(!((uintptr_t)pi16 & 1));
3442 *pi16 = i16;
3443}
3444
3445
3446/**
3447 * Atomically writes an unsigned 32-bit value, ordered.
3448 *
3449 * @param pu32 Pointer to the 32-bit variable to write.
3450 * @param u32 The 32-bit value to assign to *pu32.
3451 */
3452DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3453{
3454 /** @todo Any possible ARM32/ARM64 optimizations here? */
3455 ASMAtomicXchgU32(pu32, u32);
3456}
3457
3458
3459/**
3460 * Atomically writes an unsigned 32-bit value, unordered.
3461 *
3462 * @param pu32 Pointer to the 32-bit variable to write.
3463 * @param u32 The 32-bit value to assign to *pu32.
3464 */
3465DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3466{
3467 Assert(!((uintptr_t)pu32 & 3));
3468#if ARCH_BITS >= 32
3469 *pu32 = u32;
3470#else
3471 ASMAtomicXchgU32(pu32, u32);
3472#endif
3473}
3474
3475
3476/**
3477 * Atomically writes a signed 32-bit value, ordered.
3478 *
3479 * @param pi32 Pointer to the 32-bit variable to write.
3480 * @param i32 The 32-bit value to assign to *pi32.
3481 */
3482DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3483{
3484 ASMAtomicXchgS32(pi32, i32);
3485}
3486
3487
3488/**
3489 * Atomically writes a signed 32-bit value, unordered.
3490 *
3491 * @param pi32 Pointer to the 32-bit variable to write.
3492 * @param i32 The 32-bit value to assign to *pi32.
3493 */
3494DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3495{
3496 Assert(!((uintptr_t)pi32 & 3));
3497#if ARCH_BITS >= 32
3498 *pi32 = i32;
3499#else
3500 ASMAtomicXchgS32(pi32, i32);
3501#endif
3502}
3503
3504
3505/**
3506 * Atomically writes an unsigned 64-bit value, ordered.
3507 *
3508 * @param pu64 Pointer to the 64-bit variable to write.
3509 * @param u64 The 64-bit value to assign to *pu64.
3510 */
3511DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3512{
3513 /** @todo Any possible ARM32/ARM64 optimizations here? */
3514 ASMAtomicXchgU64(pu64, u64);
3515}
3516
3517
3518/**
3519 * Atomically writes an unsigned 64-bit value, unordered.
3520 *
3521 * @param pu64 Pointer to the 64-bit variable to write.
3522 * @param u64 The 64-bit value to assign to *pu64.
3523 */
3524DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3525{
3526 Assert(!((uintptr_t)pu64 & 7));
3527#if ARCH_BITS == 64
3528 *pu64 = u64;
3529#else
3530 ASMAtomicXchgU64(pu64, u64);
3531#endif
3532}
3533
3534
3535/**
3536 * Atomically writes a signed 64-bit value, ordered.
3537 *
3538 * @param pi64 Pointer to the 64-bit variable to write.
3539 * @param i64 The 64-bit value to assign to *pi64.
3540 */
3541DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3542{
3543 /** @todo Any possible ARM32/ARM64 optimizations here? */
3544 ASMAtomicXchgS64(pi64, i64);
3545}
3546
3547
3548/**
3549 * Atomically writes a signed 64-bit value, unordered.
3550 *
3551 * @param pi64 Pointer to the 64-bit variable to write.
3552 * @param i64 The 64-bit value to assign to *pi64.
3553 */
3554DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3555{
3556 Assert(!((uintptr_t)pi64 & 7));
3557#if ARCH_BITS == 64
3558 *pi64 = i64;
3559#else
3560 ASMAtomicXchgS64(pi64, i64);
3561#endif
3562}
3563
3564
3565/**
3566 * Atomically writes a size_t value, ordered.
3567 *
3568 * @returns nothing.
3569 * @param pcb Pointer to the size_t variable to write.
3570 * @param cb The value to assign to *pcb.
3571 */
3572DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3573{
3574#if ARCH_BITS == 64
3575 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3576#elif ARCH_BITS == 32
3577 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3578#elif ARCH_BITS == 16
3579 AssertCompileSize(size_t, 2);
3580 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3581#else
3582# error "Unsupported ARCH_BITS value"
3583#endif
3584}
3585
3586
3587/**
3588 * Atomically writes a size_t value, unordered.
3589 *
3590 * @returns nothing.
3591 * @param pcb Pointer to the size_t variable to write.
3592 * @param cb The value to assign to *pcb.
3593 */
3594DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3595{
3596#if ARCH_BITS == 64
3597 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3598#elif ARCH_BITS == 32
3599 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3600#elif ARCH_BITS == 16
3601 AssertCompileSize(size_t, 2);
3602 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3603#else
3604# error "Unsupported ARCH_BITS value"
3605#endif
3606}
3607
3608
3609/**
3610 * Atomically writes a boolean value, ordered.
3611 *
3612 * @param pf Pointer to the boolean variable to write.
3613 * @param f The boolean value to assign to *pf.
3614 */
3615DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3616{
3617 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3618}
3619
3620
3621/**
3622 * Atomically writes a boolean value, unordered.
3623 *
3624 * @param pf Pointer to the boolean variable to write.
3625 * @param f The boolean value to assign to *pf.
3626 */
3627DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3628{
3629 *pf = f; /* byte writes are atomic on x86 */
3630}
3631
3632
3633/**
3634 * Atomically writes a pointer value, ordered.
3635 *
3636 * @param ppv Pointer to the pointer variable to write.
3637 * @param pv The pointer value to assign to *ppv.
3638 */
3639DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3640{
3641#if ARCH_BITS == 32 || ARCH_BITS == 16
3642 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3643#elif ARCH_BITS == 64
3644 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3645#else
3646# error "ARCH_BITS is bogus"
3647#endif
3648}
3649
3650
3651/**
3652 * Atomically writes a pointer value, unordered.
3653 *
3654 * @param ppv Pointer to the pointer variable to write.
3655 * @param pv The pointer value to assign to *ppv.
3656 */
3657DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3658{
3659#if ARCH_BITS == 32 || ARCH_BITS == 16
3660 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3661#elif ARCH_BITS == 64
3662 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3663#else
3664# error "ARCH_BITS is bogus"
3665#endif
3666}
3667
3668
3669/**
3670 * Atomically writes a pointer value, ordered.
3671 *
3672 * @param ppv Pointer to the pointer variable to write.
3673 * @param pv The pointer value to assign to *ppv. If NULL use
3674 * ASMAtomicWriteNullPtr or you'll land in trouble.
3675 *
3676 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3677 * NULL.
3678 */
3679#ifdef __GNUC__
3680# define ASMAtomicWritePtr(ppv, pv) \
3681 do \
3682 { \
3683 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3684 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3685 \
3686 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3687 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3688 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3689 \
3690 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3691 } while (0)
3692#else
3693# define ASMAtomicWritePtr(ppv, pv) \
3694 do \
3695 { \
3696 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3697 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3698 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3699 \
3700 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3701 } while (0)
3702#endif
3703
3704
3705/**
3706 * Atomically sets a pointer to NULL, ordered.
3707 *
3708 * @param ppv Pointer to the pointer variable that should be set to NULL.
3709 *
3710 * @remarks This is relatively type safe on GCC platforms.
3711 */
3712#if RT_GNUC_PREREQ(4, 2)
3713# define ASMAtomicWriteNullPtr(ppv) \
3714 do \
3715 { \
3716 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3717 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3718 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3719 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3720 } while (0)
3721#else
3722# define ASMAtomicWriteNullPtr(ppv) \
3723 do \
3724 { \
3725 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3726 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3727 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3728 } while (0)
3729#endif
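
/*
 * Usage sketch: publishing and retiring a typed pointer with the macros above.
 * Illustrative only; EXAMPLENODE and g_pExampleHead are hypothetical, and note
 * that NULL must go through ASMAtomicWriteNullPtr rather than ASMAtomicWritePtr.
 */
#if 0 /* illustrative sketch, not compiled */
typedef struct EXAMPLENODE { uint32_t uValue; } EXAMPLENODE;
static EXAMPLENODE * volatile g_pExampleHead;

static void ExampleSetHead(EXAMPLENODE *pNode)
{
    if (pNode)
        ASMAtomicWritePtr(&g_pExampleHead, pNode);
    else
        ASMAtomicWriteNullPtr(&g_pExampleHead);
}
#endif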
3730
3731
3732/**
3733 * Atomically writes a pointer value, unordered.
3734 *
3736 * @param ppv Pointer to the pointer variable.
3737 * @param pv The pointer value to assign to *ppv. If NULL use
3738 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3739 *
3740 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3741 * NULL.
3742 */
3743#if RT_GNUC_PREREQ(4, 2)
3744# define ASMAtomicUoWritePtr(ppv, pv) \
3745 do \
3746 { \
3747 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3748 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3749 \
3750 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3751 AssertCompile(sizeof(pv) == sizeof(void *)); \
3752 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3753 \
3754 *(ppvTypeChecked) = pvTypeChecked; \
3755 } while (0)
3756#else
3757# define ASMAtomicUoWritePtr(ppv, pv) \
3758 do \
3759 { \
3760 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3761 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3762 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3763 *(ppv) = pv; \
3764 } while (0)
3765#endif
3766
3767
3768/**
3769 * Atomically sets a pointer to NULL, unordered.
3770 *
3771 * @param ppv Pointer to the pointer variable that should be set to NULL.
3772 *
3773 * @remarks This is relatively type safe on GCC platforms.
3774 */
3775#ifdef __GNUC__
3776# define ASMAtomicUoWriteNullPtr(ppv) \
3777 do \
3778 { \
3779 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3780 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3781 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3782 *(ppvTypeChecked) = NULL; \
3783 } while (0)
3784#else
3785# define ASMAtomicUoWriteNullPtr(ppv) \
3786 do \
3787 { \
3788 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3789 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3790 *(ppv) = NULL; \
3791 } while (0)
3792#endif
3793
3794
3795/**
3796 * Atomically write a typical IPRT handle value, ordered.
3797 *
3798 * @param ph Pointer to the variable to update.
3799 * @param hNew The value to assign to *ph.
3800 *
3801 * @remarks This doesn't currently work for all handles (like RTFILE).
3802 */
3803#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3804# define ASMAtomicWriteHandle(ph, hNew) \
3805 do { \
3806 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3807 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3808 } while (0)
3809#elif HC_ARCH_BITS == 64
3810# define ASMAtomicWriteHandle(ph, hNew) \
3811 do { \
3812 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3813 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3814 } while (0)
3815#else
3816# error HC_ARCH_BITS
3817#endif
3818
3819
3820/**
3821 * Atomically write a typical IPRT handle value, unordered.
3822 *
3823 * @param ph Pointer to the variable to update.
3824 * @param hNew The value to assign to *ph.
3825 *
3826 * @remarks This doesn't currently work for all handles (like RTFILE).
3827 */
3828#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3829# define ASMAtomicUoWriteHandle(ph, hNew) \
3830 do { \
3831 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3832 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3833 } while (0)
3834#elif HC_ARCH_BITS == 64
3835# define ASMAtomicUoWriteHandle(ph, hNew) \
3836 do { \
3837 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3838 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3839 } while (0)
3840#else
3841# error HC_ARCH_BITS
3842#endif
3843
3844
3845/**
3846 * Atomically write a value whose size might differ
3847 * between platforms or compilers, ordered.
3848 *
3849 * @param pu Pointer to the variable to update.
3850 * @param uNew The value to assign to *pu.
3851 */
3852#define ASMAtomicWriteSize(pu, uNew) \
3853 do { \
3854 switch (sizeof(*(pu))) { \
3855 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3856 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3857 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3858 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3859 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3860 } \
3861 } while (0)
3862
3863/**
3864 * Atomically write a value whose size might differ
3865 * between platforms or compilers, unordered.
3866 *
3867 * @param pu Pointer to the variable to update.
3868 * @param uNew The value to assign to *pu.
3869 */
3870#define ASMAtomicUoWriteSize(pu, uNew) \
3871 do { \
3872 switch (sizeof(*(pu))) { \
3873 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3874 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3875 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3876 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3877 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3878 } \
3879 } while (0)
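
/*
 * Usage sketch: ASMAtomicWriteSize / ASMAtomicUoWriteSize pick the right
 * fixed-width helper from sizeof() of the variable, which is convenient for
 * types such as RTHCUINTPTR whose width differs between 32-bit and 64-bit
 * builds (size_t has the dedicated ASMAtomicWriteZ above).  Illustrative
 * only; g_uExampleValue is a hypothetical variable.
 */
#if 0 /* illustrative sketch, not compiled */
static RTHCUINTPTR volatile g_uExampleValue;

static void ExampleUpdate(RTHCUINTPTR uNew)
{
    ASMAtomicWriteSize(&g_uExampleValue, uNew); /* expands to ASMAtomicWriteU32 or ASMAtomicWriteU64 */
}
#endif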
3880
3881
3882
3883/**
3884 * Atomically exchanges and adds to a 16-bit value, ordered.
3885 *
3886 * @returns The old value.
3887 * @param pu16 Pointer to the value.
3888 * @param u16 Number to add.
3889 *
3890 * @remarks Currently not implemented, just to make 16-bit code happy.
3891 * @remarks x86: Requires a 486 or later.
3892 */
3893RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3894
3895
3896/**
3897 * Atomically exchanges and adds to a 32-bit value, ordered.
3898 *
3899 * @returns The old value.
3900 * @param pu32 Pointer to the value.
3901 * @param u32 Number to add.
3902 *
3903 * @remarks x86: Requires a 486 or later.
3904 */
3905#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3906RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3907#else
3908DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3909{
3910# if RT_INLINE_ASM_USES_INTRIN
3911 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3912 return u32;
3913
3914# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3915# if RT_INLINE_ASM_GNU_STYLE
3916 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3917 : "=r" (u32)
3918 , "=m" (*pu32)
3919 : "0" (u32)
3920 , "m" (*pu32)
3921 : "memory"
3922 , "cc");
3923 return u32;
3924# else
3925 __asm
3926 {
3927 mov eax, [u32]
3928# ifdef RT_ARCH_AMD64
3929 mov rdx, [pu32]
3930 lock xadd [rdx], eax
3931# else
3932 mov edx, [pu32]
3933 lock xadd [edx], eax
3934# endif
3935 mov [u32], eax
3936 }
3937 return u32;
3938# endif
3939
3940# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3941 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3942 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3943 "add %[uNew], %[uOld], %[uVal]\n\t",
3944 [uVal] "r" (u32));
3945 return u32OldRet;
3946
3947# else
3948# error "Port me"
3949# endif
3950}
3951#endif
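
/*
 * Usage sketch: ASMAtomicAddU32 returns the value *before* the addition, so it
 * can hand out consecutive, non-overlapping index ranges to concurrent
 * callers.  Illustrative only; g_idxExampleNext is hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
static uint32_t volatile g_idxExampleNext;

static uint32_t ExampleReserveSlots(uint32_t cSlots)
{
    /* The returned old value is the first index of the range we just reserved. */
    return ASMAtomicAddU32(&g_idxExampleNext, cSlots);
}
#endif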
3952
3953
3954/**
3955 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3956 *
3957 * @returns The old value.
3958 * @param pi32 Pointer to the value.
3959 * @param i32 Number to add.
3960 *
3961 * @remarks x86: Requires a 486 or later.
3962 */
3963DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3964{
3965 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3966}
3967
3968
3969/**
3970 * Atomically exchanges and adds to a 64-bit value, ordered.
3971 *
3972 * @returns The old value.
3973 * @param pu64 Pointer to the value.
3974 * @param u64 Number to add.
3975 *
3976 * @remarks x86: Requires a Pentium or later.
3977 */
3978#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3979DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3980#else
3981DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3982{
3983# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3984 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3985 return u64;
3986
3987# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3988 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3989 : "=r" (u64)
3990 , "=m" (*pu64)
3991 : "0" (u64)
3992 , "m" (*pu64)
3993 : "memory"
3994 , "cc");
3995 return u64;
3996
3997# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3998 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3999 "add %[uNew], %[uOld], %[uVal]\n\t"
4000 ,
4001 "add %[uNew], %[uOld], %[uVal]\n\t"
4002 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
4003 [uVal] "r" (u64));
4004 return u64OldRet;
4005
4006# else
4007 uint64_t u64Old;
4008 for (;;)
4009 {
4010 uint64_t u64New;
4011 u64Old = ASMAtomicUoReadU64(pu64);
4012 u64New = u64Old + u64;
4013 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4014 break;
4015 ASMNopPause();
4016 }
4017 return u64Old;
4018# endif
4019}
4020#endif
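
/*
 * The generic fallback above is the standard compare-exchange retry loop: read
 * the current value without ordering, compute the new value, and retry until
 * ASMAtomicCmpXchgU64 succeeds.  The same pattern works for any read-modify-
 * write operation that has no dedicated helper; a sketch of an atomic 64-bit
 * maximum is shown below (ExampleAtomicMaxU64 is hypothetical, the ASMAtomic*
 * calls are the ones in this file).
 */
#if 0 /* illustrative sketch, not compiled */
static uint64_t ExampleAtomicMaxU64(uint64_t volatile RT_FAR *pu64, uint64_t u64New)
{
    for (;;)
    {
        uint64_t const u64Old = ASMAtomicUoReadU64(pu64);
        if (u64Old >= u64New)
            return u64Old;                        /* already large enough, nothing to do */
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            return u64Old;                        /* we installed the new maximum */
        ASMNopPause();                            /* lost the race, try again */
    }
}
#endif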
4021
4022
4023/**
4024 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4025 *
4026 * @returns The old value.
4027 * @param pi64 Pointer to the value.
4028 * @param i64 Number to add.
4029 *
4030 * @remarks x86: Requires a Pentium or later.
4031 */
4032DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4033{
4034 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4035}
4036
4037
4038/**
4039 * Atomically exchanges and adds to a size_t value, ordered.
4040 *
4041 * @returns The old value.
4042 * @param pcb Pointer to the size_t value.
4043 * @param cb Number to add.
4044 */
4045DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4046{
4047#if ARCH_BITS == 64
4048 AssertCompileSize(size_t, 8);
4049 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4050#elif ARCH_BITS == 32
4051 AssertCompileSize(size_t, 4);
4052 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4053#elif ARCH_BITS == 16
4054 AssertCompileSize(size_t, 2);
4055 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4056#else
4057# error "Unsupported ARCH_BITS value"
4058#endif
4059}
4060
4061
4062/**
4063 * Atomically exchanges and adds a value whose size might differ between
4064 * platforms or compilers, ordered.
4065 *
4066 * @param pu Pointer to the variable to update.
4067 * @param uNew The value to add to *pu.
4068 * @param puOld Where to store the old value.
4069 */
4070#define ASMAtomicAddSize(pu, uNew, puOld) \
4071 do { \
4072 switch (sizeof(*(pu))) { \
4073 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4074 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4075 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
4076 } \
4077 } while (0)
4078
4079
4080
4081/**
4082 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
4083 *
4084 * @returns The old value.
4085 * @param pu16 Pointer to the value.
4086 * @param u16 Number to subtract.
4087 *
4088 * @remarks x86: Requires a 486 or later.
4089 */
4090DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4091{
4092 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4093}
4094
4095
4096/**
4097 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
4098 *
4099 * @returns The old value.
4100 * @param pi16 Pointer to the value.
4101 * @param i16 Number to subtract.
4102 *
4103 * @remarks x86: Requires a 486 or later.
4104 */
4105DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4106{
4107 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4108}
4109
4110
4111/**
4112 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
4113 *
4114 * @returns The old value.
4115 * @param pu32 Pointer to the value.
4116 * @param u32 Number to subtract.
4117 *
4118 * @remarks x86: Requires a 486 or later.
4119 */
4120DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4121{
4122 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4123}
4124
4125
4126/**
4127 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
4128 *
4129 * @returns The old value.
4130 * @param pi32 Pointer to the value.
4131 * @param i32 Number to subtract.
4132 *
4133 * @remarks x86: Requires a 486 or later.
4134 */
4135DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4136{
4137 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4138}
4139
4140
4141/**
4142 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
4143 *
4144 * @returns The old value.
4145 * @param pu64 Pointer to the value.
4146 * @param u64 Number to subtract.
4147 *
4148 * @remarks x86: Requires a Pentium or later.
4149 */
4150DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4151{
4152 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4153}
4154
4155
4156/**
4157 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
4158 *
4159 * @returns The old value.
4160 * @param pi64 Pointer to the value.
4161 * @param i64 Number to subtract.
4162 *
4163 * @remarks x86: Requires a Pentium or later.
4164 */
4165DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4166{
4167 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4168}
4169
4170
4171/**
4172 * Atomically exchanges and subtracts from a size_t value, ordered.
4173 *
4174 * @returns The old value.
4175 * @param pcb Pointer to the size_t value.
4176 * @param cb Number to subtract.
4177 *
4178 * @remarks x86: Requires a 486 or later.
4179 */
4180DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4181{
4182#if ARCH_BITS == 64
4183 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4184#elif ARCH_BITS == 32
4185 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4186#elif ARCH_BITS == 16
4187 AssertCompileSize(size_t, 2);
4188 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4189#else
4190# error "Unsupported ARCH_BITS value"
4191#endif
4192}
4193
4194
4195/**
4196 * Atomically exchanges and subtracts a value whose size might differ between
4197 * platforms or compilers, ordered.
4198 *
4199 * @param pu Pointer to the variable to update.
4200 * @param uNew The value to subtract from *pu.
4201 * @param puOld Where to store the old value.
4202 *
4203 * @remarks x86: Requires a 486 or later.
4204 */
4205#define ASMAtomicSubSize(pu, uNew, puOld) \
4206 do { \
4207 switch (sizeof(*(pu))) { \
4208 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4209 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4210 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
4211 } \
4212 } while (0)
4213
4214
4215
4216/**
4217 * Atomically increment a 16-bit value, ordered.
4218 *
4219 * @returns The new value.
4220 * @param pu16 Pointer to the value to increment.
4221 * @remarks Not implemented. Just to make 16-bit code happy.
4222 *
4223 * @remarks x86: Requires a 486 or later.
4224 */
4225RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4226
4227
4228/**
4229 * Atomically increment a 32-bit value, ordered.
4230 *
4231 * @returns The new value.
4232 * @param pu32 Pointer to the value to increment.
4233 *
4234 * @remarks x86: Requires a 486 or later.
4235 */
4236#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4237RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4238#else
4239DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4240{
4241# if RT_INLINE_ASM_USES_INTRIN
4242 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4243
4244# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4245# if RT_INLINE_ASM_GNU_STYLE
4246 uint32_t u32;
4247 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4248 : "=r" (u32)
4249 , "=m" (*pu32)
4250 : "0" (1)
4251 , "m" (*pu32)
4252 : "memory"
4253 , "cc");
4254 return u32+1;
4255# else
 uint32_t u32;
4256 __asm
4257 {
4258 mov eax, 1
4259# ifdef RT_ARCH_AMD64
4260 mov rdx, [pu32]
4261 lock xadd [rdx], eax
4262# else
4263 mov edx, [pu32]
4264 lock xadd [edx], eax
4265# endif
4266 mov u32, eax
4267 }
4268 return u32+1;
4269# endif
4270
4271# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4272 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4273 "add %w[uNew], %w[uNew], #1\n\t",
4274 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4275 "X" (0) /* dummy */);
4276 return u32NewRet;
4277
4278# else
4279 return ASMAtomicAddU32(pu32, 1) + 1;
4280# endif
4281}
4282#endif
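
/*
 * Usage sketch: since ASMAtomicIncU32 returns the incremented value, it can be
 * used directly for handing out unique IDs (until the counter wraps).
 * Illustrative only; g_idExampleNext is hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
static uint32_t volatile g_idExampleNext;

static uint32_t ExampleAllocId(void)
{
    return ASMAtomicIncU32(&g_idExampleNext);   /* first caller gets 1, the next 2, ... */
}
#endif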
4283
4284
4285/**
4286 * Atomically increment a signed 32-bit value, ordered.
4287 *
4288 * @returns The new value.
4289 * @param pi32 Pointer to the value to increment.
4290 *
4291 * @remarks x86: Requires a 486 or later.
4292 */
4293DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4294{
4295 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4296}
4297
4298
4299/**
4300 * Atomically increment a 64-bit value, ordered.
4301 *
4302 * @returns The new value.
4303 * @param pu64 Pointer to the value to increment.
4304 *
4305 * @remarks x86: Requires a Pentium or later.
4306 */
4307#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4308DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4309#else
4310DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4311{
4312# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4313 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4314
4315# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4316 uint64_t u64;
4317 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4318 : "=r" (u64)
4319 , "=m" (*pu64)
4320 : "0" (1)
4321 , "m" (*pu64)
4322 : "memory"
4323 , "cc");
4324 return u64 + 1;
4325
4326# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4327 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4328 "add %[uNew], %[uNew], #1\n\t"
4329 ,
4330 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4331 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4332 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4333 return u64NewRet;
4334
4335# else
4336 return ASMAtomicAddU64(pu64, 1) + 1;
4337# endif
4338}
4339#endif
4340
4341
4342/**
4343 * Atomically increment a signed 64-bit value, ordered.
4344 *
4345 * @returns The new value.
4346 * @param pi64 Pointer to the value to increment.
4347 *
4348 * @remarks x86: Requires a Pentium or later.
4349 */
4350DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4351{
4352 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4353}
4354
4355
4356/**
4357 * Atomically increment a size_t value, ordered.
4358 *
4359 * @returns The new value.
4360 * @param pcb Pointer to the value to increment.
4361 *
4362 * @remarks x86: Requires a 486 or later.
4363 */
4364DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4365{
4366#if ARCH_BITS == 64
4367 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4368#elif ARCH_BITS == 32
4369 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4370#elif ARCH_BITS == 16
4371 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4372#else
4373# error "Unsupported ARCH_BITS value"
4374#endif
4375}
4376
4377
4378
4379/**
4380 * Atomically decrement an unsigned 16-bit value, ordered.
4381 *
4382 * @returns The new value.
4383 * @param pu16 Pointer to the value to decrement.
4384 * @remarks Not implemented. Just to make 16-bit code happy.
4385 *
4386 * @remarks x86: Requires a 486 or later.
4387 */
4388RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4389
4390
4391/**
4392 * Atomically decrement an unsigned 32-bit value, ordered.
4393 *
4394 * @returns The new value.
4395 * @param pu32 Pointer to the value to decrement.
4396 *
4397 * @remarks x86: Requires a 486 or later.
4398 */
4399#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4400RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4401#else
4402DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4403{
4404# if RT_INLINE_ASM_USES_INTRIN
4405 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4406
4407# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4408# if RT_INLINE_ASM_GNU_STYLE
4409 uint32_t u32;
4410 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4411 : "=r" (u32)
4412 , "=m" (*pu32)
4413 : "0" (-1)
4414 , "m" (*pu32)
4415 : "memory"
4416 , "cc");
4417 return u32-1;
4418# else
4419 uint32_t u32;
4420 __asm
4421 {
4422 mov eax, -1
4423# ifdef RT_ARCH_AMD64
4424 mov rdx, [pu32]
4425 lock xadd [rdx], eax
4426# else
4427 mov edx, [pu32]
4428 lock xadd [edx], eax
4429# endif
4430 mov u32, eax
4431 }
4432 return u32-1;
4433# endif
4434
4435# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4436 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4437 "sub %w[uNew], %w[uNew], #1\n\t",
4438 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4439 "X" (0) /* dummy */);
4440 return u32NewRet;
4441
4442# else
4443 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4444# endif
4445}
4446#endif
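
/*
 * Usage sketch: the classic reference counting pattern built on
 * ASMAtomicIncU32 / ASMAtomicDecU32, where only the caller that drives the
 * count to zero performs the cleanup.  Illustrative only; EXAMPLEOBJ and
 * exampleObjDestroy are hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;

static void ExampleObjRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

static void ExampleObjRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)     /* returns the new count */
        exampleObjDestroy(pObj);                /* hypothetical cleanup helper */
}
#endif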
4447
4448
4449/**
4450 * Atomically decrement a signed 32-bit value, ordered.
4451 *
4452 * @returns The new value.
4453 * @param pi32 Pointer to the value to decrement.
4454 *
4455 * @remarks x86: Requires a 486 or later.
4456 */
4457DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4458{
4459 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4460}
4461
4462
4463/**
4464 * Atomically decrement an unsigned 64-bit value, ordered.
4465 *
4466 * @returns The new value.
4467 * @param pu64 Pointer to the value to decrement.
4468 *
4469 * @remarks x86: Requires a Pentium or later.
4470 */
4471#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4472RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4473#else
4474DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4475{
4476# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4477 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4478
4479# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4480 uint64_t u64;
4481 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4482 : "=r" (u64)
4483 , "=m" (*pu64)
4484 : "0" (~(uint64_t)0)
4485 , "m" (*pu64)
4486 : "memory"
4487 , "cc");
4488 return u64-1;
4489
4490# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4491 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4492 "sub %[uNew], %[uNew], #1\n\t"
4493 ,
4494 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4495 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4496 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4497 return u64NewRet;
4498
4499# else
4500 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4501# endif
4502}
4503#endif
4504
4505
4506/**
4507 * Atomically decrement a signed 64-bit value, ordered.
4508 *
4509 * @returns The new value.
4510 * @param pi64 Pointer to the value to decrement.
4511 *
4512 * @remarks x86: Requires a Pentium or later.
4513 */
4514DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4515{
4516 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4517}
4518
4519
4520/**
4521 * Atomically decrement a size_t value, ordered.
4522 *
4523 * @returns The new value.
4524 * @param pcb Pointer to the value to decrement.
4525 *
4526 * @remarks x86: Requires a 486 or later.
4527 */
4528DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4529{
4530#if ARCH_BITS == 64
4531 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4532#elif ARCH_BITS == 32
4533 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4534#elif ARCH_BITS == 16
4535 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4536#else
4537# error "Unsupported ARCH_BITS value"
4538#endif
4539}
4540
4541
4542/**
4543 * Atomically Or an unsigned 32-bit value, ordered.
4544 *
4545 * @param pu32 Pointer to the pointer variable to OR u32 with.
4546 * @param u32 The value to OR *pu32 with.
4547 *
4548 * @remarks x86: Requires a 386 or later.
4549 */
4550#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4551RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4552#else
4553DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4554{
4555# if RT_INLINE_ASM_USES_INTRIN
4556 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4557
4558# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4559# if RT_INLINE_ASM_GNU_STYLE
4560 __asm__ __volatile__("lock; orl %1, %0\n\t"
4561 : "=m" (*pu32)
4562 : "ir" (u32)
4563 , "m" (*pu32)
4564 : "cc");
4565# else
4566 __asm
4567 {
4568 mov eax, [u32]
4569# ifdef RT_ARCH_AMD64
4570 mov rdx, [pu32]
4571 lock or [rdx], eax
4572# else
4573 mov edx, [pu32]
4574 lock or [edx], eax
4575# endif
4576 }
4577# endif
4578
4579# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4580 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4581 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4582 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4583 "orr %[uNew], %[uNew], %[uVal]\n\t",
4584 [uVal] "r" (u32));
4585
4586# else
4587# error "Port me"
4588# endif
4589}
4590#endif
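
/*
 * Usage sketch: ASMAtomicOrU32 is the usual way of setting request or status
 * bits in a shared flag word without disturbing the other bits.  Illustrative
 * only; the flag values and g_fExampleRequests are hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
#define EXAMPLE_REQ_WAKEUP      UINT32_C(0x00000001)
#define EXAMPLE_REQ_TERMINATE   UINT32_C(0x00000002)

static uint32_t volatile g_fExampleRequests;

static void ExamplePostWakeup(void)
{
    ASMAtomicOrU32(&g_fExampleRequests, EXAMPLE_REQ_WAKEUP);
}
#endif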
4591
4592
4593/**
4594 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4595 * fallback).
4596 *
4597 * @returns Old value.
4598 * @param pu32 Pointer to the variable to OR @a u32 with.
4599 * @param u32 The value to OR @a *pu32 with.
4600 */
4601DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4602{
4603#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4604 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4605 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4606 "orr %[uNew], %[uOld], %[uVal]\n\t",
4607 [uVal] "r" (u32));
4608 return u32OldRet;
4609
4610#else
4611 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4612 uint32_t u32New;
4613 do
4614 u32New = u32RetOld | u32;
4615 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4616 return u32RetOld;
4617#endif
4618}
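
/*
 * Usage sketch: because the extended variant returns the previous value, a
 * plain OR becomes a test-and-set, which is handy for "only the first caller
 * does the work" style initialization.  Illustrative only;
 * EXAMPLE_F_INITIALIZED and g_fExampleState are hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
#define EXAMPLE_F_INITIALIZED   UINT32_C(0x00000001)

static uint32_t volatile g_fExampleState;

static bool ExampleClaimInit(void)
{
    uint32_t const fOld = ASMAtomicOrExU32(&g_fExampleState, EXAMPLE_F_INITIALIZED);
    return !(fOld & EXAMPLE_F_INITIALIZED);     /* true only for the first caller */
}
#endif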
4619
4620
4621/**
4622 * Atomically Or a signed 32-bit value, ordered.
4623 *
4624 * @param pi32 Pointer to the variable to OR i32 with.
4625 * @param i32 The value to OR *pi32 with.
4626 *
4627 * @remarks x86: Requires a 386 or later.
4628 */
4629DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4630{
4631 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4632}
4633
4634
4635/**
4636 * Atomically Or an unsigned 64-bit value, ordered.
4637 *
4638 * @param pu64 Pointer to the pointer variable to OR u64 with.
4639 * @param u64 The value to OR *pu64 with.
4640 *
4641 * @remarks x86: Requires a Pentium or later.
4642 */
4643#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4644DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4645#else
4646DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4647{
4648# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4649 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4650
4651# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4652 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4653 : "=m" (*pu64)
4654 : "r" (u64)
4655 , "m" (*pu64)
4656 : "cc");
4657
4658# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4659 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4660 "orr %[uNew], %[uNew], %[uVal]\n\t"
4661 ,
4662 "orr %[uNew], %[uNew], %[uVal]\n\t"
4663 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4664 [uVal] "r" (u64));
4665
4666# else
4667 for (;;)
4668 {
4669 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4670 uint64_t u64New = u64Old | u64;
4671 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4672 break;
4673 ASMNopPause();
4674 }
4675# endif
4676}
4677#endif
4678
4679
4680/**
4681 * Atomically Or a signed 64-bit value, ordered.
4682 *
4683 * @param pi64 Pointer to the variable to OR i64 with.
4684 * @param i64 The value to OR *pi64 with.
4685 *
4686 * @remarks x86: Requires a Pentium or later.
4687 */
4688DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4689{
4690 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4691}
4692
4693
4694/**
4695 * Atomically And an unsigned 32-bit value, ordered.
4696 *
4697 * @param pu32 Pointer to the pointer variable to AND u32 with.
4698 * @param u32 The value to AND *pu32 with.
4699 *
4700 * @remarks x86: Requires a 386 or later.
4701 */
4702#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4703RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4704#else
4705DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4706{
4707# if RT_INLINE_ASM_USES_INTRIN
4708 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4709
4710# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4711# if RT_INLINE_ASM_GNU_STYLE
4712 __asm__ __volatile__("lock; andl %1, %0\n\t"
4713 : "=m" (*pu32)
4714 : "ir" (u32)
4715 , "m" (*pu32)
4716 : "cc");
4717# else
4718 __asm
4719 {
4720 mov eax, [u32]
4721# ifdef RT_ARCH_AMD64
4722 mov rdx, [pu32]
4723 lock and [rdx], eax
4724# else
4725 mov edx, [pu32]
4726 lock and [edx], eax
4727# endif
4728 }
4729# endif
4730
4731# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4732 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4733 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4734 "and %[uNew], %[uNew], %[uVal]\n\t",
4735 [uVal] "r" (u32));
4736
4737# else
4738# error "Port me"
4739# endif
4740}
4741#endif
4742
4743
4744/**
4745 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4746 *
4747 * @returns Old value.
4748 * @param pu32 Pointer to the variable to AND @a u32 with.
4749 * @param u32 The value to AND @a *pu32 with.
4750 */
4751DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4752{
4753#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4754 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4755 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4756 "and %[uNew], %[uOld], %[uVal]\n\t",
4757 [uVal] "r" (u32));
4758 return u32OldRet;
4759
4760#else
4761 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4762 uint32_t u32New;
4763 do
4764 u32New = u32RetOld & u32;
4765 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4766 return u32RetOld;
4767#endif
4768}
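
/*
 * Usage sketch: ASMAtomicAndExU32 can clear a set of bits and report which of
 * them were actually pending, so a consumer processes each request exactly
 * once.  Illustrative only; the names are hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
#define EXAMPLE_REQ_MASK        UINT32_C(0x0000000f)

static uint32_t volatile g_fExampleRequests;

static uint32_t ExampleConsumeRequests(void)
{
    uint32_t const fOld = ASMAtomicAndExU32(&g_fExampleRequests, ~EXAMPLE_REQ_MASK);
    return fOld & EXAMPLE_REQ_MASK;             /* the requests we just claimed */
}
#endif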
4769
4770
4771/**
4772 * Atomically And a signed 32-bit value, ordered.
4773 *
4774 * @param pi32 Pointer to the pointer variable to AND i32 with.
4775 * @param i32 The value to AND *pi32 with.
4776 *
4777 * @remarks x86: Requires a 386 or later.
4778 */
4779DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4780{
4781 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4782}
4783
4784
4785/**
4786 * Atomically And an unsigned 64-bit value, ordered.
4787 *
4788 * @param pu64 Pointer to the pointer variable to AND u64 with.
4789 * @param u64 The value to AND *pu64 with.
4790 *
4791 * @remarks x86: Requires a Pentium or later.
4792 */
4793#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4794DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4795#else
4796DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4797{
4798# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4799 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4800
4801# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4802 __asm__ __volatile__("lock; andq %1, %0\n\t"
4803 : "=m" (*pu64)
4804 : "r" (u64)
4805 , "m" (*pu64)
4806 : "cc");
4807
4808# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4809 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4810 "and %[uNew], %[uNew], %[uVal]\n\t"
4811 ,
4812 "and %[uNew], %[uNew], %[uVal]\n\t"
4813 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4814 [uVal] "r" (u64));
4815
4816# else
4817 for (;;)
4818 {
4819 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4820 uint64_t u64New = u64Old & u64;
4821 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4822 break;
4823 ASMNopPause();
4824 }
4825# endif
4826}
4827#endif
4828
4829
4830/**
4831 * Atomically And a signed 64-bit value, ordered.
4832 *
4833 * @param pi64 Pointer to the pointer variable to AND i64 with.
4834 * @param i64 The value to AND *pi64 with.
4835 *
4836 * @remarks x86: Requires a Pentium or later.
4837 */
4838DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4839{
4840 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4841}
4842
4843
4844/**
4845 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4846 *
4847 * @param pu32 Pointer to the variable to XOR @a u32 with.
4848 * @param u32 The value to XOR @a *pu32 with.
4849 *
4850 * @remarks x86: Requires a 386 or later.
4851 */
4852#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4853RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4854#else
4855DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4856{
4857# if RT_INLINE_ASM_USES_INTRIN
4858 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4859
4860# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4861# if RT_INLINE_ASM_GNU_STYLE
4862 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4863 : "=m" (*pu32)
4864 : "ir" (u32)
4865 , "m" (*pu32)
4866 : "cc");
4867# else
4868 __asm
4869 {
4870 mov eax, [u32]
4871# ifdef RT_ARCH_AMD64
4872 mov rdx, [pu32]
4873 lock xor [rdx], eax
4874# else
4875 mov edx, [pu32]
4876 lock xor [edx], eax
4877# endif
4878 }
4879# endif
4880
4881# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4882 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4883 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4884 "eor %[uNew], %[uNew], %[uVal]\n\t",
4885 [uVal] "r" (u32));
4886
4887# else
4888# error "Port me"
4889# endif
4890}
4891#endif
4892
4893
4894/**
4895 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4896 * extended version (for bitmaps).
4897 *
4898 * @returns Old value.
4899 * @param pu32 Pointer to the variable to XOR @a u32 with.
4900 * @param u32 The value to XOR @a *pu32 with.
4901 */
4902DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4903{
4904#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4905 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4906 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4907 "eor %[uNew], %[uOld], %[uVal]\n\t",
4908 [uVal] "r" (u32));
4909 return u32OldRet;
4910
4911#else
4912 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4913 uint32_t u32New;
4914 do
4915 u32New = u32RetOld ^ u32;
4916 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4917 return u32RetOld;
4918#endif
4919}
4920
4921
4922/**
4923 * Atomically XOR a signed 32-bit value, ordered.
4924 *
4925 * @param pi32 Pointer to the variable to XOR i32 with.
4926 * @param i32 The value to XOR *pi32 with.
4927 *
4928 * @remarks x86: Requires a 386 or later.
4929 */
4930DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4931{
4932 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4933}
4934
4935
4936/**
4937 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4938 *
4939 * @param pu32 Pointer to the pointer variable to OR u32 with.
4940 * @param u32 The value to OR *pu32 with.
4941 *
4942 * @remarks x86: Requires a 386 or later.
4943 */
4944#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4945RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4946#else
4947DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4948{
4949# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4950# if RT_INLINE_ASM_GNU_STYLE
4951 __asm__ __volatile__("orl %1, %0\n\t"
4952 : "=m" (*pu32)
4953 : "ir" (u32)
4954 , "m" (*pu32)
4955 : "cc");
4956# else
4957 __asm
4958 {
4959 mov eax, [u32]
4960# ifdef RT_ARCH_AMD64
4961 mov rdx, [pu32]
4962 or [rdx], eax
4963# else
4964 mov edx, [pu32]
4965 or [edx], eax
4966# endif
4967 }
4968# endif
4969
4970# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4971 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4972 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4973 "orr %[uNew], %[uNew], %[uVal]\n\t",
4974 [uVal] "r" (u32));
4975
4976# else
4977# error "Port me"
4978# endif
4979}
4980#endif
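
/*
 * Note on "unordered but interrupt safe": on x86 this is a plain (non
 * lock-prefixed) read-modify-write instruction, so it cannot be torn by an
 * interrupt on the same CPU but is not atomic with respect to other CPUs; on
 * ARM the exclusive load/store pair keeps it atomic but adds no barrier.  It
 * is therefore suited to flag words shared only with an interrupt handler on
 * the same CPU, or where the caller provides ordering by other means.  A
 * sketch with hypothetical names:
 */
#if 0 /* illustrative sketch, not compiled */
static uint32_t volatile g_fExampleLocalFlags;  /* only ever touched on one CPU */

static void ExampleNoteEventFromIrqHandler(void)
{
    ASMAtomicUoOrU32(&g_fExampleLocalFlags, UINT32_C(0x00000001));
}
#endif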
4981
4982
4983/**
4984 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4985 * extended version (for bitmap fallback).
4986 *
4987 * @returns Old value.
4988 * @param pu32 Pointer to the variable to OR @a u32 with.
4989 * @param u32 The value to OR @a *pu32 with.
4990 */
4991DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4992{
4993#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4994 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4995 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4996 "orr %[uNew], %[uOld], %[uVal]\n\t",
4997 [uVal] "r" (u32));
4998 return u32OldRet;
4999
5000#else
5001 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5002#endif
5003}
5004
5005
5006/**
5007 * Atomically OR a signed 32-bit value, unordered.
5008 *
5009 * @param pi32 Pointer to the variable to OR i32 with.
5010 * @param i32 The value to OR *pi32 with.
5011 *
5012 * @remarks x86: Requires a 386 or later.
5013 */
5014DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5015{
5016 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5017}
5018
5019
5020/**
5021 * Atomically OR an unsigned 64-bit value, unordered.
5022 *
5023 * @param pu64 Pointer to the pointer variable to OR u64 with.
5024 * @param u64 The value to OR *pu64 with.
5025 *
5026 * @remarks x86: Requires a Pentium or later.
5027 */
5028#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5029DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5030#else
5031DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5032{
5033# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5034 __asm__ __volatile__("orq %1, %q0\n\t"
5035 : "=m" (*pu64)
5036 : "r" (u64)
5037 , "m" (*pu64)
5038 : "cc");
5039
5040# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5041 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5042 "orr %[uNew], %[uNew], %[uVal]\n\t"
5043 ,
5044 "orr %[uNew], %[uNew], %[uVal]\n\t"
5045 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5046 [uVal] "r" (u64));
5047
5048# else
5049 for (;;)
5050 {
5051 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5052 uint64_t u64New = u64Old | u64;
5053 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5054 break;
5055 ASMNopPause();
5056 }
5057# endif
5058}
5059#endif
5060
5061
5062/**
5063 * Atomically Or a signed 64-bit value, unordered.
5064 *
5065 * @param pi64 Pointer to the variable to OR i64 with.
5066 * @param i64 The value to OR *pi64 with.
5067 *
5068 * @remarks x86: Requires a Pentium or later.
5069 */
5070DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5071{
5072 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5073}
5074
5075
5076/**
5077 * Atomically And an unsigned 32-bit value, unordered.
5078 *
5079 * @param pu32 Pointer to the pointer variable to AND u32 with.
5080 * @param u32 The value to AND *pu32 with.
5081 *
5082 * @remarks x86: Requires a 386 or later.
5083 */
5084#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5085RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5086#else
5087DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5088{
5089# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5090# if RT_INLINE_ASM_GNU_STYLE
5091 __asm__ __volatile__("andl %1, %0\n\t"
5092 : "=m" (*pu32)
5093 : "ir" (u32)
5094 , "m" (*pu32)
5095 : "cc");
5096# else
5097 __asm
5098 {
5099 mov eax, [u32]
5100# ifdef RT_ARCH_AMD64
5101 mov rdx, [pu32]
5102 and [rdx], eax
5103# else
5104 mov edx, [pu32]
5105 and [edx], eax
5106# endif
5107 }
5108# endif
5109
5110# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5111 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5112 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5113 "and %[uNew], %[uNew], %[uVal]\n\t",
5114 [uVal] "r" (u32));
5115
5116# else
5117# error "Port me"
5118# endif
5119}
5120#endif
5121
5122
5123/**
5124 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5125 * bitmap fallback).
5126 *
5127 * @returns Old value.
5128 * @param pu32 Pointer to the variable to AND @a u32 with.
5129 * @param u32 The value to AND @a *pu32 with.
5130 */
5131DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5132{
5133#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5134 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5135 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5136 "and %[uNew], %[uOld], %[uVal]\n\t",
5137 [uVal] "r" (u32));
5138 return u32OldRet;
5139
5140#else
5141 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5142#endif
5143}
5144
5145
5146/**
5147 * Atomically And a signed 32-bit value, unordered.
5148 *
5149 * @param pi32 Pointer to the pointer variable to AND i32 with.
5150 * @param i32 The value to AND *pi32 with.
5151 *
5152 * @remarks x86: Requires a 386 or later.
5153 */
5154DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5155{
5156 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5157}
5158
5159
5160/**
5161 * Atomically And an unsigned 64-bit value, unordered.
5162 *
5163 * @param pu64 Pointer to the pointer variable to AND u64 with.
5164 * @param u64 The value to AND *pu64 with.
5165 *
5166 * @remarks x86: Requires a Pentium or later.
5167 */
5168#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5169DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5170#else
5171DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5172{
5173# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5174 __asm__ __volatile__("andq %1, %0\n\t"
5175 : "=m" (*pu64)
5176 : "r" (u64)
5177 , "m" (*pu64)
5178 : "cc");
5179
5180# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5181 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5182 "and %[uNew], %[uNew], %[uVal]\n\t"
5183 ,
5184 "and %[uNew], %[uNew], %[uVal]\n\t"
5185 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5186 [uVal] "r" (u64));
5187
5188# else
5189 for (;;)
5190 {
5191 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5192 uint64_t u64New = u64Old & u64;
5193 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5194 break;
5195 ASMNopPause();
5196 }
5197# endif
5198}
5199#endif
5200
5201
5202/**
5203 * Atomically And a signed 64-bit value, unordered.
5204 *
5205 * @param pi64 Pointer to the pointer variable to AND i64 with.
5206 * @param i64 The value to AND *pi64 with.
5207 *
5208 * @remarks x86: Requires a Pentium or later.
5209 */
5210DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5211{
5212 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5213}
5214
5215
5216/**
5217 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5218 *
5219 * @param pu32 Pointer to the variable to XOR @a u32 with.
5220 * @param u32 The value to XOR @a *pu32 with.
5221 *
5222 * @remarks x86: Requires a 386 or later.
5223 */
5224#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5225RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5226#else
5227DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5228{
5229# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5230# if RT_INLINE_ASM_GNU_STYLE
5231 __asm__ __volatile__("xorl %1, %0\n\t"
5232 : "=m" (*pu32)
5233 : "ir" (u32)
5234 , "m" (*pu32)
5235 : "cc");
5236# else
5237 __asm
5238 {
5239 mov eax, [u32]
5240# ifdef RT_ARCH_AMD64
5241 mov rdx, [pu32]
5242 xor [rdx], eax
5243# else
5244 mov edx, [pu32]
5245 xor [edx], eax
5246# endif
5247 }
5248# endif
5249
5250# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5251 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5252 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5253 "eor %[uNew], %[uNew], %[uVal]\n\t",
5254 [uVal] "r" (u32));
5255
5256# else
5257# error "Port me"
5258# endif
5259}
5260#endif
5261
5262
5263/**
5264 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5265 * extended version (for bitmap fallback).
5266 *
5267 * @returns Old value.
5268 * @param pu32 Pointer to the variable to XOR @a u32 with.
5269 * @param u32 The value to XOR @a *pu32 with.
5270 */
5271DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5272{
5273#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5274 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5275 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5276 "eor %[uNew], %[uOld], %[uVal]\n\t",
5277 [uVal] "r" (u32));
5278 return u32OldRet;
5279
5280#else
5281 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5282#endif
5283}
5284
5285
5286/**
5287 * Atomically XOR a signed 32-bit value, unordered.
5288 *
5289 * @param pi32 Pointer to the variable to XOR @a i32 with.
5290 * @param i32 The value to XOR @a *pi32 with.
5291 *
5292 * @remarks x86: Requires a 386 or later.
5293 */
5294DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5295{
5296 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5297}
5298
5299
5300/**
5301 * Atomically increment an unsigned 32-bit value, unordered.
5302 *
5303 * @returns the new value.
5304 * @param pu32 Pointer to the variable to increment.
5305 *
5306 * @remarks x86: Requires a 486 or later.
5307 */
5308#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5309RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5310#else
5311DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5312{
5313# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5314 uint32_t u32;
5315# if RT_INLINE_ASM_GNU_STYLE
5316 __asm__ __volatile__("xaddl %0, %1\n\t"
5317 : "=r" (u32)
5318 , "=m" (*pu32)
5319 : "0" (1)
5320 , "m" (*pu32)
5321 : "memory" /** @todo why 'memory'? */
5322 , "cc");
5323 return u32 + 1;
5324# else
5325 __asm
5326 {
5327 mov eax, 1
5328# ifdef RT_ARCH_AMD64
5329 mov rdx, [pu32]
5330 xadd [rdx], eax
5331# else
5332 mov edx, [pu32]
5333 xadd [edx], eax
5334# endif
5335 mov u32, eax
5336 }
5337 return u32 + 1;
5338# endif
5339
5340# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5341 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5342 "add %w[uNew], %w[uNew], #1\n\t",
5343 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5344 "X" (0) /* dummy */);
5345 return u32NewRet;
5346
5347# else
5348# error "Port me"
5349# endif
5350}
5351#endif
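
/*
 * Usage sketch: the unordered increment fits statistics counters where an
 * occasional lost update (on x86, where no lock prefix is used) is acceptable
 * and the cost of a locked instruction is not.  Illustrative only;
 * g_cExampleHits is hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
static uint32_t volatile g_cExampleHits;

static void ExampleCountHit(void)
{
    ASMAtomicUoIncU32(&g_cExampleHits);
}
#endif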
5352
5353
5354/**
5355 * Atomically decrement an unsigned 32-bit value, unordered.
5356 *
5357 * @returns the new value.
5358 * @param pu32 Pointer to the variable to decrement.
5359 *
5360 * @remarks x86: Requires a 486 or later.
5361 */
5362#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5363RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5364#else
5365DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5366{
5367# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5368 uint32_t u32;
5369# if RT_INLINE_ASM_GNU_STYLE
5370 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5371 : "=r" (u32)
5372 , "=m" (*pu32)
5373 : "0" (-1)
5374 , "m" (*pu32)
5375 : "memory"
5376 , "cc");
5377 return u32 - 1;
5378# else
5379 __asm
5380 {
5381 mov eax, -1
5382# ifdef RT_ARCH_AMD64
5383 mov rdx, [pu32]
5384 xadd [rdx], eax
5385# else
5386 mov edx, [pu32]
5387 xadd [edx], eax
5388# endif
5389 mov u32, eax
5390 }
5391 return u32 - 1;
5392# endif
5393
5394# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5395 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5396 "sub %w[uNew], %w[uNew], #1\n\t",
5397 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5398 "X" (0) /* dummy */);
5399 return u32NewRet;
5400
5401# else
5402# error "Port me"
5403# endif
5404}
5405#endif
5406
5407
5408/** @def RT_ASM_PAGE_SIZE
5409 * We try to avoid dragging in iprt/param.h here.
5410 * @internal
5411 */
5412#if defined(RT_ARCH_SPARC64)
5413# define RT_ASM_PAGE_SIZE 0x2000
5414# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5415# if PAGE_SIZE != 0x2000
5416# error "PAGE_SIZE is not 0x2000!"
5417# endif
5418# endif
5419#elif defined(RT_ARCH_ARM64)
5420# define RT_ASM_PAGE_SIZE 0x4000
5421# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5422# if PAGE_SIZE != 0x4000
5423# error "PAGE_SIZE is not 0x4000!"
5424# endif
5425# endif
5426#else
5427# define RT_ASM_PAGE_SIZE 0x1000
5428# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5429# if PAGE_SIZE != 0x1000
5430# error "PAGE_SIZE is not 0x1000!"
5431# endif
5432# endif
5433#endif
5434
5435/**
5436 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5437 *
5438 * @param pv Pointer to the memory block. This must be page aligned.
5439 */
5440#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5441RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5442#else
5443DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5444{
5445# if RT_INLINE_ASM_USES_INTRIN
5446# ifdef RT_ARCH_AMD64
5447 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5448# else
5449 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5450# endif
5451
5452# elif RT_INLINE_ASM_GNU_STYLE
5453 RTCCUINTREG uDummy;
5454# ifdef RT_ARCH_AMD64
5455 __asm__ __volatile__("rep stosq"
5456 : "=D" (pv),
5457 "=c" (uDummy)
5458 : "0" (pv),
5459 "c" (RT_ASM_PAGE_SIZE >> 3),
5460 "a" (0)
5461 : "memory");
5462# else
5463 __asm__ __volatile__("rep stosl"
5464 : "=D" (pv),
5465 "=c" (uDummy)
5466 : "0" (pv),
5467 "c" (RT_ASM_PAGE_SIZE >> 2),
5468 "a" (0)
5469 : "memory");
5470# endif
5471# else
5472 __asm
5473 {
5474# ifdef RT_ARCH_AMD64
5475 xor rax, rax
5476 mov ecx, 0200h
5477 mov rdi, [pv]
5478 rep stosq
5479# else
5480 xor eax, eax
5481 mov ecx, 0400h
5482 mov edi, [pv]
5483 rep stosd
5484# endif
5485 }
5486# endif
5487}
5488#endif
5489
5490
5491/**
5492 * Zeros a memory block with a 32-bit aligned size.
5493 *
5494 * @param pv Pointer to the memory block.
5495 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
5496 */
5497#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5498RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5499#else
5500DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5501{
5502# if RT_INLINE_ASM_USES_INTRIN
5503# ifdef RT_ARCH_AMD64
5504 if (!(cb & 7))
5505 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5506 else
5507# endif
5508 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5509
5510# elif RT_INLINE_ASM_GNU_STYLE
5511 __asm__ __volatile__("rep stosl"
5512 : "=D" (pv),
5513 "=c" (cb)
5514 : "0" (pv),
5515 "1" (cb >> 2),
5516 "a" (0)
5517 : "memory");
5518# else
5519 __asm
5520 {
5521 xor eax, eax
5522# ifdef RT_ARCH_AMD64
5523 mov rcx, [cb]
5524 shr rcx, 2
5525 mov rdi, [pv]
5526# else
5527 mov ecx, [cb]
5528 shr ecx, 2
5529 mov edi, [pv]
5530# endif
5531 rep stosd
5532 }
5533# endif
5534}
5535#endif
5536
5537
5538/**
5539 * Fills a memory block with a 32-bit aligned size.
5540 *
5541 * @param pv Pointer to the memory block.
5542 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
5543 * @param u32 The value to fill with.
5544 */
5545#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5546RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5547#else
5548DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5549{
5550# if RT_INLINE_ASM_USES_INTRIN
5551# ifdef RT_ARCH_AMD64
5552 if (!(cb & 7))
5553 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5554 else
5555# endif
5556 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5557
5558# elif RT_INLINE_ASM_GNU_STYLE
5559 __asm__ __volatile__("rep stosl"
5560 : "=D" (pv),
5561 "=c" (cb)
5562 : "0" (pv),
5563 "1" (cb >> 2),
5564 "a" (u32)
5565 : "memory");
5566# else
5567 __asm
5568 {
5569# ifdef RT_ARCH_AMD64
5570 mov rcx, [cb]
5571 shr rcx, 2
5572 mov rdi, [pv]
5573# else
5574 mov ecx, [cb]
5575 shr ecx, 2
5576 mov edi, [pv]
5577# endif
5578 mov eax, [u32]
5579 rep stosd
5580 }
5581# endif
5582}
5583#endif
5584
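/* A minimal sketch of using the two dword fillers above on a small table; note
 * that cb is a byte count and must be a multiple of 4 (the table name is
 * illustrative only):
 *
 *      uint32_t au32Table[64];
 *      ASMMemZero32(au32Table, sizeof(au32Table));                        // all bits clear
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xffffffff));  // all bits set
 */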
5585
5586/**
5587 * Checks if a memory block is all zeros.
5588 *
5589 * @returns Pointer to the first non-zero byte.
5590 * @returns NULL if all zero.
5591 *
5592 * @param pv Pointer to the memory block.
5593 * @param cb Number of bytes in the block.
5594 */
5595#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5596DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5597#else
5598DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5599{
5600/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5601 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5602 for (; cb; cb--, pb++)
5603 if (RT_LIKELY(*pb == 0))
5604 { /* likely */ }
5605 else
5606 return (void RT_FAR *)pb;
5607 return NULL;
5608}
5609#endif
5610
5611
5612/**
5613 * Checks if a memory block is all zeros.
5614 *
5615 * @returns true if zero, false if not.
5616 *
5617 * @param pv Pointer to the memory block.
5618 * @param cb Number of bytes in the block.
5619 *
5620 * @sa ASMMemFirstNonZero
5621 */
5622DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5623{
5624 return ASMMemFirstNonZero(pv, cb) == NULL;
5625}
5626
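/* A minimal sketch, assuming a caller that wants the byte offset of the first
 * non-zero byte rather than its address (the helper name is illustrative only):
 *
 *      DECLINLINE(size_t) exampleFirstNonZeroOffset(void const RT_FAR *pv, size_t cb)
 *      {
 *          void RT_FAR *pvHit = ASMMemFirstNonZero(pv, cb);
 *          return pvHit ? (size_t)((uintptr_t)pvHit - (uintptr_t)pv) : cb;   // cb == all zero
 *      }
 */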
5627
5628/**
5629 * Checks if a memory page is all zeros.
5630 *
5631 * @returns true / false.
5632 *
5633 * @param pvPage Pointer to the page. Must be aligned on a 16-byte
5634 * boundary.
5635 */
5636DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5637{
5638# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5639 union { RTCCUINTREG r; bool f; } uAX;
5640 RTCCUINTREG xCX, xDI;
5641 Assert(!((uintptr_t)pvPage & 15));
5642 __asm__ __volatile__("repe; "
5643# ifdef RT_ARCH_AMD64
5644 "scasq\n\t"
5645# else
5646 "scasl\n\t"
5647# endif
5648 "setnc %%al\n\t"
5649 : "=&c" (xCX)
5650 , "=&D" (xDI)
5651 , "=&a" (uAX.r)
5652 : "mr" (pvPage)
5653# ifdef RT_ARCH_AMD64
5654 , "0" (RT_ASM_PAGE_SIZE/8)
5655# else
5656 , "0" (RT_ASM_PAGE_SIZE/4)
5657# endif
5658 , "1" (pvPage)
5659 , "2" (0)
5660 : "cc");
5661 return uAX.f;
5662# else
5663 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5664 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5665 Assert(!((uintptr_t)pvPage & 15));
5666 for (;;)
5667 {
5668 if (puPtr[0]) return false;
5669 if (puPtr[4]) return false;
5670
5671 if (puPtr[2]) return false;
5672 if (puPtr[6]) return false;
5673
5674 if (puPtr[1]) return false;
5675 if (puPtr[5]) return false;
5676
5677 if (puPtr[3]) return false;
5678 if (puPtr[7]) return false;
5679
5680 if (!--cLeft)
5681 return true;
5682 puPtr += 8;
5683 }
5684# endif
5685}
5686
5687
5688/**
5689 * Checks if a memory block is filled with the specified byte, returning the
5690 * first mismatch.
5691 *
5692 * This is sort of an inverted memchr.
5693 *
5694 * @returns Pointer to the byte which doesn't equal u8.
5695 * @returns NULL if all equal to u8.
5696 *
5697 * @param pv Pointer to the memory block.
5698 * @param cb Number of bytes in the block.
5699 * @param u8 The value it's supposed to be filled with.
5700 *
5701 * @remarks No alignment requirements.
5702 */
5703#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5704 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5705DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5706#else
5707DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5708{
5709/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5710 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5711 for (; cb; cb--, pb++)
5712 if (RT_LIKELY(*pb == u8))
5713 { /* likely */ }
5714 else
5715 return (void *)pb;
5716 return NULL;
5717}
5718#endif
5719
5720
5721/**
5722 * Checks if a memory block is filled with the specified byte.
5723 *
5724 * @returns true if all matching, false if not.
5725 *
5726 * @param pv Pointer to the memory block.
5727 * @param cb Number of bytes in the block.
5728 * @param u8 The value it's supposed to be filled with.
5729 *
5730 * @remarks No alignment requirements.
5731 */
5732DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5733{
5734 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5735}
5736
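/* A minimal sketch, assuming a buffer that is expected to read back as all 0xff
 * bytes, e.g. a freshly erased flash sector (the helper name is illustrative only):
 *
 *      DECLINLINE(bool) exampleIsErased(void const RT_FAR *pvSector, size_t cbSector)
 *      {
 *          // ASMMemFirstMismatchingU8() would instead return the address of the
 *          // first byte that differs, or NULL if there is none.
 *          return ASMMemIsAllU8(pvSector, cbSector, 0xff);
 *      }
 */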
5737
5738/**
5739 * Checks if a memory block is filled with the specified 32-bit value.
5740 *
5741 * This is a sort of inverted memchr.
5742 *
5743 * @returns Pointer to the first value which doesn't equal u32.
5744 * @returns NULL if all equal to u32.
5745 *
5746 * @param pv Pointer to the memory block.
5747 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
5748 * @param u32 The value it's supposed to be filled with.
5749 */
5750DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5751{
5752/** @todo rewrite this in inline assembly? */
5753 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5754 for (; cb; cb -= 4, pu32++)
5755 if (RT_LIKELY(*pu32 == u32))
5756 { /* likely */ }
5757 else
5758 return (uint32_t RT_FAR *)pu32;
5759 return NULL;
5760}
5761
5762
5763/**
5764 * Probes a byte pointer for read access.
5765 *
5766 * While the function will fault if the byte is not read accessible,
5767 * the idea is to do this in a safe place like before acquiring locks
5768 * and such like.
5769 *
5770 * Also, this function guarantees that an eager compiler is not going
5771 * to optimize the probing away.
5772 *
5773 * @param pvByte Pointer to the byte.
5774 */
5775#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5776RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5777#else
5778DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5779{
5780# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5781 uint8_t u8;
5782# if RT_INLINE_ASM_GNU_STYLE
5783 __asm__ __volatile__("movb %1, %0\n\t"
5784 : "=q" (u8)
5785 : "m" (*(const uint8_t *)pvByte));
5786# else
5787 __asm
5788 {
5789# ifdef RT_ARCH_AMD64
5790 mov rax, [pvByte]
5791 mov al, [rax]
5792# else
5793 mov eax, [pvByte]
5794 mov al, [eax]
5795# endif
5796 mov [u8], al
5797 }
5798# endif
5799 return u8;
5800
5801# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5802 uint32_t u32;
5803 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5804# if defined(RT_ARCH_ARM64)
5805 "ldxrb %w[uDst], %[pMem]\n\t"
5806# else
5807 "ldrexb %[uDst], %[pMem]\n\t"
5808# endif
5809 : [uDst] "=&r" (u32)
5810 : [pMem] "m" (*(uint8_t const *)pvByte));
5811 return (uint8_t)u32;
5812
5813# else
5814# error "Port me"
5815# endif
5816}
5817#endif
5818
5819/**
5820 * Probes a buffer for read access page by page.
5821 *
5822 * While the function will fault if the buffer is not fully read
5823 * accessible, the idea is to do this in a safe place like before
5824 * acquiring locks and such like.
5825 *
5826 * Also, this function guarantees that an eager compiler is not going
5827 * to optimize the probing away.
5828 *
5829 * @param pvBuf Pointer to the buffer.
5830 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5831 */
5832DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5833{
5834 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5835 /* the first byte */
5836 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5837 ASMProbeReadByte(pu8);
5838
5839 /* one byte in each of the whole pages in between. */
5840 while (cbBuf > RT_ASM_PAGE_SIZE)
5841 {
5842 ASMProbeReadByte(pu8);
5843 cbBuf -= RT_ASM_PAGE_SIZE;
5844 pu8 += RT_ASM_PAGE_SIZE;
5845 }
5846
5847 /* the last byte */
5848 ASMProbeReadByte(pu8 + cbBuf - 1);
5849}
5850
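/* A minimal sketch of the intended calling pattern: take the potential page
 * fault up front, before entering a context where faulting is inconvenient
 * (the request pointer, size and lock are illustrative assumptions):
 *
 *      ASMProbeReadBuffer(pReq, cbReq);   // fault here, in a safe context...
 *      // ... not later while holding a spinlock or in a no-fault section.
 */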
5851
5852/**
5853 * Reverse the byte order of the given 16-bit integer.
5854 *
5855 * @returns The byte-swapped value.
5856 * @param u16 16-bit integer value.
5857 */
5858#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5859RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
5860#else
5861DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
5862{
5863# if RT_INLINE_ASM_USES_INTRIN
5864 return _byteswap_ushort(u16);
5865
5866# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5867# if RT_INLINE_ASM_GNU_STYLE
5868 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
5869# else
5870 _asm
5871 {
5872 mov ax, [u16]
5873 ror ax, 8
5874 mov [u16], ax
5875 }
5876# endif
5877 return u16;
5878
5879# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5880 uint32_t u32Ret;
5881 __asm__ __volatile__(
5882# if defined(RT_ARCH_ARM64)
5883 "rev16 %w[uRet], %w[uVal]\n\t"
5884# else
5885 "rev16 %[uRet], %[uVal]\n\t"
5886# endif
5887 : [uRet] "=r" (u32Ret)
5888 : [uVal] "r" (u16));
5889 return (uint16_t)u32Ret;
5890
5891# else
5892# error "Port me"
5893# endif
5894}
5895#endif
5896
5897
5898/**
5899 * Reverse the byte order of the given 32-bit integer.
5900 *
5901 * @returns The byte-swapped value.
5902 * @param u32 32-bit integer value.
5903 */
5904#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5905RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
5906#else
5907DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
5908{
5909# if RT_INLINE_ASM_USES_INTRIN
5910 return _byteswap_ulong(u32);
5911
5912# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5913# if RT_INLINE_ASM_GNU_STYLE
5914 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5915# else
5916 _asm
5917 {
5918 mov eax, [u32]
5919 bswap eax
5920 mov [u32], eax
5921 }
5922# endif
5923 return u32;
5924
5925# elif defined(RT_ARCH_ARM64)
5926 uint64_t u64Ret;
5927 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
5928 : [uRet] "=r" (u64Ret)
5929 : [uVal] "r" ((uint64_t)u32));
5930 return (uint32_t)u64Ret;
5931
5932# elif defined(RT_ARCH_ARM32)
5933 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5934 : [uRet] "=r" (u32)
5935 : [uVal] "[uRet]" (u32));
5936 return u32;
5937
5938# else
5939# error "Port me"
5940# endif
5941}
5942#endif
5943
5944
5945/**
5946 * Reverse the byte order of the given 64-bit integer.
5947 *
5948 * @returns The byte-swapped value.
5949 * @param u64 64-bit integer value.
5950 */
5951DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
5952{
5953#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5954 return _byteswap_uint64(u64);
5955
5956# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5957 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
5958 return u64;
5959
5960# elif defined(RT_ARCH_ARM64)
5961 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5962 : [uRet] "=r" (u64)
5963 : [uVal] "[uRet]" (u64));
5964 return u64;
5965
5966#else
5967 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5968 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5969#endif
5970}
5971
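/* A minimal sketch, assuming a little-endian host that needs to read an aligned
 * big-endian (network order) 32-bit field (the helper name is illustrative only):
 *
 *      DECLINLINE(uint32_t) exampleReadBE32(uint32_t const RT_FAR *pu32BigEndian)
 *      {
 *          return ASMByteSwapU32(*pu32BigEndian);   // big endian -> host order
 *      }
 */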
5972
5973
5974/** @defgroup grp_inline_bits Bit Operations
5975 * @{
5976 */
5977
5978
5979/**
5980 * Sets a bit in a bitmap.
5981 *
5982 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5983 * 32-bit aligned.
5984 * @param iBit The bit to set.
5985 *
5986 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5987 * However, doing so will yield better performance as well as avoiding
5988 * traps accessing the last bits in the bitmap.
5989 */
5990#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5991RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5992#else
5993DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5994{
5995# if RT_INLINE_ASM_USES_INTRIN
5996 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5997
5998# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5999# if RT_INLINE_ASM_GNU_STYLE
6000 __asm__ __volatile__("btsl %1, %0"
6001 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6002 : "Ir" (iBit)
6003 , "m" (*(volatile long RT_FAR *)pvBitmap)
6004 : "memory"
6005 , "cc");
6006# else
6007 __asm
6008 {
6009# ifdef RT_ARCH_AMD64
6010 mov rax, [pvBitmap]
6011 mov edx, [iBit]
6012 bts [rax], edx
6013# else
6014 mov eax, [pvBitmap]
6015 mov edx, [iBit]
6016 bts [eax], edx
6017# endif
6018 }
6019# endif
6020
6021# else
6022 int32_t offBitmap = iBit / 32;
6023 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6024 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6025# endif
6026}
6027#endif
6028
6029
6030/**
6031 * Atomically sets a bit in a bitmap, ordered.
6032 *
6033 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6034 * aligned, otherwise the memory access isn't atomic!
6035 * @param iBit The bit to set.
6036 *
6037 * @remarks x86: Requires a 386 or later.
6038 */
6039#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6040RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6041#else
6042DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6043{
6044 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6045# if RT_INLINE_ASM_USES_INTRIN
6046 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6047# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6048# if RT_INLINE_ASM_GNU_STYLE
6049 __asm__ __volatile__("lock; btsl %1, %0"
6050 : "=m" (*(volatile long *)pvBitmap)
6051 : "Ir" (iBit)
6052 , "m" (*(volatile long *)pvBitmap)
6053 : "memory"
6054 , "cc");
6055# else
6056 __asm
6057 {
6058# ifdef RT_ARCH_AMD64
6059 mov rax, [pvBitmap]
6060 mov edx, [iBit]
6061 lock bts [rax], edx
6062# else
6063 mov eax, [pvBitmap]
6064 mov edx, [iBit]
6065 lock bts [eax], edx
6066# endif
6067 }
6068# endif
6069
6070# else
6071 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6072# endif
6073}
6074#endif
6075
6076
6077/**
6078 * Clears a bit in a bitmap.
6079 *
6080 * @param pvBitmap Pointer to the bitmap (little endian).
6081 * @param iBit The bit to clear.
6082 *
6083 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6084 * However, doing so will yield better performance as well as avoiding
6085 * traps accessing the last bits in the bitmap.
6086 */
6087#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6088RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6089#else
6090DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6091{
6092# if RT_INLINE_ASM_USES_INTRIN
6093 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6094
6095# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6096# if RT_INLINE_ASM_GNU_STYLE
6097 __asm__ __volatile__("btrl %1, %0"
6098 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6099 : "Ir" (iBit)
6100 , "m" (*(volatile long RT_FAR *)pvBitmap)
6101 : "memory"
6102 , "cc");
6103# else
6104 __asm
6105 {
6106# ifdef RT_ARCH_AMD64
6107 mov rax, [pvBitmap]
6108 mov edx, [iBit]
6109 btr [rax], edx
6110# else
6111 mov eax, [pvBitmap]
6112 mov edx, [iBit]
6113 btr [eax], edx
6114# endif
6115 }
6116# endif
6117
6118# else
6119 int32_t offBitmap = iBit / 32;
6120 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6121 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6122# endif
6123}
6124#endif
6125
6126
6127/**
6128 * Atomically clears a bit in a bitmap, ordered.
6129 *
6130 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6131 * aligned, otherwise the memory access isn't atomic!
6132 * @param iBit The bit to clear.
6133 *
6134 * @remarks No memory barrier, take care on SMP.
6135 * @remarks x86: Requires a 386 or later.
6136 */
6137#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6138RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6139#else
6140DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6141{
6142 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6143# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6144# if RT_INLINE_ASM_GNU_STYLE
6145 __asm__ __volatile__("lock; btrl %1, %0"
6146 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6147 : "Ir" (iBit)
6148 , "m" (*(volatile long RT_FAR *)pvBitmap)
6149 : "memory"
6150 , "cc");
6151# else
6152 __asm
6153 {
6154# ifdef RT_ARCH_AMD64
6155 mov rax, [pvBitmap]
6156 mov edx, [iBit]
6157 lock btr [rax], edx
6158# else
6159 mov eax, [pvBitmap]
6160 mov edx, [iBit]
6161 lock btr [eax], edx
6162# endif
6163 }
6164# endif
6165# else
6166 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6167# endif
6168}
6169#endif
6170
6171
6172/**
6173 * Toggles a bit in a bitmap.
6174 *
6175 * @param pvBitmap Pointer to the bitmap (little endian).
6176 * @param iBit The bit to toggle.
6177 *
6178 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6179 * However, doing so will yield better performance as well as avoiding
6180 * traps accessing the last bits in the bitmap.
6181 */
6182#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6183RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6184#else
6185DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6186{
6187# if RT_INLINE_ASM_USES_INTRIN
6188 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6189# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6190# if RT_INLINE_ASM_GNU_STYLE
6191 __asm__ __volatile__("btcl %1, %0"
6192 : "=m" (*(volatile long *)pvBitmap)
6193 : "Ir" (iBit)
6194 , "m" (*(volatile long *)pvBitmap)
6195 : "memory"
6196 , "cc");
6197# else
6198 __asm
6199 {
6200# ifdef RT_ARCH_AMD64
6201 mov rax, [pvBitmap]
6202 mov edx, [iBit]
6203 btc [rax], edx
6204# else
6205 mov eax, [pvBitmap]
6206 mov edx, [iBit]
6207 btc [eax], edx
6208# endif
6209 }
6210# endif
6211# else
6212 int32_t offBitmap = iBit / 32;
6213 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6214 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6215# endif
6216}
6217#endif
6218
6219
6220/**
6221 * Atomically toggles a bit in a bitmap, ordered.
6222 *
6223 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6224 * aligned, otherwise the memory access isn't atomic!
6225 * @param iBit The bit to toggle.
6226 *
6227 * @remarks x86: Requires a 386 or later.
6228 */
6229#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6230RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6231#else
6232DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6233{
6234 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6235# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6236# if RT_INLINE_ASM_GNU_STYLE
6237 __asm__ __volatile__("lock; btcl %1, %0"
6238 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6239 : "Ir" (iBit)
6240 , "m" (*(volatile long RT_FAR *)pvBitmap)
6241 : "memory"
6242 , "cc");
6243# else
6244 __asm
6245 {
6246# ifdef RT_ARCH_AMD64
6247 mov rax, [pvBitmap]
6248 mov edx, [iBit]
6249 lock btc [rax], edx
6250# else
6251 mov eax, [pvBitmap]
6252 mov edx, [iBit]
6253 lock btc [eax], edx
6254# endif
6255 }
6256# endif
6257# else
6258 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6259# endif
6260}
6261#endif
6262
6263
6264/**
6265 * Tests and sets a bit in a bitmap.
6266 *
6267 * @returns true if the bit was set.
6268 * @returns false if the bit was clear.
6269 *
6270 * @param pvBitmap Pointer to the bitmap (little endian).
6271 * @param iBit The bit to test and set.
6272 *
6273 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6274 * However, doing so will yield better performance as well as avoiding
6275 * traps accessing the last bits in the bitmap.
6276 */
6277#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6278RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6279#else
6280DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6281{
6282 union { bool f; uint32_t u32; uint8_t u8; } rc;
6283# if RT_INLINE_ASM_USES_INTRIN
6284 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6285
6286# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6287# if RT_INLINE_ASM_GNU_STYLE
6288 __asm__ __volatile__("btsl %2, %1\n\t"
6289 "setc %b0\n\t"
6290 "andl $1, %0\n\t"
6291 : "=q" (rc.u32)
6292 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6293 : "Ir" (iBit)
6294 , "m" (*(volatile long RT_FAR *)pvBitmap)
6295 : "memory"
6296 , "cc");
6297# else
6298 __asm
6299 {
6300 mov edx, [iBit]
6301# ifdef RT_ARCH_AMD64
6302 mov rax, [pvBitmap]
6303 bts [rax], edx
6304# else
6305 mov eax, [pvBitmap]
6306 bts [eax], edx
6307# endif
6308 setc al
6309 and eax, 1
6310 mov [rc.u32], eax
6311 }
6312# endif
6313
6314# else
6315 int32_t offBitmap = iBit / 32;
6316 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6317 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6318 >> (iBit & 31);
6319 rc.u32 &= 1;
6320# endif
6321 return rc.f;
6322}
6323#endif
6324
6325
6326/**
6327 * Atomically tests and sets a bit in a bitmap, ordered.
6328 *
6329 * @returns true if the bit was set.
6330 * @returns false if the bit was clear.
6331 *
6332 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6333 * aligned, otherwise the memory access isn't atomic!
6334 * @param iBit The bit to test and set.
6335 *
6336 * @remarks x86: Requires a 386 or later.
6337 */
6338#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6339RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6340#else
6341DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6342{
6343 union { bool f; uint32_t u32; uint8_t u8; } rc;
6344 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6345# if RT_INLINE_ASM_USES_INTRIN
6346 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6347# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6348# if RT_INLINE_ASM_GNU_STYLE
6349 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6350 "setc %b0\n\t"
6351 "andl $1, %0\n\t"
6352 : "=q" (rc.u32)
6353 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6354 : "Ir" (iBit)
6355 , "m" (*(volatile long RT_FAR *)pvBitmap)
6356 : "memory"
6357 , "cc");
6358# else
6359 __asm
6360 {
6361 mov edx, [iBit]
6362# ifdef RT_ARCH_AMD64
6363 mov rax, [pvBitmap]
6364 lock bts [rax], edx
6365# else
6366 mov eax, [pvBitmap]
6367 lock bts [eax], edx
6368# endif
6369 setc al
6370 and eax, 1
6371 mov [rc.u32], eax
6372 }
6373# endif
6374
6375# else
6376 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6377 >> (iBit & 31);
6378 rc.u32 &= 1;
6379# endif
6380 return rc.f;
6381}
6382#endif
6383
6384
6385/**
6386 * Tests and clears a bit in a bitmap.
6387 *
6388 * @returns true if the bit was set.
6389 * @returns false if the bit was clear.
6390 *
6391 * @param pvBitmap Pointer to the bitmap (little endian).
6392 * @param iBit The bit to test and clear.
6393 *
6394 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6395 * However, doing so will yield better performance as well as avoiding
6396 * traps accessing the last bits in the bitmap.
6397 */
6398#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6399RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6400#else
6401DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6402{
6403 union { bool f; uint32_t u32; uint8_t u8; } rc;
6404# if RT_INLINE_ASM_USES_INTRIN
6405 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6406
6407# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6408# if RT_INLINE_ASM_GNU_STYLE
6409 __asm__ __volatile__("btrl %2, %1\n\t"
6410 "setc %b0\n\t"
6411 "andl $1, %0\n\t"
6412 : "=q" (rc.u32)
6413 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6414 : "Ir" (iBit)
6415 , "m" (*(volatile long RT_FAR *)pvBitmap)
6416 : "memory"
6417 , "cc");
6418# else
6419 __asm
6420 {
6421 mov edx, [iBit]
6422# ifdef RT_ARCH_AMD64
6423 mov rax, [pvBitmap]
6424 btr [rax], edx
6425# else
6426 mov eax, [pvBitmap]
6427 btr [eax], edx
6428# endif
6429 setc al
6430 and eax, 1
6431 mov [rc.u32], eax
6432 }
6433# endif
6434
6435# else
6436 int32_t offBitmap = iBit / 32;
6437 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6438 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6439 >> (iBit & 31);
6440 rc.u32 &= 1;
6441# endif
6442 return rc.f;
6443}
6444#endif
6445
6446
6447/**
6448 * Atomically tests and clears a bit in a bitmap, ordered.
6449 *
6450 * @returns true if the bit was set.
6451 * @returns false if the bit was clear.
6452 *
6453 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6454 * aligned, otherwise the memory access isn't atomic!
6455 * @param iBit The bit to test and clear.
6456 *
6457 * @remarks No memory barrier, take care on SMP.
6458 * @remarks x86: Requires a 386 or later.
6459 */
6460#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6461RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6462#else
6463DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6464{
6465 union { bool f; uint32_t u32; uint8_t u8; } rc;
6466 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6467# if RT_INLINE_ASM_USES_INTRIN
6468 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6469
6470# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6471# if RT_INLINE_ASM_GNU_STYLE
6472 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6473 "setc %b0\n\t"
6474 "andl $1, %0\n\t"
6475 : "=q" (rc.u32)
6476 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6477 : "Ir" (iBit)
6478 , "m" (*(volatile long RT_FAR *)pvBitmap)
6479 : "memory"
6480 , "cc");
6481# else
6482 __asm
6483 {
6484 mov edx, [iBit]
6485# ifdef RT_ARCH_AMD64
6486 mov rax, [pvBitmap]
6487 lock btr [rax], edx
6488# else
6489 mov eax, [pvBitmap]
6490 lock btr [eax], edx
6491# endif
6492 setc al
6493 and eax, 1
6494 mov [rc.u32], eax
6495 }
6496# endif
6497
6498# else
6499 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6500 >> (iBit & 31);
6501 rc.u32 &= 1;
6502# endif
6503 return rc.f;
6504}
6505#endif
6506
6507
6508/**
6509 * Tests and toggles a bit in a bitmap.
6510 *
6511 * @returns true if the bit was set.
6512 * @returns false if the bit was clear.
6513 *
6514 * @param pvBitmap Pointer to the bitmap (little endian).
6515 * @param iBit The bit to test and toggle.
6516 *
6517 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6518 * However, doing so will yield better performance as well as avoiding
6519 * traps accessing the last bits in the bitmap.
6520 */
6521#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6522RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6523#else
6524DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6525{
6526 union { bool f; uint32_t u32; uint8_t u8; } rc;
6527# if RT_INLINE_ASM_USES_INTRIN
6528 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6529
6530# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6531# if RT_INLINE_ASM_GNU_STYLE
6532 __asm__ __volatile__("btcl %2, %1\n\t"
6533 "setc %b0\n\t"
6534 "andl $1, %0\n\t"
6535 : "=q" (rc.u32)
6536 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6537 : "Ir" (iBit)
6538 , "m" (*(volatile long RT_FAR *)pvBitmap)
6539 : "memory"
6540 , "cc");
6541# else
6542 __asm
6543 {
6544 mov edx, [iBit]
6545# ifdef RT_ARCH_AMD64
6546 mov rax, [pvBitmap]
6547 btc [rax], edx
6548# else
6549 mov eax, [pvBitmap]
6550 btc [eax], edx
6551# endif
6552 setc al
6553 and eax, 1
6554 mov [rc.u32], eax
6555 }
6556# endif
6557
6558# else
6559 int32_t offBitmap = iBit / 32;
6560 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6561 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6562 >> (iBit & 31);
6563 rc.u32 &= 1;
6564# endif
6565 return rc.f;
6566}
6567#endif
6568
6569
6570/**
6571 * Atomically tests and toggles a bit in a bitmap, ordered.
6572 *
6573 * @returns true if the bit was set.
6574 * @returns false if the bit was clear.
6575 *
6576 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6577 * aligned, otherwise the memory access isn't atomic!
6578 * @param iBit The bit to test and toggle.
6579 *
6580 * @remarks x86: Requires a 386 or later.
6581 */
6582#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6583RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6584#else
6585DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6586{
6587 union { bool f; uint32_t u32; uint8_t u8; } rc;
6588 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6589# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6590# if RT_INLINE_ASM_GNU_STYLE
6591 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6592 "setc %b0\n\t"
6593 "andl $1, %0\n\t"
6594 : "=q" (rc.u32)
6595 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6596 : "Ir" (iBit)
6597 , "m" (*(volatile long RT_FAR *)pvBitmap)
6598 : "memory"
6599 , "cc");
6600# else
6601 __asm
6602 {
6603 mov edx, [iBit]
6604# ifdef RT_ARCH_AMD64
6605 mov rax, [pvBitmap]
6606 lock btc [rax], edx
6607# else
6608 mov eax, [pvBitmap]
6609 lock btc [eax], edx
6610# endif
6611 setc al
6612 and eax, 1
6613 mov [rc.u32], eax
6614 }
6615# endif
6616
6617# else
6618 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6619 >> (iBit & 31);
6620 rc.u32 &= 1;
6621# endif
6622 return rc.f;
6623}
6624#endif
6625
6626
6627/**
6628 * Tests if a bit in a bitmap is set.
6629 *
6630 * @returns true if the bit is set.
6631 * @returns false if the bit is clear.
6632 *
6633 * @param pvBitmap Pointer to the bitmap (little endian).
6634 * @param iBit The bit to test.
6635 *
6636 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6637 * However, doing so will yield better performance as well as avoiding
6638 * traps accessing the last bits in the bitmap.
6639 */
6640#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6641RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6642#else
6643DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6644{
6645 union { bool f; uint32_t u32; uint8_t u8; } rc;
6646# if RT_INLINE_ASM_USES_INTRIN
6647 rc.u32 = _bittest((long *)pvBitmap, iBit);
6648
6649# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6650# if RT_INLINE_ASM_GNU_STYLE
6651
6652 __asm__ __volatile__("btl %2, %1\n\t"
6653 "setc %b0\n\t"
6654 "andl $1, %0\n\t"
6655 : "=q" (rc.u32)
6656 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6657 , "Ir" (iBit)
6658 : "memory"
6659 , "cc");
6660# else
6661 __asm
6662 {
6663 mov edx, [iBit]
6664# ifdef RT_ARCH_AMD64
6665 mov rax, [pvBitmap]
6666 bt [rax], edx
6667# else
6668 mov eax, [pvBitmap]
6669 bt [eax], edx
6670# endif
6671 setc al
6672 and eax, 1
6673 mov [rc.u32], eax
6674 }
6675# endif
6676
6677# else
6678 int32_t offBitmap = iBit / 32;
6679 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6680 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6681 rc.u32 &= 1;
6682# endif
6683 return rc.f;
6684}
6685#endif
6686
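/* A minimal allocation-bitmap sketch built on the bit operations above; the
 * bitmap is a 32-bit aligned uint32_t array and all names are illustrative only:
 *
 *      static uint32_t g_bmUsed[256 / 32];   // tracks 256 resources
 *
 *      DECLINLINE(bool) exampleClaim(int32_t iResource)
 *      {
 *          return !ASMBitTestAndSet(g_bmUsed, iResource);  // true if it was free and is now ours
 *      }
 *
 *      DECLINLINE(void) exampleRelease(int32_t iResource)
 *      {
 *          Assert(ASMBitTest(g_bmUsed, iResource));
 *          ASMBitClear(g_bmUsed, iResource);
 *      }
 */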
6687
6688/**
6689 * Clears a bit range within a bitmap.
6690 *
6691 * @param pvBitmap Pointer to the bitmap (little endian).
6692 * @param iBitStart The first bit to clear.
6693 * @param iBitEnd The first bit not to clear.
6694 */
6695DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6696{
6697 if (iBitStart < iBitEnd)
6698 {
6699 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6700 int32_t iStart = iBitStart & ~31;
6701 int32_t iEnd = iBitEnd & ~31;
6702 if (iStart == iEnd)
6703 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6704 else
6705 {
6706 /* bits in first dword. */
6707 if (iBitStart & 31)
6708 {
6709 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6710 pu32++;
6711 iBitStart = iStart + 32;
6712 }
6713
6714 /* whole dwords. */
6715 if (iBitStart != iEnd)
6716 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6717
6718 /* bits in last dword. */
6719 if (iBitEnd & 31)
6720 {
6721 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6722 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6723 }
6724 }
6725 }
6726}
6727
6728
6729/**
6730 * Sets a bit range within a bitmap.
6731 *
6732 * @param pvBitmap Pointer to the bitmap (little endian).
6733 * @param iBitStart The first bit to set.
6734 * @param iBitEnd The first bit not to set.
6735 */
6736DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6737{
6738 if (iBitStart < iBitEnd)
6739 {
6740 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6741 int32_t iStart = iBitStart & ~31;
6742 int32_t iEnd = iBitEnd & ~31;
6743 if (iStart == iEnd)
6744 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6745 else
6746 {
6747 /* bits in first dword. */
6748 if (iBitStart & 31)
6749 {
6750 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6751 pu32++;
6752 iBitStart = iStart + 32;
6753 }
6754
6755 /* whole dwords. */
6756 if (iBitStart != iEnd)
6757 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6758
6759 /* bits in last dword. */
6760 if (iBitEnd & 31)
6761 {
6762 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6763 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6764 }
6765 }
6766 }
6767}
6768
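/* A minimal sketch showing that iBitEnd is exclusive in both range functions
 * (the bitmap name is illustrative only):
 *
 *      uint32_t au32Bitmap[4] = {0};
 *      ASMBitSetRange(au32Bitmap, 8, 24);    // sets bits 8..23, 16 bits in total
 *      ASMBitClearRange(au32Bitmap, 8, 16);  // clears bits 8..15 again
 */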
6769
6770/**
6771 * Finds the first clear bit in a bitmap.
6772 *
6773 * @returns Index of the first zero bit.
6774 * @returns -1 if no clear bit was found.
6775 * @param pvBitmap Pointer to the bitmap (little endian).
6776 * @param cBits The number of bits in the bitmap. Multiple of 32.
6777 */
6778#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6779DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6780#else
6781DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6782{
6783 if (cBits)
6784 {
6785 int32_t iBit;
6786# if RT_INLINE_ASM_GNU_STYLE
6787 RTCCUINTREG uEAX, uECX, uEDI;
6788 cBits = RT_ALIGN_32(cBits, 32);
6789 __asm__ __volatile__("repe; scasl\n\t"
6790 "je 1f\n\t"
6791# ifdef RT_ARCH_AMD64
6792 "lea -4(%%rdi), %%rdi\n\t"
6793 "xorl (%%rdi), %%eax\n\t"
6794 "subq %5, %%rdi\n\t"
6795# else
6796 "lea -4(%%edi), %%edi\n\t"
6797 "xorl (%%edi), %%eax\n\t"
6798 "subl %5, %%edi\n\t"
6799# endif
6800 "shll $3, %%edi\n\t"
6801 "bsfl %%eax, %%edx\n\t"
6802 "addl %%edi, %%edx\n\t"
6803 "1:\t\n"
6804 : "=d" (iBit)
6805 , "=&c" (uECX)
6806 , "=&D" (uEDI)
6807 , "=&a" (uEAX)
6808 : "0" (0xffffffff)
6809 , "mr" (pvBitmap)
6810 , "1" (cBits >> 5)
6811 , "2" (pvBitmap)
6812 , "3" (0xffffffff)
6813 : "cc");
6814# else
6815 cBits = RT_ALIGN_32(cBits, 32);
6816 __asm
6817 {
6818# ifdef RT_ARCH_AMD64
6819 mov rdi, [pvBitmap]
6820 mov rbx, rdi
6821# else
6822 mov edi, [pvBitmap]
6823 mov ebx, edi
6824# endif
6825 mov edx, 0ffffffffh
6826 mov eax, edx
6827 mov ecx, [cBits]
6828 shr ecx, 5
6829 repe scasd
6830 je done
6831
6832# ifdef RT_ARCH_AMD64
6833 lea rdi, [rdi - 4]
6834 xor eax, [rdi]
6835 sub rdi, rbx
6836# else
6837 lea edi, [edi - 4]
6838 xor eax, [edi]
6839 sub edi, ebx
6840# endif
6841 shl edi, 3
6842 bsf edx, eax
6843 add edx, edi
6844 done:
6845 mov [iBit], edx
6846 }
6847# endif
6848 return iBit;
6849 }
6850 return -1;
6851}
6852#endif
6853
6854
6855/**
6856 * Finds the next clear bit in a bitmap.
6857 *
6858 * @returns Index of the next clear bit.
6859 * @returns -1 if no clear bit was found.
6860 * @param pvBitmap Pointer to the bitmap (little endian).
6861 * @param cBits The number of bits in the bitmap. Multiple of 32.
6862 * @param iBitPrev The bit returned from the last search.
6863 * The search will start at iBitPrev + 1.
6864 */
6865#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6866DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6867#else
6868DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6869{
6870 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6871 int iBit = ++iBitPrev & 31;
6872 if (iBit)
6873 {
6874 /*
6875 * Inspect the 32-bit word containing the unaligned bit.
6876 */
6877 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6878
6879# if RT_INLINE_ASM_USES_INTRIN
6880 unsigned long ulBit = 0;
6881 if (_BitScanForward(&ulBit, u32))
6882 return ulBit + iBitPrev;
6883# else
6884# if RT_INLINE_ASM_GNU_STYLE
6885 __asm__ __volatile__("bsf %1, %0\n\t"
6886 "jnz 1f\n\t"
6887 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6888 "1:\n\t"
6889 : "=r" (iBit)
6890 : "r" (u32)
6891 : "cc");
6892# else
6893 __asm
6894 {
6895 mov edx, [u32]
6896 bsf eax, edx
6897 jnz done
6898 mov eax, 0ffffffffh
6899 done:
6900 mov [iBit], eax
6901 }
6902# endif
6903 if (iBit >= 0)
6904 return iBit + (int)iBitPrev;
6905# endif
6906
6907 /*
6908 * Skip ahead and see if there is anything left to search.
6909 */
6910 iBitPrev |= 31;
6911 iBitPrev++;
6912 if (cBits <= (uint32_t)iBitPrev)
6913 return -1;
6914 }
6915
6916 /*
6917 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6918 */
6919 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6920 if (iBit >= 0)
6921 iBit += iBitPrev;
6922 return iBit;
6923}
6924#endif
6925
6926
6927/**
6928 * Finds the first set bit in a bitmap.
6929 *
6930 * @returns Index of the first set bit.
6931 * @returns -1 if no set bit was found.
6932 * @param pvBitmap Pointer to the bitmap (little endian).
6933 * @param cBits The number of bits in the bitmap. Multiple of 32.
6934 */
6935#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6936DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6937#else
6938DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6939{
6940 if (cBits)
6941 {
6942 int32_t iBit;
6943# if RT_INLINE_ASM_GNU_STYLE
6944 RTCCUINTREG uEAX, uECX, uEDI;
6945 cBits = RT_ALIGN_32(cBits, 32);
6946 __asm__ __volatile__("repe; scasl\n\t"
6947 "je 1f\n\t"
6948# ifdef RT_ARCH_AMD64
6949 "lea -4(%%rdi), %%rdi\n\t"
6950 "movl (%%rdi), %%eax\n\t"
6951 "subq %5, %%rdi\n\t"
6952# else
6953 "lea -4(%%edi), %%edi\n\t"
6954 "movl (%%edi), %%eax\n\t"
6955 "subl %5, %%edi\n\t"
6956# endif
6957 "shll $3, %%edi\n\t"
6958 "bsfl %%eax, %%edx\n\t"
6959 "addl %%edi, %%edx\n\t"
6960 "1:\t\n"
6961 : "=d" (iBit)
6962 , "=&c" (uECX)
6963 , "=&D" (uEDI)
6964 , "=&a" (uEAX)
6965 : "0" (0xffffffff)
6966 , "mr" (pvBitmap)
6967 , "1" (cBits >> 5)
6968 , "2" (pvBitmap)
6969 , "3" (0)
6970 : "cc");
6971# else
6972 cBits = RT_ALIGN_32(cBits, 32);
6973 __asm
6974 {
6975# ifdef RT_ARCH_AMD64
6976 mov rdi, [pvBitmap]
6977 mov rbx, rdi
6978# else
6979 mov edi, [pvBitmap]
6980 mov ebx, edi
6981# endif
6982 mov edx, 0ffffffffh
6983 xor eax, eax
6984 mov ecx, [cBits]
6985 shr ecx, 5
6986 repe scasd
6987 je done
6988# ifdef RT_ARCH_AMD64
6989 lea rdi, [rdi - 4]
6990 mov eax, [rdi]
6991 sub rdi, rbx
6992# else
6993 lea edi, [edi - 4]
6994 mov eax, [edi]
6995 sub edi, ebx
6996# endif
6997 shl edi, 3
6998 bsf edx, eax
6999 add edx, edi
7000 done:
7001 mov [iBit], edx
7002 }
7003# endif
7004 return iBit;
7005 }
7006 return -1;
7007}
7008#endif
7009
7010
7011/**
7012 * Finds the next set bit in a bitmap.
7013 *
7014 * @returns Index of the next set bit.
7015 * @returns -1 if no set bit was found.
7016 * @param pvBitmap Pointer to the bitmap (little endian).
7017 * @param cBits The number of bits in the bitmap. Multiple of 32.
7018 * @param iBitPrev The bit returned from the last search.
7019 * The search will start at iBitPrev + 1.
7020 */
7021#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7022DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7023#else
7024DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7025{
7026 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7027 int iBit = ++iBitPrev & 31;
7028 if (iBit)
7029 {
7030 /*
7031 * Inspect the 32-bit word containing the unaligned bit.
7032 */
7033 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7034
7035# if RT_INLINE_ASM_USES_INTRIN
7036 unsigned long ulBit = 0;
7037 if (_BitScanForward(&ulBit, u32))
7038 return ulBit + iBitPrev;
7039# else
7040# if RT_INLINE_ASM_GNU_STYLE
7041 __asm__ __volatile__("bsf %1, %0\n\t"
7042 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7043 "movl $-1, %0\n\t"
7044 "1:\n\t"
7045 : "=r" (iBit)
7046 : "r" (u32)
7047 : "cc");
7048# else
7049 __asm
7050 {
7051 mov edx, [u32]
7052 bsf eax, edx
7053 jnz done
7054 mov eax, 0ffffffffh
7055 done:
7056 mov [iBit], eax
7057 }
7058# endif
7059 if (iBit >= 0)
7060 return iBit + (int)iBitPrev;
7061# endif
7062
7063 /*
7064 * Skip ahead and see if there is anything left to search.
7065 */
7066 iBitPrev |= 31;
7067 iBitPrev++;
7068 if (cBits <= (uint32_t)iBitPrev)
7069 return -1;
7070 }
7071
7072 /*
7073 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
7074 */
7075 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7076 if (iBit >= 0)
7077 iBit += iBitPrev;
7078 return iBit;
7079}
7080#endif
7081
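/* A minimal sketch of the intended iteration pattern over all set bits,
 * assuming au32Bitmap is a 32-bit aligned bitmap array (illustrative only):
 *
 *      uint32_t const cBits = sizeof(au32Bitmap) * 8;
 *      int32_t iBit = ASMBitFirstSet(au32Bitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          // ... process bit number iBit ...
 *          iBit = ASMBitNextSet(au32Bitmap, cBits, (uint32_t)iBit);
 *      }
 */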
7082
7083/**
7084 * Finds the first bit which is set in the given 32-bit integer.
7085 * Bits are numbered from 1 (least significant) to 32.
7086 *
7087 * @returns index [1..32] of the first set bit.
7088 * @returns 0 if all bits are cleared.
7089 * @param u32 Integer to search for set bits.
7090 * @remarks Similar to ffs() in BSD.
7091 */
7092#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7093RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7094#else
7095DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7096{
7097# if RT_INLINE_ASM_USES_INTRIN
7098 unsigned long iBit;
7099 if (_BitScanForward(&iBit, u32))
7100 iBit++;
7101 else
7102 iBit = 0;
7103
7104# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7105# if RT_INLINE_ASM_GNU_STYLE
7106 uint32_t iBit;
7107 __asm__ __volatile__("bsf %1, %0\n\t"
7108 "jnz 1f\n\t"
7109 "xorl %0, %0\n\t"
7110 "jmp 2f\n"
7111 "1:\n\t"
7112 "incl %0\n"
7113 "2:\n\t"
7114 : "=r" (iBit)
7115 : "rm" (u32)
7116 : "cc");
7117# else
7118 uint32_t iBit;
7119 _asm
7120 {
7121 bsf eax, [u32]
7122 jnz found
7123 xor eax, eax
7124 jmp done
7125 found:
7126 inc eax
7127 done:
7128 mov [iBit], eax
7129 }
7130# endif
7131
7132# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7133 /*
7134 * Using the "count leading zeros (clz)" instruction here because there
7135 * is no dedicated instruction to get the first set bit.
7136 * Need to reverse the bits in the value with "rbit" first because
7137 * "clz" starts counting from the most significant bit.
7138 */
7139 uint32_t iBit;
7140 __asm__ __volatile__(
7141# if defined(RT_ARCH_ARM64)
7142 "rbit %w[uVal], %w[uVal]\n\t"
7143 "clz %w[iBit], %w[uVal]\n\t"
7144# else
7145 "rbit %[uVal], %[uVal]\n\t"
7146 "clz %[iBit], %[uVal]\n\t"
7147# endif
7148 : [uVal] "=r" (u32)
7149 , [iBit] "=r" (iBit)
7150 : "[uVal]" (u32));
7151 if (iBit != 32)
7152 iBit++;
7153 else
7154 iBit = 0; /* No bit set. */
7155
7156# else
7157# error "Port me"
7158# endif
7159 return iBit;
7160}
7161#endif
7162
7163
7164/**
7165 * Finds the first bit which is set in the given 32-bit integer.
7166 * Bits are numbered from 1 (least significant) to 32.
7167 *
7168 * @returns index [1..32] of the first set bit.
7169 * @returns 0 if all bits are cleared.
7170 * @param i32 Integer to search for set bits.
7171 * @remark Similar to ffs() in BSD.
7172 */
7173DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7174{
7175 return ASMBitFirstSetU32((uint32_t)i32);
7176}
7177
7178
7179/**
7180 * Finds the first bit which is set in the given 64-bit integer.
7181 *
7182 * Bits are numbered from 1 (least significant) to 64.
7183 *
7184 * @returns index [1..64] of the first set bit.
7185 * @returns 0 if all bits are cleared.
7186 * @param u64 Integer to search for set bits.
7187 * @remarks Similar to ffs() in BSD.
7188 */
7189#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7190RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7191#else
7192DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7193{
7194# if RT_INLINE_ASM_USES_INTRIN
7195 unsigned long iBit;
7196# if ARCH_BITS == 64
7197 if (_BitScanForward64(&iBit, u64))
7198 iBit++;
7199 else
7200 iBit = 0;
7201# else
7202 if (_BitScanForward(&iBit, (uint32_t)u64))
7203 iBit++;
7204 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7205 iBit += 33;
7206 else
7207 iBit = 0;
7208# endif
7209
7210# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7211 uint64_t iBit;
7212 __asm__ __volatile__("bsfq %1, %0\n\t"
7213 "jnz 1f\n\t"
7214 "xorl %k0, %k0\n\t"
7215 "jmp 2f\n"
7216 "1:\n\t"
7217 "incl %k0\n"
7218 "2:\n\t"
7219 : "=r" (iBit)
7220 : "rm" (u64)
7221 : "cc");
7222
7223# elif defined(RT_ARCH_ARM64)
7224 uint64_t iBit;
7225 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7226 "clz %[iBit], %[uVal]\n\t"
7227 : [uVal] "=r" (u64)
7228 , [iBit] "=r" (iBit)
7229 : "[uVal]" (u64));
7230 if (iBit != 64)
7231 iBit++;
7232 else
7233 iBit = 0; /* No bit set. */
7234
7235# else
7236 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7237 if (!iBit)
7238 {
7239 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7240 if (iBit)
7241 iBit += 32;
7242 }
7243# endif
7244 return (unsigned)iBit;
7245}
7246#endif
7247
7248
7249/**
7250 * Finds the first bit which is set in the given 16-bit integer.
7251 *
7252 * Bits are numbered from 1 (least significant) to 16.
7253 *
7254 * @returns index [1..16] of the first set bit.
7255 * @returns 0 if all bits are cleared.
7256 * @param u16 Integer to search for set bits.
7257 * @remarks For 16-bit bs3kit code.
7258 */
7259#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7260RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7261#else
7262DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7263{
7264 return ASMBitFirstSetU32((uint32_t)u16);
7265}
7266#endif
7267
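/* A few concrete values illustrating the 1-based return convention shared by
 * the ASMBitFirstSetXX functions:
 *
 *      Assert(ASMBitFirstSetU32(0)                    == 0);   // no bit set
 *      Assert(ASMBitFirstSetU32(1)                    == 1);   // bit 0 reports as 1
 *      Assert(ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32);
 *      Assert(ASMBitFirstSetU64(RT_BIT_64(33))        == 34);
 */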
7268
7269/**
7270 * Finds the last bit which is set in the given 32-bit integer.
7271 * Bits are numbered from 1 (least significant) to 32.
7272 *
7273 * @returns index [1..32] of the last set bit.
7274 * @returns 0 if all bits are cleared.
7275 * @param u32 Integer to search for set bits.
7276 * @remark Similar to fls() in BSD.
7277 */
7278#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7279RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7280#else
7281DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7282{
7283# if RT_INLINE_ASM_USES_INTRIN
7284 unsigned long iBit;
7285 if (_BitScanReverse(&iBit, u32))
7286 iBit++;
7287 else
7288 iBit = 0;
7289
7290# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7291# if RT_INLINE_ASM_GNU_STYLE
7292 uint32_t iBit;
7293 __asm__ __volatile__("bsrl %1, %0\n\t"
7294 "jnz 1f\n\t"
7295 "xorl %0, %0\n\t"
7296 "jmp 2f\n"
7297 "1:\n\t"
7298 "incl %0\n"
7299 "2:\n\t"
7300 : "=r" (iBit)
7301 : "rm" (u32)
7302 : "cc");
7303# else
7304 uint32_t iBit;
7305 _asm
7306 {
7307 bsr eax, [u32]
7308 jnz found
7309 xor eax, eax
7310 jmp done
7311 found:
7312 inc eax
7313 done:
7314 mov [iBit], eax
7315 }
7316# endif
7317
7318# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7319 uint32_t iBit;
7320 __asm__ __volatile__(
7321# if defined(RT_ARCH_ARM64)
7322 "clz %w[iBit], %w[uVal]\n\t"
7323# else
7324 "clz %[iBit], %[uVal]\n\t"
7325# endif
7326 : [iBit] "=r" (iBit)
7327 : [uVal] "r" (u32));
7328 iBit = 32 - iBit;
7329
7330# else
7331# error "Port me"
7332# endif
7333 return iBit;
7334}
7335#endif
7336
7337
7338/**
7339 * Finds the last bit which is set in the given 32-bit integer.
7340 * Bits are numbered from 1 (least significant) to 32.
7341 *
7342 * @returns index [1..32] of the last set bit.
7343 * @returns 0 if all bits are cleared.
7344 * @param i32 Integer to search for set bits.
7345 * @remark Similar to fls() in BSD.
7346 */
7347DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7348{
7349 return ASMBitLastSetU32((uint32_t)i32);
7350}
7351
7352
7353/**
7354 * Finds the last bit which is set in the given 64-bit integer.
7355 *
7356 * Bits are numbered from 1 (least significant) to 64.
7357 *
7358 * @returns index [1..64] of the last set bit.
7359 * @returns 0 if all bits are cleared.
7360 * @param u64 Integer to search for set bits.
7361 * @remark Similar to fls() in BSD.
7362 */
7363#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7364RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7365#else
7366DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7367{
7368# if RT_INLINE_ASM_USES_INTRIN
7369 unsigned long iBit;
7370# if ARCH_BITS == 64
7371 if (_BitScanReverse64(&iBit, u64))
7372 iBit++;
7373 else
7374 iBit = 0;
7375# else
7376 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7377 iBit += 33;
7378 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7379 iBit++;
7380 else
7381 iBit = 0;
7382# endif
7383
7384# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7385 uint64_t iBit;
7386 __asm__ __volatile__("bsrq %1, %0\n\t"
7387 "jnz 1f\n\t"
7388 "xorl %k0, %k0\n\t"
7389 "jmp 2f\n"
7390 "1:\n\t"
7391 "incl %k0\n"
7392 "2:\n\t"
7393 : "=r" (iBit)
7394 : "rm" (u64)
7395 : "cc");
7396
7397# elif defined(RT_ARCH_ARM64)
7398 uint64_t iBit;
7399 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7400 : [iBit] "=r" (iBit)
7401 : [uVal] "r" (u64));
7402 iBit = 64 - iBit;
7403
7404# else
7405 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7406 if (iBit)
7407 iBit += 32;
7408 else
7409 iBit = ASMBitLastSetU32((uint32_t)u64);
7410# endif
7411 return (unsigned)iBit;
7412}
7413#endif
7414
7415
7416/**
7417 * Finds the last bit which is set in the given 16-bit integer.
7418 *
7419 * Bits are numbered from 1 (least significant) to 16.
7420 *
7421 * @returns index [1..16] of the last set bit.
7422 * @returns 0 if all bits are cleared.
7423 * @param u16 Integer to search for set bits.
7424 * @remarks For 16-bit bs3kit code.
7425 */
7426#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7427RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7428#else
7429DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7430{
7431 return ASMBitLastSetU32((uint32_t)u16);
7432}
7433#endif
7434
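/* A minimal sketch, assuming a caller that wants the number of bits needed to
 * represent a value, i.e. floor(log2(u)) + 1 for non-zero input (the helper
 * name is illustrative only):
 *
 *      DECLINLINE(unsigned) exampleBitWidthU32(uint32_t u32)
 *      {
 *          return ASMBitLastSetU32(u32);   // 0 for 0, otherwise 1-based index of the MSB
 *      }
 *      // e.g. exampleBitWidthU32(0x1000) == 13 and exampleBitWidthU32(1) == 1.
 */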
7435
7436/**
7437 * Rotate 32-bit unsigned value to the left by @a cShift.
7438 *
7439 * @returns Rotated value.
7440 * @param u32 The value to rotate.
7441 * @param cShift How many bits to rotate by.
7442 */
7443#ifdef __WATCOMC__
7444RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7445#else
7446DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7447{
7448# if RT_INLINE_ASM_USES_INTRIN
7449 return _rotl(u32, cShift);
7450
7451# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7452 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7453 return u32;
7454
7455# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7456 __asm__ __volatile__(
7457# if defined(RT_ARCH_ARM64)
7458 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7459# else
7460 "ror %[uRet], %[uVal], %[cShift]\n\t"
7461# endif
7462 : [uRet] "=r" (u32)
7463 : [uVal] "[uRet]" (u32)
7464 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7465 return u32;
7466
7467# else
7468 cShift &= 31;
7469 return (u32 << cShift) | (u32 >> ((32 - cShift) & 31)); /* mask the right shift too: cShift == 0 must not shift by 32 (undefined in C). */
7470# endif
7471}
7472#endif
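/* A brief sketch (hypothetical values only) of the left-rotate semantics:
 * @code
 *     uint32_t u = ASMRotateLeftU32(UINT32_C(0x80000001), 1);   // -> 0x00000003
 *     uint32_t v = ASMRotateLeftU32(UINT32_C(0x12345678), 8);   // -> 0x34567812
 * @endcode
 */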
7473
7474
7475/**
7476 * Rotate 32-bit unsigned value to the right by @a cShift.
7477 *
7478 * @returns Rotated value.
7479 * @param u32 The value to rotate.
7480 * @param cShift How many bits to rotate by.
7481 */
7482#ifdef __WATCOMC__
7483RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7484#else
7485DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7486{
7487# if RT_INLINE_ASM_USES_INTRIN
7488 return _rotr(u32, cShift);
7489
7490# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7491 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7492 return u32;
7493
7494# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7495 __asm__ __volatile__(
7496# if defined(RT_ARCH_ARM64)
7497 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7498# else
7499 "ror %[uRet], %[uVal], %[cShift]\n\t"
7500# endif
7501 : [uRet] "=r" (u32)
7502 : [uVal] "[uRet]" (u32)
7503 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7504 return u32;
7505
7506# else
7507 cShift &= 31;
7508 return (u32 >> cShift) | (u32 << ((32 - cShift) & 31)); /* mask the left shift too: cShift == 0 must not shift by 32 (undefined in C). */
7509# endif
7510}
7511#endif
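/* The right-rotate counterpart, sketched with the same hypothetical values;
 * rotating right by N is equivalent to rotating left by 32 - N:
 * @code
 *     uint32_t u = ASMRotateRightU32(UINT32_C(0x00000003), 1);   // -> 0x80000001
 *     uint32_t v = ASMRotateRightU32(UINT32_C(0x34567812), 8);   // -> 0x12345678
 * @endcode
 */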
7512
7513
7514/**
7515 * Rotate 64-bit unsigned value to the left by @a cShift.
7516 *
7517 * @returns Rotated value.
7518 * @param u64 The value to rotate.
7519 * @param cShift How many bits to rotate by.
7520 */
7521DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7522{
7523#if RT_INLINE_ASM_USES_INTRIN
7524 return _rotl64(u64, cShift);
7525
7526#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7527 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7528 return u64;
7529
7530#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7531 uint32_t uSpill;
7532 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7533 "jz 1f\n\t"
7534 "xchgl %%eax, %%edx\n\t"
7535 "1:\n\t"
7536 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7537 "jz 2f\n\t"
7538 "movl %%edx, %2\n\t" /* save the hi value in %2 (uSpill). */
7539 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7540 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7541 "2:\n\t" /* } */
7542 : "=A" (u64)
7543 , "=c" (cShift)
7544 , "=r" (uSpill)
7545 : "0" (u64)
7546 , "1" (cShift)
7547 : "cc");
7548 return u64;
7549
7550#elif defined(RT_ARCH_ARM64)
7551 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7552 : [uRet] "=r" (u64)
7553 : [uVal] "[uRet]" (u64)
7554 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7555 return u64;
7556
7557#else
7558 cShift &= 63;
7559 return (u64 << cShift) | (u64 >> ((64 - cShift) & 63)); /* mask the right shift too: cShift == 0 must not shift by 64 (undefined in C). */
7560#endif
7561}
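/* A plain C sketch of the double-shift algorithm used by the 32-bit x86 branch
 * above; the helper name is hypothetical and only illustrates what the inline
 * assembly computes:
 * @code
 *     static uint64_t RotateLeftU64Sketch(uint64_t u64, uint32_t cShift)
 *     {
 *         uint32_t uLo = (uint32_t)u64;
 *         uint32_t uHi = (uint32_t)(u64 >> 32);
 *         if (cShift & 0x20)              // rotating by 32 or more swaps the halves...
 *         {
 *             uint32_t uTmp = uLo;
 *             uLo = uHi;
 *             uHi = uTmp;
 *         }
 *         cShift &= 0x1f;                 // ...and the remainder is handled below.
 *         if (cShift)                     // shld: each half is shifted left, taking MSBits from the other.
 *         {
 *             uint32_t const uSavedHi = uHi;
 *             uHi = (uHi << cShift) | (uLo      >> (32 - cShift));
 *             uLo = (uLo << cShift) | (uSavedHi >> (32 - cShift));
 *         }
 *         return ((uint64_t)uHi << 32) | uLo;
 *     }
 * @endcode
 */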
7562
7563
7564/**
7565 * Rotate 64-bit unsigned value to the right by @a cShift.
7566 *
7567 * @returns Rotated value.
7568 * @param u64 The value to rotate.
7569 * @param cShift How many bits to rotate by.
7570 */
7571DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7572{
7573#if RT_INLINE_ASM_USES_INTRIN
7574 return _rotr64(u64, cShift);
7575
7576#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7577 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7578 return u64;
7579
7580#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7581 uint32_t uSpill;
7582 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7583 "jz 1f\n\t"
7584 "xchgl %%eax, %%edx\n\t"
7585 "1:\n\t"
7586 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7587 "jz 2f\n\t"
7588 "movl %%edx, %2\n\t" /* save the hi value in %2 (uSpill). */
7589 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7590 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7591 "2:\n\t" /* } */
7592 : "=A" (u64)
7593 , "=c" (cShift)
7594 , "=r" (uSpill)
7595 : "0" (u64)
7596 , "1" (cShift)
7597 : "cc");
7598 return u64;
7599
7600#elif defined(RT_ARCH_ARM64)
7601 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7602 : [uRet] "=r" (u64)
7603 : [uVal] "[uRet]" (u64)
7604 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7605 return u64;
7606
7607#else
7608 cShift &= 63;
7609 return (u64 >> cShift) | (u64 << ((64 - cShift) & 63)); /* mask the left shift too: cShift == 0 must not shift by 64 (undefined in C). */
7610#endif
7611}
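/* A short sketch (hypothetical values only); as with the 32-bit variants, rotating
 * right by N is equivalent to rotating left by 64 - N:
 * @code
 *     uint64_t u = ASMRotateRightU64(UINT64_C(1), 4);   // -> 0x1000000000000000
 *     uint64_t v = ASMRotateLeftU64(u, 4);              // -> 1 again
 * @endcode
 */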
7612
7613/** @} */
7614
7615
7616/** @} */
7617
7618/*
7619 * Include #pragma aux definitions for Watcom C/C++.
7620 */
7621#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7622# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7623# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7624# include "asm-watcom-x86-16.h"
7625#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7626# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7627# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7628# include "asm-watcom-x86-32.h"
7629#endif
7630
7631#endif /* !IPRT_INCLUDED_asm_h */
7632