VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 91991

Last change on this file since 91991 was 90652, checked in by vboxsync, 3 years ago

iprt/asm.h: Added another version of ASMAtomicCmpWriteU128 that takes the constants as 64-bit values. [doxygen fix] bugref:6695

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 233.8 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400 or later compiler.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedCompareExchange128)
80# pragma intrinsic(_InterlockedExchange64)
81# pragma intrinsic(_InterlockedExchangeAdd64)
82# pragma intrinsic(_InterlockedAnd64)
83# pragma intrinsic(_InterlockedOr64)
84# pragma intrinsic(_InterlockedIncrement64)
85# pragma intrinsic(_InterlockedDecrement64)
86# endif
87#endif
88
89/*
90 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
91 */
92#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-16.h"
94#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-32.h"
96#endif
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is
103 * that the former will complete outstanding reads and writes before
104 * continuing, while the latter doesn't make any promises about the
105 * order. Ordered operations don't, it seems, make any 100% promise
106 * with regard to whether the operation will complete before any subsequent
107 * memory access. (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while
110 * ASMAtomicUoSomething are unordered (note the Uo).
111 *
112 * Please note that ordered operations do not necessarily imply a
113 * compiler (memory) barrier. The user has to use the
114 * ASMCompilerBarrier() macro when that is deemed necessary.
115 *
116 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
117 * to reorder or even optimize assembler instructions away. For
118 * instance, in the following code the second rdmsr instruction is
119 * optimized away because gcc treats that instruction as deterministic:
120 *
121 * @code
122 * static inline uint32_t rdmsr_low(int idx)
123 * {
124 * uint32_t low;
125 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
126 * return low;
 * }
127 * ...
128 * uint32_t msr1 = rdmsr_low(1);
129 * foo(msr1);
130 * msr1 = rdmsr_low(1);
131 * bar(msr1);
132 * @endcode
133 *
134 * The input parameter of rdmsr_low is the same for both calls and
135 * therefore gcc will use the result of the first call as input
136 * parameter for bar() as well. For rdmsr this is not acceptable as
137 * this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
143
144/** @def RT_INLINE_ASM_GCC_4_3_X_X86
145 * Used to work around some 4.3.x register allocation issues in this version of
146 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
147 * definitely not for 5.x */
148#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
149# define RT_INLINE_ASM_GCC_4_3_X_X86 1
150#else
151# define RT_INLINE_ASM_GCC_4_3_X_X86 0
152#endif
153
154/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
155 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
156 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
157 * mode, x86.
158 *
159 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
160 * when in PIC mode on x86.
161 */
162#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
163# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# elif ( (defined(PIC) || defined(__PIC__)) \
168 && defined(RT_ARCH_X86) \
169 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
170 || defined(RT_OS_DARWIN)) )
171# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
172# else
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
174# endif
175#endif
176
177
178/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
179 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
180#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
181# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
182#else
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
184#endif
185
186/*
187 * ARM is great fun.
188 */
189#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
190
191# define RTASM_ARM_NO_BARRIER
192# ifdef RT_ARCH_ARM64
193# define RTASM_ARM_NO_BARRIER_IN_REG
194# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
195# define RTASM_ARM_DSB_SY "dsb sy\n\t"
196# define RTASM_ARM_DSB_SY_IN_REG
197# define RTASM_ARM_DSB_SY_COMMA_IN_REG
198# define RTASM_ARM_DMB_SY "dmb sy\n\t"
199# define RTASM_ARM_DMB_SY_IN_REG
200# define RTASM_ARM_DMB_SY_COMMA_IN_REG
201# define RTASM_ARM_DMB_ST "dmb st\n\t"
202# define RTASM_ARM_DMB_ST_IN_REG
203# define RTASM_ARM_DMB_ST_COMMA_IN_REG
204# define RTASM_ARM_DMB_LD "dmb ld\n\t"
205# define RTASM_ARM_DMB_LD_IN_REG
206# define RTASM_ARM_DMB_LD_COMMA_IN_REG
207# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
208# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
209 uint32_t rcSpill; \
210 uint32_t u32NewRet; \
211 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
212 RTASM_ARM_##barrier_type /* before label? */ \
213 "ldaxr %w[uNew], %[pMem]\n\t" \
214 modify64 \
215 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
216 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
217 : [pMem] "+m" (*a_pu32Mem) \
218 , [uNew] "=&r" (u32NewRet) \
219 , [rc] "=&r" (rcSpill) \
220 : in_reg \
221 : "cc")
222# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
223 uint32_t rcSpill; \
224 uint32_t u32OldRet; \
225 uint32_t u32NewSpill; \
226 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
227 RTASM_ARM_##barrier_type /* before label? */ \
228 "ldaxr %w[uOld], %[pMem]\n\t" \
229 modify64 \
230 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
231 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
232 : [pMem] "+m" (*a_pu32Mem) \
233 , [uOld] "=&r" (u32OldRet) \
234 , [uNew] "=&r" (u32NewSpill) \
235 , [rc] "=&r" (rcSpill) \
236 : in_reg \
237 : "cc")
238# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
239 uint32_t rcSpill; \
240 uint64_t u64NewRet; \
241 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
242 RTASM_ARM_##barrier_type /* before label? */ \
243 "ldaxr %[uNew], %[pMem]\n\t" \
244 modify64 \
245 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
246 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
247 : [pMem] "+m" (*a_pu64Mem) \
248 , [uNew] "=&r" (u64NewRet) \
249 , [rc] "=&r" (rcSpill) \
250 : in_reg \
251 : "cc")
252# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
253 uint32_t rcSpill; \
254 uint64_t u64OldRet; \
255 uint64_t u64NewSpill; \
256 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
257 RTASM_ARM_##barrier_type /* before label? */ \
258 "ldaxr %[uOld], %[pMem]\n\t" \
259 modify64 \
260 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
261 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
262 : [pMem] "+m" (*a_pu64Mem) \
263 , [uOld] "=&r" (u64OldRet) \
264 , [uNew] "=&r" (u64NewSpill) \
265 , [rc] "=&r" (rcSpill) \
266 : in_reg \
267 : "cc")
268
269# else /* RT_ARCH_ARM32 */
270# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
271# if RT_ARCH_ARM32 >= 7
272# warning armv7
273# define RTASM_ARM_NO_BARRIER_IN_REG
274# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
275# define RTASM_ARM_DSB_SY "dsb sy\n\t"
276# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
277# define RTASM_ARM_DMB_SY "dmb sy\n\t"
278# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_ST "dmb st\n\t"
280# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_LD "dmb ld\n\t"
282# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
283
284# elif RT_ARCH_ARM32 >= 6
285# warning armv6
286# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
287# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
288# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
289# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
291# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
292# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# elif RT_ARCH_ARM32 >= 4
295# warning armv5 or older
296# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
297# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
298# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
299# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
300# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# else
305# error "huh? Odd RT_ARCH_ARM32 value!"
306# endif
307# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
308# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
309# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
310# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
311# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
312 uint32_t rcSpill; \
313 uint32_t u32NewRet; \
314 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
315 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
316 "ldrex %[uNew], %[pMem]\n\t" \
317 modify32 \
318 "strex %[rc], %[uNew], %[pMem]\n\t" \
319 "cmp %[rc], #0\n\t" \
320 "bne .Ltry_again_" #name "_%=\n\t" \
321 : [pMem] "+m" (*a_pu32Mem) \
322 , [uNew] "=&r" (u32NewRet) \
323 , [rc] "=&r" (rcSpill) \
324 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
325 , in_reg \
326 : "cc")
327# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
328 uint32_t rcSpill; \
329 uint32_t u32OldRet; \
330 uint32_t u32NewSpill; \
331 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
332 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
333 "ldrex %[uOld], %[pMem]\n\t" \
334 modify32 \
335 "strex %[rc], %[uNew], %[pMem]\n\t" \
336 "cmp %[rc], #0\n\t" \
337 "bne .Ltry_again_" #name "_%=\n\t" \
338 : [pMem] "+m" (*a_pu32Mem) \
339 , [uOld] "=&r" (u32OldRet) \
340 , [uNew] "=&r" (u32NewSpill) \
341 , [rc] "=&r" (rcSpill) \
342 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
343 , in_reg \
344 : "cc")
345# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
346 uint32_t rcSpill; \
347 uint64_t u64NewRet; \
348 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
349 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
350 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
351 modify32 \
352 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
353 "cmp %[rc], #0\n\t" \
354 "bne .Ltry_again_" #name "_%=\n\t" \
355 : [pMem] "+m" (*a_pu64Mem), \
356 [uNew] "=&r" (u64NewRet), \
357 [rc] "=&r" (rcSpill) \
358 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
359 , in_reg \
360 : "cc")
361# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
362 uint32_t rcSpill; \
363 uint64_t u64OldRet; \
364 uint64_t u64NewSpill; \
365 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
366 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
367 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
368 modify32 \
369 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
370 "cmp %[rc], #0\n\t" \
371 "bne .Ltry_again_" #name "_%=\n\t" \
372 : [pMem] "+m" (*a_pu64Mem), \
373 [uOld] "=&r" (u64OldRet), \
374 [uNew] "=&r" (u64NewSpill), \
375 [rc] "=&r" (rcSpill) \
376 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
377 , in_reg \
378 : "cc")
379# endif /* RT_ARCH_ARM32 */
380#endif
381
382
383/** @def ASMReturnAddress
384 * Gets the return address of the current (or calling if you like) function or method.
385 */
386#ifdef _MSC_VER
387# ifdef __cplusplus
388extern "C"
389# endif
390void * _ReturnAddress(void);
391# pragma intrinsic(_ReturnAddress)
392# define ASMReturnAddress() _ReturnAddress()
393#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
394# define ASMReturnAddress() __builtin_return_address(0)
395#elif defined(__WATCOMC__)
396# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
397#else
398# error "Unsupported compiler."
399#endif
400
401
402/**
403 * Compiler memory barrier.
404 *
405 * Ensure that the compiler does not use any cached (register/tmp stack) memory
406 * values or any outstanding writes when returning from this function.
407 *
408 * This function must be used if non-volatile data is modified by a
409 * device or the VMM. Typical cases are port access, MMIO access,
410 * trapping instructions, etc.
411 */
412#if RT_INLINE_ASM_GNU_STYLE
413# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
414#elif RT_INLINE_ASM_USES_INTRIN
415# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
416#elif defined(__WATCOMC__)
417void ASMCompilerBarrier(void);
418#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
419DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
420{
421 __asm
422 {
423 }
424}
425#endif
426
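/* Usage sketch (illustrative only, not part of the API): a polling loop on a
 * plain, non-volatile flag that a device or the VMM may change behind the
 * compiler's back; g_fDone is a made-up variable for the example.
 * @code
 *      extern uint8_t g_fDone;
 *      while (!g_fDone)
 *      {
 *          ASMNopPause();
 *          ASMCompilerBarrier();   // force a fresh read of g_fDone on the next iteration
 *      }
 * @endcode
 */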
427
428/** @def ASMBreakpoint
429 * Debugger Breakpoint.
430 * @deprecated Use RT_BREAKPOINT instead.
431 * @internal
432 */
433#define ASMBreakpoint() RT_BREAKPOINT()
434
435
436/**
437 * Spinloop hint for platforms that have it; an empty function on the other
438 * platforms.
439 *
440 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
441 * spin locks.
442 */
443#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
444RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
445#else
446DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
447{
448# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
449# if RT_INLINE_ASM_GNU_STYLE
450 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
451# else
452 __asm {
453 _emit 0f3h
454 _emit 090h
455 }
456# endif
457
458# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
459 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
460
461# else
462 /* dummy */
463# endif
464}
465#endif
466
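/* Usage sketch (illustrative only, not part of the API): ASMNopPause() in the
 * busy-wait loop of a trivial test-and-set spinlock built from the ordered
 * exchange declared further down in this file; s_uLock is a made-up variable.
 * @code
 *      static uint32_t volatile s_uLock = 0;   // 0 = free, 1 = taken
 *
 *      // acquire
 *      while (ASMAtomicXchgU32(&s_uLock, 1) != 0)
 *          ASMNopPause();
 *
 *      // ... critical section ...
 *
 *      // release
 *      ASMAtomicXchgU32(&s_uLock, 0);
 * @endcode
 */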
467
468/**
469 * Atomically Exchange an unsigned 8-bit value, ordered.
470 *
471 * @returns Current *pu8 value
472 * @param pu8 Pointer to the 8-bit variable to update.
473 * @param u8 The 8-bit value to assign to *pu8.
474 */
475#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
476RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
477#else
478DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
479{
480# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
481# if RT_INLINE_ASM_GNU_STYLE
482 __asm__ __volatile__("xchgb %0, %1\n\t"
483 : "=m" (*pu8)
484 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
485 : "1" (u8)
486 , "m" (*pu8));
487# else
488 __asm
489 {
490# ifdef RT_ARCH_AMD64
491 mov rdx, [pu8]
492 mov al, [u8]
493 xchg [rdx], al
494 mov [u8], al
495# else
496 mov edx, [pu8]
497 mov al, [u8]
498 xchg [edx], al
499 mov [u8], al
500# endif
501 }
502# endif
503 return u8;
504
505# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
506 uint32_t uOld;
507 uint32_t rcSpill;
508 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
509 RTASM_ARM_DMB_SY
510# if defined(RT_ARCH_ARM64)
511 "ldaxrb %w[uOld], %[pMem]\n\t"
512 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
513 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
514# else
515 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
516 "strexb %[rc], %[uNew], %[pMem]\n\t"
517 "cmp %[rc], #0\n\t"
518 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
519# endif
520 : [pMem] "+m" (*pu8)
521 , [uOld] "=&r" (uOld)
522 , [rc] "=&r" (rcSpill)
523 : [uNew] "r" ((uint32_t)u8)
524 RTASM_ARM_DMB_SY_COMMA_IN_REG
525 : "cc");
526 return (uint8_t)uOld;
527
528# else
529# error "Port me"
530# endif
531}
532#endif
533
534
535/**
536 * Atomically Exchange a signed 8-bit value, ordered.
537 *
538 * @returns Current *pi8 value
539 * @param pi8 Pointer to the 8-bit variable to update.
540 * @param i8 The 8-bit value to assign to *pi8.
541 */
542DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
543{
544 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
545}
546
547
548/**
549 * Atomically Exchange a bool value, ordered.
550 *
551 * @returns Current *pf value
552 * @param pf Pointer to the boolean variable to update.
553 * @param f The boolean value to assign to *pf.
554 */
555DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
556{
557#ifdef _MSC_VER
558 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
559#else
560 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#endif
562}
563
564
565/**
566 * Atomically Exchange an unsigned 16-bit value, ordered.
567 *
568 * @returns Current *pu16 value
569 * @param pu16 Pointer to the 16-bit variable to update.
570 * @param u16 The 16-bit value to assign to *pu16.
571 */
572#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
573RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
574#else
575DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
576{
577# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
578# if RT_INLINE_ASM_GNU_STYLE
579 __asm__ __volatile__("xchgw %0, %1\n\t"
580 : "=m" (*pu16)
581 , "=r" (u16)
582 : "1" (u16)
583 , "m" (*pu16));
584# else
585 __asm
586 {
587# ifdef RT_ARCH_AMD64
588 mov rdx, [pu16]
589 mov ax, [u16]
590 xchg [rdx], ax
591 mov [u16], ax
592# else
593 mov edx, [pu16]
594 mov ax, [u16]
595 xchg [edx], ax
596 mov [u16], ax
597# endif
598 }
599# endif
600 return u16;
601
602# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
603 uint32_t uOld;
604 uint32_t rcSpill;
605 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
606 RTASM_ARM_DMB_SY
607# if defined(RT_ARCH_ARM64)
608 "ldaxrh %w[uOld], %[pMem]\n\t"
609 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
610 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
611# else
612 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
613 "strexh %[rc], %[uNew], %[pMem]\n\t"
614 "cmp %[rc], #0\n\t"
615 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
616# endif
617 : [pMem] "+m" (*pu16)
618 , [uOld] "=&r" (uOld)
619 , [rc] "=&r" (rcSpill)
620 : [uNew] "r" ((uint32_t)u16)
621 RTASM_ARM_DMB_SY_COMMA_IN_REG
622 : "cc");
623 return (uint16_t)uOld;
624
625# else
626# error "Port me"
627# endif
628}
629#endif
630
631
632/**
633 * Atomically Exchange a signed 16-bit value, ordered.
634 *
635 * @returns Current *pi16 value
636 * @param pi16 Pointer to the 16-bit variable to update.
637 * @param i16 The 16-bit value to assign to *pi16.
638 */
639DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
640{
641 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
642}
643
644
645/**
646 * Atomically Exchange an unsigned 32-bit value, ordered.
647 *
648 * @returns Current *pu32 value
649 * @param pu32 Pointer to the 32-bit variable to update.
650 * @param u32 The 32-bit value to assign to *pu32.
651 *
652 * @remarks Does not work on 286 and earlier.
653 */
654#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
655RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
656#else
657DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
658{
659# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
660# if RT_INLINE_ASM_GNU_STYLE
661 __asm__ __volatile__("xchgl %0, %1\n\t"
662 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
663 , "=r" (u32)
664 : "1" (u32)
665 , "m" (*pu32));
666
667# elif RT_INLINE_ASM_USES_INTRIN
668 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
669
670# else
671 __asm
672 {
673# ifdef RT_ARCH_AMD64
674 mov rdx, [pu32]
675 mov eax, u32
676 xchg [rdx], eax
677 mov [u32], eax
678# else
679 mov edx, [pu32]
680 mov eax, u32
681 xchg [edx], eax
682 mov [u32], eax
683# endif
684 }
685# endif
686 return u32;
687
688# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
689 uint32_t uOld;
690 uint32_t rcSpill;
691 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
692 RTASM_ARM_DMB_SY
693# if defined(RT_ARCH_ARM64)
694 "ldaxr %w[uOld], %[pMem]\n\t"
695 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
696 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
697# else
698 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
699 "strex %[rc], %[uNew], %[pMem]\n\t"
700 "cmp %[rc], #0\n\t"
701 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
702# endif
703 : [pMem] "+m" (*pu32)
704 , [uOld] "=&r" (uOld)
705 , [rc] "=&r" (rcSpill)
706 : [uNew] "r" (u32)
707 RTASM_ARM_DMB_SY_COMMA_IN_REG
708 : "cc");
709 return uOld;
710
711# else
712# error "Port me"
713# endif
714}
715#endif
716
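/* Usage sketch (illustrative only, not part of the API): atomically fetching and
 * clearing a set of pending event bits in one ordered step; s_fPendingEvents and
 * ProcessEvents() are made up for the example.
 * @code
 *      static uint32_t volatile s_fPendingEvents = 0;
 *
 *      uint32_t fEvents = ASMAtomicXchgU32(&s_fPendingEvents, 0);
 *      if (fEvents)
 *          ProcessEvents(fEvents);
 * @endcode
 */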
717
718/**
719 * Atomically Exchange a signed 32-bit value, ordered.
720 *
721 * @returns Current *pi32 value
722 * @param pi32 Pointer to the 32-bit variable to update.
723 * @param i32 The 32-bit value to assign to *pi32.
724 */
725DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
726{
727 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
728}
729
730
731/**
732 * Atomically Exchange an unsigned 64-bit value, ordered.
733 *
734 * @returns Current *pu64 value
735 * @param pu64 Pointer to the 64-bit variable to update.
736 * @param u64 The 64-bit value to assign to *pu64.
737 *
738 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
739 */
740#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
741 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
742RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
743#else
744DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
745{
746# if defined(RT_ARCH_AMD64)
747# if RT_INLINE_ASM_USES_INTRIN
748 return _InterlockedExchange64((__int64 *)pu64, u64);
749
750# elif RT_INLINE_ASM_GNU_STYLE
751 __asm__ __volatile__("xchgq %0, %1\n\t"
752 : "=m" (*pu64)
753 , "=r" (u64)
754 : "1" (u64)
755 , "m" (*pu64));
756 return u64;
757# else
758 __asm
759 {
760 mov rdx, [pu64]
761 mov rax, [u64]
762 xchg [rdx], rax
763 mov [u64], rax
764 }
765 return u64;
766# endif
767
768# elif defined(RT_ARCH_X86)
769# if RT_INLINE_ASM_GNU_STYLE
770# if defined(PIC) || defined(__PIC__)
771 uint32_t u32EBX = (uint32_t)u64;
772 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
773 "xchgl %%ebx, %3\n\t"
774 "1:\n\t"
775 "lock; cmpxchg8b (%5)\n\t"
776 "jnz 1b\n\t"
777 "movl %3, %%ebx\n\t"
778 /*"xchgl %%esi, %5\n\t"*/
779 : "=A" (u64)
780 , "=m" (*pu64)
781 : "0" (*pu64)
782 , "m" ( u32EBX )
783 , "c" ( (uint32_t)(u64 >> 32) )
784 , "S" (pu64)
785 : "cc");
786# else /* !PIC */
787 __asm__ __volatile__("1:\n\t"
788 "lock; cmpxchg8b %1\n\t"
789 "jnz 1b\n\t"
790 : "=A" (u64)
791 , "=m" (*pu64)
792 : "0" (*pu64)
793 , "b" ( (uint32_t)u64 )
794 , "c" ( (uint32_t)(u64 >> 32) )
795 : "cc");
796# endif
797# else
798 __asm
799 {
800 mov ebx, dword ptr [u64]
801 mov ecx, dword ptr [u64 + 4]
802 mov edi, pu64
803 mov eax, dword ptr [edi]
804 mov edx, dword ptr [edi + 4]
805 retry:
806 lock cmpxchg8b [edi]
807 jnz retry
808 mov dword ptr [u64], eax
809 mov dword ptr [u64 + 4], edx
810 }
811# endif
812 return u64;
813
814# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
815 uint32_t rcSpill;
816 uint64_t uOld;
817 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
818 RTASM_ARM_DMB_SY
819# if defined(RT_ARCH_ARM64)
820 "ldaxr %[uOld], %[pMem]\n\t"
821 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
822 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
823# else
824 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
825 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
826 "cmp %[rc], #0\n\t"
827 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
828# endif
829 : [pMem] "+m" (*pu64)
830 , [uOld] "=&r" (uOld)
831 , [rc] "=&r" (rcSpill)
832 : [uNew] "r" (u64)
833 RTASM_ARM_DMB_SY_COMMA_IN_REG
834 : "cc");
835 return uOld;
836
837# else
838# error "Port me"
839# endif
840}
841#endif
842
843
844/**
845 * Atomically Exchange a signed 64-bit value, ordered.
846 *
847 * @returns Current *pi64 value
848 * @param pi64 Pointer to the 64-bit variable to update.
849 * @param i64 The 64-bit value to assign to *pi64.
850 */
851DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
852{
853 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
854}
855
856
857/**
858 * Atomically Exchange a size_t value, ordered.
859 *
860 * @returns Current *puDst value
861 * @param puDst Pointer to the size_t variable to update.
862 * @param uNew The new value to assign to *puDst.
863 */
864DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
865{
866#if ARCH_BITS == 16
867 AssertCompile(sizeof(size_t) == 2);
868 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
869#elif ARCH_BITS == 32
870 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 64
872 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
873#else
874# error "ARCH_BITS is bogus"
875#endif
876}
877
878
879/**
880 * Atomically Exchange a pointer value, ordered.
881 *
882 * @returns Current *ppv value
883 * @param ppv Pointer to the pointer variable to update.
884 * @param pv The pointer value to assign to *ppv.
885 */
886DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
887{
888#if ARCH_BITS == 32 || ARCH_BITS == 16
889 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
890#elif ARCH_BITS == 64
891 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
892#else
893# error "ARCH_BITS is bogus"
894#endif
895}
896
897
898/**
899 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
900 *
901 * @returns Current *ppv value
902 * @param ppv Pointer to the pointer variable to update.
903 * @param pv The pointer value to assign to *ppv.
904 * @param Type The type of *ppv, sans volatile.
905 */
906#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
907# define ASMAtomicXchgPtrT(ppv, pv, Type) \
908 __extension__ \
909 ({\
910 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
911 Type const pvTypeChecked = (pv); \
912 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
913 pvTypeCheckedRet; \
914 })
915#else
916# define ASMAtomicXchgPtrT(ppv, pv, Type) \
917 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
918#endif
919
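/* Usage sketch (illustrative only, not part of the API): swapping in a new
 * object pointer and getting the previous one back without casts; MYOBJ,
 * g_pCurObj, pNewObj and ReleaseObj() are made up for the example.
 * @code
 *      typedef struct MYOBJ MYOBJ;
 *      extern MYOBJ * volatile g_pCurObj;
 *
 *      MYOBJ *pOld = ASMAtomicXchgPtrT(&g_pCurObj, pNewObj, MYOBJ *);
 *      if (pOld)
 *          ReleaseObj(pOld);   // made-up cleanup of the displaced object
 * @endcode
 */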
920
921/**
922 * Atomically Exchange a raw-mode context pointer value, ordered.
923 *
924 * @returns Current *ppvRC value
925 * @param ppvRC Pointer to the pointer variable to update.
926 * @param pvRC The pointer value to assign to *ppvRC.
927 */
928DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
929{
930 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
931}
932
933
934/**
935 * Atomically Exchange a ring-0 pointer value, ordered.
936 *
937 * @returns Current *ppvR0 value
938 * @param ppvR0 Pointer to the pointer variable to update.
939 * @param pvR0 The pointer value to assign to *ppvR0.
940 */
941DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
942{
943#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
944 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
945#elif R0_ARCH_BITS == 64
946 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
947#else
948# error "R0_ARCH_BITS is bogus"
949#endif
950}
951
952
953/**
954 * Atomically Exchange a ring-3 pointer value, ordered.
955 *
956 * @returns Current *ppvR3 value
957 * @param ppvR3 Pointer to the pointer variable to update.
958 * @param pvR3 The pointer value to assign to *ppvR3.
959 */
960DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
961{
962#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
963 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
964#elif R3_ARCH_BITS == 64
965 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
966#else
967# error "R3_ARCH_BITS is bogus"
968#endif
969}
970
971
972/** @def ASMAtomicXchgHandle
973 * Atomically Exchange a typical IPRT handle value, ordered.
974 *
975 * @param ph Pointer to the value to update.
976 * @param hNew The new value to assign to *ph.
977 * @param phRes Where to store the current *ph value.
978 *
979 * @remarks This doesn't currently work for all handles (like RTFILE).
980 */
981#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
982# define ASMAtomicXchgHandle(ph, hNew, phRes) \
983 do { \
984 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
985 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
986 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
987 } while (0)
988#elif HC_ARCH_BITS == 64
989# define ASMAtomicXchgHandle(ph, hNew, phRes) \
990 do { \
991 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
992 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
993 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
994 } while (0)
995#else
996# error HC_ARCH_BITS
997#endif
998
999
1000/**
1001 * Atomically Exchange a value whose size might differ
1002 * between platforms or compilers, ordered.
1003 *
1004 * @param pu Pointer to the variable to update.
1005 * @param uNew The value to assign to *pu.
1006 * @todo This is busted as it's missing the result argument.
1007 */
1008#define ASMAtomicXchgSize(pu, uNew) \
1009 do { \
1010 switch (sizeof(*(pu))) { \
1011 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1012 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1013 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1014 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1015 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1016 } \
1017 } while (0)
1018
1019/**
1020 * Atomically Exchange a value whose size might differ
1021 * between platforms or compilers, ordered.
1022 *
1023 * @param pu Pointer to the variable to update.
1024 * @param uNew The value to assign to *pu.
1025 * @param puRes Where to store the current *pu value.
1026 */
1027#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1028 do { \
1029 switch (sizeof(*(pu))) { \
1030 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1031 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1032 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1033 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1034 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1035 } \
1036 } while (0)
1037
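/* Usage sketch (illustrative only, not part of the API): exchanging a variable
 * whose width depends on the target (size_t here); s_cbInUse and cbNew are made
 * up for the example.
 * @code
 *      static size_t volatile s_cbInUse;
 *
 *      size_t cbOld;
 *      ASMAtomicXchgSizeCorrect(&s_cbInUse, cbNew, &cbOld);
 * @endcode
 */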
1038
1039
1040/**
1041 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1042 *
1043 * @returns true if xchg was done.
1044 * @returns false if xchg wasn't done.
1045 *
1046 * @param pu8 Pointer to the value to update.
1047 * @param u8New The new value to assign to *pu8.
1048 * @param u8Old The old value to compare *pu8 with.
1049 *
1050 * @remarks x86: Requires a 486 or later.
1051 * @todo Rename ASMAtomicCmpWriteU8
1052 */
1053#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1054RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1055#else
1056DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1057{
1058# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1059 uint8_t u8Ret;
1060 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1061 "setz %1\n\t"
1062 : "=m" (*pu8)
1063 , "=qm" (u8Ret)
1064 , "=a" (u8Old)
1065 : "q" (u8New)
1066 , "2" (u8Old)
1067 , "m" (*pu8)
1068 : "cc");
1069 return (bool)u8Ret;
1070
1071# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1072 union { uint32_t u; bool f; } fXchg;
1073 uint32_t u32Spill;
1074 uint32_t rcSpill;
1075 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1076 RTASM_ARM_DMB_SY
1077# if defined(RT_ARCH_ARM64)
1078 "ldaxrb %w[uOld], %[pMem]\n\t"
1079 "cmp %w[uOld], %w[uCmp]\n\t"
1080 "bne 1f\n\t" /* stop here if not equal */
1081 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1082 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1083 "mov %w[fXchg], #1\n\t"
1084# else
1085 "ldrexb %[uOld], %[pMem]\n\t"
1086 "teq %[uOld], %[uCmp]\n\t"
1087 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1088 "bne 1f\n\t" /* stop here if not equal */
1089 "cmp %[rc], #0\n\t"
1090 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1091 "mov %[fXchg], #1\n\t"
1092# endif
1093 "1:\n\t"
1094 : [pMem] "+m" (*pu8)
1095 , [uOld] "=&r" (u32Spill)
1096 , [rc] "=&r" (rcSpill)
1097 , [fXchg] "=&r" (fXchg.u)
1098 : [uCmp] "r" ((uint32_t)u8Old)
1099 , [uNew] "r" ((uint32_t)u8New)
1100 , "[fXchg]" (0)
1101 RTASM_ARM_DMB_SY_COMMA_IN_REG
1102 : "cc");
1103 return fXchg.f;
1104
1105# else
1106# error "Port me"
1107# endif
1108}
1109#endif
1110
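/* Usage sketch (illustrative only, not part of the API): claiming a one-shot
 * initialization flag so that exactly one thread performs the setup; s_fInitDone
 * and doInitOnce() are made up for the example.
 * @code
 *      static uint8_t volatile s_fInitDone = 0;
 *
 *      if (ASMAtomicCmpXchgU8(&s_fInitDone, 1, 0))
 *          doInitOnce();   // only the thread that won the 0 -> 1 transition gets here
 * @endcode
 */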
1111
1112/**
1113 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1114 *
1115 * @returns true if xchg was done.
1116 * @returns false if xchg wasn't done.
1117 *
1118 * @param pi8 Pointer to the value to update.
1119 * @param i8New The new value to assign to *pi8.
1120 * @param i8Old The old value to compare *pi8 with.
1121 *
1122 * @remarks x86: Requires a 486 or later.
1123 * @todo Rename ASMAtomicCmpWriteS8
1124 */
1125DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1126{
1127 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1128}
1129
1130
1131/**
1132 * Atomically Compare and Exchange a bool value, ordered.
1133 *
1134 * @returns true if xchg was done.
1135 * @returns false if xchg wasn't done.
1136 *
1137 * @param pf Pointer to the value to update.
1138 * @param fNew The new value to assign to *pf.
1139 * @param fOld The old value to compare *pf with.
1140 *
1141 * @remarks x86: Requires a 486 or later.
1142 * @todo Rename ASMAtomicCmpWriteBool
1143 */
1144DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1145{
1146 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1147}
1148
1149
1150/**
1151 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1152 *
1153 * @returns true if xchg was done.
1154 * @returns false if xchg wasn't done.
1155 *
1156 * @param pu32 Pointer to the value to update.
1157 * @param u32New The new value to assign to *pu32.
1158 * @param u32Old The old value to compare *pu32 with.
1159 *
1160 * @remarks x86: Requires a 486 or later.
1161 * @todo Rename ASMAtomicCmpWriteU32
1162 */
1163#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1164RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1165#else
1166DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1167{
1168# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1169# if RT_INLINE_ASM_GNU_STYLE
1170 uint8_t u8Ret;
1171 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1172 "setz %1\n\t"
1173 : "=m" (*pu32)
1174 , "=qm" (u8Ret)
1175 , "=a" (u32Old)
1176 : "r" (u32New)
1177 , "2" (u32Old)
1178 , "m" (*pu32)
1179 : "cc");
1180 return (bool)u8Ret;
1181
1182# elif RT_INLINE_ASM_USES_INTRIN
1183 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1184
1185# else
1186 uint32_t u32Ret;
1187 __asm
1188 {
1189# ifdef RT_ARCH_AMD64
1190 mov rdx, [pu32]
1191# else
1192 mov edx, [pu32]
1193# endif
1194 mov eax, [u32Old]
1195 mov ecx, [u32New]
1196# ifdef RT_ARCH_AMD64
1197 lock cmpxchg [rdx], ecx
1198# else
1199 lock cmpxchg [edx], ecx
1200# endif
1201 setz al
1202 movzx eax, al
1203 mov [u32Ret], eax
1204 }
1205 return !!u32Ret;
1206# endif
1207
1208# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1209 union { uint32_t u; bool f; } fXchg;
1210 uint32_t u32Spill;
1211 uint32_t rcSpill;
1212 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1213 RTASM_ARM_DMB_SY
1214# if defined(RT_ARCH_ARM64)
1215 "ldaxr %w[uOld], %[pMem]\n\t"
1216 "cmp %w[uOld], %w[uCmp]\n\t"
1217 "bne 1f\n\t" /* stop here if not equal */
1218 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1219 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1220 "mov %w[fXchg], #1\n\t"
1221# else
1222 "ldrex %[uOld], %[pMem]\n\t"
1223 "teq %[uOld], %[uCmp]\n\t"
1224 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1225 "bne 1f\n\t" /* stop here if not equal */
1226 "cmp %[rc], #0\n\t"
1227 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1228 "mov %[fXchg], #1\n\t"
1229# endif
1230 "1:\n\t"
1231 : [pMem] "+m" (*pu32)
1232 , [uOld] "=&r" (u32Spill)
1233 , [rc] "=&r" (rcSpill)
1234 , [fXchg] "=&r" (fXchg.u)
1235 : [uCmp] "r" (u32Old)
1236 , [uNew] "r" (u32New)
1237 , "[fXchg]" (0)
1238 RTASM_ARM_DMB_SY_COMMA_IN_REG
1239 : "cc");
1240 return fXchg.f;
1241
1242# else
1243# error "Port me"
1244# endif
1245}
1246#endif
1247
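/* Usage sketch (illustrative only, not part of the API): the classic
 * compare-and-swap retry loop, here maintaining a high-water mark; s_uHighWater
 * and uValue are made up for the example.
 * @code
 *      static uint32_t volatile s_uHighWater = 0;
 *
 *      uint32_t uOld;
 *      do
 *          uOld = s_uHighWater;        // racy read; the CmpXchg below validates it
 *      while (   uValue > uOld
 *             && !ASMAtomicCmpXchgU32(&s_uHighWater, uValue, uOld));
 * @endcode
 */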
1248
1249/**
1250 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1251 *
1252 * @returns true if xchg was done.
1253 * @returns false if xchg wasn't done.
1254 *
1255 * @param pi32 Pointer to the value to update.
1256 * @param i32New The new value to assign to *pi32.
1257 * @param i32Old The old value to compare *pi32 with.
1258 *
1259 * @remarks x86: Requires a 486 or later.
1260 * @todo Rename ASMAtomicCmpWriteS32
1261 */
1262DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1263{
1264 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1265}
1266
1267
1268/**
1269 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1270 *
1271 * @returns true if xchg was done.
1272 * @returns false if xchg wasn't done.
1273 *
1274 * @param pu64 Pointer to the 64-bit variable to update.
1275 * @param u64New The 64-bit value to assign to *pu64.
1276 * @param u64Old The value to compare with.
1277 *
1278 * @remarks x86: Requires a Pentium or later.
1279 * @todo Rename ASMAtomicCmpWriteU64
1280 */
1281#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1282 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1283RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1284#else
1285DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1286{
1287# if RT_INLINE_ASM_USES_INTRIN
1288 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1289
1290# elif defined(RT_ARCH_AMD64)
1291# if RT_INLINE_ASM_GNU_STYLE
1292 uint8_t u8Ret;
1293 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1294 "setz %1\n\t"
1295 : "=m" (*pu64)
1296 , "=qm" (u8Ret)
1297 , "=a" (u64Old)
1298 : "r" (u64New)
1299 , "2" (u64Old)
1300 , "m" (*pu64)
1301 : "cc");
1302 return (bool)u8Ret;
1303# else
1304 bool fRet;
1305 __asm
1306 {
1307 mov rdx, [pu64]
1308 mov rax, [u64Old]
1309 mov rcx, [u64New]
1310 lock cmpxchg [rdx], rcx
1311 setz al
1312 mov [fRet], al
1313 }
1314 return fRet;
1315# endif
1316
1317# elif defined(RT_ARCH_X86)
1318 uint32_t u32Ret;
1319# if RT_INLINE_ASM_GNU_STYLE
1320# if defined(PIC) || defined(__PIC__)
1321 uint32_t u32EBX = (uint32_t)u64New;
1322 uint32_t u32Spill;
1323 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1324 "lock; cmpxchg8b (%6)\n\t"
1325 "setz %%al\n\t"
1326 "movl %4, %%ebx\n\t"
1327 "movzbl %%al, %%eax\n\t"
1328 : "=a" (u32Ret)
1329 , "=d" (u32Spill)
1330# if RT_GNUC_PREREQ(4, 3)
1331 , "+m" (*pu64)
1332# else
1333 , "=m" (*pu64)
1334# endif
1335 : "A" (u64Old)
1336 , "m" ( u32EBX )
1337 , "c" ( (uint32_t)(u64New >> 32) )
1338 , "S" (pu64)
1339 : "cc");
1340# else /* !PIC */
1341 uint32_t u32Spill;
1342 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1343 "setz %%al\n\t"
1344 "movzbl %%al, %%eax\n\t"
1345 : "=a" (u32Ret)
1346 , "=d" (u32Spill)
1347 , "+m" (*pu64)
1348 : "A" (u64Old)
1349 , "b" ( (uint32_t)u64New )
1350 , "c" ( (uint32_t)(u64New >> 32) )
1351 : "cc");
1352# endif
1353 return (bool)u32Ret;
1354# else
1355 __asm
1356 {
1357 mov ebx, dword ptr [u64New]
1358 mov ecx, dword ptr [u64New + 4]
1359 mov edi, [pu64]
1360 mov eax, dword ptr [u64Old]
1361 mov edx, dword ptr [u64Old + 4]
1362 lock cmpxchg8b [edi]
1363 setz al
1364 movzx eax, al
1365 mov dword ptr [u32Ret], eax
1366 }
1367 return !!u32Ret;
1368# endif
1369
1370# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1371 union { uint32_t u; bool f; } fXchg;
1372 uint64_t u64Spill;
1373 uint32_t rcSpill;
1374 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1375 RTASM_ARM_DMB_SY
1376# if defined(RT_ARCH_ARM64)
1377 "ldaxr %[uOld], %[pMem]\n\t"
1378 "cmp %[uOld], %[uCmp]\n\t"
1379 "bne 1f\n\t" /* stop here if not equal */
1380 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1381 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1382 "mov %w[fXchg], #1\n\t"
1383# else
1384 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1385 "teq %[uOld], %[uCmp]\n\t"
1386 "teqeq %H[uOld], %H[uCmp]\n\t"
1387 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1388 "bne 1f\n\t" /* stop here if not equal */
1389 "cmp %[rc], #0\n\t"
1390 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1391 "mov %[fXchg], #1\n\t"
1392# endif
1393 "1:\n\t"
1394 : [pMem] "+m" (*pu64)
1395 , [uOld] "=&r" (u64Spill)
1396 , [rc] "=&r" (rcSpill)
1397 , [fXchg] "=&r" (fXchg.u)
1398 : [uCmp] "r" (u64Old)
1399 , [uNew] "r" (u64New)
1400 , "[fXchg]" (0)
1401 RTASM_ARM_DMB_SY_COMMA_IN_REG
1402 : "cc");
1403 return fXchg.f;
1404
1405# else
1406# error "Port me"
1407# endif
1408}
1409#endif
1410
1411
1412/**
1413 * Atomically Compare and exchange a signed 64-bit value, ordered.
1414 *
1415 * @returns true if xchg was done.
1416 * @returns false if xchg wasn't done.
1417 *
1418 * @param pi64 Pointer to the 64-bit variable to update.
1419 * @param i64 The 64-bit value to assign to *pi64.
1420 * @param i64Old The value to compare with.
1421 *
1422 * @remarks x86: Requires a Pentium or later.
1423 * @todo Rename ASMAtomicCmpWriteS64
1424 */
1425DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1426{
1427 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1428}
1429
1430#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
1431
1432/** @def RTASM_HAVE_CMP_WRITE_U128
1433 * Indicates that we've got ASMAtomicCmpWriteU128() available. */
1434# define RTASM_HAVE_CMP_WRITE_U128 1
1435
1436
1437/**
1438 * Atomically compare and write an unsigned 128-bit value, ordered.
1439 *
1440 * @returns true if write was done.
1441 * @returns false if write wasn't done.
1442 *
1443 * @param pu128 Pointer to the 128-bit variable to update.
1444 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1445 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1446 * @param u64OldHi The high 64 bits of the value to compare with.
1447 * @param u64OldLo The low 64 bits of the value to compare with.
1448 *
1449 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1450 */
1451# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1452DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1453 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1454# else
1455DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1456 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1457{
1458# if RT_INLINE_ASM_USES_INTRIN
1459 __int64 ai64Cmp[2];
1460 ai64Cmp[0] = u64OldLo;
1461 ai64Cmp[1] = u64OldHi;
1462 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1463
1464# elif defined(RT_ARCH_AMD64)
1465# if RT_INLINE_ASM_GNU_STYLE
1466 uint64_t u64Ret;
1467 uint64_t u64Spill;
1468 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1469 "setz %%al\n\t"
1470 "movzbl %%al, %%eax\n\t"
1471 : "=a" (u64Ret)
1472 , "=d" (u64Spill)
1473 , "+m" (*pu128)
1474 : "a" (u64OldLo)
1475 , "d" (u64OldHi)
1476 , "b" (u64NewLo)
1477 , "c" (u64NewHi)
1478 : "cc");
1479
1480 return (bool)u64Ret;
1481# else
1482# error "Port me"
1483# endif
1484# else
1485# error "Port me"
1486# endif
1487}
1488# endif
1489
1490
1491/**
1492 * Atomically compare and write an unsigned 128-bit value, ordered.
1493 *
1494 * @returns true if write was done.
1495 * @returns false if write wasn't done.
1496 *
1497 * @param pu128 Pointer to the 128-bit variable to update.
1498 * @param u128New The 128-bit value to assign to *pu128.
1499 * @param u128Old The value to compare with.
1500 *
1501 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1502 */
1503DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1504{
1505# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1506 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1507 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1508# else
1509 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1510# endif
1511}
1512
1513
1514/**
1515 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1516 */
1517DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1518 const RTUINT128U u128Old) RT_NOTHROW_DEF
1519{
1520 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1521}
1522
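/* Usage sketch (illustrative only, not part of the API): 128-bit compare-and-
 * write through the RTUINT128U wrapper, e.g. for a descriptor that must be
 * updated as one unit; g_u128Desc, the *Hi/*Lo inputs and handleContention()
 * are made up for the example.
 * @code
 *      extern RTUINT128U volatile g_u128Desc;
 *
 *      RTUINT128U uOld, uNew;
 *      uOld.s.Lo = uCurLo;   uOld.s.Hi = uCurHi;    // the value we think is there
 *      uNew.s.Lo = uNextLo;  uNew.s.Hi = uNextHi;   // the value we want to write
 *      if (!ASMAtomicCmpWriteU128U(&g_u128Desc, uNew, uOld))
 *          handleContention();                      // made-up fallback path
 * @endcode
 */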
1523#endif /* RT_ARCH_AMD64 */
1524
1525
1526/**
1527 * Atomically Compare and Exchange a pointer value, ordered.
1528 *
1529 * @returns true if xchg was done.
1530 * @returns false if xchg wasn't done.
1531 *
1532 * @param ppv Pointer to the value to update.
1533 * @param pvNew The new value to assign to *ppv.
1534 * @param pvOld The old value to compare *ppv with.
1535 *
1536 * @remarks x86: Requires a 486 or later.
1537 * @todo Rename ASMAtomicCmpWritePtrVoid
1538 */
1539DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1540{
1541#if ARCH_BITS == 32 || ARCH_BITS == 16
1542 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1543#elif ARCH_BITS == 64
1544 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1545#else
1546# error "ARCH_BITS is bogus"
1547#endif
1548}
1549
1550
1551/**
1552 * Atomically Compare and Exchange a pointer value, ordered.
1553 *
1554 * @returns true if xchg was done.
1555 * @returns false if xchg wasn't done.
1556 *
1557 * @param ppv Pointer to the value to update.
1558 * @param pvNew The new value to assign to *ppv.
1559 * @param pvOld The old value to compare *ppv with.
1560 *
1561 * @remarks This is relatively type safe on GCC platforms.
1562 * @remarks x86: Requires a 486 or later.
1563 * @todo Rename ASMAtomicCmpWritePtr
1564 */
1565#ifdef __GNUC__
1566# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1567 __extension__ \
1568 ({\
1569 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1570 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1571 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1572 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1573 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1574 fMacroRet; \
1575 })
1576#else
1577# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1578 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1579#endif
1580
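/* Usage sketch (illustrative only, not part of the API): lock-free LIFO push
 * using the type-safe pointer compare-and-exchange; MYNODE and g_pHead are made
 * up for the example.
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *      extern MYNODE * volatile g_pHead;
 *
 *      void myPush(MYNODE *pNode)
 *      {
 *          MYNODE *pOld;
 *          do
 *          {
 *              pOld = g_pHead;          // racy read; the CmpXchg below validates it
 *              pNode->pNext = pOld;
 *          } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pOld));
 *      }
 * @endcode
 */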
1581
1582/** @def ASMAtomicCmpXchgHandle
1583 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1584 *
1585 * @param ph Pointer to the value to update.
1586 * @param hNew The new value to assign to *ph.
1587 * @param hOld The old value to compare *ph with.
1588 * @param fRc Where to store the result.
1589 *
1590 * @remarks This doesn't currently work for all handles (like RTFILE).
1591 * @remarks x86: Requires a 486 or later.
1592 * @todo Rename ASMAtomicCmpWriteHandle
1593 */
1594#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1595# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1596 do { \
1597 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1598 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1599 } while (0)
1600#elif HC_ARCH_BITS == 64
1601# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1602 do { \
1603 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1604 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1605 } while (0)
1606#else
1607# error HC_ARCH_BITS
1608#endif
1609
1610
1611/** @def ASMAtomicCmpXchgSize
1612 * Atomically Compare and Exchange a value whose size might differ
1613 * between platforms or compilers, ordered.
1614 *
1615 * @param pu Pointer to the value to update.
1616 * @param uNew The new value to assign to *pu.
1617 * @param uOld The old value to compare *pu with.
1618 * @param fRc Where to store the result.
1619 *
1620 * @remarks x86: Requires a 486 or later.
1621 * @todo Rename ASMAtomicCmpWriteSize
1622 */
1623#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1624 do { \
1625 switch (sizeof(*(pu))) { \
1626 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1627 break; \
1628 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1629 break; \
1630 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1631 (fRc) = false; \
1632 break; \
1633 } \
1634 } while (0)
1635
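/* Usage sketch (illustrative only, not part of the API): size-generic
 * compare-and-exchange on a field whose width is 32 or 64 bits depending on the
 * target; pState->uValue, uNewValue, uOldValue and retryOrFail() are made up
 * for the example.
 * @code
 *      bool fRc;
 *      ASMAtomicCmpXchgSize(&pState->uValue, uNewValue, uOldValue, fRc);
 *      if (!fRc)
 *          retryOrFail();   // made-up handling of a lost race
 * @endcode
 */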
1636
1637/**
1638 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1639 * passing back the old value, ordered.
1640 *
1641 * @returns true if xchg was done.
1642 * @returns false if xchg wasn't done.
1643 *
1644 * @param pu32 Pointer to the value to update.
1645 * @param u32New The new value to assign to *pu32.
1646 * @param u32Old The old value to compare *pu32 with.
1647 * @param pu32Old Pointer to store the old value at.
1648 *
1649 * @remarks x86: Requires a 486 or later.
1650 */
1651#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1652RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1653#else
1654DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1655{
1656# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1657# if RT_INLINE_ASM_GNU_STYLE
1658 uint8_t u8Ret;
1659 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1660 "setz %1\n\t"
1661 : "=m" (*pu32)
1662 , "=qm" (u8Ret)
1663 , "=a" (*pu32Old)
1664 : "r" (u32New)
1665 , "a" (u32Old)
1666 , "m" (*pu32)
1667 : "cc");
1668 return (bool)u8Ret;
1669
1670# elif RT_INLINE_ASM_USES_INTRIN
1671 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1672
1673# else
1674 uint32_t u32Ret;
1675 __asm
1676 {
1677# ifdef RT_ARCH_AMD64
1678 mov rdx, [pu32]
1679# else
1680 mov edx, [pu32]
1681# endif
1682 mov eax, [u32Old]
1683 mov ecx, [u32New]
1684# ifdef RT_ARCH_AMD64
1685 lock cmpxchg [rdx], ecx
1686 mov rdx, [pu32Old]
1687 mov [rdx], eax
1688# else
1689 lock cmpxchg [edx], ecx
1690 mov edx, [pu32Old]
1691 mov [edx], eax
1692# endif
1693 setz al
1694 movzx eax, al
1695 mov [u32Ret], eax
1696 }
1697 return !!u32Ret;
1698# endif
1699
1700# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1701 union { uint32_t u; bool f; } fXchg;
1702 uint32_t u32ActualOld;
1703 uint32_t rcSpill;
1704 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1705 RTASM_ARM_DMB_SY
1706# if defined(RT_ARCH_ARM64)
1707 "ldaxr %w[uOld], %[pMem]\n\t"
1708 "cmp %w[uOld], %w[uCmp]\n\t"
1709 "bne 1f\n\t" /* stop here if not equal */
1710 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1711 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1712 "mov %w[fXchg], #1\n\t"
1713# else
1714 "ldrex %[uOld], %[pMem]\n\t"
1715 "teq %[uOld], %[uCmp]\n\t"
1716 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1717 "bne 1f\n\t" /* stop here if not equal */
1718 "cmp %[rc], #0\n\t"
1719 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1720 "mov %[fXchg], #1\n\t"
1721# endif
1722 "1:\n\t"
1723 : [pMem] "+m" (*pu32)
1724 , [uOld] "=&r" (u32ActualOld)
1725 , [rc] "=&r" (rcSpill)
1726 , [fXchg] "=&r" (fXchg.u)
1727 : [uCmp] "r" (u32Old)
1728 , [uNew] "r" (u32New)
1729 , "[fXchg]" (0)
1730 RTASM_ARM_DMB_SY_COMMA_IN_REG
1731 : "cc");
1732 *pu32Old = u32ActualOld;
1733 return fXchg.f;
1734
1735# else
1736# error "Port me"
1737# endif
1738}
1739#endif
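/* Usage sketch (illustrative only, not part of the IPRT API): a typical
   compare-and-exchange retry loop that ORs bits into a 32-bit flag word.
   Because this variant passes back the value it actually found, a failed
   attempt needs no separate re-read before retrying. */
DECLINLINE(uint32_t) ExampleAtomicOrFlags(uint32_t volatile RT_FAR *pfFlags, uint32_t fOrMask) RT_NOTHROW_DEF
{
    uint32_t fOld = *pfFlags;   /* initial guess; a failed cmpxchg refreshes it */
    while (!ASMAtomicCmpXchgExU32(pfFlags, fOld | fOrMask, fOld, &fOld))
    { /* fOld now holds the value that was actually there; retry with it. */ }
    return fOld;                /* the value prior to the successful update */
}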
1740
1741
1742/**
1743 * Atomically Compare and Exchange a signed 32-bit value, additionally
1744 * passes back old value, ordered.
1745 *
1746 * @returns true if xchg was done.
1747 * @returns false if xchg wasn't done.
1748 *
1749 * @param pi32 Pointer to the value to update.
1750 * @param i32New The new value to assign to *pi32.
1751 * @param i32Old The old value to compare *pi32 with.
1752 * @param pi32Old Pointer to store the old value at.
1753 *
1754 * @remarks x86: Requires a 486 or later.
1755 */
1756DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1757{
1758 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1759}
1760
1761
1762/**
1763 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1764 * passing back old value, ordered.
1765 *
1766 * @returns true if xchg was done.
1767 * @returns false if xchg wasn't done.
1768 *
1769 * @param pu64 Pointer to the 64-bit variable to update.
1770 * @param u64New The 64-bit value to assign to *pu64.
1771 * @param u64Old The value to compare with.
1772 * @param pu64Old Pointer to store the old value at.
1773 *
1774 * @remarks x86: Requires a Pentium or later.
1775 */
1776#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1777 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1778RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1779#else
1780DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1781{
1782# if RT_INLINE_ASM_USES_INTRIN
1783 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1784
1785# elif defined(RT_ARCH_AMD64)
1786# if RT_INLINE_ASM_GNU_STYLE
1787 uint8_t u8Ret;
1788 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1789 "setz %1\n\t"
1790 : "=m" (*pu64)
1791 , "=qm" (u8Ret)
1792 , "=a" (*pu64Old)
1793 : "r" (u64New)
1794 , "a" (u64Old)
1795 , "m" (*pu64)
1796 : "cc");
1797 return (bool)u8Ret;
1798# else
1799 bool fRet;
1800 __asm
1801 {
1802 mov rdx, [pu64]
1803 mov rax, [u64Old]
1804 mov rcx, [u64New]
1805 lock cmpxchg [rdx], rcx
1806 mov rdx, [pu64Old]
1807 mov [rdx], rax
1808 setz al
1809 mov [fRet], al
1810 }
1811 return fRet;
1812# endif
1813
1814# elif defined(RT_ARCH_X86)
1815# if RT_INLINE_ASM_GNU_STYLE
1816 uint64_t u64Ret;
1817# if defined(PIC) || defined(__PIC__)
1818 /* NB: this code uses a memory clobber description, because the clean
1819 * solution with an output value for *pu64 makes gcc run out of registers.
1820 * This will cause suboptimal code, and anyone with a better solution is
1821 * welcome to improve this. */
1822 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1823 "lock; cmpxchg8b %3\n\t"
1824 "xchgl %%ebx, %1\n\t"
1825 : "=A" (u64Ret)
1826 : "DS" ((uint32_t)u64New)
1827 , "c" ((uint32_t)(u64New >> 32))
1828 , "m" (*pu64)
1829 , "0" (u64Old)
1830 : "memory"
1831 , "cc" );
1832# else /* !PIC */
1833 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1834 : "=A" (u64Ret)
1835 , "=m" (*pu64)
1836 : "b" ((uint32_t)u64New)
1837 , "c" ((uint32_t)(u64New >> 32))
1838 , "m" (*pu64)
1839 , "0" (u64Old)
1840 : "cc");
1841# endif
1842 *pu64Old = u64Ret;
1843 return u64Ret == u64Old;
1844# else
1845 uint32_t u32Ret;
1846 __asm
1847 {
1848 mov ebx, dword ptr [u64New]
1849 mov ecx, dword ptr [u64New + 4]
1850 mov edi, [pu64]
1851 mov eax, dword ptr [u64Old]
1852 mov edx, dword ptr [u64Old + 4]
1853 lock cmpxchg8b [edi]
1854 mov ebx, [pu64Old]
1855 mov [ebx], eax
1856 setz al
1857 movzx eax, al
1858 add ebx, 4
1859 mov [ebx], edx
1860 mov dword ptr [u32Ret], eax
1861 }
1862 return !!u32Ret;
1863# endif
1864
1865# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1866 union { uint32_t u; bool f; } fXchg;
1867 uint64_t u64ActualOld;
1868 uint32_t rcSpill;
1869 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1870 RTASM_ARM_DMB_SY
1871# if defined(RT_ARCH_ARM64)
1872 "ldaxr %[uOld], %[pMem]\n\t"
1873 "cmp %[uOld], %[uCmp]\n\t"
1874 "bne 1f\n\t" /* stop here if not equal */
1875 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1876 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1877 "mov %w[fXchg], #1\n\t"
1878# else
1879 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1880 "teq %[uOld], %[uCmp]\n\t"
1881 "teqeq %H[uOld], %H[uCmp]\n\t"
1882 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1883 "bne 1f\n\t" /* stop here if not equal */
1884 "cmp %[rc], #0\n\t"
1885 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1886 "mov %[fXchg], #1\n\t"
1887# endif
1888 "1:\n\t"
1889 : [pMem] "+m" (*pu64)
1890 , [uOld] "=&r" (u64ActualOld)
1891 , [rc] "=&r" (rcSpill)
1892 , [fXchg] "=&r" (fXchg.u)
1893 : [uCmp] "r" (u64Old)
1894 , [uNew] "r" (u64New)
1895 , "[fXchg]" (0)
1896 RTASM_ARM_DMB_SY_COMMA_IN_REG
1897 : "cc");
1898 *pu64Old = u64ActualOld;
1899 return fXchg.f;
1900
1901# else
1902# error "Port me"
1903# endif
1904}
1905#endif
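/* Usage sketch (illustrative only, not part of the IPRT API): maintaining a
   64-bit high-water mark, reusing the old value the failed compare-and-
   exchange hands back instead of re-reading the variable. */
DECLINLINE(void) ExampleUpdateMaxU64(uint64_t volatile RT_FAR *puMax, uint64_t uValue) RT_NOTHROW_DEF
{
    uint64_t uOld = 0;  /* deliberately low; the first failed cmpxchg fetches the real value atomically */
    while (   uValue > uOld
           && !ASMAtomicCmpXchgExU64(puMax, uValue, uOld, &uOld))
    { /* uOld was refreshed; re-check whether uValue is still larger and retry. */ }
}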
1906
1907
1908/**
1909 * Atomically Compare and exchange a signed 64-bit value, additionally
1910 * passing back old value, ordered.
1911 *
1912 * @returns true if xchg was done.
1913 * @returns false if xchg wasn't done.
1914 *
1915 * @param pi64 Pointer to the 64-bit variable to update.
1916 * @param i64 The 64-bit value to assign to *pi64.
1917 * @param i64Old The value to compare with.
1918 * @param pi64Old Pointer to store the old value at.
1919 *
1920 * @remarks x86: Requires a Pentium or later.
1921 */
1922DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1923{
1924 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1925}
1926
1927/** @def ASMAtomicCmpXchgExHandle
1928 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1929 *
1930 * @param ph Pointer to the value to update.
1931 * @param hNew The new value to assign to *ph.
1932 * @param hOld The old value to compare *ph with.
1933 * @param fRc Where to store the result.
1934 * @param phOldVal Pointer to where to store the old value.
1935 *
1936 * @remarks This doesn't currently work for all handles (like RTFILE).
1937 */
1938#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1939# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1940 do { \
1941 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1942 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1943 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1944 } while (0)
1945#elif HC_ARCH_BITS == 64
1946# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1947 do { \
1948 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1949 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1950 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1951 } while (0)
1952#else
1953# error HC_ARCH_BITS
1954#endif
1955
1956
1957/** @def ASMAtomicCmpXchgExSize
1958 * Atomically Compare and Exchange a value whose size might differ
1959 * between platforms or compilers. Additionally passes back the old value.
1960 *
1961 * @param pu Pointer to the value to update.
1962 * @param uNew The new value to assign to *pu.
1963 * @param uOld The old value to compare *pu with.
1964 * @param fRc Where to store the result.
1965 * @param puOldVal Pointer to where to store the old value.
1966 *
1967 * @remarks x86: Requires a 486 or later.
1968 */
1969#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1970 do { \
1971 switch (sizeof(*(pu))) { \
1972 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1973 break; \
1974 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1975 break; \
1976 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1977 (fRc) = false; \
1978 *(puOldVal) = 0; \
1979 break; \
1980 } \
1981 } while (0)
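/* Usage sketch (illustrative only, not part of the IPRT API): size-generic
   compare-and-exchange that also captures the old value, here on a pointer-
   sized owner field (assuming a 32-bit or 64-bit target). */
DECLINLINE(bool) ExampleTrySetOwner(uintptr_t volatile RT_FAR *puOwner, uintptr_t uSelf, uintptr_t RT_FAR *puPrevOwner) RT_NOTHROW_DEF
{
    bool fRc;
    /* Claim ownership only if currently unowned (0); the previous owner is
       passed back either way. */
    ASMAtomicCmpXchgExSize(puOwner, uSelf, (uintptr_t)0, fRc, puPrevOwner);
    return fRc;
}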
1982
1983
1984/**
1985 * Atomically Compare and Exchange a pointer value, additionally
1986 * passing back old value, ordered.
1987 *
1988 * @returns true if xchg was done.
1989 * @returns false if xchg wasn't done.
1990 *
1991 * @param ppv Pointer to the value to update.
1992 * @param pvNew The new value to assign to *ppv.
1993 * @param pvOld The old value to compare *ppv with.
1994 * @param ppvOld Pointer to store the old value at.
1995 *
1996 * @remarks x86: Requires a 486 or later.
1997 */
1998DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1999 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2000{
2001#if ARCH_BITS == 32 || ARCH_BITS == 16
2002 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2003#elif ARCH_BITS == 64
2004 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2005#else
2006# error "ARCH_BITS is bogus"
2007#endif
2008}
2009
2010
2011/**
2012 * Atomically Compare and Exchange a pointer value, additionally
2013 * passing back old value, ordered.
2014 *
2015 * @returns true if xchg was done.
2016 * @returns false if xchg wasn't done.
2017 *
2018 * @param ppv Pointer to the value to update.
2019 * @param pvNew The new value to assign to *ppv.
2020 * @param pvOld The old value to compare *ppv with.
2021 * @param ppvOld Pointer to store the old value at.
2022 *
2023 * @remarks This is relatively type safe on GCC platforms.
2024 * @remarks x86: Requires a 486 or later.
2025 */
2026#ifdef __GNUC__
2027# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2028 __extension__ \
2029 ({\
2030 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2031 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2032 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2033 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2034 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2035 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2036 (void **)ppvOldTypeChecked); \
2037 fMacroRet; \
2038 })
2039#else
2040# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2041 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2042#endif
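/* Usage sketch (illustrative only, not part of the IPRT API): lock-free LIFO
   push using the pointer compare-and-exchange that passes back the old head,
   so a failed attempt retries without an extra read.  (Popping would need ABA
   protection and is deliberately not shown.) */
typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;

DECLINLINE(void) ExampleLifoPush(EXAMPLENODE * volatile RT_FAR *ppHead, EXAMPLENODE *pNode) RT_NOTHROW_DEF
{
    EXAMPLENODE *pHead = *ppHead;
    do
    {
        pNode->pNext = pHead;   /* link to the head we expect to replace */
    } while (!ASMAtomicCmpXchgExPtr(ppHead, pNode, pHead, &pHead));
}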
2043
2044
2045/**
2046 * Virtualization unfriendly serializing instruction, always exits.
2047 */
2048#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2049RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2050#else
2051DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2052{
2053# if RT_INLINE_ASM_GNU_STYLE
2054 RTCCUINTREG xAX = 0;
2055# ifdef RT_ARCH_AMD64
2056 __asm__ __volatile__ ("cpuid"
2057 : "=a" (xAX)
2058 : "0" (xAX)
2059 : "rbx", "rcx", "rdx", "memory");
2060# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2061 __asm__ __volatile__ ("push %%ebx\n\t"
2062 "cpuid\n\t"
2063 "pop %%ebx\n\t"
2064 : "=a" (xAX)
2065 : "0" (xAX)
2066 : "ecx", "edx", "memory");
2067# else
2068 __asm__ __volatile__ ("cpuid"
2069 : "=a" (xAX)
2070 : "0" (xAX)
2071 : "ebx", "ecx", "edx", "memory");
2072# endif
2073
2074# elif RT_INLINE_ASM_USES_INTRIN
2075 int aInfo[4];
2076 _ReadWriteBarrier();
2077 __cpuid(aInfo, 0);
2078
2079# else
2080 __asm
2081 {
2082 push ebx
2083 xor eax, eax
2084 cpuid
2085 pop ebx
2086 }
2087# endif
2088}
2089#endif
2090
2091/**
2092 * Virtualization friendly serializing instruction, though more expensive.
2093 */
2094#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2095RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2096#else
2097DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2098{
2099# if RT_INLINE_ASM_GNU_STYLE
2100# ifdef RT_ARCH_AMD64
2101 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2102 "subq $128, %%rsp\n\t" /*redzone*/
2103 "mov %%ss, %%eax\n\t"
2104 "pushq %%rax\n\t"
2105 "pushq %%r10\n\t"
2106 "pushfq\n\t"
2107 "movl %%cs, %%eax\n\t"
2108 "pushq %%rax\n\t"
2109 "leaq 1f(%%rip), %%rax\n\t"
2110 "pushq %%rax\n\t"
2111 "iretq\n\t"
2112 "1:\n\t"
2113 ::: "rax", "r10", "memory", "cc");
2114# else
2115 __asm__ __volatile__ ("pushfl\n\t"
2116 "pushl %%cs\n\t"
2117 "pushl $1f\n\t"
2118 "iretl\n\t"
2119 "1:\n\t"
2120 ::: "memory");
2121# endif
2122
2123# else
2124 __asm
2125 {
2126 pushfd
2127 push cs
2128 push la_ret
2129 iretd
2130 la_ret:
2131 }
2132# endif
2133}
2134#endif
2135
2136/**
2137 * Virtualization friendlier serializing instruction, may still cause exits.
2138 */
2139#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2140RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2141#else
2142DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2143{
2144# if RT_INLINE_ASM_GNU_STYLE
2145 /* rdtscp is not supported by ancient linux build VM of course :-( */
2146# ifdef RT_ARCH_AMD64
2147 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2148 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2149# else
2150 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2151 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2152# endif
2153# else
2154# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2155 uint32_t uIgnore;
2156 _ReadWriteBarrier();
2157 (void)__rdtscp(&uIgnore);
2158 (void)uIgnore;
2159# else
2160 __asm
2161 {
2162 rdtscp
2163 }
2164# endif
2165# endif
2166}
2167#endif
2168
2169
2170/**
2171 * Serialize Instruction (both data store and instruction flush).
2172 */
2173#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2174# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2175#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2176# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2177#elif defined(RT_ARCH_SPARC64)
2178RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2179#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2180DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2181{
2182 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2183}
2184#else
2185# error "Port me"
2186#endif
2187
2188
2189/**
2190 * Memory fence, waits for any pending writes and reads to complete.
2191 * @note No implicit compiler barrier (which is probably stupid).
2192 */
2193DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2194{
2195#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2196# if RT_INLINE_ASM_GNU_STYLE
2197 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2198# elif RT_INLINE_ASM_USES_INTRIN
2199 _mm_mfence();
2200# else
2201 __asm
2202 {
2203 _emit 0x0f
2204 _emit 0xae
2205 _emit 0xf0
2206 }
2207# endif
2208#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2209 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2210#elif ARCH_BITS == 16
2211 uint16_t volatile u16;
2212 ASMAtomicXchgU16(&u16, 0);
2213#else
2214 uint32_t volatile u32;
2215 ASMAtomicXchgU32(&u32, 0);
2216#endif
2217}
2218
2219
2220/**
2221 * Write fence, waits for any pending writes to complete.
2222 * @note No implicit compiler barrier (which is probably stupid).
2223 */
2224DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2225{
2226#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2227# if RT_INLINE_ASM_GNU_STYLE
2228 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2229# elif RT_INLINE_ASM_USES_INTRIN
2230 _mm_sfence();
2231# else
2232 __asm
2233 {
2234 _emit 0x0f
2235 _emit 0xae
2236 _emit 0xf8
2237 }
2238# endif
2239#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2240 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2241#else
2242 ASMMemoryFence();
2243#endif
2244}
2245
2246
2247/**
2248 * Read fence, waits for any pending reads to complete.
2249 * @note No implicit compiler barrier (which is probably stupid).
2250 */
2251DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2252{
2253#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2254# if RT_INLINE_ASM_GNU_STYLE
2255 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2256# elif RT_INLINE_ASM_USES_INTRIN
2257 _mm_lfence();
2258# else
2259 __asm
2260 {
2261 _emit 0x0f
2262 _emit 0xae
2263 _emit 0xe8
2264 }
2265# endif
2266#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2267 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2268#else
2269 ASMMemoryFence();
2270#endif
2271}
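/* Usage sketch (illustrative only, not part of the IPRT API): hand-rolled
   publish/consume pairing of the write and read fences with plain volatile
   accesses.  Real code will usually prefer the ordered ASMAtomicWriteXxx and
   ASMAtomicReadXxx helpers further down. */
DECLINLINE(void) ExamplePublish(uint32_t volatile RT_FAR *puPayload, uint32_t volatile RT_FAR *pfReady, uint32_t uValue) RT_NOTHROW_DEF
{
    *puPayload = uValue;
    ASMWriteFence();            /* the payload must be globally visible before the flag */
    *pfReady = 1;
}

DECLINLINE(uint32_t) ExampleConsume(uint32_t volatile RT_FAR *puPayload, uint32_t volatile RT_FAR *pfReady) RT_NOTHROW_DEF
{
    while (!*pfReady)
    { /* spin; a real implementation would pause or block here */ }
    ASMReadFence();             /* the payload read must not be done before the flag read */
    return *puPayload;
}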
2272
2273
2274/**
2275 * Atomically reads an unsigned 8-bit value, ordered.
2276 *
2277 * @returns Current *pu8 value
2278 * @param pu8 Pointer to the 8-bit variable to read.
2279 */
2280DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2281{
2282#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2283 uint32_t u32;
2284 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2285 RTASM_ARM_DMB_SY
2286# if defined(RT_ARCH_ARM64)
2287 "ldxrb %w[uDst], %[pMem]\n\t"
2288# else
2289 "ldrexb %[uDst], %[pMem]\n\t"
2290# endif
2291 : [uDst] "=&r" (u32)
2292 : [pMem] "m" (*pu8)
2293 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2294 return (uint8_t)u32;
2295#else
2296 ASMMemoryFence();
2297 return *pu8; /* byte reads are atomic on x86 */
2298#endif
2299}
2300
2301
2302/**
2303 * Atomically reads an unsigned 8-bit value, unordered.
2304 *
2305 * @returns Current *pu8 value
2306 * @param pu8 Pointer to the 8-bit variable to read.
2307 */
2308DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2309{
2310#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2311 uint32_t u32;
2312 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2313# if defined(RT_ARCH_ARM64)
2314 "ldxrb %w[uDst], %[pMem]\n\t"
2315# else
2316 "ldrexb %[uDst], %[pMem]\n\t"
2317# endif
2318 : [uDst] "=&r" (u32)
2319 : [pMem] "m" (*pu8));
2320 return (uint8_t)u32;
2321#else
2322 return *pu8; /* byte reads are atomic on x86 */
2323#endif
2324}
2325
2326
2327/**
2328 * Atomically reads a signed 8-bit value, ordered.
2329 *
2330 * @returns Current *pi8 value
2331 * @param pi8 Pointer to the 8-bit variable to read.
2332 */
2333DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2334{
2335 ASMMemoryFence();
2336#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2337 int32_t i32;
2338 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2339 RTASM_ARM_DMB_SY
2340# if defined(RT_ARCH_ARM64)
2341 "ldxrb %w[iDst], %[pMem]\n\t"
2342# else
2343 "ldrexb %[iDst], %[pMem]\n\t"
2344# endif
2345 : [iDst] "=&r" (i32)
2346 : [pMem] "m" (*pi8)
2347 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2348 return (int8_t)i32;
2349#else
2350 return *pi8; /* byte reads are atomic on x86 */
2351#endif
2352}
2353
2354
2355/**
2356 * Atomically reads a signed 8-bit value, unordered.
2357 *
2358 * @returns Current *pi8 value
2359 * @param pi8 Pointer to the 8-bit variable to read.
2360 */
2361DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2362{
2363#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2364 int32_t i32;
2365 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2366# if defined(RT_ARCH_ARM64)
2367 "ldxrb %w[iDst], %[pMem]\n\t"
2368# else
2369 "ldrexb %[iDst], %[pMem]\n\t"
2370# endif
2371 : [iDst] "=&r" (i32)
2372 : [pMem] "m" (*pi8));
2373 return (int8_t)i32;
2374#else
2375 return *pi8; /* byte reads are atomic on x86 */
2376#endif
2377}
2378
2379
2380/**
2381 * Atomically reads an unsigned 16-bit value, ordered.
2382 *
2383 * @returns Current *pu16 value
2384 * @param pu16 Pointer to the 16-bit variable to read.
2385 */
2386DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2387{
2388 Assert(!((uintptr_t)pu16 & 1));
2389#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2390 uint32_t u32;
2391 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2392 RTASM_ARM_DMB_SY
2393# if defined(RT_ARCH_ARM64)
2394 "ldxrh %w[uDst], %[pMem]\n\t"
2395# else
2396 "ldrexh %[uDst], %[pMem]\n\t"
2397# endif
2398 : [uDst] "=&r" (u32)
2399 : [pMem] "m" (*pu16)
2400 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2401 return (uint16_t)u32;
2402#else
2403 ASMMemoryFence();
2404 return *pu16;
2405#endif
2406}
2407
2408
2409/**
2410 * Atomically reads an unsigned 16-bit value, unordered.
2411 *
2412 * @returns Current *pu16 value
2413 * @param pu16 Pointer to the 16-bit variable to read.
2414 */
2415DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2416{
2417 Assert(!((uintptr_t)pu16 & 1));
2418#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2419 uint32_t u32;
2420 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2421# if defined(RT_ARCH_ARM64)
2422 "ldxrh %w[uDst], %[pMem]\n\t"
2423# else
2424 "ldrexh %[uDst], %[pMem]\n\t"
2425# endif
2426 : [uDst] "=&r" (u32)
2427 : [pMem] "m" (*pu16));
2428 return (uint16_t)u32;
2429#else
2430 return *pu16;
2431#endif
2432}
2433
2434
2435/**
2436 * Atomically reads a signed 16-bit value, ordered.
2437 *
2438 * @returns Current *pi16 value
2439 * @param pi16 Pointer to the 16-bit variable to read.
2440 */
2441DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2442{
2443 Assert(!((uintptr_t)pi16 & 1));
2444#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2445 int32_t i32;
2446 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2447 RTASM_ARM_DMB_SY
2448# if defined(RT_ARCH_ARM64)
2449 "ldxrh %w[iDst], %[pMem]\n\t"
2450# else
2451 "ldrexh %[iDst], %[pMem]\n\t"
2452# endif
2453 : [iDst] "=&r" (i32)
2454 : [pMem] "m" (*pi16)
2455 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2456 return (int16_t)i32;
2457#else
2458 ASMMemoryFence();
2459 return *pi16;
2460#endif
2461}
2462
2463
2464/**
2465 * Atomically reads a signed 16-bit value, unordered.
2466 *
2467 * @returns Current *pi16 value
2468 * @param pi16 Pointer to the 16-bit variable to read.
2469 */
2470DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2471{
2472 Assert(!((uintptr_t)pi16 & 1));
2473#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2474 int32_t i32;
2475 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2476# if defined(RT_ARCH_ARM64)
2477 "ldxrh %w[iDst], %[pMem]\n\t"
2478# else
2479 "ldrexh %[iDst], %[pMem]\n\t"
2480# endif
2481 : [iDst] "=&r" (i32)
2482 : [pMem] "m" (*pi16));
2483 return (int16_t)i32;
2484#else
2485 return *pi16;
2486#endif
2487}
2488
2489
2490/**
2491 * Atomically reads an unsigned 32-bit value, ordered.
2492 *
2493 * @returns Current *pu32 value
2494 * @param pu32 Pointer to the 32-bit variable to read.
2495 */
2496DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2497{
2498 Assert(!((uintptr_t)pu32 & 3));
2499#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2500 uint32_t u32;
2501 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2502 RTASM_ARM_DMB_SY
2503# if defined(RT_ARCH_ARM64)
2504 "ldxr %w[uDst], %[pMem]\n\t"
2505# else
2506 "ldrex %[uDst], %[pMem]\n\t"
2507# endif
2508 : [uDst] "=&r" (u32)
2509 : [pMem] "m" (*pu32)
2510 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2511 return u32;
2512#else
2513 ASMMemoryFence();
2514# if ARCH_BITS == 16
2515 AssertFailed(); /** @todo 16-bit */
2516# endif
2517 return *pu32;
2518#endif
2519}
2520
2521
2522/**
2523 * Atomically reads an unsigned 32-bit value, unordered.
2524 *
2525 * @returns Current *pu32 value
2526 * @param pu32 Pointer to the 32-bit variable to read.
2527 */
2528DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2529{
2530 Assert(!((uintptr_t)pu32 & 3));
2531#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2532 uint32_t u32;
2533 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2534# if defined(RT_ARCH_ARM64)
2535 "ldxr %w[uDst], %[pMem]\n\t"
2536# else
2537 "ldrex %[uDst], %[pMem]\n\t"
2538# endif
2539 : [uDst] "=&r" (u32)
2540 : [pMem] "m" (*pu32));
2541 return u32;
2542#else
2543# if ARCH_BITS == 16
2544 AssertFailed(); /** @todo 16-bit */
2545# endif
2546 return *pu32;
2547#endif
2548}
2549
2550
2551/**
2552 * Atomically reads a signed 32-bit value, ordered.
2553 *
2554 * @returns Current *pi32 value
2555 * @param pi32 Pointer to the 32-bit variable to read.
2556 */
2557DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2558{
2559 Assert(!((uintptr_t)pi32 & 3));
2560#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2561 int32_t i32;
2562 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2563 RTASM_ARM_DMB_SY
2564# if defined(RT_ARCH_ARM64)
2565 "ldxr %w[iDst], %[pMem]\n\t"
2566# else
2567 "ldrex %[iDst], %[pMem]\n\t"
2568# endif
2569 : [iDst] "=&r" (i32)
2570 : [pMem] "m" (*pi32)
2571 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2572 return i32;
2573#else
2574 ASMMemoryFence();
2575# if ARCH_BITS == 16
2576 AssertFailed(); /** @todo 16-bit */
2577# endif
2578 return *pi32;
2579#endif
2580}
2581
2582
2583/**
2584 * Atomically reads a signed 32-bit value, unordered.
2585 *
2586 * @returns Current *pi32 value
2587 * @param pi32 Pointer to the 32-bit variable to read.
2588 */
2589DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2590{
2591 Assert(!((uintptr_t)pi32 & 3));
2592#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2593 int32_t i32;
2594 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2595# if defined(RT_ARCH_ARM64)
2596 "ldxr %w[iDst], %[pMem]\n\t"
2597# else
2598 "ldrex %[iDst], %[pMem]\n\t"
2599# endif
2600 : [iDst] "=&r" (i32)
2601 : [pMem] "m" (*pi32));
2602 return i32;
2603
2604#else
2605# if ARCH_BITS == 16
2606 AssertFailed(); /** @todo 16-bit */
2607# endif
2608 return *pi32;
2609#endif
2610}
2611
2612
2613/**
2614 * Atomically reads an unsigned 64-bit value, ordered.
2615 *
2616 * @returns Current *pu64 value
2617 * @param pu64 Pointer to the 64-bit variable to read.
2618 * The memory pointed to must be writable.
2619 *
2620 * @remarks This may fault if the memory is read-only!
2621 * @remarks x86: Requires a Pentium or later.
2622 */
2623#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2624 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2625RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2626#else
2627DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2628{
2629 uint64_t u64;
2630# ifdef RT_ARCH_AMD64
2631 Assert(!((uintptr_t)pu64 & 7));
2632/*# if RT_INLINE_ASM_GNU_STYLE
2633 __asm__ __volatile__( "mfence\n\t"
2634 "movq %1, %0\n\t"
2635 : "=r" (u64)
2636 : "m" (*pu64));
2637# else
2638 __asm
2639 {
2640 mfence
2641 mov rdx, [pu64]
2642 mov rax, [rdx]
2643 mov [u64], rax
2644 }
2645# endif*/
2646 ASMMemoryFence();
2647 u64 = *pu64;
2648
2649# elif defined(RT_ARCH_X86)
2650# if RT_INLINE_ASM_GNU_STYLE
2651# if defined(PIC) || defined(__PIC__)
2652 uint32_t u32EBX = 0;
2653 Assert(!((uintptr_t)pu64 & 7));
2654 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2655 "lock; cmpxchg8b (%5)\n\t"
2656 "movl %3, %%ebx\n\t"
2657 : "=A" (u64)
2658# if RT_GNUC_PREREQ(4, 3)
2659 , "+m" (*pu64)
2660# else
2661 , "=m" (*pu64)
2662# endif
2663 : "0" (0ULL)
2664 , "m" (u32EBX)
2665 , "c" (0)
2666 , "S" (pu64)
2667 : "cc");
2668# else /* !PIC */
2669 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2670 : "=A" (u64)
2671 , "+m" (*pu64)
2672 : "0" (0ULL)
2673 , "b" (0)
2674 , "c" (0)
2675 : "cc");
2676# endif
2677# else
2678 Assert(!((uintptr_t)pu64 & 7));
2679 __asm
2680 {
2681 xor eax, eax
2682 xor edx, edx
2683 mov edi, pu64
2684 xor ecx, ecx
2685 xor ebx, ebx
2686 lock cmpxchg8b [edi]
2687 mov dword ptr [u64], eax
2688 mov dword ptr [u64 + 4], edx
2689 }
2690# endif
2691
2692# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2693 Assert(!((uintptr_t)pu64 & 7));
2694 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2695 RTASM_ARM_DMB_SY
2696# if defined(RT_ARCH_ARM64)
2697 "ldxr %[uDst], %[pMem]\n\t"
2698# else
2699 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2700# endif
2701 : [uDst] "=&r" (u64)
2702 : [pMem] "m" (*pu64)
2703 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2704
2705# else
2706# error "Port me"
2707# endif
2708 return u64;
2709}
2710#endif
2711
2712
2713/**
2714 * Atomically reads an unsigned 64-bit value, unordered.
2715 *
2716 * @returns Current *pu64 value
2717 * @param pu64 Pointer to the 64-bit variable to read.
2718 * The memory pointed to must be writable.
2719 *
2720 * @remarks This may fault if the memory is read-only!
2721 * @remarks x86: Requires a Pentium or later.
2722 */
2723#if !defined(RT_ARCH_AMD64) \
2724 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2725 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2726RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2727#else
2728DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2729{
2730 uint64_t u64;
2731# ifdef RT_ARCH_AMD64
2732 Assert(!((uintptr_t)pu64 & 7));
2733/*# if RT_INLINE_ASM_GNU_STYLE
2734 Assert(!((uintptr_t)pu64 & 7));
2735 __asm__ __volatile__("movq %1, %0\n\t"
2736 : "=r" (u64)
2737 : "m" (*pu64));
2738# else
2739 __asm
2740 {
2741 mov rdx, [pu64]
2742 mov rax, [rdx]
2743 mov [u64], rax
2744 }
2745# endif */
2746 u64 = *pu64;
2747
2748# elif defined(RT_ARCH_X86)
2749# if RT_INLINE_ASM_GNU_STYLE
2750# if defined(PIC) || defined(__PIC__)
2751 uint32_t u32EBX = 0;
2752 uint32_t u32Spill;
2753 Assert(!((uintptr_t)pu64 & 7));
2754 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2755 "xor %%ecx,%%ecx\n\t"
2756 "xor %%edx,%%edx\n\t"
2757 "xchgl %%ebx, %3\n\t"
2758 "lock; cmpxchg8b (%4)\n\t"
2759 "movl %3, %%ebx\n\t"
2760 : "=A" (u64)
2761# if RT_GNUC_PREREQ(4, 3)
2762 , "+m" (*pu64)
2763# else
2764 , "=m" (*pu64)
2765# endif
2766 , "=c" (u32Spill)
2767 : "m" (u32EBX)
2768 , "S" (pu64)
2769 : "cc");
2770# else /* !PIC */
2771 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2772 : "=A" (u64)
2773 , "+m" (*pu64)
2774 : "0" (0ULL)
2775 , "b" (0)
2776 , "c" (0)
2777 : "cc");
2778# endif
2779# else
2780 Assert(!((uintptr_t)pu64 & 7));
2781 __asm
2782 {
2783 xor eax, eax
2784 xor edx, edx
2785 mov edi, pu64
2786 xor ecx, ecx
2787 xor ebx, ebx
2788 lock cmpxchg8b [edi]
2789 mov dword ptr [u64], eax
2790 mov dword ptr [u64 + 4], edx
2791 }
2792# endif
2793
2794# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2795 Assert(!((uintptr_t)pu64 & 7));
2796 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
2797# if defined(RT_ARCH_ARM64)
2798 "ldxr %[uDst], %[pMem]\n\t"
2799# else
2800 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2801# endif
2802 : [uDst] "=&r" (u64)
2803 : [pMem] "m" (*pu64));
2804
2805# else
2806# error "Port me"
2807# endif
2808 return u64;
2809}
2810#endif
2811
2812
2813/**
2814 * Atomically reads a signed 64-bit value, ordered.
2815 *
2816 * @returns Current *pi64 value
2817 * @param pi64 Pointer to the 64-bit variable to read.
2818 * The memory pointed to must be writable.
2819 *
2820 * @remarks This may fault if the memory is read-only!
2821 * @remarks x86: Requires a Pentium or later.
2822 */
2823DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2824{
2825 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2826}
2827
2828
2829/**
2830 * Atomically reads a signed 64-bit value, unordered.
2831 *
2832 * @returns Current *pi64 value
2833 * @param pi64 Pointer to the 64-bit variable to read.
2834 * The memory pointed to must be writable.
2835 *
2836 * @remarks This will fault if the memory is read-only!
2837 * @remarks x86: Requires a Pentium or later.
2838 */
2839DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2840{
2841 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2842}
2843
2844
2845/**
2846 * Atomically reads a size_t value, ordered.
2847 *
2848 * @returns Current *pcb value
2849 * @param pcb Pointer to the size_t variable to read.
2850 */
2851DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2852{
2853#if ARCH_BITS == 64
2854 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2855#elif ARCH_BITS == 32
2856 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2857#elif ARCH_BITS == 16
2858 AssertCompileSize(size_t, 2);
2859 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2860#else
2861# error "Unsupported ARCH_BITS value"
2862#endif
2863}
2864
2865
2866/**
2867 * Atomically reads a size_t value, unordered.
2868 *
2869 * @returns Current *pcb value
2870 * @param pcb Pointer to the size_t variable to read.
2871 */
2872DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2873{
2874#if ARCH_BITS == 64
2875 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2876#elif ARCH_BITS == 32
2877 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2878#elif ARCH_BITS == 16
2879 AssertCompileSize(size_t, 2);
2880 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2881#else
2882# error "Unsupported ARCH_BITS value"
2883#endif
2884}
2885
2886
2887/**
2888 * Atomically reads a pointer value, ordered.
2889 *
2890 * @returns Current *pv value
2891 * @param ppv Pointer to the pointer variable to read.
2892 *
2893 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2894 * requires less typing (no casts).
2895 */
2896DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2897{
2898#if ARCH_BITS == 32 || ARCH_BITS == 16
2899 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2900#elif ARCH_BITS == 64
2901 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2902#else
2903# error "ARCH_BITS is bogus"
2904#endif
2905}
2906
2907/**
2908 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2909 *
2910 * @returns Current *pv value
2911 * @param ppv Pointer to the pointer variable to read.
2912 * @param Type The type of *ppv, sans volatile.
2913 */
2914#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2915# define ASMAtomicReadPtrT(ppv, Type) \
2916 __extension__ \
2917 ({\
2918 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2919 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2920 pvTypeChecked; \
2921 })
2922#else
2923# define ASMAtomicReadPtrT(ppv, Type) \
2924 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2925#endif
2926
2927
2928/**
2929 * Atomically reads a pointer value, unordered.
2930 *
2931 * @returns Current *pv value
2932 * @param ppv Pointer to the pointer variable to read.
2933 *
2934 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2935 * requires less typing (no casts).
2936 */
2937DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2938{
2939#if ARCH_BITS == 32 || ARCH_BITS == 16
2940 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2941#elif ARCH_BITS == 64
2942 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2943#else
2944# error "ARCH_BITS is bogus"
2945#endif
2946}
2947
2948
2949/**
2950 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2951 *
2952 * @returns Current *pv value
2953 * @param ppv Pointer to the pointer variable to read.
2954 * @param Type The type of *ppv, sans volatile.
2955 */
2956#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2957# define ASMAtomicUoReadPtrT(ppv, Type) \
2958 __extension__ \
2959 ({\
2960 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2961 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2962 pvTypeChecked; \
2963 })
2964#else
2965# define ASMAtomicUoReadPtrT(ppv, Type) \
2966 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2967#endif
2968
2969
2970/**
2971 * Atomically reads a boolean value, ordered.
2972 *
2973 * @returns Current *pf value
2974 * @param pf Pointer to the boolean variable to read.
2975 */
2976DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2977{
2978 ASMMemoryFence();
2979 return *pf; /* byte reads are atomic on x86 */
2980}
2981
2982
2983/**
2984 * Atomically reads a boolean value, unordered.
2985 *
2986 * @returns Current *pf value
2987 * @param pf Pointer to the boolean variable to read.
2988 */
2989DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2990{
2991 return *pf; /* byte reads are atomic on x86 */
2992}
2993
2994
2995/**
2996 * Atomically read a typical IPRT handle value, ordered.
2997 *
2998 * @param ph Pointer to the handle variable to read.
2999 * @param phRes Where to store the result.
3000 *
3001 * @remarks This doesn't currently work for all handles (like RTFILE).
3002 */
3003#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3004# define ASMAtomicReadHandle(ph, phRes) \
3005 do { \
3006 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3007 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3008 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3009 } while (0)
3010#elif HC_ARCH_BITS == 64
3011# define ASMAtomicReadHandle(ph, phRes) \
3012 do { \
3013 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3014 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3015 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3016 } while (0)
3017#else
3018# error HC_ARCH_BITS
3019#endif
3020
3021
3022/**
3023 * Atomically read a typical IPRT handle value, unordered.
3024 *
3025 * @param ph Pointer to the handle variable to read.
3026 * @param phRes Where to store the result.
3027 *
3028 * @remarks This doesn't currently work for all handles (like RTFILE).
3029 */
3030#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3031# define ASMAtomicUoReadHandle(ph, phRes) \
3032 do { \
3033 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3034 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3035 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3036 } while (0)
3037#elif HC_ARCH_BITS == 64
3038# define ASMAtomicUoReadHandle(ph, phRes) \
3039 do { \
3040 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3041 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3042 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3043 } while (0)
3044#else
3045# error HC_ARCH_BITS
3046#endif
3047
3048
3049/**
3050 * Atomically read a value whose size might differ
3051 * between platforms or compilers, ordered.
3052 *
3053 * @param pu Pointer to the variable to read.
3054 * @param puRes Where to store the result.
3055 */
3056#define ASMAtomicReadSize(pu, puRes) \
3057 do { \
3058 switch (sizeof(*(pu))) { \
3059 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3060 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3061 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3062 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3063 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3064 } \
3065 } while (0)
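/* Usage sketch (illustrative only, not part of the IPRT API): reading a
   variable whose width depends on the target word size without spelling the
   size out at the call site (assuming a 32-bit or 64-bit target). */
DECLINLINE(uintptr_t) ExampleReadCookie(uintptr_t volatile RT_FAR *puCookie) RT_NOTHROW_DEF
{
    uintptr_t uCookie;
    ASMAtomicReadSize(puCookie, &uCookie);
    return uCookie;
}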
3066
3067
3068/**
3069 * Atomically read a value whose size might differ
3070 * between platforms or compilers, unordered.
3071 *
3072 * @param pu Pointer to the variable to read.
3073 * @param puRes Where to store the result.
3074 */
3075#define ASMAtomicUoReadSize(pu, puRes) \
3076 do { \
3077 switch (sizeof(*(pu))) { \
3078 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3079 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3080 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3081 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3082 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3083 } \
3084 } while (0)
3085
3086
3087/**
3088 * Atomically writes an unsigned 8-bit value, ordered.
3089 *
3090 * @param pu8 Pointer to the 8-bit variable.
3091 * @param u8 The 8-bit value to assign to *pu8.
3092 */
3093DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3094{
3095 /** @todo Any possible ARM32/ARM64 optimizations here? */
3096 ASMAtomicXchgU8(pu8, u8);
3097}
3098
3099
3100/**
3101 * Atomically writes an unsigned 8-bit value, unordered.
3102 *
3103 * @param pu8 Pointer to the 8-bit variable.
3104 * @param u8 The 8-bit value to assign to *pu8.
3105 */
3106DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3107{
3108 /** @todo Any possible ARM32/ARM64 improvements here? */
3109 *pu8 = u8; /* byte writes are atomic on x86 */
3110}
3111
3112
3113/**
3114 * Atomically writes a signed 8-bit value, ordered.
3115 *
3116 * @param pi8 Pointer to the 8-bit variable to write.
3117 * @param i8 The 8-bit value to assign to *pi8.
3118 */
3119DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3120{
3121 /** @todo Any possible ARM32/ARM64 optimizations here? */
3122 ASMAtomicXchgS8(pi8, i8);
3123}
3124
3125
3126/**
3127 * Atomically writes a signed 8-bit value, unordered.
3128 *
3129 * @param pi8 Pointer to the 8-bit variable to write.
3130 * @param i8 The 8-bit value to assign to *pi8.
3131 */
3132DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3133{
3134 *pi8 = i8; /* byte writes are atomic on x86 */
3135}
3136
3137
3138/**
3139 * Atomically writes an unsigned 16-bit value, ordered.
3140 *
3141 * @param pu16 Pointer to the 16-bit variable to write.
3142 * @param u16 The 16-bit value to assign to *pu16.
3143 */
3144DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3145{
3146 /** @todo Any possible ARM32/ARM64 optimizations here? */
3147 ASMAtomicXchgU16(pu16, u16);
3148}
3149
3150
3151/**
3152 * Atomically writes an unsigned 16-bit value, unordered.
3153 *
3154 * @param pu16 Pointer to the 16-bit variable to write.
3155 * @param u16 The 16-bit value to assign to *pu16.
3156 */
3157DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3158{
3159 Assert(!((uintptr_t)pu16 & 1));
3160 *pu16 = u16;
3161}
3162
3163
3164/**
3165 * Atomically writes a signed 16-bit value, ordered.
3166 *
3167 * @param pi16 Pointer to the 16-bit variable to write.
3168 * @param i16 The 16-bit value to assign to *pi16.
3169 */
3170DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3171{
3172 /** @todo Any possible ARM32/ARM64 optimizations here? */
3173 ASMAtomicXchgS16(pi16, i16);
3174}
3175
3176
3177/**
3178 * Atomically writes a signed 16-bit value, unordered.
3179 *
3180 * @param pi16 Pointer to the 16-bit variable to write.
3181 * @param i16 The 16-bit value to assign to *pi16.
3182 */
3183DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3184{
3185 Assert(!((uintptr_t)pi16 & 1));
3186 *pi16 = i16;
3187}
3188
3189
3190/**
3191 * Atomically writes an unsigned 32-bit value, ordered.
3192 *
3193 * @param pu32 Pointer to the 32-bit variable to write.
3194 * @param u32 The 32-bit value to assign to *pu32.
3195 */
3196DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3197{
3198 /** @todo Any possible ARM32/ARM64 optimizations here? */
3199 ASMAtomicXchgU32(pu32, u32);
3200}
3201
3202
3203/**
3204 * Atomically writes an unsigned 32-bit value, unordered.
3205 *
3206 * @param pu32 Pointer to the 32-bit variable to write.
3207 * @param u32 The 32-bit value to assign to *pu32.
3208 */
3209DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3210{
3211 Assert(!((uintptr_t)pu32 & 3));
3212#if ARCH_BITS >= 32
3213 *pu32 = u32;
3214#else
3215 ASMAtomicXchgU32(pu32, u32);
3216#endif
3217}
3218
3219
3220/**
3221 * Atomically writes a signed 32-bit value, ordered.
3222 *
3223 * @param pi32 Pointer to the 32-bit variable to write.
3224 * @param i32 The 32-bit value to assign to *pi32.
3225 */
3226DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3227{
3228 ASMAtomicXchgS32(pi32, i32);
3229}
3230
3231
3232/**
3233 * Atomically writes a signed 32-bit value, unordered.
3234 *
3235 * @param pi32 Pointer to the 32-bit variable to write.
3236 * @param i32 The 32-bit value to assign to *pi32.
3237 */
3238DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3239{
3240 Assert(!((uintptr_t)pi32 & 3));
3241#if ARCH_BITS >= 32
3242 *pi32 = i32;
3243#else
3244 ASMAtomicXchgS32(pi32, i32);
3245#endif
3246}
3247
3248
3249/**
3250 * Atomically writes an unsigned 64-bit value, ordered.
3251 *
3252 * @param pu64 Pointer to the 64-bit variable to write.
3253 * @param u64 The 64-bit value to assign to *pu64.
3254 */
3255DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3256{
3257 /** @todo Any possible ARM32/ARM64 optimizations here? */
3258 ASMAtomicXchgU64(pu64, u64);
3259}
3260
3261
3262/**
3263 * Atomically writes an unsigned 64-bit value, unordered.
3264 *
3265 * @param pu64 Pointer to the 64-bit variable to write.
3266 * @param u64 The 64-bit value to assign to *pu64.
3267 */
3268DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3269{
3270 Assert(!((uintptr_t)pu64 & 7));
3271#if ARCH_BITS == 64
3272 *pu64 = u64;
3273#else
3274 ASMAtomicXchgU64(pu64, u64);
3275#endif
3276}
3277
3278
3279/**
3280 * Atomically writes a signed 64-bit value, ordered.
3281 *
3282 * @param pi64 Pointer to the 64-bit variable to write.
3283 * @param i64 The 64-bit value to assign to *pi64.
3284 */
3285DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3286{
3287 /** @todo Any possible ARM32/ARM64 optimizations here? */
3288 ASMAtomicXchgS64(pi64, i64);
3289}
3290
3291
3292/**
3293 * Atomically writes a signed 64-bit value, unordered.
3294 *
3295 * @param pi64 Pointer to the 64-bit variable to write.
3296 * @param i64 The 64-bit value to assign to *pi64.
3297 */
3298DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3299{
3300 Assert(!((uintptr_t)pi64 & 7));
3301#if ARCH_BITS == 64
3302 *pi64 = i64;
3303#else
3304 ASMAtomicXchgS64(pi64, i64);
3305#endif
3306}
3307
3308
3309/**
3310 * Atomically writes a size_t value, ordered.
3311 *
3312 * @returns nothing.
3313 * @param pcb Pointer to the size_t variable to write.
3314 * @param cb The value to assign to *pcb.
3315 */
3316DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3317{
3318#if ARCH_BITS == 64
3319 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3320#elif ARCH_BITS == 32
3321 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3322#elif ARCH_BITS == 16
3323 AssertCompileSize(size_t, 2);
3324 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3325#else
3326# error "Unsupported ARCH_BITS value"
3327#endif
3328}
3329
3330
3331/**
3332 * Atomically writes a size_t value, unordered.
3333 *
3334 * @returns nothing.
3335 * @param pcb Pointer to the size_t variable to write.
3336 * @param cb The value to assign to *pcb.
3337 */
3338DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3339{
3340#if ARCH_BITS == 64
3341 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3342#elif ARCH_BITS == 32
3343 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3344#elif ARCH_BITS == 16
3345 AssertCompileSize(size_t, 2);
3346 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3347#else
3348# error "Unsupported ARCH_BITS value"
3349#endif
3350}
3351
3352
3353/**
3354 * Atomically writes a boolean value, ordered.
3355 *
3356 * @param pf Pointer to the boolean variable to write.
3357 * @param f The boolean value to assign to *pf.
3358 */
3359DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3360{
3361 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3362}
3363
3364
3365/**
3366 * Atomically writes a boolean value, unordered.
3367 *
3368 * @param pf Pointer to the boolean variable to write.
3369 * @param f The boolean value to assign to *pf.
3370 */
3371DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3372{
3373 *pf = f; /* byte writes are atomic on x86 */
3374}
3375
3376
3377/**
3378 * Atomically writes a pointer value, ordered.
3379 *
3380 * @param ppv Pointer to the pointer variable to write.
3381 * @param pv The pointer value to assign to *ppv.
3382 */
3383DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3384{
3385#if ARCH_BITS == 32 || ARCH_BITS == 16
3386 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3387#elif ARCH_BITS == 64
3388 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3389#else
3390# error "ARCH_BITS is bogus"
3391#endif
3392}
3393
3394
3395/**
3396 * Atomically writes a pointer value, unordered.
3397 *
3398 * @param ppv Pointer to the pointer variable to write.
3399 * @param pv The pointer value to assign to *ppv.
3400 */
3401DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3402{
3403#if ARCH_BITS == 32 || ARCH_BITS == 16
3404 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3405#elif ARCH_BITS == 64
3406 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3407#else
3408# error "ARCH_BITS is bogus"
3409#endif
3410}
3411
3412
3413/**
3414 * Atomically writes a pointer value, ordered.
3415 *
3416 * @param ppv Pointer to the pointer variable to write.
3417 * @param pv The pointer value to assign to *ppv. If NULL use
3418 * ASMAtomicWriteNullPtr or you'll land in trouble.
3419 *
3420 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3421 * NULL.
3422 */
3423#ifdef __GNUC__
3424# define ASMAtomicWritePtr(ppv, pv) \
3425 do \
3426 { \
3427 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3428 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3429 \
3430 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3431 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3432 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3433 \
3434 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3435 } while (0)
3436#else
3437# define ASMAtomicWritePtr(ppv, pv) \
3438 do \
3439 { \
3440 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3441 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3442 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3443 \
3444 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3445 } while (0)
3446#endif
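/* Usage sketch (illustrative only, not part of the IPRT API): publishing a
   fully initialized structure by writing the shared pointer last, so readers
   using ASMAtomicReadPtrT never observe a half-initialized object. */
typedef struct EXAMPLECFG { uint32_t cEntries; uint32_t fFlags; } EXAMPLECFG;

DECLINLINE(void) ExamplePublishConfig(EXAMPLECFG * volatile RT_FAR *ppShared, EXAMPLECFG *pNewCfg, uint32_t cEntries) RT_NOTHROW_DEF
{
    /* Complete all initialization before the pointer becomes visible. */
    pNewCfg->cEntries = cEntries;
    pNewCfg->fFlags   = 0;
    ASMAtomicWritePtr(ppShared, pNewCfg);
}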
3447
3448
3449/**
3450 * Atomically sets a pointer to NULL, ordered.
3451 *
3452 * @param ppv Pointer to the pointer variable that should be set to NULL.
3453 *
3454 * @remarks This is relatively type safe on GCC platforms.
3455 */
3456#if RT_GNUC_PREREQ(4, 2)
3457# define ASMAtomicWriteNullPtr(ppv) \
3458 do \
3459 { \
3460 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3461 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3462 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3463 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3464 } while (0)
3465#else
3466# define ASMAtomicWriteNullPtr(ppv) \
3467 do \
3468 { \
3469 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3470 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3471 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3472 } while (0)
3473#endif
3474
3475
3476/**
3477 * Atomically writes a pointer value, unordered.
3478 *
3480 * @param ppv Pointer to the pointer variable.
3481 * @param pv The pointer value to assign to *ppv. If NULL use
3482 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3483 *
3484 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3485 * NULL.
3486 */
3487#if RT_GNUC_PREREQ(4, 2)
3488# define ASMAtomicUoWritePtr(ppv, pv) \
3489 do \
3490 { \
3491 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3492 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3493 \
3494 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3495 AssertCompile(sizeof(pv) == sizeof(void *)); \
3496 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3497 \
3498 *(ppvTypeChecked) = pvTypeChecked; \
3499 } while (0)
3500#else
3501# define ASMAtomicUoWritePtr(ppv, pv) \
3502 do \
3503 { \
3504 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3505 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3506 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3507 *(ppv) = pv; \
3508 } while (0)
3509#endif
3510
3511
3512/**
3513 * Atomically sets a pointer to NULL, unordered.
3514 *
3515 * @param ppv Pointer to the pointer variable that should be set to NULL.
3516 *
3517 * @remarks This is relatively type safe on GCC platforms.
3518 */
3519#ifdef __GNUC__
3520# define ASMAtomicUoWriteNullPtr(ppv) \
3521 do \
3522 { \
3523 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3524 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3525 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3526 *(ppvTypeChecked) = NULL; \
3527 } while (0)
3528#else
3529# define ASMAtomicUoWriteNullPtr(ppv) \
3530 do \
3531 { \
3532 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3533 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3534 *(ppv) = NULL; \
3535 } while (0)
3536#endif
3537
3538
3539/**
3540 * Atomically write a typical IPRT handle value, ordered.
3541 *
3542 * @param ph Pointer to the variable to update.
3543 * @param hNew The value to assign to *ph.
3544 *
3545 * @remarks This doesn't currently work for all handles (like RTFILE).
3546 */
3547#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3548# define ASMAtomicWriteHandle(ph, hNew) \
3549 do { \
3550 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3551 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3552 } while (0)
3553#elif HC_ARCH_BITS == 64
3554# define ASMAtomicWriteHandle(ph, hNew) \
3555 do { \
3556 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3557 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3558 } while (0)
3559#else
3560# error HC_ARCH_BITS
3561#endif
3562
3563
3564/**
3565 * Atomically write a typical IPRT handle value, unordered.
3566 *
3567 * @param ph Pointer to the variable to update.
3568 * @param hNew The value to assign to *ph.
3569 *
3570 * @remarks This doesn't currently work for all handles (like RTFILE).
3571 */
3572#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3573# define ASMAtomicUoWriteHandle(ph, hNew) \
3574 do { \
3575 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3576 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3577 } while (0)
3578#elif HC_ARCH_BITS == 64
3579# define ASMAtomicUoWriteHandle(ph, hNew) \
3580 do { \
3581 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3582 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3583 } while (0)
3584#else
3585# error HC_ARCH_BITS
3586#endif
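
/* Illustrative usage sketch (not part of this header): storing a pointer-sized
 * handle with ASMAtomicWriteHandle on 32-bit and 64-bit hosts.  MYHANDLE and
 * g_hActive are hypothetical; per the remarks above this only works for
 * handles that are really pointer/uintptr_t sized values.
 *
 *     typedef struct MYHANDLEINT *MYHANDLE;
 *     static MYHANDLE volatile g_hActive;
 *
 *     static void mySetActiveHandle(MYHANDLE hNew)
 *     {
 *         ASMAtomicWriteHandle(&g_hActive, hNew);
 *     }
 */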
3587
3588
3589/**
3590 * Atomically write a value whose size might differ
3591 * between platforms or compilers, ordered.
3592 *
3593 * @param pu Pointer to the variable to update.
3594 * @param uNew The value to assign to *pu.
3595 */
3596#define ASMAtomicWriteSize(pu, uNew) \
3597 do { \
3598 switch (sizeof(*(pu))) { \
3599 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3600 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3601 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3602 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3603 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3604 } \
3605 } while (0)
3606
3607/**
3608 * Atomically write a value whose size might differ
3609 * between platforms or compilers, unordered.
3610 *
3611 * @param pu Pointer to the variable to update.
3612 * @param uNew The value to assign to *pu.
3613 */
3614#define ASMAtomicUoWriteSize(pu, uNew) \
3615 do { \
3616 switch (sizeof(*(pu))) { \
3617 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3618 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3619 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3620 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3621 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3622 } \
3623 } while (0)
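
/* Illustrative usage sketch (not part of this header): ASMAtomicWriteSize is
 * convenient for fields whose width depends on the compiler, such as enums.
 * MYSTATE and MYDEVICE are hypothetical example types.
 *
 *     typedef enum MYSTATE { MYSTATE_IDLE = 1, MYSTATE_BUSY, MYSTATE_32BIT_HACK = 0x7fffffff } MYSTATE;
 *     typedef struct MYDEVICE { MYSTATE volatile enmState; } MYDEVICE;
 *
 *     static void myDeviceSetBusy(MYDEVICE *pDevice)
 *     {
 *         // the switch on sizeof() picks the matching ASMAtomicWriteUxx variant.
 *         ASMAtomicWriteSize(&pDevice->enmState, MYSTATE_BUSY);
 *     }
 */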
3624
3625
3626
3627/**
3628 * Atomically exchanges and adds to a 16-bit value, ordered.
3629 *
3630 * @returns The old value.
3631 * @param pu16 Pointer to the value.
3632 * @param u16 Number to add.
3633 *
3634 * @remarks Currently not implemented, just to make 16-bit code happy.
3635 * @remarks x86: Requires a 486 or later.
3636 */
3637RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3638
3639
3640/**
3641 * Atomically exchanges and adds to a 32-bit value, ordered.
3642 *
3643 * @returns The old value.
3644 * @param pu32 Pointer to the value.
3645 * @param u32 Number to add.
3646 *
3647 * @remarks x86: Requires a 486 or later.
3648 */
3649#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3650RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3651#else
3652DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3653{
3654# if RT_INLINE_ASM_USES_INTRIN
3655 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3656 return u32;
3657
3658# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3659# if RT_INLINE_ASM_GNU_STYLE
3660 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3661 : "=r" (u32)
3662 , "=m" (*pu32)
3663 : "0" (u32)
3664 , "m" (*pu32)
3665 : "memory"
3666 , "cc");
3667 return u32;
3668# else
3669 __asm
3670 {
3671 mov eax, [u32]
3672# ifdef RT_ARCH_AMD64
3673 mov rdx, [pu32]
3674 lock xadd [rdx], eax
3675# else
3676 mov edx, [pu32]
3677 lock xadd [edx], eax
3678# endif
3679 mov [u32], eax
3680 }
3681 return u32;
3682# endif
3683
3684# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3685 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3686 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3687 "add %[uNew], %[uOld], %[uVal]\n\t",
3688 [uVal] "r" (u32));
3689 return u32OldRet;
3690
3691# else
3692# error "Port me"
3693# endif
3694}
3695#endif
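
/* Illustrative usage sketch (not part of this header): since ASMAtomicAddU32
 * returns the value before the addition, it works as a fetch-and-add, e.g.
 * for handing out ticket numbers.  g_uNextTicket is a hypothetical variable.
 *
 *     static uint32_t volatile g_uNextTicket = 0;
 *
 *     static uint32_t myTakeTicket(void)
 *     {
 *         return ASMAtomicAddU32(&g_uNextTicket, 1);  // old value == our ticket
 *     }
 */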
3696
3697
3698/**
3699 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3700 *
3701 * @returns The old value.
3702 * @param pi32 Pointer to the value.
3703 * @param i32 Number to add.
3704 *
3705 * @remarks x86: Requires a 486 or later.
3706 */
3707DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3708{
3709 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3710}
3711
3712
3713/**
3714 * Atomically exchanges and adds to a 64-bit value, ordered.
3715 *
3716 * @returns The old value.
3717 * @param pu64 Pointer to the value.
3718 * @param u64 Number to add.
3719 *
3720 * @remarks x86: Requires a Pentium or later.
3721 */
3722#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3723DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3724#else
3725DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3726{
3727# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3728 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3729 return u64;
3730
3731# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3732 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3733 : "=r" (u64)
3734 , "=m" (*pu64)
3735 : "0" (u64)
3736 , "m" (*pu64)
3737 : "memory"
3738 , "cc");
3739 return u64;
3740
3741# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3742 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3743 "add %[uNew], %[uOld], %[uVal]\n\t"
3744 ,
3745 "add %[uNew], %[uOld], %[uVal]\n\t"
3746 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3747 [uVal] "r" (u64));
3748 return u64OldRet;
3749
3750# else
3751 uint64_t u64Old;
3752 for (;;)
3753 {
3754 uint64_t u64New;
3755 u64Old = ASMAtomicUoReadU64(pu64);
3756 u64New = u64Old + u64;
3757 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3758 break;
3759 ASMNopPause();
3760 }
3761 return u64Old;
3762# endif
3763}
3764#endif
3765
3766
3767/**
3768 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3769 *
3770 * @returns The old value.
3771 * @param pi64 Pointer to the value.
3772 * @param i64 Number to add.
3773 *
3774 * @remarks x86: Requires a Pentium or later.
3775 */
3776DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3777{
3778 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3779}
3780
3781
3782/**
3783 * Atomically exchanges and adds to a size_t value, ordered.
3784 *
3785 * @returns The old value.
3786 * @param pcb Pointer to the size_t value.
3787 * @param cb Number to add.
3788 */
3789DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3790{
3791#if ARCH_BITS == 64
3792 AssertCompileSize(size_t, 8);
3793 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3794#elif ARCH_BITS == 32
3795 AssertCompileSize(size_t, 4);
3796 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3797#elif ARCH_BITS == 16
3798 AssertCompileSize(size_t, 2);
3799 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3800#else
3801# error "Unsupported ARCH_BITS value"
3802#endif
3803}
3804
3805
3806/**
3807 * Atomically exchanges and adds a value whose size might differ between
3808 * platforms or compilers, ordered.
3809 *
3810 * @param pu Pointer to the variable to update.
3811 * @param uNew The value to add to *pu.
3812 * @param puOld Where to store the old value.
3813 */
3814#define ASMAtomicAddSize(pu, uNew, puOld) \
3815 do { \
3816 switch (sizeof(*(pu))) { \
3817 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3818 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3819 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3820 } \
3821 } while (0)
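
/* Illustrative usage sketch (not part of this header): unlike the typed
 * ASMAtomicAddU32/U64 functions, the ASMAtomicAddSize macro hands back the old
 * value through the puOld parameter.  The byte counter below is hypothetical.
 *
 *     static uint64_t volatile g_cbTotalTransferred;
 *
 *     static uint64_t myAccountTransfer(size_t cbThis)
 *     {
 *         uint64_t cbOld;
 *         ASMAtomicAddSize(&g_cbTotalTransferred, (uint64_t)cbThis, &cbOld);
 *         return cbOld + cbThis;  // the new running total
 *     }
 */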
3822
3823
3824
3825/**
3826 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3827 *
3828 * @returns The old value.
3829 * @param pu16 Pointer to the value.
3830 * @param u16 Number to subtract.
3831 *
3832 * @remarks x86: Requires a 486 or later.
3833 */
3834DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3835{
3836 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3837}
3838
3839
3840/**
3841 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3842 *
3843 * @returns The old value.
3844 * @param pi16 Pointer to the value.
3845 * @param i16 Number to subtract.
3846 *
3847 * @remarks x86: Requires a 486 or later.
3848 */
3849DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3850{
3851 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3852}
3853
3854
3855/**
3856 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3857 *
3858 * @returns The old value.
3859 * @param pu32 Pointer to the value.
3860 * @param u32 Number to subtract.
3861 *
3862 * @remarks x86: Requires a 486 or later.
3863 */
3864DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3865{
3866 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3867}
3868
3869
3870/**
3871 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3872 *
3873 * @returns The old value.
3874 * @param pi32 Pointer to the value.
3875 * @param i32 Number to subtract.
3876 *
3877 * @remarks x86: Requires a 486 or later.
3878 */
3879DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3880{
3881 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3882}
3883
3884
3885/**
3886 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3887 *
3888 * @returns The old value.
3889 * @param pu64 Pointer to the value.
3890 * @param u64 Number to subtract.
3891 *
3892 * @remarks x86: Requires a Pentium or later.
3893 */
3894DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3895{
3896 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3897}
3898
3899
3900/**
3901 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3902 *
3903 * @returns The old value.
3904 * @param pi64 Pointer to the value.
3905 * @param i64 Number to subtract.
3906 *
3907 * @remarks x86: Requires a Pentium or later.
3908 */
3909DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3910{
3911 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3912}
3913
3914
3915/**
3916 * Atomically exchanges and subtracts from a size_t value, ordered.
3917 *
3918 * @returns The old value.
3919 * @param pcb Pointer to the size_t value.
3920 * @param cb Number to subtract.
3921 *
3922 * @remarks x86: Requires a 486 or later.
3923 */
3924DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3925{
3926#if ARCH_BITS == 64
3927 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3928#elif ARCH_BITS == 32
3929 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3930#elif ARCH_BITS == 16
3931 AssertCompileSize(size_t, 2);
3932 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3933#else
3934# error "Unsupported ARCH_BITS value"
3935#endif
3936}
3937
3938
3939/**
3940 * Atomically exchanges and subtracts a value whose size might differ between
3941 * platforms or compilers, ordered.
3942 *
3943 * @param pu Pointer to the variable to update.
3944 * @param uNew The value to subtract from *pu.
3945 * @param puOld Where to store the old value.
3946 *
3947 * @remarks x86: Requires a 486 or later.
3948 */
3949#define ASMAtomicSubSize(pu, uNew, puOld) \
3950 do { \
3951 switch (sizeof(*(pu))) { \
3952 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3953 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3954 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3955 } \
3956 } while (0)
3957
3958
3959
3960/**
3961 * Atomically increment a 16-bit value, ordered.
3962 *
3963 * @returns The new value.
3964 * @param pu16 Pointer to the value to increment.
3965 * @remarks Not implemented. Just to make 16-bit code happy.
3966 *
3967 * @remarks x86: Requires a 486 or later.
3968 */
3969RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3970
3971
3972/**
3973 * Atomically increment a 32-bit value, ordered.
3974 *
3975 * @returns The new value.
3976 * @param pu32 Pointer to the value to increment.
3977 *
3978 * @remarks x86: Requires a 486 or later.
3979 */
3980#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3981RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3982#else
3983DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3984{
3985# if RT_INLINE_ASM_USES_INTRIN
3986 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3987
3988# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3989# if RT_INLINE_ASM_GNU_STYLE
3990 uint32_t u32;
3991 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3992 : "=r" (u32)
3993 , "=m" (*pu32)
3994 : "0" (1)
3995 , "m" (*pu32)
3996 : "memory"
3997 , "cc");
3998 return u32+1;
3999# else
4000 __asm
4001 {
4002 mov eax, 1
4003# ifdef RT_ARCH_AMD64
4004 mov rdx, [pu32]
4005 lock xadd [rdx], eax
4006# else
4007 mov edx, [pu32]
4008 lock xadd [edx], eax
4009# endif
4010 mov u32, eax
4011 }
4012 return u32+1;
4013# endif
4014
4015# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4016 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4017 "add %w[uNew], %w[uNew], #1\n\t",
4018 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4019 "X" (0) /* dummy */);
4020 return u32NewRet;
4021
4022# else
4023 return ASMAtomicAddU32(pu32, 1) + 1;
4024# endif
4025}
4026#endif
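
/* Illustrative usage sketch (not part of this header): the retain half of a
 * minimal reference counter built on ASMAtomicIncU32.  MYOBJ is a hypothetical
 * structure; the matching release sketch follows ASMAtomicDecU32 below.
 *
 *     typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *     static uint32_t myObjRetain(MYOBJ *pObj)
 *     {
 *         uint32_t cRefs = ASMAtomicIncU32(&pObj->cRefs);
 *         Assert(cRefs > 1);  // the caller must already own a reference
 *         return cRefs;
 *     }
 */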
4027
4028
4029/**
4030 * Atomically increment a signed 32-bit value, ordered.
4031 *
4032 * @returns The new value.
4033 * @param pi32 Pointer to the value to increment.
4034 *
4035 * @remarks x86: Requires a 486 or later.
4036 */
4037DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4038{
4039 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4040}
4041
4042
4043/**
4044 * Atomically increment a 64-bit value, ordered.
4045 *
4046 * @returns The new value.
4047 * @param pu64 Pointer to the value to increment.
4048 *
4049 * @remarks x86: Requires a Pentium or later.
4050 */
4051#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4052DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4053#else
4054DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4055{
4056# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4057 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4058
4059# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4060 uint64_t u64;
4061 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4062 : "=r" (u64)
4063 , "=m" (*pu64)
4064 : "0" (1)
4065 , "m" (*pu64)
4066 : "memory"
4067 , "cc");
4068 return u64 + 1;
4069
4070# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4071 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4072 "add %[uNew], %[uNew], #1\n\t"
4073 ,
4074 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4075 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4076 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4077 return u64NewRet;
4078
4079# else
4080 return ASMAtomicAddU64(pu64, 1) + 1;
4081# endif
4082}
4083#endif
4084
4085
4086/**
4087 * Atomically increment a signed 64-bit value, ordered.
4088 *
4089 * @returns The new value.
4090 * @param pi64 Pointer to the value to increment.
4091 *
4092 * @remarks x86: Requires a Pentium or later.
4093 */
4094DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4095{
4096 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4097}
4098
4099
4100/**
4101 * Atomically increment a size_t value, ordered.
4102 *
4103 * @returns The new value.
4104 * @param pcb Pointer to the value to increment.
4105 *
4106 * @remarks x86: Requires a 486 or later.
4107 */
4108DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4109{
4110#if ARCH_BITS == 64
4111 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4112#elif ARCH_BITS == 32
4113 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4114#elif ARCH_BITS == 16
4115 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4116#else
4117# error "Unsupported ARCH_BITS value"
4118#endif
4119}
4120
4121
4122
4123/**
4124 * Atomically decrement an unsigned 16-bit value, ordered.
4125 *
4126 * @returns The new value.
4127 * @param pu16 Pointer to the value to decrement.
4128 * @remarks Not implemented. Just to make 16-bit code happy.
4129 *
4130 * @remarks x86: Requires a 486 or later.
4131 */
4132RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4133
4134
4135/**
4136 * Atomically decrement an unsigned 32-bit value, ordered.
4137 *
4138 * @returns The new value.
4139 * @param pu32 Pointer to the value to decrement.
4140 *
4141 * @remarks x86: Requires a 486 or later.
4142 */
4143#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4144RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4145#else
4146DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4147{
4148# if RT_INLINE_ASM_USES_INTRIN
4149 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4150
4151# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4152# if RT_INLINE_ASM_GNU_STYLE
4153 uint32_t u32;
4154 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4155 : "=r" (u32)
4156 , "=m" (*pu32)
4157 : "0" (-1)
4158 , "m" (*pu32)
4159 : "memory"
4160 , "cc");
4161 return u32-1;
4162# else
4163 uint32_t u32;
4164 __asm
4165 {
4166 mov eax, -1
4167# ifdef RT_ARCH_AMD64
4168 mov rdx, [pu32]
4169 lock xadd [rdx], eax
4170# else
4171 mov edx, [pu32]
4172 lock xadd [edx], eax
4173# endif
4174 mov u32, eax
4175 }
4176 return u32-1;
4177# endif
4178
4179# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4180 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4181 "sub %w[uNew], %w[uNew], #1\n\t",
4182 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4183 "X" (0) /* dummy */);
4184 return u32NewRet;
4185
4186# else
4187 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4188# endif
4189}
4190#endif
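
/* Illustrative usage sketch (not part of this header): the release half of the
 * reference counting example started after ASMAtomicIncU32 above.  MYOBJ and
 * myObjDestroy() are hypothetical.
 *
 *     static uint32_t myObjRelease(MYOBJ *pObj)
 *     {
 *         uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs);
 *         if (cRefs == 0)
 *             myObjDestroy(pObj);  // last reference was just dropped
 *         return cRefs;
 *     }
 */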
4191
4192
4193/**
4194 * Atomically decrement a signed 32-bit value, ordered.
4195 *
4196 * @returns The new value.
4197 * @param pi32 Pointer to the value to decrement.
4198 *
4199 * @remarks x86: Requires a 486 or later.
4200 */
4201DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4202{
4203 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4204}
4205
4206
4207/**
4208 * Atomically decrement an unsigned 64-bit value, ordered.
4209 *
4210 * @returns The new value.
4211 * @param pu64 Pointer to the value to decrement.
4212 *
4213 * @remarks x86: Requires a Pentium or later.
4214 */
4215#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4216RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4217#else
4218DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4219{
4220# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4221 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4222
4223# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4224 uint64_t u64;
4225 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4226 : "=r" (u64)
4227 , "=m" (*pu64)
4228 : "0" (~(uint64_t)0)
4229 , "m" (*pu64)
4230 : "memory"
4231 , "cc");
4232 return u64-1;
4233
4234# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4235 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4236 "sub %[uNew], %[uNew], #1\n\t"
4237 ,
4238 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4239 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4240 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4241 return u64NewRet;
4242
4243# else
4244 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4245# endif
4246}
4247#endif
4248
4249
4250/**
4251 * Atomically decrement a signed 64-bit value, ordered.
4252 *
4253 * @returns The new value.
4254 * @param pi64 Pointer to the value to decrement.
4255 *
4256 * @remarks x86: Requires a Pentium or later.
4257 */
4258DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4259{
4260 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4261}
4262
4263
4264/**
4265 * Atomically decrement a size_t value, ordered.
4266 *
4267 * @returns The new value.
4268 * @param pcb Pointer to the value to decrement.
4269 *
4270 * @remarks x86: Requires a 486 or later.
4271 */
4272DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4273{
4274#if ARCH_BITS == 64
4275 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4276#elif ARCH_BITS == 32
4277 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4278#elif ARCH_BITS == 16
4279 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4280#else
4281# error "Unsupported ARCH_BITS value"
4282#endif
4283}
4284
4285
4286/**
4287 * Atomically Or an unsigned 32-bit value, ordered.
4288 *
4289 * @param pu32 Pointer to the pointer variable to OR u32 with.
4290 * @param u32 The value to OR *pu32 with.
4291 *
4292 * @remarks x86: Requires a 386 or later.
4293 */
4294#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4295RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4296#else
4297DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4298{
4299# if RT_INLINE_ASM_USES_INTRIN
4300 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4301
4302# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4303# if RT_INLINE_ASM_GNU_STYLE
4304 __asm__ __volatile__("lock; orl %1, %0\n\t"
4305 : "=m" (*pu32)
4306 : "ir" (u32)
4307 , "m" (*pu32)
4308 : "cc");
4309# else
4310 __asm
4311 {
4312 mov eax, [u32]
4313# ifdef RT_ARCH_AMD64
4314 mov rdx, [pu32]
4315 lock or [rdx], eax
4316# else
4317 mov edx, [pu32]
4318 lock or [edx], eax
4319# endif
4320 }
4321# endif
4322
4323# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4324 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4325 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4326 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4327 "orr %[uNew], %[uNew], %[uVal]\n\t",
4328 [uVal] "r" (u32));
4329
4330# else
4331# error "Port me"
4332# endif
4333}
4334#endif
4335
4336
4337/**
4338 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4339 * fallback).
4340 *
4341 * @returns Old value.
4342 * @param pu32 Pointer to the variable to OR @a u32 with.
4343 * @param u32 The value to OR @a *pu32 with.
4344 */
4345DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4346{
4347#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4348 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4349 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4350 "orr %[uNew], %[uOld], %[uVal]\n\t",
4351 [uVal] "r" (u32));
4352 return u32OldRet;
4353
4354#else
4355 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4356 uint32_t u32New;
4357 do
4358 u32New = u32RetOld | u32;
4359 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4360 return u32RetOld;
4361#endif
4362}
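
/* Illustrative usage sketch (not part of this header): because ASMAtomicOrExU32
 * returns the previous value, the caller can tell whether it was the one that
 * actually set a flag.  MYOBJ_F_SHUTDOWN and the pfFlags parameter are
 * hypothetical.
 *
 *     #define MYOBJ_F_SHUTDOWN  UINT32_C(0x00000001)
 *
 *     static bool mySignalShutdown(uint32_t volatile *pfFlags)
 *     {
 *         uint32_t const fOld = ASMAtomicOrExU32(pfFlags, MYOBJ_F_SHUTDOWN);
 *         return !(fOld & MYOBJ_F_SHUTDOWN);  // true if this call set the flag
 *     }
 */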
4363
4364
4365/**
4366 * Atomically Or a signed 32-bit value, ordered.
4367 *
4368 * @param pi32 Pointer to the variable to OR i32 with.
4369 * @param i32 The value to OR *pi32 with.
4370 *
4371 * @remarks x86: Requires a 386 or later.
4372 */
4373DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4374{
4375 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4376}
4377
4378
4379/**
4380 * Atomically Or an unsigned 64-bit value, ordered.
4381 *
4382 * @param pu64 Pointer to the pointer variable to OR u64 with.
4383 * @param u64 The value to OR *pu64 with.
4384 *
4385 * @remarks x86: Requires a Pentium or later.
4386 */
4387#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4388DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4389#else
4390DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4391{
4392# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4393 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4394
4395# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4396 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4397 : "=m" (*pu64)
4398 : "r" (u64)
4399 , "m" (*pu64)
4400 : "cc");
4401
4402# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4403 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4404 "orr %[uNew], %[uNew], %[uVal]\n\t"
4405 ,
4406 "orr %[uNew], %[uNew], %[uVal]\n\t"
4407 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4408 [uVal] "r" (u64));
4409
4410# else
4411 for (;;)
4412 {
4413 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4414 uint64_t u64New = u64Old | u64;
4415 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4416 break;
4417 ASMNopPause();
4418 }
4419# endif
4420}
4421#endif
4422
4423
4424/**
4425 * Atomically Or a signed 64-bit value, ordered.
4426 *
4427 * @param pi64 Pointer to the variable to OR i64 with.
4428 * @param i64 The value to OR *pi64 with.
4429 *
4430 * @remarks x86: Requires a Pentium or later.
4431 */
4432DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4433{
4434 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4435}
4436
4437
4438/**
4439 * Atomically And an unsigned 32-bit value, ordered.
4440 *
4441 * @param pu32 Pointer to the pointer variable to AND u32 with.
4442 * @param u32 The value to AND *pu32 with.
4443 *
4444 * @remarks x86: Requires a 386 or later.
4445 */
4446#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4447RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4448#else
4449DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4450{
4451# if RT_INLINE_ASM_USES_INTRIN
4452 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4453
4454# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4455# if RT_INLINE_ASM_GNU_STYLE
4456 __asm__ __volatile__("lock; andl %1, %0\n\t"
4457 : "=m" (*pu32)
4458 : "ir" (u32)
4459 , "m" (*pu32)
4460 : "cc");
4461# else
4462 __asm
4463 {
4464 mov eax, [u32]
4465# ifdef RT_ARCH_AMD64
4466 mov rdx, [pu32]
4467 lock and [rdx], eax
4468# else
4469 mov edx, [pu32]
4470 lock and [edx], eax
4471# endif
4472 }
4473# endif
4474
4475# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4476 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4477 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4478 "and %[uNew], %[uNew], %[uVal]\n\t",
4479 [uVal] "r" (u32));
4480
4481# else
4482# error "Port me"
4483# endif
4484}
4485#endif
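
/* Illustrative usage sketch (not part of this header): the usual pattern of
 * setting flag bits with ASMAtomicOrU32 and clearing them with ASMAtomicAndU32
 * and an inverted mask.  The flag value is hypothetical.
 *
 *     #define MYDEV_F_IRQ_PENDING  UINT32_C(0x00000002)
 *
 *     static void myDevSetIrqPending(uint32_t volatile *pfFlags)
 *     {
 *         ASMAtomicOrU32(pfFlags, MYDEV_F_IRQ_PENDING);
 *     }
 *
 *     static void myDevClearIrqPending(uint32_t volatile *pfFlags)
 *     {
 *         ASMAtomicAndU32(pfFlags, ~MYDEV_F_IRQ_PENDING);
 *     }
 */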
4486
4487
4488/**
4489 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4490 *
4491 * @returns Old value.
4492 * @param pu32 Pointer to the variable to AND @a u32 with.
4493 * @param u32 The value to AND @a *pu32 with.
4494 */
4495DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4496{
4497#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4498 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4499 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4500 "and %[uNew], %[uOld], %[uVal]\n\t",
4501 [uVal] "r" (u32));
4502 return u32OldRet;
4503
4504#else
4505 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4506 uint32_t u32New;
4507 do
4508 u32New = u32RetOld & u32;
4509 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4510 return u32RetOld;
4511#endif
4512}
4513
4514
4515/**
4516 * Atomically And a signed 32-bit value, ordered.
4517 *
4518 * @param pi32 Pointer to the pointer variable to AND i32 with.
4519 * @param i32 The value to AND *pi32 with.
4520 *
4521 * @remarks x86: Requires a 386 or later.
4522 */
4523DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4524{
4525 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4526}
4527
4528
4529/**
4530 * Atomically And an unsigned 64-bit value, ordered.
4531 *
4532 * @param pu64 Pointer to the pointer variable to AND u64 with.
4533 * @param u64 The value to AND *pu64 with.
4534 *
4535 * @remarks x86: Requires a Pentium or later.
4536 */
4537#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4538DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4539#else
4540DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4541{
4542# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4543 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4544
4545# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4546 __asm__ __volatile__("lock; andq %1, %0\n\t"
4547 : "=m" (*pu64)
4548 : "r" (u64)
4549 , "m" (*pu64)
4550 : "cc");
4551
4552# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4553 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4554 "and %[uNew], %[uNew], %[uVal]\n\t"
4555 ,
4556 "and %[uNew], %[uNew], %[uVal]\n\t"
4557 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4558 [uVal] "r" (u64));
4559
4560# else
4561 for (;;)
4562 {
4563 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4564 uint64_t u64New = u64Old & u64;
4565 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4566 break;
4567 ASMNopPause();
4568 }
4569# endif
4570}
4571#endif
4572
4573
4574/**
4575 * Atomically And a signed 64-bit value, ordered.
4576 *
4577 * @param pi64 Pointer to the pointer variable to AND i64 with.
4578 * @param i64 The value to AND *pi64 with.
4579 *
4580 * @remarks x86: Requires a Pentium or later.
4581 */
4582DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4583{
4584 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4585}
4586
4587
4588/**
4589 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4590 *
4591 * @param pu32 Pointer to the variable to XOR @a u32 with.
4592 * @param u32 The value to XOR @a *pu32 with.
4593 *
4594 * @remarks x86: Requires a 386 or later.
4595 */
4596#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4597RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4598#else
4599DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4600{
4601# if RT_INLINE_ASM_USES_INTRIN
4602 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4603
4604# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4605# if RT_INLINE_ASM_GNU_STYLE
4606 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4607 : "=m" (*pu32)
4608 : "ir" (u32)
4609 , "m" (*pu32)
4610 : "cc");
4611# else
4612 __asm
4613 {
4614 mov eax, [u32]
4615# ifdef RT_ARCH_AMD64
4616 mov rdx, [pu32]
4617 lock xor [rdx], eax
4618# else
4619 mov edx, [pu32]
4620 lock xor [edx], eax
4621# endif
4622 }
4623# endif
4624
4625# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4626 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4627 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4628 "eor %[uNew], %[uNew], %[uVal]\n\t",
4629 [uVal] "r" (u32));
4630
4631# else
4632# error "Port me"
4633# endif
4634}
4635#endif
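
/* Illustrative usage sketch (not part of this header): ASMAtomicXorU32 flips
 * bits atomically, so repeated calls toggle a flag.  The flag value is
 * hypothetical.
 *
 *     #define MYDEV_F_LED_ON  UINT32_C(0x00000004)
 *
 *     static void myDevToggleLed(uint32_t volatile *pfFlags)
 *     {
 *         ASMAtomicXorU32(pfFlags, MYDEV_F_LED_ON);
 *     }
 */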
4636
4637
4638/**
4639 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4640 * extended version (for bitmaps).
4641 *
4642 * @returns Old value.
4643 * @param pu32 Pointer to the variable to XOR @a u32 with.
4644 * @param u32 The value to XOR @a *pu32 with.
4645 */
4646DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4647{
4648#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4649 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4650 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4651 "eor %[uNew], %[uOld], %[uVal]\n\t",
4652 [uVal] "r" (u32));
4653 return u32OldRet;
4654
4655#else
4656 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4657 uint32_t u32New;
4658 do
4659 u32New = u32RetOld ^ u32;
4660 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4661 return u32RetOld;
4662#endif
4663}
4664
4665
4666/**
4667 * Atomically XOR a signed 32-bit value, ordered.
4668 *
4669 * @param pi32 Pointer to the variable to XOR i32 with.
4670 * @param i32 The value to XOR *pi32 with.
4671 *
4672 * @remarks x86: Requires a 386 or later.
4673 */
4674DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4675{
4676 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4677}
4678
4679
4680/**
4681 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4682 *
4683 * @param pu32 Pointer to the pointer variable to OR u32 with.
4684 * @param u32 The value to OR *pu32 with.
4685 *
4686 * @remarks x86: Requires a 386 or later.
4687 */
4688#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4689RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4690#else
4691DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4692{
4693# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4694# if RT_INLINE_ASM_GNU_STYLE
4695 __asm__ __volatile__("orl %1, %0\n\t"
4696 : "=m" (*pu32)
4697 : "ir" (u32)
4698 , "m" (*pu32)
4699 : "cc");
4700# else
4701 __asm
4702 {
4703 mov eax, [u32]
4704# ifdef RT_ARCH_AMD64
4705 mov rdx, [pu32]
4706 or [rdx], eax
4707# else
4708 mov edx, [pu32]
4709 or [edx], eax
4710# endif
4711 }
4712# endif
4713
4714# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4715 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4716 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4717 "orr %[uNew], %[uNew], %[uVal]\n\t",
4718 [uVal] "r" (u32));
4719
4720# else
4721# error "Port me"
4722# endif
4723}
4724#endif
4725
4726
4727/**
4728 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4729 * extended version (for bitmap fallback).
4730 *
4731 * @returns Old value.
4732 * @param pu32 Pointer to the variable to OR @a u32 with.
4733 * @param u32 The value to OR @a *pu32 with.
4734 */
4735DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4736{
4737#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4738 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4739 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4740 "orr %[uNew], %[uOld], %[uVal]\n\t",
4741 [uVal] "r" (u32));
4742 return u32OldRet;
4743
4744#else
4745 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4746#endif
4747}
4748
4749
4750/**
4751 * Atomically OR a signed 32-bit value, unordered.
4752 *
4753 * @param pi32 Pointer to the variable to OR i32 with.
4754 * @param i32 The value to OR *pi32 with.
4755 *
4756 * @remarks x86: Requires a 386 or later.
4757 */
4758DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4759{
4760 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4761}
4762
4763
4764/**
4765 * Atomically OR an unsigned 64-bit value, unordered.
4766 *
4767 * @param pu64 Pointer to the pointer variable to OR u64 with.
4768 * @param u64 The value to OR *pu64 with.
4769 *
4770 * @remarks x86: Requires a Pentium or later.
4771 */
4772#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4773DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4774#else
4775DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4776{
4777# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4778 __asm__ __volatile__("orq %1, %q0\n\t"
4779 : "=m" (*pu64)
4780 : "r" (u64)
4781 , "m" (*pu64)
4782 : "cc");
4783
4784# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4785 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
4786 "orr %[uNew], %[uNew], %[uVal]\n\t"
4787 ,
4788 "orr %[uNew], %[uNew], %[uVal]\n\t"
4789 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4790 [uVal] "r" (u64));
4791
4792# else
4793 for (;;)
4794 {
4795 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4796 uint64_t u64New = u64Old | u64;
4797 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4798 break;
4799 ASMNopPause();
4800 }
4801# endif
4802}
4803#endif
4804
4805
4806/**
4807 * Atomically Or a signed 64-bit value, unordered.
4808 *
4809 * @param pi64 Pointer to the variable to OR i64 with.
4810 * @param i64 The value to OR *pi64 with.
4811 *
4812 * @remarks x86: Requires a Pentium or later.
4813 */
4814DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4815{
4816 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4817}
4818
4819
4820/**
4821 * Atomically And an unsigned 32-bit value, unordered.
4822 *
4823 * @param pu32 Pointer to the pointer variable to AND u32 with.
4824 * @param u32 The value to AND *pu32 with.
4825 *
4826 * @remarks x86: Requires a 386 or later.
4827 */
4828#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4829RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4830#else
4831DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4832{
4833# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4834# if RT_INLINE_ASM_GNU_STYLE
4835 __asm__ __volatile__("andl %1, %0\n\t"
4836 : "=m" (*pu32)
4837 : "ir" (u32)
4838 , "m" (*pu32)
4839 : "cc");
4840# else
4841 __asm
4842 {
4843 mov eax, [u32]
4844# ifdef RT_ARCH_AMD64
4845 mov rdx, [pu32]
4846 and [rdx], eax
4847# else
4848 mov edx, [pu32]
4849 and [edx], eax
4850# endif
4851 }
4852# endif
4853
4854# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4855 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
4856 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4857 "and %[uNew], %[uNew], %[uVal]\n\t",
4858 [uVal] "r" (u32));
4859
4860# else
4861# error "Port me"
4862# endif
4863}
4864#endif
4865
4866
4867/**
4868 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
4869 * bitmap fallback).
4870 *
4871 * @returns Old value.
4872 * @param pu32 Pointer to the pointer to AND @a u32 with.
4873 * @param u32 The value to AND @a *pu32 with.
4874 */
4875DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4876{
4877#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4878 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
4879 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4880 "and %[uNew], %[uOld], %[uVal]\n\t",
4881 [uVal] "r" (u32));
4882 return u32OldRet;
4883
4884#else
4885 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4886#endif
4887}
4888
4889
4890/**
4891 * Atomically And a signed 32-bit value, unordered.
4892 *
4893 * @param pi32 Pointer to the pointer variable to AND i32 with.
4894 * @param i32 The value to AND *pi32 with.
4895 *
4896 * @remarks x86: Requires a 386 or later.
4897 */
4898DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4899{
4900 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4901}
4902
4903
4904/**
4905 * Atomically And an unsigned 64-bit value, unordered.
4906 *
4907 * @param pu64 Pointer to the pointer variable to AND u64 with.
4908 * @param u64 The value to AND *pu64 with.
4909 *
4910 * @remarks x86: Requires a Pentium or later.
4911 */
4912#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4913DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4914#else
4915DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4916{
4917# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4918 __asm__ __volatile__("andq %1, %0\n\t"
4919 : "=m" (*pu64)
4920 : "r" (u64)
4921 , "m" (*pu64)
4922 : "cc");
4923
4924# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4925 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
4926 "and %[uNew], %[uNew], %[uVal]\n\t"
4927 ,
4928 "and %[uNew], %[uNew], %[uVal]\n\t"
4929 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4930 [uVal] "r" (u64));
4931
4932# else
4933 for (;;)
4934 {
4935 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4936 uint64_t u64New = u64Old & u64;
4937 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4938 break;
4939 ASMNopPause();
4940 }
4941# endif
4942}
4943#endif
4944
4945
4946/**
4947 * Atomically And a signed 64-bit value, unordered.
4948 *
4949 * @param pi64 Pointer to the pointer variable to AND i64 with.
4950 * @param i64 The value to AND *pi64 with.
4951 *
4952 * @remarks x86: Requires a Pentium or later.
4953 */
4954DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4955{
4956 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4957}
4958
4959
4960/**
4961 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
4962 *
4963 * @param pu32 Pointer to the variable to XOR @a u32 with.
4964 * @param u32 The value to XOR @a *pu32 with.
4965 *
4966 * @remarks x86: Requires a 386 or later.
4967 */
4968#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4969RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4970#else
4971DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4972{
4973# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4974# if RT_INLINE_ASM_GNU_STYLE
4975 __asm__ __volatile__("xorl %1, %0\n\t"
4976 : "=m" (*pu32)
4977 : "ir" (u32)
4978 , "m" (*pu32)
4979 : "cc");
4980# else
4981 __asm
4982 {
4983 mov eax, [u32]
4984# ifdef RT_ARCH_AMD64
4985 mov rdx, [pu32]
4986 xor [rdx], eax
4987# else
4988 mov edx, [pu32]
4989 xor [edx], eax
4990# endif
4991 }
4992# endif
4993
4994# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4995 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
4996 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4997 "eor %[uNew], %[uNew], %[uVal]\n\t",
4998 [uVal] "r" (u32));
4999
5000# else
5001# error "Port me"
5002# endif
5003}
5004#endif
5005
5006
5007/**
5008 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5009 * extended version (for bitmap fallback).
5010 *
5011 * @returns Old value.
5012 * @param pu32 Pointer to the variable to XOR @a u32 with.
5013 * @param u32 The value to XOR @a *pu32 with.
5014 */
5015DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5016{
5017#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5018 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5019 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5020 "eor %[uNew], %[uOld], %[uVal]\n\t",
5021 [uVal] "r" (u32));
5022 return u32OldRet;
5023
5024#else
5025 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5026#endif
5027}
5028
5029
5030/**
5031 * Atomically XOR a signed 32-bit value, unordered.
5032 *
5033 * @param pi32 Pointer to the variable to XOR @a i32 with.
5034 * @param i32 The value to XOR @a *pi32 with.
5035 *
5036 * @remarks x86: Requires a 386 or later.
5037 */
5038DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5039{
5040 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5041}
5042
5043
5044/**
5045 * Atomically increment an unsigned 32-bit value, unordered.
5046 *
5047 * @returns the new value.
5048 * @param pu32 Pointer to the variable to increment.
5049 *
5050 * @remarks x86: Requires a 486 or later.
5051 */
5052#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5053RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5054#else
5055DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5056{
5057# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5058 uint32_t u32;
5059# if RT_INLINE_ASM_GNU_STYLE
5060 __asm__ __volatile__("xaddl %0, %1\n\t"
5061 : "=r" (u32)
5062 , "=m" (*pu32)
5063 : "0" (1)
5064 , "m" (*pu32)
5065 : "memory" /** @todo why 'memory'? */
5066 , "cc");
5067 return u32 + 1;
5068# else
5069 __asm
5070 {
5071 mov eax, 1
5072# ifdef RT_ARCH_AMD64
5073 mov rdx, [pu32]
5074 xadd [rdx], eax
5075# else
5076 mov edx, [pu32]
5077 xadd [edx], eax
5078# endif
5079 mov u32, eax
5080 }
5081 return u32 + 1;
5082# endif
5083
5084# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5085 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5086 "add %w[uNew], %w[uNew], #1\n\t",
5087 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5088 "X" (0) /* dummy */);
5089 return u32NewRet;
5090
5091# else
5092# error "Port me"
5093# endif
5094}
5095#endif
5096
5097
5098/**
5099 * Atomically decrement an unsigned 32-bit value, unordered.
5100 *
5101 * @returns the new value.
5102 * @param pu32 Pointer to the variable to decrement.
5103 *
5104 * @remarks x86: Requires a 486 or later.
5105 */
5106#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5107RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5108#else
5109DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5110{
5111# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5112 uint32_t u32;
5113# if RT_INLINE_ASM_GNU_STYLE
5114 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5115 : "=r" (u32)
5116 , "=m" (*pu32)
5117 : "0" (-1)
5118 , "m" (*pu32)
5119 : "memory"
5120 , "cc");
5121 return u32 - 1;
5122# else
5123 __asm
5124 {
5125 mov eax, -1
5126# ifdef RT_ARCH_AMD64
5127 mov rdx, [pu32]
5128 xadd [rdx], eax
5129# else
5130 mov edx, [pu32]
5131 xadd [edx], eax
5132# endif
5133 mov u32, eax
5134 }
5135 return u32 - 1;
5136# endif
5137
5138# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5139 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5140 "sub %w[uNew], %w[uNew], #1\n\t",
5141 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5142 "X" (0) /* dummy */);
5143 return u32NewRet;
5144
5145# else
5146# error "Port me"
5147# endif
5148}
5149#endif
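
/* Illustrative usage sketch (not part of this header): the unordered
 * (ASMAtomicUo*) increment/decrement variants suit counters where no memory
 * ordering guarantee is needed, e.g. a statistics counter that is only ever
 * updated from one context.  g_cPacketsSeen is a hypothetical counter.
 *
 *     static uint32_t volatile g_cPacketsSeen;
 *
 *     static void myStatCountPacket(void)
 *     {
 *         ASMAtomicUoIncU32(&g_cPacketsSeen);  // no ordering required here
 *     }
 */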
5150
5151
5152/** @def RT_ASM_PAGE_SIZE
5153 * We try to avoid dragging in iprt/param.h here.
5154 * @internal
5155 */
5156#if defined(RT_ARCH_SPARC64)
5157# define RT_ASM_PAGE_SIZE 0x2000
5158# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5159# if PAGE_SIZE != 0x2000
5160# error "PAGE_SIZE is not 0x2000!"
5161# endif
5162# endif
5163#elif defined(RT_ARCH_ARM64)
5164# define RT_ASM_PAGE_SIZE 0x4000
5165# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5166# if PAGE_SIZE != 0x4000
5167# error "PAGE_SIZE is not 0x4000!"
5168# endif
5169# endif
5170#else
5171# define RT_ASM_PAGE_SIZE 0x1000
5172# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5173# if PAGE_SIZE != 0x1000
5174# error "PAGE_SIZE is not 0x1000!"
5175# endif
5176# endif
5177#endif
5178
5179/**
5180 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5181 *
5182 * @param pv Pointer to the memory block. This must be page aligned.
5183 */
5184#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5185RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5186# else
5187DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5188{
5189# if RT_INLINE_ASM_USES_INTRIN
5190# ifdef RT_ARCH_AMD64
5191 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5192# else
5193 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5194# endif
5195
5196# elif RT_INLINE_ASM_GNU_STYLE
5197 RTCCUINTREG uDummy;
5198# ifdef RT_ARCH_AMD64
5199 __asm__ __volatile__("rep stosq"
5200 : "=D" (pv),
5201 "=c" (uDummy)
5202 : "0" (pv),
5203 "c" (RT_ASM_PAGE_SIZE >> 3),
5204 "a" (0)
5205 : "memory");
5206# else
5207 __asm__ __volatile__("rep stosl"
5208 : "=D" (pv),
5209 "=c" (uDummy)
5210 : "0" (pv),
5211 "c" (RT_ASM_PAGE_SIZE >> 2),
5212 "a" (0)
5213 : "memory");
5214# endif
5215# else
5216 __asm
5217 {
5218# ifdef RT_ARCH_AMD64
5219 xor rax, rax
5220 mov ecx, 0200h
5221 mov rdi, [pv]
5222 rep stosq
5223# else
5224 xor eax, eax
5225 mov ecx, 0400h
5226 mov edi, [pv]
5227 rep stosd
5228# endif
5229 }
5230# endif
5231}
5232# endif
5233
5234
5235/**
5236 * Zeros a memory block with a 32-bit aligned size.
5237 *
5238 * @param pv Pointer to the memory block.
5239 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5240 */
5241#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5242RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5243#else
5244DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5245{
5246# if RT_INLINE_ASM_USES_INTRIN
5247# ifdef RT_ARCH_AMD64
5248 if (!(cb & 7))
5249 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5250 else
5251# endif
5252 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5253
5254# elif RT_INLINE_ASM_GNU_STYLE
5255 __asm__ __volatile__("rep stosl"
5256 : "=D" (pv),
5257 "=c" (cb)
5258 : "0" (pv),
5259 "1" (cb >> 2),
5260 "a" (0)
5261 : "memory");
5262# else
5263 __asm
5264 {
5265 xor eax, eax
5266# ifdef RT_ARCH_AMD64
5267 mov rcx, [cb]
5268 shr rcx, 2
5269 mov rdi, [pv]
5270# else
5271 mov ecx, [cb]
5272 shr ecx, 2
5273 mov edi, [pv]
5274# endif
5275 rep stosd
5276 }
5277# endif
5278}
5279#endif
5280
5281
5282/**
5283 * Fills a memory block with a 32-bit aligned size.
5284 *
5285 * @param pv Pointer to the memory block.
5286 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5287 * @param u32 The value to fill with.
5288 */
5289#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5290RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5291#else
5292DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5293{
5294# if RT_INLINE_ASM_USES_INTRIN
5295# ifdef RT_ARCH_AMD64
5296 if (!(cb & 7))
5297 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5298 else
5299# endif
5300 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5301
5302# elif RT_INLINE_ASM_GNU_STYLE
5303 __asm__ __volatile__("rep stosl"
5304 : "=D" (pv),
5305 "=c" (cb)
5306 : "0" (pv),
5307 "1" (cb >> 2),
5308 "a" (u32)
5309 : "memory");
5310# else
5311 __asm
5312 {
5313# ifdef RT_ARCH_AMD64
5314 mov rcx, [cb]
5315 shr rcx, 2
5316 mov rdi, [pv]
5317# else
5318 mov ecx, [cb]
5319 shr ecx, 2
5320 mov edi, [pv]
5321# endif
5322 mov eax, [u32]
5323 rep stosd
5324 }
5325# endif
5326}
5327#endif
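
/* Illustrative usage sketch (not part of this header): poisoning a buffer with
 * a recognizable 32-bit pattern via ASMMemFill32; the size must be a multiple
 * of four as stated above.  The pattern value is just an example.
 *
 *     static void myPoisonBuffer(void *pvBuf, size_t cbBuf)
 *     {
 *         Assert(!(cbBuf & 3));
 *         ASMMemFill32(pvBuf, cbBuf, UINT32_C(0xdeadbeef));
 *     }
 */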
5328
5329
5330/**
5331 * Checks if a memory block is all zeros.
5332 *
5333 * @returns Pointer to the first non-zero byte.
5334 * @returns NULL if all zero.
5335 *
5336 * @param pv Pointer to the memory block.
5337 * @param cb Number of bytes in the block.
5338 */
5339#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5340DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5341#else
5342DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5343{
5344/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5345 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5346 for (; cb; cb--, pb++)
5347 if (RT_LIKELY(*pb == 0))
5348 { /* likely */ }
5349 else
5350 return (void RT_FAR *)pb;
5351 return NULL;
5352}
5353#endif
5354
5355
5356/**
5357 * Checks if a memory block is all zeros.
5358 *
5359 * @returns true if zero, false if not.
5360 *
5361 * @param pv Pointer to the memory block.
5362 * @param cb Number of bytes in the block.
5363 *
5364 * @sa ASMMemFirstNonZero
5365 */
5366DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5367{
5368 return ASMMemFirstNonZero(pv, cb) == NULL;
5369}
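
/* Illustrative usage sketch (not part of this header): verifying that a
 * reserved area (hypothetical) is all zeros.  ASMMemFirstNonZero additionally
 * tells you where the first offending byte is, should you need to report it.
 *
 *     static bool myCheckReservedIsZero(void const *pvReserved, size_t cbReserved)
 *     {
 *         void *pvBad = ASMMemFirstNonZero(pvReserved, cbReserved);
 *         // NULL means all zero; equivalent to ASMMemIsZero(pvReserved, cbReserved).
 *         return pvBad == NULL;
 *     }
 */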
5370
5371
5372/**
5373 * Checks if a memory page is all zeros.
5374 *
5375 * @returns true / false.
5376 *
5377 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5378 * boundary
5379 */
5380DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5381{
5382# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5383 union { RTCCUINTREG r; bool f; } uAX;
5384 RTCCUINTREG xCX, xDI;
5385 Assert(!((uintptr_t)pvPage & 15));
5386 __asm__ __volatile__("repe; "
5387# ifdef RT_ARCH_AMD64
5388 "scasq\n\t"
5389# else
5390 "scasl\n\t"
5391# endif
5392 "setnc %%al\n\t"
5393 : "=&c" (xCX)
5394 , "=&D" (xDI)
5395 , "=&a" (uAX.r)
5396 : "mr" (pvPage)
5397# ifdef RT_ARCH_AMD64
5398 , "0" (RT_ASM_PAGE_SIZE/8)
5399# else
5400 , "0" (RT_ASM_PAGE_SIZE/4)
5401# endif
5402 , "1" (pvPage)
5403 , "2" (0)
5404 : "cc");
5405 return uAX.f;
5406# else
5407 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5408 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5409 Assert(!((uintptr_t)pvPage & 15));
5410 for (;;)
5411 {
5412 if (puPtr[0]) return false;
5413 if (puPtr[4]) return false;
5414
5415 if (puPtr[2]) return false;
5416 if (puPtr[6]) return false;
5417
5418 if (puPtr[1]) return false;
5419 if (puPtr[5]) return false;
5420
5421 if (puPtr[3]) return false;
5422 if (puPtr[7]) return false;
5423
5424 if (!--cLeft)
5425 return true;
5426 puPtr += 8;
5427 }
5428# endif
5429}
5430
5431
5432/**
5433 * Checks if a memory block is filled with the specified byte, returning the
5434 * first mismatch.
5435 *
5436 * This is sort of an inverted memchr.
5437 *
5438 * @returns Pointer to the byte which doesn't equal u8.
5439 * @returns NULL if all equal to u8.
5440 *
5441 * @param pv Pointer to the memory block.
5442 * @param cb Number of bytes in the block.
5443 * @param u8 The value it's supposed to be filled with.
5444 *
5445 * @remarks No alignment requirements.
5446 */
5447#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5448 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5449DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5450#else
5451DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5452{
5453/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5454 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5455 for (; cb; cb--, pb++)
5456 if (RT_LIKELY(*pb == u8))
5457 { /* likely */ }
5458 else
5459 return (void *)pb;
5460 return NULL;
5461}
5462#endif
5463
5464
5465/**
5466 * Checks if a memory block is filled with the specified byte.
5467 *
5468 * @returns true if all matching, false if not.
5469 *
5470 * @param pv Pointer to the memory block.
5471 * @param cb Number of bytes in the block.
5472 * @param u8 The value it's supposed to be filled with.
5473 *
5474 * @remarks No alignment requirements.
5475 */
5476DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5477{
5478 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5479}
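
/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * ASMMemIsAllU8 used to verify a fill pattern, e.g. a 0xff "erased" block; the
 * helper name is an assumption for the example.
 */
#if 0 /* illustrative example only */
static bool exampleIsErased(const void *pvBlock, size_t cbBlock)
{
    /* True when every byte equals 0xff, i.e. no mismatching byte was found. */
    return ASMMemIsAllU8(pvBlock, cbBlock, 0xff);
}
#endif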
5480
5481
5482/**
5483 * Checks if a memory block is filled with the specified 32-bit value.
5484 *
5485 * This is a sort of inverted memchr.
5486 *
5487 * @returns Pointer to the first value which doesn't equal u32.
5488 * @returns NULL if all equal to u32.
5489 *
5490 * @param pv Pointer to the memory block.
5491 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5492 * @param u32 The value it's supposed to be filled with.
5493 */
5494DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5495{
5496/** @todo rewrite this in inline assembly? */
5497 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5498 for (; cb; cb -= 4, pu32++)
5499 if (RT_LIKELY(*pu32 == u32))
5500 { /* likely */ }
5501 else
5502 return (uint32_t RT_FAR *)pu32;
5503 return NULL;
5504}
5505
5506
5507/**
5508 * Probes a byte pointer for read access.
5509 *
5510 * While the function will fault if the byte is not read accessible,
5511 * the idea is to do this in a safe place like before acquiring locks
5512 * and such like.
5513 *
5514 * Also, this function guarantees that an eager compiler is not going
5515 * to optimize the probing away.
5516 *
5517 * @param pvByte Pointer to the byte.
5518 */
5519#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5520RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5521#else
5522DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5523{
5524# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5525 uint8_t u8;
5526# if RT_INLINE_ASM_GNU_STYLE
5527 __asm__ __volatile__("movb %1, %0\n\t"
5528 : "=q" (u8)
5529 : "m" (*(const uint8_t *)pvByte));
5530# else
5531 __asm
5532 {
5533# ifdef RT_ARCH_AMD64
5534 mov rax, [pvByte]
5535 mov al, [rax]
5536# else
5537 mov eax, [pvByte]
5538 mov al, [eax]
5539# endif
5540 mov [u8], al
5541 }
5542# endif
5543 return u8;
5544
5545# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5546 uint32_t u32;
5547 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5548# if defined(RT_ARCH_ARM64)
5549 "ldxrb %w[uDst], %[pMem]\n\t"
5550# else
5551 "ldrexb %[uDst], %[pMem]\n\t"
5552# endif
5553 : [uDst] "=&r" (u32)
5554 : [pMem] "m" (*(uint8_t const *)pvByte));
5555 return (uint8_t)u32;
5556
5557# else
5558# error "Port me"
5559# endif
5560}
5561#endif
5562
5563/**
5564 * Probes a buffer for read access page by page.
5565 *
5566 * While the function will fault if the buffer is not fully read
5567 * accessible, the idea is to do this in a safe place like before
5568 * acquiring locks and such like.
5569 *
5570 * Also, this function guarantees that an eager compiler is not going
5571 * to optimize the probing away.
5572 *
5573 * @param pvBuf Pointer to the buffer.
5574 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5575 */
5576DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5577{
5578 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5579 /* the first byte */
5580 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5581 ASMProbeReadByte(pu8);
5582
5583 /* the pages in between. */
5584 while (cbBuf > RT_ASM_PAGE_SIZE)
5585 {
5586 ASMProbeReadByte(pu8);
5587 cbBuf -= RT_ASM_PAGE_SIZE;
5588 pu8 += RT_ASM_PAGE_SIZE;
5589 }
5590
5591 /* the last byte */
5592 ASMProbeReadByte(pu8 + cbBuf - 1);
5593}
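
/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * the intended probing pattern: touch the source buffer before taking a lock
 * so any access fault is raised here rather than while the lock is held.  The
 * MYLOCK type and myLock* functions are hypothetical placeholders.
 */
#if 0 /* illustrative example only */
typedef struct MYLOCK MYLOCK;                   /* hypothetical lock type */
extern void myLockAcquire(MYLOCK *pLock);       /* hypothetical */
extern void myLockRelease(MYLOCK *pLock);       /* hypothetical */

static void exampleCopyUnderLock(MYLOCK *pLock, uint8_t *pbDst, const uint8_t *pbSrc, size_t cb)
{
    size_t off;
    ASMProbeReadBuffer(pbSrc, cb);              /* any read fault happens before locking */
    myLockAcquire(pLock);
    for (off = 0; off < cb; off++)              /* plain byte copy keeps the sketch self-contained */
        pbDst[off] = pbSrc[off];
    myLockRelease(pLock);
}
#endif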
5594
5595
5596
5597/** @defgroup grp_inline_bits Bit Operations
5598 * @{
5599 */
5600
5601
5602/**
5603 * Sets a bit in a bitmap.
5604 *
5605 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5606 * 32-bit aligned.
5607 * @param iBit The bit to set.
5608 *
5609 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5610 * However, doing so will yield better performance as well as avoiding
5611 * traps accessing the last bits in the bitmap.
5612 */
5613#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5614RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5615#else
5616DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5617{
5618# if RT_INLINE_ASM_USES_INTRIN
5619 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5620
5621# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5622# if RT_INLINE_ASM_GNU_STYLE
5623 __asm__ __volatile__("btsl %1, %0"
5624 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5625 : "Ir" (iBit)
5626 , "m" (*(volatile long RT_FAR *)pvBitmap)
5627 : "memory"
5628 , "cc");
5629# else
5630 __asm
5631 {
5632# ifdef RT_ARCH_AMD64
5633 mov rax, [pvBitmap]
5634 mov edx, [iBit]
5635 bts [rax], edx
5636# else
5637 mov eax, [pvBitmap]
5638 mov edx, [iBit]
5639 bts [eax], edx
5640# endif
5641 }
5642# endif
5643
5644# else
5645 int32_t offBitmap = iBit / 32;
5646 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5647 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5648# endif
5649}
5650#endif
5651
5652
5653/**
5654 * Atomically sets a bit in a bitmap, ordered.
5655 *
5656 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5657 * aligned, otherwise the memory access isn't atomic!
5658 * @param iBit The bit to set.
5659 *
5660 * @remarks x86: Requires a 386 or later.
5661 */
5662#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5663RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5664#else
5665DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5666{
5667 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5668# if RT_INLINE_ASM_USES_INTRIN
5669 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5670# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5671# if RT_INLINE_ASM_GNU_STYLE
5672 __asm__ __volatile__("lock; btsl %1, %0"
5673 : "=m" (*(volatile long *)pvBitmap)
5674 : "Ir" (iBit)
5675 , "m" (*(volatile long *)pvBitmap)
5676 : "memory"
5677 , "cc");
5678# else
5679 __asm
5680 {
5681# ifdef RT_ARCH_AMD64
5682 mov rax, [pvBitmap]
5683 mov edx, [iBit]
5684 lock bts [rax], edx
5685# else
5686 mov eax, [pvBitmap]
5687 mov edx, [iBit]
5688 lock bts [eax], edx
5689# endif
5690 }
5691# endif
5692
5693# else
5694 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5695# endif
5696}
5697#endif
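
/*
 * Editor's note: an illustrative sketch (not part of the original header) of a
 * small shared flag bitmap updated with the atomic bit operations above; the
 * bitmap and flag numbering are assumptions made for the example.
 */
#if 0 /* illustrative example only */
static uint32_t volatile g_bmExampleFlags[2];   /* 64 flags, naturally 32-bit aligned */

static void exampleSignalFlag(unsigned iFlag)
{
    Assert(iFlag < 64);
    ASMAtomicBitSet(&g_bmExampleFlags[0], (int32_t)iFlag);      /* ordered, safe on SMP */
}
#endif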
5698
5699
5700/**
5701 * Clears a bit in a bitmap.
5702 *
5703 * @param pvBitmap Pointer to the bitmap (little endian).
5704 * @param iBit The bit to clear.
5705 *
5706 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5707 * However, doing so will yield better performance as well as avoiding
5708 * traps accessing the last bits in the bitmap.
5709 */
5710#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5711RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5712#else
5713DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5714{
5715# if RT_INLINE_ASM_USES_INTRIN
5716 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5717
5718# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5719# if RT_INLINE_ASM_GNU_STYLE
5720 __asm__ __volatile__("btrl %1, %0"
5721 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5722 : "Ir" (iBit)
5723 , "m" (*(volatile long RT_FAR *)pvBitmap)
5724 : "memory"
5725 , "cc");
5726# else
5727 __asm
5728 {
5729# ifdef RT_ARCH_AMD64
5730 mov rax, [pvBitmap]
5731 mov edx, [iBit]
5732 btr [rax], edx
5733# else
5734 mov eax, [pvBitmap]
5735 mov edx, [iBit]
5736 btr [eax], edx
5737# endif
5738 }
5739# endif
5740
5741# else
5742 int32_t offBitmap = iBit / 32;
5743 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5744 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5745# endif
5746}
5747#endif
5748
5749
5750/**
5751 * Atomically clears a bit in a bitmap, ordered.
5752 *
5753 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5754 * aligned, otherwise the memory access isn't atomic!
5755 * @param iBit The bit to clear.
5756 *
5757 * @remarks No memory barrier, take care on smp.
5758 * @remarks x86: Requires a 386 or later.
5759 */
5760#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5761RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5762#else
5763DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5764{
5765 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5766# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5767# if RT_INLINE_ASM_GNU_STYLE
5768 __asm__ __volatile__("lock; btrl %1, %0"
5769 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5770 : "Ir" (iBit)
5771 , "m" (*(volatile long RT_FAR *)pvBitmap)
5772 : "memory"
5773 , "cc");
5774# else
5775 __asm
5776 {
5777# ifdef RT_ARCH_AMD64
5778 mov rax, [pvBitmap]
5779 mov edx, [iBit]
5780 lock btr [rax], edx
5781# else
5782 mov eax, [pvBitmap]
5783 mov edx, [iBit]
5784 lock btr [eax], edx
5785# endif
5786 }
5787# endif
5788# else
5789 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5790# endif
5791}
5792#endif
5793
5794
5795/**
5796 * Toggles a bit in a bitmap.
5797 *
5798 * @param pvBitmap Pointer to the bitmap (little endian).
5799 * @param iBit The bit to toggle.
5800 *
5801 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5802 * However, doing so will yield better performance as well as avoiding
5803 * traps accessing the last bits in the bitmap.
5804 */
5805#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5806RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5807#else
5808DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5809{
5810# if RT_INLINE_ASM_USES_INTRIN
5811 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5812# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5813# if RT_INLINE_ASM_GNU_STYLE
5814 __asm__ __volatile__("btcl %1, %0"
5815 : "=m" (*(volatile long *)pvBitmap)
5816 : "Ir" (iBit)
5817 , "m" (*(volatile long *)pvBitmap)
5818 : "memory"
5819 , "cc");
5820# else
5821 __asm
5822 {
5823# ifdef RT_ARCH_AMD64
5824 mov rax, [pvBitmap]
5825 mov edx, [iBit]
5826 btc [rax], edx
5827# else
5828 mov eax, [pvBitmap]
5829 mov edx, [iBit]
5830 btc [eax], edx
5831# endif
5832 }
5833# endif
5834# else
5835 int32_t offBitmap = iBit / 32;
5836 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5837 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5838# endif
5839}
5840#endif
5841
5842
5843/**
5844 * Atomically toggles a bit in a bitmap, ordered.
5845 *
5846 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5847 * aligned, otherwise the memory access isn't atomic!
5848 * @param iBit The bit to toggle.
5849 *
5850 * @remarks x86: Requires a 386 or later.
5851 */
5852#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5853RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5854#else
5855DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5856{
5857 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5858# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5859# if RT_INLINE_ASM_GNU_STYLE
5860 __asm__ __volatile__("lock; btcl %1, %0"
5861 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5862 : "Ir" (iBit)
5863 , "m" (*(volatile long RT_FAR *)pvBitmap)
5864 : "memory"
5865 , "cc");
5866# else
5867 __asm
5868 {
5869# ifdef RT_ARCH_AMD64
5870 mov rax, [pvBitmap]
5871 mov edx, [iBit]
5872 lock btc [rax], edx
5873# else
5874 mov eax, [pvBitmap]
5875 mov edx, [iBit]
5876 lock btc [eax], edx
5877# endif
5878 }
5879# endif
5880# else
5881 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5882# endif
5883}
5884#endif
5885
5886
5887/**
5888 * Tests and sets a bit in a bitmap.
5889 *
5890 * @returns true if the bit was set.
5891 * @returns false if the bit was clear.
5892 *
5893 * @param pvBitmap Pointer to the bitmap (little endian).
5894 * @param iBit The bit to test and set.
5895 *
5896 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5897 * However, doing so will yield better performance as well as avoiding
5898 * traps accessing the last bits in the bitmap.
5899 */
5900#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5901RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5902#else
5903DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5904{
5905 union { bool f; uint32_t u32; uint8_t u8; } rc;
5906# if RT_INLINE_ASM_USES_INTRIN
5907 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
5908
5909# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5910# if RT_INLINE_ASM_GNU_STYLE
5911 __asm__ __volatile__("btsl %2, %1\n\t"
5912 "setc %b0\n\t"
5913 "andl $1, %0\n\t"
5914 : "=q" (rc.u32)
5915 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5916 : "Ir" (iBit)
5917 , "m" (*(volatile long RT_FAR *)pvBitmap)
5918 : "memory"
5919 , "cc");
5920# else
5921 __asm
5922 {
5923 mov edx, [iBit]
5924# ifdef RT_ARCH_AMD64
5925 mov rax, [pvBitmap]
5926 bts [rax], edx
5927# else
5928 mov eax, [pvBitmap]
5929 bts [eax], edx
5930# endif
5931 setc al
5932 and eax, 1
5933 mov [rc.u32], eax
5934 }
5935# endif
5936
5937# else
5938 int32_t offBitmap = iBit / 32;
5939 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5940 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
5941 >> (iBit & 31);
5942 rc.u32 &= 1;
5943# endif
5944 return rc.f;
5945}
5946#endif
5947
5948
5949/**
5950 * Atomically tests and sets a bit in a bitmap, ordered.
5951 *
5952 * @returns true if the bit was set.
5953 * @returns false if the bit was clear.
5954 *
5955 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5956 * aligned, otherwise the memory access isn't atomic!
5957 * @param iBit The bit to test and set.
5958 *
5959 * @remarks x86: Requires a 386 or later.
5960 */
5961#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5962RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5963#else
5964DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5965{
5966 union { bool f; uint32_t u32; uint8_t u8; } rc;
5967 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5968# if RT_INLINE_ASM_USES_INTRIN
5969 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5970# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5971# if RT_INLINE_ASM_GNU_STYLE
5972 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5973 "setc %b0\n\t"
5974 "andl $1, %0\n\t"
5975 : "=q" (rc.u32)
5976 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5977 : "Ir" (iBit)
5978 , "m" (*(volatile long RT_FAR *)pvBitmap)
5979 : "memory"
5980 , "cc");
5981# else
5982 __asm
5983 {
5984 mov edx, [iBit]
5985# ifdef RT_ARCH_AMD64
5986 mov rax, [pvBitmap]
5987 lock bts [rax], edx
5988# else
5989 mov eax, [pvBitmap]
5990 lock bts [eax], edx
5991# endif
5992 setc al
5993 and eax, 1
5994 mov [rc.u32], eax
5995 }
5996# endif
5997
5998# else
5999 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6000 >> (iBit & 31);
6001 rc.u32 &= 1;
6002# endif
6003 return rc.f;
6004}
6005#endif
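
/*
 * Editor's note: an illustrative sketch (not part of the original header)
 * showing ASMAtomicBitTestAndSet used to claim a slot exactly once across
 * threads; the slot bitmap is an assumption made for the example.
 */
#if 0 /* illustrative example only */
static uint32_t volatile g_bmExampleSlots[4];   /* 128 slots; a set bit means the slot is taken */

static int32_t exampleTryClaimSlot(int32_t iSlot)
{
    /* The old bit value is returned: false means we just set it and now own the slot. */
    return !ASMAtomicBitTestAndSet(&g_bmExampleSlots[0], iSlot) ? iSlot : -1;
}
#endif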
6006
6007
6008/**
6009 * Tests and clears a bit in a bitmap.
6010 *
6011 * @returns true if the bit was set.
6012 * @returns false if the bit was clear.
6013 *
6014 * @param pvBitmap Pointer to the bitmap (little endian).
6015 * @param iBit The bit to test and clear.
6016 *
6017 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6018 * However, doing so will yield better performance as well as avoiding
6019 * traps accessing the last bits in the bitmap.
6020 */
6021#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6022RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6023#else
6024DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6025{
6026 union { bool f; uint32_t u32; uint8_t u8; } rc;
6027# if RT_INLINE_ASM_USES_INTRIN
6028 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6029
6030# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6031# if RT_INLINE_ASM_GNU_STYLE
6032 __asm__ __volatile__("btrl %2, %1\n\t"
6033 "setc %b0\n\t"
6034 "andl $1, %0\n\t"
6035 : "=q" (rc.u32)
6036 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6037 : "Ir" (iBit)
6038 , "m" (*(volatile long RT_FAR *)pvBitmap)
6039 : "memory"
6040 , "cc");
6041# else
6042 __asm
6043 {
6044 mov edx, [iBit]
6045# ifdef RT_ARCH_AMD64
6046 mov rax, [pvBitmap]
6047 btr [rax], edx
6048# else
6049 mov eax, [pvBitmap]
6050 btr [eax], edx
6051# endif
6052 setc al
6053 and eax, 1
6054 mov [rc.u32], eax
6055 }
6056# endif
6057
6058# else
6059 int32_t offBitmap = iBit / 32;
6060 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6061 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6062 >> (iBit & 31);
6063 rc.u32 &= 1;
6064# endif
6065 return rc.f;
6066}
6067#endif
6068
6069
6070/**
6071 * Atomically tests and clears a bit in a bitmap, ordered.
6072 *
6073 * @returns true if the bit was set.
6074 * @returns false if the bit was clear.
6075 *
6076 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6077 * aligned, otherwise the memory access isn't atomic!
6078 * @param iBit The bit to test and clear.
6079 *
6080 * @remarks No memory barrier, take care on smp.
6081 * @remarks x86: Requires a 386 or later.
6082 */
6083#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6084RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6085#else
6086DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6087{
6088 union { bool f; uint32_t u32; uint8_t u8; } rc;
6089 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6090# if RT_INLINE_ASM_USES_INTRIN
6091 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6092
6093# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6094# if RT_INLINE_ASM_GNU_STYLE
6095 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6096 "setc %b0\n\t"
6097 "andl $1, %0\n\t"
6098 : "=q" (rc.u32)
6099 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6100 : "Ir" (iBit)
6101 , "m" (*(volatile long RT_FAR *)pvBitmap)
6102 : "memory"
6103 , "cc");
6104# else
6105 __asm
6106 {
6107 mov edx, [iBit]
6108# ifdef RT_ARCH_AMD64
6109 mov rax, [pvBitmap]
6110 lock btr [rax], edx
6111# else
6112 mov eax, [pvBitmap]
6113 lock btr [eax], edx
6114# endif
6115 setc al
6116 and eax, 1
6117 mov [rc.u32], eax
6118 }
6119# endif
6120
6121# else
6122 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6123 >> (iBit & 31);
6124 rc.u32 &= 1;
6125# endif
6126 return rc.f;
6127}
6128#endif
6129
6130
6131/**
6132 * Tests and toggles a bit in a bitmap.
6133 *
6134 * @returns true if the bit was set.
6135 * @returns false if the bit was clear.
6136 *
6137 * @param pvBitmap Pointer to the bitmap (little endian).
6138 * @param iBit The bit to test and toggle.
6139 *
6140 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6141 * However, doing so will yield better performance as well as avoiding
6142 * traps accessing the last bits in the bitmap.
6143 */
6144#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6145RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6146#else
6147DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6148{
6149 union { bool f; uint32_t u32; uint8_t u8; } rc;
6150# if RT_INLINE_ASM_USES_INTRIN
6151 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6152
6153# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6154# if RT_INLINE_ASM_GNU_STYLE
6155 __asm__ __volatile__("btcl %2, %1\n\t"
6156 "setc %b0\n\t"
6157 "andl $1, %0\n\t"
6158 : "=q" (rc.u32)
6159 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6160 : "Ir" (iBit)
6161 , "m" (*(volatile long RT_FAR *)pvBitmap)
6162 : "memory"
6163 , "cc");
6164# else
6165 __asm
6166 {
6167 mov edx, [iBit]
6168# ifdef RT_ARCH_AMD64
6169 mov rax, [pvBitmap]
6170 btc [rax], edx
6171# else
6172 mov eax, [pvBitmap]
6173 btc [eax], edx
6174# endif
6175 setc al
6176 and eax, 1
6177 mov [rc.u32], eax
6178 }
6179# endif
6180
6181# else
6182 int32_t offBitmap = iBit / 32;
6183 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6184 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6185 >> (iBit & 31);
6186 rc.u32 &= 1;
6187# endif
6188 return rc.f;
6189}
6190#endif
6191
6192
6193/**
6194 * Atomically tests and toggles a bit in a bitmap, ordered.
6195 *
6196 * @returns true if the bit was set.
6197 * @returns false if the bit was clear.
6198 *
6199 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6200 * aligned, otherwise the memory access isn't atomic!
6201 * @param iBit The bit to test and toggle.
6202 *
6203 * @remarks x86: Requires a 386 or later.
6204 */
6205#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6206RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6207#else
6208DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6209{
6210 union { bool f; uint32_t u32; uint8_t u8; } rc;
6211 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6212# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6213# if RT_INLINE_ASM_GNU_STYLE
6214 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6215 "setc %b0\n\t"
6216 "andl $1, %0\n\t"
6217 : "=q" (rc.u32)
6218 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6219 : "Ir" (iBit)
6220 , "m" (*(volatile long RT_FAR *)pvBitmap)
6221 : "memory"
6222 , "cc");
6223# else
6224 __asm
6225 {
6226 mov edx, [iBit]
6227# ifdef RT_ARCH_AMD64
6228 mov rax, [pvBitmap]
6229 lock btc [rax], edx
6230# else
6231 mov eax, [pvBitmap]
6232 lock btc [eax], edx
6233# endif
6234 setc al
6235 and eax, 1
6236 mov [rc.u32], eax
6237 }
6238# endif
6239
6240# else
6241 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6242 >> (iBit & 31);
6243 rc.u32 &= 1;
6244# endif
6245 return rc.f;
6246}
6247#endif
6248
6249
6250/**
6251 * Tests if a bit in a bitmap is set.
6252 *
6253 * @returns true if the bit is set.
6254 * @returns false if the bit is clear.
6255 *
6256 * @param pvBitmap Pointer to the bitmap (little endian).
6257 * @param iBit The bit to test.
6258 *
6259 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6260 * However, doing so will yield better performance as well as avoiding
6261 * traps accessing the last bits in the bitmap.
6262 */
6263#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6264RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6265#else
6266DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6267{
6268 union { bool f; uint32_t u32; uint8_t u8; } rc;
6269# if RT_INLINE_ASM_USES_INTRIN
6270 rc.u32 = _bittest((long *)pvBitmap, iBit);
6271
6272# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6273# if RT_INLINE_ASM_GNU_STYLE
6274
6275 __asm__ __volatile__("btl %2, %1\n\t"
6276 "setc %b0\n\t"
6277 "andl $1, %0\n\t"
6278 : "=q" (rc.u32)
6279 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6280 , "Ir" (iBit)
6281 : "memory"
6282 , "cc");
6283# else
6284 __asm
6285 {
6286 mov edx, [iBit]
6287# ifdef RT_ARCH_AMD64
6288 mov rax, [pvBitmap]
6289 bt [rax], edx
6290# else
6291 mov eax, [pvBitmap]
6292 bt [eax], edx
6293# endif
6294 setc al
6295 and eax, 1
6296 mov [rc.u32], eax
6297 }
6298# endif
6299
6300# else
6301 int32_t offBitmap = iBit / 32;
6302 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6303 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6304 rc.u32 &= 1;
6305# endif
6306 return rc.f;
6307}
6308#endif
6309
6310
6311/**
6312 * Clears a bit range within a bitmap.
6313 *
6314 * @param pvBitmap Pointer to the bitmap (little endian).
6315 * @param iBitStart The first bit to clear.
6316 * @param iBitEnd The first bit not to clear.
6317 */
6318DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6319{
6320 if (iBitStart < iBitEnd)
6321 {
6322 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6323 int32_t iStart = iBitStart & ~31;
6324 int32_t iEnd = iBitEnd & ~31;
6325 if (iStart == iEnd)
6326 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6327 else
6328 {
6329 /* bits in first dword. */
6330 if (iBitStart & 31)
6331 {
6332 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6333 pu32++;
6334 iBitStart = iStart + 32;
6335 }
6336
6337 /* whole dwords. */
6338 if (iBitStart != iEnd)
6339 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6340
6341 /* bits in last dword. */
6342 if (iBitEnd & 31)
6343 {
6344 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6345 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6346 }
6347 }
6348 }
6349}
6350
6351
6352/**
6353 * Sets a bit range within a bitmap.
6354 *
6355 * @param pvBitmap Pointer to the bitmap (little endian).
6356 * @param iBitStart The first bit to set.
6357 * @param iBitEnd The first bit not to set.
6358 */
6359DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6360{
6361 if (iBitStart < iBitEnd)
6362 {
6363 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6364 int32_t iStart = iBitStart & ~31;
6365 int32_t iEnd = iBitEnd & ~31;
6366 if (iStart == iEnd)
6367 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6368 else
6369 {
6370 /* bits in first dword. */
6371 if (iBitStart & 31)
6372 {
6373 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6374 pu32++;
6375 iBitStart = iStart + 32;
6376 }
6377
6378 /* whole dwords. */
6379 if (iBitStart != iEnd)
6380 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6381
6382 /* bits in last dword. */
6383 if (iBitEnd & 31)
6384 {
6385 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6386 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6387 }
6388 }
6389 }
6390}
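
/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * the range helpers marking a run of pages allocated or free in a simple
 * allocation bitmap; the bitmap and its size are assumptions for the example.
 */
#if 0 /* illustrative example only */
static uint32_t g_bmExamplePages[1024 / 32];    /* one bit per page frame, 1024 pages */

static void exampleMarkPages(uint32_t iFirstPage, uint32_t cPages, bool fAllocated)
{
    if (fAllocated)
        ASMBitSetRange(g_bmExamplePages, (int32_t)iFirstPage, (int32_t)(iFirstPage + cPages));
    else
        ASMBitClearRange(g_bmExamplePages, (int32_t)iFirstPage, (int32_t)(iFirstPage + cPages));
}
#endif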
6391
6392
6393/**
6394 * Finds the first clear bit in a bitmap.
6395 *
6396 * @returns Index of the first zero bit.
6397 * @returns -1 if no clear bit was found.
6398 * @param pvBitmap Pointer to the bitmap (little endian).
6399 * @param cBits The number of bits in the bitmap. Multiple of 32.
6400 */
6401#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6402DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6403#else
6404DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6405{
6406 if (cBits)
6407 {
6408 int32_t iBit;
6409# if RT_INLINE_ASM_GNU_STYLE
6410 RTCCUINTREG uEAX, uECX, uEDI;
6411 cBits = RT_ALIGN_32(cBits, 32);
6412 __asm__ __volatile__("repe; scasl\n\t"
6413 "je 1f\n\t"
6414# ifdef RT_ARCH_AMD64
6415 "lea -4(%%rdi), %%rdi\n\t"
6416 "xorl (%%rdi), %%eax\n\t"
6417 "subq %5, %%rdi\n\t"
6418# else
6419 "lea -4(%%edi), %%edi\n\t"
6420 "xorl (%%edi), %%eax\n\t"
6421 "subl %5, %%edi\n\t"
6422# endif
6423 "shll $3, %%edi\n\t"
6424 "bsfl %%eax, %%edx\n\t"
6425 "addl %%edi, %%edx\n\t"
6426 "1:\t\n"
6427 : "=d" (iBit)
6428 , "=&c" (uECX)
6429 , "=&D" (uEDI)
6430 , "=&a" (uEAX)
6431 : "0" (0xffffffff)
6432 , "mr" (pvBitmap)
6433 , "1" (cBits >> 5)
6434 , "2" (pvBitmap)
6435 , "3" (0xffffffff)
6436 : "cc");
6437# else
6438 cBits = RT_ALIGN_32(cBits, 32);
6439 __asm
6440 {
6441# ifdef RT_ARCH_AMD64
6442 mov rdi, [pvBitmap]
6443 mov rbx, rdi
6444# else
6445 mov edi, [pvBitmap]
6446 mov ebx, edi
6447# endif
6448 mov edx, 0ffffffffh
6449 mov eax, edx
6450 mov ecx, [cBits]
6451 shr ecx, 5
6452 repe scasd
6453 je done
6454
6455# ifdef RT_ARCH_AMD64
6456 lea rdi, [rdi - 4]
6457 xor eax, [rdi]
6458 sub rdi, rbx
6459# else
6460 lea edi, [edi - 4]
6461 xor eax, [edi]
6462 sub edi, ebx
6463# endif
6464 shl edi, 3
6465 bsf edx, eax
6466 add edx, edi
6467 done:
6468 mov [iBit], edx
6469 }
6470# endif
6471 return iBit;
6472 }
6473 return -1;
6474}
6475#endif
6476
6477
6478/**
6479 * Finds the next clear bit in a bitmap.
6480 *
6481 * @returns Index of the next clear bit.
6482 * @returns -1 if no clear bit was found.
6483 * @param pvBitmap Pointer to the bitmap (little endian).
6484 * @param cBits The number of bits in the bitmap. Multiple of 32.
6485 * @param iBitPrev The bit returned from the last search.
6486 * The search will start at iBitPrev + 1.
6487 */
6488#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6489DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6490#else
6491DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6492{
6493 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6494 int iBit = ++iBitPrev & 31;
6495 if (iBit)
6496 {
6497 /*
6498 * Inspect the 32-bit word containing the unaligned bit.
6499 */
6500 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6501
6502# if RT_INLINE_ASM_USES_INTRIN
6503 unsigned long ulBit = 0;
6504 if (_BitScanForward(&ulBit, u32))
6505 return ulBit + iBitPrev;
6506# else
6507# if RT_INLINE_ASM_GNU_STYLE
6508 __asm__ __volatile__("bsf %1, %0\n\t"
6509 "jnz 1f\n\t"
6510 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6511 "1:\n\t"
6512 : "=r" (iBit)
6513 : "r" (u32)
6514 : "cc");
6515# else
6516 __asm
6517 {
6518 mov edx, [u32]
6519 bsf eax, edx
6520 jnz done
6521 mov eax, 0ffffffffh
6522 done:
6523 mov [iBit], eax
6524 }
6525# endif
6526 if (iBit >= 0)
6527 return iBit + (int)iBitPrev;
6528# endif
6529
6530 /*
6531 * Skip ahead and see if there is anything left to search.
6532 */
6533 iBitPrev |= 31;
6534 iBitPrev++;
6535 if (cBits <= (uint32_t)iBitPrev)
6536 return -1;
6537 }
6538
6539 /*
6540 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6541 */
6542 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6543 if (iBit >= 0)
6544 iBit += iBitPrev;
6545 return iBit;
6546}
6547#endif
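
/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * the usual first/next iteration over clear (free) bits; as documented above,
 * cBits must be a multiple of 32.  The empty loop body is a placeholder.
 */
#if 0 /* illustrative example only */
static void exampleVisitClearBits(const uint32_t *pbmBitmap, uint32_t cBits)
{
    int32_t iBit = ASMBitFirstClear(pbmBitmap, cBits);
    while (iBit >= 0)
    {
        /* ... do something with the free bit index iBit ... */
        iBit = ASMBitNextClear(pbmBitmap, cBits, (uint32_t)iBit);
    }
}
#endif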
6548
6549
6550/**
6551 * Finds the first set bit in a bitmap.
6552 *
6553 * @returns Index of the first set bit.
6554 * @returns -1 if no set bit was found.
6555 * @param pvBitmap Pointer to the bitmap (little endian).
6556 * @param cBits The number of bits in the bitmap. Multiple of 32.
6557 */
6558#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6559DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6560#else
6561DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6562{
6563 if (cBits)
6564 {
6565 int32_t iBit;
6566# if RT_INLINE_ASM_GNU_STYLE
6567 RTCCUINTREG uEAX, uECX, uEDI;
6568 cBits = RT_ALIGN_32(cBits, 32);
6569 __asm__ __volatile__("repe; scasl\n\t"
6570 "je 1f\n\t"
6571# ifdef RT_ARCH_AMD64
6572 "lea -4(%%rdi), %%rdi\n\t"
6573 "movl (%%rdi), %%eax\n\t"
6574 "subq %5, %%rdi\n\t"
6575# else
6576 "lea -4(%%edi), %%edi\n\t"
6577 "movl (%%edi), %%eax\n\t"
6578 "subl %5, %%edi\n\t"
6579# endif
6580 "shll $3, %%edi\n\t"
6581 "bsfl %%eax, %%edx\n\t"
6582 "addl %%edi, %%edx\n\t"
6583 "1:\t\n"
6584 : "=d" (iBit)
6585 , "=&c" (uECX)
6586 , "=&D" (uEDI)
6587 , "=&a" (uEAX)
6588 : "0" (0xffffffff)
6589 , "mr" (pvBitmap)
6590 , "1" (cBits >> 5)
6591 , "2" (pvBitmap)
6592 , "3" (0)
6593 : "cc");
6594# else
6595 cBits = RT_ALIGN_32(cBits, 32);
6596 __asm
6597 {
6598# ifdef RT_ARCH_AMD64
6599 mov rdi, [pvBitmap]
6600 mov rbx, rdi
6601# else
6602 mov edi, [pvBitmap]
6603 mov ebx, edi
6604# endif
6605 mov edx, 0ffffffffh
6606 xor eax, eax
6607 mov ecx, [cBits]
6608 shr ecx, 5
6609 repe scasd
6610 je done
6611# ifdef RT_ARCH_AMD64
6612 lea rdi, [rdi - 4]
6613 mov eax, [rdi]
6614 sub rdi, rbx
6615# else
6616 lea edi, [edi - 4]
6617 mov eax, [edi]
6618 sub edi, ebx
6619# endif
6620 shl edi, 3
6621 bsf edx, eax
6622 add edx, edi
6623 done:
6624 mov [iBit], edx
6625 }
6626# endif
6627 return iBit;
6628 }
6629 return -1;
6630}
6631#endif
6632
6633
6634/**
6635 * Finds the next set bit in a bitmap.
6636 *
6637 * @returns Index of the next set bit.
6638 * @returns -1 if no set bit was found.
6639 * @param pvBitmap Pointer to the bitmap (little endian).
6640 * @param cBits The number of bits in the bitmap. Multiple of 32.
6641 * @param iBitPrev The bit returned from the last search.
6642 * The search will start at iBitPrev + 1.
6643 */
6644#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6645DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6646#else
6647DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6648{
6649 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6650 int iBit = ++iBitPrev & 31;
6651 if (iBit)
6652 {
6653 /*
6654 * Inspect the 32-bit word containing the unaligned bit.
6655 */
6656 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6657
6658# if RT_INLINE_ASM_USES_INTRIN
6659 unsigned long ulBit = 0;
6660 if (_BitScanForward(&ulBit, u32))
6661 return ulBit + iBitPrev;
6662# else
6663# if RT_INLINE_ASM_GNU_STYLE
6664 __asm__ __volatile__("bsf %1, %0\n\t"
6665 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
6666 "movl $-1, %0\n\t"
6667 "1:\n\t"
6668 : "=r" (iBit)
6669 : "r" (u32)
6670 : "cc");
6671# else
6672 __asm
6673 {
6674 mov edx, [u32]
6675 bsf eax, edx
6676 jnz done
6677 mov eax, 0ffffffffh
6678 done:
6679 mov [iBit], eax
6680 }
6681# endif
6682 if (iBit >= 0)
6683 return iBit + (int)iBitPrev;
6684# endif
6685
6686 /*
6687 * Skip ahead and see if there is anything left to search.
6688 */
6689 iBitPrev |= 31;
6690 iBitPrev++;
6691 if (cBits <= (uint32_t)iBitPrev)
6692 return -1;
6693 }
6694
6695 /*
6696 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6697 */
6698 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6699 if (iBit >= 0)
6700 iBit += iBitPrev;
6701 return iBit;
6702}
6703#endif
6704
6705
6706/**
6707 * Finds the first bit which is set in the given 32-bit integer.
6708 * Bits are numbered from 1 (least significant) to 32.
6709 *
6710 * @returns index [1..32] of the first set bit.
6711 * @returns 0 if all bits are cleared.
6712 * @param u32 Integer to search for set bits.
6713 * @remarks Similar to ffs() in BSD.
6714 */
6715#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6716RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6717#else
6718DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
6719{
6720# if RT_INLINE_ASM_USES_INTRIN
6721 unsigned long iBit;
6722 if (_BitScanForward(&iBit, u32))
6723 iBit++;
6724 else
6725 iBit = 0;
6726
6727# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6728# if RT_INLINE_ASM_GNU_STYLE
6729 uint32_t iBit;
6730 __asm__ __volatile__("bsf %1, %0\n\t"
6731 "jnz 1f\n\t"
6732 "xorl %0, %0\n\t"
6733 "jmp 2f\n"
6734 "1:\n\t"
6735 "incl %0\n"
6736 "2:\n\t"
6737 : "=r" (iBit)
6738 : "rm" (u32)
6739 : "cc");
6740# else
6741 uint32_t iBit;
6742 _asm
6743 {
6744 bsf eax, [u32]
6745 jnz found
6746 xor eax, eax
6747 jmp done
6748 found:
6749 inc eax
6750 done:
6751 mov [iBit], eax
6752 }
6753# endif
6754
6755# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6756 /*
6757 * Using the "count leading zeros (clz)" instruction here because there
6758 * is no dedicated instruction to get the first set bit.
6759 * Need to reverse the bits in the value with "rbit" first because
6760 * "clz" starts counting from the most significant bit.
6761 */
6762 uint32_t iBit;
6763 __asm__ __volatile__(
6764# if defined(RT_ARCH_ARM64)
6765 "rbit %w[uVal], %w[uVal]\n\t"
6766 "clz %w[iBit], %w[uVal]\n\t"
6767# else
6768 "rbit %[uVal], %[uVal]\n\t"
6769 "clz %[iBit], %[uVal]\n\t"
6770# endif
6771 : [uVal] "=r" (u32)
6772 , [iBit] "=r" (iBit)
6773 : "[uVal]" (u32));
6774 if (iBit != 32)
6775 iBit++;
6776 else
6777 iBit = 0; /* No bit set. */
6778
6779# else
6780# error "Port me"
6781# endif
6782 return iBit;
6783}
6784#endif
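
/*
 * Editor's note: an illustrative sketch (not part of the original header); the
 * 1-based return value mirrors ffs(), so isolating the lowest set bit of a mask
 * looks like this.  The mask value is an assumption for the example.
 */
#if 0 /* illustrative example only */
static void exampleLowestSetBit(void)
{
    uint32_t const fMask = UINT32_C(0x00a0);            /* bits 5 and 7 set */
    unsigned const iBit  = ASMBitFirstSetU32(fMask);    /* -> 6 (1-based); 0 if fMask were 0 */
    if (iBit)
    {
        uint32_t const fLowest = RT_BIT_32(iBit - 1);   /* back to a 0-based mask: 0x20 */
        NOREF(fLowest);
    }
}
#endif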
6785
6786
6787/**
6788 * Finds the first bit which is set in the given 32-bit integer.
6789 * Bits are numbered from 1 (least significant) to 32.
6790 *
6791 * @returns index [1..32] of the first set bit.
6792 * @returns 0 if all bits are cleared.
6793 * @param i32 Integer to search for set bits.
6794 * @remark Similar to ffs() in BSD.
6795 */
6796DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
6797{
6798 return ASMBitFirstSetU32((uint32_t)i32);
6799}
6800
6801
6802/**
6803 * Finds the first bit which is set in the given 64-bit integer.
6804 *
6805 * Bits are numbered from 1 (least significant) to 64.
6806 *
6807 * @returns index [1..64] of the first set bit.
6808 * @returns 0 if all bits are cleared.
6809 * @param u64 Integer to search for set bits.
6810 * @remarks Similar to ffs() in BSD.
6811 */
6812#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6813RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6814#else
6815DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
6816{
6817# if RT_INLINE_ASM_USES_INTRIN
6818 unsigned long iBit;
6819# if ARCH_BITS == 64
6820 if (_BitScanForward64(&iBit, u64))
6821 iBit++;
6822 else
6823 iBit = 0;
6824# else
6825 if (_BitScanForward(&iBit, (uint32_t)u64))
6826 iBit++;
6827 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
6828 iBit += 33;
6829 else
6830 iBit = 0;
6831# endif
6832
6833# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6834 uint64_t iBit;
6835 __asm__ __volatile__("bsfq %1, %0\n\t"
6836 "jnz 1f\n\t"
6837 "xorl %k0, %k0\n\t"
6838 "jmp 2f\n"
6839 "1:\n\t"
6840 "incl %k0\n"
6841 "2:\n\t"
6842 : "=r" (iBit)
6843 : "rm" (u64)
6844 : "cc");
6845
6846# elif defined(RT_ARCH_ARM64)
6847 uint64_t iBit;
6848 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
6849 "clz %[iBit], %[uVal]\n\t"
6850 : [uVal] "=r" (u64)
6851 , [iBit] "=r" (iBit)
6852 : "[uVal]" (u64));
6853 if (iBit != 64)
6854 iBit++;
6855 else
6856 iBit = 0; /* No bit set. */
6857
6858# else
6859 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
6860 if (!iBit)
6861 {
6862 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
6863 if (iBit)
6864 iBit += 32;
6865 }
6866# endif
6867 return (unsigned)iBit;
6868}
6869#endif
6870
6871
6872/**
6873 * Finds the first bit which is set in the given 16-bit integer.
6874 *
6875 * Bits are numbered from 1 (least significant) to 16.
6876 *
6877 * @returns index [1..16] of the first set bit.
6878 * @returns 0 if all bits are cleared.
6879 * @param u16 Integer to search for set bits.
6880 * @remarks For 16-bit bs3kit code.
6881 */
6882#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6883RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6884#else
6885DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
6886{
6887 return ASMBitFirstSetU32((uint32_t)u16);
6888}
6889#endif
6890
6891
6892/**
6893 * Finds the last bit which is set in the given 32-bit integer.
6894 * Bits are numbered from 1 (least significant) to 32.
6895 *
6896 * @returns index [1..32] of the last set bit.
6897 * @returns 0 if all bits are cleared.
6898 * @param u32 Integer to search for set bits.
6899 * @remark Similar to fls() in BSD.
6900 */
6901#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6902RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6903#else
6904DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
6905{
6906# if RT_INLINE_ASM_USES_INTRIN
6907 unsigned long iBit;
6908 if (_BitScanReverse(&iBit, u32))
6909 iBit++;
6910 else
6911 iBit = 0;
6912
6913# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6914# if RT_INLINE_ASM_GNU_STYLE
6915 uint32_t iBit;
6916 __asm__ __volatile__("bsrl %1, %0\n\t"
6917 "jnz 1f\n\t"
6918 "xorl %0, %0\n\t"
6919 "jmp 2f\n"
6920 "1:\n\t"
6921 "incl %0\n"
6922 "2:\n\t"
6923 : "=r" (iBit)
6924 : "rm" (u32)
6925 : "cc");
6926# else
6927 uint32_t iBit;
6928 _asm
6929 {
6930 bsr eax, [u32]
6931 jnz found
6932 xor eax, eax
6933 jmp done
6934 found:
6935 inc eax
6936 done:
6937 mov [iBit], eax
6938 }
6939# endif
6940
6941# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6942 uint32_t iBit;
6943 __asm__ __volatile__(
6944# if defined(RT_ARCH_ARM64)
6945 "clz %w[iBit], %w[uVal]\n\t"
6946# else
6947 "clz %[iBit], %[uVal]\n\t"
6948# endif
6949 : [iBit] "=r" (iBit)
6950 : [uVal] "r" (u32));
6951 iBit = 32 - iBit;
6952
6953# else
6954# error "Port me"
6955# endif
6956 return iBit;
6957}
6958#endif
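
/*
 * Editor's note: an illustrative sketch (not part of the original header)
 * showing how the 1-based "last set bit" maps onto floor(log2) for non-zero
 * input; the helper name is an assumption for the example.
 */
#if 0 /* illustrative example only */
static unsigned exampleLog2Floor(uint32_t uValue)
{
    Assert(uValue != 0);
    return ASMBitLastSetU32(uValue) - 1;    /* e.g. 0x8000 -> 16 - 1 = 15 */
}
#endif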
6959
6960
6961/**
6962 * Finds the last bit which is set in the given 32-bit integer.
6963 * Bits are numbered from 1 (least significant) to 32.
6964 *
6965 * @returns index [1..32] of the last set bit.
6966 * @returns 0 if all bits are cleared.
6967 * @param i32 Integer to search for set bits.
6968 * @remark Similar to fls() in BSD.
6969 */
6970DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
6971{
6972 return ASMBitLastSetU32((uint32_t)i32);
6973}
6974
6975
6976/**
6977 * Finds the last bit which is set in the given 64-bit integer.
6978 *
6979 * Bits are numbered from 1 (least significant) to 64.
6980 *
6981 * @returns index [1..64] of the last set bit.
6982 * @returns 0 if all bits are cleared.
6983 * @param u64 Integer to search for set bits.
6984 * @remark Similar to fls() in BSD.
6985 */
6986#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6987RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6988#else
6989DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
6990{
6991# if RT_INLINE_ASM_USES_INTRIN
6992 unsigned long iBit;
6993# if ARCH_BITS == 64
6994 if (_BitScanReverse64(&iBit, u64))
6995 iBit++;
6996 else
6997 iBit = 0;
6998# else
6999 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7000 iBit += 33;
7001 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7002 iBit++;
7003 else
7004 iBit = 0;
7005# endif
7006
7007# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7008 uint64_t iBit;
7009 __asm__ __volatile__("bsrq %1, %0\n\t"
7010 "jnz 1f\n\t"
7011 "xorl %k0, %k0\n\t"
7012 "jmp 2f\n"
7013 "1:\n\t"
7014 "incl %k0\n"
7015 "2:\n\t"
7016 : "=r" (iBit)
7017 : "rm" (u64)
7018 : "cc");
7019
7020# elif defined(RT_ARCH_ARM64)
7021 uint64_t iBit;
7022 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7023 : [iBit] "=r" (iBit)
7024 : [uVal] "r" (u64));
7025 iBit = 64 - iBit;
7026
7027# else
7028 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7029 if (iBit)
7030 iBit += 32;
7031 else
7032 iBit = ASMBitLastSetU32((uint32_t)u64);
7033# endif
7034 return (unsigned)iBit;
7035}
7036#endif
7037
7038
7039/**
7040 * Finds the last bit which is set in the given 16-bit integer.
7041 *
7042 * Bits are numbered from 1 (least significant) to 16.
7043 *
7044 * @returns index [1..16] of the last set bit.
7045 * @returns 0 if all bits are cleared.
7046 * @param u16 Integer to search for set bits.
7047 * @remarks For 16-bit bs3kit code.
7048 */
7049#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7050RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7051#else
7052DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7053{
7054 return ASMBitLastSetU32((uint32_t)u16);
7055}
7056#endif
7057
7058
7059/**
7060 * Reverse the byte order of the given 16-bit integer.
7061 *
7062 * @returns The byte swapped value.
7063 * @param u16 16-bit integer value.
7064 */
7065#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7066RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
7067#else
7068DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
7069{
7070# if RT_INLINE_ASM_USES_INTRIN
7071 return _byteswap_ushort(u16);
7072
7073# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7074# if RT_INLINE_ASM_GNU_STYLE
7075 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
7076# else
7077 _asm
7078 {
7079 mov ax, [u16]
7080 ror ax, 8
7081 mov [u16], ax
7082 }
7083# endif
7084 return u16;
7085
7086# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7087 uint32_t u32Ret;
7088 __asm__ __volatile__(
7089# if defined(RT_ARCH_ARM64)
7090 "rev16 %w[uRet], %w[uVal]\n\t"
7091# else
7092 "rev16 %[uRet], %[uVal]\n\t"
7093# endif
7094 : [uRet] "=r" (u32Ret)
7095 : [uVal] "r" (u16));
7096 return (uint16_t)u32Ret;
7097
7098# else
7099# error "Port me"
7100# endif
7101}
7102#endif
7103
7104
7105/**
7106 * Reverse the byte order of the given 32-bit integer.
7107 *
7108 * @returns The byte swapped value.
7109 * @param u32 32-bit integer value.
7110 */
7111#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7112RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
7113#else
7114DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
7115{
7116# if RT_INLINE_ASM_USES_INTRIN
7117 return _byteswap_ulong(u32);
7118
7119# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7120# if RT_INLINE_ASM_GNU_STYLE
7121 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
7122# else
7123 _asm
7124 {
7125 mov eax, [u32]
7126 bswap eax
7127 mov [u32], eax
7128 }
7129# endif
7130 return u32;
7131
7132# elif defined(RT_ARCH_ARM64)
7133 uint64_t u64Ret;
7134 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
7135 : [uRet] "=r" (u64Ret)
7136 : [uVal] "r" ((uint64_t)u32));
7137 return (uint32_t)u64Ret;
7138
7139# elif defined(RT_ARCH_ARM32)
7140 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7141 : [uRet] "=r" (u32)
7142 : [uVal] "[uRet]" (u32));
7143 return u32;
7144
7145# else
7146# error "Port me"
7147# endif
7148}
7149#endif
7150
7151
7152/**
7153 * Reverse the byte order of the given 64-bit integer.
7154 *
7155 * @returns The byte swapped value.
7156 * @param u64 64-bit integer value.
7157 */
7158DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
7159{
7160#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
7161 return _byteswap_uint64(u64);
7162
7163# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7164 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
7165 return u64;
7166
7167# elif defined(RT_ARCH_ARM64)
7168 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7169 : [uRet] "=r" (u64)
7170 : [uVal] "[uRet]" (u64));
7171 return u64;
7172
7173#else
7174 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
7175 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
7176#endif
7177}
7178
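/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * the byte swappers used for endian conversion, assuming the usual
 * RT_BIG_ENDIAN/RT_LITTLE_ENDIAN defines from iprt/cdefs.h.
 */
#if 0 /* illustrative example only */
static uint32_t exampleHostToBigEndianU32(uint32_t u32Host)
{
# ifdef RT_BIG_ENDIAN
    return u32Host;                 /* already stored most significant byte first */
# else
    return ASMByteSwapU32(u32Host); /* little-endian host: reverse the byte order */
# endif
}
#endif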
7179
7180/**
7181 * Rotate 32-bit unsigned value to the left by @a cShift.
7182 *
7183 * @returns Rotated value.
7184 * @param u32 The value to rotate.
7185 * @param cShift How many bits to rotate by.
7186 */
7187#ifdef __WATCOMC__
7188RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7189#else
7190DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7191{
7192# if RT_INLINE_ASM_USES_INTRIN
7193 return _rotl(u32, cShift);
7194
7195# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7196 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7197 return u32;
7198
7199# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7200 __asm__ __volatile__(
7201# if defined(RT_ARCH_ARM64)
7202 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7203# else
7204 "ror %[uRet], %[uVal], %[cShift]\n\t"
7205# endif
7206 : [uRet] "=r" (u32)
7207 : [uVal] "[uRet]" (u32)
7208 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7209 return u32;
7210
7211# else
7212 cShift &= 31;
7213 return (u32 << cShift) | (u32 >> (32 - cShift));
7214# endif
7215}
7216#endif
7217
7218
7219/**
7220 * Rotate 32-bit unsigned value to the right by @a cShift.
7221 *
7222 * @returns Rotated value.
7223 * @param u32 The value to rotate.
7224 * @param cShift How many bits to rotate by.
7225 */
7226#ifdef __WATCOMC__
7227RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7228#else
7229DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7230{
7231# if RT_INLINE_ASM_USES_INTRIN
7232 return _rotr(u32, cShift);
7233
7234# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7235 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7236 return u32;
7237
7238# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7239 __asm__ __volatile__(
7240# if defined(RT_ARCH_ARM64)
7241 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7242# else
7243 "ror %[uRet], %[uVal], %[cShift]\n\t"
7244# endif
7245 : [uRet] "=r" (u32)
7246 : [uVal] "[uRet]" (u32)
7247 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7248 return u32;
7249
7250# else
7251 cShift &= 31;
7252 return (u32 >> cShift) | (u32 << (32 - cShift));
7253# endif
7254}
7255#endif
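
/*
 * Editor's note: an illustrative sketch (not part of the original header) of
 * the 32-bit rotate helpers; unlike plain shifts, no bits are lost.  The test
 * values are assumptions for the example.
 */
#if 0 /* illustrative example only */
static void exampleRotate32(void)
{
    uint32_t const u32 = UINT32_C(0x80000001);
    Assert(ASMRotateLeftU32(u32, 4)  == UINT32_C(0x00000018));
    Assert(ASMRotateRightU32(u32, 4) == UINT32_C(0x18000000));
}
#endif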
7256
7257
7258/**
7259 * Rotate 64-bit unsigned value to the left by @a cShift.
7260 *
7261 * @returns Rotated value.
7262 * @param u64 The value to rotate.
7263 * @param cShift How many bits to rotate by.
7264 */
7265DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7266{
7267#if RT_INLINE_ASM_USES_INTRIN
7268 return _rotl64(u64, cShift);
7269
7270#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7271 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7272 return u64;
7273
7274#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7275 uint32_t uSpill;
7276 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7277 "jz 1f\n\t"
7278 "xchgl %%eax, %%edx\n\t"
7279 "1:\n\t"
7280 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7281 "jz 2f\n\t"
7282 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7283 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7284 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7285 "2:\n\t" /* } */
7286 : "=A" (u64)
7287 , "=c" (cShift)
7288 , "=r" (uSpill)
7289 : "0" (u64)
7290 , "1" (cShift)
7291 : "cc");
7292 return u64;
7293
7294# elif defined(RT_ARCH_ARM64)
7295 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7296 : [uRet] "=r" (u64)
7297 : [uVal] "[uRet]" (u64)
7298 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7299 return u64;
7300
7301#else
7302 cShift &= 63;
7303 return (u64 << cShift) | (u64 >> (64 - cShift));
7304#endif
7305}
7306
7307
7308/**
7309 * Rotate 64-bit unsigned value to the right by @a cShift.
7310 *
7311 * @returns Rotated value.
7312 * @param u64 The value to rotate.
7313 * @param cShift How many bits to rotate by.
7314 */
7315DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7316{
7317#if RT_INLINE_ASM_USES_INTRIN
7318 return _rotr64(u64, cShift);
7319
7320#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7321 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7322 return u64;
7323
7324#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7325 uint32_t uSpill;
7326 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7327 "jz 1f\n\t"
7328 "xchgl %%eax, %%edx\n\t"
7329 "1:\n\t"
7330 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7331 "jz 2f\n\t"
7332 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7333 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7334 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7335 "2:\n\t" /* } */
7336 : "=A" (u64)
7337 , "=c" (cShift)
7338 , "=r" (uSpill)
7339 : "0" (u64)
7340 , "1" (cShift)
7341 : "cc");
7342 return u64;
7343
7344# elif defined(RT_ARCH_ARM64)
7345 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7346 : [uRet] "=r" (u64)
7347 : [uVal] "[uRet]" (u64)
7348 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7349 return u64;
7350
7351#else
7352 cShift &= 63;
7353 return (u64 >> cShift) | (u64 << (64 - cShift));
7354#endif
7355}
7356
7357/** @} */
7358
7359
7360/** @} */
7361
7362/*
7363 * Include #pragma aux definitions for Watcom C/C++.
7364 */
7365#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7366# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7367# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7368# include "asm-watcom-x86-16.h"
7369#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7370# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7371# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7372# include "asm-watcom-x86-32.h"
7373#endif
7374
7375#endif /* !IPRT_INCLUDED_asm_h */
7376