VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 102938

Last change on this file since 102938 was 102938, checked in by vboxsync, 13 months ago

iprt/asm.h: Added a CLREX instruction to the cmpxchg sequences on arm64, so we don't keep the CPU in the locked state unnecessarily long. This speeds up the negative (test fail) benchmarks of the ASMAtomicCmpXchg* functions. bugref:9898

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
7 *
8 * This file is part of VirtualBox base platform packages, as
9 * available from https://www.virtualbox.org.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation, in version 3 of the
14 * License.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses>.
23 *
24 * The contents of this file may alternatively be used under the terms
25 * of the Common Development and Distribution License Version 1.0
26 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
27 * in the VirtualBox distribution, in which case the provisions of the
28 * CDDL are applicable instead of those of the GPL.
29 *
30 * You may elect to license modified versions of this file under the
31 * terms and conditions of either the GPL or the CDDL or both.
32 *
33 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
34 */
35
36#ifndef IPRT_INCLUDED_asm_h
37#define IPRT_INCLUDED_asm_h
38#ifndef RT_WITHOUT_PRAGMA_ONCE
39# pragma once
40#endif
41
42#include <iprt/cdefs.h>
43#include <iprt/types.h>
44#include <iprt/assert.h>
45/** @def RT_INLINE_ASM_USES_INTRIN
46 * Defined as 1 if we're using the _MSC_VER 1400 (or later) intrinsics.
47 * Otherwise defined as 0.
48 */
49
50/* Solaris 10 header ugliness */
51#ifdef u
52# undef u
53#endif
54
55#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
56/* Emit the intrinsics at all optimization levels. */
57# include <iprt/sanitized/intrin.h>
58# pragma intrinsic(_ReadWriteBarrier)
59# pragma intrinsic(__cpuid)
60# pragma intrinsic(__stosd)
61# pragma intrinsic(__stosw)
62# pragma intrinsic(__stosb)
63# pragma intrinsic(_BitScanForward)
64# pragma intrinsic(_BitScanReverse)
65# pragma intrinsic(_bittest)
66# pragma intrinsic(_bittestandset)
67# pragma intrinsic(_bittestandreset)
68# pragma intrinsic(_bittestandcomplement)
69# pragma intrinsic(_byteswap_ushort)
70# pragma intrinsic(_byteswap_ulong)
71# pragma intrinsic(_interlockedbittestandset)
72# pragma intrinsic(_interlockedbittestandreset)
73# pragma intrinsic(_InterlockedAnd)
74# pragma intrinsic(_InterlockedOr)
75# pragma intrinsic(_InterlockedXor)
76# pragma intrinsic(_InterlockedIncrement)
77# pragma intrinsic(_InterlockedDecrement)
78# pragma intrinsic(_InterlockedExchange)
79# pragma intrinsic(_InterlockedExchangeAdd)
80# pragma intrinsic(_InterlockedCompareExchange)
81# pragma intrinsic(_InterlockedCompareExchange8)
82# pragma intrinsic(_InterlockedCompareExchange16)
83# pragma intrinsic(_InterlockedCompareExchange64)
84# pragma intrinsic(_rotl)
85# pragma intrinsic(_rotr)
86# pragma intrinsic(_rotl64)
87# pragma intrinsic(_rotr64)
88# ifdef RT_ARCH_AMD64
89# pragma intrinsic(__stosq)
90# pragma intrinsic(_byteswap_uint64)
91# pragma intrinsic(_InterlockedCompareExchange128)
92# pragma intrinsic(_InterlockedExchange64)
93# pragma intrinsic(_InterlockedExchangeAdd64)
94# pragma intrinsic(_InterlockedAnd64)
95# pragma intrinsic(_InterlockedOr64)
96# pragma intrinsic(_InterlockedIncrement64)
97# pragma intrinsic(_InterlockedDecrement64)
98# endif
99#endif
100
101/*
102 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
103 */
104#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
105# include "asm-watcom-x86-16.h"
106#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
107# include "asm-watcom-x86-32.h"
108#endif
109
110
111/** @defgroup grp_rt_asm ASM - Assembly Routines
112 * @ingroup grp_rt
113 *
114 * @remarks The difference between ordered and unordered atomic operations is
115 * that the former complete all outstanding reads and writes before
116 * continuing, while the latter make no promises about the order.
117 * Ordered operations do not, it seems, make any 100% promise as to
118 * whether the operation itself will complete before any subsequent
119 * memory access. (Please correct if wrong.)
120 *
121 * ASMAtomicSomething operations are all ordered, while
122 * ASMAtomicUoSomething are unordered (note the Uo).
123 *
124 * Please note that ordered operations do not necessarily imply a
125 * compiler (memory) barrier. The user has to use the
126 * ASMCompilerBarrier() macro when that is deemed necessary.
127 *
128 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
129 * to reorder or even optimize assembler instructions away. For
130 * instance, in the following code the second rdmsr instruction is
131 * optimized away because gcc treats that instruction as deterministic:
132 *
133 * @code
134 * static inline uint32_t rdmsr_low(int idx)
135 * {
136 * uint32_t low;
137 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
138 * }
139 * ...
140 * uint32_t msr1 = rdmsr_low(1);
141 * foo(msr1);
142 * msr1 = rdmsr_low(1);
143 * bar(msr1);
144 * @endcode
145 *
146 * The input parameter of rdmsr_low is the same for both calls and
147 * therefore gcc will use the result of the first call as input
148 * parameter for bar() as well. For rdmsr this is not acceptable as
149 * this instruction is _not_ deterministic. This applies to reading
150 * machine status information in general.
151 *
152 * @{
153 */
154
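/** @par Example (editor's sketch, not part of the original sources)
 * A minimal illustration of the ordered/unordered distinction described above,
 * using only functions declared in this header; the shared variables are
 * hypothetical:
 * @code
 * static uint32_t volatile s_u32Payload; // written by the producer
 * static uint32_t volatile s_fReady;     // 0 = not ready, 1 = ready
 *
 * // Producer: the ordered exchange completes the payload store first.
 * s_u32Payload = 42;
 * ASMAtomicXchgU32(&s_fReady, 1);
 *
 * // Note: "ordered" does not imply a compiler barrier; add an explicit
 * // ASMCompilerBarrier() where the compiler must not cache values.
 * @endcode
 */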
155
156/** @def RT_INLINE_ASM_GCC_4_3_X_X86
157 * Used to work around some 4.3.x register allocation issues in this version of
158 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
159 * definitely not for 5.x */
160#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
161# define RT_INLINE_ASM_GCC_4_3_X_X86 1
162#else
163# define RT_INLINE_ASM_GCC_4_3_X_X86 0
164#endif
165
166/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
167 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
168 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
169 * mode, x86.
170 *
171 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
172 * when in PIC mode on x86.
173 */
174#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
175# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
176# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
177# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
178# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
179# elif ( (defined(PIC) || defined(__PIC__)) \
180 && defined(RT_ARCH_X86) \
181 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
182 || defined(RT_OS_DARWIN)) )
183# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
184# else
185# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
186# endif
187#endif
188
189
190/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
191 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
192#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
193# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
194#else
195# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
196#endif
197
198/*
199 * ARM is great fun.
200 */
201#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
202
203# define RTASM_ARM_NO_BARRIER
204# ifdef RT_ARCH_ARM64
205# define RTASM_ARM_NO_BARRIER_IN_REG
206# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
207# define RTASM_ARM_DSB_SY "dsb sy\n\t"
208# define RTASM_ARM_DSB_SY_IN_REG
209# define RTASM_ARM_DSB_SY_COMMA_IN_REG
210# define RTASM_ARM_DMB_SY "dmb sy\n\t"
211# define RTASM_ARM_DMB_SY_IN_REG
212# define RTASM_ARM_DMB_SY_COMMA_IN_REG
213# define RTASM_ARM_DMB_ST "dmb st\n\t"
214# define RTASM_ARM_DMB_ST_IN_REG
215# define RTASM_ARM_DMB_ST_COMMA_IN_REG
216# define RTASM_ARM_DMB_LD "dmb ld\n\t"
217# define RTASM_ARM_DMB_LD_IN_REG
218# define RTASM_ARM_DMB_LD_COMMA_IN_REG
219# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
220# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
221 uint32_t rcSpill; \
222 uint32_t u32NewRet; \
223 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
224 RTASM_ARM_##barrier_type /* before label? */ \
225 "ldaxr %w[uNew], %[pMem]\n\t" \
226 modify64 \
227 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
228 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
229 : [pMem] "+Q" (*a_pu32Mem) \
230 , [uNew] "=&r" (u32NewRet) \
231 , [rc] "=&r" (rcSpill) \
232 : in_reg \
233 : "cc")
234# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
235 uint32_t rcSpill; \
236 uint32_t u32OldRet; \
237 uint32_t u32NewSpill; \
238 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
239 RTASM_ARM_##barrier_type /* before label? */ \
240 "ldaxr %w[uOld], %[pMem]\n\t" \
241 modify64 \
242 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
243 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
244 : [pMem] "+Q" (*a_pu32Mem) \
245 , [uOld] "=&r" (u32OldRet) \
246 , [uNew] "=&r" (u32NewSpill) \
247 , [rc] "=&r" (rcSpill) \
248 : in_reg \
249 : "cc")
250# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
251 uint32_t rcSpill; \
252 uint64_t u64NewRet; \
253 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
254 RTASM_ARM_##barrier_type /* before label? */ \
255 "ldaxr %[uNew], %[pMem]\n\t" \
256 modify64 \
257 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
258 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
259 : [pMem] "+Q" (*a_pu64Mem) \
260 , [uNew] "=&r" (u64NewRet) \
261 , [rc] "=&r" (rcSpill) \
262 : in_reg \
263 : "cc")
264# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
265 uint32_t rcSpill; \
266 uint64_t u64OldRet; \
267 uint64_t u64NewSpill; \
268 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
269 RTASM_ARM_##barrier_type /* before label? */ \
270 "ldaxr %[uOld], %[pMem]\n\t" \
271 modify64 \
272 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
273 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
274 : [pMem] "+Q" (*a_pu64Mem) \
275 , [uOld] "=&r" (u64OldRet) \
276 , [uNew] "=&r" (u64NewSpill) \
277 , [rc] "=&r" (rcSpill) \
278 : in_reg \
279 : "cc")
280
281# else /* RT_ARCH_ARM32 */
282# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
283# if RT_ARCH_ARM32 >= 7
284# warning armv7
285# define RTASM_ARM_NO_BARRIER_IN_REG
286# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
287# define RTASM_ARM_DSB_SY "dsb sy\n\t"
288# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
289# define RTASM_ARM_DMB_SY "dmb sy\n\t"
290# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
291# define RTASM_ARM_DMB_ST "dmb st\n\t"
292# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
293# define RTASM_ARM_DMB_LD "dmb ld\n\t"
294# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
295
296# elif RT_ARCH_ARM32 >= 6
297# warning armv6
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
301# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
302# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
306
307# elif RT_ARCH_ARM32 >= 4
308# warning armv5 or older
309# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
310# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
311# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
312# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
313# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
314# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
315# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
316# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
317# else
318# error "huh? Odd RT_ARCH_ARM32 value!"
319# endif
320# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
321# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
322# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
323# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
324# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
325 uint32_t rcSpill; \
326 uint32_t u32NewRet; \
327 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
328 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
329 "ldrex %[uNew], %[pMem]\n\t" \
330 modify32 \
331 "strex %[rc], %[uNew], %[pMem]\n\t" \
332 "cmp %[rc], #0\n\t" \
333 "bne Ltry_again_" #name "_%=\n\t" \
334 : [pMem] "+m" (*a_pu32Mem) \
335 , [uNew] "=&r" (u32NewRet) \
336 , [rc] "=&r" (rcSpill) \
337 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
338 , in_reg \
339 : "cc")
340# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
341 uint32_t rcSpill; \
342 uint32_t u32OldRet; \
343 uint32_t u32NewSpill; \
344 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
345 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
346 "ldrex %[uOld], %[pMem]\n\t" \
347 modify32 \
348 "strex %[rc], %[uNew], %[pMem]\n\t" \
349 "cmp %[rc], #0\n\t" \
350 "bne Ltry_again_" #name "_%=\n\t" \
351 : [pMem] "+m" (*a_pu32Mem) \
352 , [uOld] "=&r" (u32OldRet) \
353 , [uNew] "=&r" (u32NewSpill) \
354 , [rc] "=&r" (rcSpill) \
355 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
356 , in_reg \
357 : "cc")
358# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
359 uint32_t rcSpill; \
360 uint64_t u64NewRet; \
361 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
362 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
363 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
364 modify32 \
365 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
366 "cmp %[rc], #0\n\t" \
367 "bne Ltry_again_" #name "_%=\n\t" \
368 : [pMem] "+m" (*a_pu64Mem), \
369 [uNew] "=&r" (u64NewRet), \
370 [rc] "=&r" (rcSpill) \
371 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
372 , in_reg \
373 : "cc")
374# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
375 uint32_t rcSpill; \
376 uint64_t u64OldRet; \
377 uint64_t u64NewSpill; \
378 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
379 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
380 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
381 modify32 \
382 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
383 "cmp %[rc], #0\n\t" \
384 "bne Ltry_again_" #name "_%=\n\t" \
385 : [pMem] "+m" (*a_pu64Mem), \
386 [uOld] "=&r" (u64OldRet), \
387 [uNew] "=&r" (u64NewSpill), \
388 [rc] "=&r" (rcSpill) \
389 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
390 , in_reg \
391 : "cc")
392# endif /* RT_ARCH_ARM32 */
393#endif
394
395
396/** @def ASMReturnAddress
397 * Gets the return address of the current (or calling if you like) function or method.
398 */
399#ifdef _MSC_VER
400# ifdef __cplusplus
401extern "C"
402# endif
403void * _ReturnAddress(void);
404# pragma intrinsic(_ReturnAddress)
405# define ASMReturnAddress() _ReturnAddress()
406#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
407# define ASMReturnAddress() __builtin_return_address(0)
408#elif defined(__WATCOMC__)
409# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
410#else
411# error "Unsupported compiler."
412#endif
413
414
415/**
416 * Compiler memory barrier.
417 *
418 * Ensure that the compiler does not use any cached (register/tmp stack) memory
419 * values or any outstanding writes when returning from this function.
420 *
421 * This function must be used if non-volatile data is modified by a
422 * device or the VMM. Typical cases are port access, MMIO access,
423 * trapping instructions, etc.
424 */
425#if RT_INLINE_ASM_GNU_STYLE
426# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
427#elif RT_INLINE_ASM_USES_INTRIN
428# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
429#elif defined(__WATCOMC__)
430void ASMCompilerBarrier(void);
431#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
432DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
433{
434 __asm
435 {
436 }
437}
438#endif
439
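/** @par Example (editor's sketch, not part of the original sources)
 * A hypothetical polling loop on a status word that a device updates behind
 * the compiler's back; the barrier forces a fresh load on every iteration:
 * @code
 * static uint32_t s_uDeviceStatus; // updated by the (hypothetical) device
 *
 * while (s_uDeviceStatus == 0)
 *     ASMCompilerBarrier(); // don't let the compiler cache s_uDeviceStatus in a register
 * @endcode
 */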
440
441/** @def ASMBreakpoint
442 * Debugger Breakpoint.
443 * @deprecated Use RT_BREAKPOINT instead.
444 * @internal
445 */
446#define ASMBreakpoint() RT_BREAKPOINT()
447
448
449/**
450 * Spinloop hint for platforms that have these, empty function on the other
451 * platforms.
452 *
453 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
454 * spin locks.
455 */
456#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
457RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
458#else
459DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
460{
461# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
462# if RT_INLINE_ASM_GNU_STYLE
463 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
464# else
465 __asm {
466 _emit 0f3h
467 _emit 090h
468 }
469# endif
470
471# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
472 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
473
474# else
475 /* dummy */
476# endif
477}
478#endif
479
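/** @par Example (editor's sketch, not part of the original sources)
 * A simple spin lock acquire/release built on ASMAtomicCmpXchgU32 (declared
 * further down in this header) with ASMNopPause in the contention path; the
 * lock word is hypothetical:
 * @code
 * static uint32_t volatile s_uLockWord = 0; // 0 = free, 1 = taken
 *
 * while (!ASMAtomicCmpXchgU32(&s_uLockWord, 1, 0))
 *     ASMNopPause(); // be nice to the sibling hyperthread while spinning
 * // ... critical section ...
 * ASMAtomicXchgU32(&s_uLockWord, 0); // release
 * @endcode
 */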
480
481/**
482 * Atomically Exchange an unsigned 8-bit value, ordered.
483 *
484 * @returns Current *pu8 value
485 * @param pu8 Pointer to the 8-bit variable to update.
486 * @param u8 The 8-bit value to assign to *pu8.
487 */
488#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
489RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
490#else
491DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
492{
493# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
494# if RT_INLINE_ASM_GNU_STYLE
495 __asm__ __volatile__("xchgb %0, %1\n\t"
496 : "=m" (*pu8)
497 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
498 : "1" (u8)
499 , "m" (*pu8));
500# else
501 __asm
502 {
503# ifdef RT_ARCH_AMD64
504 mov rdx, [pu8]
505 mov al, [u8]
506 xchg [rdx], al
507 mov [u8], al
508# else
509 mov edx, [pu8]
510 mov al, [u8]
511 xchg [edx], al
512 mov [u8], al
513# endif
514 }
515# endif
516 return u8;
517
518# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
519 uint32_t uOld;
520 uint32_t rcSpill;
521 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU8_%=:\n\t"
522 RTASM_ARM_DMB_SY
523# if defined(RT_ARCH_ARM64)
524 "ldaxrb %w[uOld], %[pMem]\n\t"
525 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
526 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU8_%=\n\t"
527# else
528 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
529 "strexb %[rc], %[uNew], %[pMem]\n\t"
530 "cmp %[rc], #0\n\t"
531 "bne Ltry_again_ASMAtomicXchgU8_%=\n\t"
532# endif
533 : [pMem] "+Q" (*pu8)
534 , [uOld] "=&r" (uOld)
535 , [rc] "=&r" (rcSpill)
536 : [uNew] "r" ((uint32_t)u8)
537 RTASM_ARM_DMB_SY_COMMA_IN_REG
538 : "cc");
539 return (uint8_t)uOld;
540
541# else
542# error "Port me"
543# endif
544}
545#endif
546
547
548/**
549 * Atomically Exchange a signed 8-bit value, ordered.
550 *
551 * @returns Current *pi8 value
552 * @param pi8 Pointer to the 8-bit variable to update.
553 * @param i8 The 8-bit value to assign to *pi8.
554 */
555DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
556{
557 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
558}
559
560
561/**
562 * Atomically Exchange a bool value, ordered.
563 *
564 * @returns Current *pf value
565 * @param pf Pointer to the 8-bit variable to update.
566 * @param f The 8-bit value to assign to *pf.
567 */
568DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
569{
570#ifdef _MSC_VER
571 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
572#else
573 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
574#endif
575}
576
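/** @par Example (editor's sketch, not part of the original sources)
 * Using the exchange as a run-once guard: whoever sees the old value 'false'
 * wins. The flag is hypothetical:
 * @code
 * static bool volatile s_fDoneOnce = false;
 *
 * if (!ASMAtomicXchgBool(&s_fDoneOnce, true))
 * {
 *     // first caller only: perform the one-time work here
 * }
 * @endcode
 */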
577
578/**
579 * Atomically Exchange an unsigned 16-bit value, ordered.
580 *
581 * @returns Current *pu16 value
582 * @param pu16 Pointer to the 16-bit variable to update.
583 * @param u16 The 16-bit value to assign to *pu16.
584 */
585#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
586RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
587#else
588DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
589{
590# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
591# if RT_INLINE_ASM_GNU_STYLE
592 __asm__ __volatile__("xchgw %0, %1\n\t"
593 : "=m" (*pu16)
594 , "=r" (u16)
595 : "1" (u16)
596 , "m" (*pu16));
597# else
598 __asm
599 {
600# ifdef RT_ARCH_AMD64
601 mov rdx, [pu16]
602 mov ax, [u16]
603 xchg [rdx], ax
604 mov [u16], ax
605# else
606 mov edx, [pu16]
607 mov ax, [u16]
608 xchg [edx], ax
609 mov [u16], ax
610# endif
611 }
612# endif
613 return u16;
614
615# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
616 uint32_t uOld;
617 uint32_t rcSpill;
618 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU16_%=:\n\t"
619 RTASM_ARM_DMB_SY
620# if defined(RT_ARCH_ARM64)
621 "ldaxrh %w[uOld], %[pMem]\n\t"
622 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
623 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU16_%=\n\t"
624# else
625 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
626 "strexh %[rc], %[uNew], %[pMem]\n\t"
627 "cmp %[rc], #0\n\t"
628 "bne Ltry_again_ASMAtomicXchgU16_%=\n\t"
629# endif
630 : [pMem] "+Q" (*pu16)
631 , [uOld] "=&r" (uOld)
632 , [rc] "=&r" (rcSpill)
633 : [uNew] "r" ((uint32_t)u16)
634 RTASM_ARM_DMB_SY_COMMA_IN_REG
635 : "cc");
636 return (uint16_t)uOld;
637
638# else
639# error "Port me"
640# endif
641}
642#endif
643
644
645/**
646 * Atomically Exchange a signed 16-bit value, ordered.
647 *
648 * @returns Current *pi16 value
649 * @param pi16 Pointer to the 16-bit variable to update.
650 * @param i16 The 16-bit value to assign to *pi16.
651 */
652DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
653{
654 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
655}
656
657
658/**
659 * Atomically Exchange an unsigned 32-bit value, ordered.
660 *
661 * @returns Current *pu32 value
662 * @param pu32 Pointer to the 32-bit variable to update.
663 * @param u32 The 32-bit value to assign to *pu32.
664 *
665 * @remarks Does not work on 286 and earlier.
666 */
667#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
668RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
669#else
670DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
671{
672# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
673# if RT_INLINE_ASM_GNU_STYLE
674 __asm__ __volatile__("xchgl %0, %1\n\t"
675 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
676 , "=r" (u32)
677 : "1" (u32)
678 , "m" (*pu32));
679
680# elif RT_INLINE_ASM_USES_INTRIN
681 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
682
683# else
684 __asm
685 {
686# ifdef RT_ARCH_AMD64
687 mov rdx, [pu32]
688 mov eax, u32
689 xchg [rdx], eax
690 mov [u32], eax
691# else
692 mov edx, [pu32]
693 mov eax, u32
694 xchg [edx], eax
695 mov [u32], eax
696# endif
697 }
698# endif
699 return u32;
700
701# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
702 uint32_t uOld;
703 uint32_t rcSpill;
704 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU32_%=:\n\t"
705 RTASM_ARM_DMB_SY
706# if defined(RT_ARCH_ARM64)
707 "ldaxr %w[uOld], %[pMem]\n\t"
708 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
709 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU32_%=\n\t"
710# else
711 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
712 "strex %[rc], %[uNew], %[pMem]\n\t"
713 "cmp %[rc], #0\n\t"
714 "bne Ltry_again_ASMAtomicXchgU32_%=\n\t"
715# endif
716 : [pMem] "+Q" (*pu32)
717 , [uOld] "=&r" (uOld)
718 , [rc] "=&r" (rcSpill)
719 : [uNew] "r" (u32)
720 RTASM_ARM_DMB_SY_COMMA_IN_REG
721 : "cc");
722 return uOld;
723
724# else
725# error "Port me"
726# endif
727}
728#endif
729
730
731/**
732 * Atomically Exchange a signed 32-bit value, ordered.
733 *
734 * @returns Current *pi32 value
735 * @param pi32 Pointer to the 32-bit variable to update.
736 * @param i32 The 32-bit value to assign to *pi32.
737 */
738DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
739{
740 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
741}
742
743
744/**
745 * Atomically Exchange an unsigned 64-bit value, ordered.
746 *
747 * @returns Current *pu64 value
748 * @param pu64 Pointer to the 64-bit variable to update.
749 * @param u64 The 64-bit value to assign to *pu64.
750 *
751 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
752 */
753#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
754 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
755RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
756#else
757DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
758{
759# if defined(RT_ARCH_AMD64)
760# if RT_INLINE_ASM_USES_INTRIN
761 return _InterlockedExchange64((__int64 *)pu64, u64);
762
763# elif RT_INLINE_ASM_GNU_STYLE
764 __asm__ __volatile__("xchgq %0, %1\n\t"
765 : "=m" (*pu64)
766 , "=r" (u64)
767 : "1" (u64)
768 , "m" (*pu64));
769 return u64;
770# else
771 __asm
772 {
773 mov rdx, [pu64]
774 mov rax, [u64]
775 xchg [rdx], rax
776 mov [u64], rax
777 }
778 return u64;
779# endif
780
781# elif defined(RT_ARCH_X86)
782# if RT_INLINE_ASM_GNU_STYLE
783# if defined(PIC) || defined(__PIC__)
784 uint32_t u32EBX = (uint32_t)u64;
785 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
786 "xchgl %%ebx, %3\n\t"
787 "1:\n\t"
788 "lock; cmpxchg8b (%5)\n\t"
789 "jnz 1b\n\t"
790 "movl %3, %%ebx\n\t"
791 /*"xchgl %%esi, %5\n\t"*/
792 : "=A" (u64)
793 , "=m" (*pu64)
794 : "0" (*pu64)
795 , "m" ( u32EBX )
796 , "c" ( (uint32_t)(u64 >> 32) )
797 , "S" (pu64)
798 : "cc");
799# else /* !PIC */
800 __asm__ __volatile__("1:\n\t"
801 "lock; cmpxchg8b %1\n\t"
802 "jnz 1b\n\t"
803 : "=A" (u64)
804 , "=m" (*pu64)
805 : "0" (*pu64)
806 , "b" ( (uint32_t)u64 )
807 , "c" ( (uint32_t)(u64 >> 32) )
808 : "cc");
809# endif
810# else
811 __asm
812 {
813 mov ebx, dword ptr [u64]
814 mov ecx, dword ptr [u64 + 4]
815 mov edi, pu64
816 mov eax, dword ptr [edi]
817 mov edx, dword ptr [edi + 4]
818 retry:
819 lock cmpxchg8b [edi]
820 jnz retry
821 mov dword ptr [u64], eax
822 mov dword ptr [u64 + 4], edx
823 }
824# endif
825 return u64;
826
827# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
828 uint32_t rcSpill;
829 uint64_t uOld;
830 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU64_%=:\n\t"
831 RTASM_ARM_DMB_SY
832# if defined(RT_ARCH_ARM64)
833 "ldaxr %[uOld], %[pMem]\n\t"
834 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
835 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU64_%=\n\t"
836# else
837 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
838 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
839 "cmp %[rc], #0\n\t"
840 "bne Ltry_again_ASMAtomicXchgU64_%=\n\t"
841# endif
842 : [pMem] "+Q" (*pu64)
843 , [uOld] "=&r" (uOld)
844 , [rc] "=&r" (rcSpill)
845 : [uNew] "r" (u64)
846 RTASM_ARM_DMB_SY_COMMA_IN_REG
847 : "cc");
848 return uOld;
849
850# else
851# error "Port me"
852# endif
853}
854#endif
855
856
857/**
858 * Atomically Exchange a signed 64-bit value, ordered.
859 *
860 * @returns Current *pi64 value
861 * @param pi64 Pointer to the 64-bit variable to update.
862 * @param i64 The 64-bit value to assign to *pi64.
863 */
864DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
865{
866 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
867}
868
869
870/**
871 * Atomically Exchange a size_t value, ordered.
872 *
873 * @returns Current *puDst value
874 * @param puDst Pointer to the size_t variable to update.
875 * @param uNew The new value to assign to *puDst.
876 */
877DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
878{
879#if ARCH_BITS == 16
880 AssertCompile(sizeof(size_t) == 2);
881 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
882#elif ARCH_BITS == 32
883 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
884#elif ARCH_BITS == 64
885 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
886#else
887# error "ARCH_BITS is bogus"
888#endif
889}
890
891
892/**
893 * Atomically Exchange a pointer value, ordered.
894 *
895 * @returns Current *ppv value
896 * @param ppv Pointer to the pointer variable to update.
897 * @param pv The pointer value to assign to *ppv.
898 */
899DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
900{
901#if ARCH_BITS == 32 || ARCH_BITS == 16
902 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
903#elif ARCH_BITS == 64
904 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
905#else
906# error "ARCH_BITS is bogus"
907#endif
908}
909
910
911/**
912 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
913 *
914 * @returns Current *ppv value
915 * @param ppv Pointer to the pointer variable to update.
916 * @param pv The pointer value to assign to *ppv.
917 * @param Type The type of *ppv, sans volatile.
918 */
919#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
920# define ASMAtomicXchgPtrT(ppv, pv, Type) \
921 __extension__ \
922 ({\
923 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
924 Type const pvTypeChecked = (pv); \
925 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
926 pvTypeCheckedRet; \
927 })
928#else
929# define ASMAtomicXchgPtrT(ppv, pv, Type) \
930 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
931#endif
932
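/** @par Example (editor's sketch, not part of the original sources)
 * Swapping in a new configuration pointer and getting the previous one back
 * without casts; MYCONFIG and the variables are hypothetical:
 * @code
 * typedef struct MYCONFIG { uint32_t cMilliesTimeout; } MYCONFIG;
 * static MYCONFIG * volatile s_pCfg;
 *
 * MYCONFIG *pNewCfg = ...; // allocated and filled in elsewhere
 * MYCONFIG *pOldCfg = ASMAtomicXchgPtrT(&s_pCfg, pNewCfg, MYCONFIG *);
 * // pOldCfg can be retired once no other thread is using it.
 * @endcode
 */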
933
934/**
935 * Atomically Exchange a raw-mode context pointer value, ordered.
936 *
937 * @returns Current *ppv value
938 * @param ppvRC Pointer to the pointer variable to update.
939 * @param pvRC The pointer value to assign to *ppvRC.
940 */
941DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
942{
943 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
944}
945
946
947/**
948 * Atomically Exchange a ring-0 pointer value, ordered.
949 *
950 * @returns Current *ppv value
951 * @param ppvR0 Pointer to the pointer variable to update.
952 * @param pvR0 The pointer value to assign to *ppvR0.
953 */
954DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
955{
956#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
957 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
958#elif R0_ARCH_BITS == 64
959 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
960#else
961# error "R0_ARCH_BITS is bogus"
962#endif
963}
964
965
966/**
967 * Atomically Exchange a ring-3 pointer value, ordered.
968 *
969 * @returns Current *ppv value
970 * @param ppvR3 Pointer to the pointer variable to update.
971 * @param pvR3 The pointer value to assign to *ppvR3.
972 */
973DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
974{
975#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
976 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
977#elif R3_ARCH_BITS == 64
978 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
979#else
980# error "R3_ARCH_BITS is bogus"
981#endif
982}
983
984
985/** @def ASMAtomicXchgHandle
986 * Atomically Exchange a typical IPRT handle value, ordered.
987 *
988 * @param ph Pointer to the value to update.
989 * @param hNew The new value to assign to *ph.
990 * @param phRes Where to store the current *ph value.
991 *
992 * @remarks This doesn't currently work for all handles (like RTFILE).
993 */
994#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
995# define ASMAtomicXchgHandle(ph, hNew, phRes) \
996 do { \
997 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
998 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
999 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
1000 } while (0)
1001#elif HC_ARCH_BITS == 64
1002# define ASMAtomicXchgHandle(ph, hNew, phRes) \
1003 do { \
1004 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1005 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1006 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
1007 } while (0)
1008#else
1009# error HC_ARCH_BITS
1010#endif
1011
1012
1013/**
1014 * Atomically Exchange a value whose size might differ
1015 * between platforms or compilers, ordered.
1016 *
1017 * @param pu Pointer to the variable to update.
1018 * @param uNew The value to assign to *pu.
1019 * @todo This is busted as it's missing the result argument.
1020 */
1021#define ASMAtomicXchgSize(pu, uNew) \
1022 do { \
1023 switch (sizeof(*(pu))) { \
1024 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1025 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1026 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1027 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1028 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1029 } \
1030 } while (0)
1031
1032/**
1033 * Atomically Exchange a value whose size might differ
1034 * between platforms or compilers, ordered.
1035 *
1036 * @param pu Pointer to the variable to update.
1037 * @param uNew The value to assign to *pu.
1038 * @param puRes Where to store the current *pu value.
1039 */
1040#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1041 do { \
1042 switch (sizeof(*(pu))) { \
1043 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1044 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1045 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1046 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1047 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1048 } \
1049 } while (0)
1050
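/** @par Example (editor's sketch, not part of the original sources)
 * The size dispatch in action on a 16-bit variable; the names are hypothetical:
 * @code
 * static uint16_t volatile s_u16State = 7;
 * uint16_t u16Old;
 *
 * ASMAtomicXchgSizeCorrect(&s_u16State, 0, &u16Old); // expands to ASMAtomicXchgU16
 * // u16Old now holds 7 (or whatever another thread stored last).
 * @endcode
 */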
1051
1052
1053/**
1054 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1055 *
1056 * @returns true if xchg was done.
1057 * @returns false if xchg wasn't done.
1058 *
1059 * @param pu8 Pointer to the value to update.
1060 * @param u8New The new value to assign to *pu8.
1061 * @param u8Old The old value to compare *pu8 with.
1062 *
1063 * @remarks x86: Requires a 486 or later.
1064 * @todo Rename ASMAtomicCmpWriteU8
1065 */
1066#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1067RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1068#else
1069DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1070{
1071# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1072 uint8_t u8Ret;
1073 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1074 "setz %1\n\t"
1075 : "=m" (*pu8)
1076 , "=qm" (u8Ret)
1077 , "=a" (u8Old)
1078 : "q" (u8New)
1079 , "2" (u8Old)
1080 , "m" (*pu8)
1081 : "cc");
1082 return (bool)u8Ret;
1083
1084# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1085 union { uint32_t u; bool f; } fXchg;
1086 uint32_t u32Spill;
1087 uint32_t rcSpill;
1088 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1089 RTASM_ARM_DMB_SY
1090# if defined(RT_ARCH_ARM64)
1091 "ldaxrb %w[uOld], %[pMem]\n\t"
1092 "cmp %w[uOld], %w[uCmp]\n\t"
1093 "bne 1f\n\t" /* stop here if not equal */
1094 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1095 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1096 "mov %w[fXchg], #1\n\t"
1097 "1:\n\t"
1098 "clrex\n\t"
1099# else
1100 "ldrexb %[uOld], %[pMem]\n\t"
1101 "teq %[uOld], %[uCmp]\n\t"
1102 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1103 "bne 1f\n\t" /* stop here if not equal */
1104 "cmp %[rc], #0\n\t"
1105 "bne Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1106 "mov %[fXchg], #1\n\t"
1107 "1:\n\t"
1108 /** @todo clrexne on armv7? */
1109# endif
1110 : [pMem] "+Q" (*pu8)
1111 , [uOld] "=&r" (u32Spill)
1112 , [rc] "=&r" (rcSpill)
1113 , [fXchg] "=&r" (fXchg.u)
1114 : [uCmp] "r" ((uint32_t)u8Old)
1115 , [uNew] "r" ((uint32_t)u8New)
1116 , "[fXchg]" (0)
1117 RTASM_ARM_DMB_SY_COMMA_IN_REG
1118 : "cc");
1119 return fXchg.f;
1120
1121# else
1122# error "Port me"
1123# endif
1124}
1125#endif
1126
1127
1128/**
1129 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1130 *
1131 * @returns true if xchg was done.
1132 * @returns false if xchg wasn't done.
1133 *
1134 * @param pi8 Pointer to the value to update.
1135 * @param i8New The new value to assign to *pi8.
1136 * @param i8Old The old value to compare *pi8 with.
1137 *
1138 * @remarks x86: Requires a 486 or later.
1139 * @todo Rename ASMAtomicCmpWriteS8
1140 */
1141DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1142{
1143 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1144}
1145
1146
1147/**
1148 * Atomically Compare and Exchange a bool value, ordered.
1149 *
1150 * @returns true if xchg was done.
1151 * @returns false if xchg wasn't done.
1152 *
1153 * @param pf Pointer to the value to update.
1154 * @param fNew The new value to assign to *pf.
1155 * @param fOld The old value to compare *pf with.
1156 *
1157 * @remarks x86: Requires a 486 or later.
1158 * @todo Rename ASMAtomicCmpWriteBool
1159 */
1160DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1161{
1162 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1163}
1164
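/** @par Example (editor's sketch, not part of the original sources)
 * A non-blocking "try enter" flag based on compare-and-write of a bool; the
 * flag is hypothetical:
 * @code
 * static bool volatile s_fBusy = false;
 *
 * if (ASMAtomicCmpXchgBool(&s_fBusy, true, false))
 * {
 *     // we flipped it from false to true, so we own the section
 *     ASMAtomicXchgBool(&s_fBusy, false); // leave when done
 * }
 * else
 * {
 *     // somebody else is busy; back off or retry
 * }
 * @endcode
 */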
1165
1166/**
1167 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1168 *
1169 * @returns true if xchg was done.
1170 * @returns false if xchg wasn't done.
1171 *
1172 * @param pu32 Pointer to the value to update.
1173 * @param u32New The new value to assign to *pu32.
1174 * @param u32Old The old value to compare *pu32 with.
1175 *
1176 * @remarks x86: Requires a 486 or later.
1177 * @todo Rename ASMAtomicCmpWriteU32
1178 */
1179#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1180RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1181#else
1182DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1183{
1184# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1185# if RT_INLINE_ASM_GNU_STYLE
1186 uint8_t u8Ret;
1187 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1188 "setz %1\n\t"
1189 : "=m" (*pu32)
1190 , "=qm" (u8Ret)
1191 , "=a" (u32Old)
1192 : "r" (u32New)
1193 , "2" (u32Old)
1194 , "m" (*pu32)
1195 : "cc");
1196 return (bool)u8Ret;
1197
1198# elif RT_INLINE_ASM_USES_INTRIN
1199 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1200
1201# else
1202 uint32_t u32Ret;
1203 __asm
1204 {
1205# ifdef RT_ARCH_AMD64
1206 mov rdx, [pu32]
1207# else
1208 mov edx, [pu32]
1209# endif
1210 mov eax, [u32Old]
1211 mov ecx, [u32New]
1212# ifdef RT_ARCH_AMD64
1213 lock cmpxchg [rdx], ecx
1214# else
1215 lock cmpxchg [edx], ecx
1216# endif
1217 setz al
1218 movzx eax, al
1219 mov [u32Ret], eax
1220 }
1221 return !!u32Ret;
1222# endif
1223
1224# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1225 union { uint32_t u; bool f; } fXchg;
1226 uint32_t u32Spill;
1227 uint32_t rcSpill;
1228 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1229 RTASM_ARM_DMB_SY
1230# if defined(RT_ARCH_ARM64)
1231 "ldaxr %w[uOld], %[pMem]\n\t"
1232 "cmp %w[uOld], %w[uCmp]\n\t"
1233 "bne 1f\n\t" /* stop here if not equal */
1234 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1235 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1236 "mov %w[fXchg], #1\n\t"
1237 "1:\n\t"
1238 "clrex\n\t"
1239# else
1240 "ldrex %[uOld], %[pMem]\n\t"
1241 "teq %[uOld], %[uCmp]\n\t"
1242 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1243 "bne 1f\n\t" /* stop here if not equal */
1244 "cmp %[rc], #0\n\t"
1245 "bne Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1246 "mov %[fXchg], #1\n\t"
1247 "1:\n\t"
1248 /** @todo clrexne on armv7? */
1249# endif
1250 : [pMem] "+Q" (*pu32)
1251 , [uOld] "=&r" (u32Spill)
1252 , [rc] "=&r" (rcSpill)
1253 , [fXchg] "=&r" (fXchg.u)
1254 : [uCmp] "r" (u32Old)
1255 , [uNew] "r" (u32New)
1256 , "[fXchg]" (0)
1257 RTASM_ARM_DMB_SY_COMMA_IN_REG
1258 : "cc");
1259 return fXchg.f;
1260
1261# else
1262# error "Port me"
1263# endif
1264}
1265#endif
1266
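/** @par Example (editor's sketch, not part of the original sources)
 * The classic compare-exchange retry loop, here used to implement a
 * hypothetical "store the maximum" helper on top of ASMAtomicCmpXchgU32:
 * @code
 * DECLINLINE(uint32_t) myAtomicMaxU32(uint32_t volatile *pu32, uint32_t u32New)
 * {
 *     uint32_t u32Old;
 *     do
 *     {
 *         u32Old = *pu32; // plain read; the cmpxchg below revalidates it
 *         if (u32Old >= u32New)
 *             return u32Old; // already large enough, nothing to do
 *     } while (!ASMAtomicCmpXchgU32(pu32, u32New, u32Old));
 *     return u32Old;
 * }
 * @endcode
 */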
1267
1268/**
1269 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1270 *
1271 * @returns true if xchg was done.
1272 * @returns false if xchg wasn't done.
1273 *
1274 * @param pi32 Pointer to the value to update.
1275 * @param i32New The new value to assign to *pi32.
1276 * @param i32Old The old value to compare *pi32 with.
1277 *
1278 * @remarks x86: Requires a 486 or later.
1279 * @todo Rename ASMAtomicCmpWriteS32
1280 */
1281DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1282{
1283 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1284}
1285
1286
1287/**
1288 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1289 *
1290 * @returns true if xchg was done.
1291 * @returns false if xchg wasn't done.
1292 *
1293 * @param pu64 Pointer to the 64-bit variable to update.
1294 * @param u64New The 64-bit value to assign to *pu64.
1295 * @param u64Old The value to compare with.
1296 *
1297 * @remarks x86: Requires a Pentium or later.
1298 * @todo Rename ASMAtomicCmpWriteU64
1299 */
1300#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1301 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1302RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1303#else
1304DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1305{
1306# if RT_INLINE_ASM_USES_INTRIN
1307 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1308
1309# elif defined(RT_ARCH_AMD64)
1310# if RT_INLINE_ASM_GNU_STYLE
1311 uint8_t u8Ret;
1312 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1313 "setz %1\n\t"
1314 : "=m" (*pu64)
1315 , "=qm" (u8Ret)
1316 , "=a" (u64Old)
1317 : "r" (u64New)
1318 , "2" (u64Old)
1319 , "m" (*pu64)
1320 : "cc");
1321 return (bool)u8Ret;
1322# else
1323 bool fRet;
1324 __asm
1325 {
1326 mov rdx, [pu64]
1327 mov rax, [u64Old]
1328 mov rcx, [u64New]
1329 lock cmpxchg [rdx], rcx
1330 setz al
1331 mov [fRet], al
1332 }
1333 return fRet;
1334# endif
1335
1336# elif defined(RT_ARCH_X86)
1337 uint32_t u32Ret;
1338# if RT_INLINE_ASM_GNU_STYLE
1339# if defined(PIC) || defined(__PIC__)
1340 uint32_t u32EBX = (uint32_t)u64New;
1341 uint32_t u32Spill;
1342 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1343 "lock; cmpxchg8b (%6)\n\t"
1344 "setz %%al\n\t"
1345 "movl %4, %%ebx\n\t"
1346 "movzbl %%al, %%eax\n\t"
1347 : "=a" (u32Ret)
1348 , "=d" (u32Spill)
1349# if RT_GNUC_PREREQ(4, 3)
1350 , "+m" (*pu64)
1351# else
1352 , "=m" (*pu64)
1353# endif
1354 : "A" (u64Old)
1355 , "m" ( u32EBX )
1356 , "c" ( (uint32_t)(u64New >> 32) )
1357 , "S" (pu64)
1358 : "cc");
1359# else /* !PIC */
1360 uint32_t u32Spill;
1361 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1362 "setz %%al\n\t"
1363 "movzbl %%al, %%eax\n\t"
1364 : "=a" (u32Ret)
1365 , "=d" (u32Spill)
1366 , "+m" (*pu64)
1367 : "A" (u64Old)
1368 , "b" ( (uint32_t)u64New )
1369 , "c" ( (uint32_t)(u64New >> 32) )
1370 : "cc");
1371# endif
1372 return (bool)u32Ret;
1373# else
1374 __asm
1375 {
1376 mov ebx, dword ptr [u64New]
1377 mov ecx, dword ptr [u64New + 4]
1378 mov edi, [pu64]
1379 mov eax, dword ptr [u64Old]
1380 mov edx, dword ptr [u64Old + 4]
1381 lock cmpxchg8b [edi]
1382 setz al
1383 movzx eax, al
1384 mov dword ptr [u32Ret], eax
1385 }
1386 return !!u32Ret;
1387# endif
1388
1389# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1390 union { uint32_t u; bool f; } fXchg;
1391 uint64_t u64Spill;
1392 uint32_t rcSpill;
1393 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1394 RTASM_ARM_DMB_SY
1395# if defined(RT_ARCH_ARM64)
1396 "ldaxr %[uOld], %[pMem]\n\t"
1397 "cmp %[uOld], %[uCmp]\n\t"
1398 "bne 1f\n\t" /* stop here if not equal */
1399 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1400 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1401 "mov %w[fXchg], #1\n\t"
1402 "1:\n\t"
1403 "clrex\n\t"
1404# else
1405 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1406 "teq %[uOld], %[uCmp]\n\t"
1407 "teqeq %H[uOld], %H[uCmp]\n\t"
1408 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1409 "bne 1f\n\t" /* stop here if not equal */
1410 "cmp %[rc], #0\n\t"
1411 "bne Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1412 "mov %[fXchg], #1\n\t"
1413 "1:\n\t"
1414 /** @todo clrexne on armv7? */
1415# endif
1416 : [pMem] "+Q" (*pu64)
1417 , [uOld] "=&r" (u64Spill)
1418 , [rc] "=&r" (rcSpill)
1419 , [fXchg] "=&r" (fXchg.u)
1420 : [uCmp] "r" (u64Old)
1421 , [uNew] "r" (u64New)
1422 , "[fXchg]" (0)
1423 RTASM_ARM_DMB_SY_COMMA_IN_REG
1424 : "cc");
1425 return fXchg.f;
1426
1427# else
1428# error "Port me"
1429# endif
1430}
1431#endif
1432
1433
1434/**
1435 * Atomically Compare and exchange a signed 64-bit value, ordered.
1436 *
1437 * @returns true if xchg was done.
1438 * @returns false if xchg wasn't done.
1439 *
1440 * @param pi64 Pointer to the 64-bit variable to update.
1441 * @param i64 The 64-bit value to assign to *pi64.
1442 * @param i64Old The value to compare with.
1443 *
1444 * @remarks x86: Requires a Pentium or later.
1445 * @todo Rename ASMAtomicCmpWriteS64
1446 */
1447DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1448{
1449 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1450}
1451
1452#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
1453
1454/** @def RTASM_HAVE_CMP_WRITE_U128
1455 * Indicates that we've got ASMAtomicCmpWriteU128(), ASMAtomicCmpWriteU128v2()
1456 * and ASMAtomicCmpWriteExU128() available. */
1457# define RTASM_HAVE_CMP_WRITE_U128 1
1458
1459
1460/**
1461 * Atomically compare and write an unsigned 128-bit value, ordered.
1462 *
1463 * @returns true if write was done.
1464 * @returns false if write wasn't done.
1465 *
1466 * @param pu128 Pointer to the 128-bit variable to update.
1467 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1468 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1469 * @param u64OldHi The high 64-bit of the value to compare with.
1470 * @param u64OldLo The low 64-bit of the value to compare with.
1471 *
1472 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1473 */
1474# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1475DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1476 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1477# else
1478DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1479 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1480{
1481# if RT_INLINE_ASM_USES_INTRIN
1482 __int64 ai64Cmp[2];
1483 ai64Cmp[0] = u64OldLo;
1484 ai64Cmp[1] = u64OldHi;
1485 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1486
1487# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1488 return __sync_bool_compare_and_swap(pu128, ((uint128_t)u64OldHi << 64) | u64OldLo, ((uint128_t)u64NewHi << 64) | u64NewLo);
1489
1490# elif defined(RT_ARCH_AMD64)
1491# if RT_INLINE_ASM_GNU_STYLE
1492 uint64_t u64Ret;
1493 uint64_t u64Spill;
1494 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1495 "setz %%al\n\t"
1496 "movzbl %%al, %%eax\n\t"
1497 : "=a" (u64Ret)
1498 , "=d" (u64Spill)
1499 , "+m" (*pu128)
1500 : "a" (u64OldLo)
1501 , "d" (u64OldHi)
1502 , "b" (u64NewLo)
1503 , "c" (u64NewHi)
1504 : "cc");
1505
1506 return (bool)u64Ret;
1507# else
1508# error "Port me"
1509# endif
1510# else
1511# error "Port me"
1512# endif
1513}
1514# endif
1515
1516
1517/**
1518 * Atomically compare and write an unsigned 128-bit value, ordered.
1519 *
1520 * @returns true if write was done.
1521 * @returns false if write wasn't done.
1522 *
1523 * @param pu128 Pointer to the 128-bit variable to update.
1524 * @param u128New The 128-bit value to assign to *pu128.
1525 * @param u128Old The value to compare with.
1526 *
1527 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1528 */
1529DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1530{
1531# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1532# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1533 return __sync_bool_compare_and_swap(pu128, u128Old, u128New);
1534# else
1535 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1536 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1537# endif
1538# else
1539 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1540# endif
1541}
1542
1543
1544/**
1545 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1546 */
1547DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1548 const RTUINT128U u128Old) RT_NOTHROW_DEF
1549{
1550# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1551 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1552# else
1553 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1554# endif
1555}
1556
1557#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
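/** @par Example (editor's sketch, not part of the original sources)
 * Compare-and-write of a 16 byte value via the RTUINT128U wrapper, only valid
 * when RTASM_HAVE_CMP_WRITE_U128 is defined (and, on older AMD64 CPUs, after
 * checking CPUID as noted above); the shared variable is hypothetical:
 * @code
 * static volatile RTUINT128U s_u128Shared;
 * RTUINT128U uOld, uNew;
 *
 * uOld.s.Lo = 0; uOld.s.Hi = 0;            // expected current value
 * uNew.s.Lo = UINT64_C(0x1122334455667788);
 * uNew.s.Hi = UINT64_C(1);
 * if (ASMAtomicCmpWriteU128U(&s_u128Shared, uNew, uOld))
 * {
 *     // all 16 bytes were updated as a single atomic unit
 * }
 * @endcode
 */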
1558
1559/**
1560 * Atomically Compare and Exchange a pointer value, ordered.
1561 *
1562 * @returns true if xchg was done.
1563 * @returns false if xchg wasn't done.
1564 *
1565 * @param ppv Pointer to the value to update.
1566 * @param pvNew The new value to assign to *ppv.
1567 * @param pvOld The old value to compare *ppv with.
1568 *
1569 * @remarks x86: Requires a 486 or later.
1570 * @todo Rename ASMAtomicCmpWritePtrVoid
1571 */
1572DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1573{
1574#if ARCH_BITS == 32 || ARCH_BITS == 16
1575 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1576#elif ARCH_BITS == 64
1577 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1578#else
1579# error "ARCH_BITS is bogus"
1580#endif
1581}
1582
1583
1584/**
1585 * Atomically Compare and Exchange a pointer value, ordered.
1586 *
1587 * @returns true if xchg was done.
1588 * @returns false if xchg wasn't done.
1589 *
1590 * @param ppv Pointer to the value to update.
1591 * @param pvNew The new value to assign to *ppv.
1592 * @param pvOld The old value to compare *ppv with.
1593 *
1594 * @remarks This is relatively type safe on GCC platforms.
1595 * @remarks x86: Requires a 486 or later.
1596 * @todo Rename ASMAtomicCmpWritePtr
1597 */
1598#ifdef __GNUC__
1599# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1600 __extension__ \
1601 ({\
1602 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1603 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1604 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1605 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1606 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1607 fMacroRet; \
1608 })
1609#else
1610# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1611 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1612#endif
1613
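/** @par Example (editor's sketch, not part of the original sources)
 * A lock-free singly-linked list push using the type-checked pointer
 * compare-exchange; MYNODE and the list head are hypothetical:
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; uint32_t uData; } MYNODE;
 * static MYNODE * volatile s_pHead = NULL;
 *
 * void myListPush(MYNODE *pNode)
 * {
 *     MYNODE *pOld;
 *     do
 *     {
 *         pOld = s_pHead; // plain read; the cmpxchg revalidates it
 *         pNode->pNext = pOld;
 *     } while (!ASMAtomicCmpXchgPtr(&s_pHead, pNode, pOld));
 * }
 * @endcode
 */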
1614
1615/** @def ASMAtomicCmpXchgHandle
1616 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1617 *
1618 * @param ph Pointer to the value to update.
1619 * @param hNew The new value to assign to *ph.
1620 * @param hOld The old value to compare *ph with.
1621 * @param fRc Where to store the result.
1622 *
1623 * @remarks This doesn't currently work for all handles (like RTFILE).
1624 * @remarks x86: Requires a 486 or later.
1625 * @todo Rename ASMAtomicCmpWriteHandle
1626 */
1627#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1628# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1629 do { \
1630 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1631 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1632 } while (0)
1633#elif HC_ARCH_BITS == 64
1634# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1635 do { \
1636 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1637 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1638 } while (0)
1639#else
1640# error HC_ARCH_BITS
1641#endif
1642
1643
1644/** @def ASMAtomicCmpXchgSize
1645 * Atomically Compare and Exchange a value whose size might differ
1646 * between platforms or compilers, ordered.
1647 *
1648 * @param pu Pointer to the value to update.
1649 * @param uNew The new value to assign to *pu.
1650 * @param uOld The old value to compare *pu with.
1651 * @param fRc Where to store the result.
1652 *
1653 * @remarks x86: Requires a 486 or later.
1654 * @todo Rename ASMAtomicCmpWriteSize
1655 */
1656#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1657 do { \
1658 switch (sizeof(*(pu))) { \
1659 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1660 break; \
1661 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1662 break; \
1663 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1664 (fRc) = false; \
1665 break; \
1666 } \
1667 } while (0)
1668
1669
1670/**
1671 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1672 * back old value, ordered.
1673 *
1674 * @returns true if xchg was done.
1675 * @returns false if xchg wasn't done.
1676 *
1677 * @param pu8 Pointer to the value to update.
1678 * @param u8New The new value to assign to *pu8.
1679 * @param u8Old The old value to compare *pu8 with.
1680 * @param pu8Old Pointer to store the old value at.
1681 *
1682 * @remarks x86: Requires a 486 or later.
1683 */
1684#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1685RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1686#else
1687DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1688{
1689# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1690# if RT_INLINE_ASM_GNU_STYLE
1691 uint8_t u8Ret;
1692 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1693 "setz %1\n\t"
1694 : "=m" (*pu8)
1695 , "=qm" (u8Ret)
1696 , "=a" (*pu8Old)
1697# if defined(RT_ARCH_X86)
1698 : "q" (u8New)
1699# else
1700 : "r" (u8New)
1701# endif
1702 , "a" (u8Old)
1703 , "m" (*pu8)
1704 : "cc");
1705 return (bool)u8Ret;
1706
1707# elif RT_INLINE_ASM_USES_INTRIN
1708 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1709
1710# else
1711 uint8_t u8Ret;
1712 __asm
1713 {
1714# ifdef RT_ARCH_AMD64
1715 mov rdx, [pu8]
1716# else
1717 mov edx, [pu8]
1718# endif
1719        mov     al, [u8Old]
1720        mov     cl, [u8New]
1721#  ifdef RT_ARCH_AMD64
1722        lock cmpxchg [rdx], cl
1723        mov     rdx, [pu8Old]
1724        mov     [rdx], al
1725#  else
1726        lock cmpxchg [edx], cl
1727        mov     edx, [pu8Old]
1728        mov     [edx], al
1729#  endif
1730        setz    al
1731        movzx   eax, al
1732        mov     [u8Ret], al
1733 }
1734 return !!u8Ret;
1735# endif
1736
1737# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1738 union { uint8_t u; bool f; } fXchg;
1739 uint8_t u8ActualOld;
1740 uint8_t rcSpill;
1741 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1742 RTASM_ARM_DMB_SY
1743# if defined(RT_ARCH_ARM64)
1744 "ldaxrb %w[uOld], %[pMem]\n\t"
1745 "cmp %w[uOld], %w[uCmp]\n\t"
1746 "bne 1f\n\t" /* stop here if not equal */
1747 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1748 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1749 "mov %w[fXchg], #1\n\t"
1750 "1:\n\t"
1751 "clrex\n\t"
1752# else
1753 "ldrexb %[uOld], %[pMem]\n\t"
1754 "teq %[uOld], %[uCmp]\n\t"
1755 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1756 "bne 1f\n\t" /* stop here if not equal */
1757 "cmp %[rc], #0\n\t"
1758 "bne Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1759 "mov %[fXchg], #1\n\t"
1760 "1:\n\t"
1761 /** @todo clrexne on armv7? */
1762# endif
1763 : [pMem] "+Q" (*pu8)
1764 , [uOld] "=&r" (u8ActualOld)
1765 , [rc] "=&r" (rcSpill)
1766 , [fXchg] "=&r" (fXchg.u)
1767 : [uCmp] "r" (u8Old)
1768 , [uNew] "r" (u8New)
1769 , "[fXchg]" (0)
1770 RTASM_ARM_DMB_SY_COMMA_IN_REG
1771 : "cc");
1772 *pu8Old = u8ActualOld;
1773 return fXchg.f;
1774
1775# else
1776# error "Port me"
1777# endif
1778}
1779#endif
1780
1781
1782/**
1783 * Atomically Compare and Exchange a signed 8-bit value, additionally
1784 * passes back old value, ordered.
1785 *
1786 * @returns true if xchg was done.
1787 * @returns false if xchg wasn't done.
1788 *
1789 * @param pi8 Pointer to the value to update.
1790 * @param   i8New       The new value to assign to *pi8.
1791 * @param   i8Old       The old value to compare *pi8 with.
1792 * @param   pi8Old      Pointer to store the old value at.
1793 *
1794 * @remarks x86: Requires a 486 or later.
1795 */
1796DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1797{
1798 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1799}
1800
1801
1802/**
1803 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1804 * back old value, ordered.
1805 *
1806 * @returns true if xchg was done.
1807 * @returns false if xchg wasn't done.
1808 *
1809 * @param pu16 Pointer to the value to update.
1810 * @param   u16New      The new value to assign to *pu16.
1811 * @param   u16Old      The old value to compare *pu16 with.
1812 * @param   pu16Old     Pointer to store the old value at.
1813 *
1814 * @remarks x86: Requires a 486 or later.
1815 */
1816#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1817RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1818#else
1819DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1820{
1821# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1822# if RT_INLINE_ASM_GNU_STYLE
1823 uint8_t u8Ret;
1824 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1825 "setz %1\n\t"
1826 : "=m" (*pu16)
1827 , "=qm" (u8Ret)
1828 , "=a" (*pu16Old)
1829 : "r" (u16New)
1830 , "a" (u16Old)
1831 , "m" (*pu16)
1832 : "cc");
1833 return (bool)u8Ret;
1834
1835# elif RT_INLINE_ASM_USES_INTRIN
1836 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1837
1838# else
1839 uint16_t u16Ret;
1840 __asm
1841 {
1842# ifdef RT_ARCH_AMD64
1843 mov rdx, [pu16]
1844# else
1845 mov edx, [pu16]
1846# endif
1847        mov     ax, [u16Old]
1848        mov     cx, [u16New]
1849#  ifdef RT_ARCH_AMD64
1850        lock cmpxchg [rdx], cx
1851        mov     rdx, [pu16Old]
1852        mov     [rdx], ax
1853#  else
1854        lock cmpxchg [edx], cx
1855        mov     edx, [pu16Old]
1856        mov     [edx], ax
1857#  endif
1858        setz    al
1859        movzx   eax, al
1860        mov     [u16Ret], ax
1861 }
1862 return !!u16Ret;
1863# endif
1864
1865# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1866 union { uint16_t u; bool f; } fXchg;
1867 uint16_t u16ActualOld;
1868 uint16_t rcSpill;
1869 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1870 RTASM_ARM_DMB_SY
1871# if defined(RT_ARCH_ARM64)
1872 "ldaxrh %w[uOld], %[pMem]\n\t"
1873 "cmp %w[uOld], %w[uCmp]\n\t"
1874 "bne 1f\n\t" /* stop here if not equal */
1875 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1876 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1877 "mov %w[fXchg], #1\n\t"
1878 "1:\n\t"
1879 "clrex\n\t"
1880# else
1881 "ldrexh %[uOld], %[pMem]\n\t"
1882 "teq %[uOld], %[uCmp]\n\t"
1883 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1884 "bne 1f\n\t" /* stop here if not equal */
1885 "cmp %[rc], #0\n\t"
1886 "bne Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1887 "mov %[fXchg], #1\n\t"
1888 "1:\n\t"
1889 /** @todo clrexne on armv7? */
1890# endif
1891 : [pMem] "+Q" (*pu16)
1892 , [uOld] "=&r" (u16ActualOld)
1893 , [rc] "=&r" (rcSpill)
1894 , [fXchg] "=&r" (fXchg.u)
1895 : [uCmp] "r" (u16Old)
1896 , [uNew] "r" (u16New)
1897 , "[fXchg]" (0)
1898 RTASM_ARM_DMB_SY_COMMA_IN_REG
1899 : "cc");
1900 *pu16Old = u16ActualOld;
1901 return fXchg.f;
1902
1903# else
1904# error "Port me"
1905# endif
1906}
1907#endif
1908
1909
1910/**
1911 * Atomically Compare and Exchange a signed 16-bit value, additionally
1912 * passes back old value, ordered.
1913 *
1914 * @returns true if xchg was done.
1915 * @returns false if xchg wasn't done.
1916 *
1917 * @param pi16 Pointer to the value to update.
1918 * @param   i16New      The new value to assign to *pi16.
1919 * @param   i16Old      The old value to compare *pi16 with.
1920 * @param   pi16Old     Pointer to store the old value at.
1921 *
1922 * @remarks x86: Requires a 486 or later.
1923 */
1924DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1925{
1926 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1927}
1928
1929
1930/**
1931 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1932 * passes back old value, ordered.
1933 *
1934 * @returns true if xchg was done.
1935 * @returns false if xchg wasn't done.
1936 *
1937 * @param pu32 Pointer to the value to update.
1938 * @param   u32New      The new value to assign to *pu32.
1939 * @param   u32Old      The old value to compare *pu32 with.
1940 * @param   pu32Old     Pointer to store the old value at.
1941 *
1942 * @remarks x86: Requires a 486 or later.
1943 */
1944#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1945RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1946#else
1947DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1948{
1949# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1950# if RT_INLINE_ASM_GNU_STYLE
1951 uint8_t u8Ret;
1952 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1953 "setz %1\n\t"
1954 : "=m" (*pu32)
1955 , "=qm" (u8Ret)
1956 , "=a" (*pu32Old)
1957 : "r" (u32New)
1958 , "a" (u32Old)
1959 , "m" (*pu32)
1960 : "cc");
1961 return (bool)u8Ret;
1962
1963# elif RT_INLINE_ASM_USES_INTRIN
1964 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1965
1966# else
1967 uint32_t u32Ret;
1968 __asm
1969 {
1970# ifdef RT_ARCH_AMD64
1971 mov rdx, [pu32]
1972# else
1973 mov edx, [pu32]
1974# endif
1975 mov eax, [u32Old]
1976 mov ecx, [u32New]
1977# ifdef RT_ARCH_AMD64
1978 lock cmpxchg [rdx], ecx
1979 mov rdx, [pu32Old]
1980 mov [rdx], eax
1981# else
1982 lock cmpxchg [edx], ecx
1983 mov edx, [pu32Old]
1984 mov [edx], eax
1985# endif
1986 setz al
1987 movzx eax, al
1988 mov [u32Ret], eax
1989 }
1990 return !!u32Ret;
1991# endif
1992
1993# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1994 union { uint32_t u; bool f; } fXchg;
1995 uint32_t u32ActualOld;
1996 uint32_t rcSpill;
1997 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1998 RTASM_ARM_DMB_SY
1999# if defined(RT_ARCH_ARM64)
2000 "ldaxr %w[uOld], %[pMem]\n\t"
2001 "cmp %w[uOld], %w[uCmp]\n\t"
2002 "bne 1f\n\t" /* stop here if not equal */
2003 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
2004 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
2005 "mov %w[fXchg], #1\n\t"
2006 "1:\n\t"
2007 "clrex\n\t"
2008# else
2009 "ldrex %[uOld], %[pMem]\n\t"
2010 "teq %[uOld], %[uCmp]\n\t"
2011 "strexeq %[rc], %[uNew], %[pMem]\n\t"
2012 "bne 1f\n\t" /* stop here if not equal */
2013 "cmp %[rc], #0\n\t"
2014 "bne Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
2015 "mov %[fXchg], #1\n\t"
2016 "1:\n\t"
2017 /** @todo clrexne on armv7? */
2018# endif
2019 : [pMem] "+Q" (*pu32)
2020 , [uOld] "=&r" (u32ActualOld)
2021 , [rc] "=&r" (rcSpill)
2022 , [fXchg] "=&r" (fXchg.u)
2023 : [uCmp] "r" (u32Old)
2024 , [uNew] "r" (u32New)
2025 , "[fXchg]" (0)
2026 RTASM_ARM_DMB_SY_COMMA_IN_REG
2027 : "cc");
2028 *pu32Old = u32ActualOld;
2029 return fXchg.f;
2030
2031# else
2032# error "Port me"
2033# endif
2034}
2035#endif
2036
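/* Usage sketch (illustrative only): a lock-free read-modify-write loop.  On
 * failure the Ex variant hands back the value it actually found, so no
 * separate re-read is needed before retrying.  The function name is
 * hypothetical.
 *
 *      static void exampleAtomicOrU32(volatile uint32_t RT_FAR *pu32, uint32_t fOrMask)
 *      {
 *          uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *          while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fOrMask, u32Old, &u32Old))
 *              continue; // on failure u32Old now holds the current value, just retry
 *      }
 */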
2037
2038/**
2039 * Atomically Compare and Exchange a signed 32-bit value, additionally
2040 * passes back old value, ordered.
2041 *
2042 * @returns true if xchg was done.
2043 * @returns false if xchg wasn't done.
2044 *
2045 * @param pi32 Pointer to the value to update.
2046 * @param   i32New      The new value to assign to *pi32.
2047 * @param   i32Old      The old value to compare *pi32 with.
2048 * @param   pi32Old     Pointer to store the old value at.
2049 *
2050 * @remarks x86: Requires a 486 or later.
2051 */
2052DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2053{
2054 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2055}
2056
2057
2058/**
2059 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2060 * passing back old value, ordered.
2061 *
2062 * @returns true if xchg was done.
2063 * @returns false if xchg wasn't done.
2064 *
2065 * @param pu64 Pointer to the 64-bit variable to update.
2066 * @param u64New The 64-bit value to assign to *pu64.
2067 * @param u64Old The value to compare with.
2068 * @param   pu64Old     Pointer to store the old value at.
2069 *
2070 * @remarks x86: Requires a Pentium or later.
2071 */
2072#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2073 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2074RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2075#else
2076DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2077{
2078# if RT_INLINE_ASM_USES_INTRIN
2079 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2080
2081# elif defined(RT_ARCH_AMD64)
2082# if RT_INLINE_ASM_GNU_STYLE
2083 uint8_t u8Ret;
2084 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2085 "setz %1\n\t"
2086 : "=m" (*pu64)
2087 , "=qm" (u8Ret)
2088 , "=a" (*pu64Old)
2089 : "r" (u64New)
2090 , "a" (u64Old)
2091 , "m" (*pu64)
2092 : "cc");
2093 return (bool)u8Ret;
2094# else
2095 bool fRet;
2096 __asm
2097 {
2098        mov     rdx, [pu64]
2099 mov rax, [u64Old]
2100 mov rcx, [u64New]
2101 lock cmpxchg [rdx], rcx
2102 mov rdx, [pu64Old]
2103 mov [rdx], rax
2104 setz al
2105 mov [fRet], al
2106 }
2107 return fRet;
2108# endif
2109
2110# elif defined(RT_ARCH_X86)
2111# if RT_INLINE_ASM_GNU_STYLE
2112 uint64_t u64Ret;
2113# if defined(PIC) || defined(__PIC__)
2114 /* Note #1: This code uses a memory clobber description, because the clean
2115 solution with an output value for *pu64 makes gcc run out of
2116 registers. This will cause suboptimal code, and anyone with a
2117 better solution is welcome to improve this.
2118
2119 Note #2: We must prevent gcc from encoding the memory access, as it
2120 may go via the GOT if we're working on a global variable (like
2121 in the testcase). Thus we request a register (%3) and
2122 dereference it ourselves. */
2123 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2124 "lock; cmpxchg8b (%3)\n\t"
2125 "xchgl %%ebx, %1\n\t"
2126 : "=A" (u64Ret)
2127 : "DS" ((uint32_t)u64New)
2128 , "c" ((uint32_t)(u64New >> 32))
2129 , "r" (pu64) /* Do not use "m" here*/
2130 , "0" (u64Old)
2131 : "memory"
2132 , "cc" );
2133# else /* !PIC */
2134 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2135 : "=A" (u64Ret)
2136 , "=m" (*pu64)
2137 : "b" ((uint32_t)u64New)
2138 , "c" ((uint32_t)(u64New >> 32))
2139 , "m" (*pu64)
2140 , "0" (u64Old)
2141 : "cc");
2142# endif
2143 *pu64Old = u64Ret;
2144 return u64Ret == u64Old;
2145# else
2146 uint32_t u32Ret;
2147 __asm
2148 {
2149 mov ebx, dword ptr [u64New]
2150 mov ecx, dword ptr [u64New + 4]
2151 mov edi, [pu64]
2152 mov eax, dword ptr [u64Old]
2153 mov edx, dword ptr [u64Old + 4]
2154 lock cmpxchg8b [edi]
2155 mov ebx, [pu64Old]
2156 mov [ebx], eax
2157 setz al
2158 movzx eax, al
2159 add ebx, 4
2160 mov [ebx], edx
2161 mov dword ptr [u32Ret], eax
2162 }
2163 return !!u32Ret;
2164# endif
2165
2166# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2167 union { uint32_t u; bool f; } fXchg;
2168 uint64_t u64ActualOld;
2169 uint32_t rcSpill;
2170 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2171 RTASM_ARM_DMB_SY
2172# if defined(RT_ARCH_ARM64)
2173 "ldaxr %[uOld], %[pMem]\n\t"
2174 "cmp %[uOld], %[uCmp]\n\t"
2175 "bne 1f\n\t" /* stop here if not equal */
2176 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2177 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2178 "mov %w[fXchg], #1\n\t"
2179 "1:\n\t"
2180 "clrex\n\t"
2181# else
2182 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2183 "teq %[uOld], %[uCmp]\n\t"
2184 "teqeq %H[uOld], %H[uCmp]\n\t"
2185 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2186 "bne 1f\n\t" /* stop here if not equal */
2187 "cmp %[rc], #0\n\t"
2188 "bne Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2189 "mov %[fXchg], #1\n\t"
2190 "1:\n\t"
2191 /** @todo clrexne on armv7? */
2192# endif
2193 : [pMem] "+Q" (*pu64)
2194 , [uOld] "=&r" (u64ActualOld)
2195 , [rc] "=&r" (rcSpill)
2196 , [fXchg] "=&r" (fXchg.u)
2197 : [uCmp] "r" (u64Old)
2198 , [uNew] "r" (u64New)
2199 , "[fXchg]" (0)
2200 RTASM_ARM_DMB_SY_COMMA_IN_REG
2201 : "cc");
2202 *pu64Old = u64ActualOld;
2203 return fXchg.f;
2204
2205# else
2206# error "Port me"
2207# endif
2208}
2209#endif
2210
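/* Usage sketch (illustrative only): maintaining a shared 64-bit high-water
 * mark.  Because the Ex variant returns the conflicting value, the loop never
 * has to re-read *pu64Max separately; this also works on 32-bit hosts
 * (cmpxchg8b / ldrexd based).  The function name is hypothetical.
 *
 *      static void exampleUpdateMaxU64(volatile uint64_t RT_FAR *pu64Max, uint64_t u64New)
 *      {
 *          uint64_t u64Old = ASMAtomicUoReadU64(pu64Max);
 *          while (   u64New > u64Old
 *                 && !ASMAtomicCmpXchgExU64(pu64Max, u64New, u64Old, &u64Old))
 *              continue; // u64Old was refreshed by the failed compare-exchange
 *      }
 */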
2211
2212/**
2213 * Atomically Compare and exchange a signed 64-bit value, additionally
2214 * passing back old value, ordered.
2215 *
2216 * @returns true if xchg was done.
2217 * @returns false if xchg wasn't done.
2218 *
2219 * @param pi64 Pointer to the 64-bit variable to update.
2220 * @param   i64         The 64-bit value to assign to *pi64.
2221 * @param   i64Old      The value to compare with.
2222 * @param   pi64Old     Pointer to store the old value at.
2223 *
2224 * @remarks x86: Requires a Pentium or later.
2225 */
2226DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2227{
2228 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2229}
2230
2231#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
2232
2233/** @def RTASM_HAVE_CMP_XCHG_U128
2234 * Indicates that we've got ASMAtomicCmpSwapU128(), ASMAtomicCmpSwapU128v2()
2235 * and ASMAtomicCmpSwapExU128() available. */
2236# define RTASM_HAVE_CMP_XCHG_U128 1
2237
2238
2239/**
2240 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2241 *
2242 * @returns true if exchange was done.
2243 * @returns false if exchange wasn't done.
2244 *
2245 * @param pu128 Pointer to the 128-bit variable to update.
2246 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
2247 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
2248 * @param   u64OldHi    The high 64 bits of the value to compare with.
2249 * @param   u64OldLo    The low 64 bits of the value to compare with.
2250 * @param pu128Old Where to return the old value.
2251 *
2252 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2253 */
2254# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
2255DECLASM(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2256 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_PROTO;
2257# else
2258DECLINLINE(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2259 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_DEF
2260{
2261# if RT_INLINE_ASM_USES_INTRIN
2262 pu128Old->Hi = u64OldHi;
2263 pu128Old->Lo = u64OldLo;
2264 AssertCompileMemberOffset(uint128_t, Lo, 0);
2265 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, (__int64 *)&pu128Old->Lo) != 0;
2266
2267# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2268 uint128_t const uCmp = ((uint128_t)u64OldHi << 64) | u64OldLo;
2269 uint128_t const uOld = __sync_val_compare_and_swap(pu128, uCmp, ((uint128_t)u64NewHi << 64) | u64NewLo);
2270 *pu128Old = uOld;
2271 return uCmp == uOld;
2272
2273# elif defined(RT_ARCH_AMD64)
2274# if RT_INLINE_ASM_GNU_STYLE
2275 uint8_t bRet;
2276 uint64_t u64RetHi, u64RetLo;
2277 __asm__ __volatile__("lock; cmpxchg16b %3\n\t"
2278 "setz %b0\n\t"
2279 : "=r" (bRet)
2280 , "=a" (u64RetLo)
2281 , "=d" (u64RetHi)
2282 , "+m" (*pu128)
2283 : "a" (u64OldLo)
2284 , "d" (u64OldHi)
2285 , "b" (u64NewLo)
2286 , "c" (u64NewHi)
2287 : "cc");
2288 *pu128Old = ((uint128_t)u64RetHi << 64) | u64RetLo;
2289 return (bool)bRet;
2290# else
2291# error "Port me"
2292# endif
2293# else
2294# error "Port me"
2295# endif
2296}
2297# endif
2298
2299
2300/**
2301 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2302 *
2303 * @returns true if exchange was done.
2304 * @returns false if exchange wasn't done.
2305 *
2306 * @param pu128 Pointer to the 128-bit variable to update.
2307 * @param u128New The 128-bit value to assign to *pu128.
2308 * @param u128Old The value to compare with.
2309 * @param pu128Old Where to return the old value.
2310 *
2311 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2312 */
2313DECLINLINE(bool) ASMAtomicCmpXchgU128(volatile uint128_t *pu128, const uint128_t u128New,
2314 const uint128_t u128Old, uint128_t *pu128Old) RT_NOTHROW_DEF
2315{
2316# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
2317# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2318 uint128_t const uSwapped = __sync_val_compare_and_swap(pu128, u128Old, u128New);
2319 *pu128Old = uSwapped;
2320 return uSwapped == u128Old;
2321# else
2322 return ASMAtomicCmpXchgU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
2323 (uint64_t)(u128Old >> 64), (uint64_t)u128Old, pu128Old);
2324# endif
2325# else
2326 return ASMAtomicCmpXchgU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo, pu128Old);
2327# endif
2328}
2329
2330
2331/**
2332 * RTUINT128U wrapper for ASMAtomicCmpXchgU128.
2333 */
2334DECLINLINE(bool) ASMAtomicCmpXchgU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
2335 const RTUINT128U u128Old, PRTUINT128U pu128Old) RT_NOTHROW_DEF
2336{
2337# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2338 return ASMAtomicCmpXchgU128(&pu128->u, u128New.u, u128Old.u, &pu128Old->u);
2339# else
2340 return ASMAtomicCmpXchgU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo, &pu128Old->u);
2341# endif
2342}
2343
2344#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
2345
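/* Usage sketch (illustrative only): a 128-bit compare-and-swap over an
 * RTUINT128U, e.g. a pointer + generation counter pair updated in one shot to
 * sidestep ABA.  Only available when RTASM_HAVE_CMP_XCHG_U128 is defined; all
 * names below are hypothetical.
 *
 *      RTUINT128U u128Old, u128New, u128Actual;
 *      u128Old.s.Lo = (uintptr_t)pCurItem;
 *      u128Old.s.Hi = uCurGeneration;
 *      u128New.s.Lo = (uintptr_t)pNewItem;
 *      u128New.s.Hi = uCurGeneration + 1;
 *      if (!ASMAtomicCmpXchgU128U(&g_u128ExampleHead, u128New, u128Old, &u128Actual))
 *          exampleRetryWith(u128Actual); // the value currently in g_u128ExampleHead
 */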
2346
2347
2348/** @def ASMAtomicCmpXchgExHandle
2349 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2350 *
2351 * @param ph Pointer to the value to update.
2352 * @param   hNew        The new value to assign to *ph.
2353 * @param   hOld        The old value to compare *ph with.
2354 * @param fRc Where to store the result.
2355 * @param phOldVal Pointer to where to store the old value.
2356 *
2357 * @remarks This doesn't currently work for all handles (like RTFILE).
2358 */
2359#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2360# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2361 do { \
2362 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
2363 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
2364 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2365 } while (0)
2366#elif HC_ARCH_BITS == 64
2367# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2368 do { \
2369 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2370 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2371 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2372 } while (0)
2373#else
2374# error HC_ARCH_BITS
2375#endif
2376
2377
2378/** @def ASMAtomicCmpXchgExSize
2379 * Atomically Compare and Exchange a value whose size might differ
2380 * between platforms or compilers. Additionally passes back old value.
2381 *
2382 * @param pu Pointer to the value to update.
2383 * @param   uNew        The new value to assign to *pu.
2384 * @param   uOld        The old value to compare *pu with.
2385 * @param fRc Where to store the result.
2386 * @param puOldVal Pointer to where to store the old value.
2387 *
2388 * @remarks x86: Requires a 486 or later.
2389 */
2390#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2391 do { \
2392 switch (sizeof(*(pu))) { \
2393        case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
2394            break; \
2395        case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
2396            break; \
2397        default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2398            (fRc) = false; \
2399            *(puOldVal) = 0; \
2400 break; \
2401 } \
2402 } while (0)
2403
2404
2405/**
2406 * Atomically Compare and Exchange a pointer value, additionally
2407 * passing back old value, ordered.
2408 *
2409 * @returns true if xchg was done.
2410 * @returns false if xchg wasn't done.
2411 *
2412 * @param ppv Pointer to the value to update.
2413 * @param   pvNew       The new value to assign to *ppv.
2414 * @param   pvOld       The old value to compare *ppv with.
2415 * @param   ppvOld      Pointer to store the old value at.
2416 *
2417 * @remarks x86: Requires a 486 or later.
2418 */
2419DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2420 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2421{
2422#if ARCH_BITS == 32 || ARCH_BITS == 16
2423 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2424#elif ARCH_BITS == 64
2425 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2426#else
2427# error "ARCH_BITS is bogus"
2428#endif
2429}
2430
2431
2432/**
2433 * Atomically Compare and Exchange a pointer value, additionally
2434 * passing back old value, ordered.
2435 *
2436 * @returns true if xchg was done.
2437 * @returns false if xchg wasn't done.
2438 *
2439 * @param ppv Pointer to the value to update.
2440 * @param   pvNew       The new value to assign to *ppv.
2441 * @param   pvOld       The old value to compare *ppv with.
2442 * @param   ppvOld      Pointer to store the old value at.
2443 *
2444 * @remarks This is relatively type safe on GCC platforms.
2445 * @remarks x86: Requires a 486 or later.
2446 */
2447#ifdef __GNUC__
2448# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2449 __extension__ \
2450 ({\
2451 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2452 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2453 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2454 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2455 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2456 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2457 (void **)ppvOldTypeChecked); \
2458 fMacroRet; \
2459 })
2460#else
2461# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2462 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2463#endif
2464
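/* Usage sketch (illustrative only): popping from a minimal lock-free LIFO
 * with the type-checked macro.  EXAMPLENODE and the function name are
 * hypothetical; ABA and reclamation hazards are ignored for brevity.
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *      static EXAMPLENODE *exampleLifoPop(EXAMPLENODE * volatile *ppHead)
 *      {
 *          EXAMPLENODE *pOld = ASMAtomicReadPtrT(ppHead, EXAMPLENODE *);
 *          while (pOld && !ASMAtomicCmpXchgExPtr(ppHead, pOld->pNext, pOld, &pOld))
 *              continue; // pOld was refreshed by the failed compare-exchange
 *          return pOld;
 *      }
 */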
2465
2466/**
2467 * Virtualization unfriendly serializing instruction, always exits.
2468 */
2469#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2470RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2471#else
2472DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2473{
2474# if RT_INLINE_ASM_GNU_STYLE
2475 RTCCUINTREG xAX = 0;
2476# ifdef RT_ARCH_AMD64
2477 __asm__ __volatile__ ("cpuid"
2478 : "=a" (xAX)
2479 : "0" (xAX)
2480 : "rbx", "rcx", "rdx", "memory");
2481# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2482 __asm__ __volatile__ ("push %%ebx\n\t"
2483 "cpuid\n\t"
2484 "pop %%ebx\n\t"
2485 : "=a" (xAX)
2486 : "0" (xAX)
2487 : "ecx", "edx", "memory");
2488# else
2489 __asm__ __volatile__ ("cpuid"
2490 : "=a" (xAX)
2491 : "0" (xAX)
2492 : "ebx", "ecx", "edx", "memory");
2493# endif
2494
2495# elif RT_INLINE_ASM_USES_INTRIN
2496 int aInfo[4];
2497 _ReadWriteBarrier();
2498 __cpuid(aInfo, 0);
2499
2500# else
2501 __asm
2502 {
2503 push ebx
2504 xor eax, eax
2505 cpuid
2506 pop ebx
2507 }
2508# endif
2509}
2510#endif
2511
2512/**
2513 * Virtualization friendly serializing instruction, though more expensive.
2514 */
2515#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2516RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2517#else
2518DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2519{
2520# if RT_INLINE_ASM_GNU_STYLE
2521# ifdef RT_ARCH_AMD64
2522 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2523 "subq $128, %%rsp\n\t" /*redzone*/
2524 "mov %%ss, %%eax\n\t"
2525 "pushq %%rax\n\t"
2526 "pushq %%r10\n\t"
2527 "pushfq\n\t"
2528 "movl %%cs, %%eax\n\t"
2529 "pushq %%rax\n\t"
2530 "leaq 1f(%%rip), %%rax\n\t"
2531 "pushq %%rax\n\t"
2532 "iretq\n\t"
2533 "1:\n\t"
2534 ::: "rax", "r10", "memory", "cc");
2535# else
2536 __asm__ __volatile__ ("pushfl\n\t"
2537 "pushl %%cs\n\t"
2538 "pushl $1f\n\t"
2539 "iretl\n\t"
2540 "1:\n\t"
2541 ::: "memory");
2542# endif
2543
2544# else
2545 __asm
2546 {
2547 pushfd
2548 push cs
2549 push la_ret
2550 iretd
2551 la_ret:
2552 }
2553# endif
2554}
2555#endif
2556
2557/**
2558 * Virtualization friendlier serializing instruction, may still cause exits.
2559 */
2560#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2561RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2562#else
2563DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2564{
2565# if RT_INLINE_ASM_GNU_STYLE
2566 /* rdtscp is not supported by ancient linux build VM of course :-( */
2567# ifdef RT_ARCH_AMD64
2568 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2569 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2570# else
2571 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2572 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2573# endif
2574# else
2575# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2576 uint32_t uIgnore;
2577 _ReadWriteBarrier();
2578 (void)__rdtscp(&uIgnore);
2579 (void)uIgnore;
2580# else
2581 __asm
2582 {
2583 rdtscp
2584 }
2585# endif
2586# endif
2587}
2588#endif
2589
2590
2591/**
2592 * Serialize Instruction (both data store and instruction flush).
2593 */
2594#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2595# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2596#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2597# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2598#elif defined(RT_ARCH_SPARC64)
2599RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2600#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2601DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2602{
2603 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2604}
2605#else
2606# error "Port me"
2607#endif
2608
2609
2610/**
2611 * Memory fence, waits for any pending writes and reads to complete.
2612 * @note No implicit compiler barrier (which is probably stupid).
2613 */
2614DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2615{
2616#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2617# if RT_INLINE_ASM_GNU_STYLE
2618 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2619# elif RT_INLINE_ASM_USES_INTRIN
2620 _mm_mfence();
2621# else
2622 __asm
2623 {
2624 _emit 0x0f
2625 _emit 0xae
2626 _emit 0xf0
2627 }
2628# endif
2629#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2630 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2631#elif ARCH_BITS == 16
2632 uint16_t volatile u16;
2633 ASMAtomicXchgU16(&u16, 0);
2634#else
2635 uint32_t volatile u32;
2636 ASMAtomicXchgU32(&u32, 0);
2637#endif
2638}
2639
2640
2641/**
2642 * Write fence, waits for any pending writes to complete.
2643 * @note No implicit compiler barrier (which is probably stupid).
2644 */
2645DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2646{
2647#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2648# if RT_INLINE_ASM_GNU_STYLE
2649 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2650# elif RT_INLINE_ASM_USES_INTRIN
2651 _mm_sfence();
2652# else
2653 __asm
2654 {
2655 _emit 0x0f
2656 _emit 0xae
2657 _emit 0xf8
2658 }
2659# endif
2660#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2661 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2662#else
2663 ASMMemoryFence();
2664#endif
2665}
2666
2667
2668/**
2669 * Read fence, waits for any pending reads to complete.
2670 * @note No implicit compiler barrier (which is probably stupid).
2671 */
2672DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2673{
2674#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2675# if RT_INLINE_ASM_GNU_STYLE
2676 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2677# elif RT_INLINE_ASM_USES_INTRIN
2678 _mm_lfence();
2679# else
2680 __asm
2681 {
2682 _emit 0x0f
2683 _emit 0xae
2684 _emit 0xe8
2685 }
2686# endif
2687#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2688 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2689#else
2690 ASMMemoryFence();
2691#endif
2692}
2693
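/* Illustrative sketch: pairing ASMWriteFence and ASMReadFence to publish a
 * payload through a ready flag.  All names are hypothetical; since these
 * helpers imply no compiler barrier, the sketch relies on both globals being
 * volatile so the compiler keeps the program order shown.
 *
 *      // producer side
 *      g_u32ExamplePayload = 42;   // volatile uint32_t
 *      ASMWriteFence();            // payload becomes visible before the flag
 *      g_fExampleReady = true;     // volatile bool
 *
 *      // consumer side
 *      if (g_fExampleReady)
 *      {
 *          ASMReadFence();         // don't let the payload read pass the flag read
 *          exampleConsume(g_u32ExamplePayload);
 *      }
 */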
2694
2695/**
2696 * Atomically reads an unsigned 8-bit value, ordered.
2697 *
2698 * @returns Current *pu8 value
2699 * @param pu8 Pointer to the 8-bit variable to read.
2700 */
2701DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2702{
2703#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2704 uint32_t u32;
2705 __asm__ __volatile__("Lstart_ASMAtomicReadU8_%=:\n\t"
2706 RTASM_ARM_DMB_SY
2707# if defined(RT_ARCH_ARM64)
2708 "ldxrb %w[uDst], %[pMem]\n\t"
2709# else
2710 "ldrexb %[uDst], %[pMem]\n\t"
2711# endif
2712 : [uDst] "=&r" (u32)
2713 : [pMem] "Q" (*pu8)
2714 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2715 return (uint8_t)u32;
2716#else
2717 ASMMemoryFence();
2718 return *pu8; /* byte reads are atomic on x86 */
2719#endif
2720}
2721
2722
2723/**
2724 * Atomically reads an unsigned 8-bit value, unordered.
2725 *
2726 * @returns Current *pu8 value
2727 * @param pu8 Pointer to the 8-bit variable to read.
2728 */
2729DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2730{
2731#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2732 uint32_t u32;
2733 __asm__ __volatile__("Lstart_ASMAtomicUoReadU8_%=:\n\t"
2734# if defined(RT_ARCH_ARM64)
2735 "ldxrb %w[uDst], %[pMem]\n\t"
2736# else
2737 "ldrexb %[uDst], %[pMem]\n\t"
2738# endif
2739 : [uDst] "=&r" (u32)
2740 : [pMem] "Q" (*pu8));
2741 return (uint8_t)u32;
2742#else
2743 return *pu8; /* byte reads are atomic on x86 */
2744#endif
2745}
2746
2747
2748/**
2749 * Atomically reads a signed 8-bit value, ordered.
2750 *
2751 * @returns Current *pi8 value
2752 * @param pi8 Pointer to the 8-bit variable to read.
2753 */
2754DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2755{
2756 ASMMemoryFence();
2757#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2758 int32_t i32;
2759 __asm__ __volatile__("Lstart_ASMAtomicReadS8_%=:\n\t"
2760 RTASM_ARM_DMB_SY
2761# if defined(RT_ARCH_ARM64)
2762 "ldxrb %w[iDst], %[pMem]\n\t"
2763# else
2764 "ldrexb %[iDst], %[pMem]\n\t"
2765# endif
2766 : [iDst] "=&r" (i32)
2767 : [pMem] "Q" (*pi8)
2768 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2769 return (int8_t)i32;
2770#else
2771 return *pi8; /* byte reads are atomic on x86 */
2772#endif
2773}
2774
2775
2776/**
2777 * Atomically reads a signed 8-bit value, unordered.
2778 *
2779 * @returns Current *pi8 value
2780 * @param pi8 Pointer to the 8-bit variable to read.
2781 */
2782DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2783{
2784#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2785 int32_t i32;
2786 __asm__ __volatile__("Lstart_ASMAtomicUoReadS8_%=:\n\t"
2787# if defined(RT_ARCH_ARM64)
2788 "ldxrb %w[iDst], %[pMem]\n\t"
2789# else
2790 "ldrexb %[iDst], %[pMem]\n\t"
2791# endif
2792 : [iDst] "=&r" (i32)
2793 : [pMem] "Q" (*pi8));
2794 return (int8_t)i32;
2795#else
2796 return *pi8; /* byte reads are atomic on x86 */
2797#endif
2798}
2799
2800
2801/**
2802 * Atomically reads an unsigned 16-bit value, ordered.
2803 *
2804 * @returns Current *pu16 value
2805 * @param pu16 Pointer to the 16-bit variable to read.
2806 */
2807DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2808{
2809 Assert(!((uintptr_t)pu16 & 1));
2810#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2811 uint32_t u32;
2812 __asm__ __volatile__("Lstart_ASMAtomicReadU16_%=:\n\t"
2813 RTASM_ARM_DMB_SY
2814# if defined(RT_ARCH_ARM64)
2815 "ldxrh %w[uDst], %[pMem]\n\t"
2816# else
2817 "ldrexh %[uDst], %[pMem]\n\t"
2818# endif
2819 : [uDst] "=&r" (u32)
2820 : [pMem] "Q" (*pu16)
2821 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2822 return (uint16_t)u32;
2823#else
2824 ASMMemoryFence();
2825 return *pu16;
2826#endif
2827}
2828
2829
2830/**
2831 * Atomically reads an unsigned 16-bit value, unordered.
2832 *
2833 * @returns Current *pu16 value
2834 * @param pu16 Pointer to the 16-bit variable to read.
2835 */
2836DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2837{
2838 Assert(!((uintptr_t)pu16 & 1));
2839#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2840 uint32_t u32;
2841 __asm__ __volatile__("Lstart_ASMAtomicUoReadU16_%=:\n\t"
2842# if defined(RT_ARCH_ARM64)
2843 "ldxrh %w[uDst], %[pMem]\n\t"
2844# else
2845 "ldrexh %[uDst], %[pMem]\n\t"
2846# endif
2847 : [uDst] "=&r" (u32)
2848 : [pMem] "Q" (*pu16));
2849 return (uint16_t)u32;
2850#else
2851 return *pu16;
2852#endif
2853}
2854
2855
2856/**
2857 * Atomically reads a signed 16-bit value, ordered.
2858 *
2859 * @returns Current *pi16 value
2860 * @param pi16 Pointer to the 16-bit variable to read.
2861 */
2862DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2863{
2864 Assert(!((uintptr_t)pi16 & 1));
2865#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2866 int32_t i32;
2867 __asm__ __volatile__("Lstart_ASMAtomicReadS16_%=:\n\t"
2868 RTASM_ARM_DMB_SY
2869# if defined(RT_ARCH_ARM64)
2870 "ldxrh %w[iDst], %[pMem]\n\t"
2871# else
2872 "ldrexh %[iDst], %[pMem]\n\t"
2873# endif
2874 : [iDst] "=&r" (i32)
2875 : [pMem] "Q" (*pi16)
2876 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2877 return (int16_t)i32;
2878#else
2879 ASMMemoryFence();
2880 return *pi16;
2881#endif
2882}
2883
2884
2885/**
2886 * Atomically reads a signed 16-bit value, unordered.
2887 *
2888 * @returns Current *pi16 value
2889 * @param pi16 Pointer to the 16-bit variable to read.
2890 */
2891DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2892{
2893 Assert(!((uintptr_t)pi16 & 1));
2894#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2895 int32_t i32;
2896 __asm__ __volatile__("Lstart_ASMAtomicUoReadS16_%=:\n\t"
2897# if defined(RT_ARCH_ARM64)
2898 "ldxrh %w[iDst], %[pMem]\n\t"
2899# else
2900 "ldrexh %[iDst], %[pMem]\n\t"
2901# endif
2902 : [iDst] "=&r" (i32)
2903 : [pMem] "Q" (*pi16));
2904 return (int16_t)i32;
2905#else
2906 return *pi16;
2907#endif
2908}
2909
2910
2911/**
2912 * Atomically reads an unsigned 32-bit value, ordered.
2913 *
2914 * @returns Current *pu32 value
2915 * @param pu32 Pointer to the 32-bit variable to read.
2916 */
2917DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2918{
2919 Assert(!((uintptr_t)pu32 & 3));
2920#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2921 uint32_t u32;
2922 __asm__ __volatile__("Lstart_ASMAtomicReadU32_%=:\n\t"
2923 RTASM_ARM_DMB_SY
2924# if defined(RT_ARCH_ARM64)
2925 "ldxr %w[uDst], %[pMem]\n\t"
2926# else
2927 "ldrex %[uDst], %[pMem]\n\t"
2928# endif
2929 : [uDst] "=&r" (u32)
2930 : [pMem] "Q" (*pu32)
2931 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2932 return u32;
2933#else
2934 ASMMemoryFence();
2935# if ARCH_BITS == 16
2936 AssertFailed(); /** @todo 16-bit */
2937# endif
2938 return *pu32;
2939#endif
2940}
2941
2942
2943/**
2944 * Atomically reads an unsigned 32-bit value, unordered.
2945 *
2946 * @returns Current *pu32 value
2947 * @param pu32 Pointer to the 32-bit variable to read.
2948 */
2949DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2950{
2951 Assert(!((uintptr_t)pu32 & 3));
2952#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2953 uint32_t u32;
2954 __asm__ __volatile__("Lstart_ASMAtomicUoReadU32_%=:\n\t"
2955# if defined(RT_ARCH_ARM64)
2956 "ldxr %w[uDst], %[pMem]\n\t"
2957# else
2958 "ldrex %[uDst], %[pMem]\n\t"
2959# endif
2960 : [uDst] "=&r" (u32)
2961 : [pMem] "Q" (*pu32));
2962 return u32;
2963#else
2964# if ARCH_BITS == 16
2965 AssertFailed(); /** @todo 16-bit */
2966# endif
2967 return *pu32;
2968#endif
2969}
2970
2971
2972/**
2973 * Atomically reads a signed 32-bit value, ordered.
2974 *
2975 * @returns Current *pi32 value
2976 * @param pi32 Pointer to the 32-bit variable to read.
2977 */
2978DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2979{
2980 Assert(!((uintptr_t)pi32 & 3));
2981#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2982 int32_t i32;
2983 __asm__ __volatile__("Lstart_ASMAtomicReadS32_%=:\n\t"
2984 RTASM_ARM_DMB_SY
2985# if defined(RT_ARCH_ARM64)
2986 "ldxr %w[iDst], %[pMem]\n\t"
2987# else
2988 "ldrex %[iDst], %[pMem]\n\t"
2989# endif
2990 : [iDst] "=&r" (i32)
2991 : [pMem] "Q" (*pi32)
2992 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2993 return i32;
2994#else
2995 ASMMemoryFence();
2996# if ARCH_BITS == 16
2997 AssertFailed(); /** @todo 16-bit */
2998# endif
2999 return *pi32;
3000#endif
3001}
3002
3003
3004/**
3005 * Atomically reads a signed 32-bit value, unordered.
3006 *
3007 * @returns Current *pi32 value
3008 * @param pi32 Pointer to the 32-bit variable to read.
3009 */
3010DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
3011{
3012 Assert(!((uintptr_t)pi32 & 3));
3013#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3014 int32_t i32;
3015 __asm__ __volatile__("Lstart_ASMAtomicUoReadS32_%=:\n\t"
3016# if defined(RT_ARCH_ARM64)
3017 "ldxr %w[iDst], %[pMem]\n\t"
3018# else
3019 "ldrex %[iDst], %[pMem]\n\t"
3020# endif
3021 : [iDst] "=&r" (i32)
3022 : [pMem] "Q" (*pi32));
3023 return i32;
3024
3025#else
3026# if ARCH_BITS == 16
3027 AssertFailed(); /** @todo 16-bit */
3028# endif
3029 return *pi32;
3030#endif
3031}
3032
3033
3034/**
3035 * Atomically reads an unsigned 64-bit value, ordered.
3036 *
3037 * @returns Current *pu64 value
3038 * @param pu64 Pointer to the 64-bit variable to read.
3039 * The memory pointed to must be writable.
3040 *
3041 * @remarks This may fault if the memory is read-only!
3042 * @remarks x86: Requires a Pentium or later.
3043 */
3044#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
3045 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3046RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3047#else
3048DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3049{
3050 uint64_t u64;
3051# ifdef RT_ARCH_AMD64
3052 Assert(!((uintptr_t)pu64 & 7));
3053/*# if RT_INLINE_ASM_GNU_STYLE
3054 __asm__ __volatile__( "mfence\n\t"
3055 "movq %1, %0\n\t"
3056 : "=r" (u64)
3057 : "m" (*pu64));
3058# else
3059 __asm
3060 {
3061 mfence
3062 mov rdx, [pu64]
3063 mov rax, [rdx]
3064 mov [u64], rax
3065 }
3066# endif*/
3067 ASMMemoryFence();
3068 u64 = *pu64;
3069
3070# elif defined(RT_ARCH_X86)
3071# if RT_INLINE_ASM_GNU_STYLE
3072# if defined(PIC) || defined(__PIC__)
3073 uint32_t u32EBX = 0;
3074 Assert(!((uintptr_t)pu64 & 7));
3075 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3076 "lock; cmpxchg8b (%5)\n\t"
3077 "movl %3, %%ebx\n\t"
3078 : "=A" (u64)
3079# if RT_GNUC_PREREQ(4, 3)
3080 , "+m" (*pu64)
3081# else
3082 , "=m" (*pu64)
3083# endif
3084 : "0" (0ULL)
3085 , "m" (u32EBX)
3086 , "c" (0)
3087 , "S" (pu64)
3088 : "cc");
3089# else /* !PIC */
3090 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3091 : "=A" (u64)
3092 , "+m" (*pu64)
3093 : "0" (0ULL)
3094 , "b" (0)
3095 , "c" (0)
3096 : "cc");
3097# endif
3098# else
3099 Assert(!((uintptr_t)pu64 & 7));
3100 __asm
3101 {
3102 xor eax, eax
3103 xor edx, edx
3104 mov edi, pu64
3105 xor ecx, ecx
3106 xor ebx, ebx
3107 lock cmpxchg8b [edi]
3108 mov dword ptr [u64], eax
3109 mov dword ptr [u64 + 4], edx
3110 }
3111# endif
3112
3113# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3114 Assert(!((uintptr_t)pu64 & 7));
3115 __asm__ __volatile__("Lstart_ASMAtomicReadU64_%=:\n\t"
3116 RTASM_ARM_DMB_SY
3117# if defined(RT_ARCH_ARM64)
3118 "ldxr %[uDst], %[pMem]\n\t"
3119# else
3120 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3121# endif
3122 : [uDst] "=&r" (u64)
3123 : [pMem] "Q" (*pu64)
3124 RTASM_ARM_DMB_SY_COMMA_IN_REG);
3125
3126# else
3127# error "Port me"
3128# endif
3129 return u64;
3130}
3131#endif
3132
3133
3134/**
3135 * Atomically reads an unsigned 64-bit value, unordered.
3136 *
3137 * @returns Current *pu64 value
3138 * @param pu64 Pointer to the 64-bit variable to read.
3139 * The memory pointed to must be writable.
3140 *
3141 * @remarks This may fault if the memory is read-only!
3142 * @remarks x86: Requires a Pentium or later.
3143 */
3144#if !defined(RT_ARCH_AMD64) \
3145 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
3146 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
3147RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3148#else
3149DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3150{
3151 uint64_t u64;
3152# ifdef RT_ARCH_AMD64
3153 Assert(!((uintptr_t)pu64 & 7));
3154/*# if RT_INLINE_ASM_GNU_STYLE
3155 Assert(!((uintptr_t)pu64 & 7));
3156 __asm__ __volatile__("movq %1, %0\n\t"
3157 : "=r" (u64)
3158 : "m" (*pu64));
3159# else
3160 __asm
3161 {
3162 mov rdx, [pu64]
3163 mov rax, [rdx]
3164 mov [u64], rax
3165 }
3166# endif */
3167 u64 = *pu64;
3168
3169# elif defined(RT_ARCH_X86)
3170# if RT_INLINE_ASM_GNU_STYLE
3171# if defined(PIC) || defined(__PIC__)
3172 uint32_t u32EBX = 0;
3173 uint32_t u32Spill;
3174 Assert(!((uintptr_t)pu64 & 7));
3175 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3176 "xor %%ecx,%%ecx\n\t"
3177 "xor %%edx,%%edx\n\t"
3178 "xchgl %%ebx, %3\n\t"
3179 "lock; cmpxchg8b (%4)\n\t"
3180 "movl %3, %%ebx\n\t"
3181 : "=A" (u64)
3182# if RT_GNUC_PREREQ(4, 3)
3183 , "+m" (*pu64)
3184# else
3185 , "=m" (*pu64)
3186# endif
3187 , "=c" (u32Spill)
3188 : "m" (u32EBX)
3189 , "S" (pu64)
3190 : "cc");
3191# else /* !PIC */
3192 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3193 : "=A" (u64)
3194 , "+m" (*pu64)
3195 : "0" (0ULL)
3196 , "b" (0)
3197 , "c" (0)
3198 : "cc");
3199# endif
3200# else
3201 Assert(!((uintptr_t)pu64 & 7));
3202 __asm
3203 {
3204 xor eax, eax
3205 xor edx, edx
3206 mov edi, pu64
3207 xor ecx, ecx
3208 xor ebx, ebx
3209 lock cmpxchg8b [edi]
3210 mov dword ptr [u64], eax
3211 mov dword ptr [u64 + 4], edx
3212 }
3213# endif
3214
3215# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3216 Assert(!((uintptr_t)pu64 & 7));
3217 __asm__ __volatile__("Lstart_ASMAtomicUoReadU64_%=:\n\t"
3218# if defined(RT_ARCH_ARM64)
3219 "ldxr %[uDst], %[pMem]\n\t"
3220# else
3221 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3222# endif
3223 : [uDst] "=&r" (u64)
3224 : [pMem] "Q" (*pu64));
3225
3226# else
3227# error "Port me"
3228# endif
3229 return u64;
3230}
3231#endif
3232
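/* Illustrative sketch: the unordered variant fits polling loops where exact
 * ordering against other memory accesses does not matter, while the ordered
 * variant (with its implied fence) is used for the final authoritative read.
 * Names are hypothetical.
 *
 *      while (ASMAtomicUoReadU64(&g_u64ExampleCounter) < u64Target)
 *          exampleDoOtherWork();
 *      uint64_t const u64Final = ASMAtomicReadU64(&g_u64ExampleCounter);
 */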
3233
3234/**
3235 * Atomically reads a signed 64-bit value, ordered.
3236 *
3237 * @returns Current *pi64 value
3238 * @param pi64 Pointer to the 64-bit variable to read.
3239 * The memory pointed to must be writable.
3240 *
3241 * @remarks This may fault if the memory is read-only!
3242 * @remarks x86: Requires a Pentium or later.
3243 */
3244DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3245{
3246 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3247}
3248
3249
3250/**
3251 * Atomically reads a signed 64-bit value, unordered.
3252 *
3253 * @returns Current *pi64 value
3254 * @param pi64 Pointer to the 64-bit variable to read.
3255 * The memory pointed to must be writable.
3256 *
3257 * @remarks This will fault if the memory is read-only!
3258 * @remarks x86: Requires a Pentium or later.
3259 */
3260DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3261{
3262 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3263}
3264
3265
3266/**
3267 * Atomically reads a size_t value, ordered.
3268 *
3269 * @returns Current *pcb value
3270 * @param pcb Pointer to the size_t variable to read.
3271 */
3272DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3273{
3274#if ARCH_BITS == 64
3275 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3276#elif ARCH_BITS == 32
3277 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3278#elif ARCH_BITS == 16
3279 AssertCompileSize(size_t, 2);
3280 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3281#else
3282# error "Unsupported ARCH_BITS value"
3283#endif
3284}
3285
3286
3287/**
3288 * Atomically reads a size_t value, unordered.
3289 *
3290 * @returns Current *pcb value
3291 * @param pcb Pointer to the size_t variable to read.
3292 */
3293DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3294{
3295#if ARCH_BITS == 64
3296 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3297#elif ARCH_BITS == 32
3298 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3299#elif ARCH_BITS == 16
3300 AssertCompileSize(size_t, 2);
3301 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3302#else
3303# error "Unsupported ARCH_BITS value"
3304#endif
3305}
3306
3307
3308/**
3309 * Atomically reads a pointer value, ordered.
3310 *
3311 * @returns Current *pv value
3312 * @param ppv Pointer to the pointer variable to read.
3313 *
3314 * @remarks Please use ASMAtomicReadPtrT, which provides better type safety and
3315 * requires less typing (no casts).
3316 */
3317DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3318{
3319#if ARCH_BITS == 32 || ARCH_BITS == 16
3320 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3321#elif ARCH_BITS == 64
3322 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3323#else
3324# error "ARCH_BITS is bogus"
3325#endif
3326}
3327
3328/**
3329 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
3330 *
3331 * @returns Current *pv value
3332 * @param ppv Pointer to the pointer variable to read.
3333 * @param Type The type of *ppv, sans volatile.
3334 */
3335#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3336# define ASMAtomicReadPtrT(ppv, Type) \
3337 __extension__ \
3338 ({\
3339 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
3340 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
3341 pvTypeChecked; \
3342 })
3343#else
3344# define ASMAtomicReadPtrT(ppv, Type) \
3345 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3346#endif
3347
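/* Usage sketch (illustrative only): ASMAtomicReadPtrT keeps the pointer type
 * and spares the casts a raw ASMAtomicReadPtr call would need.  The structure
 * and names are hypothetical.
 *
 *      typedef struct EXAMPLECFG { uint32_t cItems; } EXAMPLECFG;
 *      EXAMPLECFG * volatile g_pExampleCfg;
 *
 *      EXAMPLECFG *pCfg = ASMAtomicReadPtrT(&g_pExampleCfg, EXAMPLECFG *);
 *      if (pCfg)
 *          exampleProcessItems(pCfg->cItems);
 */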
3348
3349/**
3350 * Atomically reads a pointer value, unordered.
3351 *
3352 * @returns Current *pv value
3353 * @param ppv Pointer to the pointer variable to read.
3354 *
3355 * @remarks Please use ASMAtomicUoReadPtrT, which provides better type safety and
3356 * requires less typing (no casts).
3357 */
3358DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3359{
3360#if ARCH_BITS == 32 || ARCH_BITS == 16
3361 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3362#elif ARCH_BITS == 64
3363 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3364#else
3365# error "ARCH_BITS is bogus"
3366#endif
3367}
3368
3369
3370/**
3371 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
3372 *
3373 * @returns Current *pv value
3374 * @param ppv Pointer to the pointer variable to read.
3375 * @param Type The type of *ppv, sans volatile.
3376 */
3377#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3378# define ASMAtomicUoReadPtrT(ppv, Type) \
3379 __extension__ \
3380 ({\
3381 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3382 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
3383 pvTypeChecked; \
3384 })
3385#else
3386# define ASMAtomicUoReadPtrT(ppv, Type) \
3387 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3388#endif
3389
3390
3391/**
3392 * Atomically reads a boolean value, ordered.
3393 *
3394 * @returns Current *pf value
3395 * @param pf Pointer to the boolean variable to read.
3396 */
3397DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3398{
3399 ASMMemoryFence();
3400 return *pf; /* byte reads are atomic on x86 */
3401}
3402
3403
3404/**
3405 * Atomically reads a boolean value, unordered.
3406 *
3407 * @returns Current *pf value
3408 * @param pf Pointer to the boolean variable to read.
3409 */
3410DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3411{
3412 return *pf; /* byte reads are atomic on x86 */
3413}
3414
3415
3416/**
3417 * Atomically read a typical IPRT handle value, ordered.
3418 *
3419 * @param ph Pointer to the handle variable to read.
3420 * @param phRes Where to store the result.
3421 *
3422 * @remarks This doesn't currently work for all handles (like RTFILE).
3423 */
3424#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3425# define ASMAtomicReadHandle(ph, phRes) \
3426 do { \
3427 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3428 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3429 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3430 } while (0)
3431#elif HC_ARCH_BITS == 64
3432# define ASMAtomicReadHandle(ph, phRes) \
3433 do { \
3434 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3435 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3436 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3437 } while (0)
3438#else
3439# error HC_ARCH_BITS
3440#endif
3441
3442
3443/**
3444 * Atomically read a typical IPRT handle value, unordered.
3445 *
3446 * @param ph Pointer to the handle variable to read.
3447 * @param phRes Where to store the result.
3448 *
3449 * @remarks This doesn't currently work for all handles (like RTFILE).
3450 */
3451#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3452# define ASMAtomicUoReadHandle(ph, phRes) \
3453 do { \
3454 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3455 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3456 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3457 } while (0)
3458#elif HC_ARCH_BITS == 64
3459# define ASMAtomicUoReadHandle(ph, phRes) \
3460 do { \
3461 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3462 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3463 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3464 } while (0)
3465#else
3466# error HC_ARCH_BITS
3467#endif
3468
3469
3470/**
3471 * Atomically read a value whose size might differ
3472 * between platforms or compilers, ordered.
3473 *
3474 * @param pu Pointer to the variable to read.
3475 * @param puRes Where to store the result.
3476 */
3477#define ASMAtomicReadSize(pu, puRes) \
3478 do { \
3479 switch (sizeof(*(pu))) { \
3480 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3481 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3482 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3483 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3484 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3485 } \
3486 } while (0)
3487
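/* Usage sketch (illustrative only): ASMAtomicReadSize picks the matching
 * fixed-size read from sizeof(), which is handy for types whose width depends
 * on the target, e.g. uintptr_t.  Names are hypothetical.
 *
 *      uintptr_t volatile g_uExampleShared;
 *      uintptr_t          uSnapshot;
 *      ASMAtomicReadSize(&g_uExampleShared, &uSnapshot);
 */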
3488
3489/**
3490 * Atomically read a value whose size might differ
3491 * between platforms or compilers, unordered.
3492 *
3493 * @param pu Pointer to the variable to read.
3494 * @param puRes Where to store the result.
3495 */
3496#define ASMAtomicUoReadSize(pu, puRes) \
3497 do { \
3498 switch (sizeof(*(pu))) { \
3499 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3500 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3501 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3502 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3503            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3504 } \
3505 } while (0)
3506
3507
3508/**
3509 * Atomically writes an unsigned 8-bit value, ordered.
3510 *
3511 * @param pu8 Pointer to the 8-bit variable.
3512 * @param u8 The 8-bit value to assign to *pu8.
3513 */
3514DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3515{
3516 /** @todo Any possible ARM32/ARM64 optimizations here? */
3517 ASMAtomicXchgU8(pu8, u8);
3518}
3519
3520
3521/**
3522 * Atomically writes an unsigned 8-bit value, unordered.
3523 *
3524 * @param pu8 Pointer to the 8-bit variable.
3525 * @param u8 The 8-bit value to assign to *pu8.
3526 */
3527DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3528{
3529 /** @todo Any possible ARM32/ARM64 improvements here? */
3530 *pu8 = u8; /* byte writes are atomic on x86 */
3531}
3532
3533
3534/**
3535 * Atomically writes a signed 8-bit value, ordered.
3536 *
3537 * @param pi8 Pointer to the 8-bit variable to write.
3538 * @param i8 The 8-bit value to assign to *pi8.
3539 */
3540DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3541{
3542 /** @todo Any possible ARM32/ARM64 optimizations here? */
3543 ASMAtomicXchgS8(pi8, i8);
3544}
3545
3546
3547/**
3548 * Atomically writes a signed 8-bit value, unordered.
3549 *
3550 * @param pi8 Pointer to the 8-bit variable to write.
3551 * @param i8 The 8-bit value to assign to *pi8.
3552 */
3553DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3554{
3555 *pi8 = i8; /* byte writes are atomic on x86 */
3556}
3557
3558
3559/**
3560 * Atomically writes an unsigned 16-bit value, ordered.
3561 *
3562 * @param pu16 Pointer to the 16-bit variable to write.
3563 * @param u16 The 16-bit value to assign to *pu16.
3564 */
3565DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3566{
3567 /** @todo Any possible ARM32/ARM64 optimizations here? */
3568 ASMAtomicXchgU16(pu16, u16);
3569}
3570
3571
3572/**
3573 * Atomically writes an unsigned 16-bit value, unordered.
3574 *
3575 * @param pu16 Pointer to the 16-bit variable to write.
3576 * @param u16 The 16-bit value to assign to *pu16.
3577 */
3578DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3579{
3580 Assert(!((uintptr_t)pu16 & 1));
3581 *pu16 = u16;
3582}
3583
3584
3585/**
3586 * Atomically writes a signed 16-bit value, ordered.
3587 *
3588 * @param pi16 Pointer to the 16-bit variable to write.
3589 * @param i16 The 16-bit value to assign to *pi16.
3590 */
3591DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3592{
3593 /** @todo Any possible ARM32/ARM64 optimizations here? */
3594 ASMAtomicXchgS16(pi16, i16);
3595}
3596
3597
3598/**
3599 * Atomically writes a signed 16-bit value, unordered.
3600 *
3601 * @param pi16 Pointer to the 16-bit variable to write.
3602 * @param i16 The 16-bit value to assign to *pi16.
3603 */
3604DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3605{
3606 Assert(!((uintptr_t)pi16 & 1));
3607 *pi16 = i16;
3608}
3609
3610
3611/**
3612 * Atomically writes an unsigned 32-bit value, ordered.
3613 *
3614 * @param pu32 Pointer to the 32-bit variable to write.
3615 * @param u32 The 32-bit value to assign to *pu32.
3616 */
3617DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3618{
3619 /** @todo Any possible ARM32/ARM64 optimizations here? */
3620 ASMAtomicXchgU32(pu32, u32);
3621}
3622
3623
3624/**
3625 * Atomically writes an unsigned 32-bit value, unordered.
3626 *
3627 * @param pu32 Pointer to the 32-bit variable to write.
3628 * @param u32 The 32-bit value to assign to *pu32.
3629 */
3630DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3631{
3632 Assert(!((uintptr_t)pu32 & 3));
3633#if ARCH_BITS >= 32
3634 *pu32 = u32;
3635#else
3636 ASMAtomicXchgU32(pu32, u32);
3637#endif
3638}
3639
3640
3641/**
3642 * Atomically writes a signed 32-bit value, ordered.
3643 *
3644 * @param pi32 Pointer to the 32-bit variable to write.
3645 * @param i32 The 32-bit value to assign to *pi32.
3646 */
3647DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3648{
3649 ASMAtomicXchgS32(pi32, i32);
3650}
3651
3652
3653/**
3654 * Atomically writes a signed 32-bit value, unordered.
3655 *
3656 * @param pi32 Pointer to the 32-bit variable to write.
3657 * @param i32 The 32-bit value to assign to *pi32.
3658 */
3659DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3660{
3661 Assert(!((uintptr_t)pi32 & 3));
3662#if ARCH_BITS >= 32
3663 *pi32 = i32;
3664#else
3665 ASMAtomicXchgS32(pi32, i32);
3666#endif
3667}
3668
3669
3670/**
3671 * Atomically writes an unsigned 64-bit value, ordered.
3672 *
3673 * @param pu64 Pointer to the 64-bit variable to write.
3674 * @param u64 The 64-bit value to assign to *pu64.
3675 */
3676DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3677{
3678 /** @todo Any possible ARM32/ARM64 optimizations here? */
3679 ASMAtomicXchgU64(pu64, u64);
3680}
3681
3682
3683/**
3684 * Atomically writes an unsigned 64-bit value, unordered.
3685 *
3686 * @param pu64 Pointer to the 64-bit variable to write.
3687 * @param u64 The 64-bit value to assign to *pu64.
3688 */
3689DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3690{
3691 Assert(!((uintptr_t)pu64 & 7));
3692#if ARCH_BITS == 64
3693 *pu64 = u64;
3694#else
3695 ASMAtomicXchgU64(pu64, u64);
3696#endif
3697}
3698
3699
3700/**
3701 * Atomically writes a signed 64-bit value, ordered.
3702 *
3703 * @param pi64 Pointer to the 64-bit variable to write.
3704 * @param i64 The 64-bit value to assign to *pi64.
3705 */
3706DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3707{
3708 /** @todo Any possible ARM32/ARM64 optimizations here? */
3709 ASMAtomicXchgS64(pi64, i64);
3710}
3711
3712
3713/**
3714 * Atomically writes a signed 64-bit value, unordered.
3715 *
3716 * @param pi64 Pointer to the 64-bit variable to write.
3717 * @param i64 The 64-bit value to assign to *pi64.
3718 */
3719DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3720{
3721 Assert(!((uintptr_t)pi64 & 7));
3722#if ARCH_BITS == 64
3723 *pi64 = i64;
3724#else
3725 ASMAtomicXchgS64(pi64, i64);
3726#endif
3727}
3728
3729
3730/**
3731 * Atomically writes a size_t value, ordered.
3732 *
3733 * @param pcb Pointer to the size_t variable to write.
3734 * @param cb The value to assign to *pcb.
3735 */
3736DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3737{
3738#if ARCH_BITS == 64
3739 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3740#elif ARCH_BITS == 32
3741 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3742#elif ARCH_BITS == 16
3743 AssertCompileSize(size_t, 2);
3744 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3745#else
3746# error "Unsupported ARCH_BITS value"
3747#endif
3748}
3749
3750
3751/**
3752 * Atomically writes a size_t value, unordered.
3753 *
3754 * @param pcb Pointer to the size_t variable to write.
3755 * @param cb The value to assign to *pcb.
3756 */
3757DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3758{
3759#if ARCH_BITS == 64
3760 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3761#elif ARCH_BITS == 32
3762 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3763#elif ARCH_BITS == 16
3764 AssertCompileSize(size_t, 2);
3765 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3766#else
3767# error "Unsupported ARCH_BITS value"
3768#endif
3769}
3770
3771
3772/**
3773 * Atomically writes a boolean value, ordered.
3774 *
3775 * @param pf Pointer to the boolean variable to write.
3776 * @param f The boolean value to assign to *pf.
3777 */
3778DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3779{
3780 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3781}
3782
3783
3784/**
3785 * Atomically writes a boolean value, unordered.
3786 *
3787 * @param pf Pointer to the boolean variable to write.
3788 * @param f The boolean value to assign to *pf.
3789 */
3790DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3791{
3792 *pf = f; /* byte writes are atomic on x86 */
3793}
3794
3795
3796/**
3797 * Atomically writes a pointer value, ordered.
3798 *
3799 * @param ppv Pointer to the pointer variable to write.
3800 * @param pv The pointer value to assign to *ppv.
3801 */
3802DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3803{
3804#if ARCH_BITS == 32 || ARCH_BITS == 16
3805 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3806#elif ARCH_BITS == 64
3807 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3808#else
3809# error "ARCH_BITS is bogus"
3810#endif
3811}
3812
3813
3814/**
3815 * Atomically writes a pointer value, unordered.
3816 *
3817 * @param ppv Pointer to the pointer variable to write.
3818 * @param pv The pointer value to assign to *ppv.
3819 */
3820DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3821{
3822#if ARCH_BITS == 32 || ARCH_BITS == 16
3823 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3824#elif ARCH_BITS == 64
3825 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3826#else
3827# error "ARCH_BITS is bogus"
3828#endif
3829}
3830
3831
3832/**
3833 * Atomically writes a pointer value, ordered.
3834 *
3835 * @param ppv Pointer to the pointer variable to write.
3836 * @param pv The pointer value to assign to *ppv. If NULL use
3837 * ASMAtomicWriteNullPtr or you'll land in trouble.
3838 *
3839 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3840 * NULL.
3841 */
3842#ifdef __GNUC__
3843# define ASMAtomicWritePtr(ppv, pv) \
3844 do \
3845 { \
3846 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3847 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3848 \
3849 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3850 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3851 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3852 \
3853 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3854 } while (0)
3855#else
3856# define ASMAtomicWritePtr(ppv, pv) \
3857 do \
3858 { \
3859 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3860 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3861 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3862 \
3863 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3864 } while (0)
3865#endif
3866
3867
3868/**
3869 * Atomically sets a pointer to NULL, ordered.
3870 *
3871 * @param ppv Pointer to the pointer variable that should be set to NULL.
3872 *
3873 * @remarks This is relatively type safe on GCC platforms.
3874 */
3875#if RT_GNUC_PREREQ(4, 2)
3876# define ASMAtomicWriteNullPtr(ppv) \
3877 do \
3878 { \
3879 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3880 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3881 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3882 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3883 } while (0)
3884#else
3885# define ASMAtomicWriteNullPtr(ppv) \
3886 do \
3887 { \
3888 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3889 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3890 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3891 } while (0)
3892#endif
3893
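/* Example (usage sketch): publishing a caller-defined node pointer to other
 * threads and later retracting it; PMYNODE is an assumed typedef and
 * RTMemAllocZ comes from iprt/mem.h.
 *
 * @code
 *      PMYNODE volatile g_pSharedNode = NULL;
 *      ...
 *      PMYNODE pNew = (PMYNODE)RTMemAllocZ(sizeof(*pNew));
 *      ASMAtomicWritePtr(&g_pSharedNode, pNew);
 *      ...
 *      ASMAtomicWriteNullPtr(&g_pSharedNode);
 * @endcode
 */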
3894
3895/**
3896 * Atomically writes a pointer value, unordered.
3897 *
3899 * @param ppv Pointer to the pointer variable.
3900 * @param pv The pointer value to assign to *ppv. If NULL use
3901 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3902 *
3903 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3904 * NULL.
3905 */
3906#if RT_GNUC_PREREQ(4, 2)
3907# define ASMAtomicUoWritePtr(ppv, pv) \
3908 do \
3909 { \
3910 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3911 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3912 \
3913 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3914 AssertCompile(sizeof(pv) == sizeof(void *)); \
3915 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3916 \
3917 *(ppvTypeChecked) = pvTypeChecked; \
3918 } while (0)
3919#else
3920# define ASMAtomicUoWritePtr(ppv, pv) \
3921 do \
3922 { \
3923 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3924 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3925 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3926 *(ppv) = pv; \
3927 } while (0)
3928#endif
3929
3930
3931/**
3932 * Atomically sets a pointer to NULL, unordered.
3933 *
3934 * @param ppv Pointer to the pointer variable that should be set to NULL.
3935 *
3936 * @remarks This is relatively type safe on GCC platforms.
3937 */
3938#ifdef __GNUC__
3939# define ASMAtomicUoWriteNullPtr(ppv) \
3940 do \
3941 { \
3942 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3943 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3944 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3945 *(ppvTypeChecked) = NULL; \
3946 } while (0)
3947#else
3948# define ASMAtomicUoWriteNullPtr(ppv) \
3949 do \
3950 { \
3951 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3952 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3953 *(ppv) = NULL; \
3954 } while (0)
3955#endif
3956
3957
3958/**
3959 * Atomically write a typical IPRT handle value, ordered.
3960 *
3961 * @param ph Pointer to the variable to update.
3962 * @param hNew The value to assign to *ph.
3963 *
3964 * @remarks This doesn't currently work for all handles (like RTFILE).
3965 */
3966#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3967# define ASMAtomicWriteHandle(ph, hNew) \
3968 do { \
3969 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3970 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3971 } while (0)
3972#elif HC_ARCH_BITS == 64
3973# define ASMAtomicWriteHandle(ph, hNew) \
3974 do { \
3975 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3976 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3977 } while (0)
3978#else
3979# error HC_ARCH_BITS
3980#endif
3981
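/* Example (usage sketch): publishing a freshly created handle with the ordered
 * write macro above; g_hEvtShared is an assumed caller-defined global of type
 * 'RTSEMEVENT volatile', and RTSemEventCreate comes from iprt/semaphore.h.
 *
 * @code
 *      RTSEMEVENT hEvt = NIL_RTSEMEVENT;
 *      int rc = RTSemEventCreate(&hEvt);
 *      if (RT_SUCCESS(rc))
 *          ASMAtomicWriteHandle(&g_hEvtShared, hEvt);
 * @endcode
 */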
3982
3983/**
3984 * Atomically write a typical IPRT handle value, unordered.
3985 *
3986 * @param ph Pointer to the variable to update.
3987 * @param hNew The value to assign to *ph.
3988 *
3989 * @remarks This doesn't currently work for all handles (like RTFILE).
3990 */
3991#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3992# define ASMAtomicUoWriteHandle(ph, hNew) \
3993 do { \
3994 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3995 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3996 } while (0)
3997#elif HC_ARCH_BITS == 64
3998# define ASMAtomicUoWriteHandle(ph, hNew) \
3999 do { \
4000 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4001 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
4002 } while (0)
4003#else
4004# error HC_ARCH_BITS
4005#endif
4006
4007
4008/**
4009 * Atomically write a value whose size might differ
4010 * between platforms or compilers, ordered.
4011 *
4012 * @param pu Pointer to the variable to update.
4013 * @param uNew The value to assign to *pu.
4014 */
4015#define ASMAtomicWriteSize(pu, uNew) \
4016 do { \
4017 switch (sizeof(*(pu))) { \
4018 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
4019 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
4020 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4021 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4022 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4023 } \
4024 } while (0)
4025
4026/**
4027 * Atomically write a value whose size might differ
4028 * between platforms or compilers, unordered.
4029 *
4030 * @param pu Pointer to the variable to update.
4031 * @param uNew The value to assign to *pu.
4032 */
4033#define ASMAtomicUoWriteSize(pu, uNew) \
4034 do { \
4035 switch (sizeof(*(pu))) { \
4036 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
4037 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
4038 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4039 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4040 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4041 } \
4042 } while (0)
4043
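/* Example (usage sketch): resetting the variable-width counter from the
 * ASMAtomicReadSize sketch above; the macro picks the 32-bit or 64-bit write
 * based on sizeof(*pu), so the same line works on both architectures.
 *
 * @code
 *      ASMAtomicWriteSize(&cPending, 0);
 * @endcode
 */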
4044
4045
4046/**
4047 * Atomically exchanges and adds to a 16-bit value, ordered.
4048 *
4049 * @returns The old value.
4050 * @param pu16 Pointer to the value.
4051 * @param u16 Number to add.
4052 *
4053 * @remarks Currently not implemented, just to make 16-bit code happy.
4054 * @remarks x86: Requires a 486 or later.
4055 */
4056RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
4057
4058
4059/**
4060 * Atomically exchanges and adds to a 32-bit value, ordered.
4061 *
4062 * @returns The old value.
4063 * @param pu32 Pointer to the value.
4064 * @param u32 Number to add.
4065 *
4066 * @remarks x86: Requires a 486 or later.
4067 */
4068#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4069RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4070#else
4071DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4072{
4073# if RT_INLINE_ASM_USES_INTRIN
4074 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
4075 return u32;
4076
4077# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4078# if RT_INLINE_ASM_GNU_STYLE
4079 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4080 : "=r" (u32)
4081 , "=m" (*pu32)
4082 : "0" (u32)
4083 , "m" (*pu32)
4084 : "memory"
4085 , "cc");
4086 return u32;
4087# else
4088 __asm
4089 {
4090 mov eax, [u32]
4091# ifdef RT_ARCH_AMD64
4092 mov rdx, [pu32]
4093 lock xadd [rdx], eax
4094# else
4095 mov edx, [pu32]
4096 lock xadd [edx], eax
4097# endif
4098 mov [u32], eax
4099 }
4100 return u32;
4101# endif
4102
4103# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4104 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
4105 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
4106 "add %[uNew], %[uOld], %[uVal]\n\t",
4107 [uVal] "r" (u32));
4108 return u32OldRet;
4109
4110# else
4111# error "Port me"
4112# endif
4113}
4114#endif
4115
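/* Example (usage sketch): ASMAtomicAddU32 returns the value *before* the add,
 * so it can hand out blocks of sequence numbers; s_uNextSeqNo is an assumed
 * caller-defined counter and the caller below owns [uFirstSeqNo, uFirstSeqNo + 16).
 *
 * @code
 *      static uint32_t volatile s_uNextSeqNo = 0;
 *      ...
 *      uint32_t const uFirstSeqNo = ASMAtomicAddU32(&s_uNextSeqNo, 16);
 * @endcode
 */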
4116
4117/**
4118 * Atomically exchanges and adds to a signed 32-bit value, ordered.
4119 *
4120 * @returns The old value.
4121 * @param pi32 Pointer to the value.
4122 * @param i32 Number to add.
4123 *
4124 * @remarks x86: Requires a 486 or later.
4125 */
4126DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4127{
4128 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4129}
4130
4131
4132/**
4133 * Atomically exchanges and adds to a 64-bit value, ordered.
4134 *
4135 * @returns The old value.
4136 * @param pu64 Pointer to the value.
4137 * @param u64 Number to add.
4138 *
4139 * @remarks x86: Requires a Pentium or later.
4140 */
4141#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4142DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4143#else
4144DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4145{
4146# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4147 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
4148 return u64;
4149
4150# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4151 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4152 : "=r" (u64)
4153 , "=m" (*pu64)
4154 : "0" (u64)
4155 , "m" (*pu64)
4156 : "memory"
4157 , "cc");
4158 return u64;
4159
4160# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4161 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
4162 "add %[uNew], %[uOld], %[uVal]\n\t"
4163 ,
4164 "add %[uNew], %[uOld], %[uVal]\n\t"
4165 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
4166 [uVal] "r" (u64));
4167 return u64OldRet;
4168
4169# else
4170 uint64_t u64Old;
4171 for (;;)
4172 {
4173 uint64_t u64New;
4174 u64Old = ASMAtomicUoReadU64(pu64);
4175 u64New = u64Old + u64;
4176 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4177 break;
4178 ASMNopPause();
4179 }
4180 return u64Old;
4181# endif
4182}
4183#endif
4184
4185
4186/**
4187 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4188 *
4189 * @returns The old value.
4190 * @param pi64 Pointer to the value.
4191 * @param i64 Number to add.
4192 *
4193 * @remarks x86: Requires a Pentium or later.
4194 */
4195DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4196{
4197 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4198}
4199
4200
4201/**
4202 * Atomically exchanges and adds to a size_t value, ordered.
4203 *
4204 * @returns The old value.
4205 * @param pcb Pointer to the size_t value.
4206 * @param cb Number to add.
4207 */
4208DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4209{
4210#if ARCH_BITS == 64
4211 AssertCompileSize(size_t, 8);
4212 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4213#elif ARCH_BITS == 32
4214 AssertCompileSize(size_t, 4);
4215 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4216#elif ARCH_BITS == 16
4217 AssertCompileSize(size_t, 2);
4218 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4219#else
4220# error "Unsupported ARCH_BITS value"
4221#endif
4222}
4223
4224
4225/**
4226 * Atomically exchanges and adds a value whose size might differ between
4227 * platforms or compilers, ordered.
4228 *
4229 * @param pu Pointer to the variable to update.
4230 * @param uNew The value to add to *pu.
4231 * @param puOld Where to store the old value.
4232 */
4233#define ASMAtomicAddSize(pu, uNew, puOld) \
4234 do { \
4235 switch (sizeof(*(pu))) { \
4236 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4237 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4238 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
4239 } \
4240 } while (0)
4241
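/* Example (usage sketch): ASMAtomicAddSize dispatches on sizeof(*pu) and
 * stores the pre-add value via puOld; cbTotal (size_t volatile) and cbChunk
 * are assumed caller-defined.
 *
 * @code
 *      size_t cbOld;
 *      ASMAtomicAddSize(&cbTotal, cbChunk, &cbOld);
 * @endcode
 */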
4242
4243
4244/**
4245 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
4246 *
4247 * @returns The old value.
4248 * @param pu16 Pointer to the value.
4249 * @param u16 Number to subtract.
4250 *
4251 * @remarks x86: Requires a 486 or later.
4252 */
4253DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4254{
4255 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4256}
4257
4258
4259/**
4260 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
4261 *
4262 * @returns The old value.
4263 * @param pi16 Pointer to the value.
4264 * @param i16 Number to subtract.
4265 *
4266 * @remarks x86: Requires a 486 or later.
4267 */
4268DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4269{
4270 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4271}
4272
4273
4274/**
4275 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
4276 *
4277 * @returns The old value.
4278 * @param pu32 Pointer to the value.
4279 * @param u32 Number to subtract.
4280 *
4281 * @remarks x86: Requires a 486 or later.
4282 */
4283DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4284{
4285 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4286}
4287
4288
4289/**
4290 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
4291 *
4292 * @returns The old value.
4293 * @param pi32 Pointer to the value.
4294 * @param i32 Number to subtract.
4295 *
4296 * @remarks x86: Requires a 486 or later.
4297 */
4298DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4299{
4300 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4301}
4302
4303
4304/**
4305 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
4306 *
4307 * @returns The old value.
4308 * @param pu64 Pointer to the value.
4309 * @param u64 Number to subtract.
4310 *
4311 * @remarks x86: Requires a Pentium or later.
4312 */
4313DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4314{
4315 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4316}
4317
4318
4319/**
4320 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
4321 *
4322 * @returns The old value.
4323 * @param pi64 Pointer to the value.
4324 * @param i64 Number to subtract.
4325 *
4326 * @remarks x86: Requires a Pentium or later.
4327 */
4328DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4329{
4330 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4331}
4332
4333
4334/**
4335 * Atomically exchanges and subtracts from a size_t value, ordered.
4336 *
4337 * @returns The old value.
4338 * @param pcb Pointer to the size_t value.
4339 * @param cb Number to subtract.
4340 *
4341 * @remarks x86: Requires a 486 or later.
4342 */
4343DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4344{
4345#if ARCH_BITS == 64
4346 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4347#elif ARCH_BITS == 32
4348 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4349#elif ARCH_BITS == 16
4350 AssertCompileSize(size_t, 2);
4351 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4352#else
4353# error "Unsupported ARCH_BITS value"
4354#endif
4355}
4356
4357
4358/**
4359 * Atomically exchanges and subtracts a value whose size might differ between
4360 * platforms or compilers, ordered.
4361 *
4362 * @param pu Pointer to the variable to update.
4363 * @param uNew The value to subtract from *pu.
4364 * @param puOld Where to store the old value.
4365 *
4366 * @remarks x86: Requires a 486 or later.
4367 */
4368#define ASMAtomicSubSize(pu, uNew, puOld) \
4369 do { \
4370 switch (sizeof(*(pu))) { \
4371 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4372 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4373 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
4374 } \
4375 } while (0)
4376
4377
4378
4379/**
4380 * Atomically increment a 16-bit value, ordered.
4381 *
4382 * @returns The new value.
4383 * @param pu16 Pointer to the value to increment.
4384 * @remarks Not implemented. Just to make 16-bit code happy.
4385 *
4386 * @remarks x86: Requires a 486 or later.
4387 */
4388RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4389
4390
4391/**
4392 * Atomically increment a 32-bit value, ordered.
4393 *
4394 * @returns The new value.
4395 * @param pu32 Pointer to the value to increment.
4396 *
4397 * @remarks x86: Requires a 486 or later.
4398 */
4399#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4400RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4401#else
4402DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4403{
4404# if RT_INLINE_ASM_USES_INTRIN
4405 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4406
4407# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4408# if RT_INLINE_ASM_GNU_STYLE
4409 uint32_t u32;
4410 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4411 : "=r" (u32)
4412 , "=m" (*pu32)
4413 : "0" (1)
4414 , "m" (*pu32)
4415 : "memory"
4416 , "cc");
4417 return u32+1;
4418# else
4419 __asm
4420 {
4421 mov eax, 1
4422# ifdef RT_ARCH_AMD64
4423 mov rdx, [pu32]
4424 lock xadd [rdx], eax
4425# else
4426 mov edx, [pu32]
4427 lock xadd [edx], eax
4428# endif
4429 mov u32, eax
4430 }
4431 return u32+1;
4432# endif
4433
4434# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4435 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4436 "add %w[uNew], %w[uNew], #1\n\t",
4437 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4438 "X" (0) /* dummy */);
4439 return u32NewRet;
4440
4441# else
4442 return ASMAtomicAddU32(pu32, 1) + 1;
4443# endif
4444}
4445#endif
4446
4447
4448/**
4449 * Atomically increment a signed 32-bit value, ordered.
4450 *
4451 * @returns The new value.
4452 * @param pi32 Pointer to the value to increment.
4453 *
4454 * @remarks x86: Requires a 486 or later.
4455 */
4456DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4457{
4458 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4459}
4460
4461
4462/**
4463 * Atomically increment a 64-bit value, ordered.
4464 *
4465 * @returns The new value.
4466 * @param pu64 Pointer to the value to increment.
4467 *
4468 * @remarks x86: Requires a Pentium or later.
4469 */
4470#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4471DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4472#else
4473DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4474{
4475# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4476 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4477
4478# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4479 uint64_t u64;
4480 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4481 : "=r" (u64)
4482 , "=m" (*pu64)
4483 : "0" (1)
4484 , "m" (*pu64)
4485 : "memory"
4486 , "cc");
4487 return u64 + 1;
4488
4489# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4490 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4491 "add %[uNew], %[uNew], #1\n\t"
4492 ,
4493 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4494 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4495 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4496 return u64NewRet;
4497
4498# else
4499 return ASMAtomicAddU64(pu64, 1) + 1;
4500# endif
4501}
4502#endif
4503
4504
4505/**
4506 * Atomically increment a signed 64-bit value, ordered.
4507 *
4508 * @returns The new value.
4509 * @param pi64 Pointer to the value to increment.
4510 *
4511 * @remarks x86: Requires a Pentium or later.
4512 */
4513DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4514{
4515 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4516}
4517
4518
4519/**
4520 * Atomically increment a size_t value, ordered.
4521 *
4522 * @returns The new value.
4523 * @param pcb Pointer to the value to increment.
4524 *
4525 * @remarks x86: Requires a 486 or later.
4526 */
4527DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4528{
4529#if ARCH_BITS == 64
4530 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4531#elif ARCH_BITS == 32
4532 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4533#elif ARCH_BITS == 16
4534 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4535#else
4536# error "Unsupported ARCH_BITS value"
4537#endif
4538}
4539
4540
4541
4542/**
4543 * Atomically decrement an unsigned 16-bit value, ordered.
4544 *
4545 * @returns The new value.
4546 * @param pu16 Pointer to the value to decrement.
4547 * @remarks Not implemented. Just to make 16-bit code happy.
4548 *
4549 * @remarks x86: Requires a 486 or later.
4550 */
4551RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4552
4553
4554/**
4555 * Atomically decrement an unsigned 32-bit value, ordered.
4556 *
4557 * @returns The new value.
4558 * @param pu32 Pointer to the value to decrement.
4559 *
4560 * @remarks x86: Requires a 486 or later.
4561 */
4562#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4563RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4564#else
4565DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4566{
4567# if RT_INLINE_ASM_USES_INTRIN
4568 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4569
4570# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4571# if RT_INLINE_ASM_GNU_STYLE
4572 uint32_t u32;
4573 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4574 : "=r" (u32)
4575 , "=m" (*pu32)
4576 : "0" (-1)
4577 , "m" (*pu32)
4578 : "memory"
4579 , "cc");
4580 return u32-1;
4581# else
4582 uint32_t u32;
4583 __asm
4584 {
4585 mov eax, -1
4586# ifdef RT_ARCH_AMD64
4587 mov rdx, [pu32]
4588 lock xadd [rdx], eax
4589# else
4590 mov edx, [pu32]
4591 lock xadd [edx], eax
4592# endif
4593 mov u32, eax
4594 }
4595 return u32-1;
4596# endif
4597
4598# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4599 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4600 "sub %w[uNew], %w[uNew], #1\n\t",
4601 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4602 "X" (0) /* dummy */);
4603 return u32NewRet;
4604
4605# else
4606 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4607# endif
4608}
4609#endif
4610
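/* Example (usage sketch): the classic retain/release pattern on top of the
 * increment/decrement primitives above, which both return the *new* value;
 * pThis->cRefs and myObjDestroy are assumed caller-defined.
 *
 * @code
 *      ASMAtomicIncU32(&pThis->cRefs);
 *      ...
 *      uint32_t const cRefs = ASMAtomicDecU32(&pThis->cRefs);
 *      if (cRefs == 0)
 *          myObjDestroy(pThis);
 * @endcode
 */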
4611
4612/**
4613 * Atomically decrement a signed 32-bit value, ordered.
4614 *
4615 * @returns The new value.
4616 * @param pi32 Pointer to the value to decrement.
4617 *
4618 * @remarks x86: Requires a 486 or later.
4619 */
4620DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4621{
4622 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4623}
4624
4625
4626/**
4627 * Atomically decrement an unsigned 64-bit value, ordered.
4628 *
4629 * @returns The new value.
4630 * @param pu64 Pointer to the value to decrement.
4631 *
4632 * @remarks x86: Requires a Pentium or later.
4633 */
4634#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4635RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4636#else
4637DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4638{
4639# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4640 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4641
4642# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4643 uint64_t u64;
4644 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4645 : "=r" (u64)
4646 , "=m" (*pu64)
4647 : "0" (~(uint64_t)0)
4648 , "m" (*pu64)
4649 : "memory"
4650 , "cc");
4651 return u64-1;
4652
4653# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4654 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4655 "sub %[uNew], %[uNew], #1\n\t"
4656 ,
4657 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4658 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4659 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4660 return u64NewRet;
4661
4662# else
4663 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4664# endif
4665}
4666#endif
4667
4668
4669/**
4670 * Atomically decrement a signed 64-bit value, ordered.
4671 *
4672 * @returns The new value.
4673 * @param pi64 Pointer to the value to decrement.
4674 *
4675 * @remarks x86: Requires a Pentium or later.
4676 */
4677DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4678{
4679 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4680}
4681
4682
4683/**
4684 * Atomically decrement a size_t value, ordered.
4685 *
4686 * @returns The new value.
4687 * @param pcb Pointer to the value to decrement.
4688 *
4689 * @remarks x86: Requires a 486 or later.
4690 */
4691DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4692{
4693#if ARCH_BITS == 64
4694 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4695#elif ARCH_BITS == 32
4696 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4697#elif ARCH_BITS == 16
4698 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4699#else
4700# error "Unsupported ARCH_BITS value"
4701#endif
4702}
4703
4704
4705/**
4706 * Atomically Or an unsigned 32-bit value, ordered.
4707 *
4708 * @param pu32 Pointer to the variable to OR u32 with.
4709 * @param u32 The value to OR *pu32 with.
4710 *
4711 * @remarks x86: Requires a 386 or later.
4712 */
4713#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4714RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4715#else
4716DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4717{
4718# if RT_INLINE_ASM_USES_INTRIN
4719 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4720
4721# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4722# if RT_INLINE_ASM_GNU_STYLE
4723 __asm__ __volatile__("lock; orl %1, %0\n\t"
4724 : "=m" (*pu32)
4725 : "ir" (u32)
4726 , "m" (*pu32)
4727 : "cc");
4728# else
4729 __asm
4730 {
4731 mov eax, [u32]
4732# ifdef RT_ARCH_AMD64
4733 mov rdx, [pu32]
4734 lock or [rdx], eax
4735# else
4736 mov edx, [pu32]
4737 lock or [edx], eax
4738# endif
4739 }
4740# endif
4741
4742# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4743 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4744 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4745 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4746 "orr %[uNew], %[uNew], %[uVal]\n\t",
4747 [uVal] "r" (u32));
4748
4749# else
4750# error "Port me"
4751# endif
4752}
4753#endif
4754
4755
4756/**
4757 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4758 * fallback).
4759 *
4760 * @returns Old value.
4761 * @param pu32 Pointer to the variable to OR @a u32 with.
4762 * @param u32 The value to OR @a *pu32 with.
4763 */
4764DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4765{
4766#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4767 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4768 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4769 "orr %[uNew], %[uOld], %[uVal]\n\t",
4770 [uVal] "r" (u32));
4771 return u32OldRet;
4772
4773#else
4774 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4775 uint32_t u32New;
4776 do
4777 u32New = u32RetOld | u32;
4778 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4779 return u32RetOld;
4780#endif
4781}
4782
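/* Example (usage sketch): the returned pre-OR value tells whether the flag was
 * already set, giving a cheap test-and-set; pThis->fFlags, MYOBJ_F_SHUTDOWN
 * and myObjStartShutdown are assumed caller-defined.
 *
 * @code
 *      uint32_t const fOld = ASMAtomicOrExU32(&pThis->fFlags, MYOBJ_F_SHUTDOWN);
 *      if (!(fOld & MYOBJ_F_SHUTDOWN))
 *          myObjStartShutdown(pThis);
 * @endcode
 */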
4783
4784/**
4785 * Atomically Or a signed 32-bit value, ordered.
4786 *
4787 * @param pi32 Pointer to the variable to OR i32 with.
4788 * @param i32 The value to OR *pi32 with.
4789 *
4790 * @remarks x86: Requires a 386 or later.
4791 */
4792DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4793{
4794 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4795}
4796
4797
4798/**
4799 * Atomically Or an unsigned 64-bit value, ordered.
4800 *
4801 * @param pu64 Pointer to the variable to OR u64 with.
4802 * @param u64 The value to OR *pu64 with.
4803 *
4804 * @remarks x86: Requires a Pentium or later.
4805 */
4806#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4807DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4808#else
4809DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4810{
4811# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4812 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4813
4814# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4815 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4816 : "=m" (*pu64)
4817 : "r" (u64)
4818 , "m" (*pu64)
4819 : "cc");
4820
4821# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4822 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4823 "orr %[uNew], %[uNew], %[uVal]\n\t"
4824 ,
4825 "orr %[uNew], %[uNew], %[uVal]\n\t"
4826 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4827 [uVal] "r" (u64));
4828
4829# else
4830 for (;;)
4831 {
4832 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4833 uint64_t u64New = u64Old | u64;
4834 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4835 break;
4836 ASMNopPause();
4837 }
4838# endif
4839}
4840#endif
4841
4842
4843/**
4844 * Atomically Or a signed 64-bit value, ordered.
4845 *
4846 * @param pi64 Pointer to the variable to OR i64 with.
4847 * @param i64 The value to OR *pi64 with.
4848 *
4849 * @remarks x86: Requires a Pentium or later.
4850 */
4851DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4852{
4853 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4854}
4855
4856
4857/**
4858 * Atomically And an unsigned 32-bit value, ordered.
4859 *
4860 * @param pu32 Pointer to the variable to AND u32 with.
4861 * @param u32 The value to AND *pu32 with.
4862 *
4863 * @remarks x86: Requires a 386 or later.
4864 */
4865#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4866RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4867#else
4868DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4869{
4870# if RT_INLINE_ASM_USES_INTRIN
4871 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4872
4873# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4874# if RT_INLINE_ASM_GNU_STYLE
4875 __asm__ __volatile__("lock; andl %1, %0\n\t"
4876 : "=m" (*pu32)
4877 : "ir" (u32)
4878 , "m" (*pu32)
4879 : "cc");
4880# else
4881 __asm
4882 {
4883 mov eax, [u32]
4884# ifdef RT_ARCH_AMD64
4885 mov rdx, [pu32]
4886 lock and [rdx], eax
4887# else
4888 mov edx, [pu32]
4889 lock and [edx], eax
4890# endif
4891 }
4892# endif
4893
4894# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4895 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4896 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4897 "and %[uNew], %[uNew], %[uVal]\n\t",
4898 [uVal] "r" (u32));
4899
4900# else
4901# error "Port me"
4902# endif
4903}
4904#endif
4905
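/* Example (usage sketch): setting and clearing bits in a shared 32-bit flag
 * word with the OR/AND primitives above; pThis->fFlags and MYOBJ_F_BUSY are
 * assumed caller-defined.
 *
 * @code
 *      ASMAtomicOrU32(&pThis->fFlags, MYOBJ_F_BUSY);
 *      ...
 *      ASMAtomicAndU32(&pThis->fFlags, ~MYOBJ_F_BUSY);
 * @endcode
 */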
4906
4907/**
4908 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4909 *
4910 * @returns Old value.
4911 * @param pu32 Pointer to the variable to AND @a u32 with.
4912 * @param u32 The value to AND @a *pu32 with.
4913 */
4914DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4915{
4916#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4917 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4918 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4919 "and %[uNew], %[uOld], %[uVal]\n\t",
4920 [uVal] "r" (u32));
4921 return u32OldRet;
4922
4923#else
4924 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4925 uint32_t u32New;
4926 do
4927 u32New = u32RetOld & u32;
4928 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4929 return u32RetOld;
4930#endif
4931}
4932
4933
4934/**
4935 * Atomically And a signed 32-bit value, ordered.
4936 *
4937 * @param pi32 Pointer to the variable to AND i32 with.
4938 * @param i32 The value to AND *pi32 with.
4939 *
4940 * @remarks x86: Requires a 386 or later.
4941 */
4942DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4943{
4944 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4945}
4946
4947
4948/**
4949 * Atomically And an unsigned 64-bit value, ordered.
4950 *
4951 * @param pu64 Pointer to the variable to AND u64 with.
4952 * @param u64 The value to AND *pu64 with.
4953 *
4954 * @remarks x86: Requires a Pentium or later.
4955 */
4956#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4957DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4958#else
4959DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4960{
4961# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4962 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4963
4964# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4965 __asm__ __volatile__("lock; andq %1, %0\n\t"
4966 : "=m" (*pu64)
4967 : "r" (u64)
4968 , "m" (*pu64)
4969 : "cc");
4970
4971# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4972 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4973 "and %[uNew], %[uNew], %[uVal]\n\t"
4974 ,
4975 "and %[uNew], %[uNew], %[uVal]\n\t"
4976 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4977 [uVal] "r" (u64));
4978
4979# else
4980 for (;;)
4981 {
4982 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4983 uint64_t u64New = u64Old & u64;
4984 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4985 break;
4986 ASMNopPause();
4987 }
4988# endif
4989}
4990#endif
4991
4992
4993/**
4994 * Atomically And a signed 64-bit value, ordered.
4995 *
4996 * @param pi64 Pointer to the variable to AND i64 with.
4997 * @param i64 The value to AND *pi64 with.
4998 *
4999 * @remarks x86: Requires a Pentium or later.
5000 */
5001DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5002{
5003 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5004}
5005
5006
5007/**
5008 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
5009 *
5010 * @param pu32 Pointer to the variable to XOR @a u32 with.
5011 * @param u32 The value to XOR @a *pu32 with.
5012 *
5013 * @remarks x86: Requires a 386 or later.
5014 */
5015#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5016RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5017#else
5018DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5019{
5020# if RT_INLINE_ASM_USES_INTRIN
5021 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
5022
5023# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5024# if RT_INLINE_ASM_GNU_STYLE
5025 __asm__ __volatile__("lock; xorl %1, %0\n\t"
5026 : "=m" (*pu32)
5027 : "ir" (u32)
5028 , "m" (*pu32)
5029 : "cc");
5030# else
5031 __asm
5032 {
5033 mov eax, [u32]
5034# ifdef RT_ARCH_AMD64
5035 mov rdx, [pu32]
5036 lock xor [rdx], eax
5037# else
5038 mov edx, [pu32]
5039 lock xor [edx], eax
5040# endif
5041 }
5042# endif
5043
5044# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5045 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
5046 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5047 "eor %[uNew], %[uNew], %[uVal]\n\t",
5048 [uVal] "r" (u32));
5049
5050# else
5051# error "Port me"
5052# endif
5053}
5054#endif
5055
5056
5057/**
5058 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
5059 * extended version (for bitmaps).
5060 *
5061 * @returns Old value.
5062 * @param pu32 Pointer to the variable to XOR @a u32 with.
5063 * @param u32 The value to XOR @a *pu32 with.
5064 */
5065DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5066{
5067#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5068 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
5069 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5070 "eor %[uNew], %[uOld], %[uVal]\n\t",
5071 [uVal] "r" (u32));
5072 return u32OldRet;
5073
5074#else
5075 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
5076 uint32_t u32New;
5077 do
5078 u32New = u32RetOld ^ u32;
5079 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5080 return u32RetOld;
5081#endif
5082}
5083
5084
5085/**
5086 * Atomically XOR a signed 32-bit value, ordered.
5087 *
5088 * @param pi32 Pointer to the variable to XOR i32 with.
5089 * @param i32 The value to XOR *pi32 with.
5090 *
5091 * @remarks x86: Requires a 386 or later.
5092 */
5093DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5094{
5095 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5096}
5097
5098
5099/**
5100 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
5101 *
5102 * @param pu32 Pointer to the variable to OR u32 with.
5103 * @param u32 The value to OR *pu32 with.
5104 *
5105 * @remarks x86: Requires a 386 or later.
5106 */
5107#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5108RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5109#else
5110DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5111{
5112# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5113# if RT_INLINE_ASM_GNU_STYLE
5114 __asm__ __volatile__("orl %1, %0\n\t"
5115 : "=m" (*pu32)
5116 : "ir" (u32)
5117 , "m" (*pu32)
5118 : "cc");
5119# else
5120 __asm
5121 {
5122 mov eax, [u32]
5123# ifdef RT_ARCH_AMD64
5124 mov rdx, [pu32]
5125 or [rdx], eax
5126# else
5127 mov edx, [pu32]
5128 or [edx], eax
5129# endif
5130 }
5131# endif
5132
5133# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5134 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
5135 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
5136 "orr %[uNew], %[uNew], %[uVal]\n\t",
5137 [uVal] "r" (u32));
5138
5139# else
5140# error "Port me"
5141# endif
5142}
5143#endif
5144
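/* Example (usage sketch): the unordered (Uo) variants are safe against
 * interrupts on the same CPU but emit no memory barrier, which fits e.g.
 * recording pending work from an interrupt handler; pThis->fPendingIrqs and
 * iIrq are assumed caller-defined, RT_BIT_32 comes from iprt/cdefs.h.
 *
 * @code
 *      ASMAtomicUoOrU32(&pThis->fPendingIrqs, RT_BIT_32(iIrq));
 * @endcode
 */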
5145
5146/**
5147 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
5148 * extended version (for bitmap fallback).
5149 *
5150 * @returns Old value.
5151 * @param pu32 Pointer to the variable to OR @a u32 with.
5152 * @param u32 The value to OR @a *pu32 with.
5153 */
5154DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5155{
5156#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5157 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
5158 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
5159 "orr %[uNew], %[uOld], %[uVal]\n\t",
5160 [uVal] "r" (u32));
5161 return u32OldRet;
5162
5163#else
5164 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5165#endif
5166}
5167
5168
5169/**
5170 * Atomically OR a signed 32-bit value, unordered.
5171 *
5172 * @param pi32 Pointer to the variable to OR i32 with.
5173 * @param i32 The value to OR *pi32 with.
5174 *
5175 * @remarks x86: Requires a 386 or later.
5176 */
5177DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5178{
5179 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5180}
5181
5182
5183/**
5184 * Atomically OR an unsigned 64-bit value, unordered.
5185 *
5186 * @param pu64 Pointer to the variable to OR u64 with.
5187 * @param u64 The value to OR *pu64 with.
5188 *
5189 * @remarks x86: Requires a Pentium or later.
5190 */
5191#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5192DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5193#else
5194DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5195{
5196# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5197 __asm__ __volatile__("orq %1, %q0\n\t"
5198 : "=m" (*pu64)
5199 : "r" (u64)
5200 , "m" (*pu64)
5201 : "cc");
5202
5203# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5204 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5205 "orr %[uNew], %[uNew], %[uVal]\n\t"
5206 ,
5207 "orr %[uNew], %[uNew], %[uVal]\n\t"
5208 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5209 [uVal] "r" (u64));
5210
5211# else
5212 for (;;)
5213 {
5214 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5215 uint64_t u64New = u64Old | u64;
5216 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5217 break;
5218 ASMNopPause();
5219 }
5220# endif
5221}
5222#endif
5223
5224
5225/**
5226 * Atomically Or a signed 64-bit value, unordered.
5227 *
5228 * @param pi64 Pointer to the variable to OR i64 with.
5229 * @param i64 The value to OR *pi64 with.
5230 *
5231 * @remarks x86: Requires a Pentium or later.
5232 */
5233DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5234{
5235 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5236}
5237
5238
5239/**
5240 * Atomically And an unsigned 32-bit value, unordered.
5241 *
5242 * @param pu32 Pointer to the variable to AND u32 with.
5243 * @param u32 The value to AND *pu32 with.
5244 *
5245 * @remarks x86: Requires a 386 or later.
5246 */
5247#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5248RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5249#else
5250DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5251{
5252# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5253# if RT_INLINE_ASM_GNU_STYLE
5254 __asm__ __volatile__("andl %1, %0\n\t"
5255 : "=m" (*pu32)
5256 : "ir" (u32)
5257 , "m" (*pu32)
5258 : "cc");
5259# else
5260 __asm
5261 {
5262 mov eax, [u32]
5263# ifdef RT_ARCH_AMD64
5264 mov rdx, [pu32]
5265 and [rdx], eax
5266# else
5267 mov edx, [pu32]
5268 and [edx], eax
5269# endif
5270 }
5271# endif
5272
5273# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5274 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5275 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5276 "and %[uNew], %[uNew], %[uVal]\n\t",
5277 [uVal] "r" (u32));
5278
5279# else
5280# error "Port me"
5281# endif
5282}
5283#endif
5284
5285
5286/**
5287 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5288 * bitmap fallback).
5289 *
5290 * @returns Old value.
5291 * @param pu32 Pointer to the variable to AND @a u32 with.
5292 * @param u32 The value to AND @a *pu32 with.
5293 */
5294DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5295{
5296#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5297 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5298 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5299 "and %[uNew], %[uOld], %[uVal]\n\t",
5300 [uVal] "r" (u32));
5301 return u32OldRet;
5302
5303#else
5304 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5305#endif
5306}
5307
5308
5309/**
5310 * Atomically And a signed 32-bit value, unordered.
5311 *
5312 * @param pi32 Pointer to the variable to AND i32 with.
5313 * @param i32 The value to AND *pi32 with.
5314 *
5315 * @remarks x86: Requires a 386 or later.
5316 */
5317DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5318{
5319 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5320}
5321
5322
5323/**
5324 * Atomically And an unsigned 64-bit value, unordered.
5325 *
5326 * @param pu64 Pointer to the variable to AND u64 with.
5327 * @param u64 The value to AND *pu64 with.
5328 *
5329 * @remarks x86: Requires a Pentium or later.
5330 */
5331#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5332DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5333#else
5334DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5335{
5336# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5337 __asm__ __volatile__("andq %1, %0\n\t"
5338 : "=m" (*pu64)
5339 : "r" (u64)
5340 , "m" (*pu64)
5341 : "cc");
5342
5343# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5344 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5345 "and %[uNew], %[uNew], %[uVal]\n\t"
5346 ,
5347 "and %[uNew], %[uNew], %[uVal]\n\t"
5348 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5349 [uVal] "r" (u64));
5350
5351# else
5352 for (;;)
5353 {
5354 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5355 uint64_t u64New = u64Old & u64;
5356 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5357 break;
5358 ASMNopPause();
5359 }
5360# endif
5361}
5362#endif
5363
5364
5365/**
5366 * Atomically And a signed 64-bit value, unordered.
5367 *
5368 * @param pi64 Pointer to the variable to AND i64 with.
5369 * @param i64 The value to AND *pi64 with.
5370 *
5371 * @remarks x86: Requires a Pentium or later.
5372 */
5373DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5374{
5375 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5376}
5377
5378
5379/**
5380 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5381 *
5382 * @param pu32 Pointer to the variable to XOR @a u32 with.
5383 * @param u32 The value to XOR @a *pu32 with.
5384 *
5385 * @remarks x86: Requires a 386 or later.
5386 */
5387#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5388RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5389#else
5390DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5391{
5392# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5393# if RT_INLINE_ASM_GNU_STYLE
5394 __asm__ __volatile__("xorl %1, %0\n\t"
5395 : "=m" (*pu32)
5396 : "ir" (u32)
5397 , "m" (*pu32)
5398 : "cc");
5399# else
5400 __asm
5401 {
5402 mov eax, [u32]
5403# ifdef RT_ARCH_AMD64
5404 mov rdx, [pu32]
5405 xor [rdx], eax
5406# else
5407 mov edx, [pu32]
5408 xor [edx], eax
5409# endif
5410 }
5411# endif
5412
5413# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5414 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5415 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5416 "eor %[uNew], %[uNew], %[uVal]\n\t",
5417 [uVal] "r" (u32));
5418
5419# else
5420# error "Port me"
5421# endif
5422}
5423#endif
5424
5425
5426/**
5427 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5428 * extended version (for bitmap fallback).
5429 *
5430 * @returns Old value.
5431 * @param pu32 Pointer to the variable to XOR @a u32 with.
5432 * @param u32 The value to XOR @a *pu32 with.
5433 */
5434DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5435{
5436#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5437 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5438 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5439 "eor %[uNew], %[uOld], %[uVal]\n\t",
5440 [uVal] "r" (u32));
5441 return u32OldRet;
5442
5443#else
5444 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5445#endif
5446}
5447
5448
5449/**
5450 * Atomically XOR a signed 32-bit value, unordered.
5451 *
5452 * @param pi32 Pointer to the variable to XOR @a i32 with.
5453 * @param i32 The value to XOR @a *pi32 with.
5454 *
5455 * @remarks x86: Requires a 386 or later.
5456 */
5457DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5458{
5459 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5460}
5461
5462
5463/**
5464 * Atomically increment an unsigned 32-bit value, unordered.
5465 *
5466 * @returns the new value.
5467 * @param pu32 Pointer to the variable to increment.
5468 *
5469 * @remarks x86: Requires a 486 or later.
5470 */
5471#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5472RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5473#else
5474DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5475{
5476# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5477 uint32_t u32;
5478# if RT_INLINE_ASM_GNU_STYLE
5479 __asm__ __volatile__("xaddl %0, %1\n\t"
5480 : "=r" (u32)
5481 , "=m" (*pu32)
5482 : "0" (1)
5483 , "m" (*pu32)
5484 : "memory" /** @todo why 'memory'? */
5485 , "cc");
5486 return u32 + 1;
5487# else
5488 __asm
5489 {
5490 mov eax, 1
5491# ifdef RT_ARCH_AMD64
5492 mov rdx, [pu32]
5493 xadd [rdx], eax
5494# else
5495 mov edx, [pu32]
5496 xadd [edx], eax
5497# endif
5498 mov u32, eax
5499 }
5500 return u32 + 1;
5501# endif
5502
5503# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5504 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5505 "add %w[uNew], %w[uNew], #1\n\t",
5506 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5507 "X" (0) /* dummy */);
5508 return u32NewRet;
5509
5510# else
5511# error "Port me"
5512# endif
5513}
5514#endif
5515
5516
5517/**
5518 * Atomically decrement an unsigned 32-bit value, unordered.
5519 *
5520 * @returns the new value.
5521 * @param pu32 Pointer to the variable to decrement.
5522 *
5523 * @remarks x86: Requires a 486 or later.
5524 */
5525#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5526RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5527#else
5528DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5529{
5530# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5531 uint32_t u32;
5532# if RT_INLINE_ASM_GNU_STYLE
5533 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5534 : "=r" (u32)
5535 , "=m" (*pu32)
5536 : "0" (-1)
5537 , "m" (*pu32)
5538 : "memory"
5539 , "cc");
5540 return u32 - 1;
5541# else
5542 __asm
5543 {
5544 mov eax, -1
5545# ifdef RT_ARCH_AMD64
5546 mov rdx, [pu32]
5547 xadd [rdx], eax
5548# else
5549 mov edx, [pu32]
5550 xadd [edx], eax
5551# endif
5552 mov u32, eax
5553 }
5554 return u32 - 1;
5555# endif
5556
5557# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5558 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5559 "sub %w[uNew], %w[uNew], #1\n\t",
5560 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5561 "X" (0) /* dummy */);
5562 return u32NewRet;
5563
5564# else
5565# error "Port me"
5566# endif
5567}
5568#endif
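
/*
 * Illustrative usage sketch (not part of the original header; the counter
 * name is hypothetical): the unordered increment/decrement variants above
 * fit statistics counters where the update must be atomic but need not be
 * ordered against other memory accesses.
 *
 *      static uint32_t volatile s_cPacketsSeen = 0;
 *
 *      static void CountPacket(void)
 *      {
 *          ASMAtomicUoIncU32(&s_cPacketsSeen);  // returns the new value, ignored here
 *      }
 */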
5569
5570
5571/** @def RT_ASM_PAGE_SIZE
5572 * We try to avoid dragging in iprt/param.h here.
5573 * @internal
5574 */
5575#if defined(RT_ARCH_SPARC64)
5576# define RT_ASM_PAGE_SIZE 0x2000
5577# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5578# if PAGE_SIZE != 0x2000
5579# error "PAGE_SIZE is not 0x2000!"
5580# endif
5581# endif
5582#elif defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)
5583# define RT_ASM_PAGE_SIZE 0x4000
5584# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5585# if PAGE_SIZE != 0x4000
5586# error "PAGE_SIZE is not 0x4000!"
5587# endif
5588# endif
5589#else
5590# define RT_ASM_PAGE_SIZE 0x1000
5591# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(RT_OS_LINUX) && !defined(RT_ARCH_ARM64)
5592# if PAGE_SIZE != 0x1000
5593# error "PAGE_SIZE is not 0x1000!"
5594# endif
5595# endif
5596#endif
5597
5598/**
5599 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5600 *
5601 * @param pv Pointer to the memory block. This must be page aligned.
5602 */
5603#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5604RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5605# else
5606DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5607{
5608# if RT_INLINE_ASM_USES_INTRIN
5609# ifdef RT_ARCH_AMD64
5610 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5611# else
5612 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5613# endif
5614
5615# elif RT_INLINE_ASM_GNU_STYLE
5616 RTCCUINTREG uDummy;
5617# ifdef RT_ARCH_AMD64
5618 __asm__ __volatile__("rep stosq"
5619 : "=D" (pv),
5620 "=c" (uDummy)
5621 : "0" (pv),
5622 "c" (RT_ASM_PAGE_SIZE >> 3),
5623 "a" (0)
5624 : "memory");
5625# else
5626 __asm__ __volatile__("rep stosl"
5627 : "=D" (pv),
5628 "=c" (uDummy)
5629 : "0" (pv),
5630 "c" (RT_ASM_PAGE_SIZE >> 2),
5631 "a" (0)
5632 : "memory");
5633# endif
5634# else
5635 __asm
5636 {
5637# ifdef RT_ARCH_AMD64
5638 xor rax, rax
5639 mov ecx, 0200h
5640 mov rdi, [pv]
5641 rep stosq
5642# else
5643 xor eax, eax
5644 mov ecx, 0400h
5645 mov edi, [pv]
5646 rep stosd
5647# endif
5648 }
5649# endif
5650}
5651# endif
5652
5653
5654/**
5655 * Zeros a memory block with a 32-bit aligned size.
5656 *
5657 * @param pv Pointer to the memory block.
5658 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5659 */
5660#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5661RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5662#else
5663DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5664{
5665# if RT_INLINE_ASM_USES_INTRIN
5666# ifdef RT_ARCH_AMD64
5667 if (!(cb & 7))
5668 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5669 else
5670# endif
5671 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5672
5673# elif RT_INLINE_ASM_GNU_STYLE
5674 __asm__ __volatile__("rep stosl"
5675 : "=D" (pv),
5676 "=c" (cb)
5677 : "0" (pv),
5678 "1" (cb >> 2),
5679 "a" (0)
5680 : "memory");
5681# else
5682 __asm
5683 {
5684 xor eax, eax
5685# ifdef RT_ARCH_AMD64
5686 mov rcx, [cb]
5687 shr rcx, 2
5688 mov rdi, [pv]
5689# else
5690 mov ecx, [cb]
5691 shr ecx, 2
5692 mov edi, [pv]
5693# endif
5694 rep stosd
5695 }
5696# endif
5697}
5698#endif
5699
5700
5701/**
5702 * Fills a memory block with a 32-bit aligned size.
5703 *
5704 * @param pv Pointer to the memory block.
5705 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5706 * @param u32 The value to fill with.
5707 */
5708#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5709RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5710#else
5711DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5712{
5713# if RT_INLINE_ASM_USES_INTRIN
5714# ifdef RT_ARCH_AMD64
5715 if (!(cb & 7))
5716 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5717 else
5718# endif
5719 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5720
5721# elif RT_INLINE_ASM_GNU_STYLE
5722 __asm__ __volatile__("rep stosl"
5723 : "=D" (pv),
5724 "=c" (cb)
5725 : "0" (pv),
5726 "1" (cb >> 2),
5727 "a" (u32)
5728 : "memory");
5729# else
5730 __asm
5731 {
5732# ifdef RT_ARCH_AMD64
5733 mov rcx, [cb]
5734 shr rcx, 2
5735 mov rdi, [pv]
5736# else
5737 mov ecx, [cb]
5738 shr ecx, 2
5739 mov edi, [pv]
5740# endif
5741 mov eax, [u32]
5742 rep stosd
5743 }
5744# endif
5745}
5746#endif
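
/*
 * Illustrative usage sketch (not part of the original header; the array is
 * hypothetical): both helpers above require the byte count to be a multiple
 * of four, so sizeof() of a uint32_t array can be passed directly.
 *
 *      uint32_t au32Table[64];
 *      ASMMemZero32(au32Table, sizeof(au32Table));                        // all bits clear
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xffffffff));  // all bits set
 */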
5747
5748
5749/**
5750 * Checks if a memory block is all zeros.
5751 *
5752 * @returns Pointer to the first non-zero byte.
5753 * @returns NULL if all zero.
5754 *
5755 * @param pv Pointer to the memory block.
5756 * @param cb Number of bytes in the block.
5757 */
5758#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5759DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5760#else
5761DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5762{
5763/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5764 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5765 for (; cb; cb--, pb++)
5766 if (RT_LIKELY(*pb == 0))
5767 { /* likely */ }
5768 else
5769 return (void RT_FAR *)pb;
5770 return NULL;
5771}
5772#endif
5773
5774
5775/**
5776 * Checks if a memory block is all zeros.
5777 *
5778 * @returns true if zero, false if not.
5779 *
5780 * @param pv Pointer to the memory block.
5781 * @param cb Number of bytes in the block.
5782 *
5783 * @sa ASMMemFirstNonZero
5784 */
5785DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5786{
5787 return ASMMemFirstNonZero(pv, cb) == NULL;
5788}
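
/*
 * Illustrative usage sketch (not part of the original header; the buffer is
 * hypothetical): ASMMemIsZero is the simple yes/no check, while
 * ASMMemFirstNonZero also reports where the block stops being zero.
 *
 *      uint8_t abBuf[256];
 *      // ... abBuf gets filled in somehow ...
 *      if (!ASMMemIsZero(abBuf, sizeof(abBuf)))
 *      {
 *          uint8_t *pbFirst = (uint8_t *)ASMMemFirstNonZero(abBuf, sizeof(abBuf));
 *          // pbFirst points at the first non-zero byte within abBuf.
 *      }
 */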
5789
5790
5791/**
5792 * Checks if a memory page is all zeros.
5793 *
5794 * @returns true / false.
5795 *
5796 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5797 * boundary.
5798 */
5799DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5800{
5801# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5802 union { RTCCUINTREG r; bool f; } uAX;
5803 RTCCUINTREG xCX, xDI;
5804 Assert(!((uintptr_t)pvPage & 15));
5805 __asm__ __volatile__("repe; "
5806# ifdef RT_ARCH_AMD64
5807 "scasq\n\t"
5808# else
5809 "scasl\n\t"
5810# endif
5811 "setnc %%al\n\t"
5812 : "=&c" (xCX)
5813 , "=&D" (xDI)
5814 , "=&a" (uAX.r)
5815 : "mr" (pvPage)
5816# ifdef RT_ARCH_AMD64
5817 , "0" (RT_ASM_PAGE_SIZE/8)
5818# else
5819 , "0" (RT_ASM_PAGE_SIZE/4)
5820# endif
5821 , "1" (pvPage)
5822 , "2" (0)
5823 : "cc");
5824 return uAX.f;
5825# else
5826 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5827 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5828 Assert(!((uintptr_t)pvPage & 15));
5829 for (;;)
5830 {
5831 if (puPtr[0]) return false;
5832 if (puPtr[4]) return false;
5833
5834 if (puPtr[2]) return false;
5835 if (puPtr[6]) return false;
5836
5837 if (puPtr[1]) return false;
5838 if (puPtr[5]) return false;
5839
5840 if (puPtr[3]) return false;
5841 if (puPtr[7]) return false;
5842
5843 if (!--cLeft)
5844 return true;
5845 puPtr += 8;
5846 }
5847# endif
5848}
5849
5850
5851/**
5852 * Checks if a memory block is filled with the specified byte, returning the
5853 * first mismatch.
5854 *
5855 * This is sort of an inverted memchr.
5856 *
5857 * @returns Pointer to the byte which doesn't equal u8.
5858 * @returns NULL if all equal to u8.
5859 *
5860 * @param pv Pointer to the memory block.
5861 * @param cb Number of bytes in the block.
5862 * @param u8 The value it's supposed to be filled with.
5863 *
5864 * @remarks No alignment requirements.
5865 */
5866#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5867 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5868DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5869#else
5870DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5871{
5872/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5873 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5874 for (; cb; cb--, pb++)
5875 if (RT_LIKELY(*pb == u8))
5876 { /* likely */ }
5877 else
5878 return (void *)pb;
5879 return NULL;
5880}
5881#endif
5882
5883
5884/**
5885 * Checks if a memory block is filled with the specified byte.
5886 *
5887 * @returns true if all matching, false if not.
5888 *
5889 * @param pv Pointer to the memory block.
5890 * @param cb Number of bytes in the block.
5891 * @param u8 The value it's supposed to be filled with.
5892 *
5893 * @remarks No alignment requirements.
5894 */
5895DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5896{
5897 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5898}
5899
5900
5901/**
5902 * Checks if a memory block is filled with the specified 32-bit value.
5903 *
5904 * This is a sort of inverted memchr.
5905 *
5906 * @returns Pointer to the first value which doesn't equal u32.
5907 * @returns NULL if all equal to u32.
5908 *
5909 * @param pv Pointer to the memory block.
5910 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5911 * @param u32 The value it's supposed to be filled with.
5912 */
5913DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5914{
5915/** @todo rewrite this in inline assembly? */
5916 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5917 for (; cb; cb -= 4, pu32++)
5918 if (RT_LIKELY(*pu32 == u32))
5919 { /* likely */ }
5920 else
5921 return (uint32_t RT_FAR *)pu32;
5922 return NULL;
5923}
5924
5925
5926/**
5927 * Probes a byte pointer for read access.
5928 *
5929 * While the function will fault if the byte is not read accessible,
5930 * the idea is to do this in a safe place like before acquiring locks
5931 * and such like.
5932 *
5933 * Also, this function guarantees that an eager compiler is not going
5934 * to optimize the probing away.
5935 *
5936 * @param pvByte Pointer to the byte.
5937 */
5938#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5939RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5940#else
5941DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5942{
5943# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5944 uint8_t u8;
5945# if RT_INLINE_ASM_GNU_STYLE
5946 __asm__ __volatile__("movb %1, %0\n\t"
5947 : "=q" (u8)
5948 : "m" (*(const uint8_t *)pvByte));
5949# else
5950 __asm
5951 {
5952# ifdef RT_ARCH_AMD64
5953 mov rax, [pvByte]
5954 mov al, [rax]
5955# else
5956 mov eax, [pvByte]
5957 mov al, [eax]
5958# endif
5959 mov [u8], al
5960 }
5961# endif
5962 return u8;
5963
5964# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5965 uint32_t u32;
5966 __asm__ __volatile__("Lstart_ASMProbeReadByte_%=:\n\t"
5967# if defined(RT_ARCH_ARM64)
5968 "ldxrb %w[uDst], %[pMem]\n\t"
5969# else
5970 "ldrexb %[uDst], %[pMem]\n\t"
5971# endif
5972 : [uDst] "=&r" (u32)
5973 : [pMem] "Q" (*(uint8_t const *)pvByte));
5974 return (uint8_t)u32;
5975
5976# else
5977# error "Port me"
5978# endif
5979}
5980#endif
5981
5982/**
5983 * Probes a buffer for read access page by page.
5984 *
5985 * While the function will fault if the buffer is not fully read
5986 * accessible, the idea is to do this in a safe place like before
5987 * acquiring locks and such like.
5988 *
5989 * Also, this function guarantees that an eager compiler is not going
5990 * to optimize the probing away.
5991 *
5992 * @param pvBuf Pointer to the buffer.
5993 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5994 */
5995DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5996{
5997 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5998 /* the first byte */
5999 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
6000 ASMProbeReadByte(pu8);
6001
6002 /* the pages in between. */
6003 while (cbBuf > RT_ASM_PAGE_SIZE)
6004 {
6005 ASMProbeReadByte(pu8);
6006 cbBuf -= RT_ASM_PAGE_SIZE;
6007 pu8 += RT_ASM_PAGE_SIZE;
6008 }
6009
6010 /* the last byte */
6011 ASMProbeReadByte(pu8 + cbBuf - 1);
6012}
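
/*
 * Illustrative usage sketch (not part of the original header; the buffer and
 * spinlock handle are hypothetical): probe a caller-supplied buffer up front
 * so that any page fault is taken here, before a lock is held.
 *
 *      ASMProbeReadBuffer(pvUserBuf, cbUserBuf);   // may fault here, which is the point
 *      RTSpinlockAcquire(hSpinlock);
 *      // ... read from pvUserBuf while holding the lock ...
 *      RTSpinlockRelease(hSpinlock);
 */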
6013
6014
6015/**
6016 * Reverse the byte order of the given 16-bit integer.
6017 *
6018 * @returns The byte-swapped value.
6019 * @param u16 16-bit integer value.
6020 */
6021#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6022RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6023#else
6024DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6025{
6026# if RT_INLINE_ASM_USES_INTRIN
6027 return _byteswap_ushort(u16);
6028
6029# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6030# if RT_INLINE_ASM_GNU_STYLE
6031 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6032# else
6033 _asm
6034 {
6035 mov ax, [u16]
6036 ror ax, 8
6037 mov [u16], ax
6038 }
6039# endif
6040 return u16;
6041
6042# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6043 uint32_t u32Ret;
6044 __asm__ __volatile__(
6045# if defined(RT_ARCH_ARM64)
6046 "rev16 %w[uRet], %w[uVal]\n\t"
6047# else
6048 "rev16 %[uRet], %[uVal]\n\t"
6049# endif
6050 : [uRet] "=r" (u32Ret)
6051 : [uVal] "r" (u16));
6052 return (uint16_t)u32Ret;
6053
6054# else
6055# error "Port me"
6056# endif
6057}
6058#endif
6059
6060
6061/**
6062 * Reverse the byte order of the given 32-bit integer.
6063 *
6064 * @returns The byte-swapped value.
6065 * @param u32 32-bit integer value.
6066 */
6067#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6068RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6069#else
6070DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6071{
6072# if RT_INLINE_ASM_USES_INTRIN
6073 return _byteswap_ulong(u32);
6074
6075# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6076# if RT_INLINE_ASM_GNU_STYLE
6077 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6078# else
6079 _asm
6080 {
6081 mov eax, [u32]
6082 bswap eax
6083 mov [u32], eax
6084 }
6085# endif
6086 return u32;
6087
6088# elif defined(RT_ARCH_ARM64)
6089 uint64_t u64Ret;
6090 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
6091 : [uRet] "=r" (u64Ret)
6092 : [uVal] "r" ((uint64_t)u32));
6093 return (uint32_t)u64Ret;
6094
6095# elif defined(RT_ARCH_ARM32)
6096 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6097 : [uRet] "=r" (u32)
6098 : [uVal] "[uRet]" (u32));
6099 return u32;
6100
6101# else
6102# error "Port me"
6103# endif
6104}
6105#endif
6106
6107
6108/**
6109 * Reverse the byte order of the given 64-bit integer.
6110 *
6111 * @returns The byte-swapped value.
6112 * @param u64 64-bit integer value.
6113 */
6114DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6115{
6116#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6117 return _byteswap_uint64(u64);
6118
6119# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6120 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
6121 return u64;
6122
6123# elif defined(RT_ARCH_ARM64)
6124 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6125 : [uRet] "=r" (u64)
6126 : [uVal] "[uRet]" (u64));
6127 return u64;
6128
6129#else
6130 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6131 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6132#endif
6133}
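
/*
 * Illustrative values (not part of the original header): the byte swappers
 * above reverse the byte order, e.g. for converting between little and big
 * endian representations of the same integer.
 *
 *      ASMByteSwapU16(UINT16_C(0x1234));              // 0x3412
 *      ASMByteSwapU32(UINT32_C(0x12345678));          // 0x78563412
 *      ASMByteSwapU64(UINT64_C(0x0123456789abcdef));  // 0xefcdab8967452301
 */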
6134
6135
6136
6137/** @defgroup grp_inline_bits Bit Operations
6138 * @{
6139 */
6140
6141
6142/**
6143 * Sets a bit in a bitmap.
6144 *
6145 * @param pvBitmap Pointer to the bitmap (little endian). This should be
6146 * 32-bit aligned.
6147 * @param iBit The bit to set.
6148 *
6149 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6150 * However, doing so will yield better performance as well as avoiding
6151 * traps accessing the last bits in the bitmap.
6152 */
6153#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6154RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6155#else
6156DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6157{
6158# if RT_INLINE_ASM_USES_INTRIN
6159 _bittestandset((long RT_FAR *)pvBitmap, iBit);
6160
6161# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6162# if RT_INLINE_ASM_GNU_STYLE
6163 __asm__ __volatile__("btsl %1, %0"
6164 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6165 : "Ir" (iBit)
6166 , "m" (*(volatile long RT_FAR *)pvBitmap)
6167 : "memory"
6168 , "cc");
6169# else
6170 __asm
6171 {
6172# ifdef RT_ARCH_AMD64
6173 mov rax, [pvBitmap]
6174 mov edx, [iBit]
6175 bts [rax], edx
6176# else
6177 mov eax, [pvBitmap]
6178 mov edx, [iBit]
6179 bts [eax], edx
6180# endif
6181 }
6182# endif
6183
6184# else
6185 int32_t offBitmap = iBit / 32;
6186 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6187 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6188# endif
6189}
6190#endif
6191
6192
6193/**
6194 * Atomically sets a bit in a bitmap, ordered.
6195 *
6196 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6197 * aligned, otherwise the memory access isn't atomic!
6198 * @param iBit The bit to set.
6199 *
6200 * @remarks x86: Requires a 386 or later.
6201 */
6202#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6203RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6204#else
6205DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6206{
6207 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6208# if RT_INLINE_ASM_USES_INTRIN
6209 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6210# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6211# if RT_INLINE_ASM_GNU_STYLE
6212 __asm__ __volatile__("lock; btsl %1, %0"
6213 : "=m" (*(volatile long *)pvBitmap)
6214 : "Ir" (iBit)
6215 , "m" (*(volatile long *)pvBitmap)
6216 : "memory"
6217 , "cc");
6218# else
6219 __asm
6220 {
6221# ifdef RT_ARCH_AMD64
6222 mov rax, [pvBitmap]
6223 mov edx, [iBit]
6224 lock bts [rax], edx
6225# else
6226 mov eax, [pvBitmap]
6227 mov edx, [iBit]
6228 lock bts [eax], edx
6229# endif
6230 }
6231# endif
6232
6233# else
6234 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6235# endif
6236}
6237#endif
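
/*
 * Illustrative usage sketch (not part of the original header; the bitmap is
 * hypothetical): the plain and atomic setters differ only in whether
 * concurrent updates to the same 32-bit word are safe.
 *
 *      uint32_t au32Bitmap[8] = {0};        // 256 bits, 32-bit aligned
 *      ASMBitSet(au32Bitmap, 5);            // single-threaded setup
 *      ASMAtomicBitSet(au32Bitmap, 37);     // safe against concurrent writers
 */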
6238
6239
6240/**
6241 * Clears a bit in a bitmap.
6242 *
6243 * @param pvBitmap Pointer to the bitmap (little endian).
6244 * @param iBit The bit to clear.
6245 *
6246 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6247 * However, doing so will yield better performance as well as avoiding
6248 * traps accessing the last bits in the bitmap.
6249 */
6250#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6251RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6252#else
6253DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6254{
6255# if RT_INLINE_ASM_USES_INTRIN
6256 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6257
6258# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6259# if RT_INLINE_ASM_GNU_STYLE
6260 __asm__ __volatile__("btrl %1, %0"
6261 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6262 : "Ir" (iBit)
6263 , "m" (*(volatile long RT_FAR *)pvBitmap)
6264 : "memory"
6265 , "cc");
6266# else
6267 __asm
6268 {
6269# ifdef RT_ARCH_AMD64
6270 mov rax, [pvBitmap]
6271 mov edx, [iBit]
6272 btr [rax], edx
6273# else
6274 mov eax, [pvBitmap]
6275 mov edx, [iBit]
6276 btr [eax], edx
6277# endif
6278 }
6279# endif
6280
6281# else
6282 int32_t offBitmap = iBit / 32;
6283 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6284 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6285# endif
6286}
6287#endif
6288
6289
6290/**
6291 * Atomically clears a bit in a bitmap, ordered.
6292 *
6293 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6294 * aligned, otherwise the memory access isn't atomic!
6295 * @param iBit The bit to clear.
6296 *
6297 * @remarks No memory barrier, take care on smp.
6298 * @remarks x86: Requires a 386 or later.
6299 */
6300#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6301RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6302#else
6303DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6304{
6305 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6306# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6307# if RT_INLINE_ASM_GNU_STYLE
6308 __asm__ __volatile__("lock; btrl %1, %0"
6309 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6310 : "Ir" (iBit)
6311 , "m" (*(volatile long RT_FAR *)pvBitmap)
6312 : "memory"
6313 , "cc");
6314# else
6315 __asm
6316 {
6317# ifdef RT_ARCH_AMD64
6318 mov rax, [pvBitmap]
6319 mov edx, [iBit]
6320 lock btr [rax], edx
6321# else
6322 mov eax, [pvBitmap]
6323 mov edx, [iBit]
6324 lock btr [eax], edx
6325# endif
6326 }
6327# endif
6328# else
6329 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6330# endif
6331}
6332#endif
6333
6334
6335/**
6336 * Toggles a bit in a bitmap.
6337 *
6338 * @param pvBitmap Pointer to the bitmap (little endian).
6339 * @param iBit The bit to toggle.
6340 *
6341 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6342 * However, doing so will yield better performance as well as avoiding
6343 * traps accessing the last bits in the bitmap.
6344 */
6345#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6346RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6347#else
6348DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6349{
6350# if RT_INLINE_ASM_USES_INTRIN
6351 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6352# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6353# if RT_INLINE_ASM_GNU_STYLE
6354 __asm__ __volatile__("btcl %1, %0"
6355 : "=m" (*(volatile long *)pvBitmap)
6356 : "Ir" (iBit)
6357 , "m" (*(volatile long *)pvBitmap)
6358 : "memory"
6359 , "cc");
6360# else
6361 __asm
6362 {
6363# ifdef RT_ARCH_AMD64
6364 mov rax, [pvBitmap]
6365 mov edx, [iBit]
6366 btc [rax], edx
6367# else
6368 mov eax, [pvBitmap]
6369 mov edx, [iBit]
6370 btc [eax], edx
6371# endif
6372 }
6373# endif
6374# else
6375 int32_t offBitmap = iBit / 32;
6376 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6377 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6378# endif
6379}
6380#endif
6381
6382
6383/**
6384 * Atomically toggles a bit in a bitmap, ordered.
6385 *
6386 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6387 * aligned, otherwise the memory access isn't atomic!
6388 * @param iBit The bit to toggle.
6389 *
6390 * @remarks x86: Requires a 386 or later.
6391 */
6392#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6393RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6394#else
6395DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6396{
6397 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6398# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6399# if RT_INLINE_ASM_GNU_STYLE
6400 __asm__ __volatile__("lock; btcl %1, %0"
6401 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6402 : "Ir" (iBit)
6403 , "m" (*(volatile long RT_FAR *)pvBitmap)
6404 : "memory"
6405 , "cc");
6406# else
6407 __asm
6408 {
6409# ifdef RT_ARCH_AMD64
6410 mov rax, [pvBitmap]
6411 mov edx, [iBit]
6412 lock btc [rax], edx
6413# else
6414 mov eax, [pvBitmap]
6415 mov edx, [iBit]
6416 lock btc [eax], edx
6417# endif
6418 }
6419# endif
6420# else
6421 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6422# endif
6423}
6424#endif
6425
6426
6427/**
6428 * Tests and sets a bit in a bitmap.
6429 *
6430 * @returns true if the bit was set.
6431 * @returns false if the bit was clear.
6432 *
6433 * @param pvBitmap Pointer to the bitmap (little endian).
6434 * @param iBit The bit to test and set.
6435 *
6436 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6437 * However, doing so will yield better performance as well as avoiding
6438 * traps accessing the last bits in the bitmap.
6439 */
6440#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6441RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6442#else
6443DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6444{
6445 union { bool f; uint32_t u32; uint8_t u8; } rc;
6446# if RT_INLINE_ASM_USES_INTRIN
6447 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6448
6449# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6450# if RT_INLINE_ASM_GNU_STYLE
6451 __asm__ __volatile__("btsl %2, %1\n\t"
6452 "setc %b0\n\t"
6453 "andl $1, %0\n\t"
6454 : "=q" (rc.u32)
6455 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6456 : "Ir" (iBit)
6457 , "m" (*(volatile long RT_FAR *)pvBitmap)
6458 : "memory"
6459 , "cc");
6460# else
6461 __asm
6462 {
6463 mov edx, [iBit]
6464# ifdef RT_ARCH_AMD64
6465 mov rax, [pvBitmap]
6466 bts [rax], edx
6467# else
6468 mov eax, [pvBitmap]
6469 bts [eax], edx
6470# endif
6471 setc al
6472 and eax, 1
6473 mov [rc.u32], eax
6474 }
6475# endif
6476
6477# else
6478 int32_t offBitmap = iBit / 32;
6479 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6480 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6481 >> (iBit & 31);
6482 rc.u32 &= 1;
6483# endif
6484 return rc.f;
6485}
6486#endif
6487
6488
6489/**
6490 * Atomically tests and sets a bit in a bitmap, ordered.
6491 *
6492 * @returns true if the bit was set.
6493 * @returns false if the bit was clear.
6494 *
6495 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6496 * aligned, otherwise the memory access isn't atomic!
6497 * @param iBit The bit to test and set.
6498 *
6499 * @remarks x86: Requires a 386 or later.
6500 */
6501#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6502RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6503#else
6504DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6505{
6506 union { bool f; uint32_t u32; uint8_t u8; } rc;
6507 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6508# if RT_INLINE_ASM_USES_INTRIN
6509 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6510# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6511# if RT_INLINE_ASM_GNU_STYLE
6512 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6513 "setc %b0\n\t"
6514 "andl $1, %0\n\t"
6515 : "=q" (rc.u32)
6516 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6517 : "Ir" (iBit)
6518 , "m" (*(volatile long RT_FAR *)pvBitmap)
6519 : "memory"
6520 , "cc");
6521# else
6522 __asm
6523 {
6524 mov edx, [iBit]
6525# ifdef RT_ARCH_AMD64
6526 mov rax, [pvBitmap]
6527 lock bts [rax], edx
6528# else
6529 mov eax, [pvBitmap]
6530 lock bts [eax], edx
6531# endif
6532 setc al
6533 and eax, 1
6534 mov [rc.u32], eax
6535 }
6536# endif
6537
6538# else
6539 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6540 >> (iBit & 31);
6541 rc.u32 &= 1;
6542# endif
6543 return rc.f;
6544}
6545#endif
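
/*
 * Illustrative usage sketch (not part of the original header; the bitmap and
 * slot count are hypothetical): claiming a free slot in a shared allocation
 * bitmap with the atomic test-and-set above.
 *
 *      static uint32_t volatile s_au32SlotBitmap[4] = {0};  // 128 slots
 *
 *      static int32_t AllocSlot(void)
 *      {
 *          int32_t iSlot;
 *          for (iSlot = 0; iSlot < 128; iSlot++)
 *              if (!ASMAtomicBitTestAndSet(s_au32SlotBitmap, iSlot))
 *                  return iSlot;   // the bit was clear and is now ours
 *          return -1;              // no free slot
 *      }
 */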
6546
6547
6548/**
6549 * Tests and clears a bit in a bitmap.
6550 *
6551 * @returns true if the bit was set.
6552 * @returns false if the bit was clear.
6553 *
6554 * @param pvBitmap Pointer to the bitmap (little endian).
6555 * @param iBit The bit to test and clear.
6556 *
6557 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6558 * However, doing so will yield better performance as well as avoiding
6559 * traps accessing the last bits in the bitmap.
6560 */
6561#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6562RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6563#else
6564DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6565{
6566 union { bool f; uint32_t u32; uint8_t u8; } rc;
6567# if RT_INLINE_ASM_USES_INTRIN
6568 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6569
6570# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6571# if RT_INLINE_ASM_GNU_STYLE
6572 __asm__ __volatile__("btrl %2, %1\n\t"
6573 "setc %b0\n\t"
6574 "andl $1, %0\n\t"
6575 : "=q" (rc.u32)
6576 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6577 : "Ir" (iBit)
6578 , "m" (*(volatile long RT_FAR *)pvBitmap)
6579 : "memory"
6580 , "cc");
6581# else
6582 __asm
6583 {
6584 mov edx, [iBit]
6585# ifdef RT_ARCH_AMD64
6586 mov rax, [pvBitmap]
6587 btr [rax], edx
6588# else
6589 mov eax, [pvBitmap]
6590 btr [eax], edx
6591# endif
6592 setc al
6593 and eax, 1
6594 mov [rc.u32], eax
6595 }
6596# endif
6597
6598# else
6599 int32_t offBitmap = iBit / 32;
6600 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6601 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6602 >> (iBit & 31);
6603 rc.u32 &= 1;
6604# endif
6605 return rc.f;
6606}
6607#endif
6608
6609
6610/**
6611 * Atomically tests and clears a bit in a bitmap, ordered.
6612 *
6613 * @returns true if the bit was set.
6614 * @returns false if the bit was clear.
6615 *
6616 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6617 * aligned, otherwise the memory access isn't atomic!
6618 * @param iBit The bit to test and clear.
6619 *
6620 * @remarks No memory barrier, take care on smp.
6621 * @remarks x86: Requires a 386 or later.
6622 */
6623#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6624RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6625#else
6626DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6627{
6628 union { bool f; uint32_t u32; uint8_t u8; } rc;
6629 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6630# if RT_INLINE_ASM_USES_INTRIN
6631 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6632
6633# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6634# if RT_INLINE_ASM_GNU_STYLE
6635 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6636 "setc %b0\n\t"
6637 "andl $1, %0\n\t"
6638 : "=q" (rc.u32)
6639 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6640 : "Ir" (iBit)
6641 , "m" (*(volatile long RT_FAR *)pvBitmap)
6642 : "memory"
6643 , "cc");
6644# else
6645 __asm
6646 {
6647 mov edx, [iBit]
6648# ifdef RT_ARCH_AMD64
6649 mov rax, [pvBitmap]
6650 lock btr [rax], edx
6651# else
6652 mov eax, [pvBitmap]
6653 lock btr [eax], edx
6654# endif
6655 setc al
6656 and eax, 1
6657 mov [rc.u32], eax
6658 }
6659# endif
6660
6661# else
6662 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6663 >> (iBit & 31);
6664 rc.u32 &= 1;
6665# endif
6666 return rc.f;
6667}
6668#endif
6669
6670
6671/**
6672 * Tests and toggles a bit in a bitmap.
6673 *
6674 * @returns true if the bit was set.
6675 * @returns false if the bit was clear.
6676 *
6677 * @param pvBitmap Pointer to the bitmap (little endian).
6678 * @param iBit The bit to test and toggle.
6679 *
6680 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6681 * However, doing so will yield better performance as well as avoiding
6682 * traps accessing the last bits in the bitmap.
6683 */
6684#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6685RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6686#else
6687DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6688{
6689 union { bool f; uint32_t u32; uint8_t u8; } rc;
6690# if RT_INLINE_ASM_USES_INTRIN
6691 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6692
6693# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6694# if RT_INLINE_ASM_GNU_STYLE
6695 __asm__ __volatile__("btcl %2, %1\n\t"
6696 "setc %b0\n\t"
6697 "andl $1, %0\n\t"
6698 : "=q" (rc.u32)
6699 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6700 : "Ir" (iBit)
6701 , "m" (*(volatile long RT_FAR *)pvBitmap)
6702 : "memory"
6703 , "cc");
6704# else
6705 __asm
6706 {
6707 mov edx, [iBit]
6708# ifdef RT_ARCH_AMD64
6709 mov rax, [pvBitmap]
6710 btc [rax], edx
6711# else
6712 mov eax, [pvBitmap]
6713 btc [eax], edx
6714# endif
6715 setc al
6716 and eax, 1
6717 mov [rc.u32], eax
6718 }
6719# endif
6720
6721# else
6722 int32_t offBitmap = iBit / 32;
6723 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6724 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6725 >> (iBit & 31);
6726 rc.u32 &= 1;
6727# endif
6728 return rc.f;
6729}
6730#endif
6731
6732
6733/**
6734 * Atomically tests and toggles a bit in a bitmap, ordered.
6735 *
6736 * @returns true if the bit was set.
6737 * @returns false if the bit was clear.
6738 *
6739 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6740 * aligned, otherwise the memory access isn't atomic!
6741 * @param iBit The bit to test and toggle.
6742 *
6743 * @remarks x86: Requires a 386 or later.
6744 */
6745#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6746RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6747#else
6748DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6749{
6750 union { bool f; uint32_t u32; uint8_t u8; } rc;
6751 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6752# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6753# if RT_INLINE_ASM_GNU_STYLE
6754 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6755 "setc %b0\n\t"
6756 "andl $1, %0\n\t"
6757 : "=q" (rc.u32)
6758 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6759 : "Ir" (iBit)
6760 , "m" (*(volatile long RT_FAR *)pvBitmap)
6761 : "memory"
6762 , "cc");
6763# else
6764 __asm
6765 {
6766 mov edx, [iBit]
6767# ifdef RT_ARCH_AMD64
6768 mov rax, [pvBitmap]
6769 lock btc [rax], edx
6770# else
6771 mov eax, [pvBitmap]
6772 lock btc [eax], edx
6773# endif
6774 setc al
6775 and eax, 1
6776 mov [rc.u32], eax
6777 }
6778# endif
6779
6780# else
6781 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6782 >> (iBit & 31);
6783 rc.u32 &= 1;
6784# endif
6785 return rc.f;
6786}
6787#endif
6788
6789
6790/**
6791 * Tests if a bit in a bitmap is set.
6792 *
6793 * @returns true if the bit is set.
6794 * @returns false if the bit is clear.
6795 *
6796 * @param pvBitmap Pointer to the bitmap (little endian).
6797 * @param iBit The bit to test.
6798 *
6799 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6800 * However, doing so will yield better performance as well as avoiding
6801 * traps accessing the last bits in the bitmap.
6802 */
6803#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6804RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6805#else
6806DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6807{
6808 union { bool f; uint32_t u32; uint8_t u8; } rc;
6809# if RT_INLINE_ASM_USES_INTRIN
6810 rc.u32 = _bittest((long *)pvBitmap, iBit);
6811
6812# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6813# if RT_INLINE_ASM_GNU_STYLE
6814
6815 __asm__ __volatile__("btl %2, %1\n\t"
6816 "setc %b0\n\t"
6817 "andl $1, %0\n\t"
6818 : "=q" (rc.u32)
6819 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6820 , "Ir" (iBit)
6821 : "memory"
6822 , "cc");
6823# else
6824 __asm
6825 {
6826 mov edx, [iBit]
6827# ifdef RT_ARCH_AMD64
6828 mov rax, [pvBitmap]
6829 bt [rax], edx
6830# else
6831 mov eax, [pvBitmap]
6832 bt [eax], edx
6833# endif
6834 setc al
6835 and eax, 1
6836 mov [rc.u32], eax
6837 }
6838# endif
6839
6840# else
6841 int32_t offBitmap = iBit / 32;
6842 AssertRelease(!((uintptr_t)pvBitmap & (sizeof(uint32_t) - 1)));
6843 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6844 rc.u32 &= 1;
6845# endif
6846 return rc.f;
6847}
6848#endif
6849
6850
6851/**
6852 * Clears a bit range within a bitmap.
6853 *
6854 * @param pvBitmap Pointer to the bitmap (little endian).
6855 * @param iBitStart The first bit to clear.
6856 * @param iBitEnd The first bit not to clear.
6857 */
6858DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6859{
6860 if (iBitStart < iBitEnd)
6861 {
6862 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6863 size_t iStart = iBitStart & ~(size_t)31;
6864 size_t iEnd = iBitEnd & ~(size_t)31;
6865 if (iStart == iEnd)
6866 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6867 else
6868 {
6869 /* bits in first dword. */
6870 if (iBitStart & 31)
6871 {
6872 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6873 pu32++;
6874 iBitStart = iStart + 32;
6875 }
6876
6877 /* whole dwords. */
6878 if (iBitStart != iEnd)
6879 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6880
6881 /* bits in last dword. */
6882 if (iBitEnd & 31)
6883 {
6884 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6885 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6886 }
6887 }
6888 }
6889}
6890
6891
6892/**
6893 * Sets a bit range within a bitmap.
6894 *
6895 * @param pvBitmap Pointer to the bitmap (little endian).
6896 * @param iBitStart The first bit to set.
6897 * @param iBitEnd The first bit not to set.
6898 */
6899DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6900{
6901 if (iBitStart < iBitEnd)
6902 {
6903 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6904 size_t iStart = iBitStart & ~(size_t)31;
6905 size_t iEnd = iBitEnd & ~(size_t)31;
6906 if (iStart == iEnd)
6907 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6908 else
6909 {
6910 /* bits in first dword. */
6911 if (iBitStart & 31)
6912 {
6913 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6914 pu32++;
6915 iBitStart = iStart + 32;
6916 }
6917
6918 /* whole dwords. */
6919 if (iBitStart != iEnd)
6920 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
6921
6922 /* bits in last dword. */
6923 if (iBitEnd & 31)
6924 {
6925 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6926 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6927 }
6928 }
6929 }
6930}
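
/*
 * Illustrative usage sketch (not part of the original header; the bitmap is
 * hypothetical): the range variants take a half-open [iBitStart, iBitEnd)
 * interval, so the end bit itself is left untouched.
 *
 *      uint32_t au32Bitmap[4] = {0};          // 128 bits
 *      ASMBitSetRange(au32Bitmap, 4, 68);     // sets bits 4 thru 67
 *      ASMBitClearRange(au32Bitmap, 32, 64);  // clears bits 32 thru 63 again
 */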
6931
6932
6933/**
6934 * Finds the first clear bit in a bitmap.
6935 *
6936 * @returns Index of the first zero bit.
6937 * @returns -1 if no clear bit was found.
6938 * @param pvBitmap Pointer to the bitmap (little endian).
6939 * @param cBits The number of bits in the bitmap. Multiple of 32.
6940 */
6941#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6942DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6943#else
6944DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6945{
6946 if (cBits)
6947 {
6948 int32_t iBit;
6949# if RT_INLINE_ASM_GNU_STYLE
6950 RTCCUINTREG uEAX, uECX, uEDI;
6951 cBits = RT_ALIGN_32(cBits, 32);
6952 __asm__ __volatile__("repe; scasl\n\t"
6953 "je 1f\n\t"
6954# ifdef RT_ARCH_AMD64
6955 "lea -4(%%rdi), %%rdi\n\t"
6956 "xorl (%%rdi), %%eax\n\t"
6957 "subq %5, %%rdi\n\t"
6958# else
6959 "lea -4(%%edi), %%edi\n\t"
6960 "xorl (%%edi), %%eax\n\t"
6961 "subl %5, %%edi\n\t"
6962# endif
6963 "shll $3, %%edi\n\t"
6964 "bsfl %%eax, %%edx\n\t"
6965 "addl %%edi, %%edx\n\t"
6966 "1:\t\n"
6967 : "=d" (iBit)
6968 , "=&c" (uECX)
6969 , "=&D" (uEDI)
6970 , "=&a" (uEAX)
6971 : "0" (0xffffffff)
6972 , "mr" (pvBitmap)
6973 , "1" (cBits >> 5)
6974 , "2" (pvBitmap)
6975 , "3" (0xffffffff)
6976 : "cc");
6977# else
6978 cBits = RT_ALIGN_32(cBits, 32);
6979 __asm
6980 {
6981# ifdef RT_ARCH_AMD64
6982 mov rdi, [pvBitmap]
6983 mov rbx, rdi
6984# else
6985 mov edi, [pvBitmap]
6986 mov ebx, edi
6987# endif
6988 mov edx, 0ffffffffh
6989 mov eax, edx
6990 mov ecx, [cBits]
6991 shr ecx, 5
6992 repe scasd
6993 je done
6994
6995# ifdef RT_ARCH_AMD64
6996 lea rdi, [rdi - 4]
6997 xor eax, [rdi]
6998 sub rdi, rbx
6999# else
7000 lea edi, [edi - 4]
7001 xor eax, [edi]
7002 sub edi, ebx
7003# endif
7004 shl edi, 3
7005 bsf edx, eax
7006 add edx, edi
7007 done:
7008 mov [iBit], edx
7009 }
7010# endif
7011 return iBit;
7012 }
7013 return -1;
7014}
7015#endif
7016
7017
7018/**
7019 * Finds the next clear bit in a bitmap.
7020 *
7021 * @returns Index of the first zero bit.
7022 * @returns -1 if no clear bit was found.
7023 * @param pvBitmap Pointer to the bitmap (little endian).
7024 * @param cBits The number of bits in the bitmap. Multiple of 32.
7025 * @param iBitPrev The bit returned from the last search.
7026 * The search will start at iBitPrev + 1.
7027 */
7028#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7029DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7030#else
7031DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7032{
7033 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7034 int iBit = ++iBitPrev & 31;
7035 if (iBit)
7036 {
7037 /*
7038 * Inspect the 32-bit word containing the unaligned bit.
7039 */
7040 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
7041
7042# if RT_INLINE_ASM_USES_INTRIN
7043 unsigned long ulBit = 0;
7044 if (_BitScanForward(&ulBit, u32))
7045 return ulBit + iBitPrev;
7046# else
7047# if RT_INLINE_ASM_GNU_STYLE
7048 __asm__ __volatile__("bsf %1, %0\n\t"
7049 "jnz 1f\n\t"
7050 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
7051 "1:\n\t"
7052 : "=r" (iBit)
7053 : "r" (u32)
7054 : "cc");
7055# else
7056 __asm
7057 {
7058 mov edx, [u32]
7059 bsf eax, edx
7060 jnz done
7061 mov eax, 0ffffffffh
7062 done:
7063 mov [iBit], eax
7064 }
7065# endif
7066 if (iBit >= 0)
7067 return iBit + (int)iBitPrev;
7068# endif
7069
7070 /*
7071 * Skip ahead and see if there is anything left to search.
7072 */
7073 iBitPrev |= 31;
7074 iBitPrev++;
7075 if (cBits <= (uint32_t)iBitPrev)
7076 return -1;
7077 }
7078
7079 /*
7080 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
7081 */
7082 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7083 if (iBit >= 0)
7084 iBit += iBitPrev;
7085 return iBit;
7086}
7087#endif
7088
7089
7090/**
7091 * Finds the first set bit in a bitmap.
7092 *
7093 * @returns Index of the first set bit.
7094 * @returns -1 if no set bit was found.
7095 * @param pvBitmap Pointer to the bitmap (little endian).
7096 * @param cBits The number of bits in the bitmap. Multiple of 32.
7097 */
7098#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7099DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
7100#else
7101DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
7102{
7103 if (cBits)
7104 {
7105 int32_t iBit;
7106# if RT_INLINE_ASM_GNU_STYLE
7107 RTCCUINTREG uEAX, uECX, uEDI;
7108 cBits = RT_ALIGN_32(cBits, 32);
7109 __asm__ __volatile__("repe; scasl\n\t"
7110 "je 1f\n\t"
7111# ifdef RT_ARCH_AMD64
7112 "lea -4(%%rdi), %%rdi\n\t"
7113 "movl (%%rdi), %%eax\n\t"
7114 "subq %5, %%rdi\n\t"
7115# else
7116 "lea -4(%%edi), %%edi\n\t"
7117 "movl (%%edi), %%eax\n\t"
7118 "subl %5, %%edi\n\t"
7119# endif
7120 "shll $3, %%edi\n\t"
7121 "bsfl %%eax, %%edx\n\t"
7122 "addl %%edi, %%edx\n\t"
7123 "1:\t\n"
7124 : "=d" (iBit)
7125 , "=&c" (uECX)
7126 , "=&D" (uEDI)
7127 , "=&a" (uEAX)
7128 : "0" (0xffffffff)
7129 , "mr" (pvBitmap)
7130 , "1" (cBits >> 5)
7131 , "2" (pvBitmap)
7132 , "3" (0)
7133 : "cc");
7134# else
7135 cBits = RT_ALIGN_32(cBits, 32);
7136 __asm
7137 {
7138# ifdef RT_ARCH_AMD64
7139 mov rdi, [pvBitmap]
7140 mov rbx, rdi
7141# else
7142 mov edi, [pvBitmap]
7143 mov ebx, edi
7144# endif
7145 mov edx, 0ffffffffh
7146 xor eax, eax
7147 mov ecx, [cBits]
7148 shr ecx, 5
7149 repe scasd
7150 je done
7151# ifdef RT_ARCH_AMD64
7152 lea rdi, [rdi - 4]
7153 mov eax, [rdi]
7154 sub rdi, rbx
7155# else
7156 lea edi, [edi - 4]
7157 mov eax, [edi]
7158 sub edi, ebx
7159# endif
7160 shl edi, 3
7161 bsf edx, eax
7162 add edx, edi
7163 done:
7164 mov [iBit], edx
7165 }
7166# endif
7167 return iBit;
7168 }
7169 return -1;
7170}
7171#endif
7172
7173
7174/**
7175 * Finds the next set bit in a bitmap.
7176 *
7177 * @returns Index of the next set bit.
7178 * @returns -1 if no set bit was found.
7179 * @param pvBitmap Pointer to the bitmap (little endian).
7180 * @param cBits The number of bits in the bitmap. Multiple of 32.
7181 * @param iBitPrev The bit returned from the last search.
7182 * The search will start at iBitPrev + 1.
7183 */
7184#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7185DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7186#else
7187DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7188{
7189 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7190 int iBit = ++iBitPrev & 31;
7191 if (iBit)
7192 {
7193 /*
7194 * Inspect the 32-bit word containing the unaligned bit.
7195 */
7196 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7197
7198# if RT_INLINE_ASM_USES_INTRIN
7199 unsigned long ulBit = 0;
7200 if (_BitScanForward(&ulBit, u32))
7201 return ulBit + iBitPrev;
7202# else
7203# if RT_INLINE_ASM_GNU_STYLE
7204 __asm__ __volatile__("bsf %1, %0\n\t"
7205 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7206 "movl $-1, %0\n\t"
7207 "1:\n\t"
7208 : "=r" (iBit)
7209 : "r" (u32)
7210 : "cc");
7211# else
7212 __asm
7213 {
7214 mov edx, [u32]
7215 bsf eax, edx
7216 jnz done
7217 mov eax, 0ffffffffh
7218 done:
7219 mov [iBit], eax
7220 }
7221# endif
7222 if (iBit >= 0)
7223 return iBit + (int)iBitPrev;
7224# endif
7225
7226 /*
7227 * Skip ahead and see if there is anything left to search.
7228 */
7229 iBitPrev |= 31;
7230 iBitPrev++;
7231 if (cBits <= (uint32_t)iBitPrev)
7232 return -1;
7233 }
7234
7235 /*
7236 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
7237 */
7238 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7239 if (iBit >= 0)
7240 iBit += iBitPrev;
7241 return iBit;
7242}
7243#endif
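
/*
 * Illustrative usage sketch (not part of the original header; au32Bitmap and
 * cBits are hypothetical, cBits being a multiple of 32): iterating all set
 * bits with the first/next pair above.
 *
 *      int32_t iBit = ASMBitFirstSet(au32Bitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          // ... process bit iBit ...
 *          iBit = ASMBitNextSet(au32Bitmap, cBits, (uint32_t)iBit);
 *      }
 */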
7244
7245
7246/**
7247 * Finds the first bit which is set in the given 32-bit integer.
7248 * Bits are numbered from 1 (least significant) to 32.
7249 *
7250 * @returns index [1..32] of the first set bit.
7251 * @returns 0 if all bits are cleared.
7252 * @param u32 Integer to search for set bits.
7253 * @remarks Similar to ffs() in BSD.
7254 */
7255#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7256RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7257#else
7258DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7259{
7260# if RT_INLINE_ASM_USES_INTRIN
7261 unsigned long iBit;
7262 if (_BitScanForward(&iBit, u32))
7263 iBit++;
7264 else
7265 iBit = 0;
7266
7267# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7268# if RT_INLINE_ASM_GNU_STYLE
7269 uint32_t iBit;
7270 __asm__ __volatile__("bsf %1, %0\n\t"
7271 "jnz 1f\n\t"
7272 "xorl %0, %0\n\t"
7273 "jmp 2f\n"
7274 "1:\n\t"
7275 "incl %0\n"
7276 "2:\n\t"
7277 : "=r" (iBit)
7278 : "rm" (u32)
7279 : "cc");
7280# else
7281 uint32_t iBit;
7282 _asm
7283 {
7284 bsf eax, [u32]
7285 jnz found
7286 xor eax, eax
7287 jmp done
7288 found:
7289 inc eax
7290 done:
7291 mov [iBit], eax
7292 }
7293# endif
7294
7295# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7296 /*
7297 * Using the "count leading zeros (clz)" instruction here because there
7298 * is no dedicated instruction to get the first set bit.
7299 * Need to reverse the bits in the value with "rbit" first because
7300 * "clz" starts counting from the most significant bit.
7301 */
7302 uint32_t iBit;
7303 __asm__ __volatile__(
7304# if defined(RT_ARCH_ARM64)
7305 "rbit %w[uVal], %w[uVal]\n\t"
7306 "clz %w[iBit], %w[uVal]\n\t"
7307# else
7308 "rbit %[uVal], %[uVal]\n\t"
7309 "clz %[iBit], %[uVal]\n\t"
7310# endif
7311 : [uVal] "=r" (u32)
7312 , [iBit] "=r" (iBit)
7313 : "[uVal]" (u32));
7314 if (iBit != 32)
7315 iBit++;
7316 else
7317 iBit = 0; /* No bit set. */
7318
7319# else
7320# error "Port me"
7321# endif
7322 return iBit;
7323}
7324#endif
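
/*
 * Illustrative values (not part of the original header): note the 1-based
 * result convention, matching ffs().
 *
 *      ASMBitFirstSetU32(0);                // 0 - no bit set
 *      ASMBitFirstSetU32(UINT32_C(0x50));   // 5 - bit 4 is the lowest set bit
 */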
7325
7326
7327/**
7328 * Finds the first bit which is set in the given 32-bit integer.
7329 * Bits are numbered from 1 (least significant) to 32.
7330 *
7331 * @returns index [1..32] of the first set bit.
7332 * @returns 0 if all bits are cleared.
7333 * @param i32 Integer to search for set bits.
7334 * @remark Similar to ffs() in BSD.
7335 */
7336DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7337{
7338 return ASMBitFirstSetU32((uint32_t)i32);
7339}
7340
7341
7342/**
7343 * Finds the first bit which is set in the given 64-bit integer.
7344 *
7345 * Bits are numbered from 1 (least significant) to 64.
7346 *
7347 * @returns index [1..64] of the first set bit.
7348 * @returns 0 if all bits are cleared.
7349 * @param u64 Integer to search for set bits.
7350 * @remarks Similar to ffs() in BSD.
7351 */
7352#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7353RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7354#else
7355DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7356{
7357# if RT_INLINE_ASM_USES_INTRIN
7358 unsigned long iBit;
7359# if ARCH_BITS == 64
7360 if (_BitScanForward64(&iBit, u64))
7361 iBit++;
7362 else
7363 iBit = 0;
7364# else
7365 if (_BitScanForward(&iBit, (uint32_t)u64))
7366 iBit++;
7367 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7368 iBit += 33;
7369 else
7370 iBit = 0;
7371# endif
7372
7373# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7374 uint64_t iBit;
7375 __asm__ __volatile__("bsfq %1, %0\n\t"
7376 "jnz 1f\n\t"
7377 "xorl %k0, %k0\n\t"
7378 "jmp 2f\n"
7379 "1:\n\t"
7380 "incl %k0\n"
7381 "2:\n\t"
7382 : "=r" (iBit)
7383 : "rm" (u64)
7384 : "cc");
7385
7386# elif defined(RT_ARCH_ARM64)
7387 uint64_t iBit;
7388 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7389 "clz %[iBit], %[uVal]\n\t"
7390 : [uVal] "=r" (u64)
7391 , [iBit] "=r" (iBit)
7392 : "[uVal]" (u64));
7393 if (iBit != 64)
7394 iBit++;
7395 else
7396 iBit = 0; /* No bit set. */
7397
7398# else
7399 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7400 if (!iBit)
7401 {
7402 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7403 if (iBit)
7404 iBit += 32;
7405 }
7406# endif
7407 return (unsigned)iBit;
7408}
7409#endif
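/*
 * Illustrative sanity checks, not part of the original header: they show
 * why the generic fallback above adds 32 when the set bit is found in the
 * high half, keeping the 1-based numbering consistent.  The helper name is
 * made up for the example.
 */
#if 0 /* illustration only */
static void rtAsmExampleBitFirstSetU64(void)
{
    Assert(ASMBitFirstSetU64(UINT64_C(0x0000000000000001)) ==  1); /* bit 0 -> index 1 */
    Assert(ASMBitFirstSetU64(UINT64_C(0x0000000100000000)) == 33); /* bit 32 -> 1 + 32 */
    Assert(ASMBitFirstSetU64(UINT64_C(0x0000000000000000)) ==  0); /* no bit set */
}
#endif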
7410
7411
7412/**
7413 * Finds the first bit which is set in the given 16-bit integer.
7414 *
7415 * Bits are numbered from 1 (least significant) to 16.
7416 *
7417 * @returns index [1..16] of the first set bit.
7418 * @returns 0 if all bits are cleared.
7419 * @param u16 Integer to search for set bits.
7420 * @remarks For 16-bit bs3kit code.
7421 */
7422#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7423RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7424#else
7425DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7426{
7427 return ASMBitFirstSetU32((uint32_t)u16);
7428}
7429#endif
7430
7431
7432/**
7433 * Finds the last bit which is set in the given 32-bit integer.
7434 * Bits are numbered from 1 (least significant) to 32.
7435 *
7436 * @returns index [1..32] of the last set bit.
7437 * @returns 0 if all bits are cleared.
7438 * @param u32 Integer to search for set bits.
7439 * @remark Similar to fls() in BSD.
7440 */
7441#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7442RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7443#else
7444DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7445{
7446# if RT_INLINE_ASM_USES_INTRIN
7447 unsigned long iBit;
7448 if (_BitScanReverse(&iBit, u32))
7449 iBit++;
7450 else
7451 iBit = 0;
7452
7453# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7454# if RT_INLINE_ASM_GNU_STYLE
7455 uint32_t iBit;
7456 __asm__ __volatile__("bsrl %1, %0\n\t"
7457 "jnz 1f\n\t"
7458 "xorl %0, %0\n\t"
7459 "jmp 2f\n"
7460 "1:\n\t"
7461 "incl %0\n"
7462 "2:\n\t"
7463 : "=r" (iBit)
7464 : "rm" (u32)
7465 : "cc");
7466# else
7467 uint32_t iBit;
7468 _asm
7469 {
7470 bsr eax, [u32]
7471 jnz found
7472 xor eax, eax
7473 jmp done
7474 found:
7475 inc eax
7476 done:
7477 mov [iBit], eax
7478 }
7479# endif
7480
7481# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7482 uint32_t iBit;
7483 __asm__ __volatile__(
7484# if defined(RT_ARCH_ARM64)
7485 "clz %w[iBit], %w[uVal]\n\t"
7486# else
7487 "clz %[iBit], %[uVal]\n\t"
7488# endif
7489 : [iBit] "=r" (iBit)
7490 : [uVal] "r" (u32));
7491 iBit = 32 - iBit;
7492
7493# else
7494# error "Port me"
7495# endif
7496 return iBit;
7497}
7498#endif
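/*
 * Illustrative sketch, not original code: it restates the identity used by
 * the ARM path above, namely that for a non-zero value the 1-based index
 * of the last (most significant) set bit equals 32 minus the leading zero
 * count.  The helper name is made up for the example.
 */
#if 0 /* illustration only */
static unsigned rtAsmExampleBitLastSetU32(uint32_t u32)
{
    unsigned iBit = 0;
    while (u32)                     /* count the bits in use; the loop ends after the top set bit */
    {
        u32 >>= 1;
        iBit++;
    }
    return iBit;                    /* 0 if no bit was set, otherwise 32 - clz(u32) */
}
#endif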
7499
7500
7501/**
7502 * Finds the last bit which is set in the given 32-bit integer.
7503 * Bits are numbered from 1 (least significant) to 32.
7504 *
7505 * @returns index [1..32] of the last set bit.
7506 * @returns 0 if all bits are cleared.
7507 * @param i32 Integer to search for set bits.
7508 * @remark Similar to fls() in BSD.
7509 */
7510DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7511{
7512 return ASMBitLastSetU32((uint32_t)i32);
7513}
7514
7515
7516/**
7517 * Finds the last bit which is set in the given 64-bit integer.
7518 *
7519 * Bits are numbered from 1 (least significant) to 64.
7520 *
7521 * @returns index [1..64] of the last set bit.
7522 * @returns 0 if all bits are cleared.
7523 * @param u64 Integer to search for set bits.
7524 * @remark Similar to fls() in BSD.
7525 */
7526#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7527RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7528#else
7529DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7530{
7531# if RT_INLINE_ASM_USES_INTRIN
7532 unsigned long iBit;
7533# if ARCH_BITS == 64
7534 if (_BitScanReverse64(&iBit, u64))
7535 iBit++;
7536 else
7537 iBit = 0;
7538# else
7539 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7540 iBit += 33;
7541 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7542 iBit++;
7543 else
7544 iBit = 0;
7545# endif
7546
7547# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7548 uint64_t iBit;
7549 __asm__ __volatile__("bsrq %1, %0\n\t"
7550 "jnz 1f\n\t"
7551 "xorl %k0, %k0\n\t"
7552 "jmp 2f\n"
7553 "1:\n\t"
7554 "incl %k0\n"
7555 "2:\n\t"
7556 : "=r" (iBit)
7557 : "rm" (u64)
7558 : "cc");
7559
7560# elif defined(RT_ARCH_ARM64)
7561 uint64_t iBit;
7562 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7563 : [iBit] "=r" (iBit)
7564 : [uVal] "r" (u64));
7565 iBit = 64 - iBit;
7566
7567# else
7568 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7569 if (iBit)
7570 iBit += 32;
7571 else
7572 iBit = ASMBitLastSetU32((uint32_t)u64);
7573# endif
7574 return (unsigned)iBit;
7575}
7576#endif
7577
7578
7579/**
7580 * Finds the last bit which is set in the given 16-bit integer.
7581 *
7582 * Bits are numbered from 1 (least significant) to 16.
7583 *
7584 * @returns index [1..16] of the last set bit.
7585 * @returns 0 if all bits are cleared.
7586 * @param u16 Integer to search for set bits.
7587 * @remarks For 16-bit bs3kit code.
7588 */
7589#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7590RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7591#else
7592DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7593{
7594 return ASMBitLastSetU32((uint32_t)u16);
7595}
7596#endif
7597
7598
7599/**
7600 * Count the number of leading zero bits in the given 32-bit integer.
7601 *
7602 * The counting starts with the most significant bit.
7603 *
7604 * @returns Number of most significant zero bits.
7605 * @returns 32 if all bits are cleared.
7606 * @param u32 Integer to consider.
7607 * @remarks Similar to __builtin_clz() in gcc, except the result for a zero input is defined.
7608 */
7609#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7610RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7611#else
7612DECLINLINE(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7613{
7614# if RT_INLINE_ASM_USES_INTRIN
7615 unsigned long iBit;
7616 if (!_BitScanReverse(&iBit, u32))
7617 return 32;
7618 return 31 - (unsigned)iBit;
7619
7620# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7621 uint32_t iBit;
7622# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 929 vs 237 ps/call */
7623 __asm__ __volatile__("bsrl %1, %0\n\t"
7624 "cmovzl %2, %0\n\t"
7625 : "=&r" (iBit)
7626 : "rm" (u32)
7627 , "rm" ((int32_t)-1)
7628 : "cc");
7629# elif RT_INLINE_ASM_GNU_STYLE
7630 __asm__ __volatile__("bsr %1, %0\n\t"
7631 "jnz 1f\n\t"
7632 "mov $-1, %0\n\t"
7633 "1:\n\t"
7634 : "=r" (iBit)
7635 : "rm" (u32)
7636 : "cc");
7637# else
7638 _asm
7639 {
7640 bsr eax, [u32]
7641 jnz found
7642 mov eax, -1
7643 found:
7644 mov [iBit], eax
7645 }
7646# endif
7647 return 31 - iBit;
7648
7649# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7650 uint32_t iBit;
7651 __asm__ __volatile__(
7652# if defined(RT_ARCH_ARM64)
7653 "clz %w[iBit], %w[uVal]\n\t"
7654# else
7655 "clz %[iBit], %[uVal]\n\t"
7656# endif
7657 : [uVal] "=r" (u32)
7658 , [iBit] "=r" (iBit)
7659 : "[uVal]" (u32));
7660 return iBit;
7661
7662# elif defined(__GNUC__)
7663 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7664 return u32 ? __builtin_clz(u32) : 32;
7665
7666# else
7667# error "Port me"
7668# endif
7669}
7670#endif
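/*
 * Small usage sketch, not part of the original header: one common use of a
 * leading-zero count is rounding a value up to the next power of two.  The
 * helper name is made up, and the caller is assumed to keep u32 at or
 * below 0x80000000 so the shift count stays below 32.
 */
#if 0 /* illustration only */
static uint32_t rtAsmExampleRoundUpToPowerOfTwoU32(uint32_t u32)
{
    if (u32 <= 1)
        return 1;
    return UINT32_C(1) << (32 - ASMCountLeadingZerosU32(u32 - 1));
}
#endif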
7671
7672
7673/**
7674 * Count the number of leading zero bits in the given 64-bit integer.
7675 *
7676 * The counting starts with the most significant bit.
7677 *
7678 * @returns Number of most significant zero bits.
7679 * @returns 64 if all bits are cleared.
7680 * @param u64 Integer to consider.
7681 * @remarks Similar to __builtin_clzl() in gcc, except the result for a zero
7682 * input is defined.
7683 */
7684#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7685RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7686#else
7687DECLINLINE(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7688{
7689# if RT_INLINE_ASM_USES_INTRIN
7690 unsigned long iBit;
7691# if ARCH_BITS == 64
7692 if (_BitScanReverse64(&iBit, u64))
7693 return 63 - (unsigned)iBit;
7694# else
7695 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7696 return 31 - (unsigned)iBit;
7697 if (_BitScanReverse(&iBit, (uint32_t)u64))
7698 return 63 - (unsigned)iBit;
7699# endif
7700 return 64;
7701
7702# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7703 uint64_t iBit;
7704# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7705 __asm__ __volatile__("bsrq %1, %0\n\t"
7706 "cmovzq %2, %0\n\t"
7707 : "=&r" (iBit)
7708 : "rm" (u64)
7709 , "rm" ((int64_t)-1)
7710 : "cc");
7711# else /* 10980xe benchmark: 262 ps/call */
7712 __asm__ __volatile__("bsrq %1, %0\n\t"
7713 "jnz 1f\n\t"
7714 "mov $-1, %0\n\t"
7715 "1:\n\t"
7716 : "=&r" (iBit)
7717 : "rm" (u64)
7718 : "cc");
7719# endif
7720 return 63 - (unsigned)iBit;
7721
7722# elif defined(RT_ARCH_ARM64)
7723 uint64_t iBit;
7724 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7725 : [uVal] "=r" (u64)
7726 , [iBit] "=r" (iBit)
7727 : "[uVal]" (u64));
7728 return (unsigned)iBit;
7729
7730# elif defined(__GNUC__) && ARCH_BITS == 64
7731 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7732 return u64 ? __builtin_clzl(u64) : 64;
7733
7734# else
7735 unsigned iBit = ASMCountLeadingZerosU32((uint32_t)(u64 >> 32));
7736 if (iBit == 32)
7737 iBit = ASMCountLeadingZerosU32((uint32_t)u64) + 32;
7738 return iBit;
7739# endif
7740}
7741#endif
7742
7743
7744/**
7745 * Count the number of leading zero bits in the given 16-bit integer.
7746 *
7747 * The counting starts with the most significant bit.
7748 *
7749 * @returns Number of most significant zero bits.
7750 * @returns 16 if all bits are cleared.
7751 * @param u16 Integer to consider.
7752 */
7753#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7754RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7755#else
7756DECLINLINE(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7757{
7758# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 987 vs 292 ps/call) */
7759 uint16_t iBit;
7760 __asm__ __volatile__("bsrw %1, %0\n\t"
7761 "jnz 1f\n\t"
7762 "mov $-1, %0\n\t"
7763 "1:\n\t"
7764 : "=r" (iBit)
7765 : "rm" (u16)
7766 : "cc");
7767 return 15 - (int16_t)iBit;
7768# else
7769 return ASMCountLeadingZerosU32((uint32_t)u16) - 16;
7770# endif
7771}
7772#endif
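/*
 * Illustrative checks, not original code: the 16-bit fallback above widens
 * to 32 bits and subtracts 16 because the upper half of the widened value
 * is always zero.  The helper name is made up for the example.
 */
#if 0 /* illustration only */
static void rtAsmExampleCountLeadingZerosU16(void)
{
    Assert(ASMCountLeadingZerosU16(UINT16_C(0x0001)) == 15);
    Assert(ASMCountLeadingZerosU16(UINT16_C(0x8000)) ==  0);
    Assert(ASMCountLeadingZerosU16(UINT16_C(0x0000)) == 16);
}
#endif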
7773
7774
7775/**
7776 * Count the number of trailing zero bits in the given 32-bit integer.
7777 *
7778 * The counting starts with the least significant bit, i.e. bit 0.
7779 *
7780 * @returns Number of least significant zero bits.
7781 * @returns 32 if all bits are cleared.
7782 * @param u32 Integer to consider.
7783 * @remarks Similar to __builtin_ctz() in gcc, except the result for a zero input is defined.
7784 */
7785#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7786RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7787#else
7788DECLINLINE(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7789{
7790# if RT_INLINE_ASM_USES_INTRIN
7791 unsigned long iBit;
7792 if (!_BitScanForward(&iBit, u32))
7793 return 32;
7794 return (unsigned)iBit;
7795
7796# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7797 uint32_t iBit;
7798# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 932 vs 240 ps/call */
7799 __asm__ __volatile__("bsfl %1, %0\n\t"
7800 "cmovzl %2, %0\n\t"
7801 : "=&r" (iBit)
7802 : "rm" (u32)
7803 , "rm" ((int32_t)32)
7804 : "cc");
7805# elif RT_INLINE_ASM_GNU_STYLE
7806 __asm__ __volatile__("bsfl %1, %0\n\t"
7807 "jnz 1f\n\t"
7808 "mov $32, %0\n\t"
7809 "1:\n\t"
7810 : "=r" (iBit)
7811 : "rm" (u32)
7812 : "cc");
7813# else
7814 _asm
7815 {
7816 bsf eax, [u32]
7817 jnz found
7818 mov eax, 32
7819 found:
7820 mov [iBit], eax
7821 }
7822# endif
7823 return iBit;
7824
7825# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7826     /* Reverse the bit order with rbit and count with clz. */
7827 uint32_t iBit;
7828 __asm__ __volatile__(
7829# if defined(RT_ARCH_ARM64)
7830 "rbit %w[uVal], %w[uVal]\n\t"
7831 "clz %w[iBit], %w[uVal]\n\t"
7832# else
7833 "rbit %[uVal], %[uVal]\n\t"
7834 "clz %[iBit], %[uVal]\n\t"
7835# endif
7836 : [uVal] "=r" (u32)
7837 , [iBit] "=r" (iBit)
7838 : "[uVal]" (u32));
7839 return iBit;
7840
7841# elif defined(__GNUC__)
7842 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7843 return u32 ? __builtin_ctz(u32) : 32;
7844
7845# else
7846# error "Port me"
7847# endif
7848}
7849#endif
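/*
 * Illustrative sketch, not original code: the ARM path above relies on the
 * fact that reversing the bit order turns a trailing-zero count into a
 * leading-zero count, i.e. ctz(x) == clz(bit-reverse(x)).  Here the same
 * count is done the long way; the helper name is made up for the example.
 */
#if 0 /* illustration only */
static unsigned rtAsmExampleCountTrailingZerosU32(uint32_t u32)
{
    unsigned cZeros = 0;
    if (!u32)
        return 32;                  /* the defined zero-input result */
    while (!(u32 & 1))
    {
        u32 >>= 1;
        cZeros++;
    }
    return cZeros;                  /* 0..31 */
}
#endif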
7850
7851
7852/**
7853 * Count the number of trailing zero bits in the given 64-bit integer.
7854 *
7855 * The counting starts with the least significant bit.
7856 *
7857 * @returns Number of least significant zero bits.
7858 * @returns 64 if all bits are cleared.
7859 * @param u64 Integer to consider.
7860 * @remarks Similar to __builtin_ctzl() in gcc, except the result for a zero
7861 * input is defined.
7862 */
7863#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7864RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7865#else
7866DECLINLINE(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7867{
7868# if RT_INLINE_ASM_USES_INTRIN
7869 unsigned long iBit;
7870# if ARCH_BITS == 64
7871 if (_BitScanForward64(&iBit, u64))
7872 return (unsigned)iBit;
7873# else
7874 if (_BitScanForward(&iBit, (uint32_t)u64))
7875 return (unsigned)iBit;
7876 if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7877 return (unsigned)iBit + 32;
7878# endif
7879 return 64;
7880
7881# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7882 uint64_t iBit;
7883# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7884 __asm__ __volatile__("bsfq %1, %0\n\t"
7885 "cmovzq %2, %0\n\t"
7886 : "=&r" (iBit)
7887 : "rm" (u64)
7888 , "rm" ((int64_t)64)
7889 : "cc");
7890# else /* 10980xe benchmark: 262 ps/call */
7891 __asm__ __volatile__("bsfq %1, %0\n\t"
7892 "jnz 1f\n\t"
7893 "mov $64, %0\n\t"
7894 "1:\n\t"
7895 : "=&r" (iBit)
7896 : "rm" (u64)
7897 : "cc");
7898# endif
7899 return (unsigned)iBit;
7900
7901# elif defined(RT_ARCH_ARM64)
7902     /* Reverse the bit order with rbit and count with clz. */
7903 uint64_t iBit;
7904 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7905 "clz %[iBit], %[uVal]\n\t"
7906 : [uVal] "=r" (u64)
7907 , [iBit] "=r" (iBit)
7908 : "[uVal]" (u64));
7909 return (unsigned)iBit;
7910
7911# elif defined(__GNUC__) && ARCH_BITS == 64
7912 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7913 return u64 ? __builtin_ctzl(u64) : 64;
7914
7915# else
7916 unsigned iBit = ASMCountTrailingZerosU32((uint32_t)u64);
7917 if (iBit == 32)
7918 iBit = ASMCountTrailingZerosU32((uint32_t)(u64 >> 32)) + 32;
7919 return iBit;
7920# endif
7921}
7922#endif
7923
7924
7925/**
7926 * Count the number of trailing zero bits in the given 16-bit integer.
7927 *
7928 * The counting starts with the least significant bit.
7929 *
7930 * @returns Number of least significant zero bits.
7931 * @returns 16 if all bits are cleared.
7932 * @param u16 Integer to consider.
7933 */
7934#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7935RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7936#else
7937DECLINLINE(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7938{
7939# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 992 vs 349 ps/call) */
7940 uint16_t iBit;
7941 __asm__ __volatile__("bsfw %1, %0\n\t"
7942 "jnz 1f\n\t"
7943 "mov $16, %0\n\t"
7944 "1:\n\t"
7945 : "=r" (iBit)
7946 : "rm" (u16)
7947 : "cc");
7948 return iBit;
7949# else
7950 return ASMCountTrailingZerosU32((uint32_t)u16 | UINT32_C(0x10000));
7951 # endif
7952}
7953#endif
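/*
 * Illustrative checks, not original code: the fallback above ORs in bit 16
 * so a zero input still has a set bit to find, which clamps the result to
 * 16 without needing a branch.  The helper name is made up for the example.
 */
#if 0 /* illustration only */
static void rtAsmExampleCountTrailingZerosU16(void)
{
    Assert(ASMCountTrailingZerosU16(UINT16_C(0x0000)) == 16); /* only the injected bit 16 is set */
    Assert(ASMCountTrailingZerosU16(UINT16_C(0x8000)) == 15);
    Assert(ASMCountTrailingZerosU16(UINT16_C(0x0001)) ==  0);
}
#endif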
7954
7955
7956/**
7957 * Rotate 32-bit unsigned value to the left by @a cShift.
7958 *
7959 * @returns Rotated value.
7960 * @param u32 The value to rotate.
7961 * @param cShift How many bits to rotate by.
7962 */
7963#ifdef __WATCOMC__
7964RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7965#else
7966DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7967{
7968# if RT_INLINE_ASM_USES_INTRIN
7969 return _rotl(u32, cShift);
7970
7971# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7972 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7973 return u32;
7974
7975# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7976 __asm__ __volatile__(
7977# if defined(RT_ARCH_ARM64)
7978 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7979# else
7980 "ror %[uRet], %[uVal], %[cShift]\n\t"
7981# endif
7982 : [uRet] "=r" (u32)
7983 : [uVal] "[uRet]" (u32)
7984 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7985 return u32;
7986
7987# else
7988 cShift &= 31;
7989     return (u32 << cShift) | (u32 >> ((32 - cShift) & 31)); /* mask the count so cShift=0 doesn't shift by 32 (undefined in C) */
7990# endif
7991}
7992#endif
7993
7994
7995/**
7996 * Rotate 32-bit unsigned value to the right by @a cShift.
7997 *
7998 * @returns Rotated value.
7999 * @param u32 The value to rotate.
8000 * @param cShift How many bits to rotate by.
8001 */
8002#ifdef __WATCOMC__
8003RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
8004#else
8005DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
8006{
8007# if RT_INLINE_ASM_USES_INTRIN
8008 return _rotr(u32, cShift);
8009
8010# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
8011 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
8012 return u32;
8013
8014# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
8015 __asm__ __volatile__(
8016# if defined(RT_ARCH_ARM64)
8017 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
8018# else
8019 "ror %[uRet], %[uVal], %[cShift]\n\t"
8020# endif
8021 : [uRet] "=r" (u32)
8022 : [uVal] "[uRet]" (u32)
8023 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
8024 return u32;
8025
8026# else
8027 cShift &= 31;
8028     return (u32 >> cShift) | (u32 << ((32 - cShift) & 31)); /* mask the count so cShift=0 doesn't shift by 32 (undefined in C) */
8029# endif
8030}
8031#endif
8032
8033
8034/**
8035 * Rotate 64-bit unsigned value to the left by @a cShift.
8036 *
8037 * @returns Rotated value.
8038 * @param u64 The value to rotate.
8039 * @param cShift How many bits to rotate by.
8040 */
8041DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8042{
8043#if RT_INLINE_ASM_USES_INTRIN
8044 return _rotl64(u64, cShift);
8045
8046#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8047 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8048 return u64;
8049
8050#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8051 uint32_t uSpill;
8052 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8053 "jz 1f\n\t"
8054 "xchgl %%eax, %%edx\n\t"
8055 "1:\n\t"
8056 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8057 "jz 2f\n\t"
8058 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8059 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
8060 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
8061 "2:\n\t" /* } */
8062 : "=A" (u64)
8063 , "=c" (cShift)
8064 , "=r" (uSpill)
8065 : "0" (u64)
8066 , "1" (cShift)
8067 : "cc");
8068 return u64;
8069
8070# elif defined(RT_ARCH_ARM64)
8071 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8072 : [uRet] "=r" (u64)
8073 : [uVal] "[uRet]" (u64)
8074 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
8075 return u64;
8076
8077#else
8078 cShift &= 63;
8079     return (u64 << cShift) | (u64 >> ((64 - cShift) & 63)); /* mask the count so cShift=0 doesn't shift by 64 (undefined in C) */
8080#endif
8081}
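/*
 * Portable sketch, not original code, of what the 32-bit x86 path above
 * does: when rotating by 32 or more the halves trade places, then each
 * half is shifted while pulling in bits from the other (the job shldl does
 * in the inline assembly).  The helper name is made up for the example.
 */
#if 0 /* illustration only */
static uint64_t rtAsmExampleRotateLeftU64(uint64_t u64, uint32_t cShift)
{
    uint32_t uLo = (uint32_t)u64;
    uint32_t uHi = (uint32_t)(u64 >> 32);
    if (cShift & 32)                /* rotating by >= 32: swap the halves */
    {
        uint32_t uTmp = uLo;
        uLo = uHi;
        uHi = uTmp;
    }
    cShift &= 31;
    if (cShift)
    {
        uint32_t uNewHi = (uHi << cShift) | (uLo >> (32 - cShift));
        uint32_t uNewLo = (uLo << cShift) | (uHi >> (32 - cShift));
        uHi = uNewHi;
        uLo = uNewLo;
    }
    return ((uint64_t)uHi << 32) | uLo;
}
#endif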
8082
8083
8084/**
8085 * Rotate 64-bit unsigned value to the right by @a cShift.
8086 *
8087 * @returns Rotated value.
8088 * @param u64 The value to rotate.
8089 * @param cShift How many bits to rotate by.
8090 */
8091DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8092{
8093#if RT_INLINE_ASM_USES_INTRIN
8094 return _rotr64(u64, cShift);
8095
8096#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8097 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8098 return u64;
8099
8100#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8101 uint32_t uSpill;
8102 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8103 "jz 1f\n\t"
8104 "xchgl %%eax, %%edx\n\t"
8105 "1:\n\t"
8106 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8107 "jz 2f\n\t"
8108 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8109 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
8110 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
8111 "2:\n\t" /* } */
8112 : "=A" (u64)
8113 , "=c" (cShift)
8114 , "=r" (uSpill)
8115 : "0" (u64)
8116 , "1" (cShift)
8117 : "cc");
8118 return u64;
8119
8120# elif defined(RT_ARCH_ARM64)
8121 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8122 : [uRet] "=r" (u64)
8123 : [uVal] "[uRet]" (u64)
8124 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
8125 return u64;
8126
8127#else
8128 cShift &= 63;
8129     return (u64 >> cShift) | (u64 << ((64 - cShift) & 63)); /* mask the count so cShift=0 doesn't shift by 64 (undefined in C) */
8130#endif
8131}
8132
8133/** @} */
8134
8135
8136/** @} */
8137
8138/*
8139 * Include #pragma aux definitions for Watcom C/C++.
8140 */
8141#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
8142# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
8143# undef IPRT_INCLUDED_asm_watcom_x86_16_h
8144# include "asm-watcom-x86-16.h"
8145#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
8146# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
8147# undef IPRT_INCLUDED_asm_watcom_x86_32_h
8148# include "asm-watcom-x86-32.h"
8149#endif
8150
8151#endif /* !IPRT_INCLUDED_asm_h */
8152