VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 100599

Last change on this file since 100599 was 100318, checked in by vboxsync, 19 months ago

include/iprt/{param.h,zero.h,asm.h}: Make the use of PAGE_SIZE/PAGE_SHIFT/PAGE_OFFSET_MASK in linux.arm64 userspace code result in a build error due to unknown page size; for the kernel side the page size is derived from the kernel config bits, bugref:10476

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 259.8 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
7 *
8 * This file is part of VirtualBox base platform packages, as
9 * available from https://www.virtualbox.org.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation, in version 3 of the
14 * License.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses>.
23 *
24 * The contents of this file may alternatively be used under the terms
25 * of the Common Development and Distribution License Version 1.0
26 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
27 * in the VirtualBox distribution, in which case the provisions of the
28 * CDDL are applicable instead of those of the GPL.
29 *
30 * You may elect to license modified versions of this file under the
31 * terms and conditions of either the GPL or the CDDL or both.
32 *
33 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
34 */
35
36#ifndef IPRT_INCLUDED_asm_h
37#define IPRT_INCLUDED_asm_h
38#ifndef RT_WITHOUT_PRAGMA_ONCE
39# pragma once
40#endif
41
42#include <iprt/cdefs.h>
43#include <iprt/types.h>
44#include <iprt/assert.h>
45/** @def RT_INLINE_ASM_USES_INTRIN
46 * Defined as 1 if we're using a _MSC_VER 1400.
47 * Otherwise defined as 0.
48 */
49
50/* Solaris 10 header ugliness */
51#ifdef u
52# undef u
53#endif
54
55#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
56/* Emit the intrinsics at all optimization levels. */
57# include <iprt/sanitized/intrin.h>
58# pragma intrinsic(_ReadWriteBarrier)
59# pragma intrinsic(__cpuid)
60# pragma intrinsic(__stosd)
61# pragma intrinsic(__stosw)
62# pragma intrinsic(__stosb)
63# pragma intrinsic(_BitScanForward)
64# pragma intrinsic(_BitScanReverse)
65# pragma intrinsic(_bittest)
66# pragma intrinsic(_bittestandset)
67# pragma intrinsic(_bittestandreset)
68# pragma intrinsic(_bittestandcomplement)
69# pragma intrinsic(_byteswap_ushort)
70# pragma intrinsic(_byteswap_ulong)
71# pragma intrinsic(_interlockedbittestandset)
72# pragma intrinsic(_interlockedbittestandreset)
73# pragma intrinsic(_InterlockedAnd)
74# pragma intrinsic(_InterlockedOr)
75# pragma intrinsic(_InterlockedXor)
76# pragma intrinsic(_InterlockedIncrement)
77# pragma intrinsic(_InterlockedDecrement)
78# pragma intrinsic(_InterlockedExchange)
79# pragma intrinsic(_InterlockedExchangeAdd)
80# pragma intrinsic(_InterlockedCompareExchange)
81# pragma intrinsic(_InterlockedCompareExchange8)
82# pragma intrinsic(_InterlockedCompareExchange16)
83# pragma intrinsic(_InterlockedCompareExchange64)
84# pragma intrinsic(_rotl)
85# pragma intrinsic(_rotr)
86# pragma intrinsic(_rotl64)
87# pragma intrinsic(_rotr64)
88# ifdef RT_ARCH_AMD64
89# pragma intrinsic(__stosq)
90# pragma intrinsic(_byteswap_uint64)
91# pragma intrinsic(_InterlockedCompareExchange128)
92# pragma intrinsic(_InterlockedExchange64)
93# pragma intrinsic(_InterlockedExchangeAdd64)
94# pragma intrinsic(_InterlockedAnd64)
95# pragma intrinsic(_InterlockedOr64)
96# pragma intrinsic(_InterlockedIncrement64)
97# pragma intrinsic(_InterlockedDecrement64)
98# endif
99#endif
100
101/*
102 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
103 */
104#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
105# include "asm-watcom-x86-16.h"
106#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
107# include "asm-watcom-x86-32.h"
108#endif
109
110
111/** @defgroup grp_rt_asm ASM - Assembly Routines
112 * @ingroup grp_rt
113 *
114 * @remarks The difference between ordered and unordered atomic operations is
115 * that the former will complete outstanding reads and writes before
116 * continuing while the latter don't make any promises about the
117 * order. Ordered operations don't, it seems, make any 100% promise
118 * with regard to whether the operation will complete before any
119 * subsequent memory access. (Please correct if wrong.)
120 *
121 * ASMAtomicSomething operations are all ordered, while
122 * ASMAtomicUoSomething are unordered (note the Uo).
123 *
124 * Please note that ordered operations do not necessarily imply a
125 * compiler (memory) barrier. The user has to use the
126 * ASMCompilerBarrier() macro when that is deemed necessary.
127 *
128 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
129 * to reorder or even optimize assembler instructions away. For
130 * instance, in the following code the second rdmsr instruction is
131 * optimized away because gcc treats that instruction as deterministic:
132 *
133 * @code
134 * static inline uint64_t rdmsr_low(int idx)
135 * {
136 * uint32_t low;
137 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
138 * }
139 * ...
140 * uint32_t msr1 = rdmsr_low(1);
141 * foo(msr1);
142 * msr1 = rdmsr_low(1);
143 * bar(msr1);
144 * @endcode
145 *
146 * The input parameter of rdmsr_low is the same for both calls and
147 * therefore gcc will use the result of the first call as input
148 * parameter for bar() as well. For rdmsr this is not acceptable as
149 * this instruction is _not_ deterministic. This applies to reading
150 * machine status information in general.
151 *
152 * @{
153 */
154
155
156/** @def RT_INLINE_ASM_GCC_4_3_X_X86
157 * Used to work around some 4.3.x register allocation issues in this version of
158 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
159 * definitely not for 5.x */
160#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
161# define RT_INLINE_ASM_GCC_4_3_X_X86 1
162#else
163# define RT_INLINE_ASM_GCC_4_3_X_X86 0
164#endif
165
166/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
167 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
168 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
169 * mode, x86.
170 *
171 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
172 * when in PIC mode on x86.
 *
 * A definition supplied before this point is respected and the selection
 * logic below is skipped; otherwise the macro is set to 0 or 1 here.
173 */
174#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
175# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
176# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
177# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
178# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
/* PIC build for 32-bit x86, with either a GCC flagged by
 * RT_INLINE_ASM_GCC_4_3_X_X86 or a Darwin target. */
179# elif ( (defined(PIC) || defined(__PIC__)) \
180 && defined(RT_ARCH_X86) \
181 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
182 || defined(RT_OS_DARWIN)) )
183# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
184# else
185# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
186# endif
187#endif
188
189
/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
 * Temporary stand-in for RT_INLINE_ASM_EXTERNAL which is always 0 on ARM
 * (RT_ARCH_ARM64 / RT_ARCH_ARM32) and 1 elsewhere whenever
 * RT_INLINE_ASM_EXTERNAL is non-zero. */
#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32) || !RT_INLINE_ASM_EXTERNAL
# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
#else
# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
#endif
197
198/*
199 * ARM is great fun.
200 */
201#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
202
203# define RTASM_ARM_NO_BARRIER
204# ifdef RT_ARCH_ARM64
/* ARM64 barrier building blocks.  RTASM_ARM_<barrier> is the instruction text
 * that gets pasted into an asm template; the matching _IN_REG and
 * _COMMA_IN_REG companions expand to nothing on ARM64. */
205# define RTASM_ARM_NO_BARRIER_IN_REG
206# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
207# define RTASM_ARM_DSB_SY "dsb sy\n\t"
208# define RTASM_ARM_DSB_SY_IN_REG
209# define RTASM_ARM_DSB_SY_COMMA_IN_REG
210# define RTASM_ARM_DMB_SY "dmb sy\n\t"
211# define RTASM_ARM_DMB_SY_IN_REG
212# define RTASM_ARM_DMB_SY_COMMA_IN_REG
213# define RTASM_ARM_DMB_ST "dmb st\n\t"
214# define RTASM_ARM_DMB_ST_IN_REG
215# define RTASM_ARM_DMB_ST_COMMA_IN_REG
216# define RTASM_ARM_DMB_LD "dmb ld\n\t"
217# define RTASM_ARM_DMB_LD_IN_REG
218# define RTASM_ARM_DMB_LD_COMMA_IN_REG
/* Picks the first (64-bit) expression on ARM64. */
219# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
/* ARM64: 32-bit load-modify-store loop that yields the NEW value.
 * Declares rcSpill and u32NewRet in the invoking scope (no enclosing braces,
 * no trailing semicolon), then repeats ldaxr / modify64 / stlxr until the
 * store-exclusive status in rcSpill is zero.  The result is left in
 * u32NewRet.  modify32 is not used by this variant.  The barrier_type text is
 * emitted after the retry label, i.e. inside the loop. */
220# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
221 uint32_t rcSpill; \
222 uint32_t u32NewRet; \
223 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
224 RTASM_ARM_##barrier_type /* before lable? */ \
225 "ldaxr %w[uNew], %[pMem]\n\t" \
226 modify64 \
227 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
228 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
229 : [pMem] "+Q" (*a_pu32Mem) \
230 , [uNew] "=&r" (u32NewRet) \
231 , [rc] "=&r" (rcSpill) \
232 : in_reg \
233 : "cc")
/* ARM64: 32-bit load-modify-store loop that yields the OLD value.
 * Declares rcSpill, u32OldRet and u32NewSpill in the invoking scope, loads
 * the current value into [uOld], runs modify64 and stores [uNew], retrying
 * until the store-exclusive status in rcSpill is zero.  The previous memory
 * value is left in u32OldRet.  modify64 is presumably expected to derive
 * [uNew] from [uOld] - confirm against the callers.  modify32 is unused. */
234# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
235 uint32_t rcSpill; \
236 uint32_t u32OldRet; \
237 uint32_t u32NewSpill; \
238 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
239 RTASM_ARM_##barrier_type /* before lable? */ \
240 "ldaxr %w[uOld], %[pMem]\n\t" \
241 modify64 \
242 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
243 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
244 : [pMem] "+Q" (*a_pu32Mem) \
245 , [uOld] "=&r" (u32OldRet) \
246 , [uNew] "=&r" (u32NewSpill) \
247 , [rc] "=&r" (rcSpill) \
248 : in_reg \
249 : "cc")
/* ARM64: 64-bit load-modify-store loop that yields the NEW value.
 * Same shape as the 32-bit RET_NEW variant, but the value operand is a full
 * 64-bit register (no %w modifier) and the result is left in u64NewRet.
 * Declares rcSpill and u64NewRet in the invoking scope; modify32 is unused. */
250# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
251 uint32_t rcSpill; \
252 uint64_t u64NewRet; \
253 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
254 RTASM_ARM_##barrier_type /* before lable? */ \
255 "ldaxr %[uNew], %[pMem]\n\t" \
256 modify64 \
257 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
258 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
259 : [pMem] "+Q" (*a_pu64Mem) \
260 , [uNew] "=&r" (u64NewRet) \
261 , [rc] "=&r" (rcSpill) \
262 : in_reg \
263 : "cc")
/* ARM64: 64-bit load-modify-store loop that yields the OLD value.
 * Declares rcSpill, u64OldRet and u64NewSpill in the invoking scope, loads
 * the current value into [uOld], runs modify64 and stores [uNew], retrying
 * until the store-exclusive status in rcSpill is zero.  The previous memory
 * value is left in u64OldRet; modify32 is unused. */
264# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
265 uint32_t rcSpill; \
266 uint64_t u64OldRet; \
267 uint64_t u64NewSpill; \
268 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
269 RTASM_ARM_##barrier_type /* before lable? */ \
270 "ldaxr %[uOld], %[pMem]\n\t" \
271 modify64 \
272 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
273 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
274 : [pMem] "+Q" (*a_pu64Mem) \
275 , [uOld] "=&r" (u64OldRet) \
276 , [uNew] "=&r" (u64NewSpill) \
277 , [rc] "=&r" (rcSpill) \
278 : in_reg \
279 : "cc")
280
281# else /* RT_ARCH_ARM32 */
282# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
283# if RT_ARCH_ARM32 >= 7
284# warning armv7
285# define RTASM_ARM_NO_BARRIER_IN_REG
286# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
287# define RTASM_ARM_DSB_SY "dsb sy\n\t"
288# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
289# define RTASM_ARM_DMB_SY "dmb sy\n\t"
290# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
291# define RTASM_ARM_DMB_ST "dmb st\n\t"
292# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
293# define RTASM_ARM_DMB_LD "dmb ld\n\t"
294# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
295
296# elif RT_ARCH_ARM32 >= 6
297# warning armv6
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
301# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
302# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
306# elif RT_ARCH_ARM32 >= 4
307# warning armv5 or older
308# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
309# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
310# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
311# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
312# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
313# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
314# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
315# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
316# else
317# error "huh? Odd RT_ARCH_ARM32 value!"
318# endif
319# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
320# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
321# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
322# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
/* ARM32: 32-bit load-modify-store loop that yields the NEW value.
 * Declares rcSpill and u32NewRet in the invoking scope (no enclosing braces,
 * no trailing semicolon), then repeats ldrex / modify32 / strex until the
 * store-exclusive status in rcSpill is zero.  The result is left in
 * u32NewRet; modify64 is unused by this variant.
 *
 * The memory operand uses the "Q" constraint (address in a single register),
 * matching the hand-written exchange routines in this file: a plain "m" lets
 * the compiler pick offset/indexed addressing that ldrex/strex cannot take. */
# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
    uint32_t rcSpill; \
    uint32_t u32NewRet; \
    __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
                         RT_CONCAT(RTASM_ARM_,barrier_type) /* before lable? */ \
                         "ldrex %[uNew], %[pMem]\n\t" \
                         modify32 \
                         "strex %[rc], %[uNew], %[pMem]\n\t" \
                         "cmp %[rc], #0\n\t" \
                         "bne .Ltry_again_" #name "_%=\n\t" \
                         : [pMem] "+Q" (*a_pu32Mem) \
                         , [uNew] "=&r" (u32NewRet) \
                         , [rc] "=&r" (rcSpill) \
                         : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
                         , in_reg \
                         : "cc")
/* ARM32: 32-bit load-modify-store loop that yields the OLD value.
 * Declares rcSpill, u32OldRet and u32NewSpill in the invoking scope, loads
 * the current value into [uOld], runs modify32 and stores [uNew], retrying
 * until the store-exclusive status in rcSpill is zero.  The previous memory
 * value is left in u32OldRet; modify64 is unused by this variant.
 *
 * The memory operand uses the "Q" constraint (address in a single register),
 * matching the hand-written exchange routines in this file: a plain "m" lets
 * the compiler pick offset/indexed addressing that ldrex/strex cannot take. */
# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
    uint32_t rcSpill; \
    uint32_t u32OldRet; \
    uint32_t u32NewSpill; \
    __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
                         RT_CONCAT(RTASM_ARM_,barrier_type) /* before lable? */ \
                         "ldrex %[uOld], %[pMem]\n\t" \
                         modify32 \
                         "strex %[rc], %[uNew], %[pMem]\n\t" \
                         "cmp %[rc], #0\n\t" \
                         "bne .Ltry_again_" #name "_%=\n\t" \
                         : [pMem] "+Q" (*a_pu32Mem) \
                         , [uOld] "=&r" (u32OldRet) \
                         , [uNew] "=&r" (u32NewSpill) \
                         , [rc] "=&r" (rcSpill) \
                         : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
                         , in_reg \
                         : "cc")
/* ARM32: 64-bit load-modify-store loop that yields the NEW value.
 * Declares rcSpill and u64NewRet in the invoking scope, then repeats
 * ldrexd / modify32 / strexd on the register pair [uNew]/%H[uNew] until the
 * store-exclusive status in rcSpill is zero.  The result is left in
 * u64NewRet; modify64 is unused by this variant.
 *
 * The memory operand uses the "Q" constraint (address in a single register),
 * matching ASMAtomicXchgU64 in this file: a plain "m" lets the compiler pick
 * offset/indexed addressing that ldrexd/strexd cannot take. */
# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
    uint32_t rcSpill; \
    uint64_t u64NewRet; \
    __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
                         RT_CONCAT(RTASM_ARM_,barrier_type) /* before lable? */ \
                         "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
                         modify32 \
                         "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
                         "cmp %[rc], #0\n\t" \
                         "bne .Ltry_again_" #name "_%=\n\t" \
                         : [pMem] "+Q" (*a_pu64Mem) \
                         , [uNew] "=&r" (u64NewRet) \
                         , [rc] "=&r" (rcSpill) \
                         : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
                         , in_reg \
                         : "cc")
/* ARM32: 64-bit load-modify-store loop that yields the OLD value.
 * Declares rcSpill, u64OldRet and u64NewSpill in the invoking scope, loads
 * the current value into the [uOld] register pair, runs modify32 and stores
 * the [uNew] pair, retrying until the store-exclusive status in rcSpill is
 * zero.  The previous memory value is left in u64OldRet; modify64 is unused
 * by this variant.
 *
 * The memory operand uses the "Q" constraint (address in a single register),
 * matching ASMAtomicXchgU64 in this file: a plain "m" lets the compiler pick
 * offset/indexed addressing that ldrexd/strexd cannot take. */
# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
    uint32_t rcSpill; \
    uint64_t u64OldRet; \
    uint64_t u64NewSpill; \
    __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
                         RT_CONCAT(RTASM_ARM_,barrier_type) /* before lable? */ \
                         "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
                         modify32 \
                         "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
                         "cmp %[rc], #0\n\t" \
                         "bne .Ltry_again_" #name "_%=\n\t" \
                         : [pMem] "+Q" (*a_pu64Mem) \
                         , [uOld] "=&r" (u64OldRet) \
                         , [uNew] "=&r" (u64NewSpill) \
                         , [rc] "=&r" (rcSpill) \
                         : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
                         , in_reg \
                         : "cc")
391# endif /* RT_ARCH_ARM32 */
392#endif
393
394
/** @def ASMReturnAddress
 * Yields the return address of the current function or method, i.e. the
 * address in the caller that execution resumes at.
 *
 * Visual C++ maps this onto the _ReturnAddress intrinsic, GCC compatible
 * compilers onto __builtin_return_address(0).  Watcom has no equivalent, so
 * the macro expands to a call of a deliberately undeclared function there.
 */
#if defined(_MSC_VER)
# if defined(__cplusplus)
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#elif defined(__WATCOMC__)
# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
#else
# error "Unsupported compiler."
#endif
412
413
414/**
415 * Compiler memory barrier.
416 *
417 * Ensure that the compiler does not use any cached (register/tmp stack) memory
418 * values or any outstanding writes when returning from this function.
419 *
420 * This function must be used if non-volatile data is modified by a
421 * device or the VMM. Typical cases are port access, MMIO access,
422 * trapping instruction, etc.
 *
 * Depending on the compiler this is a macro (GNU style asm or intrinsics),
 * a plain prototype (Watcom) or an inline function with an empty __asm block.
423 */
424#if RT_INLINE_ASM_GNU_STYLE
/* Empty asm statement with a "memory" clobber. */
425# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
426#elif RT_INLINE_ASM_USES_INTRIN
427# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
428#elif defined(__WATCOMC__)
/* Only declared here for Watcom. */
429void ASMCompilerBarrier(void);
430#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
431DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
432{
433 __asm
434 {
435 }
436}
437#endif
438
439
440/** @def ASMBreakpoint
441 * Debugger Breakpoint.
 *
 * Plain alias that expands to RT_BREAKPOINT().
442 * @deprecated Use RT_BREAKPOINT instead.
443 * @internal
444 */
445#define ASMBreakpoint() RT_BREAKPOINT()
446
447
448/**
449 * Spinloop hint for platforms that have these, empty function on the other
450 * platforms.
451 *
452 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
453 * spin locks.
 *
 * ARM32 & ARM64: The YIELD instruction.
 *
 * On x86/AMD64 with RT_INLINE_ASM_EXTERNAL_TMP_ARM set, only the prototype is
 * provided here.
454 */
455#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
456RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
457#else
458DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
459{
460# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
 /* The instruction is emitted as raw bytes (f3 90) in both asm dialects. */
461# if RT_INLINE_ASM_GNU_STYLE
462 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
463# else
464 __asm {
465 _emit 0f3h
466 _emit 090h
467 }
468# endif
469
470# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
471 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
472
473# else
474 /* dummy */
475# endif
476}
477#endif
478
479
480/**
481 * Atomically Exchange an unsigned 8-bit value, ordered.
482 *
 * When RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero only the prototype is
 * provided here; otherwise the inline implementation below is used.
 *
483 * @returns Current *pu8 value
484 * @param pu8 Pointer to the 8-bit variable to update.
485 * @param u8 The 8-bit value to assign to *pu8.
486 */
487#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
488RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
489#else
490DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
491{
492# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
 /* x86: xchg with a memory operand.  u8 is tied to the register operand
  * both as input ("1") and output, so it holds the old value afterwards. */
493# if RT_INLINE_ASM_GNU_STYLE
494 __asm__ __volatile__("xchgb %0, %1\n\t"
495 : "=m" (*pu8)
496 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
497 : "1" (u8)
498 , "m" (*pu8));
499# else
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 mov rdx, [pu8]
504 mov al, [u8]
505 xchg [rdx], al
506 mov [u8], al
507# else
508 mov edx, [pu8]
509 mov al, [u8]
510 xchg [edx], al
511 mov [u8], al
512# endif
513 }
514# endif
515 return u8;
516
517# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
 /* ARM: exclusive load/store pair, retried until the store-exclusive
  * status (rcSpill) is zero.  The value loaded last is the one returned. */
518 uint32_t uOld;
519 uint32_t rcSpill;
520 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
521 RTASM_ARM_DMB_SY
522# if defined(RT_ARCH_ARM64)
523 "ldaxrb %w[uOld], %[pMem]\n\t"
524 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
525 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
526# else
527 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
528 "strexb %[rc], %[uNew], %[pMem]\n\t"
529 "cmp %[rc], #0\n\t"
530 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
531# endif
532 : [pMem] "+Q" (*pu8)
533 , [uOld] "=&r" (uOld)
534 , [rc] "=&r" (rcSpill)
535 : [uNew] "r" ((uint32_t)u8)
536 RTASM_ARM_DMB_SY_COMMA_IN_REG
537 : "cc");
538 return (uint8_t)uOld;
539
540# else
541# error "Port me"
542# endif
543}
544#endif
545
546
547/**
548 * Atomically Exchange a signed 8-bit value, ordered.
549 *
550 * @returns Current *pu8 value
551 * @param pi8 Pointer to the 8-bit variable to update.
552 * @param i8 The 8-bit value to assign to *pi8.
553 */
554DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
555{
556 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
557}
558
559
560/**
561 * Atomically Exchange a bool value, ordered.
562 *
563 * @returns Current *pf value
564 * @param pf Pointer to the 8-bit variable to update.
565 * @param f The 8-bit value to assign to *pi8.
566 */
567DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
568{
569#ifdef _MSC_VER
570 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
571#else
572 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
573#endif
574}
575
576
577/**
578 * Atomically Exchange an unsigned 16-bit value, ordered.
579 *
 * When RT_INLINE_ASM_EXTERNAL_TMP_ARM is non-zero only the prototype is
 * provided here; otherwise the inline implementation below is used.
 *
580 * @returns Current *pu16 value
581 * @param pu16 Pointer to the 16-bit variable to update.
582 * @param u16 The 16-bit value to assign to *pu16.
583 */
584#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
585RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
586#else
587DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
588{
589# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
 /* x86: xchg with a memory operand.  u16 is tied to the register operand
  * both as input ("1") and output, so it holds the old value afterwards. */
590# if RT_INLINE_ASM_GNU_STYLE
591 __asm__ __volatile__("xchgw %0, %1\n\t"
592 : "=m" (*pu16)
593 , "=r" (u16)
594 : "1" (u16)
595 , "m" (*pu16));
596# else
597 __asm
598 {
599# ifdef RT_ARCH_AMD64
600 mov rdx, [pu16]
601 mov ax, [u16]
602 xchg [rdx], ax
603 mov [u16], ax
604# else
605 mov edx, [pu16]
606 mov ax, [u16]
607 xchg [edx], ax
608 mov [u16], ax
609# endif
610 }
611# endif
612 return u16;
613
614# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
 /* ARM: exclusive load/store pair, retried until the store-exclusive
  * status (rcSpill) is zero.  The value loaded last is the one returned. */
615 uint32_t uOld;
616 uint32_t rcSpill;
617 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
618 RTASM_ARM_DMB_SY
619# if defined(RT_ARCH_ARM64)
620 "ldaxrh %w[uOld], %[pMem]\n\t"
621 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
622 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
623# else
624 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
625 "strexh %[rc], %[uNew], %[pMem]\n\t"
626 "cmp %[rc], #0\n\t"
627 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
628# endif
629 : [pMem] "+Q" (*pu16)
630 , [uOld] "=&r" (uOld)
631 , [rc] "=&r" (rcSpill)
632 : [uNew] "r" ((uint32_t)u16)
633 RTASM_ARM_DMB_SY_COMMA_IN_REG
634 : "cc");
635 return (uint16_t)uOld;
636
637# else
638# error "Port me"
639# endif
640}
641#endif
642
643
644/**
645 * Atomically Exchange a signed 16-bit value, ordered.
646 *
647 * @returns Current *pu16 value
648 * @param pi16 Pointer to the 16-bit variable to update.
649 * @param i16 The 16-bit value to assign to *pi16.
650 */
651DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
652{
653 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
654}
655
656
657/**
658 * Atomically Exchange an unsigned 32-bit value, ordered.
659 *
 * Only the prototype is provided here when RT_INLINE_ASM_EXTERNAL_TMP_ARM is
 * non-zero and intrinsics are not in use; otherwise the inline
 * implementation below is used.
 *
660 * @returns Current *pu32 value
661 * @param pu32 Pointer to the 32-bit variable to update.
662 * @param u32 The 32-bit value to assign to *pu32.
663 *
664 * @remarks Does not work on 286 and earlier.
665 */
666#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
667RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
668#else
669DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
670{
671# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
 /* x86: all three variants leave the old memory value in u32, which is
  * returned after the conditional block. */
672# if RT_INLINE_ASM_GNU_STYLE
673 __asm__ __volatile__("xchgl %0, %1\n\t"
674 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
675 , "=r" (u32)
676 : "1" (u32)
677 , "m" (*pu32));
678
679# elif RT_INLINE_ASM_USES_INTRIN
680 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
681
682# else
683 __asm
684 {
685# ifdef RT_ARCH_AMD64
686 mov rdx, [pu32]
687 mov eax, u32
688 xchg [rdx], eax
689 mov [u32], eax
690# else
691 mov edx, [pu32]
692 mov eax, u32
693 xchg [edx], eax
694 mov [u32], eax
695# endif
696 }
697# endif
698 return u32;
699
700# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
 /* ARM: exclusive load/store pair, retried until the store-exclusive
  * status (rcSpill) is zero.  The value loaded last is the one returned. */
701 uint32_t uOld;
702 uint32_t rcSpill;
703 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
704 RTASM_ARM_DMB_SY
705# if defined(RT_ARCH_ARM64)
706 "ldaxr %w[uOld], %[pMem]\n\t"
707 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
708 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
709# else
710 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
711 "strex %[rc], %[uNew], %[pMem]\n\t"
712 "cmp %[rc], #0\n\t"
713 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
714# endif
715 : [pMem] "+Q" (*pu32)
716 , [uOld] "=&r" (uOld)
717 , [rc] "=&r" (rcSpill)
718 : [uNew] "r" (u32)
719 RTASM_ARM_DMB_SY_COMMA_IN_REG
720 : "cc");
721 return uOld;
722
723# else
724# error "Port me"
725# endif
726}
727#endif
728
729
730/**
731 * Atomically Exchange a signed 32-bit value, ordered.
732 *
733 * @returns Current *pu32 value
734 * @param pi32 Pointer to the 32-bit variable to update.
735 * @param i32 The 32-bit value to assign to *pi32.
736 */
737DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
738{
739 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
740}
741
742
743/**
744 * Atomically Exchange an unsigned 64-bit value, ordered.
745 *
 * Only the prototype is provided here when RT_INLINE_ASM_EXTERNAL_TMP_ARM is
 * non-zero without intrinsics, or when
 * RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC is non-zero; otherwise the inline
 * implementation below is used.
 *
746 * @returns Current *pu64 value
747 * @param pu64 Pointer to the 64-bit variable to update.
748 * @param u64 The 64-bit value to assign to *pu64.
749 *
750 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
751 */
752#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
753 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
754RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
755#else
756DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
757{
758# if defined(RT_ARCH_AMD64)
 /* AMD64: a single 64-bit exchange (intrinsic or xchg). */
759# if RT_INLINE_ASM_USES_INTRIN
760 return _InterlockedExchange64((__int64 *)pu64, u64);
761
762# elif RT_INLINE_ASM_GNU_STYLE
763 __asm__ __volatile__("xchgq %0, %1\n\t"
764 : "=m" (*pu64)
765 , "=r" (u64)
766 : "1" (u64)
767 , "m" (*pu64));
768 return u64;
769# else
770 __asm
771 {
772 mov rdx, [pu64]
773 mov rax, [u64]
774 xchg [rdx], rax
775 mov [u64], rax
776 }
777 return u64;
778# endif
779
780# elif defined(RT_ARCH_X86)
 /* 32-bit x86: the exchange is built from a lock cmpxchg8b retry loop with
  * the new value in ecx:ebx; the loop repeats while the instruction reports
  * a mismatch (jnz) and the old value ends up in edx:eax. */
781# if RT_INLINE_ASM_GNU_STYLE
782# if defined(PIC) || defined(__PIC__)
 /* PIC: ebx is not passed as an asm operand.  The low dword is kept in
  * memory (u32EBX), swapped into ebx before the loop, and the original ebx
  * is moved back from that memory slot afterwards. */
783 uint32_t u32EBX = (uint32_t)u64;
784 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
785 "xchgl %%ebx, %3\n\t"
786 "1:\n\t"
787 "lock; cmpxchg8b (%5)\n\t"
788 "jnz 1b\n\t"
789 "movl %3, %%ebx\n\t"
790 /*"xchgl %%esi, %5\n\t"*/
791 : "=A" (u64)
792 , "=m" (*pu64)
793 : "0" (*pu64)
794 , "m" ( u32EBX )
795 , "c" ( (uint32_t)(u64 >> 32) )
796 , "S" (pu64)
797 : "cc");
798# else /* !PIC */
799 __asm__ __volatile__("1:\n\t"
800 "lock; cmpxchg8b %1\n\t"
801 "jnz 1b\n\t"
802 : "=A" (u64)
803 , "=m" (*pu64)
804 : "0" (*pu64)
805 , "b" ( (uint32_t)u64 )
806 , "c" ( (uint32_t)(u64 >> 32) )
807 : "cc");
808# endif
809# else
810 __asm
811 {
812 mov ebx, dword ptr [u64]
813 mov ecx, dword ptr [u64 + 4]
814 mov edi, pu64
815 mov eax, dword ptr [edi]
816 mov edx, dword ptr [edi + 4]
817 retry:
818 lock cmpxchg8b [edi]
819 jnz retry
820 mov dword ptr [u64], eax
821 mov dword ptr [u64 + 4], edx
822 }
823# endif
824 return u64;
825
826# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
 /* ARM: exclusive load/store pair, retried until the store-exclusive
  * status (rcSpill) is zero.  ARM32 uses the doubleword forms on a register
  * pair.  The value loaded last is the one returned. */
827 uint32_t rcSpill;
828 uint64_t uOld;
829 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
830 RTASM_ARM_DMB_SY
831# if defined(RT_ARCH_ARM64)
832 "ldaxr %[uOld], %[pMem]\n\t"
833 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
834 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
835# else
836 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
837 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
838 "cmp %[rc], #0\n\t"
839 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
840# endif
841 : [pMem] "+Q" (*pu64)
842 , [uOld] "=&r" (uOld)
843 , [rc] "=&r" (rcSpill)
844 : [uNew] "r" (u64)
845 RTASM_ARM_DMB_SY_COMMA_IN_REG
846 : "cc");
847 return uOld;
848
849# else
850# error "Port me"
851# endif
852}
853#endif
854
855
856/**
857 * Atomically Exchange an signed 64-bit value, ordered.
858 *
859 * @returns Current *pi64 value
860 * @param pi64 Pointer to the 64-bit variable to update.
861 * @param i64 The 64-bit value to assign to *pi64.
862 */
863DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
864{
865 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
866}
867
868
869/**
870 * Atomically Exchange a size_t value, ordered.
871 *
872 * @returns Current *ppv value
873 * @param puDst Pointer to the size_t variable to update.
874 * @param uNew The new value to assign to *puDst.
875 */
876DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
877{
878#if ARCH_BITS == 16
879 AssertCompile(sizeof(size_t) == 2);
880 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
881#elif ARCH_BITS == 32
882 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
883#elif ARCH_BITS == 64
884 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
885#else
886# error "ARCH_BITS is bogus"
887#endif
888}
889
890
891/**
892 * Atomically Exchange a pointer value, ordered.
893 *
894 * @returns Current *ppv value
895 * @param ppv Pointer to the pointer variable to update.
896 * @param pv The pointer value to assign to *ppv.
897 */
898DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
899{
900#if ARCH_BITS == 32 || ARCH_BITS == 16
901 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
902#elif ARCH_BITS == 64
903 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
904#else
905# error "ARCH_BITS is bogus"
906#endif
907}
908
909
910/**
911 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
912 *
 * The GCC flavour is a statement expression that first assigns ppv and pv to
 * typed temporaries and only then casts them for the ASMAtomicXchgPtr call;
 * the other flavour just casts the arguments and the result.
 *
913 * @returns Current *ppv value
914 * @param ppv Pointer to the pointer variable to update.
915 * @param pv The pointer value to assign to *ppv.
916 * @param Type The type of *ppv, sans volatile.
917 */
918#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
919# define ASMAtomicXchgPtrT(ppv, pv, Type) \
920 __extension__ \
921 ({\
922 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
923 Type const pvTypeChecked = (pv); \
924 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
925 pvTypeCheckedRet; \
926 })
927#else
928# define ASMAtomicXchgPtrT(ppv, pv, Type) \
929 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
930#endif
931
932
933/**
934 * Atomically Exchange a raw-mode context pointer value, ordered.
935 *
936 * @returns Current *ppv value
937 * @param ppvRC Pointer to the pointer variable to update.
938 * @param pvRC The pointer value to assign to *ppv.
939 */
940DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
941{
942 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
943}
944
945
946/**
947 * Atomically Exchange a ring-0 pointer value, ordered.
948 *
949 * @returns Current *ppv value
950 * @param ppvR0 Pointer to the pointer variable to update.
951 * @param pvR0 The pointer value to assign to *ppv.
952 */
953DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
954{
955#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
956 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
957#elif R0_ARCH_BITS == 64
958 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
959#else
960# error "R0_ARCH_BITS is bogus"
961#endif
962}
963
964
965/**
966 * Atomically Exchange a ring-3 pointer value, ordered.
967 *
968 * @returns Current *ppv value
969 * @param ppvR3 Pointer to the pointer variable to update.
970 * @param pvR3 The pointer value to assign to *ppv.
971 */
972DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
973{
974#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
975 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
976#elif R3_ARCH_BITS == 64
977 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
978#else
979# error "R3_ARCH_BITS is bogus"
980#endif
981}
982
983
984/** @def ASMAtomicXchgHandle
985 * Atomically Exchange a typical IPRT handle value, ordered.
986 *
987 * @param ph Pointer to the value to update.
988 * @param hNew The new value to assigned to *pu.
989 * @param phRes Where to store the current *ph value.
990 *
991 * @remarks This doesn't currently work for all handles (like RTFILE).
992 */
993#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
994# define ASMAtomicXchgHandle(ph, hNew, phRes) \
995 do { \
996 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
997 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
998 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
999 } while (0)
1000#elif HC_ARCH_BITS == 64
1001# define ASMAtomicXchgHandle(ph, hNew, phRes) \
1002 do { \
1003 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1004 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1005 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
1006 } while (0)
1007#else
1008# error HC_ARCH_BITS
1009#endif
1010
1011
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit exchange worker and
 * discards the previous value.  Any other size ends up in AssertMsgFailed.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
1030
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit exchange worker and
 * stores the previous value through @a puRes using the same width.  Any other
 * size ends up in AssertMsgFailed and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
1049
1050
1051
/**
 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
 *
 * Writes @a u8New to *pu8 only if *pu8 currently equals @a u8Old.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu8         Pointer to the value to update.
 * @param   u8New       The new value to assign to *pu8.
 * @param   u8Old       The old value to compare *pu8 with.
 *
 * @remarks x86: Requires a 486 or later.
 * @remarks Only a prototype is emitted here when RT_INLINE_ASM_EXTERNAL_TMP_ARM
 *          is set or GNU-style inline assembly is not available.
 * @todo Rename ASMAtomicCmpWriteU8
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /* cmpxchgb compares AL (preloaded with u8Old through the "2" matching
       constraint) against *pu8 and stores u8New on a match.  ZF carries the
       outcome and setz copies it into u8Ret. */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu8)
                         , "=qm" (u8Ret)
                         , "=a" (u8Old)
                         : "q" (u8New)
                         , "2" (u8Old)
                         , "m" (*pu8)
                         : "cc");
    return (bool)u8Ret;

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Load-exclusive / store-exclusive retry loop.  fXchg.u is preloaded with
       zero through the "[fXchg]" (0) input and only becomes 1 once the
       exclusive store has succeeded; a compare mismatch branches straight to
       label 1 and leaves it at zero.  The union lets the asm work on a 32-bit
       register while the result is read back as a bool.
       NOTE(review): RTASM_ARM_DMB_SY and RTASM_ARM_DMB_SY_COMMA_IN_REG are
       defined elsewhere; presumably they supply an optional barrier and its
       input operand - confirm. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32Spill;
    uint32_t rcSpill;
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
                         RTASM_ARM_DMB_SY
#  if defined(RT_ARCH_ARM64)
                         "ldaxrb    %w[uOld], %[pMem]\n\t"
                         "cmp       %w[uOld], %w[uCmp]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "stlxrb    %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz      %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
                         "mov       %w[fXchg], #1\n\t"
#  else
                         "ldrexb    %[uOld], %[pMem]\n\t"
                         "teq       %[uOld], %[uCmp]\n\t"
                         "strexbeq  %[rc], %[uNew], %[pMem]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "cmp       %[rc], #0\n\t"
                         "bne       .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
                         "mov       %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem]   "+Q"  (*pu8)
                         , [uOld]   "=&r" (u32Spill)
                         , [rc]     "=&r" (rcSpill)
                         , [fXchg]  "=&r" (fXchg.u)
                           /* The byte operands are widened to 32 bits because the compare above uses the whole register. */
                         : [uCmp]   "r" ((uint32_t)u8Old)
                         , [uNew]   "r" ((uint32_t)u8New)
                         , "[fXchg]" (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
1122
1123
1124/**
1125 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1126 *
1127 * @returns true if xchg was done.
1128 * @returns false if xchg wasn't done.
1129 *
1130 * @param pi8 Pointer to the value to update.
1131 * @param i8New The new value to assigned to *pi8.
1132 * @param i8Old The old value to *pi8 compare with.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 * @todo Rename ASMAtomicCmpWriteS8
1136 */
1137DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1138{
1139 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1140}
1141
1142
1143/**
1144 * Atomically Compare and Exchange a bool value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pf Pointer to the value to update.
1150 * @param fNew The new value to assigned to *pf.
1151 * @param fOld The old value to *pf compare with.
1152 *
1153 * @remarks x86: Requires a 486 or later.
1154 * @todo Rename ASMAtomicCmpWriteBool
1155 */
1156DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1157{
1158 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1159}
1160
1161
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
 *
 * Writes @a u32New to *pu32 only if *pu32 currently equals @a u32Old.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 *
 * @remarks x86: Requires a 486 or later.
 * @remarks Only a prototype is emitted here when RT_INLINE_ASM_EXTERNAL_TMP_ARM
 *          is set and compiler intrinsics are not in use.
 * @todo Rename ASMAtomicCmpWriteU32
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    /* cmpxchgl compares EAX (preloaded with u32Old through the "2" matching
       constraint) against *pu32 and stores u32New on a match.  ZF carries the
       outcome and setz copies it into u8Ret. */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu32)
                         , "=qm" (u8Ret)
                         , "=a" (u32Old)
                         : "r" (u32New)
                         , "2" (u32Old)
                         , "m" (*pu32)
                         : "cc");
    return (bool)u8Ret;

#  elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic hands back the previous content of *pu32; it equals u32Old exactly when the store took place. */
    return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;

#  else
    /* Compiler-native inline assembly: same cmpxchg sequence, with ZF turned into 0/1 in u32Ret. */
    uint32_t u32Ret;
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#   else
        mov     edx, [pu32]
#   endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#   ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
#   else
        lock cmpxchg [edx], ecx
#   endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* Load-exclusive / store-exclusive retry loop.  fXchg.u is preloaded with
       zero through the "[fXchg]" (0) input and only becomes 1 once the
       exclusive store has succeeded; a compare mismatch branches straight to
       label 1 and leaves it at zero.
       NOTE(review): RTASM_ARM_DMB_SY and RTASM_ARM_DMB_SY_COMMA_IN_REG are
       defined elsewhere; presumably they supply an optional barrier and its
       input operand - confirm. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32Spill;
    uint32_t rcSpill;
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
                         RTASM_ARM_DMB_SY
#  if defined(RT_ARCH_ARM64)
                         "ldaxr     %w[uOld], %[pMem]\n\t"
                         "cmp       %w[uOld], %w[uCmp]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "stlxr     %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz      %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
                         "mov       %w[fXchg], #1\n\t"
#  else
                         "ldrex     %[uOld], %[pMem]\n\t"
                         "teq       %[uOld], %[uCmp]\n\t"
                         "strexeq   %[rc], %[uNew], %[pMem]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "cmp       %[rc], #0\n\t"
                         "bne       .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
                         "mov       %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem]   "+Q"  (*pu32)
                         , [uOld]   "=&r" (u32Spill)
                         , [rc]     "=&r" (rcSpill)
                         , [fXchg]  "=&r" (fXchg.u)
                         : [uCmp]   "r" (u32Old)
                         , [uNew]   "r" (u32New)
                         , "[fXchg]" (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
1259
1260
1261/**
1262 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1263 *
1264 * @returns true if xchg was done.
1265 * @returns false if xchg wasn't done.
1266 *
1267 * @param pi32 Pointer to the value to update.
1268 * @param i32New The new value to assigned to *pi32.
1269 * @param i32Old The old value to *pi32 compare with.
1270 *
1271 * @remarks x86: Requires a 486 or later.
1272 * @todo Rename ASMAtomicCmpWriteS32
1273 */
1274DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1275{
1276 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1277}
1278
1279
1280/**
1281 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1282 *
1283 * @returns true if xchg was done.
1284 * @returns false if xchg wasn't done.
1285 *
1286 * @param pu64 Pointer to the 64-bit variable to update.
1287 * @param u64New The 64-bit value to assign to *pu64.
1288 * @param u64Old The value to compare with.
1289 *
1290 * @remarks x86: Requires a Pentium or later.
1291 * @todo Rename ASMAtomicCmpWriteU64
1292 */
1293#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1294 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1295RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1296#else
1297DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1298{
1299# if RT_INLINE_ASM_USES_INTRIN
1300 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1301
1302# elif defined(RT_ARCH_AMD64)
1303# if RT_INLINE_ASM_GNU_STYLE
1304 uint8_t u8Ret;
1305 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1306 "setz %1\n\t"
1307 : "=m" (*pu64)
1308 , "=qm" (u8Ret)
1309 , "=a" (u64Old)
1310 : "r" (u64New)
1311 , "2" (u64Old)
1312 , "m" (*pu64)
1313 : "cc");
1314 return (bool)u8Ret;
1315# else
1316 bool fRet;
1317 __asm
1318 {
1319 mov rdx, [pu32]
1320 mov rax, [u64Old]
1321 mov rcx, [u64New]
1322 lock cmpxchg [rdx], rcx
1323 setz al
1324 mov [fRet], al
1325 }
1326 return fRet;
1327# endif
1328
1329# elif defined(RT_ARCH_X86)
1330 uint32_t u32Ret;
1331# if RT_INLINE_ASM_GNU_STYLE
1332# if defined(PIC) || defined(__PIC__)
1333 uint32_t u32EBX = (uint32_t)u64New;
1334 uint32_t u32Spill;
1335 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1336 "lock; cmpxchg8b (%6)\n\t"
1337 "setz %%al\n\t"
1338 "movl %4, %%ebx\n\t"
1339 "movzbl %%al, %%eax\n\t"
1340 : "=a" (u32Ret)
1341 , "=d" (u32Spill)
1342# if RT_GNUC_PREREQ(4, 3)
1343 , "+m" (*pu64)
1344# else
1345 , "=m" (*pu64)
1346# endif
1347 : "A" (u64Old)
1348 , "m" ( u32EBX )
1349 , "c" ( (uint32_t)(u64New >> 32) )
1350 , "S" (pu64)
1351 : "cc");
1352# else /* !PIC */
1353 uint32_t u32Spill;
1354 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1355 "setz %%al\n\t"
1356 "movzbl %%al, %%eax\n\t"
1357 : "=a" (u32Ret)
1358 , "=d" (u32Spill)
1359 , "+m" (*pu64)
1360 : "A" (u64Old)
1361 , "b" ( (uint32_t)u64New )
1362 , "c" ( (uint32_t)(u64New >> 32) )
1363 : "cc");
1364# endif
1365 return (bool)u32Ret;
1366# else
1367 __asm
1368 {
1369 mov ebx, dword ptr [u64New]
1370 mov ecx, dword ptr [u64New + 4]
1371 mov edi, [pu64]
1372 mov eax, dword ptr [u64Old]
1373 mov edx, dword ptr [u64Old + 4]
1374 lock cmpxchg8b [edi]
1375 setz al
1376 movzx eax, al
1377 mov dword ptr [u32Ret], eax
1378 }
1379 return !!u32Ret;
1380# endif
1381
1382# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1383 union { uint32_t u; bool f; } fXchg;
1384 uint64_t u64Spill;
1385 uint32_t rcSpill;
1386 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1387 RTASM_ARM_DMB_SY
1388# if defined(RT_ARCH_ARM64)
1389 "ldaxr %[uOld], %[pMem]\n\t"
1390 "cmp %[uOld], %[uCmp]\n\t"
1391 "bne 1f\n\t" /* stop here if not equal */
1392 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1393 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1394 "mov %w[fXchg], #1\n\t"
1395# else
1396 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1397 "teq %[uOld], %[uCmp]\n\t"
1398 "teqeq %H[uOld], %H[uCmp]\n\t"
1399 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1400 "bne 1f\n\t" /* stop here if not equal */
1401 "cmp %[rc], #0\n\t"
1402 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1403 "mov %[fXchg], #1\n\t"
1404# endif
1405 "1:\n\t"
1406 : [pMem] "+Q" (*pu64)
1407 , [uOld] "=&r" (u64Spill)
1408 , [rc] "=&r" (rcSpill)
1409 , [fXchg] "=&r" (fXchg.u)
1410 : [uCmp] "r" (u64Old)
1411 , [uNew] "r" (u64New)
1412 , "[fXchg]" (0)
1413 RTASM_ARM_DMB_SY_COMMA_IN_REG
1414 : "cc");
1415 return fXchg.f;
1416
1417# else
1418# error "Port me"
1419# endif
1420}
1421#endif
1422
1423
1424/**
1425 * Atomically Compare and exchange a signed 64-bit value, ordered.
1426 *
1427 * @returns true if xchg was done.
1428 * @returns false if xchg wasn't done.
1429 *
1430 * @param pi64 Pointer to the 64-bit variable to update.
1431 * @param i64 The 64-bit value to assign to *pu64.
1432 * @param i64Old The value to compare with.
1433 *
1434 * @remarks x86: Requires a Pentium or later.
1435 * @todo Rename ASMAtomicCmpWriteS64
1436 */
1437DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1438{
1439 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1440}
1441
1442#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
1443
1444/** @def RTASM_HAVE_CMP_WRITE_U128
1445 * Indicates that we've got ASMAtomicCmpWriteU128(), ASMAtomicCmpWriteU128v2()
1446 * and ASMAtomicCmpWriteExU128() available. */
1447# define RTASM_HAVE_CMP_WRITE_U128 1
1448
1449
/**
 * Atomically compare and write an unsigned 128-bit value, ordered.
 *
 * Variant of ASMAtomicCmpWriteU128 that takes the new and the expected value
 * as separate 64-bit halves.
 *
 * @returns true if write was done.
 * @returns false if write wasn't done.
 *
 * @param   pu128       Pointer to the 128-bit variable to update.
 * @param   u64NewHi    The high 64 bits of the value to assign to *pu128.
 * @param   u64NewLo    The low 64 bits of the value to assign to *pu128.
 * @param   u64OldHi    The high 64-bit of the value to compare with.
 * @param   u64OldLo    The low 64-bit of the value to compare with.
 *
 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
 * @remarks NOTE(review): cmpxchg16b architecturally requires a 16-byte aligned
 *          memory operand - confirm that callers guarantee this for @a pu128.
 */
# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                      const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
# else
DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                         const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
{
#  if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic takes the comparand as an in/out array with the low half first. */
    __int64 ai64Cmp[2];
    ai64Cmp[0] = u64OldLo;
    ai64Cmp[1] = u64OldHi;
    return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;

#  elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
    /* Reassemble the halves into 128-bit values and let the compiler builtin do the work. */
    return __sync_bool_compare_and_swap(pu128, ((uint128_t)u64OldHi << 64) | u64OldLo, ((uint128_t)u64NewHi << 64) | u64NewLo);

#  elif defined(RT_ARCH_AMD64)
#   if RT_INLINE_ASM_GNU_STYLE
    /* cmpxchg16b compares RDX:RAX (old value) with *pu128 and stores RCX:RBX
       (new value) on a match.  ZF carries the outcome and is turned into 0/1
       in u64Ret; u64Spill only exists to tell the compiler RDX is modified. */
    uint64_t u64Ret;
    uint64_t u64Spill;
    __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
                         "setz      %%al\n\t"
                         "movzbl    %%al, %%eax\n\t"
                         : "=a" (u64Ret)
                         , "=d" (u64Spill)
                         , "+m" (*pu128)
                         : "a" (u64OldLo)
                         , "d" (u64OldHi)
                         , "b" (u64NewLo)
                         , "c" (u64NewHi)
                         : "cc");

    return (bool)u64Ret;
#   else
#    error "Port me"
#   endif
#  else
#   error "Port me"
#  endif
}
# endif
1505
1506
1507/**
1508 * Atomically compare and write an unsigned 128-bit value, ordered.
1509 *
1510 * @returns true if write was done.
1511 * @returns false if write wasn't done.
1512 *
1513 * @param pu128 Pointer to the 128-bit variable to update.
1514 * @param u128New The 128-bit value to assign to *pu128.
1515 * @param u128Old The value to compare with.
1516 *
1517 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1518 */
1519DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1520{
1521# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1522# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1523 return __sync_bool_compare_and_swap(pu128, u128Old, u128New);
1524# else
1525 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1526 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1527# endif
1528# else
1529 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1530# endif
1531}
1532
1533
1534/**
1535 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1536 */
1537DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1538 const RTUINT128U u128Old) RT_NOTHROW_DEF
1539{
1540# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1541 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1542# else
1543 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1544# endif
1545}
1546
1547#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
1548
1549/**
1550 * Atomically Compare and Exchange a pointer value, ordered.
1551 *
1552 * @returns true if xchg was done.
1553 * @returns false if xchg wasn't done.
1554 *
1555 * @param ppv Pointer to the value to update.
1556 * @param pvNew The new value to assigned to *ppv.
1557 * @param pvOld The old value to *ppv compare with.
1558 *
1559 * @remarks x86: Requires a 486 or later.
1560 * @todo Rename ASMAtomicCmpWritePtrVoid
1561 */
1562DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1563{
1564#if ARCH_BITS == 32 || ARCH_BITS == 16
1565 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1566#elif ARCH_BITS == 64
1567 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1568#else
1569# error "ARCH_BITS is bogus"
1570#endif
1571}
1572
1573
/**
 * Atomically Compare and Exchange a pointer value, ordered.
 *
 * Front end for ASMAtomicCmpXchgPtrVoid that spares the caller the casts.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 *
 * @remarks This is relatively type safe on GCC platforms: the arguments are
 *          first assigned to temporaries typed after *ppv.
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWritePtr
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvCmpXchgTarget = (ppv); \
        __typeof__(*(ppv)) const            pvCmpXchgNew     = (pvNew); \
        __typeof__(*(ppv)) const            pvCmpXchgOld     = (pvOld); \
        bool fCmpXchgDone = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvCmpXchgTarget, \
                                                    (void *)pvCmpXchgNew, (void *)pvCmpXchgOld); \
        fCmpXchgDone; \
    })
#else
# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
    ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
#endif
1603
1604
1605/** @def ASMAtomicCmpXchgHandle
1606 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1607 *
1608 * @param ph Pointer to the value to update.
1609 * @param hNew The new value to assigned to *pu.
1610 * @param hOld The old value to *pu compare with.
1611 * @param fRc Where to store the result.
1612 *
1613 * @remarks This doesn't currently work for all handles (like RTFILE).
1614 * @remarks x86: Requires a 486 or later.
1615 * @todo Rename ASMAtomicCmpWriteHandle
1616 */
1617#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1618# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1619 do { \
1620 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1621 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1622 } while (0)
1623#elif HC_ARCH_BITS == 64
1624# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1625 do { \
1626 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1627 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1628 } while (0)
1629#else
1630# error HC_ARCH_BITS
1631#endif
1632
1633
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu); only 4 and 8 byte operands are handled.  Any
 * other size ends up in AssertMsgFailed and sets @a fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result (true if the exchange was
 *                      done, false if not).
 *
 * @remarks x86: Requires a 486 or later.
 * @todo Rename ASMAtomicCmpWriteSize
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1658
1659
1660/**
1661 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1662 * back old value, ordered.
1663 *
1664 * @returns true if xchg was done.
1665 * @returns false if xchg wasn't done.
1666 *
1667 * @param pu8 Pointer to the value to update.
1668 * @param u8New The new value to assigned to *pu32.
1669 * @param u8Old The old value to *pu8 compare with.
1670 * @param pu8Old Pointer store the old value at.
1671 *
1672 * @remarks x86: Requires a 486 or later.
1673 */
1674#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1675RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1676#else
1677DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1678{
1679# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1680# if RT_INLINE_ASM_GNU_STYLE
1681 uint8_t u8Ret;
1682 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1683 "setz %1\n\t"
1684 : "=m" (*pu8)
1685 , "=qm" (u8Ret)
1686 , "=a" (*pu8Old)
1687# if defined(RT_ARCH_X86)
1688 : "q" (u8New)
1689# else
1690 : "r" (u8New)
1691# endif
1692 , "a" (u8Old)
1693 , "m" (*pu8)
1694 : "cc");
1695 return (bool)u8Ret;
1696
1697# elif RT_INLINE_ASM_USES_INTRIN
1698 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1699
1700# else
1701 uint8_t u8Ret;
1702 __asm
1703 {
1704# ifdef RT_ARCH_AMD64
1705 mov rdx, [pu8]
1706# else
1707 mov edx, [pu8]
1708# endif
1709 mov eax, [u8Old]
1710 mov ecx, [u8New]
1711# ifdef RT_ARCH_AMD64
1712 lock cmpxchg [rdx], ecx
1713 mov rdx, [pu8Old]
1714 mov [rdx], eax
1715# else
1716 lock cmpxchg [edx], ecx
1717 mov edx, [pu8Old]
1718 mov [edx], eax
1719# endif
1720 setz al
1721 movzx eax, al
1722 mov [u8Ret], eax
1723 }
1724 return !!u8Ret;
1725# endif
1726
1727# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1728 union { uint8_t u; bool f; } fXchg;
1729 uint8_t u8ActualOld;
1730 uint8_t rcSpill;
1731 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1732 RTASM_ARM_DMB_SY
1733# if defined(RT_ARCH_ARM64)
1734 "ldaxrb %w[uOld], %[pMem]\n\t"
1735 "cmp %w[uOld], %w[uCmp]\n\t"
1736 "bne 1f\n\t" /* stop here if not equal */
1737 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1738 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1739 "mov %w[fXchg], #1\n\t"
1740# else
1741 "ldrexb %[uOld], %[pMem]\n\t"
1742 "teq %[uOld], %[uCmp]\n\t"
1743 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1744 "bne 1f\n\t" /* stop here if not equal */
1745 "cmp %[rc], #0\n\t"
1746 "bne .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1747 "mov %[fXchg], #1\n\t"
1748# endif
1749 "1:\n\t"
1750 : [pMem] "+Q" (*pu8)
1751 , [uOld] "=&r" (u8ActualOld)
1752 , [rc] "=&r" (rcSpill)
1753 , [fXchg] "=&r" (fXchg.u)
1754 : [uCmp] "r" (u8Old)
1755 , [uNew] "r" (u8New)
1756 , "[fXchg]" (0)
1757 RTASM_ARM_DMB_SY_COMMA_IN_REG
1758 : "cc");
1759 *pu8Old = u8ActualOld;
1760 return fXchg.f;
1761
1762# else
1763# error "Port me"
1764# endif
1765}
1766#endif
1767
1768
1769/**
1770 * Atomically Compare and Exchange a signed 8-bit value, additionally
1771 * passes back old value, ordered.
1772 *
1773 * @returns true if xchg was done.
1774 * @returns false if xchg wasn't done.
1775 *
1776 * @param pi8 Pointer to the value to update.
1777 * @param i8New The new value to assigned to *pi8.
1778 * @param i8Old The old value to *pi8 compare with.
1779 * @param pi8Old Pointer store the old value at.
1780 *
1781 * @remarks x86: Requires a 486 or later.
1782 */
1783DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1784{
1785 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1786}
1787
1788
1789/**
1790 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1791 * back old value, ordered.
1792 *
1793 * @returns true if xchg was done.
1794 * @returns false if xchg wasn't done.
1795 *
1796 * @param pu16 Pointer to the value to update.
1797 * @param u16New The new value to assigned to *pu16.
1798 * @param u16Old The old value to *pu32 compare with.
1799 * @param pu16Old Pointer store the old value at.
1800 *
1801 * @remarks x86: Requires a 486 or later.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1804RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1805#else
1806DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1807{
1808# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1809# if RT_INLINE_ASM_GNU_STYLE
1810 uint8_t u8Ret;
1811 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1812 "setz %1\n\t"
1813 : "=m" (*pu16)
1814 , "=qm" (u8Ret)
1815 , "=a" (*pu16Old)
1816 : "r" (u16New)
1817 , "a" (u16Old)
1818 , "m" (*pu16)
1819 : "cc");
1820 return (bool)u8Ret;
1821
1822# elif RT_INLINE_ASM_USES_INTRIN
1823 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1824
1825# else
1826 uint16_t u16Ret;
1827 __asm
1828 {
1829# ifdef RT_ARCH_AMD64
1830 mov rdx, [pu16]
1831# else
1832 mov edx, [pu16]
1833# endif
1834 mov eax, [u16Old]
1835 mov ecx, [u16New]
1836# ifdef RT_ARCH_AMD64
1837 lock cmpxchg [rdx], ecx
1838 mov rdx, [pu16Old]
1839 mov [rdx], eax
1840# else
1841 lock cmpxchg [edx], ecx
1842 mov edx, [pu16Old]
1843 mov [edx], eax
1844# endif
1845 setz al
1846 movzx eax, al
1847 mov [u16Ret], eax
1848 }
1849 return !!u16Ret;
1850# endif
1851
1852# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1853 union { uint16_t u; bool f; } fXchg;
1854 uint16_t u16ActualOld;
1855 uint16_t rcSpill;
1856 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1857 RTASM_ARM_DMB_SY
1858# if defined(RT_ARCH_ARM64)
1859 "ldaxrh %w[uOld], %[pMem]\n\t"
1860 "cmp %w[uOld], %w[uCmp]\n\t"
1861 "bne 1f\n\t" /* stop here if not equal */
1862 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1863 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1864 "mov %w[fXchg], #1\n\t"
1865# else
1866 "ldrexh %[uOld], %[pMem]\n\t"
1867 "teq %[uOld], %[uCmp]\n\t"
1868 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1869 "bne 1f\n\t" /* stop here if not equal */
1870 "cmp %[rc], #0\n\t"
1871 "bne .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1872 "mov %[fXchg], #1\n\t"
1873# endif
1874 "1:\n\t"
1875 : [pMem] "+Q" (*pu16)
1876 , [uOld] "=&r" (u16ActualOld)
1877 , [rc] "=&r" (rcSpill)
1878 , [fXchg] "=&r" (fXchg.u)
1879 : [uCmp] "r" (u16Old)
1880 , [uNew] "r" (u16New)
1881 , "[fXchg]" (0)
1882 RTASM_ARM_DMB_SY_COMMA_IN_REG
1883 : "cc");
1884 *pu16Old = u16ActualOld;
1885 return fXchg.f;
1886
1887# else
1888# error "Port me"
1889# endif
1890}
1891#endif
1892
1893
1894/**
1895 * Atomically Compare and Exchange a signed 16-bit value, additionally
1896 * passes back old value, ordered.
1897 *
1898 * @returns true if xchg was done.
1899 * @returns false if xchg wasn't done.
1900 *
1901 * @param pi16 Pointer to the value to update.
1902 * @param i16New The new value to assigned to *pi16.
1903 * @param i16Old The old value to *pi16 compare with.
1904 * @param pi16Old Pointer store the old value at.
1905 *
1906 * @remarks x86: Requires a 486 or later.
1907 */
1908DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1909{
1910 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1911}
1912
1913
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 * @param   pu32Old     Where to store the old value.  Every inline variant
 *                      below writes to it whether or not the exchange was
 *                      done.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
/* Only a prototype is provided in this configuration. */
RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
{
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* cmpxchg compares EAX (loaded with u32Old) against *pu32.  Afterwards EAX
       holds the value that was in memory, which goes to *pu32Old, and ZF
       (captured by setz) tells whether the store of u32New happened. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu32)
                         , "=qm" (u8Ret)
                         , "=a" (*pu32Old)
                         : "r" (u32New)
                         , "a" (u32Old)
                         , "m" (*pu32)
                         : "cc");
    return (bool)u8Ret;

#  elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; the exchange was done exactly
       when that equals the comparand. */
    return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;

#  else
    /* MSC-style inline assembly: same cmpxchg sequence as the GNU variant, with
       the old value stored through pu32Old before ZF is turned into the result. */
    uint32_t u32Ret;
    __asm
    {
#   ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#   else
        mov     edx, [pu32]
#   endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#   ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#   else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#   endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    /* fXchg.u starts out as 0 (see the "[fXchg]" (0) input below) and is only
       set to 1 once the exclusive store has succeeded; fXchg.f is what gets
       returned. */
    union { uint32_t u; bool f; } fXchg;
    uint32_t u32ActualOld;
    uint32_t rcSpill;       /* Status result of the exclusive store; non-zero means retry. */
    __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
                         RTASM_ARM_DMB_SY /* presumably a full data memory barrier - macro defined elsewhere */
#  if defined(RT_ARCH_ARM64)
                         "ldaxr     %w[uOld], %[pMem]\n\t"
                         "cmp       %w[uOld], %w[uCmp]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "stlxr     %w[rc], %w[uNew], %[pMem]\n\t"
                         "cbnz      %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
                         "mov       %w[fXchg], #1\n\t"
#  else
                         "ldrex     %[uOld], %[pMem]\n\t"
                         "teq       %[uOld], %[uCmp]\n\t"
                         "strexeq   %[rc], %[uNew], %[pMem]\n\t"
                         "bne       1f\n\t"   /* stop here if not equal */
                         "cmp       %[rc], #0\n\t"
                         "bne       .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
                         "mov       %[fXchg], #1\n\t"
#  endif
                         "1:\n\t"
                         : [pMem]   "+Q"  (*pu32)
                         , [uOld]   "=&r" (u32ActualOld)
                         , [rc]     "=&r" (rcSpill)
                         , [fXchg]  "=&r" (fXchg.u)
                         : [uCmp]   "r"   (u32Old)
                         , [uNew]   "r"   (u32New)
                         , "[fXchg]"      (0)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG
                         : "cc");
    /* Hand back the value that was loaded, regardless of the outcome. */
    *pu32Old = u32ActualOld;
    return fXchg.f;

# else
#  error "Port me"
# endif
}
#endif
2017
2018
2019/**
2020 * Atomically Compare and Exchange a signed 32-bit value, additionally
2021 * passes back old value, ordered.
2022 *
2023 * @returns true if xchg was done.
2024 * @returns false if xchg wasn't done.
2025 *
2026 * @param pi32 Pointer to the value to update.
2027 * @param i32New The new value to assigned to *pi32.
2028 * @param i32Old The old value to *pi32 compare with.
2029 * @param pi32Old Pointer store the old value at.
2030 *
2031 * @remarks x86: Requires a 486 or later.
2032 */
2033DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2034{
2035 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2036}
2037
2038
2039/**
2040 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2041 * passing back old value, ordered.
2042 *
2043 * @returns true if xchg was done.
2044 * @returns false if xchg wasn't done.
2045 *
2046 * @param pu64 Pointer to the 64-bit variable to update.
2047 * @param u64New The 64-bit value to assign to *pu64.
2048 * @param u64Old The value to compare with.
2049 * @param pu64Old Pointer store the old value at.
2050 *
2051 * @remarks x86: Requires a Pentium or later.
2052 */
2053#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2054 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2055RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2056#else
2057DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2058{
2059# if RT_INLINE_ASM_USES_INTRIN
2060 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2061
2062# elif defined(RT_ARCH_AMD64)
2063# if RT_INLINE_ASM_GNU_STYLE
2064 uint8_t u8Ret;
2065 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2066 "setz %1\n\t"
2067 : "=m" (*pu64)
2068 , "=qm" (u8Ret)
2069 , "=a" (*pu64Old)
2070 : "r" (u64New)
2071 , "a" (u64Old)
2072 , "m" (*pu64)
2073 : "cc");
2074 return (bool)u8Ret;
2075# else
2076 bool fRet;
2077 __asm
2078 {
2079 mov rdx, [pu32]
2080 mov rax, [u64Old]
2081 mov rcx, [u64New]
2082 lock cmpxchg [rdx], rcx
2083 mov rdx, [pu64Old]
2084 mov [rdx], rax
2085 setz al
2086 mov [fRet], al
2087 }
2088 return fRet;
2089# endif
2090
2091# elif defined(RT_ARCH_X86)
2092# if RT_INLINE_ASM_GNU_STYLE
2093 uint64_t u64Ret;
2094# if defined(PIC) || defined(__PIC__)
2095 /* Note #1: This code uses a memory clobber description, because the clean
2096 solution with an output value for *pu64 makes gcc run out of
2097 registers. This will cause suboptimal code, and anyone with a
2098 better solution is welcome to improve this.
2099
2100 Note #2: We must prevent gcc from encoding the memory access, as it
2101 may go via the GOT if we're working on a global variable (like
2102 in the testcase). Thus we request a register (%3) and
2103 dereference it ourselves. */
2104 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2105 "lock; cmpxchg8b (%3)\n\t"
2106 "xchgl %%ebx, %1\n\t"
2107 : "=A" (u64Ret)
2108 : "DS" ((uint32_t)u64New)
2109 , "c" ((uint32_t)(u64New >> 32))
2110 , "r" (pu64) /* Do not use "m" here*/
2111 , "0" (u64Old)
2112 : "memory"
2113 , "cc" );
2114# else /* !PIC */
2115 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2116 : "=A" (u64Ret)
2117 , "=m" (*pu64)
2118 : "b" ((uint32_t)u64New)
2119 , "c" ((uint32_t)(u64New >> 32))
2120 , "m" (*pu64)
2121 , "0" (u64Old)
2122 : "cc");
2123# endif
2124 *pu64Old = u64Ret;
2125 return u64Ret == u64Old;
2126# else
2127 uint32_t u32Ret;
2128 __asm
2129 {
2130 mov ebx, dword ptr [u64New]
2131 mov ecx, dword ptr [u64New + 4]
2132 mov edi, [pu64]
2133 mov eax, dword ptr [u64Old]
2134 mov edx, dword ptr [u64Old + 4]
2135 lock cmpxchg8b [edi]
2136 mov ebx, [pu64Old]
2137 mov [ebx], eax
2138 setz al
2139 movzx eax, al
2140 add ebx, 4
2141 mov [ebx], edx
2142 mov dword ptr [u32Ret], eax
2143 }
2144 return !!u32Ret;
2145# endif
2146
2147# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2148 union { uint32_t u; bool f; } fXchg;
2149 uint64_t u64ActualOld;
2150 uint32_t rcSpill;
2151 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2152 RTASM_ARM_DMB_SY
2153# if defined(RT_ARCH_ARM64)
2154 "ldaxr %[uOld], %[pMem]\n\t"
2155 "cmp %[uOld], %[uCmp]\n\t"
2156 "bne 1f\n\t" /* stop here if not equal */
2157 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2158 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2159 "mov %w[fXchg], #1\n\t"
2160# else
2161 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2162 "teq %[uOld], %[uCmp]\n\t"
2163 "teqeq %H[uOld], %H[uCmp]\n\t"
2164 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2165 "bne 1f\n\t" /* stop here if not equal */
2166 "cmp %[rc], #0\n\t"
2167 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2168 "mov %[fXchg], #1\n\t"
2169# endif
2170 "1:\n\t"
2171 : [pMem] "+Q" (*pu64)
2172 , [uOld] "=&r" (u64ActualOld)
2173 , [rc] "=&r" (rcSpill)
2174 , [fXchg] "=&r" (fXchg.u)
2175 : [uCmp] "r" (u64Old)
2176 , [uNew] "r" (u64New)
2177 , "[fXchg]" (0)
2178 RTASM_ARM_DMB_SY_COMMA_IN_REG
2179 : "cc");
2180 *pu64Old = u64ActualOld;
2181 return fXchg.f;
2182
2183# else
2184# error "Port me"
2185# endif
2186}
2187#endif
2188
2189
2190/**
2191 * Atomically Compare and exchange a signed 64-bit value, additionally
2192 * passing back old value, ordered.
2193 *
2194 * @returns true if xchg was done.
2195 * @returns false if xchg wasn't done.
2196 *
2197 * @param pi64 Pointer to the 64-bit variable to update.
2198 * @param i64 The 64-bit value to assign to *pu64.
2199 * @param i64Old The value to compare with.
2200 * @param pi64Old Pointer store the old value at.
2201 *
2202 * @remarks x86: Requires a Pentium or later.
2203 */
2204DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2205{
2206 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2207}
2208
2209#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
2210
/** @def RTASM_HAVE_CMP_XCHG_U128
 * Indicates that we've got ASMAtomicCmpXchgU128(), ASMAtomicCmpXchgU128v2()
 * and ASMAtomicCmpXchgU128U() available. */
2214# define RTASM_HAVE_CMP_XCHG_U128 1
2215
2216
/**
 * Atomically compare and exchange an unsigned 128-bit value, ordered.
 *
 * Takes the new value and the comparand as separate 64-bit halves.
 *
 * @returns true if exchange was done.
 * @returns false if exchange wasn't done.
 *
 * @param   pu128       Pointer to the 128-bit variable to update.
 * @param   u64NewHi    The high 64 bits of the value to assign to *pu128.
 * @param   u64NewLo    The low 64 bits of the value to assign to *pu128.
 * @param   u64OldHi    The high 64-bit of the value to compare with.
 * @param   u64OldLo    The low 64-bit of the value to compare with.
 * @param   pu128Old    Where to return the old value.
 *
 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
 */
# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
/* Only a prototype is provided in this configuration. */
DECLASM(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                     const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_PROTO;
# else
DECLINLINE(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
                                        const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_DEF
{
#  if RT_INLINE_ASM_USES_INTRIN
    /* *pu128Old doubles as the intrinsic's in/out comparand buffer: it is
       seeded with the comparand here and passed by address below. */
    pu128Old->Hi = u64OldHi;
    pu128Old->Lo = u64OldLo;
    /* Lo must sit at offset 0 since &pu128Old->Lo is passed as the address of
       the whole 128-bit comparand. */
    AssertCompileMemberOffset(uint128_t, Lo, 0);
    return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, (__int64 *)&pu128Old->Lo) != 0;

#  elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
    /* Assemble 128-bit values from the halves and let the compiler builtin do
       the work; it returns the previous content of *pu128. */
    uint128_t const uCmp = ((uint128_t)u64OldHi << 64) | u64OldLo;
    uint128_t const uOld = __sync_val_compare_and_swap(pu128, uCmp, ((uint128_t)u64NewHi << 64) | u64NewLo);
    *pu128Old = uOld;
    return uCmp == uOld;

#  elif defined(RT_ARCH_AMD64)
#   if RT_INLINE_ASM_GNU_STYLE
    uint8_t bRet;
    uint64_t u64RetHi, u64RetLo;
    /* cmpxchg16b compares RDX:RAX with the memory operand and stores RCX:RBX
       on a match.  Afterwards RDX:RAX holds the previous memory value and ZF
       (captured by setz) tells whether the store happened. */
    __asm__ __volatile__("lock; cmpxchg16b %3\n\t"
                         "setz  %b0\n\t"
                         : "=r" (bRet)
                         , "=a" (u64RetLo)
                         , "=d" (u64RetHi)
                         , "+m" (*pu128)
                         : "a" (u64OldLo)
                         , "d" (u64OldHi)
                         , "b" (u64NewLo)
                         , "c" (u64NewHi)
                         : "cc");
    *pu128Old = ((uint128_t)u64RetHi << 64) | u64RetLo;
    return (bool)bRet;
#   else
#    error "Port me"
#   endif
#  else
#   error "Port me"
#  endif
}
# endif
2276
2277
2278/**
2279 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2280 *
2281 * @returns true if exchange was done.
2282 * @returns false if exchange wasn't done.
2283 *
2284 * @param pu128 Pointer to the 128-bit variable to update.
2285 * @param u128New The 128-bit value to assign to *pu128.
2286 * @param u128Old The value to compare with.
2287 * @param pu128Old Where to return the old value.
2288 *
2289 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2290 */
2291DECLINLINE(bool) ASMAtomicCmpXchgU128(volatile uint128_t *pu128, const uint128_t u128New,
2292 const uint128_t u128Old, uint128_t *pu128Old) RT_NOTHROW_DEF
2293{
2294# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
2295# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2296 uint128_t const uSwapped = __sync_val_compare_and_swap(pu128, u128Old, u128New);
2297 *pu128Old = uSwapped;
2298 return uSwapped == u128Old;
2299# else
2300 return ASMAtomicCmpXchgU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
2301 (uint64_t)(u128Old >> 64), (uint64_t)u128Old, pu128Old);
2302# endif
2303# else
2304 return ASMAtomicCmpXchgU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo, pu128Old);
2305# endif
2306}
2307
2308
2309/**
2310 * RTUINT128U wrapper for ASMAtomicCmpXchgU128.
2311 */
2312DECLINLINE(bool) ASMAtomicCmpXchgU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
2313 const RTUINT128U u128Old, PRTUINT128U pu128Old) RT_NOTHROW_DEF
2314{
2315# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2316 return ASMAtomicCmpXchgU128(&pu128->u, u128New.u, u128Old.u, &pu128Old->u);
2317# else
2318 return ASMAtomicCmpXchgU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo, &pu128Old->u);
2319# endif
2320}
2321
2322#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
2323
2324
2325
2326/** @def ASMAtomicCmpXchgExHandle
2327 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2328 *
2329 * @param ph Pointer to the value to update.
2330 * @param hNew The new value to assigned to *pu.
2331 * @param hOld The old value to *pu compare with.
2332 * @param fRc Where to store the result.
2333 * @param phOldVal Pointer to where to store the old value.
2334 *
2335 * @remarks This doesn't currently work for all handles (like RTFILE).
2336 */
2337#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2338# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2339 do { \
2340 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
2341 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
2342 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2343 } while (0)
2344#elif HC_ARCH_BITS == 64
2345# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2346 do { \
2347 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2348 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2349 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2350 } while (0)
2351#else
2352# error HC_ARCH_BITS
2353#endif
2354
2355
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to ASMAtomicCmpXchgExU32 (4 bytes) or
 * ASMAtomicCmpXchgExU64 (8 bytes).  Any other size triggers an assertion,
 * sets @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %u is not supported\n", (unsigned)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
2381
2382
2383/**
2384 * Atomically Compare and Exchange a pointer value, additionally
2385 * passing back old value, ordered.
2386 *
2387 * @returns true if xchg was done.
2388 * @returns false if xchg wasn't done.
2389 *
2390 * @param ppv Pointer to the value to update.
2391 * @param pvNew The new value to assigned to *ppv.
2392 * @param pvOld The old value to *ppv compare with.
2393 * @param ppvOld Pointer store the old value at.
2394 *
2395 * @remarks x86: Requires a 486 or later.
2396 */
2397DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2398 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2399{
2400#if ARCH_BITS == 32 || ARCH_BITS == 16
2401 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2402#elif ARCH_BITS == 64
2403 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2404#else
2405# error "ARCH_BITS is bogus"
2406#endif
2407}
2408
2409
/**
 * Atomically compares and exchanges a pointer value, additionally passing
 * back the old value, ordered.
 *
 * Front end for ASMAtomicCmpXchgExPtrVoid().  With GCC the arguments are
 * first assigned to temporaries typed after *(ppv), so mismatching pointer
 * types are diagnosed by the compiler; other compilers get plain casts.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv         Pointer to the value to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The old value to compare *ppv with.
 * @param   ppvOld      Where to store the old value.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const            pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv)) const            pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) * const          ppvOldTypeChecked = (ppvOld); \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                  (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                  (void **)ppvOldTypeChecked); \
     })
#endif
2442
2443
/**
 * Virtualization unfriendly serializing instruction, always exits.
 *
 * All inline variants execute CPUID with EAX/RAX = 0 and discard the
 * results.  Only a prototype is provided for non-x86 targets and for
 * external-assembly builds without intrinsics.
 */
#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG xAX = 0;    /* CPUID leaf 0; also receives the (ignored) EAX output. */
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "rbx", "rcx", "rdx", "memory");
#  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    /* EBX is saved and restored by hand here rather than being listed as a
       clobber. */
    __asm__ __volatile__ ("push  %%ebx\n\t"
                          "cpuid\n\t"
                          "pop   %%ebx\n\t"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ecx", "edx", "memory");
#  else
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ebx", "ecx", "edx", "memory");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];           /* Receives the CPUID output, never read. */
    _ReadWriteBarrier();
    __cpuid(aInfo, 0);

# else
    __asm
    {
        push    ebx
        xor     eax, eax
        cpuid
        pop     ebx
    }
# endif
}
#endif
2489
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * The inline variants build an interrupt return frame on the stack whose
 * return address is the local label right after the IRET instruction, and
 * then execute IRET.  Only a prototype is provided for non-x86 targets and
 * for external-assembly builds.
 */
#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* The original RSP is kept in R10 and pushed as the frame's stack pointer,
       so IRETQ also undoes the 128 byte red zone adjustment and the pushes.
       Frame as pushed: SS, RSP, RFLAGS, CS, RIP (address of label 1). */
    __asm__ __volatile__ ("movq  %%rsp,%%r10\n\t"
                          "subq  $128, %%rsp\n\t" /*redzone*/
                          "mov   %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl  %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq  1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory", "cc");
#  else
    /* Frame as pushed: EFLAGS, CS, EIP (address of label 1). */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
2534
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * The inline variants execute RDTSCP and ignore everything it returns.  Only
 * a prototype is provided for non-x86 targets and for external-assembly
 * builds whose intrinsics level is below RT_MSC_VER_VS2008.
 */
#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-( */
    /* Hence the instruction is emitted as raw bytes (0f 01 f9) below; the
       commented-out lines show the mnemonic form. */
#  ifdef RT_ARCH_AMD64
    /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
    uint32_t uIgnore;       /* Receives the auxiliary value from __rdtscp, unused. */
    _ReadWriteBarrier();
    (void)__rdtscp(&uIgnore);
    (void)uIgnore;
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
2567
2568
/**
 * Serialize Instruction (both data store and instruction flush).
 *
 * Selected per target:
 *  - 16-bit x86 and IN_GUEST builds map to ASMSerializeInstructionIRet().
 *  - Other x86/AMD64 builds map to ASMSerializeInstructionCpuId().
 *  - SPARC64 only gets a prototype here.
 *  - ARM64/ARM32 get an inline function issuing the RTASM_ARM_DSB_SY
 *    sequence (presumably a DSB SY barrier - the macro is defined elsewhere).
 */
#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#elif defined(RT_ARCH_SPARC64)
RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
{
    __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
}
#else
# error "Port me"
#endif
2586
2587
/**
 * Memory fence, waits for any pending writes and reads to complete.
 *
 * AMD64, and x86 without RT_WITH_OLD_CPU_SUPPORT, use MFENCE.  ARM uses the
 * RTASM_ARM_DMB_SY sequence.  Everything else falls back on an atomic
 * exchange on a local dummy variable.
 *
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f0 is the encoding of MFENCE. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_mfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf0
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
#elif ARCH_BITS == 16
    /* The dummy is only ever written by the exchange; its value is irrelevant. */
    uint16_t volatile u16;
    ASMAtomicXchgU16(&u16, 0);
#else
    /* The dummy is only ever written by the exchange; its value is irrelevant. */
    uint32_t volatile u32;
    ASMAtomicXchgU32(&u32, 0);
#endif
}
2617
2618
/**
 * Write fence, waits for any pending writes to complete.
 *
 * AMD64, and x86 without RT_WITH_OLD_CPU_SUPPORT, use SFENCE.  ARM uses the
 * RTASM_ARM_DMB_ST sequence.  Everything else falls back on
 * ASMMemoryFence().
 *
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f8 is the encoding of SFENCE. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_sfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xf8
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
#else
    ASMMemoryFence();
#endif
}
2644
2645
/**
 * Read fence, waits for any pending reads to complete.
 *
 * AMD64, and x86 without RT_WITH_OLD_CPU_SUPPORT, use LFENCE.  ARM uses the
 * RTASM_ARM_DMB_LD sequence.  Everything else falls back on
 * ASMMemoryFence().
 *
 * @note    No implicit compiler barrier (which is probably stupid).
 */
DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae e8 is the encoding of LFENCE. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_lfence();
# else
    __asm
    {
        _emit   0x0f
        _emit   0xae
        _emit   0xe8
    }
# endif
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
#else
    ASMMemoryFence();
#endif
}
2671
2672
2673/**
2674 * Atomically reads an unsigned 8-bit value, ordered.
2675 *
2676 * @returns Current *pu8 value
2677 * @param pu8 Pointer to the 8-bit variable to read.
2678 */
2679DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2680{
2681#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2682 uint32_t u32;
2683 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2684 RTASM_ARM_DMB_SY
2685# if defined(RT_ARCH_ARM64)
2686 "ldxrb %w[uDst], %[pMem]\n\t"
2687# else
2688 "ldrexb %[uDst], %[pMem]\n\t"
2689# endif
2690 : [uDst] "=&r" (u32)
2691 : [pMem] "Q" (*pu8)
2692 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2693 return (uint8_t)u32;
2694#else
2695 ASMMemoryFence();
2696 return *pu8; /* byte reads are atomic on x86 */
2697#endif
2698}
2699
2700
2701/**
2702 * Atomically reads an unsigned 8-bit value, unordered.
2703 *
2704 * @returns Current *pu8 value
2705 * @param pu8 Pointer to the 8-bit variable to read.
2706 */
2707DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2708{
2709#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2710 uint32_t u32;
2711 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2712# if defined(RT_ARCH_ARM64)
2713 "ldxrb %w[uDst], %[pMem]\n\t"
2714# else
2715 "ldrexb %[uDst], %[pMem]\n\t"
2716# endif
2717 : [uDst] "=&r" (u32)
2718 : [pMem] "Q" (*pu8));
2719 return (uint8_t)u32;
2720#else
2721 return *pu8; /* byte reads are atomic on x86 */
2722#endif
2723}
2724
2725
2726/**
2727 * Atomically reads a signed 8-bit value, ordered.
2728 *
2729 * @returns Current *pi8 value
2730 * @param pi8 Pointer to the 8-bit variable to read.
2731 */
2732DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2733{
2734 ASMMemoryFence();
2735#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2736 int32_t i32;
2737 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2738 RTASM_ARM_DMB_SY
2739# if defined(RT_ARCH_ARM64)
2740 "ldxrb %w[iDst], %[pMem]\n\t"
2741# else
2742 "ldrexb %[iDst], %[pMem]\n\t"
2743# endif
2744 : [iDst] "=&r" (i32)
2745 : [pMem] "Q" (*pi8)
2746 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2747 return (int8_t)i32;
2748#else
2749 return *pi8; /* byte reads are atomic on x86 */
2750#endif
2751}
2752
2753
2754/**
2755 * Atomically reads a signed 8-bit value, unordered.
2756 *
2757 * @returns Current *pi8 value
2758 * @param pi8 Pointer to the 8-bit variable to read.
2759 */
2760DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2761{
2762#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2763 int32_t i32;
2764 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2765# if defined(RT_ARCH_ARM64)
2766 "ldxrb %w[iDst], %[pMem]\n\t"
2767# else
2768 "ldrexb %[iDst], %[pMem]\n\t"
2769# endif
2770 : [iDst] "=&r" (i32)
2771 : [pMem] "Q" (*pi8));
2772 return (int8_t)i32;
2773#else
2774 return *pi8; /* byte reads are atomic on x86 */
2775#endif
2776}
2777
2778
2779/**
2780 * Atomically reads an unsigned 16-bit value, ordered.
2781 *
2782 * @returns Current *pu16 value
2783 * @param pu16 Pointer to the 16-bit variable to read.
2784 */
2785DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2786{
2787 Assert(!((uintptr_t)pu16 & 1));
2788#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2789 uint32_t u32;
2790 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2791 RTASM_ARM_DMB_SY
2792# if defined(RT_ARCH_ARM64)
2793 "ldxrh %w[uDst], %[pMem]\n\t"
2794# else
2795 "ldrexh %[uDst], %[pMem]\n\t"
2796# endif
2797 : [uDst] "=&r" (u32)
2798 : [pMem] "Q" (*pu16)
2799 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2800 return (uint16_t)u32;
2801#else
2802 ASMMemoryFence();
2803 return *pu16;
2804#endif
2805}
2806
2807
2808/**
2809 * Atomically reads an unsigned 16-bit value, unordered.
2810 *
2811 * @returns Current *pu16 value
2812 * @param pu16 Pointer to the 16-bit variable to read.
2813 */
2814DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2815{
2816 Assert(!((uintptr_t)pu16 & 1));
2817#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2818 uint32_t u32;
2819 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2820# if defined(RT_ARCH_ARM64)
2821 "ldxrh %w[uDst], %[pMem]\n\t"
2822# else
2823 "ldrexh %[uDst], %[pMem]\n\t"
2824# endif
2825 : [uDst] "=&r" (u32)
2826 : [pMem] "Q" (*pu16));
2827 return (uint16_t)u32;
2828#else
2829 return *pu16;
2830#endif
2831}
2832
2833
2834/**
2835 * Atomically reads a signed 16-bit value, ordered.
2836 *
2837 * @returns Current *pi16 value
2838 * @param pi16 Pointer to the 16-bit variable to read.
2839 */
2840DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2841{
2842 Assert(!((uintptr_t)pi16 & 1));
2843#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2844 int32_t i32;
2845 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2846 RTASM_ARM_DMB_SY
2847# if defined(RT_ARCH_ARM64)
2848 "ldxrh %w[iDst], %[pMem]\n\t"
2849# else
2850 "ldrexh %[iDst], %[pMem]\n\t"
2851# endif
2852 : [iDst] "=&r" (i32)
2853 : [pMem] "Q" (*pi16)
2854 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2855 return (int16_t)i32;
2856#else
2857 ASMMemoryFence();
2858 return *pi16;
2859#endif
2860}
2861
2862
2863/**
2864 * Atomically reads a signed 16-bit value, unordered.
2865 *
2866 * @returns Current *pi16 value
2867 * @param pi16 Pointer to the 16-bit variable to read.
2868 */
2869DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2870{
2871 Assert(!((uintptr_t)pi16 & 1));
2872#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2873 int32_t i32;
2874 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2875# if defined(RT_ARCH_ARM64)
2876 "ldxrh %w[iDst], %[pMem]\n\t"
2877# else
2878 "ldrexh %[iDst], %[pMem]\n\t"
2879# endif
2880 : [iDst] "=&r" (i32)
2881 : [pMem] "Q" (*pi16));
2882 return (int16_t)i32;
2883#else
2884 return *pi16;
2885#endif
2886}
2887
2888
2889/**
2890 * Atomically reads an unsigned 32-bit value, ordered.
2891 *
2892 * @returns Current *pu32 value
2893 * @param pu32 Pointer to the 32-bit variable to read.
2894 */
2895DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2896{
2897 Assert(!((uintptr_t)pu32 & 3));
2898#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2899 uint32_t u32;
2900 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2901 RTASM_ARM_DMB_SY
2902# if defined(RT_ARCH_ARM64)
2903 "ldxr %w[uDst], %[pMem]\n\t"
2904# else
2905 "ldrex %[uDst], %[pMem]\n\t"
2906# endif
2907 : [uDst] "=&r" (u32)
2908 : [pMem] "Q" (*pu32)
2909 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2910 return u32;
2911#else
2912 ASMMemoryFence();
2913# if ARCH_BITS == 16
2914 AssertFailed(); /** @todo 16-bit */
2915# endif
2916 return *pu32;
2917#endif
2918}
2919
2920
2921/**
2922 * Atomically reads an unsigned 32-bit value, unordered.
2923 *
2924 * @returns Current *pu32 value
2925 * @param pu32 Pointer to the 32-bit variable to read.
2926 */
2927DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2928{
2929 Assert(!((uintptr_t)pu32 & 3));
2930#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2931 uint32_t u32;
2932 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2933# if defined(RT_ARCH_ARM64)
2934 "ldxr %w[uDst], %[pMem]\n\t"
2935# else
2936 "ldrex %[uDst], %[pMem]\n\t"
2937# endif
2938 : [uDst] "=&r" (u32)
2939 : [pMem] "Q" (*pu32));
2940 return u32;
2941#else
2942# if ARCH_BITS == 16
2943 AssertFailed(); /** @todo 16-bit */
2944# endif
2945 return *pu32;
2946#endif
2947}
2948
2949
2950/**
2951 * Atomically reads a signed 32-bit value, ordered.
2952 *
2953 * @returns Current *pi32 value
2954 * @param pi32 Pointer to the 32-bit variable to read.
2955 */
2956DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2957{
2958 Assert(!((uintptr_t)pi32 & 3));
2959#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2960 int32_t i32;
2961 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2962 RTASM_ARM_DMB_SY
2963# if defined(RT_ARCH_ARM64)
2964 "ldxr %w[iDst], %[pMem]\n\t"
2965# else
2966 "ldrex %[iDst], %[pMem]\n\t"
2967# endif
2968 : [iDst] "=&r" (i32)
2969 : [pMem] "Q" (*pi32)
2970 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2971 return i32;
2972#else
2973 ASMMemoryFence();
2974# if ARCH_BITS == 16
2975 AssertFailed(); /** @todo 16-bit */
2976# endif
2977 return *pi32;
2978#endif
2979}
2980
2981
2982/**
2983 * Atomically reads a signed 32-bit value, unordered.
2984 *
2985 * @returns Current *pi32 value
2986 * @param pi32 Pointer to the 32-bit variable to read.
2987 */
2988DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2989{
2990 Assert(!((uintptr_t)pi32 & 3));
2991#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2992 int32_t i32;
2993 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2994# if defined(RT_ARCH_ARM64)
2995 "ldxr %w[iDst], %[pMem]\n\t"
2996# else
2997 "ldrex %[iDst], %[pMem]\n\t"
2998# endif
2999 : [iDst] "=&r" (i32)
3000 : [pMem] "Q" (*pi32));
3001 return i32;
3002
3003#else
3004# if ARCH_BITS == 16
3005 AssertFailed(); /** @todo 16-bit */
3006# endif
3007 return *pi32;
3008#endif
3009}
3010
3011
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * Depending on the configuration this is either only a prototype or an inline
 * implementation with one code path per architecture:
 *      - AMD64: memory fence followed by a plain 64-bit load.
 *      - x86:   LOCK CMPXCHG8B with both the comparand (EDX:EAX) and the
 *               replacement (ECX:EBX) set to zero; EDX:EAX holds the memory
 *               value afterwards.  This is why the memory must be writable.
 *      - ARM:   exclusive load, with RTASM_ARM_DMB_SY placed ahead of it.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
/* Prototype only in this configuration; no inline body is provided here. */
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* The explicit assembly variants below are disabled; the fence + load that
       follows the comment is what is actually compiled. */
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;

# elif defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* PIC build: EBX is not handed to the asm as an operand (presumably because
       it is reserved as the PIC register - confirm).  Instead it is swapped
       with the zeroed u32EBX (%3) for the duration of the CMPXCHG8B and
       restored right afterwards.  %5 is pu64 in ESI. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64)
#    if RT_GNUC_PREREQ(4, 3)
                         , "+m" (*pu64)
#    else
                         , "=m" (*pu64)
#    endif
                         : "0" (0ULL)
                         , "m" (u32EBX)
                         , "c" (0)
                         , "S" (pu64)
                         : "cc");
#   else /* !PIC */
    /* Non-PIC build: EDX:EAX, EBX and ECX are all passed in as zero. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64)
                         , "+m" (*pu64)
                         : "0" (0ULL)
                         , "b" (0)
                         , "c" (0)
                         : "cc");
#   endif
#  else
    /* Non-GNU inline assembly: same zeroed-register CMPXCHG8B, result copied out of EDX:EAX. */
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    Assert(!((uintptr_t)pu64 & 7));
    /* ARM64 uses a single 64-bit exclusive load; ARM32 loads a register pair (%H names the second register). */
    __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
                         RTASM_ARM_DMB_SY
#  if defined(RT_ARCH_ARM64)
                         "ldxr %[uDst], %[pMem]\n\t"
#  else
                         "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
#  endif
                         : [uDst] "=&r" (u64)
                         : [pMem] "Q" (*pu64)
                           RTASM_ARM_DMB_SY_COMMA_IN_REG);

# else
#  error "Port me"
# endif
    return u64;
}
#endif
3110
3111
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * Depending on the configuration this is either only a prototype or an inline
 * implementation with one code path per architecture:
 *      - AMD64: a plain 64-bit load.
 *      - x86:   LOCK CMPXCHG8B with both the comparand (EDX:EAX) and the
 *               replacement (ECX:EBX) set to zero; EDX:EAX holds the memory
 *               value afterwards.  This is why the memory must be writable.
 *      - ARM:   exclusive load without any barrier.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 * @remarks x86: Requires a Pentium or later.
 */
#if !defined(RT_ARCH_AMD64) \
  && (   (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
      || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
/* Prototype only in this configuration; no inline body is provided here. */
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* The explicit assembly variants below are disabled; the plain load that
       follows the comment is what is actually compiled. */
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;

# elif defined(RT_ARCH_X86)
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* PIC build: EAX, ECX and EDX are zeroed inside the template, and EBX is
       swapped with the zeroed u32EBX (%3) around the CMPXCHG8B and restored
       afterwards (presumably because EBX is reserved as the PIC register -
       confirm).  ECX is declared as an output (u32Spill) so the compiler
       knows the template overwrites it.  %4 is pu64 in ESI. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor %%eax,%%eax\n\t"
                         "xor %%ecx,%%ecx\n\t"
                         "xor %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64)
#    if RT_GNUC_PREREQ(4, 3)
                         , "+m" (*pu64)
#    else
                         , "=m" (*pu64)
#    endif
                         , "=c" (u32Spill)
                         : "m" (u32EBX)
                         , "S" (pu64)
                         : "cc");
#   else /* !PIC */
    /* Non-PIC build: EDX:EAX, EBX and ECX are all passed in as zero. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64)
                         , "+m" (*pu64)
                         : "0" (0ULL)
                         , "b" (0)
                         , "c" (0)
                         : "cc");
#   endif
#  else
    /* Non-GNU inline assembly: same zeroed-register CMPXCHG8B, result copied out of EDX:EAX. */
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif

# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
    Assert(!((uintptr_t)pu64 & 7));
    /* ARM64 uses a single 64-bit exclusive load; ARM32 loads a register pair (%H names the second register). */
    __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
#  if defined(RT_ARCH_ARM64)
                         "ldxr %[uDst], %[pMem]\n\t"
#  else
                         "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
#  endif
                         : [uDst] "=&r" (u64)
                         : [pMem] "Q" (*pu64));

# else
#  error "Port me"
# endif
    return u64;
}
#endif
3210
3211
3212/**
3213 * Atomically reads a signed 64-bit value, ordered.
3214 *
3215 * @returns Current *pi64 value
3216 * @param pi64 Pointer to the 64-bit variable to read.
3217 * The memory pointed to must be writable.
3218 *
3219 * @remarks This may fault if the memory is read-only!
3220 * @remarks x86: Requires a Pentium or later.
3221 */
3222DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3223{
3224 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3225}
3226
3227
3228/**
3229 * Atomically reads a signed 64-bit value, unordered.
3230 *
3231 * @returns Current *pi64 value
3232 * @param pi64 Pointer to the 64-bit variable to read.
3233 * The memory pointed to must be writable.
3234 *
3235 * @remarks This will fault if the memory is read-only!
3236 * @remarks x86: Requires a Pentium or later.
3237 */
3238DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3239{
3240 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3241}
3242
3243
3244/**
3245 * Atomically reads a size_t value, ordered.
3246 *
3247 * @returns Current *pcb value
3248 * @param pcb Pointer to the size_t variable to read.
3249 */
3250DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3251{
3252#if ARCH_BITS == 64
3253 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3254#elif ARCH_BITS == 32
3255 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3256#elif ARCH_BITS == 16
3257 AssertCompileSize(size_t, 2);
3258 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3259#else
3260# error "Unsupported ARCH_BITS value"
3261#endif
3262}
3263
3264
3265/**
3266 * Atomically reads a size_t value, unordered.
3267 *
3268 * @returns Current *pcb value
3269 * @param pcb Pointer to the size_t variable to read.
3270 */
3271DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3272{
3273#if ARCH_BITS == 64 || ARCH_BITS == 16
3274 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3275#elif ARCH_BITS == 32
3276 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3277#elif ARCH_BITS == 16
3278 AssertCompileSize(size_t, 2);
3279 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3280#else
3281# error "Unsupported ARCH_BITS value"
3282#endif
3283}
3284
3285
3286/**
3287 * Atomically reads a pointer value, ordered.
3288 *
3289 * @returns Current *pv value
3290 * @param ppv Pointer to the pointer variable to read.
3291 *
3292 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
3293 * requires less typing (no casts).
3294 */
3295DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3296{
3297#if ARCH_BITS == 32 || ARCH_BITS == 16
3298 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3299#elif ARCH_BITS == 64
3300 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3301#else
3302# error "ARCH_BITS is bogus"
3303#endif
3304}
3305
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * The GNU C variant is a statement expression which first assigns @a ppv to a
 * typed temporary, so passing a pointer of an incompatible type is diagnosed
 * by the compiler.  The fallback variant is a plain cast; it is fully
 * parenthesized so the result can be used directly in postfix expressions
 * (e.g. followed by '->') just like the GNU C variant.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    ((Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv)))
#endif
3325
3326
3327/**
3328 * Atomically reads a pointer value, unordered.
3329 *
3330 * @returns Current *pv value
3331 * @param ppv Pointer to the pointer variable to read.
3332 *
3333 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
3334 * requires less typing (no casts).
3335 */
3336DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3337{
3338#if ARCH_BITS == 32 || ARCH_BITS == 16
3339 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3340#elif ARCH_BITS == 64
3341 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3342#else
3343# error "ARCH_BITS is bogus"
3344#endif
3345}
3346
3347
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * The GNU C variant is a statement expression which first assigns @a ppv to a
 * typed temporary, so passing a pointer of an incompatible type is diagnosed
 * by the compiler.  The fallback variant is a plain cast; it is fully
 * parenthesized so the result can be used directly in postfix expressions
 * (e.g. followed by '->') just like the GNU C variant.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    ((Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv)))
#endif
3367
3368
3369/**
3370 * Atomically reads a boolean value, ordered.
3371 *
3372 * @returns Current *pf value
3373 * @param pf Pointer to the boolean variable to read.
3374 */
3375DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3376{
3377 ASMMemoryFence();
3378 return *pf; /* byte reads are atomic on x86 */
3379}
3380
3381
3382/**
3383 * Atomically reads a boolean value, unordered.
3384 *
3385 * @returns Current *pf value
3386 * @param pf Pointer to the boolean variable to read.
3387 */
3388DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3389{
3390 return *pf; /* byte reads are atomic on x86 */
3391}
3392
3393
3394/**
3395 * Atomically read a typical IPRT handle value, ordered.
3396 *
3397 * @param ph Pointer to the handle variable to read.
3398 * @param phRes Where to store the result.
3399 *
3400 * @remarks This doesn't currently work for all handles (like RTFILE).
3401 */
3402#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3403# define ASMAtomicReadHandle(ph, phRes) \
3404 do { \
3405 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3406 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3407 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3408 } while (0)
3409#elif HC_ARCH_BITS == 64
3410# define ASMAtomicReadHandle(ph, phRes) \
3411 do { \
3412 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3413 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3414 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3415 } while (0)
3416#else
3417# error HC_ARCH_BITS
3418#endif
3419
3420
3421/**
3422 * Atomically read a typical IPRT handle value, unordered.
3423 *
3424 * @param ph Pointer to the handle variable to read.
3425 * @param phRes Where to store the result.
3426 *
3427 * @remarks This doesn't currently work for all handles (like RTFILE).
3428 */
3429#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3430# define ASMAtomicUoReadHandle(ph, phRes) \
3431 do { \
3432 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3433 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3434 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3435 } while (0)
3436#elif HC_ARCH_BITS == 64
3437# define ASMAtomicUoReadHandle(ph, phRes) \
3438 do { \
3439 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3440 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3441 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3442 } while (0)
3443#else
3444# error HC_ARCH_BITS
3445#endif
3446
3447
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered reader and
 * stores the result through @a puRes using the same width.  Any other size
 * triggers an assertion and nothing is read or stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* sizeof yields size_t; cast it so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3465
3466
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered reader and
 * stores the result through @a puRes using the same width.  Any other size
 * triggers an assertion and nothing is read or stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* The message names this macro; sizeof yields size_t, so it is cast to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3484
3485
3486/**
3487 * Atomically writes an unsigned 8-bit value, ordered.
3488 *
3489 * @param pu8 Pointer to the 8-bit variable.
3490 * @param u8 The 8-bit value to assign to *pu8.
3491 */
3492DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3493{
3494 /** @todo Any possible ARM32/ARM64 optimizations here? */
3495 ASMAtomicXchgU8(pu8, u8);
3496}
3497
3498
3499/**
3500 * Atomically writes an unsigned 8-bit value, unordered.
3501 *
3502 * @param pu8 Pointer to the 8-bit variable.
3503 * @param u8 The 8-bit value to assign to *pu8.
3504 */
3505DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3506{
3507 /** @todo Any possible ARM32/ARM64 improvements here? */
3508 *pu8 = u8; /* byte writes are atomic on x86 */
3509}
3510
3511
3512/**
3513 * Atomically writes a signed 8-bit value, ordered.
3514 *
3515 * @param pi8 Pointer to the 8-bit variable to read.
3516 * @param i8 The 8-bit value to assign to *pi8.
3517 */
3518DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3519{
3520 /** @todo Any possible ARM32/ARM64 optimizations here? */
3521 ASMAtomicXchgS8(pi8, i8);
3522}
3523
3524
3525/**
3526 * Atomically writes a signed 8-bit value, unordered.
3527 *
3528 * @param pi8 Pointer to the 8-bit variable to write.
3529 * @param i8 The 8-bit value to assign to *pi8.
3530 */
3531DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3532{
3533 *pi8 = i8; /* byte writes are atomic on x86 */
3534}
3535
3536
3537/**
3538 * Atomically writes an unsigned 16-bit value, ordered.
3539 *
3540 * @param pu16 Pointer to the 16-bit variable to write.
3541 * @param u16 The 16-bit value to assign to *pu16.
3542 */
3543DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3544{
3545 /** @todo Any possible ARM32/ARM64 optimizations here? */
3546 ASMAtomicXchgU16(pu16, u16);
3547}
3548
3549
3550/**
3551 * Atomically writes an unsigned 16-bit value, unordered.
3552 *
3553 * @param pu16 Pointer to the 16-bit variable to write.
3554 * @param u16 The 16-bit value to assign to *pu16.
3555 */
3556DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3557{
3558 Assert(!((uintptr_t)pu16 & 1));
3559 *pu16 = u16;
3560}
3561
3562
3563/**
3564 * Atomically writes a signed 16-bit value, ordered.
3565 *
3566 * @param pi16 Pointer to the 16-bit variable to write.
3567 * @param i16 The 16-bit value to assign to *pi16.
3568 */
3569DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3570{
3571 /** @todo Any possible ARM32/ARM64 optimizations here? */
3572 ASMAtomicXchgS16(pi16, i16);
3573}
3574
3575
3576/**
3577 * Atomically writes a signed 16-bit value, unordered.
3578 *
3579 * @param pi16 Pointer to the 16-bit variable to write.
3580 * @param i16 The 16-bit value to assign to *pi16.
3581 */
3582DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3583{
3584 Assert(!((uintptr_t)pi16 & 1));
3585 *pi16 = i16;
3586}
3587
3588
3589/**
3590 * Atomically writes an unsigned 32-bit value, ordered.
3591 *
3592 * @param pu32 Pointer to the 32-bit variable to write.
3593 * @param u32 The 32-bit value to assign to *pu32.
3594 */
3595DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3596{
3597 /** @todo Any possible ARM32/ARM64 optimizations here? */
3598 ASMAtomicXchgU32(pu32, u32);
3599}
3600
3601
3602/**
3603 * Atomically writes an unsigned 32-bit value, unordered.
3604 *
3605 * @param pu32 Pointer to the 32-bit variable to write.
3606 * @param u32 The 32-bit value to assign to *pu32.
3607 */
3608DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3609{
3610 Assert(!((uintptr_t)pu32 & 3));
3611#if ARCH_BITS >= 32
3612 *pu32 = u32;
3613#else
3614 ASMAtomicXchgU32(pu32, u32);
3615#endif
3616}
3617
3618
3619/**
3620 * Atomically writes a signed 32-bit value, ordered.
3621 *
3622 * @param pi32 Pointer to the 32-bit variable to write.
3623 * @param i32 The 32-bit value to assign to *pi32.
3624 */
3625DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3626{
3627 ASMAtomicXchgS32(pi32, i32);
3628}
3629
3630
3631/**
3632 * Atomically writes a signed 32-bit value, unordered.
3633 *
3634 * @param pi32 Pointer to the 32-bit variable to write.
3635 * @param i32 The 32-bit value to assign to *pi32.
3636 */
3637DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3638{
3639 Assert(!((uintptr_t)pi32 & 3));
3640#if ARCH_BITS >= 32
3641 *pi32 = i32;
3642#else
3643 ASMAtomicXchgS32(pi32, i32);
3644#endif
3645}
3646
3647
3648/**
3649 * Atomically writes an unsigned 64-bit value, ordered.
3650 *
3651 * @param pu64 Pointer to the 64-bit variable to write.
3652 * @param u64 The 64-bit value to assign to *pu64.
3653 */
3654DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3655{
3656 /** @todo Any possible ARM32/ARM64 optimizations here? */
3657 ASMAtomicXchgU64(pu64, u64);
3658}
3659
3660
3661/**
3662 * Atomically writes an unsigned 64-bit value, unordered.
3663 *
3664 * @param pu64 Pointer to the 64-bit variable to write.
3665 * @param u64 The 64-bit value to assign to *pu64.
3666 */
3667DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3668{
3669 Assert(!((uintptr_t)pu64 & 7));
3670#if ARCH_BITS == 64
3671 *pu64 = u64;
3672#else
3673 ASMAtomicXchgU64(pu64, u64);
3674#endif
3675}
3676
3677
3678/**
3679 * Atomically writes a signed 64-bit value, ordered.
3680 *
3681 * @param pi64 Pointer to the 64-bit variable to write.
3682 * @param i64 The 64-bit value to assign to *pi64.
3683 */
3684DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3685{
3686 /** @todo Any possible ARM32/ARM64 optimizations here? */
3687 ASMAtomicXchgS64(pi64, i64);
3688}
3689
3690
3691/**
3692 * Atomically writes a signed 64-bit value, unordered.
3693 *
3694 * @param pi64 Pointer to the 64-bit variable to write.
3695 * @param i64 The 64-bit value to assign to *pi64.
3696 */
3697DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3698{
3699 Assert(!((uintptr_t)pi64 & 7));
3700#if ARCH_BITS == 64
3701 *pi64 = i64;
3702#else
3703 ASMAtomicXchgS64(pi64, i64);
3704#endif
3705}
3706
3707
3708/**
3709 * Atomically writes a size_t value, ordered.
3710 *
3711 * @param pcb Pointer to the size_t variable to write.
3712 * @param cb The value to assign to *pcb.
3713 */
3714DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3715{
3716#if ARCH_BITS == 64
3717 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3718#elif ARCH_BITS == 32
3719 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3720#elif ARCH_BITS == 16
3721 AssertCompileSize(size_t, 2);
3722 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3723#else
3724# error "Unsupported ARCH_BITS value"
3725#endif
3726}
3727
3728
3729/**
3730 * Atomically writes a size_t value, unordered.
3731 *
3732 * @param pcb Pointer to the size_t variable to write.
3733 * @param cb The value to assign to *pcb.
3734 */
3735DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3736{
3737#if ARCH_BITS == 64
3738 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3739#elif ARCH_BITS == 32
3740 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3741#elif ARCH_BITS == 16
3742 AssertCompileSize(size_t, 2);
3743 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3744#else
3745# error "Unsupported ARCH_BITS value"
3746#endif
3747}
3748
3749
3750/**
3751 * Atomically writes a boolean value, unordered.
3752 *
3753 * @param pf Pointer to the boolean variable to write.
3754 * @param f The boolean value to assign to *pf.
3755 */
3756DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3757{
3758 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3759}
3760
3761
3762/**
3763 * Atomically writes a boolean value, unordered.
3764 *
3765 * @param pf Pointer to the boolean variable to write.
3766 * @param f The boolean value to assign to *pf.
3767 */
3768DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3769{
3770 *pf = f; /* byte writes are atomic on x86 */
3771}
3772
3773
3774/**
3775 * Atomically writes a pointer value, ordered.
3776 *
3777 * @param ppv Pointer to the pointer variable to write.
3778 * @param pv The pointer value to assign to *ppv.
3779 */
3780DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3781{
3782#if ARCH_BITS == 32 || ARCH_BITS == 16
3783 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3784#elif ARCH_BITS == 64
3785 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3786#else
3787# error "ARCH_BITS is bogus"
3788#endif
3789}
3790
3791
3792/**
3793 * Atomically writes a pointer value, unordered.
3794 *
3795 * @param ppv Pointer to the pointer variable to write.
3796 * @param pv The pointer value to assign to *ppv.
3797 */
3798DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3799{
3800#if ARCH_BITS == 32 || ARCH_BITS == 16
3801 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3802#elif ARCH_BITS == 64
3803 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3804#else
3805# error "ARCH_BITS is bogus"
3806#endif
3807}
3808
3809
/**
 * Atomically writes a pointer value, ordered.
 *
 * Both variants compile-time assert that *ppv and pv are pointer sized and
 * (in builds where Assert is active) check that the target is aligned on
 * ARCH_BITS / 8 bytes before handing over to ASMAtomicWritePtrVoid.  The
 * macro arguments are parenthesized wherever they are expanded, so
 * expressions such as 'paSlots + 1' are handled correctly.  The GNU C variant
 * evaluates @a ppv and @a pv exactly once.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
3844
3845
3846/**
3847 * Atomically sets a pointer to NULL, ordered.
3848 *
3849 * @param ppv Pointer to the pointer variable that should be set to NULL.
3850 *
3851 * @remarks This is relatively type safe on GCC platforms.
3852 */
3853#if RT_GNUC_PREREQ(4, 2)
3854# define ASMAtomicWriteNullPtr(ppv) \
3855 do \
3856 { \
3857 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3858 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3859 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3860 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3861 } while (0)
3862#else
3863# define ASMAtomicWriteNullPtr(ppv) \
3864 do \
3865 { \
3866 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3867 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3868 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3869 } while (0)
3870#endif
3871
3872
3873/**
3874 * Atomically writes a pointer value, unordered.
3875 *
3876 * @returns Current *pv value
3877 * @param ppv Pointer to the pointer variable.
3878 * @param pv The pointer value to assign to *ppv. If NULL use
3879 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3880 *
3881 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3882 * NULL.
3883 */
3884#if RT_GNUC_PREREQ(4, 2)
3885# define ASMAtomicUoWritePtr(ppv, pv) \
3886 do \
3887 { \
3888 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3889 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3890 \
3891 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3892 AssertCompile(sizeof(pv) == sizeof(void *)); \
3893 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3894 \
3895 *(ppvTypeChecked) = pvTypeChecked; \
3896 } while (0)
3897#else
3898# define ASMAtomicUoWritePtr(ppv, pv) \
3899 do \
3900 { \
3901 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3902 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3903 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3904 *(ppv) = pv; \
3905 } while (0)
3906#endif
3907
3908
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Both variants compile-time assert that *ppv is pointer sized and (in builds
 * where Assert is active) check that the target is aligned on ARCH_BITS / 8
 * bytes before doing a plain store of NULL.  The macro argument is
 * parenthesized wherever it is expanded, and the GNU C variant evaluates it
 * exactly once.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
3934
3935
3936/**
3937 * Atomically write a typical IPRT handle value, ordered.
3938 *
3939 * @param ph Pointer to the variable to update.
3940 * @param hNew The value to assign to *ph.
3941 *
3942 * @remarks This doesn't currently work for all handles (like RTFILE).
3943 */
3944#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3945# define ASMAtomicWriteHandle(ph, hNew) \
3946 do { \
3947 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3948 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3949 } while (0)
3950#elif HC_ARCH_BITS == 64
3951# define ASMAtomicWriteHandle(ph, hNew) \
3952 do { \
3953 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3954 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3955 } while (0)
3956#else
3957# error HC_ARCH_BITS
3958#endif
3959
3960
3961/**
3962 * Atomically write a typical IPRT handle value, unordered.
3963 *
3964 * @param ph Pointer to the variable to update.
3965 * @param hNew The value to assign to *ph.
3966 *
3967 * @remarks This doesn't currently work for all handles (like RTFILE).
3968 */
3969#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3970# define ASMAtomicUoWriteHandle(ph, hNew) \
3971 do { \
3972 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3973 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3974 } while (0)
3975#elif HC_ARCH_BITS == 64
3976# define ASMAtomicUoWriteHandle(ph, hNew) \
3977 do { \
3978 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3979 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3980 } while (0)
3981#else
3982# error HC_ARCH_BITS
3983#endif
3984
3985
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicWriteU8, ASMAtomicWriteU16,
 * ASMAtomicWriteU32 or ASMAtomicWriteU64.  Any other size ends up in
 * AssertMsgFailed; the size is cast to int there so that the argument matches
 * the %d conversion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4003
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicUoWriteU8, ASMAtomicUoWriteU16,
 * ASMAtomicUoWriteU32 or ASMAtomicUoWriteU64.  Any other size ends up in
 * AssertMsgFailed; the message names this macro and the size is cast to int so
 * that the argument matches the %d conversion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4021
4022
4023
4024/**
4025 * Atomically exchanges and adds to a 16-bit value, ordered.
4026 *
4027 * @returns The old value.
4028 * @param pu16 Pointer to the value.
4029 * @param u16 Number to add.
4030 *
4031 * @remarks Currently not implemented, just to make 16-bit code happy.
4032 * @remarks x86: Requires a 486 or later.
4033 */
4034RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
4035
4036
4037/**
4038 * Atomically exchanges and adds to a 32-bit value, ordered.
4039 *
4040 * @returns The old value.
4041 * @param pu32 Pointer to the value.
4042 * @param u32 Number to add.
4043 *
4044 * @remarks x86: Requires a 486 or later.
4045 */
4046#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4047RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4048#else
4049DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4050{
4051# if RT_INLINE_ASM_USES_INTRIN
4052 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
4053 return u32;
4054
4055# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4056# if RT_INLINE_ASM_GNU_STYLE
4057 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4058 : "=r" (u32)
4059 , "=m" (*pu32)
4060 : "0" (u32)
4061 , "m" (*pu32)
4062 : "memory"
4063 , "cc");
4064 return u32;
4065# else
4066 __asm
4067 {
4068 mov eax, [u32]
4069# ifdef RT_ARCH_AMD64
4070 mov rdx, [pu32]
4071 lock xadd [rdx], eax
4072# else
4073 mov edx, [pu32]
4074 lock xadd [edx], eax
4075# endif
4076 mov [u32], eax
4077 }
4078 return u32;
4079# endif
4080
4081# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4082 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
4083 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
4084 "add %[uNew], %[uOld], %[uVal]\n\t",
4085 [uVal] "r" (u32));
4086 return u32OldRet;
4087
4088# else
4089# error "Port me"
4090# endif
4091}
4092#endif
4093
4094
4095/**
4096 * Atomically exchanges and adds to a signed 32-bit value, ordered.
4097 *
4098 * @returns The old value.
4099 * @param pi32 Pointer to the value.
4100 * @param i32 Number to add.
4101 *
4102 * @remarks x86: Requires a 486 or later.
4103 */
4104DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4105{
4106 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4107}
4108
4109
4110/**
4111 * Atomically exchanges and adds to a 64-bit value, ordered.
4112 *
4113 * @returns The old value.
4114 * @param pu64 Pointer to the value.
4115 * @param u64 Number to add.
4116 *
4117 * @remarks x86: Requires a Pentium or later.
4118 */
4119#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4120DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4121#else
4122DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4123{
4124# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4125 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
4126 return u64;
4127
4128# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4129 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4130 : "=r" (u64)
4131 , "=m" (*pu64)
4132 : "0" (u64)
4133 , "m" (*pu64)
4134 : "memory"
4135 , "cc");
4136 return u64;
4137
4138# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4139 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
4140 "add %[uNew], %[uOld], %[uVal]\n\t"
4141 ,
4142 "add %[uNew], %[uOld], %[uVal]\n\t"
4143 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
4144 [uVal] "r" (u64));
4145 return u64OldRet;
4146
4147# else
4148 uint64_t u64Old;
4149 for (;;)
4150 {
4151 uint64_t u64New;
4152 u64Old = ASMAtomicUoReadU64(pu64);
4153 u64New = u64Old + u64;
4154 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4155 break;
4156 ASMNopPause();
4157 }
4158 return u64Old;
4159# endif
4160}
4161#endif
4162
4163
4164/**
4165 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4166 *
4167 * @returns The old value.
4168 * @param pi64 Pointer to the value.
4169 * @param i64 Number to add.
4170 *
4171 * @remarks x86: Requires a Pentium or later.
4172 */
4173DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4174{
4175 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4176}
4177
4178
4179/**
4180 * Atomically exchanges and adds to a size_t value, ordered.
4181 *
4182 * @returns The old value.
4183 * @param pcb Pointer to the size_t value.
4184 * @param cb Number to add.
4185 */
4186DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4187{
4188#if ARCH_BITS == 64
4189 AssertCompileSize(size_t, 8);
4190 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4191#elif ARCH_BITS == 32
4192 AssertCompileSize(size_t, 4);
4193 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4194#elif ARCH_BITS == 16
4195 AssertCompileSize(size_t, 2);
4196 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4197#else
4198# error "Unsupported ARCH_BITS value"
4199#endif
4200}
4201
4202
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicAddU32 or ASMAtomicAddU64 and stores
 * the returned old value through puOld using the same width.  Any other size
 * ends up in AssertMsgFailed; the size is cast to int there so that the
 * argument matches the %d conversion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4219
4220
4221
4222/**
4223 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
4224 *
4225 * @returns The old value.
4226 * @param pu16 Pointer to the value.
4227 * @param u16 Number to subtract.
4228 *
4229 * @remarks x86: Requires a 486 or later.
4230 */
4231DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4232{
4233 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4234}
4235
4236
4237/**
4238 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
4239 *
4240 * @returns The old value.
4241 * @param pi16 Pointer to the value.
4242 * @param i16 Number to subtract.
4243 *
4244 * @remarks x86: Requires a 486 or later.
4245 */
4246DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4247{
4248 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4249}
4250
4251
4252/**
4253 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
4254 *
4255 * @returns The old value.
4256 * @param pu32 Pointer to the value.
4257 * @param u32 Number to subtract.
4258 *
4259 * @remarks x86: Requires a 486 or later.
4260 */
4261DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4262{
4263 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4264}
4265
4266
4267/**
4268 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
4269 *
4270 * @returns The old value.
4271 * @param pi32 Pointer to the value.
4272 * @param i32 Number to subtract.
4273 *
4274 * @remarks x86: Requires a 486 or later.
4275 */
4276DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4277{
4278 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4279}
4280
4281
4282/**
4283 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
4284 *
4285 * @returns The old value.
4286 * @param pu64 Pointer to the value.
4287 * @param u64 Number to subtract.
4288 *
4289 * @remarks x86: Requires a Pentium or later.
4290 */
4291DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4292{
4293 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4294}
4295
4296
4297/**
4298 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
4299 *
4300 * @returns The old value.
4301 * @param pi64 Pointer to the value.
4302 * @param i64 Number to subtract.
4303 *
4304 * @remarks x86: Requires a Pentium or later.
4305 */
4306DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4307{
4308 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4309}
4310
4311
4312/**
4313 * Atomically exchanges and subtracts to a size_t value, ordered.
4314 *
4315 * @returns The old value.
4316 * @param pcb Pointer to the size_t value.
4317 * @param cb Number to subtract.
4318 *
4319 * @remarks x86: Requires a 486 or later.
4320 */
4321DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4322{
4323#if ARCH_BITS == 64
4324 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4325#elif ARCH_BITS == 32
4326 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4327#elif ARCH_BITS == 16
4328 AssertCompileSize(size_t, 2);
4329 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4330#else
4331# error "Unsupported ARCH_BITS value"
4332#endif
4333}
4334
4335
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicSubU32 or ASMAtomicSubU64 and stores
 * the returned old value through puOld using the same width.  Any other size
 * ends up in AssertMsgFailed; the size is cast to int there so that the
 * argument matches the %d conversion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4354
4355
4356
4357/**
4358 * Atomically increment a 16-bit value, ordered.
4359 *
4360 * @returns The new value.
4361 * @param pu16 Pointer to the value to increment.
4362 * @remarks Not implemented. Just to make 16-bit code happy.
4363 *
4364 * @remarks x86: Requires a 486 or later.
4365 */
4366RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4367
4368
4369/**
4370 * Atomically increment a 32-bit value, ordered.
4371 *
4372 * @returns The new value.
4373 * @param pu32 Pointer to the value to increment.
4374 *
4375 * @remarks x86: Requires a 486 or later.
4376 */
4377#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4378RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4379#else
4380DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4381{
4382# if RT_INLINE_ASM_USES_INTRIN
4383 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4384
4385# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4386# if RT_INLINE_ASM_GNU_STYLE
4387 uint32_t u32;
4388 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4389 : "=r" (u32)
4390 , "=m" (*pu32)
4391 : "0" (1)
4392 , "m" (*pu32)
4393 : "memory"
4394 , "cc");
4395 return u32+1;
4396# else
4397 __asm
4398 {
4399 mov eax, 1
4400# ifdef RT_ARCH_AMD64
4401 mov rdx, [pu32]
4402 lock xadd [rdx], eax
4403# else
4404 mov edx, [pu32]
4405 lock xadd [edx], eax
4406# endif
4407 mov u32, eax
4408 }
4409 return u32+1;
4410# endif
4411
4412# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4413 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4414 "add %w[uNew], %w[uNew], #1\n\t",
4415 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4416 "X" (0) /* dummy */);
4417 return u32NewRet;
4418
4419# else
4420 return ASMAtomicAddU32(pu32, 1) + 1;
4421# endif
4422}
4423#endif
4424
4425
4426/**
4427 * Atomically increment a signed 32-bit value, ordered.
4428 *
4429 * @returns The new value.
4430 * @param pi32 Pointer to the value to increment.
4431 *
4432 * @remarks x86: Requires a 486 or later.
4433 */
4434DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4435{
4436 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4437}
4438
4439
4440/**
4441 * Atomically increment a 64-bit value, ordered.
4442 *
4443 * @returns The new value.
4444 * @param pu64 Pointer to the value to increment.
4445 *
4446 * @remarks x86: Requires a Pentium or later.
4447 */
4448#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4449DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4450#else
4451DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4452{
4453# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4454 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4455
4456# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4457 uint64_t u64;
4458 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4459 : "=r" (u64)
4460 , "=m" (*pu64)
4461 : "0" (1)
4462 , "m" (*pu64)
4463 : "memory"
4464 , "cc");
4465 return u64 + 1;
4466
4467# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4468 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4469 "add %[uNew], %[uNew], #1\n\t"
4470 ,
4471 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4472 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4473 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4474 return u64NewRet;
4475
4476# else
4477 return ASMAtomicAddU64(pu64, 1) + 1;
4478# endif
4479}
4480#endif
4481
4482
4483/**
4484 * Atomically increment a signed 64-bit value, ordered.
4485 *
4486 * @returns The new value.
4487 * @param pi64 Pointer to the value to increment.
4488 *
4489 * @remarks x86: Requires a Pentium or later.
4490 */
4491DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4492{
4493 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4494}
4495
4496
4497/**
4498 * Atomically increment a size_t value, ordered.
4499 *
4500 * @returns The new value.
4501 * @param pcb Pointer to the value to increment.
4502 *
4503 * @remarks x86: Requires a 486 or later.
4504 */
4505DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4506{
4507#if ARCH_BITS == 64
4508 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4509#elif ARCH_BITS == 32
4510 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4511#elif ARCH_BITS == 16
4512 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4513#else
4514# error "Unsupported ARCH_BITS value"
4515#endif
4516}
4517
4518
4519
4520/**
4521 * Atomically decrement an unsigned 32-bit value, ordered.
4522 *
4523 * @returns The new value.
4524 * @param pu16 Pointer to the value to decrement.
4525 * @remarks Not implemented. Just to make 16-bit code happy.
4526 *
4527 * @remarks x86: Requires a 486 or later.
4528 */
4529RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4530
4531
4532/**
4533 * Atomically decrement an unsigned 32-bit value, ordered.
4534 *
4535 * @returns The new value.
4536 * @param pu32 Pointer to the value to decrement.
4537 *
4538 * @remarks x86: Requires a 486 or later.
4539 */
4540#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4541RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4542#else
4543DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4544{
4545# if RT_INLINE_ASM_USES_INTRIN
4546 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4547
4548# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4549# if RT_INLINE_ASM_GNU_STYLE
4550 uint32_t u32;
4551 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4552 : "=r" (u32)
4553 , "=m" (*pu32)
4554 : "0" (-1)
4555 , "m" (*pu32)
4556 : "memory"
4557 , "cc");
4558 return u32-1;
4559# else
4560 uint32_t u32;
4561 __asm
4562 {
4563 mov eax, -1
4564# ifdef RT_ARCH_AMD64
4565 mov rdx, [pu32]
4566 lock xadd [rdx], eax
4567# else
4568 mov edx, [pu32]
4569 lock xadd [edx], eax
4570# endif
4571 mov u32, eax
4572 }
4573 return u32-1;
4574# endif
4575
4576# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4577 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4578 "sub %w[uNew], %w[uNew], #1\n\t",
4579 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4580 "X" (0) /* dummy */);
4581 return u32NewRet;
4582
4583# else
4584 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4585# endif
4586}
4587#endif
4588
4589
4590/**
4591 * Atomically decrement a signed 32-bit value, ordered.
4592 *
4593 * @returns The new value.
4594 * @param pi32 Pointer to the value to decrement.
4595 *
4596 * @remarks x86: Requires a 486 or later.
4597 */
4598DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4599{
4600 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4601}
4602
4603
4604/**
4605 * Atomically decrement an unsigned 64-bit value, ordered.
4606 *
4607 * @returns The new value.
4608 * @param pu64 Pointer to the value to decrement.
4609 *
4610 * @remarks x86: Requires a Pentium or later.
4611 */
4612#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4613RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4614#else
4615DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4616{
4617# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4618 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4619
4620# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4621 uint64_t u64;
4622 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4623 : "=r" (u64)
4624 , "=m" (*pu64)
4625 : "0" (~(uint64_t)0)
4626 , "m" (*pu64)
4627 : "memory"
4628 , "cc");
4629 return u64-1;
4630
4631# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4632 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4633 "sub %[uNew], %[uNew], #1\n\t"
4634 ,
4635 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4636 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4637 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4638 return u64NewRet;
4639
4640# else
4641 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4642# endif
4643}
4644#endif
4645
4646
4647/**
4648 * Atomically decrement a signed 64-bit value, ordered.
4649 *
4650 * @returns The new value.
4651 * @param pi64 Pointer to the value to decrement.
4652 *
4653 * @remarks x86: Requires a Pentium or later.
4654 */
4655DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4656{
4657 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4658}
4659
4660
4661/**
4662 * Atomically decrement a size_t value, ordered.
4663 *
4664 * @returns The new value.
4665 * @param pcb Pointer to the value to decrement.
4666 *
4667 * @remarks x86: Requires a 486 or later.
4668 */
4669DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4670{
4671#if ARCH_BITS == 64
4672 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4673#elif ARCH_BITS == 32
4674 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4675#elif ARCH_BITS == 16
4676 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4677#else
4678# error "Unsupported ARCH_BITS value"
4679#endif
4680}
4681
4682
4683/**
4684 * Atomically Or an unsigned 32-bit value, ordered.
4685 *
4686 * @param pu32 Pointer to the pointer variable to OR u32 with.
4687 * @param u32 The value to OR *pu32 with.
4688 *
4689 * @remarks x86: Requires a 386 or later.
4690 */
4691#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4692RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4693#else
4694DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4695{
4696# if RT_INLINE_ASM_USES_INTRIN
4697 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4698
4699# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4700# if RT_INLINE_ASM_GNU_STYLE
4701 __asm__ __volatile__("lock; orl %1, %0\n\t"
4702 : "=m" (*pu32)
4703 : "ir" (u32)
4704 , "m" (*pu32)
4705 : "cc");
4706# else
4707 __asm
4708 {
4709 mov eax, [u32]
4710# ifdef RT_ARCH_AMD64
4711 mov rdx, [pu32]
4712 lock or [rdx], eax
4713# else
4714 mov edx, [pu32]
4715 lock or [edx], eax
4716# endif
4717 }
4718# endif
4719
4720# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4721 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4722 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4723 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4724 "orr %[uNew], %[uNew], %[uVal]\n\t",
4725 [uVal] "r" (u32));
4726
4727# else
4728# error "Port me"
4729# endif
4730}
4731#endif
4732
4733
4734/**
4735 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4736 * fallback).
4737 *
4738 * @returns Old value.
4739 * @param pu32 Pointer to the variable to OR @a u32 with.
4740 * @param u32 The value to OR @a *pu32 with.
4741 */
4742DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4743{
4744#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4745 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4746 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4747 "orr %[uNew], %[uOld], %[uVal]\n\t",
4748 [uVal] "r" (u32));
4749 return u32OldRet;
4750
4751#else
4752 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4753 uint32_t u32New;
4754 do
4755 u32New = u32RetOld | u32;
4756 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4757 return u32RetOld;
4758#endif
4759}
4760
4761
4762/**
4763 * Atomically Or a signed 32-bit value, ordered.
4764 *
4765 * @param pi32 Pointer to the pointer variable to OR u32 with.
4766 * @param i32 The value to OR *pu32 with.
4767 *
4768 * @remarks x86: Requires a 386 or later.
4769 */
4770DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4771{
4772 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4773}
4774
4775
4776/**
4777 * Atomically Or an unsigned 64-bit value, ordered.
4778 *
4779 * @param pu64 Pointer to the pointer variable to OR u64 with.
4780 * @param u64 The value to OR *pu64 with.
4781 *
4782 * @remarks x86: Requires a Pentium or later.
4783 */
4784#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4785DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4786#else
4787DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4788{
4789# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4790 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4791
4792# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4793 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4794 : "=m" (*pu64)
4795 : "r" (u64)
4796 , "m" (*pu64)
4797 : "cc");
4798
4799# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4800 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4801 "orr %[uNew], %[uNew], %[uVal]\n\t"
4802 ,
4803 "orr %[uNew], %[uNew], %[uVal]\n\t"
4804 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4805 [uVal] "r" (u64));
4806
4807# else
4808 for (;;)
4809 {
4810 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4811 uint64_t u64New = u64Old | u64;
4812 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4813 break;
4814 ASMNopPause();
4815 }
4816# endif
4817}
4818#endif
4819
4820
4821/**
4822 * Atomically Or a signed 64-bit value, ordered.
4823 *
4824 * @param pi64 Pointer to the pointer variable to OR u64 with.
4825 * @param i64 The value to OR *pu64 with.
4826 *
4827 * @remarks x86: Requires a Pentium or later.
4828 */
4829DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4830{
4831 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4832}
4833
4834
4835/**
4836 * Atomically And an unsigned 32-bit value, ordered.
4837 *
4838 * @param pu32 Pointer to the pointer variable to AND u32 with.
4839 * @param u32 The value to AND *pu32 with.
4840 *
4841 * @remarks x86: Requires a 386 or later.
4842 */
4843#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4844RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4845#else
4846DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4847{
4848# if RT_INLINE_ASM_USES_INTRIN
4849 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4850
4851# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4852# if RT_INLINE_ASM_GNU_STYLE
4853 __asm__ __volatile__("lock; andl %1, %0\n\t"
4854 : "=m" (*pu32)
4855 : "ir" (u32)
4856 , "m" (*pu32)
4857 : "cc");
4858# else
4859 __asm
4860 {
4861 mov eax, [u32]
4862# ifdef RT_ARCH_AMD64
4863 mov rdx, [pu32]
4864 lock and [rdx], eax
4865# else
4866 mov edx, [pu32]
4867 lock and [edx], eax
4868# endif
4869 }
4870# endif
4871
4872# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4873 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4874 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4875 "and %[uNew], %[uNew], %[uVal]\n\t",
4876 [uVal] "r" (u32));
4877
4878# else
4879# error "Port me"
4880# endif
4881}
4882#endif
4883
4884
4885/**
4886 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4887 *
4888 * @returns Old value.
4889 * @param pu32 Pointer to the variable to AND @a u32 with.
4890 * @param u32 The value to AND @a *pu32 with.
4891 */
4892DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4893{
4894#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4895 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4896 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4897 "and %[uNew], %[uOld], %[uVal]\n\t",
4898 [uVal] "r" (u32));
4899 return u32OldRet;
4900
4901#else
4902 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4903 uint32_t u32New;
4904 do
4905 u32New = u32RetOld & u32;
4906 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4907 return u32RetOld;
4908#endif
4909}
4910
4911
4912/**
4913 * Atomically And a signed 32-bit value, ordered.
4914 *
4915 * @param pi32 Pointer to the pointer variable to AND i32 with.
4916 * @param i32 The value to AND *pi32 with.
4917 *
4918 * @remarks x86: Requires a 386 or later.
4919 */
4920DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4921{
4922 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4923}
4924
4925
4926/**
4927 * Atomically And an unsigned 64-bit value, ordered.
4928 *
4929 * @param pu64 Pointer to the pointer variable to AND u64 with.
4930 * @param u64 The value to AND *pu64 with.
4931 *
4932 * @remarks x86: Requires a Pentium or later.
4933 */
4934#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4935DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4936#else
4937DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4938{
4939# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4940 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4941
4942# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4943 __asm__ __volatile__("lock; andq %1, %0\n\t"
4944 : "=m" (*pu64)
4945 : "r" (u64)
4946 , "m" (*pu64)
4947 : "cc");
4948
4949# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4950 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4951 "and %[uNew], %[uNew], %[uVal]\n\t"
4952 ,
4953 "and %[uNew], %[uNew], %[uVal]\n\t"
4954 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4955 [uVal] "r" (u64));
4956
4957# else
4958 for (;;)
4959 {
4960 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4961 uint64_t u64New = u64Old & u64;
4962 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4963 break;
4964 ASMNopPause();
4965 }
4966# endif
4967}
4968#endif
4969
4970
4971/**
4972 * Atomically And a signed 64-bit value, ordered.
4973 *
4974 * @param pi64 Pointer to the pointer variable to AND i64 with.
4975 * @param i64 The value to AND *pi64 with.
4976 *
4977 * @remarks x86: Requires a Pentium or later.
4978 */
4979DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4980{
4981 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4982}
4983
4984
4985/**
4986 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4987 *
4988 * @param pu32 Pointer to the variable to XOR @a u32 with.
4989 * @param u32 The value to XOR @a *pu32 with.
4990 *
4991 * @remarks x86: Requires a 386 or later.
4992 */
4993#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4994RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4995#else
4996DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4997{
4998# if RT_INLINE_ASM_USES_INTRIN
4999 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
5000
5001# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5002# if RT_INLINE_ASM_GNU_STYLE
5003 __asm__ __volatile__("lock; xorl %1, %0\n\t"
5004 : "=m" (*pu32)
5005 : "ir" (u32)
5006 , "m" (*pu32)
5007 : "cc");
5008# else
5009 __asm
5010 {
5011 mov eax, [u32]
5012# ifdef RT_ARCH_AMD64
5013 mov rdx, [pu32]
5014 lock xor [rdx], eax
5015# else
5016 mov edx, [pu32]
5017 lock xor [edx], eax
5018# endif
5019 }
5020# endif
5021
5022# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5023 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
5024 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5025 "eor %[uNew], %[uNew], %[uVal]\n\t",
5026 [uVal] "r" (u32));
5027
5028# else
5029# error "Port me"
5030# endif
5031}
5032#endif
5033
5034
5035/**
5036 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
5037 * extended version (for bitmaps).
5038 *
5039 * @returns Old value.
5040 * @param pu32 Pointer to the variable to XOR @a u32 with.
5041 * @param u32 The value to XOR @a *pu32 with.
5042 */
5043DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5044{
5045#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5046 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
5047 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5048 "eor %[uNew], %[uOld], %[uVal]\n\t",
5049 [uVal] "r" (u32));
5050 return u32OldRet;
5051
5052#else
5053 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
5054 uint32_t u32New;
5055 do
5056 u32New = u32RetOld ^ u32;
5057 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5058 return u32RetOld;
5059#endif
5060}
5061
5062
5063/**
5064 * Atomically XOR a signed 32-bit value, ordered.
5065 *
5066 * @param pi32 Pointer to the variable to XOR i32 with.
5067 * @param i32 The value to XOR *pi32 with.
5068 *
5069 * @remarks x86: Requires a 386 or later.
5070 */
5071DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5072{
5073 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5074}
5075
5076
5077/**
5078 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
5079 *
5080 * @param pu32 Pointer to the pointer variable to OR u32 with.
5081 * @param u32 The value to OR *pu32 with.
5082 *
5083 * @remarks x86: Requires a 386 or later.
5084 */
5085#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5086RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5087#else
5088DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5089{
5090# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5091# if RT_INLINE_ASM_GNU_STYLE
5092 __asm__ __volatile__("orl %1, %0\n\t"
5093 : "=m" (*pu32)
5094 : "ir" (u32)
5095 , "m" (*pu32)
5096 : "cc");
5097# else
5098 __asm
5099 {
5100 mov eax, [u32]
5101# ifdef RT_ARCH_AMD64
5102 mov rdx, [pu32]
5103 or [rdx], eax
5104# else
5105 mov edx, [pu32]
5106 or [edx], eax
5107# endif
5108 }
5109# endif
5110
5111# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5112 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
5113 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
5114 "orr %[uNew], %[uNew], %[uVal]\n\t",
5115 [uVal] "r" (u32));
5116
5117# else
5118# error "Port me"
5119# endif
5120}
5121#endif
5122
5123
5124/**
5125 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
5126 * extended version (for bitmap fallback).
5127 *
5128 * @returns Old value.
5129 * @param pu32 Pointer to the variable to OR @a u32 with.
5130 * @param u32 The value to OR @a *pu32 with.
5131 */
5132DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5133{
5134#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5135 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
5136 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
5137 "orr %[uNew], %[uOld], %[uVal]\n\t",
5138 [uVal] "r" (u32));
5139 return u32OldRet;
5140
5141#else
5142 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5143#endif
5144}
5145
5146
5147/**
5148 * Atomically OR a signed 32-bit value, unordered.
5149 *
5150 * @param pi32 Pointer to the pointer variable to OR u32 with.
5151 * @param i32 The value to OR *pu32 with.
5152 *
5153 * @remarks x86: Requires a 386 or later.
5154 */
5155DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5156{
5157 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5158}
5159
5160
5161/**
5162 * Atomically OR an unsigned 64-bit value, unordered.
5163 *
5164 * @param pu64 Pointer to the pointer variable to OR u64 with.
5165 * @param u64 The value to OR *pu64 with.
5166 *
5167 * @remarks x86: Requires a Pentium or later.
5168 */
5169#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5170DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5171#else
5172DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5173{
5174# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5175 __asm__ __volatile__("orq %1, %q0\n\t"
5176 : "=m" (*pu64)
5177 : "r" (u64)
5178 , "m" (*pu64)
5179 : "cc");
5180
5181# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5182 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5183 "orr %[uNew], %[uNew], %[uVal]\n\t"
5184 ,
5185 "orr %[uNew], %[uNew], %[uVal]\n\t"
5186 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5187 [uVal] "r" (u64));
5188
5189# else
5190 for (;;)
5191 {
5192 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5193 uint64_t u64New = u64Old | u64;
5194 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5195 break;
5196 ASMNopPause();
5197 }
5198# endif
5199}
5200#endif
5201
5202
5203/**
5204 * Atomically Or a signed 64-bit value, unordered.
5205 *
5206 * @param pi64 Pointer to the pointer variable to OR u64 with.
5207 * @param i64 The value to OR *pu64 with.
5208 *
5209 * @remarks x86: Requires a Pentium or later.
5210 */
5211DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5212{
5213 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5214}
5215
5216
5217/**
5218 * Atomically And an unsigned 32-bit value, unordered.
5219 *
5220 * @param pu32 Pointer to the pointer variable to AND u32 with.
5221 * @param u32 The value to AND *pu32 with.
5222 *
5223 * @remarks x86: Requires a 386 or later.
5224 */
5225#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5226RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5227#else
5228DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5229{
5230# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5231# if RT_INLINE_ASM_GNU_STYLE
5232 __asm__ __volatile__("andl %1, %0\n\t"
5233 : "=m" (*pu32)
5234 : "ir" (u32)
5235 , "m" (*pu32)
5236 : "cc");
5237# else
5238 __asm
5239 {
5240 mov eax, [u32]
5241# ifdef RT_ARCH_AMD64
5242 mov rdx, [pu32]
5243 and [rdx], eax
5244# else
5245 mov edx, [pu32]
5246 and [edx], eax
5247# endif
5248 }
5249# endif
5250
5251# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5252 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5253 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5254 "and %[uNew], %[uNew], %[uVal]\n\t",
5255 [uVal] "r" (u32));
5256
5257# else
5258# error "Port me"
5259# endif
5260}
5261#endif
5262
5263
5264/**
5265 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5266 * bitmap fallback).
5267 *
5268 * @returns Old value.
5269 * @param pu32 Pointer to the pointer to AND @a u32 with.
5270 * @param u32 The value to AND @a *pu32 with.
5271 */
5272DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5273{
5274#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5275 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5276 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5277 "and %[uNew], %[uOld], %[uVal]\n\t",
5278 [uVal] "r" (u32));
5279 return u32OldRet;
5280
5281#else
5282 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5283#endif
5284}
5285
5286
5287/**
5288 * Atomically And a signed 32-bit value, unordered.
5289 *
5290 * @param pi32 Pointer to the pointer variable to AND i32 with.
5291 * @param i32 The value to AND *pi32 with.
5292 *
5293 * @remarks x86: Requires a 386 or later.
5294 */
5295DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5296{
5297 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5298}
5299
5300
5301/**
5302 * Atomically And an unsigned 64-bit value, unordered.
5303 *
5304 * @param pu64 Pointer to the pointer variable to AND u64 with.
5305 * @param u64 The value to AND *pu64 with.
5306 *
5307 * @remarks x86: Requires a Pentium or later.
5308 */
5309#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5310DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5311#else
5312DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5313{
5314# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5315 __asm__ __volatile__("andq %1, %0\n\t"
5316 : "=m" (*pu64)
5317 : "r" (u64)
5318 , "m" (*pu64)
5319 : "cc");
5320
5321# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5322 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5323 "and %[uNew], %[uNew], %[uVal]\n\t"
5324 ,
5325 "and %[uNew], %[uNew], %[uVal]\n\t"
5326 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5327 [uVal] "r" (u64));
5328
5329# else
5330 for (;;)
5331 {
5332 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5333 uint64_t u64New = u64Old & u64;
5334 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5335 break;
5336 ASMNopPause();
5337 }
5338# endif
5339}
5340#endif
5341
5342
5343/**
5344 * Atomically And a signed 64-bit value, unordered.
5345 *
5346 * @param pi64 Pointer to the pointer variable to AND i64 with.
5347 * @param i64 The value to AND *pi64 with.
5348 *
5349 * @remarks x86: Requires a Pentium or later.
5350 */
5351DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5352{
5353 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5354}
5355
5356
5357/**
5358 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5359 *
5360 * @param pu32 Pointer to the variable to XOR @a u32 with.
5361 * @param u32 The value to OR @a *pu32 with.
5362 *
5363 * @remarks x86: Requires a 386 or later.
5364 */
5365#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5366RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5367#else
5368DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5369{
5370# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5371# if RT_INLINE_ASM_GNU_STYLE
5372 __asm__ __volatile__("xorl %1, %0\n\t"
5373 : "=m" (*pu32)
5374 : "ir" (u32)
5375 , "m" (*pu32)
5376 : "cc");
5377# else
5378 __asm
5379 {
5380 mov eax, [u32]
5381# ifdef RT_ARCH_AMD64
5382 mov rdx, [pu32]
5383 xor [rdx], eax
5384# else
5385 mov edx, [pu32]
5386 xor [edx], eax
5387# endif
5388 }
5389# endif
5390
5391# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5392 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5393 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5394 "eor %[uNew], %[uNew], %[uVal]\n\t",
5395 [uVal] "r" (u32));
5396
5397# else
5398# error "Port me"
5399# endif
5400}
5401#endif
5402
5403
5404/**
5405 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5406 * extended version (for bitmap fallback).
5407 *
5408 * @returns Old value.
5409 * @param pu32 Pointer to the variable to XOR @a u32 with.
5410 * @param u32 The value to OR @a *pu32 with.
5411 */
5412DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5413{
5414#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5415 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5416 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5417 "eor %[uNew], %[uOld], %[uVal]\n\t",
5418 [uVal] "r" (u32));
5419 return u32OldRet;
5420
5421#else
5422 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5423#endif
5424}
5425
5426
5427/**
5428 * Atomically XOR a signed 32-bit value, unordered.
5429 *
5430 * @param pi32 Pointer to the variable to XOR @a u32 with.
5431 * @param i32 The value to XOR @a *pu32 with.
5432 *
5433 * @remarks x86: Requires a 386 or later.
5434 */
5435DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5436{
5437 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5438}
5439
5440
5441/**
5442 * Atomically increment an unsigned 32-bit value, unordered.
5443 *
5444 * @returns the new value.
5445 * @param pu32 Pointer to the variable to increment.
5446 *
5447 * @remarks x86: Requires a 486 or later.
5448 */
5449#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5450RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5451#else
5452DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5453{
5454# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5455 uint32_t u32;
5456# if RT_INLINE_ASM_GNU_STYLE
5457 __asm__ __volatile__("xaddl %0, %1\n\t"
5458 : "=r" (u32)
5459 , "=m" (*pu32)
5460 : "0" (1)
5461 , "m" (*pu32)
5462 : "memory" /** @todo why 'memory'? */
5463 , "cc");
5464 return u32 + 1;
5465# else
5466 __asm
5467 {
5468 mov eax, 1
5469# ifdef RT_ARCH_AMD64
5470 mov rdx, [pu32]
5471 xadd [rdx], eax
5472# else
5473 mov edx, [pu32]
5474 xadd [edx], eax
5475# endif
5476 mov u32, eax
5477 }
5478 return u32 + 1;
5479# endif
5480
5481# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5482 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5483 "add %w[uNew], %w[uNew], #1\n\t",
5484 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5485 "X" (0) /* dummy */);
5486 return u32NewRet;
5487
5488# else
5489# error "Port me"
5490# endif
5491}
5492#endif
5493
5494
5495/**
5496 * Atomically decrement an unsigned 32-bit value, unordered.
5497 *
5498 * @returns the new value.
5499 * @param pu32 Pointer to the variable to decrement.
5500 *
5501 * @remarks x86: Requires a 486 or later.
5502 */
5503#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5504RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5505#else
5506DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5507{
5508# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5509 uint32_t u32;
5510# if RT_INLINE_ASM_GNU_STYLE
5511 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5512 : "=r" (u32)
5513 , "=m" (*pu32)
5514 : "0" (-1)
5515 , "m" (*pu32)
5516 : "memory"
5517 , "cc");
5518 return u32 - 1;
5519# else
5520 __asm
5521 {
5522 mov eax, -1
5523# ifdef RT_ARCH_AMD64
5524 mov rdx, [pu32]
5525 xadd [rdx], eax
5526# else
5527 mov edx, [pu32]
5528 xadd [edx], eax
5529# endif
5530 mov u32, eax
5531 }
5532 return u32 - 1;
5533# endif
5534
5535# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5536 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5537 "sub %w[uNew], %w[uNew], #1\n\t",
5538 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5539 "X" (0) /* dummy */);
5540 return u32NewRet;
5541
5542# else
5543# error "Port me"
5544# endif
5545}
5546#endif
5547
5548
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the memory helpers in this header, selected per
 * architecture / OS.  Defined locally because we try to avoid dragging in
 * iprt/param.h here.  Where a PAGE_SIZE macro is already visible (and the
 * environment is not one of the excluded ones) it is checked against this
 * value at preprocessing time.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != RT_ASM_PAGE_SIZE
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#elif defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)
# define RT_ASM_PAGE_SIZE   0x4000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
#  if PAGE_SIZE != RT_ASM_PAGE_SIZE
#   error "PAGE_SIZE is not 0x4000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(RT_OS_LINUX) && !defined(RT_ARCH_ARM64)
#  if PAGE_SIZE != RT_ASM_PAGE_SIZE
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
5575
5576/**
5577 * Zeros a 4K memory page.
5578 *
5579 * @param pv Pointer to the memory block. This must be page aligned.
5580 */
5581#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5582RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5583# else
5584DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5585{
5586# if RT_INLINE_ASM_USES_INTRIN
5587# ifdef RT_ARCH_AMD64
5588 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5589# else
5590 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5591# endif
5592
5593# elif RT_INLINE_ASM_GNU_STYLE
5594 RTCCUINTREG uDummy;
5595# ifdef RT_ARCH_AMD64
5596 __asm__ __volatile__("rep stosq"
5597 : "=D" (pv),
5598 "=c" (uDummy)
5599 : "0" (pv),
5600 "c" (RT_ASM_PAGE_SIZE >> 3),
5601 "a" (0)
5602 : "memory");
5603# else
5604 __asm__ __volatile__("rep stosl"
5605 : "=D" (pv),
5606 "=c" (uDummy)
5607 : "0" (pv),
5608 "c" (RT_ASM_PAGE_SIZE >> 2),
5609 "a" (0)
5610 : "memory");
5611# endif
5612# else
5613 __asm
5614 {
5615# ifdef RT_ARCH_AMD64
5616 xor rax, rax
5617 mov ecx, 0200h
5618 mov rdi, [pv]
5619 rep stosq
5620# else
5621 xor eax, eax
5622 mov ecx, 0400h
5623 mov edi, [pv]
5624 rep stosd
5625# endif
5626 }
5627# endif
5628}
5629# endif
5630
5631
5632/**
5633 * Zeros a memory block with a 32-bit aligned size.
5634 *
5635 * @param pv Pointer to the memory block.
5636 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5637 */
5638#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5639RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5640#else
5641DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5642{
5643# if RT_INLINE_ASM_USES_INTRIN
5644# ifdef RT_ARCH_AMD64
5645 if (!(cb & 7))
5646 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5647 else
5648# endif
5649 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5650
5651# elif RT_INLINE_ASM_GNU_STYLE
5652 __asm__ __volatile__("rep stosl"
5653 : "=D" (pv),
5654 "=c" (cb)
5655 : "0" (pv),
5656 "1" (cb >> 2),
5657 "a" (0)
5658 : "memory");
5659# else
5660 __asm
5661 {
5662 xor eax, eax
5663# ifdef RT_ARCH_AMD64
5664 mov rcx, [cb]
5665 shr rcx, 2
5666 mov rdi, [pv]
5667# else
5668 mov ecx, [cb]
5669 shr ecx, 2
5670 mov edi, [pv]
5671# endif
5672 rep stosd
5673 }
5674# endif
5675}
5676#endif
5677
5678
5679/**
5680 * Fills a memory block with a 32-bit aligned size.
5681 *
5682 * @param pv Pointer to the memory block.
5683 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5684 * @param u32 The value to fill with.
5685 */
5686#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5687RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5688#else
5689DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5690{
5691# if RT_INLINE_ASM_USES_INTRIN
5692# ifdef RT_ARCH_AMD64
5693 if (!(cb & 7))
5694 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5695 else
5696# endif
5697 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5698
5699# elif RT_INLINE_ASM_GNU_STYLE
5700 __asm__ __volatile__("rep stosl"
5701 : "=D" (pv),
5702 "=c" (cb)
5703 : "0" (pv),
5704 "1" (cb >> 2),
5705 "a" (u32)
5706 : "memory");
5707# else
5708 __asm
5709 {
5710# ifdef RT_ARCH_AMD64
5711 mov rcx, [cb]
5712 shr rcx, 2
5713 mov rdi, [pv]
5714# else
5715 mov ecx, [cb]
5716 shr ecx, 2
5717 mov edi, [pv]
5718# endif
5719 mov eax, [u32]
5720 rep stosd
5721 }
5722# endif
5723}
5724#endif
5725
5726
5727/**
5728 * Checks if a memory block is all zeros.
5729 *
5730 * @returns Pointer to the first non-zero byte.
5731 * @returns NULL if all zero.
5732 *
5733 * @param pv Pointer to the memory block.
5734 * @param cb Number of bytes in the block.
5735 */
5736#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5737DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5738#else
5739DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5740{
5741/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5742 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5743 for (; cb; cb--, pb++)
5744 if (RT_LIKELY(*pb == 0))
5745 { /* likely */ }
5746 else
5747 return (void RT_FAR *)pb;
5748 return NULL;
5749}
5750#endif
5751
5752
5753/**
5754 * Checks if a memory block is all zeros.
5755 *
5756 * @returns true if zero, false if not.
5757 *
5758 * @param pv Pointer to the memory block.
5759 * @param cb Number of bytes in the block.
5760 *
5761 * @sa ASMMemFirstNonZero
5762 */
5763DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5764{
5765 return ASMMemFirstNonZero(pv, cb) == NULL;
5766}
5767
5768
5769/**
5770 * Checks if a memory page is all zeros.
5771 *
5772 * @returns true / false.
5773 *
5774 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5775 * boundary
5776 */
5777DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5778{
5779# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5780 union { RTCCUINTREG r; bool f; } uAX;
5781 RTCCUINTREG xCX, xDI;
5782 Assert(!((uintptr_t)pvPage & 15));
5783 __asm__ __volatile__("repe; "
5784# ifdef RT_ARCH_AMD64
5785 "scasq\n\t"
5786# else
5787 "scasl\n\t"
5788# endif
5789 "setnc %%al\n\t"
5790 : "=&c" (xCX)
5791 , "=&D" (xDI)
5792 , "=&a" (uAX.r)
5793 : "mr" (pvPage)
5794# ifdef RT_ARCH_AMD64
5795 , "0" (RT_ASM_PAGE_SIZE/8)
5796# else
5797 , "0" (RT_ASM_PAGE_SIZE/4)
5798# endif
5799 , "1" (pvPage)
5800 , "2" (0)
5801 : "cc");
5802 return uAX.f;
5803# else
5804 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5805 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5806 Assert(!((uintptr_t)pvPage & 15));
5807 for (;;)
5808 {
5809 if (puPtr[0]) return false;
5810 if (puPtr[4]) return false;
5811
5812 if (puPtr[2]) return false;
5813 if (puPtr[6]) return false;
5814
5815 if (puPtr[1]) return false;
5816 if (puPtr[5]) return false;
5817
5818 if (puPtr[3]) return false;
5819 if (puPtr[7]) return false;
5820
5821 if (!--cLeft)
5822 return true;
5823 puPtr += 8;
5824 }
5825# endif
5826}
5827
5828
5829/**
5830 * Checks if a memory block is filled with the specified byte, returning the
5831 * first mismatch.
5832 *
5833 * This is sort of an inverted memchr.
5834 *
5835 * @returns Pointer to the byte which doesn't equal u8.
5836 * @returns NULL if all equal to u8.
5837 *
5838 * @param pv Pointer to the memory block.
5839 * @param cb Number of bytes in the block.
5840 * @param u8 The value it's supposed to be filled with.
5841 *
5842 * @remarks No alignment requirements.
5843 */
5844#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5845 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5846DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5847#else
5848DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5849{
5850/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5851 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5852 for (; cb; cb--, pb++)
5853 if (RT_LIKELY(*pb == u8))
5854 { /* likely */ }
5855 else
5856 return (void *)pb;
5857 return NULL;
5858}
5859#endif
5860
5861
5862/**
5863 * Checks if a memory block is filled with the specified byte.
5864 *
5865 * @returns true if all matching, false if not.
5866 *
5867 * @param pv Pointer to the memory block.
5868 * @param cb Number of bytes in the block.
5869 * @param u8 The value it's supposed to be filled with.
5870 *
5871 * @remarks No alignment requirements.
5872 */
5873DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5874{
5875 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5876}
5877
5878
5879/**
5880 * Checks if a memory block is filled with the specified 32-bit value.
5881 *
5882 * This is a sort of inverted memchr.
5883 *
5884 * @returns Pointer to the first value which doesn't equal u32.
5885 * @returns NULL if all equal to u32.
5886 *
5887 * @param pv Pointer to the memory block.
5888 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5889 * @param u32 The value it's supposed to be filled with.
5890 */
5891DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5892{
5893/** @todo rewrite this in inline assembly? */
5894 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5895 for (; cb; cb -= 4, pu32++)
5896 if (RT_LIKELY(*pu32 == u32))
5897 { /* likely */ }
5898 else
5899 return (uint32_t RT_FAR *)pu32;
5900 return NULL;
5901}
5902
5903
5904/**
5905 * Probes a byte pointer for read access.
5906 *
5907 * While the function will not fault if the byte is not read accessible,
5908 * the idea is to do this in a safe place like before acquiring locks
5909 * and such like.
5910 *
5911 * Also, this functions guarantees that an eager compiler is not going
5912 * to optimize the probing away.
5913 *
5914 * @param pvByte Pointer to the byte.
5915 */
5916#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5917RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5918#else
5919DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5920{
5921# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5922 uint8_t u8;
5923# if RT_INLINE_ASM_GNU_STYLE
5924 __asm__ __volatile__("movb %1, %0\n\t"
5925 : "=q" (u8)
5926 : "m" (*(const uint8_t *)pvByte));
5927# else
5928 __asm
5929 {
5930# ifdef RT_ARCH_AMD64
5931 mov rax, [pvByte]
5932 mov al, [rax]
5933# else
5934 mov eax, [pvByte]
5935 mov al, [eax]
5936# endif
5937 mov [u8], al
5938 }
5939# endif
5940 return u8;
5941
5942# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5943 uint32_t u32;
5944 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5945# if defined(RT_ARCH_ARM64)
5946 "ldxrb %w[uDst], %[pMem]\n\t"
5947# else
5948 "ldrexb %[uDst], %[pMem]\n\t"
5949# endif
5950 : [uDst] "=&r" (u32)
5951 : [pMem] "Q" (*(uint8_t const *)pvByte));
5952 return (uint8_t)u32;
5953
5954# else
5955# error "Port me"
5956# endif
5957}
5958#endif
5959
5960/**
5961 * Probes a buffer for read access page by page.
5962 *
5963 * While the function will fault if the buffer is not fully read
5964 * accessible, the idea is to do this in a safe place like before
5965 * acquiring locks and such like.
5966 *
5967 * Also, this functions guarantees that an eager compiler is not going
5968 * to optimize the probing away.
5969 *
5970 * @param pvBuf Pointer to the buffer.
5971 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5972 */
5973DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5974{
5975 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5976 /* the first byte */
5977 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5978 ASMProbeReadByte(pu8);
5979
5980 /* the pages in between pages. */
5981 while (cbBuf > RT_ASM_PAGE_SIZE)
5982 {
5983 ASMProbeReadByte(pu8);
5984 cbBuf -= RT_ASM_PAGE_SIZE;
5985 pu8 += RT_ASM_PAGE_SIZE;
5986 }
5987
5988 /* the last byte */
5989 ASMProbeReadByte(pu8 + cbBuf - 1);
5990}
5991
5992
5993/**
5994 * Reverse the byte order of the given 16-bit integer.
5995 *
5996 * @returns Revert
5997 * @param u16 16-bit integer value.
5998 */
5999#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6000RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6001#else
6002DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6003{
6004# if RT_INLINE_ASM_USES_INTRIN
6005 return _byteswap_ushort(u16);
6006
6007# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6008# if RT_INLINE_ASM_GNU_STYLE
6009 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6010# else
6011 _asm
6012 {
6013 mov ax, [u16]
6014 ror ax, 8
6015 mov [u16], ax
6016 }
6017# endif
6018 return u16;
6019
6020# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6021 uint32_t u32Ret;
6022 __asm__ __volatile__(
6023# if defined(RT_ARCH_ARM64)
6024 "rev16 %w[uRet], %w[uVal]\n\t"
6025# else
6026 "rev16 %[uRet], %[uVal]\n\t"
6027# endif
6028 : [uRet] "=r" (u32Ret)
6029 : [uVal] "r" (u16));
6030 return (uint16_t)u32Ret;
6031
6032# else
6033# error "Port me"
6034# endif
6035}
6036#endif
6037
6038
6039/**
6040 * Reverse the byte order of the given 32-bit integer.
6041 *
6042 * @returns Revert
6043 * @param u32 32-bit integer value.
6044 */
6045#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6046RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6047#else
6048DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6049{
6050# if RT_INLINE_ASM_USES_INTRIN
6051 return _byteswap_ulong(u32);
6052
6053# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6054# if RT_INLINE_ASM_GNU_STYLE
6055 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6056# else
6057 _asm
6058 {
6059 mov eax, [u32]
6060 bswap eax
6061 mov [u32], eax
6062 }
6063# endif
6064 return u32;
6065
6066# elif defined(RT_ARCH_ARM64)
6067 uint64_t u64Ret;
6068 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
6069 : [uRet] "=r" (u64Ret)
6070 : [uVal] "r" ((uint64_t)u32));
6071 return (uint32_t)u64Ret;
6072
6073# elif defined(RT_ARCH_ARM32)
6074 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6075 : [uRet] "=r" (u32)
6076 : [uVal] "[uRet]" (u32));
6077 return u32;
6078
6079# else
6080# error "Port me"
6081# endif
6082}
6083#endif
6084
6085
6086/**
6087 * Reverse the byte order of the given 64-bit integer.
6088 *
6089 * @returns Revert
6090 * @param u64 64-bit integer value.
6091 */
6092DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6093{
6094#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6095 return _byteswap_uint64(u64);
6096
6097# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6098 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
6099 return u64;
6100
6101# elif defined(RT_ARCH_ARM64)
6102 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6103 : [uRet] "=r" (u64)
6104 : [uVal] "[uRet]" (u64));
6105 return u64;
6106
6107#else
6108 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6109 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6110#endif
6111}
6112
6113
6114
6115/** @defgroup grp_inline_bits Bit Operations
6116 * @{
6117 */
6118
6119
6120/**
6121 * Sets a bit in a bitmap.
6122 *
6123 * @param pvBitmap Pointer to the bitmap (little endian). This should be
6124 * 32-bit aligned.
6125 * @param iBit The bit to set.
6126 *
6127 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6128 * However, doing so will yield better performance as well as avoiding
6129 * traps accessing the last bits in the bitmap.
6130 */
6131#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6132RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6133#else
6134DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6135{
6136# if RT_INLINE_ASM_USES_INTRIN
6137 _bittestandset((long RT_FAR *)pvBitmap, iBit);
6138
6139# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6140# if RT_INLINE_ASM_GNU_STYLE
6141 __asm__ __volatile__("btsl %1, %0"
6142 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6143 : "Ir" (iBit)
6144 , "m" (*(volatile long RT_FAR *)pvBitmap)
6145 : "memory"
6146 , "cc");
6147# else
6148 __asm
6149 {
6150# ifdef RT_ARCH_AMD64
6151 mov rax, [pvBitmap]
6152 mov edx, [iBit]
6153 bts [rax], edx
6154# else
6155 mov eax, [pvBitmap]
6156 mov edx, [iBit]
6157 bts [eax], edx
6158# endif
6159 }
6160# endif
6161
6162# else
6163 int32_t offBitmap = iBit / 32;
6164 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6165 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6166# endif
6167}
6168#endif
6169
6170
6171/**
6172 * Atomically sets a bit in a bitmap, ordered.
6173 *
6174 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6175 * aligned, otherwise the memory access isn't atomic!
6176 * @param iBit The bit to set.
6177 *
6178 * @remarks x86: Requires a 386 or later.
6179 */
6180#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6181RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6182#else
6183DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6184{
6185 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6186# if RT_INLINE_ASM_USES_INTRIN
6187 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6188# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6189# if RT_INLINE_ASM_GNU_STYLE
6190 __asm__ __volatile__("lock; btsl %1, %0"
6191 : "=m" (*(volatile long *)pvBitmap)
6192 : "Ir" (iBit)
6193 , "m" (*(volatile long *)pvBitmap)
6194 : "memory"
6195 , "cc");
6196# else
6197 __asm
6198 {
6199# ifdef RT_ARCH_AMD64
6200 mov rax, [pvBitmap]
6201 mov edx, [iBit]
6202 lock bts [rax], edx
6203# else
6204 mov eax, [pvBitmap]
6205 mov edx, [iBit]
6206 lock bts [eax], edx
6207# endif
6208 }
6209# endif
6210
6211# else
6212 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6213# endif
6214}
6215#endif
6216
6217
6218/**
6219 * Clears a bit in a bitmap.
6220 *
6221 * @param pvBitmap Pointer to the bitmap (little endian).
6222 * @param iBit The bit to clear.
6223 *
6224 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6225 * However, doing so will yield better performance as well as avoiding
6226 * traps accessing the last bits in the bitmap.
6227 */
6228#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6229RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6230#else
6231DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6232{
6233# if RT_INLINE_ASM_USES_INTRIN
6234 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6235
6236# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6237# if RT_INLINE_ASM_GNU_STYLE
6238 __asm__ __volatile__("btrl %1, %0"
6239 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6240 : "Ir" (iBit)
6241 , "m" (*(volatile long RT_FAR *)pvBitmap)
6242 : "memory"
6243 , "cc");
6244# else
6245 __asm
6246 {
6247# ifdef RT_ARCH_AMD64
6248 mov rax, [pvBitmap]
6249 mov edx, [iBit]
6250 btr [rax], edx
6251# else
6252 mov eax, [pvBitmap]
6253 mov edx, [iBit]
6254 btr [eax], edx
6255# endif
6256 }
6257# endif
6258
6259# else
6260 int32_t offBitmap = iBit / 32;
6261 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6262 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6263# endif
6264}
6265#endif
6266
6267
6268/**
6269 * Atomically clears a bit in a bitmap, ordered.
6270 *
6271 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6272 * aligned, otherwise the memory access isn't atomic!
6273 * @param iBit The bit to toggle set.
6274 *
6275 * @remarks No memory barrier, take care on smp.
6276 * @remarks x86: Requires a 386 or later.
6277 */
6278#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6279RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6280#else
6281DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6282{
6283 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6284# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6285# if RT_INLINE_ASM_GNU_STYLE
6286 __asm__ __volatile__("lock; btrl %1, %0"
6287 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6288 : "Ir" (iBit)
6289 , "m" (*(volatile long RT_FAR *)pvBitmap)
6290 : "memory"
6291 , "cc");
6292# else
6293 __asm
6294 {
6295# ifdef RT_ARCH_AMD64
6296 mov rax, [pvBitmap]
6297 mov edx, [iBit]
6298 lock btr [rax], edx
6299# else
6300 mov eax, [pvBitmap]
6301 mov edx, [iBit]
6302 lock btr [eax], edx
6303# endif
6304 }
6305# endif
6306# else
6307 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6308# endif
6309}
6310#endif
6311
6312
6313/**
6314 * Toggles a bit in a bitmap.
6315 *
6316 * @param pvBitmap Pointer to the bitmap (little endian).
6317 * @param iBit The bit to toggle.
6318 *
6319 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6320 * However, doing so will yield better performance as well as avoiding
6321 * traps accessing the last bits in the bitmap.
6322 */
6323#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6324RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6325#else
6326DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6327{
6328# if RT_INLINE_ASM_USES_INTRIN
6329 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6330# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6331# if RT_INLINE_ASM_GNU_STYLE
6332 __asm__ __volatile__("btcl %1, %0"
6333 : "=m" (*(volatile long *)pvBitmap)
6334 : "Ir" (iBit)
6335 , "m" (*(volatile long *)pvBitmap)
6336 : "memory"
6337 , "cc");
6338# else
6339 __asm
6340 {
6341# ifdef RT_ARCH_AMD64
6342 mov rax, [pvBitmap]
6343 mov edx, [iBit]
6344 btc [rax], edx
6345# else
6346 mov eax, [pvBitmap]
6347 mov edx, [iBit]
6348 btc [eax], edx
6349# endif
6350 }
6351# endif
6352# else
6353 int32_t offBitmap = iBit / 32;
6354 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6355 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6356# endif
6357}
6358#endif
6359
6360
6361/**
6362 * Atomically toggles a bit in a bitmap, ordered.
6363 *
6364 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6365 * aligned, otherwise the memory access isn't atomic!
6366 * @param iBit The bit to test and set.
6367 *
6368 * @remarks x86: Requires a 386 or later.
6369 */
6370#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6371RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6372#else
6373DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6374{
6375 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6376# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6377# if RT_INLINE_ASM_GNU_STYLE
6378 __asm__ __volatile__("lock; btcl %1, %0"
6379 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6380 : "Ir" (iBit)
6381 , "m" (*(volatile long RT_FAR *)pvBitmap)
6382 : "memory"
6383 , "cc");
6384# else
6385 __asm
6386 {
6387# ifdef RT_ARCH_AMD64
6388 mov rax, [pvBitmap]
6389 mov edx, [iBit]
6390 lock btc [rax], edx
6391# else
6392 mov eax, [pvBitmap]
6393 mov edx, [iBit]
6394 lock btc [eax], edx
6395# endif
6396 }
6397# endif
6398# else
6399 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6400# endif
6401}
6402#endif
6403
6404
6405/**
6406 * Tests and sets a bit in a bitmap.
6407 *
6408 * @returns true if the bit was set.
6409 * @returns false if the bit was clear.
6410 *
6411 * @param pvBitmap Pointer to the bitmap (little endian).
6412 * @param iBit The bit to test and set.
6413 *
6414 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6415 * However, doing so will yield better performance as well as avoiding
6416 * traps accessing the last bits in the bitmap.
6417 */
6418#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6419RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6420#else
6421DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6422{
6423 union { bool f; uint32_t u32; uint8_t u8; } rc;
6424# if RT_INLINE_ASM_USES_INTRIN
6425 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6426
6427# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6428# if RT_INLINE_ASM_GNU_STYLE
6429 __asm__ __volatile__("btsl %2, %1\n\t"
6430 "setc %b0\n\t"
6431 "andl $1, %0\n\t"
6432 : "=q" (rc.u32)
6433 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6434 : "Ir" (iBit)
6435 , "m" (*(volatile long RT_FAR *)pvBitmap)
6436 : "memory"
6437 , "cc");
6438# else
6439 __asm
6440 {
6441 mov edx, [iBit]
6442# ifdef RT_ARCH_AMD64
6443 mov rax, [pvBitmap]
6444 bts [rax], edx
6445# else
6446 mov eax, [pvBitmap]
6447 bts [eax], edx
6448# endif
6449 setc al
6450 and eax, 1
6451 mov [rc.u32], eax
6452 }
6453# endif
6454
6455# else
6456 int32_t offBitmap = iBit / 32;
6457 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6458 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6459 >> (iBit & 31);
6460 rc.u32 &= 1;
6461# endif
6462 return rc.f;
6463}
6464#endif
6465
6466
6467/**
6468 * Atomically tests and sets a bit in a bitmap, ordered.
6469 *
6470 * @returns true if the bit was set.
6471 * @returns false if the bit was clear.
6472 *
6473 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6474 * aligned, otherwise the memory access isn't atomic!
6475 * @param iBit The bit to set.
6476 *
6477 * @remarks x86: Requires a 386 or later.
6478 */
6479#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6480RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6481#else
6482DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6483{
6484 union { bool f; uint32_t u32; uint8_t u8; } rc;
6485 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6486# if RT_INLINE_ASM_USES_INTRIN
6487 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6488# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6489# if RT_INLINE_ASM_GNU_STYLE
6490 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6491 "setc %b0\n\t"
6492 "andl $1, %0\n\t"
6493 : "=q" (rc.u32)
6494 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6495 : "Ir" (iBit)
6496 , "m" (*(volatile long RT_FAR *)pvBitmap)
6497 : "memory"
6498 , "cc");
6499# else
6500 __asm
6501 {
6502 mov edx, [iBit]
6503# ifdef RT_ARCH_AMD64
6504 mov rax, [pvBitmap]
6505 lock bts [rax], edx
6506# else
6507 mov eax, [pvBitmap]
6508 lock bts [eax], edx
6509# endif
6510 setc al
6511 and eax, 1
6512 mov [rc.u32], eax
6513 }
6514# endif
6515
6516# else
6517 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6518 >> (iBit & 31);
6519 rc.u32 &= 1;
6520# endif
6521 return rc.f;
6522}
6523#endif
6524
6525
6526/**
6527 * Tests and clears a bit in a bitmap.
6528 *
6529 * @returns true if the bit was set.
6530 * @returns false if the bit was clear.
6531 *
6532 * @param pvBitmap Pointer to the bitmap (little endian).
6533 * @param iBit The bit to test and clear.
6534 *
6535 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6536 * However, doing so will yield better performance as well as avoiding
6537 * traps accessing the last bits in the bitmap.
6538 */
6539#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6540RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6541#else
6542DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6543{
6544 union { bool f; uint32_t u32; uint8_t u8; } rc;
6545# if RT_INLINE_ASM_USES_INTRIN
6546 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6547
6548# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6549# if RT_INLINE_ASM_GNU_STYLE
6550 __asm__ __volatile__("btrl %2, %1\n\t"
6551 "setc %b0\n\t"
6552 "andl $1, %0\n\t"
6553 : "=q" (rc.u32)
6554 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6555 : "Ir" (iBit)
6556 , "m" (*(volatile long RT_FAR *)pvBitmap)
6557 : "memory"
6558 , "cc");
6559# else
6560 __asm
6561 {
6562 mov edx, [iBit]
6563# ifdef RT_ARCH_AMD64
6564 mov rax, [pvBitmap]
6565 btr [rax], edx
6566# else
6567 mov eax, [pvBitmap]
6568 btr [eax], edx
6569# endif
6570 setc al
6571 and eax, 1
6572 mov [rc.u32], eax
6573 }
6574# endif
6575
6576# else
6577 int32_t offBitmap = iBit / 32;
6578 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6579 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6580 >> (iBit & 31);
6581 rc.u32 &= 1;
6582# endif
6583 return rc.f;
6584}
6585#endif
6586
6587
6588/**
6589 * Atomically tests and clears a bit in a bitmap, ordered.
6590 *
6591 * @returns true if the bit was set.
6592 * @returns false if the bit was clear.
6593 *
6594 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6595 * aligned, otherwise the memory access isn't atomic!
6596 * @param iBit The bit to test and clear.
6597 *
6598 * @remarks No memory barrier, take care on smp.
6599 * @remarks x86: Requires a 386 or later.
6600 */
6601#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6602RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6603#else
6604DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6605{
6606 union { bool f; uint32_t u32; uint8_t u8; } rc;
6607 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6608# if RT_INLINE_ASM_USES_INTRIN
6609 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6610
6611# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6612# if RT_INLINE_ASM_GNU_STYLE
6613 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6614 "setc %b0\n\t"
6615 "andl $1, %0\n\t"
6616 : "=q" (rc.u32)
6617 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6618 : "Ir" (iBit)
6619 , "m" (*(volatile long RT_FAR *)pvBitmap)
6620 : "memory"
6621 , "cc");
6622# else
6623 __asm
6624 {
6625 mov edx, [iBit]
6626# ifdef RT_ARCH_AMD64
6627 mov rax, [pvBitmap]
6628 lock btr [rax], edx
6629# else
6630 mov eax, [pvBitmap]
6631 lock btr [eax], edx
6632# endif
6633 setc al
6634 and eax, 1
6635 mov [rc.u32], eax
6636 }
6637# endif
6638
6639# else
6640 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6641 >> (iBit & 31);
6642 rc.u32 &= 1;
6643# endif
6644 return rc.f;
6645}
6646#endif
6647
6648
6649/**
6650 * Tests and toggles a bit in a bitmap.
6651 *
6652 * @returns true if the bit was set.
6653 * @returns false if the bit was clear.
6654 *
6655 * @param pvBitmap Pointer to the bitmap (little endian).
6656 * @param iBit The bit to test and toggle.
6657 *
6658 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6659 * However, doing so will yield better performance as well as avoiding
6660 * traps accessing the last bits in the bitmap.
6661 */
6662#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6663RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6664#else
6665DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6666{
6667 union { bool f; uint32_t u32; uint8_t u8; } rc;
6668# if RT_INLINE_ASM_USES_INTRIN
6669 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6670
6671# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6672# if RT_INLINE_ASM_GNU_STYLE
6673 __asm__ __volatile__("btcl %2, %1\n\t"
6674 "setc %b0\n\t"
6675 "andl $1, %0\n\t"
6676 : "=q" (rc.u32)
6677 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6678 : "Ir" (iBit)
6679 , "m" (*(volatile long RT_FAR *)pvBitmap)
6680 : "memory"
6681 , "cc");
6682# else
6683 __asm
6684 {
6685 mov edx, [iBit]
6686# ifdef RT_ARCH_AMD64
6687 mov rax, [pvBitmap]
6688 btc [rax], edx
6689# else
6690 mov eax, [pvBitmap]
6691 btc [eax], edx
6692# endif
6693 setc al
6694 and eax, 1
6695 mov [rc.u32], eax
6696 }
6697# endif
6698
6699# else
6700 int32_t offBitmap = iBit / 32;
6701 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6702 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6703 >> (iBit & 31);
6704 rc.u32 &= 1;
6705# endif
6706 return rc.f;
6707}
6708#endif
6709
6710
6711/**
6712 * Atomically tests and toggles a bit in a bitmap, ordered.
6713 *
6714 * @returns true if the bit was set.
6715 * @returns false if the bit was clear.
6716 *
6717 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6718 * aligned, otherwise the memory access isn't atomic!
6719 * @param iBit The bit to test and toggle.
6720 *
6721 * @remarks x86: Requires a 386 or later.
6722 */
6723#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6724RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6725#else
6726DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6727{
6728 union { bool f; uint32_t u32; uint8_t u8; } rc;
6729 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6730# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6731# if RT_INLINE_ASM_GNU_STYLE
6732 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6733 "setc %b0\n\t"
6734 "andl $1, %0\n\t"
6735 : "=q" (rc.u32)
6736 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6737 : "Ir" (iBit)
6738 , "m" (*(volatile long RT_FAR *)pvBitmap)
6739 : "memory"
6740 , "cc");
6741# else
6742 __asm
6743 {
6744 mov edx, [iBit]
6745# ifdef RT_ARCH_AMD64
6746 mov rax, [pvBitmap]
6747 lock btc [rax], edx
6748# else
6749 mov eax, [pvBitmap]
6750 lock btc [eax], edx
6751# endif
6752 setc al
6753 and eax, 1
6754 mov [rc.u32], eax
6755 }
6756# endif
6757
6758# else
6759 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6760 >> (iBit & 31);
6761 rc.u32 &= 1;
6762# endif
6763 return rc.f;
6764}
6765#endif
6766
6767
6768/**
6769 * Tests if a bit in a bitmap is set.
6770 *
6771 * @returns true if the bit is set.
6772 * @returns false if the bit is clear.
6773 *
6774 * @param pvBitmap Pointer to the bitmap (little endian).
6775 * @param iBit The bit to test.
6776 *
6777 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6778 * However, doing so will yield better performance as well as avoiding
6779 * traps accessing the last bits in the bitmap.
6780 */
6781#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6782RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6783#else
6784DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6785{
6786 union { bool f; uint32_t u32; uint8_t u8; } rc;
6787# if RT_INLINE_ASM_USES_INTRIN
6788 rc.u32 = _bittest((long *)pvBitmap, iBit);
6789
6790# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6791# if RT_INLINE_ASM_GNU_STYLE
6792
6793 __asm__ __volatile__("btl %2, %1\n\t"
6794 "setc %b0\n\t"
6795 "andl $1, %0\n\t"
6796 : "=q" (rc.u32)
6797 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6798 , "Ir" (iBit)
6799 : "memory"
6800 , "cc");
6801# else
6802 __asm
6803 {
6804 mov edx, [iBit]
6805# ifdef RT_ARCH_AMD64
6806 mov rax, [pvBitmap]
6807 bt [rax], edx
6808# else
6809 mov eax, [pvBitmap]
6810 bt [eax], edx
6811# endif
6812 setc al
6813 and eax, 1
6814 mov [rc.u32], eax
6815 }
6816# endif
6817
6818# else
6819 int32_t offBitmap = iBit / 32;
6820 AssertRelease(!((uintptr_t)pvBitmap & (sizeof(uint32_t) - 1)));
6821 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6822 rc.u32 &= 1;
6823# endif
6824 return rc.f;
6825}
6826#endif
6827
6828
6829/**
6830 * Clears a bit range within a bitmap.
6831 *
6832 * @param pvBitmap Pointer to the bitmap (little endian).
6833 * @param iBitStart The First bit to clear.
6834 * @param iBitEnd The first bit not to clear.
6835 */
6836DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6837{
6838 if (iBitStart < iBitEnd)
6839 {
6840 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6841 size_t iStart = iBitStart & ~(size_t)31;
6842 size_t iEnd = iBitEnd & ~(size_t)31;
6843 if (iStart == iEnd)
6844 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6845 else
6846 {
6847 /* bits in first dword. */
6848 if (iBitStart & 31)
6849 {
6850 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6851 pu32++;
6852 iBitStart = iStart + 32;
6853 }
6854
6855 /* whole dwords. */
6856 if (iBitStart != iEnd)
6857 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6858
6859 /* bits in last dword. */
6860 if (iBitEnd & 31)
6861 {
6862 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6863 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6864 }
6865 }
6866 }
6867}
6868
6869
6870/**
6871 * Sets a bit range within a bitmap.
6872 *
6873 * @param pvBitmap Pointer to the bitmap (little endian).
6874 * @param iBitStart The First bit to set.
6875 * @param iBitEnd The first bit not to set.
6876 */
6877DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6878{
6879 if (iBitStart < iBitEnd)
6880 {
6881 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6882 size_t iStart = iBitStart & ~(size_t)31;
6883 size_t iEnd = iBitEnd & ~(size_t)31;
6884 if (iStart == iEnd)
6885 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6886 else
6887 {
6888 /* bits in first dword. */
6889 if (iBitStart & 31)
6890 {
6891 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6892 pu32++;
6893 iBitStart = iStart + 32;
6894 }
6895
6896 /* whole dword. */
6897 if (iBitStart != iEnd)
6898 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
6899
6900 /* bits in last dword. */
6901 if (iBitEnd & 31)
6902 {
6903 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6904 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6905 }
6906 }
6907 }
6908}
6909
6910
6911/**
6912 * Finds the first clear bit in a bitmap.
6913 *
6914 * @returns Index of the first zero bit.
6915 * @returns -1 if no clear bit was found.
6916 * @param pvBitmap Pointer to the bitmap (little endian).
6917 * @param cBits The number of bits in the bitmap. Multiple of 32.
6918 */
6919#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6920DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6921#else
6922DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6923{
6924 if (cBits)
6925 {
6926 int32_t iBit;
6927# if RT_INLINE_ASM_GNU_STYLE
6928 RTCCUINTREG uEAX, uECX, uEDI;
6929 cBits = RT_ALIGN_32(cBits, 32);
6930 __asm__ __volatile__("repe; scasl\n\t"
6931 "je 1f\n\t"
6932# ifdef RT_ARCH_AMD64
6933 "lea -4(%%rdi), %%rdi\n\t"
6934 "xorl (%%rdi), %%eax\n\t"
6935 "subq %5, %%rdi\n\t"
6936# else
6937 "lea -4(%%edi), %%edi\n\t"
6938 "xorl (%%edi), %%eax\n\t"
6939 "subl %5, %%edi\n\t"
6940# endif
6941 "shll $3, %%edi\n\t"
6942 "bsfl %%eax, %%edx\n\t"
6943 "addl %%edi, %%edx\n\t"
6944 "1:\t\n"
6945 : "=d" (iBit)
6946 , "=&c" (uECX)
6947 , "=&D" (uEDI)
6948 , "=&a" (uEAX)
6949 : "0" (0xffffffff)
6950 , "mr" (pvBitmap)
6951 , "1" (cBits >> 5)
6952 , "2" (pvBitmap)
6953 , "3" (0xffffffff)
6954 : "cc");
6955# else
6956 cBits = RT_ALIGN_32(cBits, 32);
6957 __asm
6958 {
6959# ifdef RT_ARCH_AMD64
6960 mov rdi, [pvBitmap]
6961 mov rbx, rdi
6962# else
6963 mov edi, [pvBitmap]
6964 mov ebx, edi
6965# endif
6966 mov edx, 0ffffffffh
6967 mov eax, edx
6968 mov ecx, [cBits]
6969 shr ecx, 5
6970 repe scasd
6971 je done
6972
6973# ifdef RT_ARCH_AMD64
6974 lea rdi, [rdi - 4]
6975 xor eax, [rdi]
6976 sub rdi, rbx
6977# else
6978 lea edi, [edi - 4]
6979 xor eax, [edi]
6980 sub edi, ebx
6981# endif
6982 shl edi, 3
6983 bsf edx, eax
6984 add edx, edi
6985 done:
6986 mov [iBit], edx
6987 }
6988# endif
6989 return iBit;
6990 }
6991 return -1;
6992}
6993#endif
6994
6995
6996/**
6997 * Finds the next clear bit in a bitmap.
6998 *
6999 * @returns Index of the first zero bit.
7000 * @returns -1 if no clear bit was found.
7001 * @param pvBitmap Pointer to the bitmap (little endian).
7002 * @param cBits The number of bits in the bitmap. Multiple of 32.
7003 * @param iBitPrev The bit returned from the last search.
7004 * The search will start at iBitPrev + 1.
7005 */
7006#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7007DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7008#else
7009DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7010{
7011 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7012 int iBit = ++iBitPrev & 31;
7013 if (iBit)
7014 {
7015 /*
7016 * Inspect the 32-bit word containing the unaligned bit.
7017 */
7018 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
7019
7020# if RT_INLINE_ASM_USES_INTRIN
7021 unsigned long ulBit = 0;
7022 if (_BitScanForward(&ulBit, u32))
7023 return ulBit + iBitPrev;
7024# else
7025# if RT_INLINE_ASM_GNU_STYLE
7026 __asm__ __volatile__("bsf %1, %0\n\t"
7027 "jnz 1f\n\t"
7028 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
7029 "1:\n\t"
7030 : "=r" (iBit)
7031 : "r" (u32)
7032 : "cc");
7033# else
7034 __asm
7035 {
7036 mov edx, [u32]
7037 bsf eax, edx
7038 jnz done
7039 mov eax, 0ffffffffh
7040 done:
7041 mov [iBit], eax
7042 }
7043# endif
7044 if (iBit >= 0)
7045 return iBit + (int)iBitPrev;
7046# endif
7047
7048 /*
7049 * Skip ahead and see if there is anything left to search.
7050 */
7051 iBitPrev |= 31;
7052 iBitPrev++;
7053 if (cBits <= (uint32_t)iBitPrev)
7054 return -1;
7055 }
7056
7057 /*
7058 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
7059 */
7060 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7061 if (iBit >= 0)
7062 iBit += iBitPrev;
7063 return iBit;
7064}
7065#endif
7066
7067
7068/**
7069 * Finds the first set bit in a bitmap.
7070 *
7071 * @returns Index of the first set bit.
7072 * @returns -1 if no clear bit was found.
7073 * @param pvBitmap Pointer to the bitmap (little endian).
7074 * @param cBits The number of bits in the bitmap. Multiple of 32.
7075 */
7076#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7077DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
7078#else
7079DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
7080{
7081 if (cBits)
7082 {
7083 int32_t iBit;
7084# if RT_INLINE_ASM_GNU_STYLE
7085 RTCCUINTREG uEAX, uECX, uEDI;
7086 cBits = RT_ALIGN_32(cBits, 32);
7087 __asm__ __volatile__("repe; scasl\n\t"
7088 "je 1f\n\t"
7089# ifdef RT_ARCH_AMD64
7090 "lea -4(%%rdi), %%rdi\n\t"
7091 "movl (%%rdi), %%eax\n\t"
7092 "subq %5, %%rdi\n\t"
7093# else
7094 "lea -4(%%edi), %%edi\n\t"
7095 "movl (%%edi), %%eax\n\t"
7096 "subl %5, %%edi\n\t"
7097# endif
7098 "shll $3, %%edi\n\t"
7099 "bsfl %%eax, %%edx\n\t"
7100 "addl %%edi, %%edx\n\t"
7101 "1:\t\n"
7102 : "=d" (iBit)
7103 , "=&c" (uECX)
7104 , "=&D" (uEDI)
7105 , "=&a" (uEAX)
7106 : "0" (0xffffffff)
7107 , "mr" (pvBitmap)
7108 , "1" (cBits >> 5)
7109 , "2" (pvBitmap)
7110 , "3" (0)
7111 : "cc");
7112# else
7113 cBits = RT_ALIGN_32(cBits, 32);
7114 __asm
7115 {
7116# ifdef RT_ARCH_AMD64
7117 mov rdi, [pvBitmap]
7118 mov rbx, rdi
7119# else
7120 mov edi, [pvBitmap]
7121 mov ebx, edi
7122# endif
7123 mov edx, 0ffffffffh
7124 xor eax, eax
7125 mov ecx, [cBits]
7126 shr ecx, 5
7127 repe scasd
7128 je done
7129# ifdef RT_ARCH_AMD64
7130 lea rdi, [rdi - 4]
7131 mov eax, [rdi]
7132 sub rdi, rbx
7133# else
7134 lea edi, [edi - 4]
7135 mov eax, [edi]
7136 sub edi, ebx
7137# endif
7138 shl edi, 3
7139 bsf edx, eax
7140 add edx, edi
7141 done:
7142 mov [iBit], edx
7143 }
7144# endif
7145 return iBit;
7146 }
7147 return -1;
7148}
7149#endif
7150
7151
7152/**
7153 * Finds the next set bit in a bitmap.
7154 *
7155 * @returns Index of the next set bit.
7156 * @returns -1 if no set bit was found.
7157 * @param pvBitmap Pointer to the bitmap (little endian).
7158 * @param cBits The number of bits in the bitmap. Multiple of 32.
7159 * @param iBitPrev The bit returned from the last search.
7160 * The search will start at iBitPrev + 1.
7161 */
7162#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7163DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7164#else
7165DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7166{
7167 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7168 int iBit = ++iBitPrev & 31;
7169 if (iBit)
7170 {
7171 /*
7172 * Inspect the 32-bit word containing the unaligned bit.
7173 */
7174 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7175
7176# if RT_INLINE_ASM_USES_INTRIN
7177 unsigned long ulBit = 0;
7178 if (_BitScanForward(&ulBit, u32))
7179 return ulBit + iBitPrev;
7180# else
7181# if RT_INLINE_ASM_GNU_STYLE
7182 __asm__ __volatile__("bsf %1, %0\n\t"
7183 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7184 "movl $-1, %0\n\t"
7185 "1:\n\t"
7186 : "=r" (iBit)
7187 : "r" (u32)
7188 : "cc");
7189# else
7190 __asm
7191 {
7192 mov edx, [u32]
7193 bsf eax, edx
7194 jnz done
7195 mov eax, 0ffffffffh
7196 done:
7197 mov [iBit], eax
7198 }
7199# endif
7200 if (iBit >= 0)
7201 return iBit + (int)iBitPrev;
7202# endif
7203
7204 /*
7205 * Skip ahead and see if there is anything left to search.
7206 */
7207 iBitPrev |= 31;
7208 iBitPrev++;
7209 if (cBits <= (uint32_t)iBitPrev)
7210 return -1;
7211 }
7212
7213 /*
7214 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
7215 */
7216 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7217 if (iBit >= 0)
7218 iBit += iBitPrev;
7219 return iBit;
7220}
7221#endif
7222
7223
7224/**
7225 * Finds the first bit which is set in the given 32-bit integer.
7226 * Bits are numbered from 1 (least significant) to 32.
7227 *
7228 * @returns index [1..32] of the first set bit.
7229 * @returns 0 if all bits are cleared.
7230 * @param u32 Integer to search for set bits.
7231 * @remarks Similar to ffs() in BSD.
7232 */
7233#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7234RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7235#else
7236DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7237{
7238# if RT_INLINE_ASM_USES_INTRIN
7239 unsigned long iBit;
7240 if (_BitScanForward(&iBit, u32))
7241 iBit++;
7242 else
7243 iBit = 0;
7244
7245# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7246# if RT_INLINE_ASM_GNU_STYLE
7247 uint32_t iBit;
7248 __asm__ __volatile__("bsf %1, %0\n\t"
7249 "jnz 1f\n\t"
7250 "xorl %0, %0\n\t"
7251 "jmp 2f\n"
7252 "1:\n\t"
7253 "incl %0\n"
7254 "2:\n\t"
7255 : "=r" (iBit)
7256 : "rm" (u32)
7257 : "cc");
7258# else
7259 uint32_t iBit;
7260 _asm
7261 {
7262 bsf eax, [u32]
7263 jnz found
7264 xor eax, eax
7265 jmp done
7266 found:
7267 inc eax
7268 done:
7269 mov [iBit], eax
7270 }
7271# endif
7272
7273# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7274 /*
7275 * Using the "count leading zeros (clz)" instruction here because there
7276 * is no dedicated instruction to get the first set bit.
7277 * Need to reverse the bits in the value with "rbit" first because
7278 * "clz" starts counting from the most significant bit.
7279 */
7280 uint32_t iBit;
7281 __asm__ __volatile__(
7282# if defined(RT_ARCH_ARM64)
7283 "rbit %w[uVal], %w[uVal]\n\t"
7284 "clz %w[iBit], %w[uVal]\n\t"
7285# else
7286 "rbit %[uVal], %[uVal]\n\t"
7287 "clz %[iBit], %[uVal]\n\t"
7288# endif
7289 : [uVal] "=r" (u32)
7290 , [iBit] "=r" (iBit)
7291 : "[uVal]" (u32));
7292 if (iBit != 32)
7293 iBit++;
7294 else
7295 iBit = 0; /* No bit set. */
7296
7297# else
7298# error "Port me"
7299# endif
7300 return iBit;
7301}
7302#endif
7303
7304
7305/**
7306 * Finds the first bit which is set in the given 32-bit integer.
7307 * Bits are numbered from 1 (least significant) to 32.
7308 *
7309 * @returns index [1..32] of the first set bit.
7310 * @returns 0 if all bits are cleared.
7311 * @param i32 Integer to search for set bits.
7312 * @remark Similar to ffs() in BSD.
7313 */
7314DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7315{
7316 return ASMBitFirstSetU32((uint32_t)i32);
7317}
7318
7319
7320/**
7321 * Finds the first bit which is set in the given 64-bit integer.
7322 *
7323 * Bits are numbered from 1 (least significant) to 64.
7324 *
7325 * @returns index [1..64] of the first set bit.
7326 * @returns 0 if all bits are cleared.
7327 * @param u64 Integer to search for set bits.
7328 * @remarks Similar to ffs() in BSD.
7329 */
7330#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7331RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7332#else
7333DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7334{
7335# if RT_INLINE_ASM_USES_INTRIN
7336 unsigned long iBit;
7337# if ARCH_BITS == 64
7338 if (_BitScanForward64(&iBit, u64))
7339 iBit++;
7340 else
7341 iBit = 0;
7342# else
7343 if (_BitScanForward(&iBit, (uint32_t)u64))
7344 iBit++;
7345 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7346 iBit += 33;
7347 else
7348 iBit = 0;
7349# endif
7350
7351# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7352 uint64_t iBit;
7353 __asm__ __volatile__("bsfq %1, %0\n\t"
7354 "jnz 1f\n\t"
7355 "xorl %k0, %k0\n\t"
7356 "jmp 2f\n"
7357 "1:\n\t"
7358 "incl %k0\n"
7359 "2:\n\t"
7360 : "=r" (iBit)
7361 : "rm" (u64)
7362 : "cc");
7363
7364# elif defined(RT_ARCH_ARM64)
7365 uint64_t iBit;
7366 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7367 "clz %[iBit], %[uVal]\n\t"
7368 : [uVal] "=r" (u64)
7369 , [iBit] "=r" (iBit)
7370 : "[uVal]" (u64));
7371 if (iBit != 64)
7372 iBit++;
7373 else
7374 iBit = 0; /* No bit set. */
7375
7376# else
7377 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7378 if (!iBit)
7379 {
7380 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7381 if (iBit)
7382 iBit += 32;
7383 }
7384# endif
7385 return (unsigned)iBit;
7386}
7387#endif
7388
7389
7390/**
7391 * Finds the first bit which is set in the given 16-bit integer.
7392 *
7393 * Bits are numbered from 1 (least significant) to 16.
7394 *
7395 * @returns index [1..16] of the first set bit.
7396 * @returns 0 if all bits are cleared.
7397 * @param u16 Integer to search for set bits.
7398 * @remarks For 16-bit bs3kit code.
7399 */
7400#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7401RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7402#else
7403DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7404{
7405 return ASMBitFirstSetU32((uint32_t)u16);
7406}
7407#endif
7408
7409
7410/**
7411 * Finds the last bit which is set in the given 32-bit integer.
7412 * Bits are numbered from 1 (least significant) to 32.
7413 *
7414 * @returns index [1..32] of the last set bit.
7415 * @returns 0 if all bits are cleared.
7416 * @param u32 Integer to search for set bits.
7417 * @remark Similar to fls() in BSD.
7418 */
7419#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7420RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7421#else
7422DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7423{
7424# if RT_INLINE_ASM_USES_INTRIN
7425 unsigned long iBit;
7426 if (_BitScanReverse(&iBit, u32))
7427 iBit++;
7428 else
7429 iBit = 0;
7430
7431# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7432# if RT_INLINE_ASM_GNU_STYLE
7433 uint32_t iBit;
7434 __asm__ __volatile__("bsrl %1, %0\n\t"
7435 "jnz 1f\n\t"
7436 "xorl %0, %0\n\t"
7437 "jmp 2f\n"
7438 "1:\n\t"
7439 "incl %0\n"
7440 "2:\n\t"
7441 : "=r" (iBit)
7442 : "rm" (u32)
7443 : "cc");
7444# else
7445 uint32_t iBit;
7446 _asm
7447 {
7448 bsr eax, [u32]
7449 jnz found
7450 xor eax, eax
7451 jmp done
7452 found:
7453 inc eax
7454 done:
7455 mov [iBit], eax
7456 }
7457# endif
7458
7459# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7460 uint32_t iBit;
7461 __asm__ __volatile__(
7462# if defined(RT_ARCH_ARM64)
7463 "clz %w[iBit], %w[uVal]\n\t"
7464# else
7465 "clz %[iBit], %[uVal]\n\t"
7466# endif
7467 : [iBit] "=r" (iBit)
7468 : [uVal] "r" (u32));
7469 iBit = 32 - iBit;
7470
7471# else
7472# error "Port me"
7473# endif
7474 return iBit;
7475}
7476#endif
7477
7478
7479/**
7480 * Finds the last bit which is set in the given 32-bit integer.
7481 * Bits are numbered from 1 (least significant) to 32.
7482 *
7483 * @returns index [1..32] of the last set bit.
7484 * @returns 0 if all bits are cleared.
7485 * @param i32 Integer to search for set bits.
7486 * @remark Similar to fls() in BSD.
7487 */
7488DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7489{
7490 return ASMBitLastSetU32((uint32_t)i32);
7491}
7492
7493
7494/**
7495 * Finds the last bit which is set in the given 64-bit integer.
7496 *
7497 * Bits are numbered from 1 (least significant) to 64.
7498 *
7499 * @returns index [1..64] of the last set bit.
7500 * @returns 0 if all bits are cleared.
7501 * @param u64 Integer to search for set bits.
7502 * @remark Similar to fls() in BSD.
7503 */
7504#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7505RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7506#else
7507DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7508{
7509# if RT_INLINE_ASM_USES_INTRIN
7510 unsigned long iBit;
7511# if ARCH_BITS == 64
7512 if (_BitScanReverse64(&iBit, u64))
7513 iBit++;
7514 else
7515 iBit = 0;
7516# else
7517 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7518 iBit += 33;
7519 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7520 iBit++;
7521 else
7522 iBit = 0;
7523# endif
7524
7525# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7526 uint64_t iBit;
7527 __asm__ __volatile__("bsrq %1, %0\n\t"
7528 "jnz 1f\n\t"
7529 "xorl %k0, %k0\n\t"
7530 "jmp 2f\n"
7531 "1:\n\t"
7532 "incl %k0\n"
7533 "2:\n\t"
7534 : "=r" (iBit)
7535 : "rm" (u64)
7536 : "cc");
7537
7538# elif defined(RT_ARCH_ARM64)
7539 uint64_t iBit;
7540 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7541 : [iBit] "=r" (iBit)
7542 : [uVal] "r" (u64));
7543 iBit = 64 - iBit;
7544
7545# else
7546 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7547 if (iBit)
7548 iBit += 32;
7549 else
7550 iBit = ASMBitLastSetU32((uint32_t)u64);
7551# endif
7552 return (unsigned)iBit;
7553}
7554#endif
7555
7556
7557/**
7558 * Finds the last bit which is set in the given 16-bit integer.
7559 *
7560 * Bits are numbered from 1 (least significant) to 16.
7561 *
7562 * @returns index [1..16] of the last set bit.
7563 * @returns 0 if all bits are cleared.
7564 * @param u16 Integer to search for set bits.
7565 * @remarks For 16-bit bs3kit code.
7566 */
7567#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7568RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7569#else
7570DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7571{
7572 return ASMBitLastSetU32((uint32_t)u16);
7573}
7574#endif
7575
7576
7577/**
7578 * Count the number of leading zero bits in the given 32-bit integer.
7579 *
 * The counting starts with the most significant bit.
7581 *
7582 * @returns Number of most significant zero bits.
7583 * @returns 32 if all bits are cleared.
7584 * @param u32 Integer to consider.
7585 * @remarks Similar to __builtin_clz() in gcc, except defined zero input result.
7586 */
7587#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7588RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7589#else
7590DECLINLINE(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7591{
7592# if RT_INLINE_ASM_USES_INTRIN
7593 unsigned long iBit;
7594 if (!_BitScanReverse(&iBit, u32))
7595 return 32;
7596 return 31 - (unsigned)iBit;
7597
7598# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7599 uint32_t iBit;
7600# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 929 vs 237 ps/call */
7601 __asm__ __volatile__("bsrl %1, %0\n\t"
7602 "cmovzl %2, %0\n\t"
7603 : "=&r" (iBit)
7604 : "rm" (u32)
7605 , "rm" ((int32_t)-1)
7606 : "cc");
7607# elif RT_INLINE_ASM_GNU_STYLE
7608 __asm__ __volatile__("bsr %1, %0\n\t"
7609 "jnz 1f\n\t"
7610 "mov $-1, %0\n\t"
7611 "1:\n\t"
7612 : "=r" (iBit)
7613 : "rm" (u32)
7614 : "cc");
7615# else
7616 _asm
7617 {
7618 bsr eax, [u32]
7619 jnz found
7620 mov eax, -1
7621 found:
7622 mov [iBit], eax
7623 }
7624# endif
7625 return 31 - iBit;
7626
7627# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7628 uint32_t iBit;
7629 __asm__ __volatile__(
7630# if defined(RT_ARCH_ARM64)
7631 "clz %w[iBit], %w[uVal]\n\t"
7632# else
7633 "clz %[iBit], %[uVal]\n\t"
7634# endif
7635 : [uVal] "=r" (u32)
7636 , [iBit] "=r" (iBit)
7637 : "[uVal]" (u32));
7638 return iBit;
7639
7640# elif defined(__GNUC__)
7641 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7642 return u32 ? __builtin_clz(u32) : 32;
7643
7644# else
7645# error "Port me"
7646# endif
7647}
7648#endif
7649
7650
7651/**
7652 * Count the number of leading zero bits in the given 64-bit integer.
7653 *
 * The counting starts with the most significant bit.
7655 *
7656 * @returns Number of most significant zero bits.
7657 * @returns 64 if all bits are cleared.
7658 * @param u64 Integer to consider.
7659 * @remarks Similar to __builtin_clzl() in gcc, except defined zero input
7660 * result.
7661 */
7662#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7663RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7664#else
7665DECLINLINE(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7666{
7667# if RT_INLINE_ASM_USES_INTRIN
7668 unsigned long iBit;
7669# if ARCH_BITS == 64
7670 if (_BitScanReverse64(&iBit, u64))
7671 return 63 - (unsigned)iBit;
7672# else
7673 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7674 return 31 - (unsigned)iBit;
7675 if (_BitScanReverse(&iBit, (uint32_t)u64))
7676 return 63 - (unsigned)iBit;
7677# endif
7678 return 64;
7679
7680# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7681 uint64_t iBit;
7682# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7683 __asm__ __volatile__("bsrq %1, %0\n\t"
7684 "cmovzq %2, %0\n\t"
7685 : "=&r" (iBit)
7686 : "rm" (u64)
7687 , "rm" ((int64_t)-1)
7688 : "cc");
7689# else /* 10980xe benchmark: 262 ps/call */
7690 __asm__ __volatile__("bsrq %1, %0\n\t"
7691 "jnz 1f\n\t"
7692 "mov $-1, %0\n\t"
7693 "1:\n\t"
7694 : "=&r" (iBit)
7695 : "rm" (u64)
7696 : "cc");
7697# endif
7698 return 63 - (unsigned)iBit;
7699
7700# elif defined(RT_ARCH_ARM64)
7701 uint64_t iBit;
7702 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7703 : [uVal] "=r" (u64)
7704 , [iBit] "=r" (iBit)
7705 : "[uVal]" (u64));
7706 return (unsigned)iBit;
7707
7708# elif defined(__GNUC__) && ARCH_BITS == 64
7709 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7710 return u64 ? __builtin_clzl(u64) : 64;
7711
7712# else
7713 unsigned iBit = ASMCountLeadingZerosU32((uint32_t)(u64 >> 32));
7714 if (iBit == 32)
7715 iBit = ASMCountLeadingZerosU32((uint32_t)u64) + 32;
7716 return iBit;
7717# endif
7718}
7719#endif
7720
7721
7722/**
7723 * Count the number of leading zero bits in the given 16-bit integer.
7724 *
 * The counting starts with the most significant bit.
7726 *
7727 * @returns Number of most significant zero bits.
7728 * @returns 16 if all bits are cleared.
7729 * @param u16 Integer to consider.
7730 */
7731#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7732RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7733#else
7734DECLINLINE(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7735{
7736# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 987 vs 292 ps/call) */
7737 uint16_t iBit;
7738 __asm__ __volatile__("bsrw %1, %0\n\t"
7739 "jnz 1f\n\t"
7740 "mov $-1, %0\n\t"
7741 "1:\n\t"
7742 : "=r" (iBit)
7743 : "rm" (u16)
7744 : "cc");
7745 return 15 - (int16_t)iBit;
7746# else
7747 return ASMCountLeadingZerosU32((uint32_t)u16) - 16;
7748# endif
7749}
7750#endif
7751
7752
7753/**
7754 * Count the number of trailing zero bits in the given 32-bit integer.
7755 *
 * The counting starts with the least significant bit, i.e. the zero bit.
 *
 * @returns Number of least significant zero bits.
7759 * @returns 32 if all bits are cleared.
7760 * @param u32 Integer to consider.
7761 * @remarks Similar to __builtin_ctz() in gcc, except defined zero input result.
7762 */
7763#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7764RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7765#else
7766DECLINLINE(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7767{
7768# if RT_INLINE_ASM_USES_INTRIN
7769 unsigned long iBit;
7770 if (!_BitScanForward(&iBit, u32))
7771 return 32;
7772 return (unsigned)iBit;
7773
7774# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7775 uint32_t iBit;
7776# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 932 vs 240 ps/call */
7777 __asm__ __volatile__("bsfl %1, %0\n\t"
7778 "cmovzl %2, %0\n\t"
7779 : "=&r" (iBit)
7780 : "rm" (u32)
7781 , "rm" ((int32_t)32)
7782 : "cc");
7783# elif RT_INLINE_ASM_GNU_STYLE
7784 __asm__ __volatile__("bsfl %1, %0\n\t"
7785 "jnz 1f\n\t"
7786 "mov $32, %0\n\t"
7787 "1:\n\t"
7788 : "=r" (iBit)
7789 : "rm" (u32)
7790 : "cc");
7791# else
7792 _asm
7793 {
7794 bsf eax, [u32]
7795 jnz found
7796 mov eax, 32
7797 found:
7798 mov [iBit], eax
7799 }
7800# endif
7801 return iBit;
7802
7803# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7804 /* Invert the bits and use clz. */
7805 uint32_t iBit;
7806 __asm__ __volatile__(
7807# if defined(RT_ARCH_ARM64)
7808 "rbit %w[uVal], %w[uVal]\n\t"
7809 "clz %w[iBit], %w[uVal]\n\t"
7810# else
7811 "rbit %[uVal], %[uVal]\n\t"
7812 "clz %[iBit], %[uVal]\n\t"
7813# endif
7814 : [uVal] "=r" (u32)
7815 , [iBit] "=r" (iBit)
7816 : "[uVal]" (u32));
7817 return iBit;
7818
7819# elif defined(__GNUC__)
7820 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7821 return u32 ? __builtin_ctz(u32) : 32;
7822
7823# else
7824# error "Port me"
7825# endif
7826}
7827#endif
7828
7829
7830/**
7831 * Count the number of trailing zero bits in the given 64-bit integer.
7832 *
 * The counting starts with the least significant bit.
7834 *
7835 * @returns Number of least significant zero bits.
7836 * @returns 64 if all bits are cleared.
7837 * @param u64 Integer to consider.
7838 * @remarks Similar to __builtin_ctzl() in gcc, except defined zero input
7839 * result.
7840 */
7841#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7842RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7843#else
7844DECLINLINE(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7845{
7846# if RT_INLINE_ASM_USES_INTRIN
7847 unsigned long iBit;
7848# if ARCH_BITS == 64
7849 if (_BitScanForward64(&iBit, u64))
7850 return (unsigned)iBit;
7851# else
7852 if (_BitScanForward(&iBit, (uint32_t)u64))
7853 return (unsigned)iBit;
7854 if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7855 return (unsigned)iBit + 32;
7856# endif
7857 return 64;
7858
7859# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7860 uint64_t iBit;
7861# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7862 __asm__ __volatile__("bsfq %1, %0\n\t"
7863 "cmovzq %2, %0\n\t"
7864 : "=&r" (iBit)
7865 : "rm" (u64)
7866 , "rm" ((int64_t)64)
7867 : "cc");
7868# else /* 10980xe benchmark: 262 ps/call */
7869 __asm__ __volatile__("bsfq %1, %0\n\t"
7870 "jnz 1f\n\t"
7871 "mov $64, %0\n\t"
7872 "1:\n\t"
7873 : "=&r" (iBit)
7874 : "rm" (u64)
7875 : "cc");
7876# endif
7877 return (unsigned)iBit;
7878
7879# elif defined(RT_ARCH_ARM64)
7880 /* Invert the bits and use clz. */
7881 uint64_t iBit;
7882 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7883 "clz %[iBit], %[uVal]\n\t"
7884 : [uVal] "=r" (u64)
7885 , [iBit] "=r" (iBit)
7886 : "[uVal]" (u64));
7887 return (unsigned)iBit;
7888
7889# elif defined(__GNUC__) && ARCH_BITS == 64
7890 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7891 return u64 ? __builtin_ctzl(u64) : 64;
7892
7893# else
7894 unsigned iBit = ASMCountTrailingZerosU32((uint32_t)u64);
7895 if (iBit == 32)
7896 iBit = ASMCountTrailingZerosU32((uint32_t)(u64 >> 32)) + 32;
7897 return iBit;
7898# endif
7899}
7900#endif
7901
7902
7903/**
7904 * Count the number of trailing zero bits in the given 16-bit integer.
7905 *
 * The counting starts with the least significant bit.
 *
 * @returns Number of least significant zero bits.
7909 * @returns 16 if all bits are cleared.
7910 * @param u16 Integer to consider.
7911 */
7912#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7913RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7914#else
7915DECLINLINE(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7916{
7917# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 992 vs 349 ps/call) */
7918 uint16_t iBit;
7919 __asm__ __volatile__("bsfw %1, %0\n\t"
7920 "jnz 1f\n\t"
7921 "mov $16, %0\n\t"
7922 "1:\n\t"
7923 : "=r" (iBit)
7924 : "rm" (u16)
7925 : "cc");
7926 return iBit;
7927# else
7928 return ASMCountTrailingZerosU32((uint32_t)u16 | UINT32_C(0x10000));
7929#endif
7930}
7931#endif
7932
7933
7934/**
7935 * Rotate 32-bit unsigned value to the left by @a cShift.
7936 *
7937 * @returns Rotated value.
7938 * @param u32 The value to rotate.
7939 * @param cShift How many bits to rotate by.
7940 */
7941#ifdef __WATCOMC__
7942RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7943#else
7944DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7945{
7946# if RT_INLINE_ASM_USES_INTRIN
7947 return _rotl(u32, cShift);
7948
7949# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7950 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7951 return u32;
7952
7953# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7954 __asm__ __volatile__(
7955# if defined(RT_ARCH_ARM64)
7956 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7957# else
7958 "ror %[uRet], %[uVal], %[cShift]\n\t"
7959# endif
7960 : [uRet] "=r" (u32)
7961 : [uVal] "[uRet]" (u32)
7962 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7963 return u32;
7964
7965# else
7966 cShift &= 31;
7967 return (u32 << cShift) | (u32 >> (32 - cShift));
7968# endif
7969}
7970#endif
7971
7972
7973/**
7974 * Rotate 32-bit unsigned value to the right by @a cShift.
7975 *
7976 * @returns Rotated value.
7977 * @param u32 The value to rotate.
7978 * @param cShift How many bits to rotate by.
7979 */
7980#ifdef __WATCOMC__
7981RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7982#else
7983DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7984{
7985# if RT_INLINE_ASM_USES_INTRIN
7986 return _rotr(u32, cShift);
7987
7988# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7989 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7990 return u32;
7991
7992# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7993 __asm__ __volatile__(
7994# if defined(RT_ARCH_ARM64)
7995 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7996# else
7997 "ror %[uRet], %[uVal], %[cShift]\n\t"
7998# endif
7999 : [uRet] "=r" (u32)
8000 : [uVal] "[uRet]" (u32)
8001 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
8002 return u32;
8003
8004# else
8005 cShift &= 31;
8006 return (u32 >> cShift) | (u32 << (32 - cShift));
8007# endif
8008}
8009#endif
8010
8011
8012/**
8013 * Rotate 64-bit unsigned value to the left by @a cShift.
8014 *
8015 * @returns Rotated value.
8016 * @param u64 The value to rotate.
8017 * @param cShift How many bits to rotate by.
8018 */
8019DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8020{
8021#if RT_INLINE_ASM_USES_INTRIN
8022 return _rotl64(u64, cShift);
8023
8024#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8025 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8026 return u64;
8027
8028#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8029 uint32_t uSpill;
8030 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8031 "jz 1f\n\t"
8032 "xchgl %%eax, %%edx\n\t"
8033 "1:\n\t"
8034 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8035 "jz 2f\n\t"
8036 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8037 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
8038 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
8039 "2:\n\t" /* } */
8040 : "=A" (u64)
8041 , "=c" (cShift)
8042 , "=r" (uSpill)
8043 : "0" (u64)
8044 , "1" (cShift)
8045 : "cc");
8046 return u64;
8047
8048# elif defined(RT_ARCH_ARM64)
8049 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8050 : [uRet] "=r" (u64)
8051 : [uVal] "[uRet]" (u64)
8052 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
8053 return u64;
8054
8055#else
8056 cShift &= 63;
8057 return (u64 << cShift) | (u64 >> (64 - cShift));
8058#endif
8059}
8060
8061
8062/**
8063 * Rotate 64-bit unsigned value to the right by @a cShift.
8064 *
8065 * @returns Rotated value.
8066 * @param u64 The value to rotate.
8067 * @param cShift How many bits to rotate by.
8068 */
8069DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8070{
8071#if RT_INLINE_ASM_USES_INTRIN
8072 return _rotr64(u64, cShift);
8073
8074#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8075 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8076 return u64;
8077
8078#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8079 uint32_t uSpill;
8080 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
8081 "jz 1f\n\t"
8082 "xchgl %%eax, %%edx\n\t"
8083 "1:\n\t"
8084 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8085 "jz 2f\n\t"
8086 "movl %%edx, %2\n\t" /* save the hi value in %3. */
8087 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
8088 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
8089 "2:\n\t" /* } */
8090 : "=A" (u64)
8091 , "=c" (cShift)
8092 , "=r" (uSpill)
8093 : "0" (u64)
8094 , "1" (cShift)
8095 : "cc");
8096 return u64;
8097
8098# elif defined(RT_ARCH_ARM64)
8099 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8100 : [uRet] "=r" (u64)
8101 : [uVal] "[uRet]" (u64)
8102 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
8103 return u64;
8104
8105#else
8106 cShift &= 63;
8107 return (u64 >> cShift) | (u64 << (64 - cShift));
8108#endif
8109}
8110
8111/** @} */
8112
8113
8114/** @} */
8115
8116/*
8117 * Include #pragma aux definitions for Watcom C/C++.
8118 */
8119#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
8120# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
8121# undef IPRT_INCLUDED_asm_watcom_x86_16_h
8122# include "asm-watcom-x86-16.h"
8123#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
8124# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
8125# undef IPRT_INCLUDED_asm_watcom_x86_32_h
8126# include "asm-watcom-x86-32.h"
8127#endif
8128
8129#endif /* !IPRT_INCLUDED_asm_h */
8130
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette