VirtualBox

source: vbox/trunk/include/iprt/asm.h@102939

Last change on this file was 102939, checked in by vboxsync, 13 months ago:

iprt/asm.h: Working over the ASMAtomicWrite* functions for arm64 w/ optional use of LSE instructions. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 263.4 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
7 *
8 * This file is part of VirtualBox base platform packages, as
9 * available from https://www.virtualbox.org.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation, in version 3 of the
14 * License.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses>.
23 *
24 * The contents of this file may alternatively be used under the terms
25 * of the Common Development and Distribution License Version 1.0
26 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
27 * in the VirtualBox distribution, in which case the provisions of the
28 * CDDL are applicable instead of those of the GPL.
29 *
30 * You may elect to license modified versions of this file under the
31 * terms and conditions of either the GPL or the CDDL or both.
32 *
33 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
34 */
35
36#ifndef IPRT_INCLUDED_asm_h
37#define IPRT_INCLUDED_asm_h
38#ifndef RT_WITHOUT_PRAGMA_ONCE
39# pragma once
40#endif
41
42#include <iprt/cdefs.h>
43#include <iprt/types.h>
44#include <iprt/assert.h>
45/** @def RT_INLINE_ASM_USES_INTRIN
46 * Defined as 1 if we're using the intrinsics of a _MSC_VER >= 1400 compiler.
47 * Otherwise defined as 0.
48 */
49
50/* Solaris 10 header ugliness */
51#ifdef u
52# undef u
53#endif
54
55#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
56/* Emit the intrinsics at all optimization levels. */
57# include <iprt/sanitized/intrin.h>
58# pragma intrinsic(_ReadWriteBarrier)
59# pragma intrinsic(__cpuid)
60# pragma intrinsic(__stosd)
61# pragma intrinsic(__stosw)
62# pragma intrinsic(__stosb)
63# pragma intrinsic(_BitScanForward)
64# pragma intrinsic(_BitScanReverse)
65# pragma intrinsic(_bittest)
66# pragma intrinsic(_bittestandset)
67# pragma intrinsic(_bittestandreset)
68# pragma intrinsic(_bittestandcomplement)
69# pragma intrinsic(_byteswap_ushort)
70# pragma intrinsic(_byteswap_ulong)
71# pragma intrinsic(_interlockedbittestandset)
72# pragma intrinsic(_interlockedbittestandreset)
73# pragma intrinsic(_InterlockedAnd)
74# pragma intrinsic(_InterlockedOr)
75# pragma intrinsic(_InterlockedXor)
76# pragma intrinsic(_InterlockedIncrement)
77# pragma intrinsic(_InterlockedDecrement)
78# pragma intrinsic(_InterlockedExchange)
79# pragma intrinsic(_InterlockedExchangeAdd)
80# pragma intrinsic(_InterlockedCompareExchange)
81# pragma intrinsic(_InterlockedCompareExchange8)
82# pragma intrinsic(_InterlockedCompareExchange16)
83# pragma intrinsic(_InterlockedCompareExchange64)
84# pragma intrinsic(_rotl)
85# pragma intrinsic(_rotr)
86# pragma intrinsic(_rotl64)
87# pragma intrinsic(_rotr64)
88# ifdef RT_ARCH_AMD64
89# pragma intrinsic(__stosq)
90# pragma intrinsic(_byteswap_uint64)
91# pragma intrinsic(_InterlockedCompareExchange128)
92# pragma intrinsic(_InterlockedExchange64)
93# pragma intrinsic(_InterlockedExchangeAdd64)
94# pragma intrinsic(_InterlockedAnd64)
95# pragma intrinsic(_InterlockedOr64)
96# pragma intrinsic(_InterlockedIncrement64)
97# pragma intrinsic(_InterlockedDecrement64)
98# endif
99#endif
100
101#if (defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)) || defined(DOXYGEN_RUNNING)
102/** @def RTASM_ARM64_USE_FEAT_LSE
103 * Use instructions from the FEAT_LSE set to implement atomic operations,
104 * assuming that the host CPU always supports these. */
105# define RTASM_ARM64_USE_FEAT_LSE 1
106#endif
107
108
109/*
110 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
111 */
112#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
113# include "asm-watcom-x86-16.h"
114#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
115# include "asm-watcom-x86-32.h"
116#endif
117
118
119/** @defgroup grp_rt_asm ASM - Assembly Routines
120 * @ingroup grp_rt
121 *
122 * @remarks The difference between ordered and unordered atomic operations is
123 * that the former will complete outstanding reads and writes before
124 * continuing, while the latter doesn't make any promises about the
125 * order. Ordered operations don't, it seems, make any 100% promise
126 * wrt whether the operation will complete before any subsequent
127 * memory access. (Please correct if wrong.)
128 *
129 * ASMAtomicSomething operations are all ordered, while
130 * ASMAtomicUoSomething are unordered (note the Uo).
131 *
132 * Please note that ordered operations do not necessarily imply a
133 * compiler (memory) barrier. The user has to use the
134 * ASMCompilerBarrier() macro when that is deemed necessary.
135 *
136 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
137 * to reorder or even optimize assembler instructions away. For
138 * instance, in the following code the second rdmsr instruction is
139 * optimized away because gcc treats that instruction as deterministic:
140 *
141 * @code
142 * static inline uint32_t rdmsr_low(int idx)
143 * {
144 * uint32_t low;
145 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
146 * }
147 * ...
148 * uint32_t msr1 = rdmsr_low(1);
149 * foo(msr1);
150 * msr1 = rdmsr_low(1);
151 * bar(msr1);
152 * @endcode
153 *
154 * The input parameter of rdmsr_low is the same for both calls and
155 * therefore gcc will use the result of the first call as input
156 * parameter for bar() as well. For rdmsr this is not acceptable as
157 * this instruction is _not_ deterministic. This applies to reading
158 * machine status information in general.
159 *
160 * @{
161 */
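/* A minimal usage sketch of the ordered primitives in this group: a naive spinlock built
 * from ASMAtomicXchgU32() and ASMNopPause() (both declared further down in this header).
 * The variable and function names are invented for the illustration.
 * @code
 *      static uint32_t volatile g_fLocked = 0;
 *
 *      static void ExampleAcquire(void)
 *      {
 *          while (ASMAtomicXchgU32(&g_fLocked, 1) != 0)    // ordered exchange, acts as acquire
 *              ASMNopPause();                              // spin politely
 *      }
 *
 *      static void ExampleRelease(void)
 *      {
 *          ASMAtomicXchgU32(&g_fLocked, 0);                // ordered, prior accesses complete first
 *      }
 * @endcode
 */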
162
163
164/** @def RT_INLINE_ASM_GCC_4_3_X_X86
165 * Used to work around some 4.3.x register allocation issues in this version of
166 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
167 * definitely not for 5.x */
168#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
169# define RT_INLINE_ASM_GCC_4_3_X_X86 1
170#else
171# define RT_INLINE_ASM_GCC_4_3_X_X86 0
172#endif
173
174/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
175 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
176 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
177 * mode, x86.
178 *
179 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
180 * when in PIC mode on x86.
181 */
182#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
183# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
184# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
185# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
186# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
187# elif ( (defined(PIC) || defined(__PIC__)) \
188 && defined(RT_ARCH_X86) \
189 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
190 || defined(RT_OS_DARWIN)) )
191# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
192# else
193# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
194# endif
195#endif
196
197
198/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
199 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
200#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
201# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
202#else
203# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
204#endif
205
206/*
207 * ARM is great fun.
208 */
209#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
210
211# define RTASM_ARM_NO_BARRIER
212# ifdef RT_ARCH_ARM64
213# define RTASM_ARM_NO_BARRIER_IN_REG
214# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
215# define RTASM_ARM_DSB_SY "dsb sy\n\t"
216# define RTASM_ARM_DSB_SY_IN_REG
217# define RTASM_ARM_DSB_SY_COMMA_IN_REG
218# define RTASM_ARM_DMB_SY "dmb sy\n\t"
219# define RTASM_ARM_DMB_SY_IN_REG
220# define RTASM_ARM_DMB_SY_COMMA_IN_REG
221# define RTASM_ARM_DMB_ST "dmb st\n\t"
222# define RTASM_ARM_DMB_ST_IN_REG
223# define RTASM_ARM_DMB_ST_COMMA_IN_REG
224# define RTASM_ARM_DMB_LD "dmb ld\n\t"
225# define RTASM_ARM_DMB_LD_IN_REG
226# define RTASM_ARM_DMB_LD_COMMA_IN_REG
227# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
228# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
229 uint32_t rcSpill; \
230 uint32_t u32NewRet; \
231 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
232 RTASM_ARM_##barrier_type /* before label? */ \
233 "ldaxr %w[uNew], %[pMem]\n\t" \
234 modify64 \
235 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
236 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
237 : [pMem] "+Q" (*a_pu32Mem) \
238 , [uNew] "=&r" (u32NewRet) \
239 , [rc] "=&r" (rcSpill) \
240 : in_reg \
241 : "cc")
242# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
243 uint32_t rcSpill; \
244 uint32_t u32OldRet; \
245 uint32_t u32NewSpill; \
246 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
247 RTASM_ARM_##barrier_type /* before label? */ \
248 "ldaxr %w[uOld], %[pMem]\n\t" \
249 modify64 \
250 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
251 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
252 : [pMem] "+Q" (*a_pu32Mem) \
253 , [uOld] "=&r" (u32OldRet) \
254 , [uNew] "=&r" (u32NewSpill) \
255 , [rc] "=&r" (rcSpill) \
256 : in_reg \
257 : "cc")
258# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
259 uint32_t rcSpill; \
260 uint64_t u64NewRet; \
261 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
262 RTASM_ARM_##barrier_type /* before label? */ \
263 "ldaxr %[uNew], %[pMem]\n\t" \
264 modify64 \
265 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
266 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
267 : [pMem] "+Q" (*a_pu64Mem) \
268 , [uNew] "=&r" (u64NewRet) \
269 , [rc] "=&r" (rcSpill) \
270 : in_reg \
271 : "cc")
272# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
273 uint32_t rcSpill; \
274 uint64_t u64OldRet; \
275 uint64_t u64NewSpill; \
276 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
277 RTASM_ARM_##barrier_type /* before label? */ \
278 "ldaxr %[uOld], %[pMem]\n\t" \
279 modify64 \
280 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
281 "cbnz %w[rc], Ltry_again_" #name "_%=\n\t" \
282 : [pMem] "+Q" (*a_pu64Mem) \
283 , [uOld] "=&r" (u64OldRet) \
284 , [uNew] "=&r" (u64NewSpill) \
285 , [rc] "=&r" (rcSpill) \
286 : in_reg \
287 : "cc")
288
289# else /* RT_ARCH_ARM32 */
290# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
291# if RT_ARCH_ARM32 >= 7
292# warning armv7
293# define RTASM_ARM_NO_BARRIER_IN_REG
294# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
295# define RTASM_ARM_DSB_SY "dsb sy\n\t"
296# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
297# define RTASM_ARM_DMB_SY "dmb sy\n\t"
298# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
299# define RTASM_ARM_DMB_ST "dmb st\n\t"
300# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
301# define RTASM_ARM_DMB_LD "dmb ld\n\t"
302# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
303
304# elif RT_ARCH_ARM32 >= 6
305# warning armv6
306# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
307# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
308# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
309# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
310# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
311# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
312# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
313# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
314
315# elif RT_ARCH_ARM32 >= 4
316# warning armv5 or older
317# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
318# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
319# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
320# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
321# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
322# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
323# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
324# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
325# else
326# error "huh? Odd RT_ARCH_ARM32 value!"
327# endif
328# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
329# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
330# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
331# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
332# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
333 uint32_t rcSpill; \
334 uint32_t u32NewRet; \
335 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
336 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
337 "ldrex %[uNew], %[pMem]\n\t" \
338 modify32 \
339 "strex %[rc], %[uNew], %[pMem]\n\t" \
340 "cmp %[rc], #0\n\t" \
341 "bne Ltry_again_" #name "_%=\n\t" \
342 : [pMem] "+m" (*a_pu32Mem) \
343 , [uNew] "=&r" (u32NewRet) \
344 , [rc] "=&r" (rcSpill) \
345 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
346 , in_reg \
347 : "cc")
348# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
349 uint32_t rcSpill; \
350 uint32_t u32OldRet; \
351 uint32_t u32NewSpill; \
352 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
353 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
354 "ldrex %[uOld], %[pMem]\n\t" \
355 modify32 \
356 "strex %[rc], %[uNew], %[pMem]\n\t" \
357 "cmp %[rc], #0\n\t" \
358 "bne Ltry_again_" #name "_%=\n\t" \
359 : [pMem] "+m" (*a_pu32Mem) \
360 , [uOld] "=&r" (u32OldRet) \
361 , [uNew] "=&r" (u32NewSpill) \
362 , [rc] "=&r" (rcSpill) \
363 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
364 , in_reg \
365 : "cc")
366# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
367 uint32_t rcSpill; \
368 uint64_t u64NewRet; \
369 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
370 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
371 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
372 modify32 \
373 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
374 "cmp %[rc], #0\n\t" \
375 "bne Ltry_again_" #name "_%=\n\t" \
376 : [pMem] "+m" (*a_pu64Mem), \
377 [uNew] "=&r" (u64NewRet), \
378 [rc] "=&r" (rcSpill) \
379 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
380 , in_reg \
381 : "cc")
382# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
383 uint32_t rcSpill; \
384 uint64_t u64OldRet; \
385 uint64_t u64NewSpill; \
386 __asm__ __volatile__("Ltry_again_" #name "_%=:\n\t" \
387 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
388 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
389 modify32 \
390 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
391 "cmp %[rc], #0\n\t" \
392 "bne Ltry_again_" #name "_%=\n\t" \
393 : [pMem] "+m" (*a_pu64Mem), \
394 [uOld] "=&r" (u64OldRet), \
395 [uNew] "=&r" (u64NewSpill), \
396 [rc] "=&r" (rcSpill) \
397 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
398 , in_reg \
399 : "cc")
400# endif /* RT_ARCH_ARM32 */
401#endif
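/* A sketch of how the load/modify/store helpers above are intended to be instantiated:
 * an atomic increment expressed as an exclusive-load / modify / store-exclusive retry
 * loop.  The function name, the modify strings and the dummy input operand are
 * illustrative assumptions, not taken from the rest of this header.
 * @code
 *      DECLINLINE(uint32_t) ExampleAtomicIncU32(volatile uint32_t RT_FAR *pu32)
 *      {
 *          RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ExampleAtomicIncU32, pu32, DMB_SY,
 *                                                 "add %w[uNew], %w[uNew], #1\n\t",
 *                                                 "add %[uNew], %[uNew], #1\n\t",
 *                                                 [uDummy] "r" (0));  // unused dummy input
 *          return u32NewRet;   // declared and filled in by the macro
 *      }
 * @endcode
 */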
402
403
404/** @def ASMReturnAddress
405 * Gets the return address of the current (or calling if you like) function or method.
406 */
407#ifdef _MSC_VER
408# ifdef __cplusplus
409extern "C"
410# endif
411void * _ReturnAddress(void);
412# pragma intrinsic(_ReturnAddress)
413# define ASMReturnAddress() _ReturnAddress()
414#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
415# define ASMReturnAddress() __builtin_return_address(0)
416#elif defined(__WATCOMC__)
417# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
418#else
419# error "Unsupported compiler."
420#endif
421
422
423/**
424 * Compiler memory barrier.
425 *
426 * Ensure that the compiler does not use any cached (register/temporary stack) memory
427 * values and does not leave any writes outstanding when returning from this function.
428 *
429 * This function must be used if non-volatile data is modified by a
430 * device or the VMM. Typical cases are port access, MMIO access,
431 * trapping instructions, etc.
432 */
433#if RT_INLINE_ASM_GNU_STYLE
434# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
435#elif RT_INLINE_ASM_USES_INTRIN
436# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
437#elif defined(__WATCOMC__)
438void ASMCompilerBarrier(void);
439#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
440DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
441{
442 __asm
443 {
444 }
445}
446#endif
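/* A minimal sketch of the typical ASMCompilerBarrier() use described above: forcing the
 * compiler to re-read a plain (non-volatile) variable that an interrupt handler, device or
 * the VMM may change behind its back.  The variable name is invented.
 * @code
 *      extern uint32_t g_cPendingRequests;     // updated from another context
 *
 *      while (g_cPendingRequests == 0)
 *      {
 *          ASMCompilerBarrier();   // discard any cached register copy before re-reading
 *          ASMNopPause();
 *      }
 * @endcode
 */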
447
448
449/** @def ASMBreakpoint
450 * Debugger Breakpoint.
451 * @deprecated Use RT_BREAKPOINT instead.
452 * @internal
453 */
454#define ASMBreakpoint() RT_BREAKPOINT()
455
456
457/**
458 * Spinloop hint for platforms that have it; an empty function on the other
459 * platforms.
460 *
461 * x86 & AMD64: The PAUSE variant of NOP, which helps hyperthreaded CPUs detect
462 * spin locks.
463 */
464#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
465RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
466#else
467DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
468{
469# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
470# if RT_INLINE_ASM_GNU_STYLE
471 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
472# else
473 __asm {
474 _emit 0f3h
475 _emit 090h
476 }
477# endif
478
479# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
480 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
481
482# else
483 /* dummy */
484# endif
485}
486#endif
487
488
489/**
490 * Atomically Exchange an unsigned 8-bit value, ordered.
491 *
492 * @returns Current *pu8 value
493 * @param pu8 Pointer to the 8-bit variable to update.
494 * @param u8 The 8-bit value to assign to *pu8.
495 */
496#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
497RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
498#else
499DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
500{
501# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
502# if RT_INLINE_ASM_GNU_STYLE
503 __asm__ __volatile__("xchgb %0, %1\n\t"
504 : "=m" (*pu8)
505 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
506 : "1" (u8)
507 , "m" (*pu8));
508# else
509 __asm
510 {
511# ifdef RT_ARCH_AMD64
512 mov rdx, [pu8]
513 mov al, [u8]
514 xchg [rdx], al
515 mov [u8], al
516# else
517 mov edx, [pu8]
518 mov al, [u8]
519 xchg [edx], al
520 mov [u8], al
521# endif
522 }
523# endif
524 return u8;
525
526# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
527 uint32_t uOld;
528 uint32_t rcSpill;
529 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU8_%=:\n\t"
530 RTASM_ARM_DMB_SY
531# if defined(RT_ARCH_ARM64)
532 "ldaxrb %w[uOld], %[pMem]\n\t"
533 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
534 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU8_%=\n\t"
535# else
536 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
537 "strexb %[rc], %[uNew], %[pMem]\n\t"
538 "cmp %[rc], #0\n\t"
539 "bne Ltry_again_ASMAtomicXchgU8_%=\n\t"
540# endif
541 : [pMem] "+Q" (*pu8)
542 , [uOld] "=&r" (uOld)
543 , [rc] "=&r" (rcSpill)
544 : [uNew] "r" ((uint32_t)u8)
545 RTASM_ARM_DMB_SY_COMMA_IN_REG
546 : "cc");
547 return (uint8_t)uOld;
548
549# else
550# error "Port me"
551# endif
552}
553#endif
554
555
556/**
557 * Atomically Exchange a signed 8-bit value, ordered.
558 *
559 * @returns Current *pi8 value
560 * @param pi8 Pointer to the 8-bit variable to update.
561 * @param i8 The 8-bit value to assign to *pi8.
562 */
563DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
564{
565 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
566}
567
568
569/**
570 * Atomically Exchange a bool value, ordered.
571 *
572 * @returns Current *pf value
573 * @param pf Pointer to the 8-bit variable to update.
574 * @param f The boolean value to assign to *pf.
575 */
576DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
577{
578#ifdef _MSC_VER
579 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
580#else
581 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
582#endif
583}
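/* A small sketch of ASMAtomicXchgBool() as a "first caller wins" latch; the flag name is
 * invented for the illustration.
 * @code
 *      static bool volatile g_fInitDone = false;
 *
 *      if (!ASMAtomicXchgBool(&g_fInitDone, true))
 *      {
 *          // Only the first thread to get here sees 'false' and runs the one-time setup.
 *      }
 * @endcode
 */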
584
585
586/**
587 * Atomically Exchange an unsigned 16-bit value, ordered.
588 *
589 * @returns Current *pu16 value
590 * @param pu16 Pointer to the 16-bit variable to update.
591 * @param u16 The 16-bit value to assign to *pu16.
592 */
593#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
594RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
595#else
596DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
597{
598# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
599# if RT_INLINE_ASM_GNU_STYLE
600 __asm__ __volatile__("xchgw %0, %1\n\t"
601 : "=m" (*pu16)
602 , "=r" (u16)
603 : "1" (u16)
604 , "m" (*pu16));
605# else
606 __asm
607 {
608# ifdef RT_ARCH_AMD64
609 mov rdx, [pu16]
610 mov ax, [u16]
611 xchg [rdx], ax
612 mov [u16], ax
613# else
614 mov edx, [pu16]
615 mov ax, [u16]
616 xchg [edx], ax
617 mov [u16], ax
618# endif
619 }
620# endif
621 return u16;
622
623# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
624 uint32_t uOld;
625 uint32_t rcSpill;
626 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU16_%=:\n\t"
627 RTASM_ARM_DMB_SY
628# if defined(RT_ARCH_ARM64)
629 "ldaxrh %w[uOld], %[pMem]\n\t"
630 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
631 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU16_%=\n\t"
632# else
633 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
634 "strexh %[rc], %[uNew], %[pMem]\n\t"
635 "cmp %[rc], #0\n\t"
636 "bne Ltry_again_ASMAtomicXchgU16_%=\n\t"
637# endif
638 : [pMem] "+Q" (*pu16)
639 , [uOld] "=&r" (uOld)
640 , [rc] "=&r" (rcSpill)
641 : [uNew] "r" ((uint32_t)u16)
642 RTASM_ARM_DMB_SY_COMMA_IN_REG
643 : "cc");
644 return (uint16_t)uOld;
645
646# else
647# error "Port me"
648# endif
649}
650#endif
651
652
653/**
654 * Atomically Exchange a signed 16-bit value, ordered.
655 *
656 * @returns Current *pi16 value
657 * @param pi16 Pointer to the 16-bit variable to update.
658 * @param i16 The 16-bit value to assign to *pi16.
659 */
660DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
661{
662 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
663}
664
665
666/**
667 * Atomically Exchange an unsigned 32-bit value, ordered.
668 *
669 * @returns Current *pu32 value
670 * @param pu32 Pointer to the 32-bit variable to update.
671 * @param u32 The 32-bit value to assign to *pu32.
672 *
673 * @remarks Does not work on 286 and earlier.
674 */
675#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
676RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
677#else
678DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
679{
680# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
681# if RT_INLINE_ASM_GNU_STYLE
682 __asm__ __volatile__("xchgl %0, %1\n\t"
683 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
684 , "=r" (u32)
685 : "1" (u32)
686 , "m" (*pu32));
687
688# elif RT_INLINE_ASM_USES_INTRIN
689 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
690
691# else
692 __asm
693 {
694# ifdef RT_ARCH_AMD64
695 mov rdx, [pu32]
696 mov eax, u32
697 xchg [rdx], eax
698 mov [u32], eax
699# else
700 mov edx, [pu32]
701 mov eax, u32
702 xchg [edx], eax
703 mov [u32], eax
704# endif
705 }
706# endif
707 return u32;
708
709# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
710 uint32_t uOld;
711 uint32_t rcSpill;
712 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU32_%=:\n\t"
713 RTASM_ARM_DMB_SY
714# if defined(RT_ARCH_ARM64)
715 "ldaxr %w[uOld], %[pMem]\n\t"
716 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
717 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU32_%=\n\t"
718# else
719 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
720 "strex %[rc], %[uNew], %[pMem]\n\t"
721 "cmp %[rc], #0\n\t"
722 "bne Ltry_again_ASMAtomicXchgU32_%=\n\t"
723# endif
724 : [pMem] "+Q" (*pu32)
725 , [uOld] "=&r" (uOld)
726 , [rc] "=&r" (rcSpill)
727 : [uNew] "r" (u32)
728 RTASM_ARM_DMB_SY_COMMA_IN_REG
729 : "cc");
730 return uOld;
731
732# else
733# error "Port me"
734# endif
735}
736#endif
737
738
739/**
740 * Atomically Exchange a signed 32-bit value, ordered.
741 *
742 * @returns Current *pi32 value
743 * @param pi32 Pointer to the 32-bit variable to update.
744 * @param i32 The 32-bit value to assign to *pi32.
745 */
746DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
747{
748 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
749}
750
751
752/**
753 * Atomically Exchange an unsigned 64-bit value, ordered.
754 *
755 * @returns Current *pu64 value
756 * @param pu64 Pointer to the 64-bit variable to update.
757 * @param u64 The 64-bit value to assign to *pu64.
758 *
759 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
760 */
761#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
762 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
763RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
764#else
765DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
766{
767# if defined(RT_ARCH_AMD64)
768# if RT_INLINE_ASM_USES_INTRIN
769 return _InterlockedExchange64((__int64 *)pu64, u64);
770
771# elif RT_INLINE_ASM_GNU_STYLE
772 __asm__ __volatile__("xchgq %0, %1\n\t"
773 : "=m" (*pu64)
774 , "=r" (u64)
775 : "1" (u64)
776 , "m" (*pu64));
777 return u64;
778# else
779 __asm
780 {
781 mov rdx, [pu64]
782 mov rax, [u64]
783 xchg [rdx], rax
784 mov [u64], rax
785 }
786 return u64;
787# endif
788
789# elif defined(RT_ARCH_X86)
790# if RT_INLINE_ASM_GNU_STYLE
791# if defined(PIC) || defined(__PIC__)
792 uint32_t u32EBX = (uint32_t)u64;
793 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
794 "xchgl %%ebx, %3\n\t"
795 "1:\n\t"
796 "lock; cmpxchg8b (%5)\n\t"
797 "jnz 1b\n\t"
798 "movl %3, %%ebx\n\t"
799 /*"xchgl %%esi, %5\n\t"*/
800 : "=A" (u64)
801 , "=m" (*pu64)
802 : "0" (*pu64)
803 , "m" ( u32EBX )
804 , "c" ( (uint32_t)(u64 >> 32) )
805 , "S" (pu64)
806 : "cc");
807# else /* !PIC */
808 __asm__ __volatile__("1:\n\t"
809 "lock; cmpxchg8b %1\n\t"
810 "jnz 1b\n\t"
811 : "=A" (u64)
812 , "=m" (*pu64)
813 : "0" (*pu64)
814 , "b" ( (uint32_t)u64 )
815 , "c" ( (uint32_t)(u64 >> 32) )
816 : "cc");
817# endif
818# else
819 __asm
820 {
821 mov ebx, dword ptr [u64]
822 mov ecx, dword ptr [u64 + 4]
823 mov edi, pu64
824 mov eax, dword ptr [edi]
825 mov edx, dword ptr [edi + 4]
826 retry:
827 lock cmpxchg8b [edi]
828 jnz retry
829 mov dword ptr [u64], eax
830 mov dword ptr [u64 + 4], edx
831 }
832# endif
833 return u64;
834
835# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
836 uint32_t rcSpill;
837 uint64_t uOld;
838 __asm__ __volatile__("Ltry_again_ASMAtomicXchgU64_%=:\n\t"
839 RTASM_ARM_DMB_SY
840# if defined(RT_ARCH_ARM64)
841 "ldaxr %[uOld], %[pMem]\n\t"
842 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
843 "cbnz %w[rc], Ltry_again_ASMAtomicXchgU64_%=\n\t"
844# else
845 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
846 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
847 "cmp %[rc], #0\n\t"
848 "bne Ltry_again_ASMAtomicXchgU64_%=\n\t"
849# endif
850 : [pMem] "+Q" (*pu64)
851 , [uOld] "=&r" (uOld)
852 , [rc] "=&r" (rcSpill)
853 : [uNew] "r" (u64)
854 RTASM_ARM_DMB_SY_COMMA_IN_REG
855 : "cc");
856 return uOld;
857
858# else
859# error "Port me"
860# endif
861}
862#endif
863
864
865/**
866 * Atomically Exchange a signed 64-bit value, ordered.
867 *
868 * @returns Current *pi64 value
869 * @param pi64 Pointer to the 64-bit variable to update.
870 * @param i64 The 64-bit value to assign to *pi64.
871 */
872DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
873{
874 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
875}
876
877
878/**
879 * Atomically Exchange a size_t value, ordered.
880 *
881 * @returns Current *puDst value
882 * @param puDst Pointer to the size_t variable to update.
883 * @param uNew The new value to assign to *puDst.
884 */
885DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
886{
887#if ARCH_BITS == 16
888 AssertCompile(sizeof(size_t) == 2);
889 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
890#elif ARCH_BITS == 32
891 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
892#elif ARCH_BITS == 64
893 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
894#else
895# error "ARCH_BITS is bogus"
896#endif
897}
898
899
900/**
901 * Atomically Exchange a pointer value, ordered.
902 *
903 * @returns Current *ppv value
904 * @param ppv Pointer to the pointer variable to update.
905 * @param pv The pointer value to assign to *ppv.
906 */
907DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
908{
909#if ARCH_BITS == 32 || ARCH_BITS == 16
910 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
911#elif ARCH_BITS == 64
912 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
913#else
914# error "ARCH_BITS is bogus"
915#endif
916}
917
918
919/**
920 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
921 *
922 * @returns Current *ppv value
923 * @param ppv Pointer to the pointer variable to update.
924 * @param pv The pointer value to assign to *ppv.
925 * @param Type The type of *ppv, sans volatile.
926 */
927#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
928# define ASMAtomicXchgPtrT(ppv, pv, Type) \
929 __extension__ \
930 ({\
931 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
932 Type const pvTypeChecked = (pv); \
933 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
934 pvTypeCheckedRet; \
935 })
936#else
937# define ASMAtomicXchgPtrT(ppv, pv, Type) \
938 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
939#endif
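/* A sketch of the type-checked exchange macro above; it avoids the casts a raw
 * ASMAtomicXchgPtr() call would need.  MYSTRUCT and the variables are invented.
 * @code
 *      typedef struct MYSTRUCT { int iValue; } MYSTRUCT;
 *      static MYSTRUCT * volatile g_pCurrent;
 *
 *      static MYSTRUCT *ExampleSwapIn(MYSTRUCT *pNew)
 *      {
 *          return ASMAtomicXchgPtrT(&g_pCurrent, pNew, MYSTRUCT *);    // returns the previous pointer
 *      }
 * @endcode
 */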
940
941
942/**
943 * Atomically Exchange a raw-mode context pointer value, ordered.
944 *
945 * @returns Current *ppvRC value
946 * @param ppvRC Pointer to the pointer variable to update.
947 * @param pvRC The pointer value to assign to *ppvRC.
948 */
949DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
950{
951 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
952}
953
954
955/**
956 * Atomically Exchange a ring-0 pointer value, ordered.
957 *
958 * @returns Current *ppvR0 value
959 * @param ppvR0 Pointer to the pointer variable to update.
960 * @param pvR0 The pointer value to assign to *ppvR0.
961 */
962DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
963{
964#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
965 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
966#elif R0_ARCH_BITS == 64
967 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
968#else
969# error "R0_ARCH_BITS is bogus"
970#endif
971}
972
973
974/**
975 * Atomically Exchange a ring-3 pointer value, ordered.
976 *
977 * @returns Current *ppvR3 value
978 * @param ppvR3 Pointer to the pointer variable to update.
979 * @param pvR3 The pointer value to assign to *ppvR3.
980 */
981DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
982{
983#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
984 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
985#elif R3_ARCH_BITS == 64
986 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
987#else
988# error "R3_ARCH_BITS is bogus"
989#endif
990}
991
992
993/** @def ASMAtomicXchgHandle
994 * Atomically Exchange a typical IPRT handle value, ordered.
995 *
996 * @param ph Pointer to the value to update.
997 * @param hNew The new value to assign to *ph.
998 * @param phRes Where to store the current *ph value.
999 *
1000 * @remarks This doesn't currently work for all handles (like RTFILE).
1001 */
1002#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1003# define ASMAtomicXchgHandle(ph, hNew, phRes) \
1004 do { \
1005 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1006 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
1007 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
1008 } while (0)
1009#elif HC_ARCH_BITS == 64
1010# define ASMAtomicXchgHandle(ph, hNew, phRes) \
1011 do { \
1012 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1013 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
1014 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
1015 } while (0)
1016#else
1017# error HC_ARCH_BITS
1018#endif
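/* A sketch of ASMAtomicXchgHandle() for a "destroy exactly once" hand-off, assuming a
 * pointer-sized IPRT handle such as RTSEMEVENT; the variable and function names are invented.
 * @code
 *      static RTSEMEVENT volatile g_hEvent;
 *
 *      static void ExampleCleanup(void)
 *      {
 *          RTSEMEVENT hEvt;
 *          ASMAtomicXchgHandle(&g_hEvent, NIL_RTSEMEVENT, &hEvt);  // take ownership of the old handle
 *          if (hEvt != NIL_RTSEMEVENT)
 *              RTSemEventDestroy(hEvt);                            // at most one caller gets here with it
 *      }
 * @endcode
 */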
1019
1020
1021/**
1022 * Atomically Exchange a value whose size might differ
1023 * between platforms or compilers, ordered.
1024 *
1025 * @param pu Pointer to the variable to update.
1026 * @param uNew The value to assign to *pu.
1027 * @todo This is busted as it's missing the result argument.
1028 */
1029#define ASMAtomicXchgSize(pu, uNew) \
1030 do { \
1031 switch (sizeof(*(pu))) { \
1032 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1033 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1034 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1035 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1036 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1037 } \
1038 } while (0)
1039
1040/**
1041 * Atomically Exchange a value whose size might differ
1042 * between platforms or compilers, ordered.
1043 *
1044 * @param pu Pointer to the variable to update.
1045 * @param uNew The value to assign to *pu.
1046 * @param puRes Where to store the current *pu value.
1047 */
1048#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1049 do { \
1050 switch (sizeof(*(pu))) { \
1051 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1052 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1053 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1054 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1055 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
1056 } \
1057 } while (0)
1058
1059
1060
1061/**
1062 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1063 *
1064 * @returns true if xchg was done.
1065 * @returns false if xchg wasn't done.
1066 *
1067 * @param pu8 Pointer to the value to update.
1068 * @param u8New The new value to assign to *pu8.
1069 * @param u8Old The old value to compare *pu8 with.
1070 *
1071 * @remarks x86: Requires a 486 or later.
1072 * @todo Rename ASMAtomicCmpWriteU8
1073 */
1074#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1075RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1076#else
1077DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1078{
1079# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1080 uint8_t u8Ret;
1081 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1082 "setz %1\n\t"
1083 : "=m" (*pu8)
1084 , "=qm" (u8Ret)
1085 , "=a" (u8Old)
1086 : "q" (u8New)
1087 , "2" (u8Old)
1088 , "m" (*pu8)
1089 : "cc");
1090 return (bool)u8Ret;
1091
1092# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1093 union { uint32_t u; bool f; } fXchg;
1094 uint32_t u32Spill;
1095 uint32_t rcSpill;
1096 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1097 RTASM_ARM_DMB_SY
1098# if defined(RT_ARCH_ARM64)
1099 "ldaxrb %w[uOld], %[pMem]\n\t"
1100 "cmp %w[uOld], %w[uCmp]\n\t"
1101 "bne 1f\n\t" /* stop here if not equal */
1102 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1103 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1104 "mov %w[fXchg], #1\n\t"
1105 "1:\n\t"
1106 "clrex\n\t"
1107# else
1108 "ldrexb %[uOld], %[pMem]\n\t"
1109 "teq %[uOld], %[uCmp]\n\t"
1110 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1111 "bne 1f\n\t" /* stop here if not equal */
1112 "cmp %[rc], #0\n\t"
1113 "bne Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1114 "mov %[fXchg], #1\n\t"
1115 "1:\n\t"
1116 /** @todo clrexne on armv7? */
1117# endif
1118 : [pMem] "+Q" (*pu8)
1119 , [uOld] "=&r" (u32Spill)
1120 , [rc] "=&r" (rcSpill)
1121 , [fXchg] "=&r" (fXchg.u)
1122 : [uCmp] "r" ((uint32_t)u8Old)
1123 , [uNew] "r" ((uint32_t)u8New)
1124 , "[fXchg]" (0)
1125 RTASM_ARM_DMB_SY_COMMA_IN_REG
1126 : "cc");
1127 return fXchg.f;
1128
1129# else
1130# error "Port me"
1131# endif
1132}
1133#endif
1134
1135
1136/**
1137 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1138 *
1139 * @returns true if xchg was done.
1140 * @returns false if xchg wasn't done.
1141 *
1142 * @param pi8 Pointer to the value to update.
1143 * @param i8New The new value to assign to *pi8.
1144 * @param i8Old The old value to compare *pi8 with.
1145 *
1146 * @remarks x86: Requires a 486 or later.
1147 * @todo Rename ASMAtomicCmpWriteS8
1148 */
1149DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1150{
1151 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1152}
1153
1154
1155/**
1156 * Atomically Compare and Exchange a bool value, ordered.
1157 *
1158 * @returns true if xchg was done.
1159 * @returns false if xchg wasn't done.
1160 *
1161 * @param pf Pointer to the value to update.
1162 * @param fNew The new value to assign to *pf.
1163 * @param fOld The old value to compare *pf with.
1164 *
1165 * @remarks x86: Requires a 486 or later.
1166 * @todo Rename ASMAtomicCmpWriteBool
1167 */
1168DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1169{
1170 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1171}
1172
1173
1174/**
1175 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1176 *
1177 * @returns true if xchg was done.
1178 * @returns false if xchg wasn't done.
1179 *
1180 * @param pu32 Pointer to the value to update.
1181 * @param u32New The new value to assign to *pu32.
1182 * @param u32Old The old value to compare *pu32 with.
1183 *
1184 * @remarks x86: Requires a 486 or later.
1185 * @todo Rename ASMAtomicCmpWriteU32
1186 */
1187#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1188RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1189#else
1190DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1191{
1192# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint8_t u8Ret;
1195 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1196 "setz %1\n\t"
1197 : "=m" (*pu32)
1198 , "=qm" (u8Ret)
1199 , "=a" (u32Old)
1200 : "r" (u32New)
1201 , "2" (u32Old)
1202 , "m" (*pu32)
1203 : "cc");
1204 return (bool)u8Ret;
1205
1206# elif RT_INLINE_ASM_USES_INTRIN
1207 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1208
1209# else
1210 uint32_t u32Ret;
1211 __asm
1212 {
1213# ifdef RT_ARCH_AMD64
1214 mov rdx, [pu32]
1215# else
1216 mov edx, [pu32]
1217# endif
1218 mov eax, [u32Old]
1219 mov ecx, [u32New]
1220# ifdef RT_ARCH_AMD64
1221 lock cmpxchg [rdx], ecx
1222# else
1223 lock cmpxchg [edx], ecx
1224# endif
1225 setz al
1226 movzx eax, al
1227 mov [u32Ret], eax
1228 }
1229 return !!u32Ret;
1230# endif
1231
1232# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1233 union { uint32_t u; bool f; } fXchg;
1234 uint32_t u32Spill;
1235 uint32_t rcSpill;
1236 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1237 RTASM_ARM_DMB_SY
1238# if defined(RT_ARCH_ARM64)
1239 "ldaxr %w[uOld], %[pMem]\n\t"
1240 "cmp %w[uOld], %w[uCmp]\n\t"
1241 "bne 1f\n\t" /* stop here if not equal */
1242 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1243 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1244 "mov %w[fXchg], #1\n\t"
1245 "1:\n\t"
1246 "clrex\n\t"
1247# else
1248 "ldrex %[uOld], %[pMem]\n\t"
1249 "teq %[uOld], %[uCmp]\n\t"
1250 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1251 "bne 1f\n\t" /* stop here if not equal */
1252 "cmp %[rc], #0\n\t"
1253 "bne Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1254 "mov %[fXchg], #1\n\t"
1255 "1:\n\t"
1256 /** @todo clrexne on armv7? */
1257# endif
1258 : [pMem] "+Q" (*pu32)
1259 , [uOld] "=&r" (u32Spill)
1260 , [rc] "=&r" (rcSpill)
1261 , [fXchg] "=&r" (fXchg.u)
1262 : [uCmp] "r" (u32Old)
1263 , [uNew] "r" (u32New)
1264 , "[fXchg]" (0)
1265 RTASM_ARM_DMB_SY_COMMA_IN_REG
1266 : "cc");
1267 return fXchg.f;
1268
1269# else
1270# error "Port me"
1271# endif
1272}
1273#endif
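/* A sketch of the canonical compare-and-exchange retry loop built on
 * ASMAtomicCmpXchgU32(): here a saturating reference-count increment.  The counter
 * variable and function name are invented.
 * @code
 *      static uint32_t volatile g_cUsers;
 *
 *      static void ExampleRetainSaturating(void)
 *      {
 *          uint32_t cOld;
 *          do
 *              cOld = g_cUsers;                    // volatile read, redone on every retry
 *          while (   cOld < UINT32_MAX             // don't wrap around
 *                 && !ASMAtomicCmpXchgU32(&g_cUsers, cOld + 1, cOld));
 *      }
 * @endcode
 */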
1274
1275
1276/**
1277 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1278 *
1279 * @returns true if xchg was done.
1280 * @returns false if xchg wasn't done.
1281 *
1282 * @param pi32 Pointer to the value to update.
1283 * @param i32New The new value to assign to *pi32.
1284 * @param i32Old The old value to compare *pi32 with.
1285 *
1286 * @remarks x86: Requires a 486 or later.
1287 * @todo Rename ASMAtomicCmpWriteS32
1288 */
1289DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1290{
1291 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1292}
1293
1294
1295/**
1296 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1297 *
1298 * @returns true if xchg was done.
1299 * @returns false if xchg wasn't done.
1300 *
1301 * @param pu64 Pointer to the 64-bit variable to update.
1302 * @param u64New The 64-bit value to assign to *pu64.
1303 * @param u64Old The value to compare with.
1304 *
1305 * @remarks x86: Requires a Pentium or later.
1306 * @todo Rename ASMAtomicCmpWriteU64
1307 */
1308#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1309 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1310RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1311#else
1312DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1313{
1314# if RT_INLINE_ASM_USES_INTRIN
1315 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1316
1317# elif defined(RT_ARCH_AMD64)
1318# if RT_INLINE_ASM_GNU_STYLE
1319 uint8_t u8Ret;
1320 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1321 "setz %1\n\t"
1322 : "=m" (*pu64)
1323 , "=qm" (u8Ret)
1324 , "=a" (u64Old)
1325 : "r" (u64New)
1326 , "2" (u64Old)
1327 , "m" (*pu64)
1328 : "cc");
1329 return (bool)u8Ret;
1330# else
1331 bool fRet;
1332 __asm
1333 {
1334 mov rdx, [pu64]
1335 mov rax, [u64Old]
1336 mov rcx, [u64New]
1337 lock cmpxchg [rdx], rcx
1338 setz al
1339 mov [fRet], al
1340 }
1341 return fRet;
1342# endif
1343
1344# elif defined(RT_ARCH_X86)
1345 uint32_t u32Ret;
1346# if RT_INLINE_ASM_GNU_STYLE
1347# if defined(PIC) || defined(__PIC__)
1348 uint32_t u32EBX = (uint32_t)u64New;
1349 uint32_t u32Spill;
1350 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1351 "lock; cmpxchg8b (%6)\n\t"
1352 "setz %%al\n\t"
1353 "movl %4, %%ebx\n\t"
1354 "movzbl %%al, %%eax\n\t"
1355 : "=a" (u32Ret)
1356 , "=d" (u32Spill)
1357# if RT_GNUC_PREREQ(4, 3)
1358 , "+m" (*pu64)
1359# else
1360 , "=m" (*pu64)
1361# endif
1362 : "A" (u64Old)
1363 , "m" ( u32EBX )
1364 , "c" ( (uint32_t)(u64New >> 32) )
1365 , "S" (pu64)
1366 : "cc");
1367# else /* !PIC */
1368 uint32_t u32Spill;
1369 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1370 "setz %%al\n\t"
1371 "movzbl %%al, %%eax\n\t"
1372 : "=a" (u32Ret)
1373 , "=d" (u32Spill)
1374 , "+m" (*pu64)
1375 : "A" (u64Old)
1376 , "b" ( (uint32_t)u64New )
1377 , "c" ( (uint32_t)(u64New >> 32) )
1378 : "cc");
1379# endif
1380 return (bool)u32Ret;
1381# else
1382 __asm
1383 {
1384 mov ebx, dword ptr [u64New]
1385 mov ecx, dword ptr [u64New + 4]
1386 mov edi, [pu64]
1387 mov eax, dword ptr [u64Old]
1388 mov edx, dword ptr [u64Old + 4]
1389 lock cmpxchg8b [edi]
1390 setz al
1391 movzx eax, al
1392 mov dword ptr [u32Ret], eax
1393 }
1394 return !!u32Ret;
1395# endif
1396
1397# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1398 union { uint32_t u; bool f; } fXchg;
1399 uint64_t u64Spill;
1400 uint32_t rcSpill;
1401 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1402 RTASM_ARM_DMB_SY
1403# if defined(RT_ARCH_ARM64)
1404 "ldaxr %[uOld], %[pMem]\n\t"
1405 "cmp %[uOld], %[uCmp]\n\t"
1406 "bne 1f\n\t" /* stop here if not equal */
1407 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1408 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1409 "mov %w[fXchg], #1\n\t"
1410 "1:\n\t"
1411 "clrex\n\t"
1412# else
1413 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1414 "teq %[uOld], %[uCmp]\n\t"
1415 "teqeq %H[uOld], %H[uCmp]\n\t"
1416 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1417 "bne 1f\n\t" /* stop here if not equal */
1418 "cmp %[rc], #0\n\t"
1419 "bne Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1420 "mov %[fXchg], #1\n\t"
1421 "1:\n\t"
1422 /** @todo clrexne on armv7? */
1423# endif
1424 : [pMem] "+Q" (*pu64)
1425 , [uOld] "=&r" (u64Spill)
1426 , [rc] "=&r" (rcSpill)
1427 , [fXchg] "=&r" (fXchg.u)
1428 : [uCmp] "r" (u64Old)
1429 , [uNew] "r" (u64New)
1430 , "[fXchg]" (0)
1431 RTASM_ARM_DMB_SY_COMMA_IN_REG
1432 : "cc");
1433 return fXchg.f;
1434
1435# else
1436# error "Port me"
1437# endif
1438}
1439#endif
1440
1441
1442/**
1443 * Atomically Compare and exchange a signed 64-bit value, ordered.
1444 *
1445 * @returns true if xchg was done.
1446 * @returns false if xchg wasn't done.
1447 *
1448 * @param pi64 Pointer to the 64-bit variable to update.
1449 * @param i64 The 64-bit value to assign to *pi64.
1450 * @param i64Old The value to compare with.
1451 *
1452 * @remarks x86: Requires a Pentium or later.
1453 * @todo Rename ASMAtomicCmpWriteS64
1454 */
1455DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1456{
1457 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1458}
1459
1460#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
1461
1462/** @def RTASM_HAVE_CMP_WRITE_U128
1463 * Indicates that we've got ASMAtomicCmpWriteU128(), ASMAtomicCmpWriteU128v2()
1464 * and ASMAtomicCmpWriteExU128() available. */
1465# define RTASM_HAVE_CMP_WRITE_U128 1
1466
1467
1468/**
1469 * Atomically compare and write an unsigned 128-bit value, ordered.
1470 *
1471 * @returns true if write was done.
1472 * @returns false if write wasn't done.
1473 *
1474 * @param pu128 Pointer to the 128-bit variable to update.
1475 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1476 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1477 * @param u64OldHi The high 64 bits of the value to compare with.
1478 * @param u64OldLo The low 64 bits of the value to compare with.
1479 *
1480 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1481 */
1482# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1483DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1484 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1485# else
1486DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1487 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1488{
1489# if RT_INLINE_ASM_USES_INTRIN
1490 __int64 ai64Cmp[2];
1491 ai64Cmp[0] = u64OldLo;
1492 ai64Cmp[1] = u64OldHi;
1493 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1494
1495# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1496 return __sync_bool_compare_and_swap(pu128, ((uint128_t)u64OldHi << 64) | u64OldLo, ((uint128_t)u64NewHi << 64) | u64NewLo);
1497
1498# elif defined(RT_ARCH_AMD64)
1499# if RT_INLINE_ASM_GNU_STYLE
1500 uint64_t u64Ret;
1501 uint64_t u64Spill;
1502 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1503 "setz %%al\n\t"
1504 "movzbl %%al, %%eax\n\t"
1505 : "=a" (u64Ret)
1506 , "=d" (u64Spill)
1507 , "+m" (*pu128)
1508 : "a" (u64OldLo)
1509 , "d" (u64OldHi)
1510 , "b" (u64NewLo)
1511 , "c" (u64NewHi)
1512 : "cc");
1513
1514 return (bool)u64Ret;
1515# else
1516# error "Port me"
1517# endif
1518# else
1519# error "Port me"
1520# endif
1521}
1522# endif
1523
1524
1525/**
1526 * Atomically compare and write an unsigned 128-bit value, ordered.
1527 *
1528 * @returns true if write was done.
1529 * @returns false if write wasn't done.
1530 *
1531 * @param pu128 Pointer to the 128-bit variable to update.
1532 * @param u128New The 128-bit value to assign to *pu128.
1533 * @param u128Old The value to compare with.
1534 *
1535 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1536 */
1537DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1538{
1539# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1540# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1541 return __sync_bool_compare_and_swap(pu128, u128Old, u128New);
1542# else
1543 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1544 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1545# endif
1546# else
1547 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1548# endif
1549}
1550
1551
1552/**
1553 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1554 */
1555DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1556 const RTUINT128U u128Old) RT_NOTHROW_DEF
1557{
1558# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
1559 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1560# else
1561 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1562# endif
1563}
1564
1565#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
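/* A sketch of the RTUINT128U wrapper above: atomically replacing a 128-bit descriptor
 * only if it still holds the expected value.  The descriptor variable and the choice of
 * update are invented for the illustration.
 * @code
 *      static volatile RTUINT128U g_u128Desc;
 *
 *      static bool ExampleTryBumpLow(uint64_t uExpectedHi, uint64_t uExpectedLo)
 *      {
 *          RTUINT128U uOld, uNew;
 *          uOld.s.Hi = uExpectedHi;
 *          uOld.s.Lo = uExpectedLo;
 *          uNew.s.Hi = uExpectedHi;
 *          uNew.s.Lo = uExpectedLo + 1;
 *          return ASMAtomicCmpWriteU128U(&g_u128Desc, uNew, uOld); // false if someone else changed it
 *      }
 * @endcode
 */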
1566
1567/**
1568 * Atomically Compare and Exchange a pointer value, ordered.
1569 *
1570 * @returns true if xchg was done.
1571 * @returns false if xchg wasn't done.
1572 *
1573 * @param ppv Pointer to the value to update.
1574 * @param pvNew The new value to assign to *ppv.
1575 * @param pvOld The old value to compare *ppv with.
1576 *
1577 * @remarks x86: Requires a 486 or later.
1578 * @todo Rename ASMAtomicCmpWritePtrVoid
1579 */
1580DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1581{
1582#if ARCH_BITS == 32 || ARCH_BITS == 16
1583 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1584#elif ARCH_BITS == 64
1585 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1586#else
1587# error "ARCH_BITS is bogus"
1588#endif
1589}
1590
1591
1592/**
1593 * Atomically Compare and Exchange a pointer value, ordered.
1594 *
1595 * @returns true if xchg was done.
1596 * @returns false if xchg wasn't done.
1597 *
1598 * @param ppv Pointer to the value to update.
1599 * @param pvNew The new value to assigned to *ppv.
1600 * @param pvOld The old value to *ppv compare with.
1601 *
1602 * @remarks This is relatively type safe on GCC platforms.
1603 * @remarks x86: Requires a 486 or later.
1604 * @todo Rename ASMAtomicCmpWritePtr
1605 */
1606#ifdef __GNUC__
1607# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1608 __extension__ \
1609 ({\
1610 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1611 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1612 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1613 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1614 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1615 fMacroRet; \
1616 })
1617#else
1618# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1619 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1620#endif
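/* A sketch of the type-checked pointer compare-and-exchange above: lock-free push onto a
 * singly linked list.  MYNODE and the list head are invented for the illustration.
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *      static MYNODE * volatile g_pHead;
 *
 *      static void ExamplePush(MYNODE *pNode)
 *      {
 *          MYNODE *pHead;
 *          do
 *          {
 *              pHead = g_pHead;                // current head (volatile read)
 *              pNode->pNext = pHead;           // link before publishing
 *          } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pHead));
 *      }
 * @endcode
 */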
1621
1622
1623/** @def ASMAtomicCmpXchgHandle
1624 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1625 *
1626 * @param ph Pointer to the value to update.
1627 * @param hNew The new value to assign to *ph.
1628 * @param hOld The old value to compare *ph with.
1629 * @param fRc Where to store the result.
1630 *
1631 * @remarks This doesn't currently work for all handles (like RTFILE).
1632 * @remarks x86: Requires a 486 or later.
1633 * @todo Rename ASMAtomicCmpWriteHandle
1634 */
1635#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1636# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1637 do { \
1638 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1639 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1640 } while (0)
1641#elif HC_ARCH_BITS == 64
1642# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1643 do { \
1644 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1645 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1646 } while (0)
1647#else
1648# error HC_ARCH_BITS
1649#endif
1650
1651
1652/** @def ASMAtomicCmpXchgSize
1653 * Atomically Compare and Exchange a value whose size might differ
1654 * between platforms or compilers, ordered.
1655 *
1656 * @param pu Pointer to the value to update.
1657 * @param uNew The new value to assign to *pu.
1658 * @param uOld The old value to compare *pu with.
1659 * @param fRc Where to store the result.
1660 *
1661 * @remarks x86: Requires a 486 or later.
1662 * @todo Rename ASMAtomicCmpWriteSize
1663 */
1664#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1665 do { \
1666 switch (sizeof(*(pu))) { \
1667 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1668 break; \
1669 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1670 break; \
1671 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1672 (fRc) = false; \
1673 break; \
1674 } \
1675 } while (0)
1676
1677
1678/**
1679 * Atomically Compare and Exchange an unsigned 8-bit value, additionally
1680 * passing back the old value, ordered.
1681 *
1682 * @returns true if xchg was done.
1683 * @returns false if xchg wasn't done.
1684 *
1685 * @param pu8 Pointer to the value to update.
1686 * @param u8New The new value to assign to *pu8.
1687 * @param u8Old The old value to compare *pu8 with.
1688 * @param pu8Old Pointer to store the old value at.
1689 *
1690 * @remarks x86: Requires a 486 or later.
1691 */
1692#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1693RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1694#else
1695DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1696{
1697# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1698# if RT_INLINE_ASM_GNU_STYLE
1699 uint8_t u8Ret;
1700 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1701 "setz %1\n\t"
1702 : "=m" (*pu8)
1703 , "=qm" (u8Ret)
1704 , "=a" (*pu8Old)
1705# if defined(RT_ARCH_X86)
1706 : "q" (u8New)
1707# else
1708 : "r" (u8New)
1709# endif
1710 , "a" (u8Old)
1711 , "m" (*pu8)
1712 : "cc");
1713 return (bool)u8Ret;
1714
1715# elif RT_INLINE_ASM_USES_INTRIN
1716 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1717
1718# else
1719 uint8_t u8Ret;
1720 __asm
1721 {
1722# ifdef RT_ARCH_AMD64
1723 mov rdx, [pu8]
1724# else
1725 mov edx, [pu8]
1726# endif
1727 mov al, [u8Old]
1728 mov cl, [u8New]
1729# ifdef RT_ARCH_AMD64
1730 lock cmpxchg [rdx], cl
1731 mov rdx, [pu8Old]
1732 mov [rdx], al
1733# else
1734 lock cmpxchg [edx], cl
1735 mov edx, [pu8Old]
1736 mov [edx], al
1737# endif
1738 setz al
1739 movzx eax, al
1740 mov [u8Ret], al
1741 }
1742 return !!u8Ret;
1743# endif
1744
1745# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1746 union { uint8_t u; bool f; } fXchg;
1747 uint8_t u8ActualOld;
1748 uint8_t rcSpill;
1749 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1750 RTASM_ARM_DMB_SY
1751# if defined(RT_ARCH_ARM64)
1752 "ldaxrb %w[uOld], %[pMem]\n\t"
1753 "cmp %w[uOld], %w[uCmp]\n\t"
1754 "bne 1f\n\t" /* stop here if not equal */
1755 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1756 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1757 "mov %w[fXchg], #1\n\t"
1758 "1:\n\t"
1759 "clrex\n\t"
1760# else
1761 "ldrexb %[uOld], %[pMem]\n\t"
1762 "teq %[uOld], %[uCmp]\n\t"
1763 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1764 "bne 1f\n\t" /* stop here if not equal */
1765 "cmp %[rc], #0\n\t"
1766 "bne Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1767 "mov %[fXchg], #1\n\t"
1768 "1:\n\t"
1769 /** @todo clrexne on armv7? */
1770# endif
1771 : [pMem] "+Q" (*pu8)
1772 , [uOld] "=&r" (u8ActualOld)
1773 , [rc] "=&r" (rcSpill)
1774 , [fXchg] "=&r" (fXchg.u)
1775 : [uCmp] "r" (u8Old)
1776 , [uNew] "r" (u8New)
1777 , "[fXchg]" (0)
1778 RTASM_ARM_DMB_SY_COMMA_IN_REG
1779 : "cc");
1780 *pu8Old = u8ActualOld;
1781 return fXchg.f;
1782
1783# else
1784# error "Port me"
1785# endif
1786}
1787#endif
1788
1789
1790/**
1791 * Atomically Compare and Exchange a signed 8-bit value, additionally
1792 * passes back old value, ordered.
1793 *
1794 * @returns true if xchg was done.
1795 * @returns false if xchg wasn't done.
1796 *
1797 * @param pi8 Pointer to the value to update.
1798 * @param i8New The new value to assign to *pi8.
1799 * @param i8Old The old value to compare *pi8 with.
1800 * @param pi8Old Pointer to store the old value at.
1801 *
1802 * @remarks x86: Requires a 486 or later.
1803 */
1804DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1805{
1806 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1807}
1808
1809
1810/**
1811 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1812 * back old value, ordered.
1813 *
1814 * @returns true if xchg was done.
1815 * @returns false if xchg wasn't done.
1816 *
1817 * @param pu16 Pointer to the value to update.
1818 * @param u16New The new value to assign to *pu16.
1819 * @param u16Old The old value to compare *pu16 with.
1820 * @param pu16Old Pointer to store the old value at.
1821 *
1822 * @remarks x86: Requires a 486 or later.
1823 */
1824#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1825RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1826#else
1827DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1828{
1829# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1830# if RT_INLINE_ASM_GNU_STYLE
1831 uint8_t u8Ret;
1832 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1833 "setz %1\n\t"
1834 : "=m" (*pu16)
1835 , "=qm" (u8Ret)
1836 , "=a" (*pu16Old)
1837 : "r" (u16New)
1838 , "a" (u16Old)
1839 , "m" (*pu16)
1840 : "cc");
1841 return (bool)u8Ret;
1842
1843# elif RT_INLINE_ASM_USES_INTRIN
1844 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1845
1846# else
1847 uint16_t u16Ret;
1848 __asm
1849 {
1850# ifdef RT_ARCH_AMD64
1851 mov rdx, [pu16]
1852# else
1853 mov edx, [pu16]
1854# endif
1855 mov ax, [u16Old]
1856 mov cx, [u16New]
1857# ifdef RT_ARCH_AMD64
1858 lock cmpxchg [rdx], cx
1859 mov rdx, [pu16Old]
1860 mov [rdx], ax
1861# else
1862 lock cmpxchg [edx], cx
1863 mov edx, [pu16Old]
1864 mov [edx], ax
1865# endif
1866 setz al
1867 movzx eax, al
1868 mov [u16Ret], ax
1869 }
1870 return !!u16Ret;
1871# endif
1872
1873# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1874 union { uint16_t u; bool f; } fXchg;
1875 uint16_t u16ActualOld;
1876 uint16_t rcSpill;
1877 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1878 RTASM_ARM_DMB_SY
1879# if defined(RT_ARCH_ARM64)
1880 "ldaxrh %w[uOld], %[pMem]\n\t"
1881 "cmp %w[uOld], %w[uCmp]\n\t"
1882 "bne 1f\n\t" /* stop here if not equal */
1883 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1884 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1885 "mov %w[fXchg], #1\n\t"
1886 "1:\n\t"
1887 "clrex\n\t"
1888# else
1889 "ldrexh %[uOld], %[pMem]\n\t"
1890 "teq %[uOld], %[uCmp]\n\t"
1891 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1892 "bne 1f\n\t" /* stop here if not equal */
1893 "cmp %[rc], #0\n\t"
1894 "bne Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1895 "mov %[fXchg], #1\n\t"
1896 "1:\n\t"
1897 /** @todo clrexne on armv7? */
1898# endif
1899 : [pMem] "+Q" (*pu16)
1900 , [uOld] "=&r" (u16ActualOld)
1901 , [rc] "=&r" (rcSpill)
1902 , [fXchg] "=&r" (fXchg.u)
1903 : [uCmp] "r" (u16Old)
1904 , [uNew] "r" (u16New)
1905 , "[fXchg]" (0)
1906 RTASM_ARM_DMB_SY_COMMA_IN_REG
1907 : "cc");
1908 *pu16Old = u16ActualOld;
1909 return fXchg.f;
1910
1911# else
1912# error "Port me"
1913# endif
1914}
1915#endif
1916
1917
1918/**
1919 * Atomically Compare and Exchange a signed 16-bit value, additionally
1920 * passes back old value, ordered.
1921 *
1922 * @returns true if xchg was done.
1923 * @returns false if xchg wasn't done.
1924 *
1925 * @param pi16 Pointer to the value to update.
1926 * @param i16New The new value to assign to *pi16.
1927 * @param i16Old The old value to compare *pi16 with.
1928 * @param pi16Old Pointer to store the old value at.
1929 *
1930 * @remarks x86: Requires a 486 or later.
1931 */
1932DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1933{
1934 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1935}
1936
1937
1938/**
1939 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1940 * passes back old value, ordered.
1941 *
1942 * @returns true if xchg was done.
1943 * @returns false if xchg wasn't done.
1944 *
1945 * @param pu32 Pointer to the value to update.
1946 * @param u32New The new value to assign to *pu32.
1947 * @param u32Old The old value to compare *pu32 with.
1948 * @param pu32Old Pointer to store the old value at.
1949 *
1950 * @remarks x86: Requires a 486 or later.
1951 */
1952#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1953RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1954#else
1955DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1956{
1957# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1958# if RT_INLINE_ASM_GNU_STYLE
1959 uint8_t u8Ret;
1960 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1961 "setz %1\n\t"
1962 : "=m" (*pu32)
1963 , "=qm" (u8Ret)
1964 , "=a" (*pu32Old)
1965 : "r" (u32New)
1966 , "a" (u32Old)
1967 , "m" (*pu32)
1968 : "cc");
1969 return (bool)u8Ret;
1970
1971# elif RT_INLINE_ASM_USES_INTRIN
1972 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1973
1974# else
1975 uint32_t u32Ret;
1976 __asm
1977 {
1978# ifdef RT_ARCH_AMD64
1979 mov rdx, [pu32]
1980# else
1981 mov edx, [pu32]
1982# endif
1983 mov eax, [u32Old]
1984 mov ecx, [u32New]
1985# ifdef RT_ARCH_AMD64
1986 lock cmpxchg [rdx], ecx
1987 mov rdx, [pu32Old]
1988 mov [rdx], eax
1989# else
1990 lock cmpxchg [edx], ecx
1991 mov edx, [pu32Old]
1992 mov [edx], eax
1993# endif
1994 setz al
1995 movzx eax, al
1996 mov [u32Ret], eax
1997 }
1998 return !!u32Ret;
1999# endif
2000
2001# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2002 union { uint32_t u; bool f; } fXchg;
2003 uint32_t u32ActualOld;
2004 uint32_t rcSpill;
2005 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
2006 RTASM_ARM_DMB_SY
2007# if defined(RT_ARCH_ARM64)
2008 "ldaxr %w[uOld], %[pMem]\n\t"
2009 "cmp %w[uOld], %w[uCmp]\n\t"
2010 "bne 1f\n\t" /* stop here if not equal */
2011 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
2012 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
2013 "mov %w[fXchg], #1\n\t"
2014 "1:\n\t"
2015 "clrex\n\t"
2016# else
2017 "ldrex %[uOld], %[pMem]\n\t"
2018 "teq %[uOld], %[uCmp]\n\t"
2019 "strexeq %[rc], %[uNew], %[pMem]\n\t"
2020 "bne 1f\n\t" /* stop here if not equal */
2021 "cmp %[rc], #0\n\t"
2022 "bne Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
2023 "mov %[fXchg], #1\n\t"
2024 "1:\n\t"
2025 /** @todo clrexne on armv7? */
2026# endif
2027 : [pMem] "+Q" (*pu32)
2028 , [uOld] "=&r" (u32ActualOld)
2029 , [rc] "=&r" (rcSpill)
2030 , [fXchg] "=&r" (fXchg.u)
2031 : [uCmp] "r" (u32Old)
2032 , [uNew] "r" (u32New)
2033 , "[fXchg]" (0)
2034 RTASM_ARM_DMB_SY_COMMA_IN_REG
2035 : "cc");
2036 *pu32Old = u32ActualOld;
2037 return fXchg.f;
2038
2039# else
2040# error "Port me"
2041# endif
2042}
2043#endif
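/* Usage sketch (illustrative only; asmAtomicUpdateMaxU32 is a made-up helper):
 * the Ex variants pass back the value actually found in memory, so a compare-
 * exchange retry loop gets its next candidate for free instead of re-reading
 * *pu32Max after every failed attempt -- on failure u32Cur is refreshed and the
 * loop simply goes again:
 *
 *      static void asmAtomicUpdateMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur = ASMAtomicUoReadU32(pu32Max);
 *          while (   u32New > u32Cur
 *                 && !ASMAtomicCmpXchgExU32(pu32Max, u32New, u32Cur, &u32Cur))
 *          { }
 *      }
 */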
2044
2045
2046/**
2047 * Atomically Compare and Exchange a signed 32-bit value, additionally
2048 * passes back old value, ordered.
2049 *
2050 * @returns true if xchg was done.
2051 * @returns false if xchg wasn't done.
2052 *
2053 * @param pi32 Pointer to the value to update.
2054 * @param i32New The new value to assign to *pi32.
2055 * @param i32Old The old value to compare *pi32 with.
2056 * @param pi32Old Pointer to store the old value at.
2057 *
2058 * @remarks x86: Requires a 486 or later.
2059 */
2060DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2061{
2062 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2063}
2064
2065
2066/**
2067 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2068 * passing back old value, ordered.
2069 *
2070 * @returns true if xchg was done.
2071 * @returns false if xchg wasn't done.
2072 *
2073 * @param pu64 Pointer to the 64-bit variable to update.
2074 * @param u64New The 64-bit value to assign to *pu64.
2075 * @param u64Old The value to compare with.
2076 * @param pu64Old Pointer to store the old value at.
2077 *
2078 * @remarks x86: Requires a Pentium or later.
2079 */
2080#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2081 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2082RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2083#else
2084DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2085{
2086# if RT_INLINE_ASM_USES_INTRIN
2087 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2088
2089# elif defined(RT_ARCH_AMD64)
2090# if RT_INLINE_ASM_GNU_STYLE
2091 uint8_t u8Ret;
2092 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2093 "setz %1\n\t"
2094 : "=m" (*pu64)
2095 , "=qm" (u8Ret)
2096 , "=a" (*pu64Old)
2097 : "r" (u64New)
2098 , "a" (u64Old)
2099 , "m" (*pu64)
2100 : "cc");
2101 return (bool)u8Ret;
2102# else
2103 bool fRet;
2104 __asm
2105 {
2106 mov rdx, [pu64]
2107 mov rax, [u64Old]
2108 mov rcx, [u64New]
2109 lock cmpxchg [rdx], rcx
2110 mov rdx, [pu64Old]
2111 mov [rdx], rax
2112 setz al
2113 mov [fRet], al
2114 }
2115 return fRet;
2116# endif
2117
2118# elif defined(RT_ARCH_X86)
2119# if RT_INLINE_ASM_GNU_STYLE
2120 uint64_t u64Ret;
2121# if defined(PIC) || defined(__PIC__)
2122 /* Note #1: This code uses a memory clobber description, because the clean
2123 solution with an output value for *pu64 makes gcc run out of
2124 registers. This will cause suboptimal code, and anyone with a
2125 better solution is welcome to improve this.
2126
2127 Note #2: We must prevent gcc from encoding the memory access, as it
2128 may go via the GOT if we're working on a global variable (like
2129 in the testcase). Thus we request a register (%3) and
2130 dereference it ourselves. */
2131 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2132 "lock; cmpxchg8b (%3)\n\t"
2133 "xchgl %%ebx, %1\n\t"
2134 : "=A" (u64Ret)
2135 : "DS" ((uint32_t)u64New)
2136 , "c" ((uint32_t)(u64New >> 32))
2137 , "r" (pu64) /* Do not use "m" here*/
2138 , "0" (u64Old)
2139 : "memory"
2140 , "cc" );
2141# else /* !PIC */
2142 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2143 : "=A" (u64Ret)
2144 , "=m" (*pu64)
2145 : "b" ((uint32_t)u64New)
2146 , "c" ((uint32_t)(u64New >> 32))
2147 , "m" (*pu64)
2148 , "0" (u64Old)
2149 : "cc");
2150# endif
2151 *pu64Old = u64Ret;
2152 return u64Ret == u64Old;
2153# else
2154 uint32_t u32Ret;
2155 __asm
2156 {
2157 mov ebx, dword ptr [u64New]
2158 mov ecx, dword ptr [u64New + 4]
2159 mov edi, [pu64]
2160 mov eax, dword ptr [u64Old]
2161 mov edx, dword ptr [u64Old + 4]
2162 lock cmpxchg8b [edi]
2163 mov ebx, [pu64Old]
2164 mov [ebx], eax
2165 setz al
2166 movzx eax, al
2167 add ebx, 4
2168 mov [ebx], edx
2169 mov dword ptr [u32Ret], eax
2170 }
2171 return !!u32Ret;
2172# endif
2173
2174# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2175 union { uint32_t u; bool f; } fXchg;
2176 uint64_t u64ActualOld;
2177 uint32_t rcSpill;
2178 __asm__ __volatile__("Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2179 RTASM_ARM_DMB_SY
2180# if defined(RT_ARCH_ARM64)
2181 "ldaxr %[uOld], %[pMem]\n\t"
2182 "cmp %[uOld], %[uCmp]\n\t"
2183 "bne 1f\n\t" /* stop here if not equal */
2184 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2185 "cbnz %w[rc], Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2186 "mov %w[fXchg], #1\n\t"
2187 "1:\n\t"
2188 "clrex\n\t"
2189# else
2190 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2191 "teq %[uOld], %[uCmp]\n\t"
2192 "teqeq %H[uOld], %H[uCmp]\n\t"
2193 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2194 "bne 1f\n\t" /* stop here if not equal */
2195 "cmp %[rc], #0\n\t"
2196 "bne Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2197 "mov %[fXchg], #1\n\t"
2198 "1:\n\t"
2199 /** @todo clrexne on armv7? */
2200# endif
2201 : [pMem] "+Q" (*pu64)
2202 , [uOld] "=&r" (u64ActualOld)
2203 , [rc] "=&r" (rcSpill)
2204 , [fXchg] "=&r" (fXchg.u)
2205 : [uCmp] "r" (u64Old)
2206 , [uNew] "r" (u64New)
2207 , "[fXchg]" (0)
2208 RTASM_ARM_DMB_SY_COMMA_IN_REG
2209 : "cc");
2210 *pu64Old = u64ActualOld;
2211 return fXchg.f;
2212
2213# else
2214# error "Port me"
2215# endif
2216}
2217#endif
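/* Usage sketch (illustrative only; asmAtomicAddSaturateU64 is a made-up helper):
 * the same retry pattern scales to 64 bits, e.g. for a statistics counter that
 * should stick at UINT64_MAX rather than wrap around:
 *
 *      static void asmAtomicAddSaturateU64(uint64_t volatile *pu64, uint64_t uToAdd)
 *      {
 *          uint64_t u64Old = ASMAtomicUoReadU64(pu64);
 *          uint64_t u64New;
 *          do
 *              u64New = u64Old <= UINT64_MAX - uToAdd ? u64Old + uToAdd : UINT64_MAX;
 *          while (!ASMAtomicCmpXchgExU64(pu64, u64New, u64Old, &u64Old));
 *      }
 */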
2218
2219
2220/**
2221 * Atomically Compare and exchange a signed 64-bit value, additionally
2222 * passing back old value, ordered.
2223 *
2224 * @returns true if xchg was done.
2225 * @returns false if xchg wasn't done.
2226 *
2227 * @param pi64 Pointer to the 64-bit variable to update.
2228 * @param i64 The 64-bit value to assign to *pi64.
2229 * @param i64Old The value to compare with.
2230 * @param pi64Old Pointer to store the old value at.
2231 *
2232 * @remarks x86: Requires a Pentium or later.
2233 */
2234DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2235{
2236 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2237}
2238
2239#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
2240
2241/** @def RTASM_HAVE_CMP_XCHG_U128
2242 * Indicates that we've got ASMAtomicCmpXchgU128(), ASMAtomicCmpXchgU128v2()
2243 * and ASMAtomicCmpXchgU128U() available. */
2244# define RTASM_HAVE_CMP_XCHG_U128 1
2245
2246
2247/**
2248 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2249 *
2250 * @returns true if exchange was done.
2251 * @returns false if exchange wasn't done.
2252 *
2253 * @param pu128 Pointer to the 128-bit variable to update.
2254 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
2255 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
2256 * @param u64OldHi The high 64 bits of the value to compare with.
2257 * @param u64OldLo The low 64 bits of the value to compare with.
2258 * @param pu128Old Where to return the old value.
2259 *
2260 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2261 */
2262# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
2263DECLASM(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2264 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_PROTO;
2265# else
2266DECLINLINE(bool) ASMAtomicCmpXchgU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
2267 const uint64_t u64OldHi, const uint64_t u64OldLo, uint128_t *pu128Old) RT_NOTHROW_DEF
2268{
2269# if RT_INLINE_ASM_USES_INTRIN
2270 pu128Old->Hi = u64OldHi;
2271 pu128Old->Lo = u64OldLo;
2272 AssertCompileMemberOffset(uint128_t, Lo, 0);
2273 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, (__int64 *)&pu128Old->Lo) != 0;
2274
2275# elif (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2276 uint128_t const uCmp = ((uint128_t)u64OldHi << 64) | u64OldLo;
2277 uint128_t const uOld = __sync_val_compare_and_swap(pu128, uCmp, ((uint128_t)u64NewHi << 64) | u64NewLo);
2278 *pu128Old = uOld;
2279 return uCmp == uOld;
2280
2281# elif defined(RT_ARCH_AMD64)
2282# if RT_INLINE_ASM_GNU_STYLE
2283 uint8_t bRet;
2284 uint64_t u64RetHi, u64RetLo;
2285 __asm__ __volatile__("lock; cmpxchg16b %3\n\t"
2286 "setz %b0\n\t"
2287 : "=r" (bRet)
2288 , "=a" (u64RetLo)
2289 , "=d" (u64RetHi)
2290 , "+m" (*pu128)
2291 : "a" (u64OldLo)
2292 , "d" (u64OldHi)
2293 , "b" (u64NewLo)
2294 , "c" (u64NewHi)
2295 : "cc");
2296 *pu128Old = ((uint128_t)u64RetHi << 64) | u64RetLo;
2297 return (bool)bRet;
2298# else
2299# error "Port me"
2300# endif
2301# else
2302# error "Port me"
2303# endif
2304}
2305# endif
2306
2307
2308/**
2309 * Atomically compare and exchange an unsigned 128-bit value, ordered.
2310 *
2311 * @returns true if exchange was done.
2312 * @returns false if exchange wasn't done.
2313 *
2314 * @param pu128 Pointer to the 128-bit variable to update.
2315 * @param u128New The 128-bit value to assign to *pu128.
2316 * @param u128Old The value to compare with.
2317 * @param pu128Old Where to return the old value.
2318 *
2319 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
2320 */
2321DECLINLINE(bool) ASMAtomicCmpXchgU128(volatile uint128_t *pu128, const uint128_t u128New,
2322 const uint128_t u128Old, uint128_t *pu128Old) RT_NOTHROW_DEF
2323{
2324# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
2325# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2326 uint128_t const uSwapped = __sync_val_compare_and_swap(pu128, u128Old, u128New);
2327 *pu128Old = uSwapped;
2328 return uSwapped == u128Old;
2329# else
2330 return ASMAtomicCmpXchgU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
2331 (uint64_t)(u128Old >> 64), (uint64_t)u128Old, pu128Old);
2332# endif
2333# else
2334 return ASMAtomicCmpXchgU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo, pu128Old);
2335# endif
2336}
2337
2338
2339/**
2340 * RTUINT128U wrapper for ASMAtomicCmpXchgU128.
2341 */
2342DECLINLINE(bool) ASMAtomicCmpXchgU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
2343 const RTUINT128U u128Old, PRTUINT128U pu128Old) RT_NOTHROW_DEF
2344{
2345# if (defined(__clang_major__) || defined(__GNUC__)) && defined(RT_ARCH_ARM64)
2346 return ASMAtomicCmpXchgU128(&pu128->u, u128New.u, u128Old.u, &pu128Old->u);
2347# else
2348 return ASMAtomicCmpXchgU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo, &pu128Old->u);
2349# endif
2350}
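/* Usage sketch (illustrative only; MYTAGGEDPTR, g_TaggedHead and myTaggedPtrSet
 * are made-up names): on hosts where RTASM_HAVE_CMP_XCHG_U128 is defined, a
 * pointer can be updated together with a generation counter in one operation,
 * the usual way of fending off ABA problems in lock-free structures.  The
 * initial value of Old is only a guess; a failed compare-exchange refreshes it
 * with what is actually in memory:
 *
 *      typedef union MYTAGGEDPTR
 *      {
 *          RTUINT128U u128;
 *          struct { void *pv; uint64_t uGen; } s;
 *      } MYTAGGEDPTR;
 *      static MYTAGGEDPTR volatile g_TaggedHead;
 *
 *      static void myTaggedPtrSet(void *pvNew)
 *      {
 *          MYTAGGEDPTR Old, New;
 *          Old.s.pv = NULL;
 *          Old.s.uGen = 0;
 *          do
 *          {
 *              New.s.pv = pvNew;
 *              New.s.uGen = Old.s.uGen + 1;
 *          } while (!ASMAtomicCmpXchgU128U(&g_TaggedHead.u128, New.u128, Old.u128, &Old.u128));
 *      }
 */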
2351
2352#endif /* RT_ARCH_AMD64 || RT_ARCH_ARM64 */
2353
2354
2355
2356/** @def ASMAtomicCmpXchgExHandle
2357 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2358 *
2359 * @param ph Pointer to the value to update.
2360 * @param hNew The new value to assign to *ph.
2361 * @param hOld The old value to compare *ph with.
2362 * @param fRc Where to store the result.
2363 * @param phOldVal Pointer to where to store the old value.
2364 *
2365 * @remarks This doesn't currently work for all handles (like RTFILE).
2366 */
2367#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2368# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2369 do { \
2370 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2371 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
2372 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2373 } while (0)
2374#elif HC_ARCH_BITS == 64
2375# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2376 do { \
2377 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2378 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2379 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2380 } while (0)
2381#else
2382# error HC_ARCH_BITS
2383#endif
2384
2385
2386/** @def ASMAtomicCmpXchgExSize
2387 * Atomically Compare and Exchange a value whose size might differ
2388 * between platforms or compilers. Additionally passes back old value.
2389 *
2390 * @param pu Pointer to the value to update.
2391 * @param uNew The new value to assign to *pu.
2392 * @param uOld The old value to compare *pu with.
2393 * @param fRc Where to store the result.
2394 * @param puOldVal Pointer to where to store the old value.
2395 *
2396 * @remarks x86: Requires a 486 or later.
2397 */
2398#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2399 do { \
2400 switch (sizeof(*(pu))) { \
2401 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
2402 break; \
2403 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
2404 break; \
2405 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2406 (fRc) = false; \
2407 *(puOldVal) = 0; \
2408 break; \
2409 } \
2410 } while (0)
2411
2412
2413/**
2414 * Atomically Compare and Exchange a pointer value, additionally
2415 * passing back old value, ordered.
2416 *
2417 * @returns true if xchg was done.
2418 * @returns false if xchg wasn't done.
2419 *
2420 * @param ppv Pointer to the value to update.
2421 * @param pvNew The new value to assign to *ppv.
2422 * @param pvOld The old value to compare *ppv with.
2423 * @param ppvOld Pointer to store the old value at.
2424 *
2425 * @remarks x86: Requires a 486 or later.
2426 */
2427DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2428 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2429{
2430#if ARCH_BITS == 32 || ARCH_BITS == 16
2431 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2432#elif ARCH_BITS == 64
2433 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2434#else
2435# error "ARCH_BITS is bogus"
2436#endif
2437}
2438
2439
2440/**
2441 * Atomically Compare and Exchange a pointer value, additionally
2442 * passing back old value, ordered.
2443 *
2444 * @returns true if xchg was done.
2445 * @returns false if xchg wasn't done.
2446 *
2447 * @param ppv Pointer to the value to update.
2448 * @param pvNew The new value to assign to *ppv.
2449 * @param pvOld The old value to compare *ppv with.
2450 * @param ppvOld Pointer to store the old value at.
2451 *
2452 * @remarks This is relatively type safe on GCC platforms.
2453 * @remarks x86: Requires a 486 or later.
2454 */
2455#ifdef __GNUC__
2456# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2457 __extension__ \
2458 ({\
2459 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2460 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2461 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2462 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2463 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2464 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2465 (void **)ppvOldTypeChecked); \
2466 fMacroRet; \
2467 })
2468#else
2469# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2470 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2471#endif
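/* Usage sketch (illustrative only; MYNODE, g_pMyHead and myLifoPush are made-up
 * names): a typical customer is a lock-free LIFO push, where the old head handed
 * back on failure is exactly the value needed for the next attempt:
 *
 *      typedef struct MYNODE { struct MYNODE *pNext; int iValue; } MYNODE;
 *      static MYNODE * volatile g_pMyHead = NULL;
 *
 *      static void myLifoPush(MYNODE *pNode)
 *      {
 *          MYNODE *pOldHead = ASMAtomicUoReadPtrT(&g_pMyHead, MYNODE *);
 *          do
 *              pNode->pNext = pOldHead;
 *          while (!ASMAtomicCmpXchgExPtr(&g_pMyHead, pNode, pOldHead, &pOldHead));
 *      }
 */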
2472
2473
2474/**
2475 * Virtualization unfriendly serializing instruction, always exits.
2476 */
2477#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2478RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2479#else
2480DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2481{
2482# if RT_INLINE_ASM_GNU_STYLE
2483 RTCCUINTREG xAX = 0;
2484# ifdef RT_ARCH_AMD64
2485 __asm__ __volatile__ ("cpuid"
2486 : "=a" (xAX)
2487 : "0" (xAX)
2488 : "rbx", "rcx", "rdx", "memory");
2489# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2490 __asm__ __volatile__ ("push %%ebx\n\t"
2491 "cpuid\n\t"
2492 "pop %%ebx\n\t"
2493 : "=a" (xAX)
2494 : "0" (xAX)
2495 : "ecx", "edx", "memory");
2496# else
2497 __asm__ __volatile__ ("cpuid"
2498 : "=a" (xAX)
2499 : "0" (xAX)
2500 : "ebx", "ecx", "edx", "memory");
2501# endif
2502
2503# elif RT_INLINE_ASM_USES_INTRIN
2504 int aInfo[4];
2505 _ReadWriteBarrier();
2506 __cpuid(aInfo, 0);
2507
2508# else
2509 __asm
2510 {
2511 push ebx
2512 xor eax, eax
2513 cpuid
2514 pop ebx
2515 }
2516# endif
2517}
2518#endif
2519
2520/**
2521 * Virtualization friendly serializing instruction, though more expensive.
2522 */
2523#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2524RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2525#else
2526DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2527{
2528# if RT_INLINE_ASM_GNU_STYLE
2529# ifdef RT_ARCH_AMD64
2530 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2531 "subq $128, %%rsp\n\t" /*redzone*/
2532 "mov %%ss, %%eax\n\t"
2533 "pushq %%rax\n\t"
2534 "pushq %%r10\n\t"
2535 "pushfq\n\t"
2536 "movl %%cs, %%eax\n\t"
2537 "pushq %%rax\n\t"
2538 "leaq 1f(%%rip), %%rax\n\t"
2539 "pushq %%rax\n\t"
2540 "iretq\n\t"
2541 "1:\n\t"
2542 ::: "rax", "r10", "memory", "cc");
2543# else
2544 __asm__ __volatile__ ("pushfl\n\t"
2545 "pushl %%cs\n\t"
2546 "pushl $1f\n\t"
2547 "iretl\n\t"
2548 "1:\n\t"
2549 ::: "memory");
2550# endif
2551
2552# else
2553 __asm
2554 {
2555 pushfd
2556 push cs
2557 push la_ret
2558 iretd
2559 la_ret:
2560 }
2561# endif
2562}
2563#endif
2564
2565/**
2566 * Virtualization friendlier serializing instruction, may still cause exits.
2567 */
2568#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2569RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2570#else
2571DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2572{
2573# if RT_INLINE_ASM_GNU_STYLE
2574 /* rdtscp is not supported by ancient linux build VM of course :-( */
2575# ifdef RT_ARCH_AMD64
2576 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2577 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2578# else
2579 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2580 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2581# endif
2582# else
2583# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2584 uint32_t uIgnore;
2585 _ReadWriteBarrier();
2586 (void)__rdtscp(&uIgnore);
2587 (void)uIgnore;
2588# else
2589 __asm
2590 {
2591 rdtscp
2592 }
2593# endif
2594# endif
2595}
2596#endif
2597
2598
2599/**
2600 * Serialize Instruction (both data store and instruction flush).
2601 */
2602#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2603# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2604#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2605# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2606#elif defined(RT_ARCH_SPARC64)
2607RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2608#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2609DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2610{
2611 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2612}
2613#else
2614# error "Port me"
2615#endif
2616
2617
2618/**
2619 * Memory fence, waits for any pending writes and reads to complete.
2620 * @note No implicit compiler barrier (which is probably stupid).
2621 */
2622DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2623{
2624#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2625# if RT_INLINE_ASM_GNU_STYLE
2626 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2627# elif RT_INLINE_ASM_USES_INTRIN
2628 _mm_mfence();
2629# else
2630 __asm
2631 {
2632 _emit 0x0f
2633 _emit 0xae
2634 _emit 0xf0
2635 }
2636# endif
2637#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2638 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2639#elif ARCH_BITS == 16
2640 uint16_t volatile u16;
2641 ASMAtomicXchgU16(&u16, 0);
2642#else
2643 uint32_t volatile u32;
2644 ASMAtomicXchgU32(&u32, 0);
2645#endif
2646}
2647
2648
2649/**
2650 * Write fence, waits for any pending writes to complete.
2651 * @note No implicit compiler barrier (which is probably stupid).
2652 */
2653DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2654{
2655#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2656# if RT_INLINE_ASM_GNU_STYLE
2657 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2658# elif RT_INLINE_ASM_USES_INTRIN
2659 _mm_sfence();
2660# else
2661 __asm
2662 {
2663 _emit 0x0f
2664 _emit 0xae
2665 _emit 0xf8
2666 }
2667# endif
2668#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2669 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2670#else
2671 ASMMemoryFence();
2672#endif
2673}
2674
2675
2676/**
2677 * Read fence, waits for any pending reads to complete.
2678 * @note No implicit compiler barrier (which is probably stupid).
2679 */
2680DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2681{
2682#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2683# if RT_INLINE_ASM_GNU_STYLE
2684 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2685# elif RT_INLINE_ASM_USES_INTRIN
2686 _mm_lfence();
2687# else
2688 __asm
2689 {
2690 _emit 0x0f
2691 _emit 0xae
2692 _emit 0xe8
2693 }
2694# endif
2695#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2696 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2697#else
2698 ASMMemoryFence();
2699#endif
2700}
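/* Usage sketch (illustrative only; g_uPayload, g_fDataReady and myConsume are
 * made-up names): the write fence belongs between producing data and publishing
 * the flag, and the read fence between observing the flag and consuming the
 * data.  Since none of these fences imply a compiler barrier, the shared
 * variables are declared volatile here (or use the ASMAtomic* accessors):
 *
 *      static uint32_t volatile g_uPayload;
 *      static bool volatile g_fDataReady = false;
 *
 *      Producer:
 *          g_uPayload = 42;
 *          ASMWriteFence();
 *          g_fDataReady = true;
 *
 *      Consumer:
 *          if (g_fDataReady)
 *          {
 *              ASMReadFence();
 *              myConsume(g_uPayload);
 *          }
 */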
2701
2702
2703/**
2704 * Atomically reads an unsigned 8-bit value, ordered.
2705 *
2706 * @returns Current *pu8 value
2707 * @param pu8 Pointer to the 8-bit variable to read.
2708 */
2709DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2710{
2711#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2712 uint32_t u32;
2713 __asm__ __volatile__("Lstart_ASMAtomicReadU8_%=:\n\t"
2714 RTASM_ARM_DMB_SY
2715# if defined(RT_ARCH_ARM64)
2716 "ldxrb %w[uDst], %[pMem]\n\t"
2717# else
2718 "ldrexb %[uDst], %[pMem]\n\t"
2719# endif
2720 : [uDst] "=&r" (u32)
2721 : [pMem] "Q" (*pu8)
2722 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2723 return (uint8_t)u32;
2724#else
2725 ASMMemoryFence();
2726 return *pu8; /* byte reads are atomic on x86 */
2727#endif
2728}
2729
2730
2731/**
2732 * Atomically reads an unsigned 8-bit value, unordered.
2733 *
2734 * @returns Current *pu8 value
2735 * @param pu8 Pointer to the 8-bit variable to read.
2736 */
2737DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2738{
2739#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2740 uint32_t u32;
2741 __asm__ __volatile__("Lstart_ASMAtomicUoReadU8_%=:\n\t"
2742# if defined(RT_ARCH_ARM64)
2743 "ldxrb %w[uDst], %[pMem]\n\t"
2744# else
2745 "ldrexb %[uDst], %[pMem]\n\t"
2746# endif
2747 : [uDst] "=&r" (u32)
2748 : [pMem] "Q" (*pu8));
2749 return (uint8_t)u32;
2750#else
2751 return *pu8; /* byte reads are atomic on x86 */
2752#endif
2753}
2754
2755
2756/**
2757 * Atomically reads a signed 8-bit value, ordered.
2758 *
2759 * @returns Current *pi8 value
2760 * @param pi8 Pointer to the 8-bit variable to read.
2761 */
2762DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2763{
2764 ASMMemoryFence();
2765#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2766 int32_t i32;
2767 __asm__ __volatile__("Lstart_ASMAtomicReadS8_%=:\n\t"
2768 RTASM_ARM_DMB_SY
2769# if defined(RT_ARCH_ARM64)
2770 "ldxrb %w[iDst], %[pMem]\n\t"
2771# else
2772 "ldrexb %[iDst], %[pMem]\n\t"
2773# endif
2774 : [iDst] "=&r" (i32)
2775 : [pMem] "Q" (*pi8)
2776 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2777 return (int8_t)i32;
2778#else
2779 return *pi8; /* byte reads are atomic on x86 */
2780#endif
2781}
2782
2783
2784/**
2785 * Atomically reads a signed 8-bit value, unordered.
2786 *
2787 * @returns Current *pi8 value
2788 * @param pi8 Pointer to the 8-bit variable to read.
2789 */
2790DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2791{
2792#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2793 int32_t i32;
2794 __asm__ __volatile__("Lstart_ASMAtomicUoReadS8_%=:\n\t"
2795# if defined(RT_ARCH_ARM64)
2796 "ldxrb %w[iDst], %[pMem]\n\t"
2797# else
2798 "ldrexb %[iDst], %[pMem]\n\t"
2799# endif
2800 : [iDst] "=&r" (i32)
2801 : [pMem] "Q" (*pi8));
2802 return (int8_t)i32;
2803#else
2804 return *pi8; /* byte reads are atomic on x86 */
2805#endif
2806}
2807
2808
2809/**
2810 * Atomically reads an unsigned 16-bit value, ordered.
2811 *
2812 * @returns Current *pu16 value
2813 * @param pu16 Pointer to the 16-bit variable to read.
2814 */
2815DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2816{
2817 Assert(!((uintptr_t)pu16 & 1));
2818#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2819 uint32_t u32;
2820 __asm__ __volatile__("Lstart_ASMAtomicReadU16_%=:\n\t"
2821 RTASM_ARM_DMB_SY
2822# if defined(RT_ARCH_ARM64)
2823 "ldxrh %w[uDst], %[pMem]\n\t"
2824# else
2825 "ldrexh %[uDst], %[pMem]\n\t"
2826# endif
2827 : [uDst] "=&r" (u32)
2828 : [pMem] "Q" (*pu16)
2829 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2830 return (uint16_t)u32;
2831#else
2832 ASMMemoryFence();
2833 return *pu16;
2834#endif
2835}
2836
2837
2838/**
2839 * Atomically reads an unsigned 16-bit value, unordered.
2840 *
2841 * @returns Current *pu16 value
2842 * @param pu16 Pointer to the 16-bit variable to read.
2843 */
2844DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2845{
2846 Assert(!((uintptr_t)pu16 & 1));
2847#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2848 uint32_t u32;
2849 __asm__ __volatile__("Lstart_ASMAtomicUoReadU16_%=:\n\t"
2850# if defined(RT_ARCH_ARM64)
2851 "ldxrh %w[uDst], %[pMem]\n\t"
2852# else
2853 "ldrexh %[uDst], %[pMem]\n\t"
2854# endif
2855 : [uDst] "=&r" (u32)
2856 : [pMem] "Q" (*pu16));
2857 return (uint16_t)u32;
2858#else
2859 return *pu16;
2860#endif
2861}
2862
2863
2864/**
2865 * Atomically reads a signed 16-bit value, ordered.
2866 *
2867 * @returns Current *pi16 value
2868 * @param pi16 Pointer to the 16-bit variable to read.
2869 */
2870DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2871{
2872 Assert(!((uintptr_t)pi16 & 1));
2873#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2874 int32_t i32;
2875 __asm__ __volatile__("Lstart_ASMAtomicReadS16_%=:\n\t"
2876 RTASM_ARM_DMB_SY
2877# if defined(RT_ARCH_ARM64)
2878 "ldxrh %w[iDst], %[pMem]\n\t"
2879# else
2880 "ldrexh %[iDst], %[pMem]\n\t"
2881# endif
2882 : [iDst] "=&r" (i32)
2883 : [pMem] "Q" (*pi16)
2884 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2885 return (int16_t)i32;
2886#else
2887 ASMMemoryFence();
2888 return *pi16;
2889#endif
2890}
2891
2892
2893/**
2894 * Atomically reads a signed 16-bit value, unordered.
2895 *
2896 * @returns Current *pi16 value
2897 * @param pi16 Pointer to the 16-bit variable to read.
2898 */
2899DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2900{
2901 Assert(!((uintptr_t)pi16 & 1));
2902#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2903 int32_t i32;
2904 __asm__ __volatile__("Lstart_ASMAtomicUoReadS16_%=:\n\t"
2905# if defined(RT_ARCH_ARM64)
2906 "ldxrh %w[iDst], %[pMem]\n\t"
2907# else
2908 "ldrexh %[iDst], %[pMem]\n\t"
2909# endif
2910 : [iDst] "=&r" (i32)
2911 : [pMem] "Q" (*pi16));
2912 return (int16_t)i32;
2913#else
2914 return *pi16;
2915#endif
2916}
2917
2918
2919/**
2920 * Atomically reads an unsigned 32-bit value, ordered.
2921 *
2922 * @returns Current *pu32 value
2923 * @param pu32 Pointer to the 32-bit variable to read.
2924 */
2925DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2926{
2927 Assert(!((uintptr_t)pu32 & 3));
2928#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2929 uint32_t u32;
2930 __asm__ __volatile__("Lstart_ASMAtomicReadU32_%=:\n\t"
2931 RTASM_ARM_DMB_SY
2932# if defined(RT_ARCH_ARM64)
2933 "ldxr %w[uDst], %[pMem]\n\t"
2934# else
2935 "ldrex %[uDst], %[pMem]\n\t"
2936# endif
2937 : [uDst] "=&r" (u32)
2938 : [pMem] "Q" (*pu32)
2939 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2940 return u32;
2941#else
2942 ASMMemoryFence();
2943# if ARCH_BITS == 16
2944 AssertFailed(); /** @todo 16-bit */
2945# endif
2946 return *pu32;
2947#endif
2948}
2949
2950
2951/**
2952 * Atomically reads an unsigned 32-bit value, unordered.
2953 *
2954 * @returns Current *pu32 value
2955 * @param pu32 Pointer to the 32-bit variable to read.
2956 */
2957DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2958{
2959 Assert(!((uintptr_t)pu32 & 3));
2960#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2961 uint32_t u32;
2962 __asm__ __volatile__("Lstart_ASMAtomicUoReadU32_%=:\n\t"
2963# if defined(RT_ARCH_ARM64)
2964 "ldxr %w[uDst], %[pMem]\n\t"
2965# else
2966 "ldrex %[uDst], %[pMem]\n\t"
2967# endif
2968 : [uDst] "=&r" (u32)
2969 : [pMem] "Q" (*pu32));
2970 return u32;
2971#else
2972# if ARCH_BITS == 16
2973 AssertFailed(); /** @todo 16-bit */
2974# endif
2975 return *pu32;
2976#endif
2977}
2978
2979
2980/**
2981 * Atomically reads a signed 32-bit value, ordered.
2982 *
2983 * @returns Current *pi32 value
2984 * @param pi32 Pointer to the 32-bit variable to read.
2985 */
2986DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2987{
2988 Assert(!((uintptr_t)pi32 & 3));
2989#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2990 int32_t i32;
2991 __asm__ __volatile__("Lstart_ASMAtomicReadS32_%=:\n\t"
2992 RTASM_ARM_DMB_SY
2993# if defined(RT_ARCH_ARM64)
2994 "ldxr %w[iDst], %[pMem]\n\t"
2995# else
2996 "ldrex %[iDst], %[pMem]\n\t"
2997# endif
2998 : [iDst] "=&r" (i32)
2999 : [pMem] "Q" (*pi32)
3000 RTASM_ARM_DMB_SY_COMMA_IN_REG);
3001 return i32;
3002#else
3003 ASMMemoryFence();
3004# if ARCH_BITS == 16
3005 AssertFailed(); /** @todo 16-bit */
3006# endif
3007 return *pi32;
3008#endif
3009}
3010
3011
3012/**
3013 * Atomically reads a signed 32-bit value, unordered.
3014 *
3015 * @returns Current *pi32 value
3016 * @param pi32 Pointer to the 32-bit variable to read.
3017 */
3018DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
3019{
3020 Assert(!((uintptr_t)pi32 & 3));
3021#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3022 int32_t i32;
3023 __asm__ __volatile__("Lstart_ASMAtomicUoReadS32_%=:\n\t"
3024# if defined(RT_ARCH_ARM64)
3025 "ldxr %w[iDst], %[pMem]\n\t"
3026# else
3027 "ldrex %[iDst], %[pMem]\n\t"
3028# endif
3029 : [iDst] "=&r" (i32)
3030 : [pMem] "Q" (*pi32));
3031 return i32;
3032
3033#else
3034# if ARCH_BITS == 16
3035 AssertFailed(); /** @todo 16-bit */
3036# endif
3037 return *pi32;
3038#endif
3039}
3040
3041
3042/**
3043 * Atomically reads an unsigned 64-bit value, ordered.
3044 *
3045 * @returns Current *pu64 value
3046 * @param pu64 Pointer to the 64-bit variable to read.
3047 * The memory pointed to must be writable.
3048 *
3049 * @remarks This may fault if the memory is read-only!
3050 * @remarks x86: Requires a Pentium or later.
3051 */
3052#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
3053 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
3054RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3055#else
3056DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3057{
3058 uint64_t u64;
3059# ifdef RT_ARCH_AMD64
3060 Assert(!((uintptr_t)pu64 & 7));
3061/*# if RT_INLINE_ASM_GNU_STYLE
3062 __asm__ __volatile__( "mfence\n\t"
3063 "movq %1, %0\n\t"
3064 : "=r" (u64)
3065 : "m" (*pu64));
3066# else
3067 __asm
3068 {
3069 mfence
3070 mov rdx, [pu64]
3071 mov rax, [rdx]
3072 mov [u64], rax
3073 }
3074# endif*/
3075 ASMMemoryFence();
3076 u64 = *pu64;
3077
3078# elif defined(RT_ARCH_X86)
3079# if RT_INLINE_ASM_GNU_STYLE
3080# if defined(PIC) || defined(__PIC__)
3081 uint32_t u32EBX = 0;
3082 Assert(!((uintptr_t)pu64 & 7));
3083 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3084 "lock; cmpxchg8b (%5)\n\t"
3085 "movl %3, %%ebx\n\t"
3086 : "=A" (u64)
3087# if RT_GNUC_PREREQ(4, 3)
3088 , "+m" (*pu64)
3089# else
3090 , "=m" (*pu64)
3091# endif
3092 : "0" (0ULL)
3093 , "m" (u32EBX)
3094 , "c" (0)
3095 , "S" (pu64)
3096 : "cc");
3097# else /* !PIC */
3098 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3099 : "=A" (u64)
3100 , "+m" (*pu64)
3101 : "0" (0ULL)
3102 , "b" (0)
3103 , "c" (0)
3104 : "cc");
3105# endif
3106# else
3107 Assert(!((uintptr_t)pu64 & 7));
3108 __asm
3109 {
3110 xor eax, eax
3111 xor edx, edx
3112 mov edi, pu64
3113 xor ecx, ecx
3114 xor ebx, ebx
3115 lock cmpxchg8b [edi]
3116 mov dword ptr [u64], eax
3117 mov dword ptr [u64 + 4], edx
3118 }
3119# endif
3120
3121# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3122 Assert(!((uintptr_t)pu64 & 7));
3123 __asm__ __volatile__("Lstart_ASMAtomicReadU64_%=:\n\t"
3124 RTASM_ARM_DMB_SY
3125# if defined(RT_ARCH_ARM64)
3126 "ldxr %[uDst], %[pMem]\n\t"
3127# else
3128 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3129# endif
3130 : [uDst] "=&r" (u64)
3131 : [pMem] "Q" (*pu64)
3132 RTASM_ARM_DMB_SY_COMMA_IN_REG);
3133
3134# else
3135# error "Port me"
3136# endif
3137 return u64;
3138}
3139#endif
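/* Usage sketch (illustrative only; g_u64LastNanoTS and u64Now are made-up): the
 * point of the helper is avoiding torn reads -- a plain 64-bit load can be split
 * into two 32-bit loads on 32-bit hosts, so shared 64-bit state should go through
 * the atomic accessors on both ends.  (ASMAtomicWriteU64 is defined further down
 * in this header.)
 *
 *      static uint64_t volatile g_u64LastNanoTS;
 *
 *      writer:    ASMAtomicWriteU64(&g_u64LastNanoTS, u64Now);
 *      reader:    uint64_t u64 = ASMAtomicReadU64(&g_u64LastNanoTS);
 */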
3140
3141
3142/**
3143 * Atomically reads an unsigned 64-bit value, unordered.
3144 *
3145 * @returns Current *pu64 value
3146 * @param pu64 Pointer to the 64-bit variable to read.
3147 * The memory pointed to must be writable.
3148 *
3149 * @remarks This may fault if the memory is read-only!
3150 * @remarks x86: Requires a Pentium or later.
3151 */
3152#if !defined(RT_ARCH_AMD64) \
3153 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
3154 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
3155RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
3156#else
3157DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
3158{
3159 uint64_t u64;
3160# ifdef RT_ARCH_AMD64
3161 Assert(!((uintptr_t)pu64 & 7));
3162/*# if RT_INLINE_ASM_GNU_STYLE
3163 Assert(!((uintptr_t)pu64 & 7));
3164 __asm__ __volatile__("movq %1, %0\n\t"
3165 : "=r" (u64)
3166 : "m" (*pu64));
3167# else
3168 __asm
3169 {
3170 mov rdx, [pu64]
3171 mov rax, [rdx]
3172 mov [u64], rax
3173 }
3174# endif */
3175 u64 = *pu64;
3176
3177# elif defined(RT_ARCH_X86)
3178# if RT_INLINE_ASM_GNU_STYLE
3179# if defined(PIC) || defined(__PIC__)
3180 uint32_t u32EBX = 0;
3181 uint32_t u32Spill;
3182 Assert(!((uintptr_t)pu64 & 7));
3183 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3184 "xor %%ecx,%%ecx\n\t"
3185 "xor %%edx,%%edx\n\t"
3186 "xchgl %%ebx, %3\n\t"
3187 "lock; cmpxchg8b (%4)\n\t"
3188 "movl %3, %%ebx\n\t"
3189 : "=A" (u64)
3190# if RT_GNUC_PREREQ(4, 3)
3191 , "+m" (*pu64)
3192# else
3193 , "=m" (*pu64)
3194# endif
3195 , "=c" (u32Spill)
3196 : "m" (u32EBX)
3197 , "S" (pu64)
3198 : "cc");
3199# else /* !PIC */
3200 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3201 : "=A" (u64)
3202 , "+m" (*pu64)
3203 : "0" (0ULL)
3204 , "b" (0)
3205 , "c" (0)
3206 : "cc");
3207# endif
3208# else
3209 Assert(!((uintptr_t)pu64 & 7));
3210 __asm
3211 {
3212 xor eax, eax
3213 xor edx, edx
3214 mov edi, pu64
3215 xor ecx, ecx
3216 xor ebx, ebx
3217 lock cmpxchg8b [edi]
3218 mov dword ptr [u64], eax
3219 mov dword ptr [u64 + 4], edx
3220 }
3221# endif
3222
3223# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3224 Assert(!((uintptr_t)pu64 & 7));
3225 __asm__ __volatile__("Lstart_ASMAtomicUoReadU64_%=:\n\t"
3226# if defined(RT_ARCH_ARM64)
3227 "ldxr %[uDst], %[pMem]\n\t"
3228# else
3229 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3230# endif
3231 : [uDst] "=&r" (u64)
3232 : [pMem] "Q" (*pu64));
3233
3234# else
3235# error "Port me"
3236# endif
3237 return u64;
3238}
3239#endif
3240
3241
3242/**
3243 * Atomically reads a signed 64-bit value, ordered.
3244 *
3245 * @returns Current *pi64 value
3246 * @param pi64 Pointer to the 64-bit variable to read.
3247 * The memory pointed to must be writable.
3248 *
3249 * @remarks This may fault if the memory is read-only!
3250 * @remarks x86: Requires a Pentium or later.
3251 */
3252DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3253{
3254 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3255}
3256
3257
3258/**
3259 * Atomically reads a signed 64-bit value, unordered.
3260 *
3261 * @returns Current *pi64 value
3262 * @param pi64 Pointer to the 64-bit variable to read.
3263 * The memory pointed to must be writable.
3264 *
3265 * @remarks This will fault if the memory is read-only!
3266 * @remarks x86: Requires a Pentium or later.
3267 */
3268DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3269{
3270 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3271}
3272
3273
3274/**
3275 * Atomically reads a size_t value, ordered.
3276 *
3277 * @returns Current *pcb value
3278 * @param pcb Pointer to the size_t variable to read.
3279 */
3280DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3281{
3282#if ARCH_BITS == 64
3283 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3284#elif ARCH_BITS == 32
3285 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3286#elif ARCH_BITS == 16
3287 AssertCompileSize(size_t, 2);
3288 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3289#else
3290# error "Unsupported ARCH_BITS value"
3291#endif
3292}
3293
3294
3295/**
3296 * Atomically reads a size_t value, unordered.
3297 *
3298 * @returns Current *pcb value
3299 * @param pcb Pointer to the size_t variable to read.
3300 */
3301DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3302{
3303#if ARCH_BITS == 64
3304 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3305#elif ARCH_BITS == 32
3306 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3307#elif ARCH_BITS == 16
3308 AssertCompileSize(size_t, 2);
3309 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3310#else
3311# error "Unsupported ARCH_BITS value"
3312#endif
3313}
3314
3315
3316/**
3317 * Atomically reads a pointer value, ordered.
3318 *
3319 * @returns Current *pv value
3320 * @param ppv Pointer to the pointer variable to read.
3321 *
3322 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
3323 * requires less typing (no casts).
3324 */
3325DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3326{
3327#if ARCH_BITS == 32 || ARCH_BITS == 16
3328 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3329#elif ARCH_BITS == 64
3330 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3331#else
3332# error "ARCH_BITS is bogus"
3333#endif
3334}
3335
3336/**
3337 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
3338 *
3339 * @returns Current *pv value
3340 * @param ppv Pointer to the pointer variable to read.
3341 * @param Type The type of *ppv, sans volatile.
3342 */
3343#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3344# define ASMAtomicReadPtrT(ppv, Type) \
3345 __extension__ \
3346 ({\
3347 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
3348 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
3349 pvTypeChecked; \
3350 })
3351#else
3352# define ASMAtomicReadPtrT(ppv, Type) \
3353 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3354#endif
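/* Usage sketch (illustrative only; MYDEV, g_pCurrentDev and mydevPoke are made-up
 * names): the typed variant keeps the casts out of the caller and, on GCC, also
 * checks that the variable really is a pointer of the stated type:
 *
 *      static MYDEV * volatile g_pCurrentDev = NULL;
 *
 *      MYDEV *pDev = ASMAtomicReadPtrT(&g_pCurrentDev, MYDEV *);
 *      if (pDev)
 *          mydevPoke(pDev);
 */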
3355
3356
3357/**
3358 * Atomically reads a pointer value, unordered.
3359 *
3360 * @returns Current *pv value
3361 * @param ppv Pointer to the pointer variable to read.
3362 *
3363 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
3364 * requires less typing (no casts).
3365 */
3366DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3367{
3368#if ARCH_BITS == 32 || ARCH_BITS == 16
3369 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3370#elif ARCH_BITS == 64
3371 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3372#else
3373# error "ARCH_BITS is bogus"
3374#endif
3375}
3376
3377
3378/**
3379 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
3380 *
3381 * @returns Current *pv value
3382 * @param ppv Pointer to the pointer variable to read.
3383 * @param Type The type of *ppv, sans volatile.
3384 */
3385#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3386# define ASMAtomicUoReadPtrT(ppv, Type) \
3387 __extension__ \
3388 ({\
3389 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3390 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
3391 pvTypeChecked; \
3392 })
3393#else
3394# define ASMAtomicUoReadPtrT(ppv, Type) \
3395 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3396#endif
3397
3398
3399/**
3400 * Atomically reads a boolean value, ordered.
3401 *
3402 * @returns Current *pf value
3403 * @param pf Pointer to the boolean variable to read.
3404 */
3405DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3406{
3407 ASMMemoryFence();
3408 return *pf; /* byte reads are atomic on x86 */
3409}
3410
3411
3412/**
3413 * Atomically reads a boolean value, unordered.
3414 *
3415 * @returns Current *pf value
3416 * @param pf Pointer to the boolean variable to read.
3417 */
3418DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3419{
3420 return *pf; /* byte reads are atomic on x86 */
3421}
3422
3423
3424/**
3425 * Atomically read a typical IPRT handle value, ordered.
3426 *
3427 * @param ph Pointer to the handle variable to read.
3428 * @param phRes Where to store the result.
3429 *
3430 * @remarks This doesn't currently work for all handles (like RTFILE).
3431 */
3432#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3433# define ASMAtomicReadHandle(ph, phRes) \
3434 do { \
3435 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3436 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3437 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3438 } while (0)
3439#elif HC_ARCH_BITS == 64
3440# define ASMAtomicReadHandle(ph, phRes) \
3441 do { \
3442 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3443 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3444 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3445 } while (0)
3446#else
3447# error HC_ARCH_BITS
3448#endif
3449
3450
3451/**
3452 * Atomically read a typical IPRT handle value, unordered.
3453 *
3454 * @param ph Pointer to the handle variable to read.
3455 * @param phRes Where to store the result.
3456 *
3457 * @remarks This doesn't currently work for all handles (like RTFILE).
3458 */
3459#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3460# define ASMAtomicUoReadHandle(ph, phRes) \
3461 do { \
3462 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3463 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3464 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3465 } while (0)
3466#elif HC_ARCH_BITS == 64
3467# define ASMAtomicUoReadHandle(ph, phRes) \
3468 do { \
3469 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3470 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3471 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3472 } while (0)
3473#else
3474# error HC_ARCH_BITS
3475#endif
3476
3477
3478/**
3479 * Atomically read a value whose size might differ
3480 * between platforms or compilers, ordered.
3481 *
3482 * @param pu Pointer to the variable to read.
3483 * @param puRes Where to store the result.
3484 */
3485#define ASMAtomicReadSize(pu, puRes) \
3486 do { \
3487 switch (sizeof(*(pu))) { \
3488 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3489 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3490 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3491 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3492 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3493 } \
3494 } while (0)
3495
3496
3497/**
3498 * Atomically read a value whose size might differ
3499 * between platforms or compilers, unordered.
3500 *
3501 * @param pu Pointer to the variable to read.
3502 * @param puRes Where to store the result.
3503 */
3504#define ASMAtomicUoReadSize(pu, puRes) \
3505 do { \
3506 switch (sizeof(*(pu))) { \
3507 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3508 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3509 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3510 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3511 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3512 } \
3513 } while (0)
3514
3515
3516/**
3517 * Atomically writes an unsigned 8-bit value, ordered.
3518 *
3519 * @param pu8 Pointer to the 8-bit variable.
3520 * @param u8 The 8-bit value to assign to *pu8.
3521 */
3522DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3523{
3524#if defined(RT_ARCH_ARM64)
3525 /* The DMB SY will ensure ordering a la x86, the stlrb is probably overkill
3526 as all byte accesses are single-copy atomic, which I think suffices here. */
3527 __asm__ __volatile__("Lstart_ASMAtomicWriteU8_%=:\n\t"
3528# if defined(RTASM_ARM64_USE_FEAT_LSE) && 0 /* this is a lot slower and has no alignment benefits with LSE2 */
3529 RTASM_ARM_DMB_SY
3530 "swpb %w[uValue], wzr, %[pMem]\n\t"
3531# else
3532 RTASM_ARM_DMB_SY
3533 "stlrb %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
3534# endif
3535 : [pMem] "+Q" (*pu8)
3536 : [uValue] "r" ((uint32_t)u8)
3537 : );
3538#else
3539 ASMAtomicXchgU8(pu8, u8);
3540#endif
3541}
3542
3543
3544/**
3545 * Atomically writes an unsigned 8-bit value, unordered.
3546 *
3547 * @param pu8 Pointer to the 8-bit variable.
3548 * @param u8 The 8-bit value to assign to *pu8.
3549 */
3550DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3551{
3552 *pu8 = u8; /* byte writes are atomic on x86 */
3553}
3554
3555
3556/**
3557 * Atomically writes a signed 8-bit value, ordered.
3558 *
3559 * @param pi8 Pointer to the 8-bit variable to read.
3560 * @param i8 The 8-bit value to assign to *pi8.
3561 */
3562DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3563{
3564#if defined(RT_ARCH_ARM64)
3565 ASMAtomicWriteU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
3566#else
3567 ASMAtomicXchgS8(pi8, i8);
3568#endif
3569}
3570
3571
3572/**
3573 * Atomically writes a signed 8-bit value, unordered.
3574 *
3575 * @param pi8 Pointer to the 8-bit variable to write.
3576 * @param i8 The 8-bit value to assign to *pi8.
3577 */
3578DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3579{
3580 *pi8 = i8; /* byte writes are atomic on x86 */
3581}
3582
3583
3584/**
3585 * Atomically writes an unsigned 16-bit value, ordered.
3586 *
3587 * @param pu16 Pointer to the 16-bit variable to write.
3588 * @param u16 The 16-bit value to assign to *pu16.
3589 */
3590DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3591{
3592#if defined(RT_ARCH_ARM64)
3593 __asm__ __volatile__("Lstart_ASMAtomicWriteU16_%=:\n\t"
3594# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
3595 RTASM_ARM_DMB_SY
3596 "swph %w[uValue], wzr, %[pMem]\n\t"
3597# else
3598 RTASM_ARM_DMB_SY
3599 "stlrh %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
3600# endif
3601 : [pMem] "+Q" (*pu16)
3602 : [uValue] "r" ((uint32_t)u16)
3603 : );
3604#else
3605 ASMAtomicXchgU16(pu16, u16);
3606#endif
3607}
3608
3609
3610/**
3611 * Atomically writes an unsigned 16-bit value, unordered.
3612 *
3613 * @param pu16 Pointer to the 16-bit variable to write.
3614 * @param u16 The 16-bit value to assign to *pu16.
3615 */
3616DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3617{
3618 Assert(!((uintptr_t)pu16 & 1));
3619 *pu16 = u16;
3620}
3621
3622
3623/**
3624 * Atomically writes a signed 16-bit value, ordered.
3625 *
3626 * @param pi16 Pointer to the 16-bit variable to write.
3627 * @param i16 The 16-bit value to assign to *pi16.
3628 */
3629DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3630{
3631#if defined(RT_ARCH_ARM64)
3632 ASMAtomicWriteU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
3633#else
3634 ASMAtomicXchgS16(pi16, i16);
3635#endif
3636}
3637
3638
3639/**
3640 * Atomically writes a signed 16-bit value, unordered.
3641 *
3642 * @param pi16 Pointer to the 16-bit variable to write.
3643 * @param i16 The 16-bit value to assign to *pi16.
3644 */
3645DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3646{
3647 Assert(!((uintptr_t)pi16 & 1));
3648 *pi16 = i16;
3649}
3650
3651
3652/**
3653 * Atomically writes an unsigned 32-bit value, ordered.
3654 *
3655 * @param pu32 Pointer to the 32-bit variable to write.
3656 * @param u32 The 32-bit value to assign to *pu32.
3657 */
3658DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3659{
3660#if defined(RT_ARCH_ARM64)
3661 __asm__ __volatile__("Lstart_ASMAtomicWriteU32_%=:\n\t"
3662# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
3663 RTASM_ARM_DMB_SY
3664 "swp %w[uValue], wzr, %[pMem]\n\t"
3665# else
3666 RTASM_ARM_DMB_SY
3667 "stlr %w[uValue], %[pMem]\n\t" /* single-copy atomic w/ release semantics. */
3668# endif
3669 : [pMem] "+Q" (*pu32)
3670 : [uValue] "r" (u32)
3671 : "cc");
3672#else
3673 ASMAtomicXchgU32(pu32, u32);
3674#endif
3675}
3676
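/*
 * Usage sketch (illustrative only): a minimal publish/consume pattern.  The
 * ordered write behaves as a release store, so the plain payload store before
 * it becomes visible no later than the flag; the ordered read on the consumer
 * side pairs with it.  The names below are hypothetical.
 */
#if 0 /* example only */
static uint32_t volatile g_fExampleReady = 0;
static uint32_t          g_uExamplePayload;

static void examplePublish(uint32_t uValue)
{
    g_uExamplePayload = uValue;             /* plain store of the data... */
    ASMAtomicWriteU32(&g_fExampleReady, 1); /* ...then the ordered write publishes it */
}

static uint32_t exampleConsume(void)
{
    while (!ASMAtomicReadU32(&g_fExampleReady)) /* ordered read pairs with the write above */
        ASMNopPause();
    return g_uExamplePayload;
}
#endif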
3677
3678/**
3679 * Atomically writes an unsigned 32-bit value, unordered.
3680 *
3681 * @param pu32 Pointer to the 32-bit variable to write.
3682 * @param u32 The 32-bit value to assign to *pu32.
3683 */
3684DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3685{
3686 Assert(!((uintptr_t)pu32 & 3));
3687#if ARCH_BITS >= 32
3688 *pu32 = u32;
3689#else
3690 ASMAtomicXchgU32(pu32, u32);
3691#endif
3692}
3693
3694
3695/**
3696 * Atomically writes a signed 32-bit value, ordered.
3697 *
3698 * @param pi32 Pointer to the 32-bit variable to write.
3699 * @param i32 The 32-bit value to assign to *pi32.
3700 */
3701DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3702{
3703#if defined(RT_ARCH_ARM64)
3704 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
3705#else
3706 ASMAtomicXchgS32(pi32, i32);
3707#endif
3708}
3709
3710
3711/**
3712 * Atomically writes a signed 32-bit value, unordered.
3713 *
3714 * @param pi32 Pointer to the 32-bit variable to write.
3715 * @param i32 The 32-bit value to assign to *pi32.
3716 */
3717DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3718{
3719 Assert(!((uintptr_t)pi32 & 3));
3720#if ARCH_BITS >= 32
3721 *pi32 = i32;
3722#else
3723 ASMAtomicXchgS32(pi32, i32);
3724#endif
3725}
3726
3727
3728/**
3729 * Atomically writes an unsigned 64-bit value, ordered.
3730 *
3731 * @param pu64 Pointer to the 64-bit variable to write.
3732 * @param u64 The 64-bit value to assign to *pu64.
3733 */
3734DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3735{
3736#if defined(RT_ARCH_ARM64)
3737 __asm__ __volatile__("Lstart_ASMAtomicWriteU64_%=:\n\t"
3738# if defined(RTASM_ARM64_USE_FEAT_LSE) /* slower on M1, but benefits from relaxed LSE2 alignment requirements (M2?). */
3739 RTASM_ARM_DMB_SY
3740 "swp %[uValue], xzr, %[pMem]\n\t"
3741# else
3742 RTASM_ARM_DMB_SY /** @todo necessary? */
3743 "stlr %[uValue], %[pMem]\n\t"
3744# endif
3745 : [pMem] "+Q" (*pu64)
3746 : [uValue] "r" (u64)
3747 : );
3748#else
3749 ASMAtomicXchgU64(pu64, u64);
3750#endif
3751}
3752
3753
3754/**
3755 * Atomically writes an unsigned 64-bit value, unordered.
3756 *
3757 * @param pu64 Pointer to the 64-bit variable to write.
3758 * @param u64 The 64-bit value to assign to *pu64.
3759 */
3760DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3761{
3762 Assert(!((uintptr_t)pu64 & 7));
3763#if ARCH_BITS == 64
3764 *pu64 = u64;
3765#else
3766 ASMAtomicXchgU64(pu64, u64);
3767#endif
3768}
3769
3770
3771/**
3772 * Atomically writes a signed 64-bit value, ordered.
3773 *
3774 * @param pi64 Pointer to the 64-bit variable to write.
3775 * @param i64 The 64-bit value to assign to *pi64.
3776 */
3777DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3778{
3779#if defined(RT_ARCH_ARM64)
3780 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
3781#else
3782 ASMAtomicXchgS64(pi64, i64);
3783#endif
3784}
3785
3786
3787/**
3788 * Atomically writes a signed 64-bit value, unordered.
3789 *
3790 * @param pi64 Pointer to the 64-bit variable to write.
3791 * @param i64 The 64-bit value to assign to *pi64.
3792 */
3793DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3794{
3795 Assert(!((uintptr_t)pi64 & 7));
3796#if ARCH_BITS == 64
3797 *pi64 = i64;
3798#else
3799 ASMAtomicXchgS64(pi64, i64);
3800#endif
3801}
3802
3803
3804/**
3805 * Atomically writes a size_t value, ordered.
3806 *
3807 * @param pcb Pointer to the size_t variable to write.
3808 * @param cb The value to assign to *pcb.
3809 */
3810DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3811{
3812#if ARCH_BITS == 64
3813 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3814#elif ARCH_BITS == 32
3815 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3816#elif ARCH_BITS == 16
3817 AssertCompileSize(size_t, 2);
3818 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3819#else
3820# error "Unsupported ARCH_BITS value"
3821#endif
3822}
3823
3824
3825/**
3826 * Atomically writes a size_t value, unordered.
3827 *
3828 * @param pcb Pointer to the size_t variable to write.
3829 * @param cb The value to assign to *pcb.
3830 */
3831DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3832{
3833#if ARCH_BITS == 64
3834 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3835#elif ARCH_BITS == 32
3836 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3837#elif ARCH_BITS == 16
3838 AssertCompileSize(size_t, 2);
3839 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3840#else
3841# error "Unsupported ARCH_BITS value"
3842#endif
3843}
3844
3845
3846/**
3847 * Atomically writes a boolean value, ordered.
3848 *
3849 * @param pf Pointer to the boolean variable to write.
3850 * @param f The boolean value to assign to *pf.
3851 */
3852DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3853{
3854 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3855}
3856
3857
3858/**
3859 * Atomically writes a boolean value, unordered.
3860 *
3861 * @param pf Pointer to the boolean variable to write.
3862 * @param f The boolean value to assign to *pf.
3863 */
3864DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3865{
3866 *pf = f; /* byte writes are atomic on x86 */
3867}
3868
3869
3870/**
3871 * Atomically writes a pointer value, ordered.
3872 *
3873 * @param ppv Pointer to the pointer variable to write.
3874 * @param pv The pointer value to assign to *ppv.
3875 */
3876DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3877{
3878#if ARCH_BITS == 32 || ARCH_BITS == 16
3879 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3880#elif ARCH_BITS == 64
3881 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3882#else
3883# error "ARCH_BITS is bogus"
3884#endif
3885}
3886
3887
3888/**
3889 * Atomically writes a pointer value, unordered.
3890 *
3891 * @param ppv Pointer to the pointer variable to write.
3892 * @param pv The pointer value to assign to *ppv.
3893 */
3894DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3895{
3896#if ARCH_BITS == 32 || ARCH_BITS == 16
3897 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3898#elif ARCH_BITS == 64
3899 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3900#else
3901# error "ARCH_BITS is bogus"
3902#endif
3903}
3904
3905
3906/**
3907 * Atomically writes a pointer value, ordered.
3908 *
3909 * @param ppv Pointer to the pointer variable to write.
3910 * @param pv The pointer value to assign to *ppv. If NULL use
3911 * ASMAtomicWriteNullPtr or you'll land in trouble.
3912 *
3913 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3914 * NULL.
3915 */
3916#ifdef __GNUC__
3917# define ASMAtomicWritePtr(ppv, pv) \
3918 do \
3919 { \
3920 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3921 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3922 \
3923 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3924 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3925 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3926 \
3927 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3928 } while (0)
3929#else
3930# define ASMAtomicWritePtr(ppv, pv) \
3931 do \
3932 { \
3933 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3934 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3935 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3936 \
3937 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3938 } while (0)
3939#endif
3940
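/*
 * Usage sketch (illustrative only): publishing a fully initialized structure
 * through a shared pointer.  ASMAtomicWritePtr gives an ordered (release)
 * store and, with GCC, checks that the pointer types match.  The type and
 * names below are hypothetical.
 */
#if 0 /* example only */
typedef struct EXAMPLENODE { uint32_t uValue; } EXAMPLENODE;
static EXAMPLENODE * volatile g_pExampleNode = NULL;

static void examplePublishNode(EXAMPLENODE *pNode)
{
    pNode->uValue = 42;                        /* initialize everything first... */
    ASMAtomicWritePtr(&g_pExampleNode, pNode); /* ...then publish the pointer with an ordered write */
}
#endif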
3941
3942/**
3943 * Atomically sets a pointer to NULL, ordered.
3944 *
3945 * @param ppv Pointer to the pointer variable that should be set to NULL.
3946 *
3947 * @remarks This is relatively type safe on GCC platforms.
3948 */
3949#if RT_GNUC_PREREQ(4, 2)
3950# define ASMAtomicWriteNullPtr(ppv) \
3951 do \
3952 { \
3953 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3954 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3955 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3956 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3957 } while (0)
3958#else
3959# define ASMAtomicWriteNullPtr(ppv) \
3960 do \
3961 { \
3962 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3963 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3964 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3965 } while (0)
3966#endif
3967
3968
3969/**
3970 * Atomically writes a pointer value, unordered.
3971 *
3973 * @param ppv Pointer to the pointer variable.
3974 * @param pv The pointer value to assign to *ppv. If NULL use
3975 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3976 *
3977 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3978 * NULL.
3979 */
3980#if RT_GNUC_PREREQ(4, 2)
3981# define ASMAtomicUoWritePtr(ppv, pv) \
3982 do \
3983 { \
3984 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3985 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3986 \
3987 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3988 AssertCompile(sizeof(pv) == sizeof(void *)); \
3989 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3990 \
3991 *(ppvTypeChecked) = pvTypeChecked; \
3992 } while (0)
3993#else
3994# define ASMAtomicUoWritePtr(ppv, pv) \
3995 do \
3996 { \
3997 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3998 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3999 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
4000 *(ppv) = pv; \
4001 } while (0)
4002#endif
4003
4004
4005/**
4006 * Atomically sets a pointer to NULL, unordered.
4007 *
4008 * @param ppv Pointer to the pointer variable that should be set to NULL.
4009 *
4010 * @remarks This is relatively type safe on GCC platforms.
4011 */
4012#ifdef __GNUC__
4013# define ASMAtomicUoWriteNullPtr(ppv) \
4014 do \
4015 { \
4016 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
4017 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
4018 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
4019 *(ppvTypeChecked) = NULL; \
4020 } while (0)
4021#else
4022# define ASMAtomicUoWriteNullPtr(ppv) \
4023 do \
4024 { \
4025 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
4026 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
4027 *(ppv) = NULL; \
4028 } while (0)
4029#endif
4030
4031
4032/**
4033 * Atomically write a typical IPRT handle value, ordered.
4034 *
4035 * @param ph Pointer to the variable to update.
4036 * @param hNew The value to assign to *ph.
4037 *
4038 * @remarks This doesn't currently work for all handles (like RTFILE).
4039 */
4040#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
4041# define ASMAtomicWriteHandle(ph, hNew) \
4042 do { \
4043 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4044 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
4045 } while (0)
4046#elif HC_ARCH_BITS == 64
4047# define ASMAtomicWriteHandle(ph, hNew) \
4048 do { \
4049 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4050 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
4051 } while (0)
4052#else
4053# error HC_ARCH_BITS
4054#endif
4055
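/*
 * Usage sketch (illustrative only): IPRT handles that are pointer sized
 * (e.g. RTSEMEVENT) can be published this way; handles with other layouts
 * (like RTFILE, per the remark above) cannot.  The names below are
 * hypothetical.
 */
#if 0 /* example only */
static RTSEMEVENT volatile g_hExampleEvent = NIL_RTSEMEVENT;

static void examplePublishEvent(RTSEMEVENT hEvent)
{
    ASMAtomicWriteHandle(&g_hExampleEvent, hEvent); /* ordered, pointer-sized write */
}
#endif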
4056
4057/**
4058 * Atomically write a typical IPRT handle value, unordered.
4059 *
4060 * @param ph Pointer to the variable to update.
4061 * @param hNew The value to assign to *ph.
4062 *
4063 * @remarks This doesn't currently work for all handles (like RTFILE).
4064 */
4065#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
4066# define ASMAtomicUoWriteHandle(ph, hNew) \
4067 do { \
4068 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4069 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
4070 } while (0)
4071#elif HC_ARCH_BITS == 64
4072# define ASMAtomicUoWriteHandle(ph, hNew) \
4073 do { \
4074 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4075 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
4076 } while (0)
4077#else
4078# error HC_ARCH_BITS
4079#endif
4080
4081
4082/**
4083 * Atomically write a value whose size might differ
4084 * between platforms or compilers, ordered.
4085 *
4086 * @param pu Pointer to the variable to update.
4087 * @param uNew The value to assign to *pu.
4088 */
4089#define ASMAtomicWriteSize(pu, uNew) \
4090 do { \
4091 switch (sizeof(*(pu))) { \
4092 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
4093 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
4094 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4095 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4096 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4097 } \
4098 } while (0)
4099
4100/**
4101 * Atomically write a value whose size might differ
4102 * between platforms or compilers, unordered.
4103 *
4104 * @param pu Pointer to the variable to update.
4105 * @param uNew The value to assign to *pu.
4106 */
4107#define ASMAtomicUoWriteSize(pu, uNew) \
4108 do { \
4109 switch (sizeof(*(pu))) { \
4110 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
4111 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
4112 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4113 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4114 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4115 } \
4116 } while (0)
4117
4118
4119
4120/**
4121 * Atomically exchanges and adds to a 16-bit value, ordered.
4122 *
4123 * @returns The old value.
4124 * @param pu16 Pointer to the value.
4125 * @param u16 Number to add.
4126 *
4127 * @remarks Currently not implemented, just to make 16-bit code happy.
4128 * @remarks x86: Requires a 486 or later.
4129 */
4130RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
4131
4132
4133/**
4134 * Atomically exchanges and adds to a 32-bit value, ordered.
4135 *
4136 * @returns The old value.
4137 * @param pu32 Pointer to the value.
4138 * @param u32 Number to add.
4139 *
4140 * @remarks x86: Requires a 486 or later.
4141 */
4142#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4143RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4144#else
4145DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4146{
4147# if RT_INLINE_ASM_USES_INTRIN
4148 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
4149 return u32;
4150
4151# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4152# if RT_INLINE_ASM_GNU_STYLE
4153 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4154 : "=r" (u32)
4155 , "=m" (*pu32)
4156 : "0" (u32)
4157 , "m" (*pu32)
4158 : "memory"
4159 , "cc");
4160 return u32;
4161# else
4162 __asm
4163 {
4164 mov eax, [u32]
4165# ifdef RT_ARCH_AMD64
4166 mov rdx, [pu32]
4167 lock xadd [rdx], eax
4168# else
4169 mov edx, [pu32]
4170 lock xadd [edx], eax
4171# endif
4172 mov [u32], eax
4173 }
4174 return u32;
4175# endif
4176
4177# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4178 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
4179 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
4180 "add %[uNew], %[uOld], %[uVal]\n\t",
4181 [uVal] "r" (u32));
4182 return u32OldRet;
4183
4184# else
4185# error "Port me"
4186# endif
4187}
4188#endif
4189
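/*
 * Usage sketch (illustrative only): because the old value is returned, the
 * atomic add doubles as a "claim a range" operation, e.g. handing out
 * consecutive indexes to concurrent producers.  The names are hypothetical.
 */
#if 0 /* example only */
static uint32_t volatile g_idxExampleNext = 0;

static uint32_t exampleClaimIndexes(uint32_t cWanted)
{
    return ASMAtomicAddU32(&g_idxExampleNext, cWanted); /* returns the first index of the claimed range */
}
#endif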
4190
4191/**
4192 * Atomically exchanges and adds to a signed 32-bit value, ordered.
4193 *
4194 * @returns The old value.
4195 * @param pi32 Pointer to the value.
4196 * @param i32 Number to add.
4197 *
4198 * @remarks x86: Requires a 486 or later.
4199 */
4200DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4201{
4202 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4203}
4204
4205
4206/**
4207 * Atomically exchanges and adds to a 64-bit value, ordered.
4208 *
4209 * @returns The old value.
4210 * @param pu64 Pointer to the value.
4211 * @param u64 Number to add.
4212 *
4213 * @remarks x86: Requires a Pentium or later.
4214 */
4215#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4216DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4217#else
4218DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4219{
4220# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4221 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
4222 return u64;
4223
4224# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4225 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4226 : "=r" (u64)
4227 , "=m" (*pu64)
4228 : "0" (u64)
4229 , "m" (*pu64)
4230 : "memory"
4231 , "cc");
4232 return u64;
4233
4234# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4235 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
4236 "add %[uNew], %[uOld], %[uVal]\n\t"
4237 ,
4238 "add %[uNew], %[uOld], %[uVal]\n\t"
4239 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
4240 [uVal] "r" (u64));
4241 return u64OldRet;
4242
4243# else
4244 uint64_t u64Old;
4245 for (;;)
4246 {
4247 uint64_t u64New;
4248 u64Old = ASMAtomicUoReadU64(pu64);
4249 u64New = u64Old + u64;
4250 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4251 break;
4252 ASMNopPause();
4253 }
4254 return u64Old;
4255# endif
4256}
4257#endif
4258
4259
4260/**
4261 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4262 *
4263 * @returns The old value.
4264 * @param pi64 Pointer to the value.
4265 * @param i64 Number to add.
4266 *
4267 * @remarks x86: Requires a Pentium or later.
4268 */
4269DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4270{
4271 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4272}
4273
4274
4275/**
4276 * Atomically exchanges and adds to a size_t value, ordered.
4277 *
4278 * @returns The old value.
4279 * @param pcb Pointer to the size_t value.
4280 * @param cb Number to add.
4281 */
4282DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4283{
4284#if ARCH_BITS == 64
4285 AssertCompileSize(size_t, 8);
4286 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4287#elif ARCH_BITS == 32
4288 AssertCompileSize(size_t, 4);
4289 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4290#elif ARCH_BITS == 16
4291 AssertCompileSize(size_t, 2);
4292 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4293#else
4294# error "Unsupported ARCH_BITS value"
4295#endif
4296}
4297
4298
4299/**
4300 * Atomically exchanges and adds a value whose size might differ between
4301 * platforms or compilers, ordered.
4302 *
4303 * @param pu Pointer to the variable to update.
4304 * @param uNew The value to add to *pu.
4305 * @param puOld Where to store the old value.
4306 */
4307#define ASMAtomicAddSize(pu, uNew, puOld) \
4308 do { \
4309 switch (sizeof(*(pu))) { \
4310 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4311 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4312 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
4313 } \
4314 } while (0)
4315
4316
4317
4318/**
4319 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
4320 *
4321 * @returns The old value.
4322 * @param pu16 Pointer to the value.
4323 * @param u16 Number to subtract.
4324 *
4325 * @remarks x86: Requires a 486 or later.
4326 */
4327DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4328{
4329 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4330}
4331
4332
4333/**
4334 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
4335 *
4336 * @returns The old value.
4337 * @param pi16 Pointer to the value.
4338 * @param i16 Number to subtract.
4339 *
4340 * @remarks x86: Requires a 486 or later.
4341 */
4342DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4343{
4344 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4345}
4346
4347
4348/**
4349 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
4350 *
4351 * @returns The old value.
4352 * @param pu32 Pointer to the value.
4353 * @param u32 Number to subtract.
4354 *
4355 * @remarks x86: Requires a 486 or later.
4356 */
4357DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4358{
4359 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4360}
4361
4362
4363/**
4364 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
4365 *
4366 * @returns The old value.
4367 * @param pi32 Pointer to the value.
4368 * @param i32 Number to subtract.
4369 *
4370 * @remarks x86: Requires a 486 or later.
4371 */
4372DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4373{
4374 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4375}
4376
4377
4378/**
4379 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
4380 *
4381 * @returns The old value.
4382 * @param pu64 Pointer to the value.
4383 * @param u64 Number to subtract.
4384 *
4385 * @remarks x86: Requires a Pentium or later.
4386 */
4387DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4388{
4389 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4390}
4391
4392
4393/**
4394 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
4395 *
4396 * @returns The old value.
4397 * @param pi64 Pointer to the value.
4398 * @param i64 Number to subtract.
4399 *
4400 * @remarks x86: Requires a Pentium or later.
4401 */
4402DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4403{
4404 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4405}
4406
4407
4408/**
4409 * Atomically exchanges and subtracts from a size_t value, ordered.
4410 *
4411 * @returns The old value.
4412 * @param pcb Pointer to the size_t value.
4413 * @param cb Number to subtract.
4414 *
4415 * @remarks x86: Requires a 486 or later.
4416 */
4417DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4418{
4419#if ARCH_BITS == 64
4420 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4421#elif ARCH_BITS == 32
4422 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4423#elif ARCH_BITS == 16
4424 AssertCompileSize(size_t, 2);
4425 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4426#else
4427# error "Unsupported ARCH_BITS value"
4428#endif
4429}
4430
4431
4432/**
4433 * Atomically exchanges and subtracts a value whose size might differ between
4434 * platforms or compilers, ordered.
4435 *
4436 * @param pu Pointer to the variable to update.
4437 * @param uNew The value to subtract from *pu.
4438 * @param puOld Where to store the old value.
4439 *
4440 * @remarks x86: Requires a 486 or later.
4441 */
4442#define ASMAtomicSubSize(pu, uNew, puOld) \
4443 do { \
4444 switch (sizeof(*(pu))) { \
4445 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4446 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4447 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
4448 } \
4449 } while (0)
4450
4451
4452
4453/**
4454 * Atomically increment a 16-bit value, ordered.
4455 *
4456 * @returns The new value.
4457 * @param pu16 Pointer to the value to increment.
4458 * @remarks Not implemented. Just to make 16-bit code happy.
4459 *
4460 * @remarks x86: Requires a 486 or later.
4461 */
4462RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4463
4464
4465/**
4466 * Atomically increment a 32-bit value, ordered.
4467 *
4468 * @returns The new value.
4469 * @param pu32 Pointer to the value to increment.
4470 *
4471 * @remarks x86: Requires a 486 or later.
4472 */
4473#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4474RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4475#else
4476DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4477{
4478# if RT_INLINE_ASM_USES_INTRIN
4479 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4480
4481# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4482# if RT_INLINE_ASM_GNU_STYLE
4483 uint32_t u32;
4484 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4485 : "=r" (u32)
4486 , "=m" (*pu32)
4487 : "0" (1)
4488 , "m" (*pu32)
4489 : "memory"
4490 , "cc");
4491 return u32+1;
4492# else
    uint32_t u32;
4493 __asm
4494 {
4495 mov eax, 1
4496# ifdef RT_ARCH_AMD64
4497 mov rdx, [pu32]
4498 lock xadd [rdx], eax
4499# else
4500 mov edx, [pu32]
4501 lock xadd [edx], eax
4502# endif
4503 mov u32, eax
4504 }
4505 return u32+1;
4506# endif
4507
4508# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4509 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4510 "add %w[uNew], %w[uNew], #1\n\t",
4511 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4512 "X" (0) /* dummy */);
4513 return u32NewRet;
4514
4515# else
4516 return ASMAtomicAddU32(pu32, 1) + 1;
4517# endif
4518}
4519#endif
4520
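/*
 * Usage sketch (illustrative only): the classic retain half of a reference
 * counter; ASMAtomicIncU32 returns the new count.  The structure and names
 * are hypothetical.
 */
#if 0 /* example only */
typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;

static uint32_t exampleRetain(EXAMPLEOBJ *pObj)
{
    return ASMAtomicIncU32(&pObj->cRefs); /* the new reference count */
}
#endif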
4521
4522/**
4523 * Atomically increment a signed 32-bit value, ordered.
4524 *
4525 * @returns The new value.
4526 * @param pi32 Pointer to the value to increment.
4527 *
4528 * @remarks x86: Requires a 486 or later.
4529 */
4530DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4531{
4532 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4533}
4534
4535
4536/**
4537 * Atomically increment a 64-bit value, ordered.
4538 *
4539 * @returns The new value.
4540 * @param pu64 Pointer to the value to increment.
4541 *
4542 * @remarks x86: Requires a Pentium or later.
4543 */
4544#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4545DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4546#else
4547DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4548{
4549# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4550 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4551
4552# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4553 uint64_t u64;
4554 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4555 : "=r" (u64)
4556 , "=m" (*pu64)
4557 : "0" (1)
4558 , "m" (*pu64)
4559 : "memory"
4560 , "cc");
4561 return u64 + 1;
4562
4563# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4564 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4565 "add %[uNew], %[uNew], #1\n\t"
4566 ,
4567 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4568 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4569 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4570 return u64NewRet;
4571
4572# else
4573 return ASMAtomicAddU64(pu64, 1) + 1;
4574# endif
4575}
4576#endif
4577
4578
4579/**
4580 * Atomically increment a signed 64-bit value, ordered.
4581 *
4582 * @returns The new value.
4583 * @param pi64 Pointer to the value to increment.
4584 *
4585 * @remarks x86: Requires a Pentium or later.
4586 */
4587DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4588{
4589 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4590}
4591
4592
4593/**
4594 * Atomically increment a size_t value, ordered.
4595 *
4596 * @returns The new value.
4597 * @param pcb Pointer to the value to increment.
4598 *
4599 * @remarks x86: Requires a 486 or later.
4600 */
4601DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4602{
4603#if ARCH_BITS == 64
4604 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4605#elif ARCH_BITS == 32
4606 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4607#elif ARCH_BITS == 16
4608 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4609#else
4610# error "Unsupported ARCH_BITS value"
4611#endif
4612}
4613
4614
4615
4616/**
4617 * Atomically decrement an unsigned 16-bit value, ordered.
4618 *
4619 * @returns The new value.
4620 * @param pu16 Pointer to the value to decrement.
4621 * @remarks Not implemented. Just to make 16-bit code happy.
4622 *
4623 * @remarks x86: Requires a 486 or later.
4624 */
4625RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4626
4627
4628/**
4629 * Atomically decrement an unsigned 32-bit value, ordered.
4630 *
4631 * @returns The new value.
4632 * @param pu32 Pointer to the value to decrement.
4633 *
4634 * @remarks x86: Requires a 486 or later.
4635 */
4636#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4637RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4638#else
4639DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4640{
4641# if RT_INLINE_ASM_USES_INTRIN
4642 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4643
4644# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4645# if RT_INLINE_ASM_GNU_STYLE
4646 uint32_t u32;
4647 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4648 : "=r" (u32)
4649 , "=m" (*pu32)
4650 : "0" (-1)
4651 , "m" (*pu32)
4652 : "memory"
4653 , "cc");
4654 return u32-1;
4655# else
4656 uint32_t u32;
4657 __asm
4658 {
4659 mov eax, -1
4660# ifdef RT_ARCH_AMD64
4661 mov rdx, [pu32]
4662 lock xadd [rdx], eax
4663# else
4664 mov edx, [pu32]
4665 lock xadd [edx], eax
4666# endif
4667 mov u32, eax
4668 }
4669 return u32-1;
4670# endif
4671
4672# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4673 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4674 "sub %w[uNew], %w[uNew], #1\n\t",
4675 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4676 "X" (0) /* dummy */);
4677 return u32NewRet;
4678
4679# else
4680 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4681# endif
4682}
4683#endif
4684
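/*
 * Usage sketch (illustrative only): the matching release half of the
 * reference counter shown after ASMAtomicIncU32; the thread that sees the
 * count drop to zero owns destruction.  EXAMPLEOBJ and exampleDestroy are
 * hypothetical.
 */
#if 0 /* example only */
static void exampleDestroy(EXAMPLEOBJ *pObj); /* hypothetical cleanup routine */

static void exampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0) /* returns the new count */
        exampleDestroy(pObj);
}
#endif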
4685
4686/**
4687 * Atomically decrement a signed 32-bit value, ordered.
4688 *
4689 * @returns The new value.
4690 * @param pi32 Pointer to the value to decrement.
4691 *
4692 * @remarks x86: Requires a 486 or later.
4693 */
4694DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4695{
4696 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4697}
4698
4699
4700/**
4701 * Atomically decrement an unsigned 64-bit value, ordered.
4702 *
4703 * @returns The new value.
4704 * @param pu64 Pointer to the value to decrement.
4705 *
4706 * @remarks x86: Requires a Pentium or later.
4707 */
4708#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4709RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4710#else
4711DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4712{
4713# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4714 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4715
4716# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4717 uint64_t u64;
4718 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4719 : "=r" (u64)
4720 , "=m" (*pu64)
4721 : "0" (~(uint64_t)0)
4722 , "m" (*pu64)
4723 : "memory"
4724 , "cc");
4725 return u64-1;
4726
4727# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4728 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4729 "sub %[uNew], %[uNew], #1\n\t"
4730 ,
4731 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4732 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4733 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4734 return u64NewRet;
4735
4736# else
4737 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4738# endif
4739}
4740#endif
4741
4742
4743/**
4744 * Atomically decrement a signed 64-bit value, ordered.
4745 *
4746 * @returns The new value.
4747 * @param pi64 Pointer to the value to decrement.
4748 *
4749 * @remarks x86: Requires a Pentium or later.
4750 */
4751DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4752{
4753 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4754}
4755
4756
4757/**
4758 * Atomically decrement a size_t value, ordered.
4759 *
4760 * @returns The new value.
4761 * @param pcb Pointer to the value to decrement.
4762 *
4763 * @remarks x86: Requires a 486 or later.
4764 */
4765DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4766{
4767#if ARCH_BITS == 64
4768 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4769#elif ARCH_BITS == 32
4770 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4771#elif ARCH_BITS == 16
4772 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4773#else
4774# error "Unsupported ARCH_BITS value"
4775#endif
4776}
4777
4778
4779/**
4780 * Atomically Or an unsigned 32-bit value, ordered.
4781 *
4782 * @param pu32 Pointer to the variable to OR u32 with.
4783 * @param u32 The value to OR *pu32 with.
4784 *
4785 * @remarks x86: Requires a 386 or later.
4786 */
4787#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4788RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4789#else
4790DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4791{
4792# if RT_INLINE_ASM_USES_INTRIN
4793 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4794
4795# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4796# if RT_INLINE_ASM_GNU_STYLE
4797 __asm__ __volatile__("lock; orl %1, %0\n\t"
4798 : "=m" (*pu32)
4799 : "ir" (u32)
4800 , "m" (*pu32)
4801 : "cc");
4802# else
4803 __asm
4804 {
4805 mov eax, [u32]
4806# ifdef RT_ARCH_AMD64
4807 mov rdx, [pu32]
4808 lock or [rdx], eax
4809# else
4810 mov edx, [pu32]
4811 lock or [edx], eax
4812# endif
4813 }
4814# endif
4815
4816# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4817 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4818 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4819 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4820 "orr %[uNew], %[uNew], %[uVal]\n\t",
4821 [uVal] "r" (u32));
4822
4823# else
4824# error "Port me"
4825# endif
4826}
4827#endif
4828
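/*
 * Usage sketch (illustrative only): atomically setting a flag bit in a shared
 * 32-bit flag word.  The flag constant and variable are hypothetical.
 */
#if 0 /* example only */
#define EXAMPLE_F_SHUTDOWN  RT_BIT_32(0)
static uint32_t volatile g_fExampleFlags = 0;

static void exampleRequestShutdown(void)
{
    ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_F_SHUTDOWN); /* set this bit, leaving the others alone */
}
#endif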
4829
4830/**
4831 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4832 * fallback).
4833 *
4834 * @returns Old value.
4835 * @param pu32 Pointer to the variable to OR @a u32 with.
4836 * @param u32 The value to OR @a *pu32 with.
4837 */
4838DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4839{
4840#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4841 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4842 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4843 "orr %[uNew], %[uOld], %[uVal]\n\t",
4844 [uVal] "r" (u32));
4845 return u32OldRet;
4846
4847#else
4848 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4849 uint32_t u32New;
4850 do
4851 u32New = u32RetOld | u32;
4852 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4853 return u32RetOld;
4854#endif
4855}
4856
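/*
 * Usage sketch (illustrative only): the compare-exchange retry loop used by
 * the fallback above generalizes to other read-modify-write operations that
 * have no dedicated primitive, e.g. an atomic maximum.  The helper name is
 * hypothetical.
 */
#if 0 /* example only */
DECLINLINE(uint32_t) exampleAtomicMaxU32(uint32_t volatile RT_FAR *pu32, uint32_t uNew)
{
    uint32_t uOld = ASMAtomicUoReadU32(pu32);
    while (uOld < uNew && !ASMAtomicCmpXchgExU32(pu32, uNew, uOld, &uOld))
        ASMNopPause();  /* uOld was refreshed by the failed compare-exchange */
    return uOld;        /* the value observed before any update */
}
#endif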
4857
4858/**
4859 * Atomically Or a signed 32-bit value, ordered.
4860 *
4861 * @param pi32 Pointer to the variable to OR i32 with.
4862 * @param i32 The value to OR *pi32 with.
4863 *
4864 * @remarks x86: Requires a 386 or later.
4865 */
4866DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4867{
4868 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4869}
4870
4871
4872/**
4873 * Atomically Or an unsigned 64-bit value, ordered.
4874 *
4875 * @param pu64 Pointer to the variable to OR u64 with.
4876 * @param u64 The value to OR *pu64 with.
4877 *
4878 * @remarks x86: Requires a Pentium or later.
4879 */
4880#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4881DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4882#else
4883DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4884{
4885# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4886 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4887
4888# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4889 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4890 : "=m" (*pu64)
4891 : "r" (u64)
4892 , "m" (*pu64)
4893 : "cc");
4894
4895# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4896 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4897 "orr %[uNew], %[uNew], %[uVal]\n\t"
4898 ,
4899 "orr %[uNew], %[uNew], %[uVal]\n\t"
4900 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4901 [uVal] "r" (u64));
4902
4903# else
4904 for (;;)
4905 {
4906 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4907 uint64_t u64New = u64Old | u64;
4908 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4909 break;
4910 ASMNopPause();
4911 }
4912# endif
4913}
4914#endif
4915
4916
4917/**
4918 * Atomically Or a signed 64-bit value, ordered.
4919 *
4920 * @param pi64 Pointer to the variable to OR i64 with.
4921 * @param i64 The value to OR *pi64 with.
4922 *
4923 * @remarks x86: Requires a Pentium or later.
4924 */
4925DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4926{
4927 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4928}
4929
4930
4931/**
4932 * Atomically And an unsigned 32-bit value, ordered.
4933 *
4934 * @param pu32 Pointer to the variable to AND u32 with.
4935 * @param u32 The value to AND *pu32 with.
4936 *
4937 * @remarks x86: Requires a 386 or later.
4938 */
4939#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4940RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4941#else
4942DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4943{
4944# if RT_INLINE_ASM_USES_INTRIN
4945 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4946
4947# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4948# if RT_INLINE_ASM_GNU_STYLE
4949 __asm__ __volatile__("lock; andl %1, %0\n\t"
4950 : "=m" (*pu32)
4951 : "ir" (u32)
4952 , "m" (*pu32)
4953 : "cc");
4954# else
4955 __asm
4956 {
4957 mov eax, [u32]
4958# ifdef RT_ARCH_AMD64
4959 mov rdx, [pu32]
4960 lock and [rdx], eax
4961# else
4962 mov edx, [pu32]
4963 lock and [edx], eax
4964# endif
4965 }
4966# endif
4967
4968# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4969 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4970 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4971 "and %[uNew], %[uNew], %[uVal]\n\t",
4972 [uVal] "r" (u32));
4973
4974# else
4975# error "Port me"
4976# endif
4977}
4978#endif
4979
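/*
 * Usage sketch (illustrative only): atomically clearing a flag bit is the AND
 * of the complement, the counterpart to the OR example further up.  It uses
 * the same hypothetical flag constant and variable.
 */
#if 0 /* example only */
static void exampleClearShutdown(void)
{
    ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_F_SHUTDOWN); /* clear just this bit */
}
#endif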
4980
4981/**
4982 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4983 *
4984 * @returns Old value.
4985 * @param pu32 Pointer to the variable to AND @a u32 with.
4986 * @param u32 The value to AND @a *pu32 with.
4987 */
4988DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4989{
4990#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4991 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4992 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4993 "and %[uNew], %[uOld], %[uVal]\n\t",
4994 [uVal] "r" (u32));
4995 return u32OldRet;
4996
4997#else
4998 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4999 uint32_t u32New;
5000 do
5001 u32New = u32RetOld & u32;
5002 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5003 return u32RetOld;
5004#endif
5005}
5006
5007
5008/**
5009 * Atomically And a signed 32-bit value, ordered.
5010 *
5011 * @param pi32 Pointer to the variable to AND i32 with.
5012 * @param i32 The value to AND *pi32 with.
5013 *
5014 * @remarks x86: Requires a 386 or later.
5015 */
5016DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5017{
5018 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5019}
5020
5021
5022/**
5023 * Atomically And an unsigned 64-bit value, ordered.
5024 *
5025 * @param pu64 Pointer to the variable to AND u64 with.
5026 * @param u64 The value to AND *pu64 with.
5027 *
5028 * @remarks x86: Requires a Pentium or later.
5029 */
5030#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5031DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5032#else
5033DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5034{
5035# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
5036 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
5037
5038# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5039 __asm__ __volatile__("lock; andq %1, %0\n\t"
5040 : "=m" (*pu64)
5041 : "r" (u64)
5042 , "m" (*pu64)
5043 : "cc");
5044
5045# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5046 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
5047 "and %[uNew], %[uNew], %[uVal]\n\t"
5048 ,
5049 "and %[uNew], %[uNew], %[uVal]\n\t"
5050 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5051 [uVal] "r" (u64));
5052
5053# else
5054 for (;;)
5055 {
5056 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5057 uint64_t u64New = u64Old & u64;
5058 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5059 break;
5060 ASMNopPause();
5061 }
5062# endif
5063}
5064#endif
5065
5066
5067/**
5068 * Atomically And a signed 64-bit value, ordered.
5069 *
5070 * @param pi64 Pointer to the variable to AND i64 with.
5071 * @param i64 The value to AND *pi64 with.
5072 *
5073 * @remarks x86: Requires a Pentium or later.
5074 */
5075DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5076{
5077 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5078}
5079
5080
5081/**
5082 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
5083 *
5084 * @param pu32 Pointer to the variable to XOR @a u32 with.
5085 * @param u32 The value to XOR @a *pu32 with.
5086 *
5087 * @remarks x86: Requires a 386 or later.
5088 */
5089#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5090RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5091#else
5092DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5093{
5094# if RT_INLINE_ASM_USES_INTRIN
5095 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
5096
5097# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5098# if RT_INLINE_ASM_GNU_STYLE
5099 __asm__ __volatile__("lock; xorl %1, %0\n\t"
5100 : "=m" (*pu32)
5101 : "ir" (u32)
5102 , "m" (*pu32)
5103 : "cc");
5104# else
5105 __asm
5106 {
5107 mov eax, [u32]
5108# ifdef RT_ARCH_AMD64
5109 mov rdx, [pu32]
5110 lock xor [rdx], eax
5111# else
5112 mov edx, [pu32]
5113 lock xor [edx], eax
5114# endif
5115 }
5116# endif
5117
5118# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5119 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
5120 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5121 "eor %[uNew], %[uNew], %[uVal]\n\t",
5122 [uVal] "r" (u32));
5123
5124# else
5125# error "Port me"
5126# endif
5127}
5128#endif
5129
5130
5131/**
5132 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
5133 * extended version (for bitmaps).
5134 *
5135 * @returns Old value.
5136 * @param pu32 Pointer to the variable to XOR @a u32 with.
5137 * @param u32 The value to XOR @a *pu32 with.
5138 */
5139DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5140{
5141#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5142 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
5143 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5144 "eor %[uNew], %[uOld], %[uVal]\n\t",
5145 [uVal] "r" (u32));
5146 return u32OldRet;
5147
5148#else
5149 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
5150 uint32_t u32New;
5151 do
5152 u32New = u32RetOld ^ u32;
5153 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
5154 return u32RetOld;
5155#endif
5156}
5157
5158
5159/**
5160 * Atomically XOR a signed 32-bit value, ordered.
5161 *
5162 * @param pi32 Pointer to the variable to XOR i32 with.
5163 * @param i32 The value to XOR *pi32 with.
5164 *
5165 * @remarks x86: Requires a 386 or later.
5166 */
5167DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5168{
5169 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5170}
5171
5172
5173/**
5174 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
5175 *
5176 * @param pu32 Pointer to the variable to OR u32 with.
5177 * @param u32 The value to OR *pu32 with.
5178 *
5179 * @remarks x86: Requires a 386 or later.
5180 */
5181#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5182RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5183#else
5184DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5185{
5186# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5187# if RT_INLINE_ASM_GNU_STYLE
5188 __asm__ __volatile__("orl %1, %0\n\t"
5189 : "=m" (*pu32)
5190 : "ir" (u32)
5191 , "m" (*pu32)
5192 : "cc");
5193# else
5194 __asm
5195 {
5196 mov eax, [u32]
5197# ifdef RT_ARCH_AMD64
5198 mov rdx, [pu32]
5199 or [rdx], eax
5200# else
5201 mov edx, [pu32]
5202 or [edx], eax
5203# endif
5204 }
5205# endif
5206
5207# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5208 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
5209 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
5210 "orr %[uNew], %[uNew], %[uVal]\n\t",
5211 [uVal] "r" (u32));
5212
5213# else
5214# error "Port me"
5215# endif
5216}
5217#endif
5218
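/*
 * Usage sketch (illustrative only): the unordered variant skips the LOCK
 * prefix / barriers, so on x86 it is atomic with respect to interrupts on the
 * same CPU but not against other CPUs.  A typical use is flagging work in a
 * per-CPU or per-thread status word that nothing else modifies concurrently.
 * The names are hypothetical.
 */
#if 0 /* example only */
static uint32_t volatile g_fExamplePerCpuPending = 0; /* assumed: only modified on this CPU */

static void exampleMarkPendingFromIsr(uint32_t fWhat)
{
    ASMAtomicUoOrU32(&g_fExamplePerCpuPending, fWhat); /* interrupt safe, no SMP ordering */
}
#endif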
5219
5220/**
5221 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
5222 * extended version (for bitmap fallback).
5223 *
5224 * @returns Old value.
5225 * @param pu32 Pointer to the variable to OR @a u32 with.
5226 * @param u32 The value to OR @a *pu32 with.
5227 */
5228DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5229{
5230#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5231 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
5232 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
5233 "orr %[uNew], %[uOld], %[uVal]\n\t",
5234 [uVal] "r" (u32));
5235 return u32OldRet;
5236
5237#else
5238 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5239#endif
5240}
5241
5242
5243/**
5244 * Atomically OR a signed 32-bit value, unordered.
5245 *
5246 * @param pi32 Pointer to the variable to OR i32 with.
5247 * @param i32 The value to OR *pi32 with.
5248 *
5249 * @remarks x86: Requires a 386 or later.
5250 */
5251DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5252{
5253 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5254}
5255
5256
5257/**
5258 * Atomically OR an unsigned 64-bit value, unordered.
5259 *
5260 * @param pu64 Pointer to the variable to OR u64 with.
5261 * @param u64 The value to OR *pu64 with.
5262 *
5263 * @remarks x86: Requires a Pentium or later.
5264 */
5265#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5266DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5267#else
5268DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5269{
5270# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5271 __asm__ __volatile__("orq %1, %q0\n\t"
5272 : "=m" (*pu64)
5273 : "r" (u64)
5274 , "m" (*pu64)
5275 : "cc");
5276
5277# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5278 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5279 "orr %[uNew], %[uNew], %[uVal]\n\t"
5280 ,
5281 "orr %[uNew], %[uNew], %[uVal]\n\t"
5282 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5283 [uVal] "r" (u64));
5284
5285# else
5286 for (;;)
5287 {
5288 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5289 uint64_t u64New = u64Old | u64;
5290 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5291 break;
5292 ASMNopPause();
5293 }
5294# endif
5295}
5296#endif
5297
5298
5299/**
5300 * Atomically Or a signed 64-bit value, unordered.
5301 *
5302 * @param pi64 Pointer to the variable to OR i64 with.
5303 * @param i64 The value to OR *pi64 with.
5304 *
5305 * @remarks x86: Requires a Pentium or later.
5306 */
5307DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5308{
5309 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5310}
5311
5312
5313/**
5314 * Atomically And an unsigned 32-bit value, unordered.
5315 *
5316 * @param pu32 Pointer to the variable to AND u32 with.
5317 * @param u32 The value to AND *pu32 with.
5318 *
5319 * @remarks x86: Requires a 386 or later.
5320 */
5321#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5322RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5323#else
5324DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5325{
5326# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5327# if RT_INLINE_ASM_GNU_STYLE
5328 __asm__ __volatile__("andl %1, %0\n\t"
5329 : "=m" (*pu32)
5330 : "ir" (u32)
5331 , "m" (*pu32)
5332 : "cc");
5333# else
5334 __asm
5335 {
5336 mov eax, [u32]
5337# ifdef RT_ARCH_AMD64
5338 mov rdx, [pu32]
5339 and [rdx], eax
5340# else
5341 mov edx, [pu32]
5342 and [edx], eax
5343# endif
5344 }
5345# endif
5346
5347# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5348 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5349 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5350 "and %[uNew], %[uNew], %[uVal]\n\t",
5351 [uVal] "r" (u32));
5352
5353# else
5354# error "Port me"
5355# endif
5356}
5357#endif
5358
5359
5360/**
5361 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5362 * bitmap fallback).
5363 *
5364 * @returns Old value.
5365 * @param pu32 Pointer to the variable to AND @a u32 with.
5366 * @param u32 The value to AND @a *pu32 with.
5367 */
5368DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5369{
5370#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5371 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5372 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5373 "and %[uNew], %[uOld], %[uVal]\n\t",
5374 [uVal] "r" (u32));
5375 return u32OldRet;
5376
5377#else
5378 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5379#endif
5380}
5381
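/*
 * Usage sketch (illustrative only): the 'Ex' variants return the old value,
 * which is how the bitmap test-and-modify fallbacks further down tell whether
 * the affected bit was previously set.  The same idea in isolation:
 *
 *      uint32_t volatile fFlags  = UINT32_C(0x3);
 *      uint32_t const    fOld    = ASMAtomicUoAndExU32(&fFlags, ~UINT32_C(1));
 *      bool const        fWasSet = RT_BOOL(fOld & UINT32_C(1)); // true here
 */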
5382
5383/**
5384 * Atomically AND a signed 32-bit value, unordered.
5385 *
5386 * @param pi32 Pointer to the variable to AND i32 with.
5387 * @param i32 The value to AND *pi32 with.
5388 *
5389 * @remarks x86: Requires a 386 or later.
5390 */
5391DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5392{
5393 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5394}
5395
5396
5397/**
5398 * Atomically AND an unsigned 64-bit value, unordered.
5399 *
5400 * @param pu64 Pointer to the variable to AND u64 with.
5401 * @param u64 The value to AND *pu64 with.
5402 *
5403 * @remarks x86: Requires a Pentium or later.
5404 */
5405#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5406DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5407#else
5408DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5409{
5410# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5411 __asm__ __volatile__("andq %1, %0\n\t"
5412 : "=m" (*pu64)
5413 : "r" (u64)
5414 , "m" (*pu64)
5415 : "cc");
5416
5417# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5418 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5419 "and %[uNew], %[uNew], %[uVal]\n\t"
5420 ,
5421 "and %[uNew], %[uNew], %[uVal]\n\t"
5422 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5423 [uVal] "r" (u64));
5424
5425# else
5426 for (;;)
5427 {
5428 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5429 uint64_t u64New = u64Old & u64;
5430 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5431 break;
5432 ASMNopPause();
5433 }
5434# endif
5435}
5436#endif
5437
5438
5439/**
5440 * Atomically AND a signed 64-bit value, unordered.
5441 *
5442 * @param pi64 Pointer to the variable to AND i64 with.
5443 * @param i64 The value to AND *pi64 with.
5444 *
5445 * @remarks x86: Requires a Pentium or later.
5446 */
5447DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5448{
5449 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5450}
5451
5452
5453/**
5454 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5455 *
5456 * @param pu32 Pointer to the variable to XOR @a u32 with.
5457 * @param u32 The value to XOR @a *pu32 with.
5458 *
5459 * @remarks x86: Requires a 386 or later.
5460 */
5461#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5462RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5463#else
5464DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5465{
5466# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5467# if RT_INLINE_ASM_GNU_STYLE
5468 __asm__ __volatile__("xorl %1, %0\n\t"
5469 : "=m" (*pu32)
5470 : "ir" (u32)
5471 , "m" (*pu32)
5472 : "cc");
5473# else
5474 __asm
5475 {
5476 mov eax, [u32]
5477# ifdef RT_ARCH_AMD64
5478 mov rdx, [pu32]
5479 xor [rdx], eax
5480# else
5481 mov edx, [pu32]
5482 xor [edx], eax
5483# endif
5484 }
5485# endif
5486
5487# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5488 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5489 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5490 "eor %[uNew], %[uNew], %[uVal]\n\t",
5491 [uVal] "r" (u32));
5492
5493# else
5494# error "Port me"
5495# endif
5496}
5497#endif
5498
5499
5500/**
5501 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5502 * extended version (for bitmap fallback).
5503 *
5504 * @returns Old value.
5505 * @param pu32 Pointer to the variable to XOR @a u32 with.
5506 * @param u32 The value to XOR @a *pu32 with.
5507 */
5508DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5509{
5510#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5511 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5512 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5513 "eor %[uNew], %[uOld], %[uVal]\n\t",
5514 [uVal] "r" (u32));
5515 return u32OldRet;
5516
5517#else
5518 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5519#endif
5520}
5521
5522
5523/**
5524 * Atomically XOR a signed 32-bit value, unordered.
5525 *
5526 * @param pi32 Pointer to the variable to XOR @a i32 with.
5527 * @param i32 The value to XOR @a *pi32 with.
5528 *
5529 * @remarks x86: Requires a 386 or later.
5530 */
5531DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5532{
5533 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5534}
5535
5536
5537/**
5538 * Atomically increment an unsigned 32-bit value, unordered.
5539 *
5540 * @returns the new value.
5541 * @param pu32 Pointer to the variable to increment.
5542 *
5543 * @remarks x86: Requires a 486 or later.
5544 */
5545#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5546RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5547#else
5548DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5549{
5550# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5551 uint32_t u32;
5552# if RT_INLINE_ASM_GNU_STYLE
5553 __asm__ __volatile__("xaddl %0, %1\n\t"
5554 : "=r" (u32)
5555 , "=m" (*pu32)
5556 : "0" (1)
5557 , "m" (*pu32)
5558 : "memory" /** @todo why 'memory'? */
5559 , "cc");
5560 return u32 + 1;
5561# else
5562 __asm
5563 {
5564 mov eax, 1
5565# ifdef RT_ARCH_AMD64
5566 mov rdx, [pu32]
5567 xadd [rdx], eax
5568# else
5569 mov edx, [pu32]
5570 xadd [edx], eax
5571# endif
5572 mov u32, eax
5573 }
5574 return u32 + 1;
5575# endif
5576
5577# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5578 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5579 "add %w[uNew], %w[uNew], #1\n\t",
5580 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5581 "X" (0) /* dummy */);
5582 return u32NewRet;
5583
5584# else
5585# error "Port me"
5586# endif
5587}
5588#endif
5589
5590
5591/**
5592 * Atomically decrement an unsigned 32-bit value, unordered.
5593 *
5594 * @returns the new value.
5595 * @param pu32 Pointer to the variable to decrement.
5596 *
5597 * @remarks x86: Requires a 486 or later.
5598 */
5599#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5600RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5601#else
5602DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5603{
5604# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5605 uint32_t u32;
5606# if RT_INLINE_ASM_GNU_STYLE
5607 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5608 : "=r" (u32)
5609 , "=m" (*pu32)
5610 : "0" (-1)
5611 , "m" (*pu32)
5612 : "memory"
5613 , "cc");
5614 return u32 - 1;
5615# else
5616 __asm
5617 {
5618 mov eax, -1
5619# ifdef RT_ARCH_AMD64
5620 mov rdx, [pu32]
5621 xadd [rdx], eax
5622# else
5623 mov edx, [pu32]
5624 xadd [edx], eax
5625# endif
5626 mov u32, eax
5627 }
5628 return u32 - 1;
5629# endif
5630
5631# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5632 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5633 "sub %w[uNew], %w[uNew], #1\n\t",
5634 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5635 "X" (0) /* dummy */);
5636 return u32NewRet;
5637
5638# else
5639# error "Port me"
5640# endif
5641}
5642#endif
5643
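/*
 * Usage sketch (illustrative only; the counter is made up): the unordered
 * increment/decrement pair suits cheap statistics counters where ordering
 * relative to other memory accesses does not matter.
 *
 *      static uint32_t volatile g_cOpenRequests = 0;
 *
 *      uint32_t cNow = ASMAtomicUoIncU32(&g_cOpenRequests); // returns the new value
 *      // ... do the work ...
 *      ASMAtomicUoDecU32(&g_cOpenRequests);
 */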
5644
5645/** @def RT_ASM_PAGE_SIZE
5646 * We try to avoid dragging in iprt/param.h here.
5647 * @internal
5648 */
5649#if defined(RT_ARCH_SPARC64)
5650# define RT_ASM_PAGE_SIZE 0x2000
5651# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5652# if PAGE_SIZE != 0x2000
5653# error "PAGE_SIZE is not 0x2000!"
5654# endif
5655# endif
5656#elif defined(RT_ARCH_ARM64) && defined(RT_OS_DARWIN)
5657# define RT_ASM_PAGE_SIZE 0x4000
5658# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5659# if PAGE_SIZE != 0x4000
5660# error "PAGE_SIZE is not 0x4000!"
5661# endif
5662# endif
5663#else
5664# define RT_ASM_PAGE_SIZE 0x1000
5665# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(RT_OS_LINUX) && !defined(RT_ARCH_ARM64)
5666# if PAGE_SIZE != 0x1000
5667# error "PAGE_SIZE is not 0x1000!"
5668# endif
5669# endif
5670#endif
5671
5672/**
5673 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5674 *
5675 * @param pv Pointer to the memory block. This must be page aligned.
5676 */
5677#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5678RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5679#else
5680DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5681{
5682# if RT_INLINE_ASM_USES_INTRIN
5683# ifdef RT_ARCH_AMD64
5684 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5685# else
5686 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5687# endif
5688
5689# elif RT_INLINE_ASM_GNU_STYLE
5690 RTCCUINTREG uDummy;
5691# ifdef RT_ARCH_AMD64
5692 __asm__ __volatile__("rep stosq"
5693 : "=D" (pv),
5694 "=c" (uDummy)
5695 : "0" (pv),
5696 "c" (RT_ASM_PAGE_SIZE >> 3),
5697 "a" (0)
5698 : "memory");
5699# else
5700 __asm__ __volatile__("rep stosl"
5701 : "=D" (pv),
5702 "=c" (uDummy)
5703 : "0" (pv),
5704 "c" (RT_ASM_PAGE_SIZE >> 2),
5705 "a" (0)
5706 : "memory");
5707# endif
5708# else
5709 __asm
5710 {
5711# ifdef RT_ARCH_AMD64
5712 xor rax, rax
5713 mov ecx, 0200h
5714 mov rdi, [pv]
5715 rep stosq
5716# else
5717 xor eax, eax
5718 mov ecx, 0400h
5719 mov edi, [pv]
5720 rep stosd
5721# endif
5722 }
5723# endif
5724}
5725#endif
5726
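/*
 * Usage sketch (illustrative only): ASMMemZeroPage expects a page aligned
 * buffer of RT_ASM_PAGE_SIZE bytes; the sketch assumes pvPage came from a
 * page allocator and meets that requirement.
 *
 *      void *pvPage = <page aligned allocation of RT_ASM_PAGE_SIZE bytes>;
 *      ASMMemZeroPage(pvPage);
 */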
5727
5728/**
5729 * Zeros a memory block with a 32-bit aligned size.
5730 *
5731 * @param pv Pointer to the memory block.
5732 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5733 */
5734#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5735RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5736#else
5737DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5738{
5739# if RT_INLINE_ASM_USES_INTRIN
5740# ifdef RT_ARCH_AMD64
5741 if (!(cb & 7))
5742 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5743 else
5744# endif
5745 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5746
5747# elif RT_INLINE_ASM_GNU_STYLE
5748 __asm__ __volatile__("rep stosl"
5749 : "=D" (pv),
5750 "=c" (cb)
5751 : "0" (pv),
5752 "1" (cb >> 2),
5753 "a" (0)
5754 : "memory");
5755# else
5756 __asm
5757 {
5758 xor eax, eax
5759# ifdef RT_ARCH_AMD64
5760 mov rcx, [cb]
5761 shr rcx, 2
5762 mov rdi, [pv]
5763# else
5764 mov ecx, [cb]
5765 shr ecx, 2
5766 mov edi, [pv]
5767# endif
5768 rep stosd
5769 }
5770# endif
5771}
5772#endif
5773
5774
5775/**
5776 * Fills a memory block with a 32-bit aligned size.
5777 *
5778 * @param pv Pointer to the memory block.
5779 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5780 * @param u32 The value to fill with.
5781 */
5782#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5783RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5784#else
5785DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5786{
5787# if RT_INLINE_ASM_USES_INTRIN
5788# ifdef RT_ARCH_AMD64
5789 if (!(cb & 7))
5790 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5791 else
5792# endif
5793 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5794
5795# elif RT_INLINE_ASM_GNU_STYLE
5796 __asm__ __volatile__("rep stosl"
5797 : "=D" (pv),
5798 "=c" (cb)
5799 : "0" (pv),
5800 "1" (cb >> 2),
5801 "a" (u32)
5802 : "memory");
5803# else
5804 __asm
5805 {
5806# ifdef RT_ARCH_AMD64
5807 mov rcx, [cb]
5808 shr rcx, 2
5809 mov rdi, [pv]
5810# else
5811 mov ecx, [cb]
5812 shr ecx, 2
5813 mov edi, [pv]
5814# endif
5815 mov eax, [u32]
5816 rep stosd
5817 }
5818# endif
5819}
5820#endif
5821
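/*
 * Usage sketch (illustrative only): both ASMMemZero32 and ASMMemFill32
 * require the byte count to be a multiple of four.
 *
 *      uint32_t au32Table[64];
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xdeadbeef));
 *      ASMMemZero32(au32Table, sizeof(au32Table));
 */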
5822
5823/**
5824 * Checks if a memory block is all zeros.
5825 *
5826 * @returns Pointer to the first non-zero byte.
5827 * @returns NULL if all zero.
5828 *
5829 * @param pv Pointer to the memory block.
5830 * @param cb Number of bytes in the block.
5831 */
5832#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5833DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5834#else
5835DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5836{
5837/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5838 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5839 for (; cb; cb--, pb++)
5840 if (RT_LIKELY(*pb == 0))
5841 { /* likely */ }
5842 else
5843 return (void RT_FAR *)pb;
5844 return NULL;
5845}
5846#endif
5847
5848
5849/**
5850 * Checks if a memory block is all zeros.
5851 *
5852 * @returns true if zero, false if not.
5853 *
5854 * @param pv Pointer to the memory block.
5855 * @param cb Number of bytes in the block.
5856 *
5857 * @sa ASMMemFirstNonZero
5858 */
5859DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5860{
5861 return ASMMemFirstNonZero(pv, cb) == NULL;
5862}
5863
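/*
 * Usage sketch (illustrative only): ASMMemFirstNonZero reports where the
 * first non-zero byte lives, while ASMMemIsZero is the boolean wrapper
 * around it.
 *
 *      uint32_t au32Buf[64] = {0};        // 256 bytes, all zero
 *      ((uint8_t *)au32Buf)[100] = 1;
 *      uint8_t *pbHit = (uint8_t *)ASMMemFirstNonZero(au32Buf, sizeof(au32Buf));
 *      // pbHit == (uint8_t *)au32Buf + 100; ASMMemIsZero(au32Buf, sizeof(au32Buf)) is false.
 */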
5864
5865/**
5866 * Checks if a memory page is all zeros.
5867 *
5868 * @returns true / false.
5869 *
5870 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5871 * boundary.
5872 */
5873DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5874{
5875# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5876 union { RTCCUINTREG r; bool f; } uAX;
5877 RTCCUINTREG xCX, xDI;
5878 Assert(!((uintptr_t)pvPage & 15));
5879 __asm__ __volatile__("repe; "
5880# ifdef RT_ARCH_AMD64
5881 "scasq\n\t"
5882# else
5883 "scasl\n\t"
5884# endif
5885 "setnc %%al\n\t"
5886 : "=&c" (xCX)
5887 , "=&D" (xDI)
5888 , "=&a" (uAX.r)
5889 : "mr" (pvPage)
5890# ifdef RT_ARCH_AMD64
5891 , "0" (RT_ASM_PAGE_SIZE/8)
5892# else
5893 , "0" (RT_ASM_PAGE_SIZE/4)
5894# endif
5895 , "1" (pvPage)
5896 , "2" (0)
5897 : "cc");
5898 return uAX.f;
5899# else
5900 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5901 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5902 Assert(!((uintptr_t)pvPage & 15));
5903 for (;;)
5904 {
5905 if (puPtr[0]) return false;
5906 if (puPtr[4]) return false;
5907
5908 if (puPtr[2]) return false;
5909 if (puPtr[6]) return false;
5910
5911 if (puPtr[1]) return false;
5912 if (puPtr[5]) return false;
5913
5914 if (puPtr[3]) return false;
5915 if (puPtr[7]) return false;
5916
5917 if (!--cLeft)
5918 return true;
5919 puPtr += 8;
5920 }
5921# endif
5922}
5923
5924
5925/**
5926 * Checks if a memory block is filled with the specified byte, returning the
5927 * first mismatch.
5928 *
5929 * This is sort of an inverted memchr.
5930 *
5931 * @returns Pointer to the byte which doesn't equal u8.
5932 * @returns NULL if all equal to u8.
5933 *
5934 * @param pv Pointer to the memory block.
5935 * @param cb Number of bytes in the block.
5936 * @param u8 The value it's supposed to be filled with.
5937 *
5938 * @remarks No alignment requirements.
5939 */
5940#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5941 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5942DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5943#else
5944DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5945{
5946/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5947 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5948 for (; cb; cb--, pb++)
5949 if (RT_LIKELY(*pb == u8))
5950 { /* likely */ }
5951 else
5952 return (void *)pb;
5953 return NULL;
5954}
5955#endif
5956
5957
5958/**
5959 * Checks if a memory block is filled with the specified byte.
5960 *
5961 * @returns true if all matching, false if not.
5962 *
5963 * @param pv Pointer to the memory block.
5964 * @param cb Number of bytes in the block.
5965 * @param u8 The value it's supposed to be filled with.
5966 *
5967 * @remarks No alignment requirements.
5968 */
5969DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5970{
5971 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5972}
5973
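/*
 * Usage sketch (illustrative only): a typical use is verifying that a fill
 * pattern is still intact.
 *
 *      uint8_t abSector[512];
 *      ASMMemFill32(abSector, sizeof(abSector), UINT32_C(0xffffffff)); // erase pattern
 *      bool const fIntact = ASMMemIsAllU8(abSector, sizeof(abSector), 0xff); // true
 */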
5974
5975/**
5976 * Checks if a memory block is filled with the specified 32-bit value.
5977 *
5978 * This is a sort of inverted memchr.
5979 *
5980 * @returns Pointer to the first value which doesn't equal u32.
5981 * @returns NULL if all equal to u32.
5982 *
5983 * @param pv Pointer to the memory block.
5984 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5985 * @param u32 The value it's supposed to be filled with.
5986 */
5987DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5988{
5989/** @todo rewrite this in inline assembly? */
5990 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5991 for (; cb; cb -= 4, pu32++)
5992 if (RT_LIKELY(*pu32 == u32))
5993 { /* likely */ }
5994 else
5995 return (uint32_t RT_FAR *)pu32;
5996 return NULL;
5997}
5998
5999
6000/**
6001 * Probes a byte pointer for read access.
6002 *
6003 * While the function will fault if the byte is not read accessible,
6004 * the idea is to do this in a safe place like before acquiring locks
6005 * and such like.
6006 *
6007 * Also, this function guarantees that an eager compiler is not going
6008 * to optimize the probing away.
6009 *
6010 * @param pvByte Pointer to the byte.
6011 */
6012#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6013RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
6014#else
6015DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
6016{
6017# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6018 uint8_t u8;
6019# if RT_INLINE_ASM_GNU_STYLE
6020 __asm__ __volatile__("movb %1, %0\n\t"
6021 : "=q" (u8)
6022 : "m" (*(const uint8_t *)pvByte));
6023# else
6024 __asm
6025 {
6026# ifdef RT_ARCH_AMD64
6027 mov rax, [pvByte]
6028 mov al, [rax]
6029# else
6030 mov eax, [pvByte]
6031 mov al, [eax]
6032# endif
6033 mov [u8], al
6034 }
6035# endif
6036 return u8;
6037
6038# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6039 uint32_t u32;
6040 __asm__ __volatile__("Lstart_ASMProbeReadByte_%=:\n\t"
6041# if defined(RT_ARCH_ARM64)
6042 "ldxrb %w[uDst], %[pMem]\n\t"
6043# else
6044 "ldrexb %[uDst], %[pMem]\n\t"
6045# endif
6046 : [uDst] "=&r" (u32)
6047 : [pMem] "Q" (*(uint8_t const *)pvByte));
6048 return (uint8_t)u32;
6049
6050# else
6051# error "Port me"
6052# endif
6053}
6054#endif
6055
6056/**
6057 * Probes a buffer for read access page by page.
6058 *
6059 * While the function will fault if the buffer is not fully read
6060 * accessible, the idea is to do this in a safe place like before
6061 * acquiring locks and such like.
6062 *
6063 * Also, this function guarantees that an eager compiler is not going
6064 * to optimize the probing away.
6065 *
6066 * @param pvBuf Pointer to the buffer.
6067 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
6068 */
6069DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
6070{
6071 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
6072 /* the first byte */
6073 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
6074 ASMProbeReadByte(pu8);
6075
6076 /* the pages in between. */
6077 while (cbBuf > RT_ASM_PAGE_SIZE)
6078 {
6079 ASMProbeReadByte(pu8);
6080 cbBuf -= RT_ASM_PAGE_SIZE;
6081 pu8 += RT_ASM_PAGE_SIZE;
6082 }
6083
6084 /* the last byte */
6085 ASMProbeReadByte(pu8 + cbBuf - 1);
6086}
6087
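/*
 * Usage sketch (illustrative only; pReq and its members are made up): probe
 * a caller supplied buffer up front so that any access fault is taken here,
 * in a safe context, rather than later while a spinlock is held.
 *
 *      ASMProbeReadBuffer(pReq->pvData, pReq->cbData);
 *      // ... acquire the spinlock and process pReq->pvData ...
 */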
6088
6089/**
6090 * Reverse the byte order of the given 16-bit integer.
6091 *
6092 * @returns The byte order reversed value.
6093 * @param u16 16-bit integer value.
6094 */
6095#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6096RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6097#else
6098DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6099{
6100# if RT_INLINE_ASM_USES_INTRIN
6101 return _byteswap_ushort(u16);
6102
6103# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6104# if RT_INLINE_ASM_GNU_STYLE
6105 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6106# else
6107 _asm
6108 {
6109 mov ax, [u16]
6110 ror ax, 8
6111 mov [u16], ax
6112 }
6113# endif
6114 return u16;
6115
6116# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6117 uint32_t u32Ret;
6118 __asm__ __volatile__(
6119# if defined(RT_ARCH_ARM64)
6120 "rev16 %w[uRet], %w[uVal]\n\t"
6121# else
6122 "rev16 %[uRet], %[uVal]\n\t"
6123# endif
6124 : [uRet] "=r" (u32Ret)
6125 : [uVal] "r" (u16));
6126 return (uint16_t)u32Ret;
6127
6128# else
6129# error "Port me"
6130# endif
6131}
6132#endif
6133
6134
6135/**
6136 * Reverse the byte order of the given 32-bit integer.
6137 *
6138 * @returns The byte order reversed value.
6139 * @param u32 32-bit integer value.
6140 */
6141#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6142RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6143#else
6144DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6145{
6146# if RT_INLINE_ASM_USES_INTRIN
6147 return _byteswap_ulong(u32);
6148
6149# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6150# if RT_INLINE_ASM_GNU_STYLE
6151 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6152# else
6153 _asm
6154 {
6155 mov eax, [u32]
6156 bswap eax
6157 mov [u32], eax
6158 }
6159# endif
6160 return u32;
6161
6162# elif defined(RT_ARCH_ARM64)
6163 uint64_t u64Ret;
6164 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
6165 : [uRet] "=r" (u64Ret)
6166 : [uVal] "r" ((uint64_t)u32));
6167 return (uint32_t)u64Ret;
6168
6169# elif defined(RT_ARCH_ARM32)
6170 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6171 : [uRet] "=r" (u32)
6172 : [uVal] "[uRet]" (u32));
6173 return u32;
6174
6175# else
6176# error "Port me"
6177# endif
6178}
6179#endif
6180
6181
6182/**
6183 * Reverse the byte order of the given 64-bit integer.
6184 *
6185 * @returns The byte order reversed value.
6186 * @param u64 64-bit integer value.
6187 */
6188DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6189{
6190#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6191 return _byteswap_uint64(u64);
6192
6193# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6194 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
6195 return u64;
6196
6197# elif defined(RT_ARCH_ARM64)
6198 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
6199 : [uRet] "=r" (u64)
6200 : [uVal] "[uRet]" (u64));
6201 return u64;
6202
6203#else
6204 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6205 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6206#endif
6207}
6208
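/*
 * Usage sketch (illustrative only): converting a big endian value read off
 * the wire on a little endian host.  The RT_H2BE/RT_BE2H macro family is
 * normally preferred since it compiles to nothing on big endian hosts.
 *
 *      uint32_t const u32Wire = UINT32_C(0x12345678);    // as transmitted (big endian)
 *      uint32_t const u32Host = ASMByteSwapU32(u32Wire); // 0x78563412 on a LE host
 */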
6209
6210
6211/** @defgroup grp_inline_bits Bit Operations
6212 * @{
6213 */
6214
6215
6216/**
6217 * Sets a bit in a bitmap.
6218 *
6219 * @param pvBitmap Pointer to the bitmap (little endian). This should be
6220 * 32-bit aligned.
6221 * @param iBit The bit to set.
6222 *
6223 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6224 * However, doing so will yield better performance as well as avoiding
6225 * traps accessing the last bits in the bitmap.
6226 */
6227#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6228RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6229#else
6230DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6231{
6232# if RT_INLINE_ASM_USES_INTRIN
6233 _bittestandset((long RT_FAR *)pvBitmap, iBit);
6234
6235# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6236# if RT_INLINE_ASM_GNU_STYLE
6237 __asm__ __volatile__("btsl %1, %0"
6238 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6239 : "Ir" (iBit)
6240 , "m" (*(volatile long RT_FAR *)pvBitmap)
6241 : "memory"
6242 , "cc");
6243# else
6244 __asm
6245 {
6246# ifdef RT_ARCH_AMD64
6247 mov rax, [pvBitmap]
6248 mov edx, [iBit]
6249 bts [rax], edx
6250# else
6251 mov eax, [pvBitmap]
6252 mov edx, [iBit]
6253 bts [eax], edx
6254# endif
6255 }
6256# endif
6257
6258# else
6259 int32_t offBitmap = iBit / 32;
6260 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6261 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6262# endif
6263}
6264#endif
6265
6266
6267/**
6268 * Atomically sets a bit in a bitmap, ordered.
6269 *
6270 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6271 * aligned, otherwise the memory access isn't atomic!
6272 * @param iBit The bit to set.
6273 *
6274 * @remarks x86: Requires a 386 or later.
6275 */
6276#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6277RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6278#else
6279DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6280{
6281 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6282# if RT_INLINE_ASM_USES_INTRIN
6283 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6284# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6285# if RT_INLINE_ASM_GNU_STYLE
6286 __asm__ __volatile__("lock; btsl %1, %0"
6287 : "=m" (*(volatile long *)pvBitmap)
6288 : "Ir" (iBit)
6289 , "m" (*(volatile long *)pvBitmap)
6290 : "memory"
6291 , "cc");
6292# else
6293 __asm
6294 {
6295# ifdef RT_ARCH_AMD64
6296 mov rax, [pvBitmap]
6297 mov edx, [iBit]
6298 lock bts [rax], edx
6299# else
6300 mov eax, [pvBitmap]
6301 mov edx, [iBit]
6302 lock bts [eax], edx
6303# endif
6304 }
6305# endif
6306
6307# else
6308 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6309# endif
6310}
6311#endif
6312
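/*
 * Usage sketch (illustrative only): a bitmap is just an array of 32-bit
 * words (little endian layout, as noted above).  Keeping it 32-bit aligned
 * and sized in multiples of 32 bits keeps both the plain and the atomic
 * variants happy.
 *
 *      uint32_t au32Bitmap[256 / 32] = {0};   // room for 256 bits
 *      ASMBitSet(au32Bitmap, 10);             // non-atomic
 *      ASMAtomicBitSet(au32Bitmap, 99);       // atomic, ordered
 */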
6313
6314/**
6315 * Clears a bit in a bitmap.
6316 *
6317 * @param pvBitmap Pointer to the bitmap (little endian).
6318 * @param iBit The bit to clear.
6319 *
6320 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6321 * However, doing so will yield better performance as well as avoiding
6322 * traps accessing the last bits in the bitmap.
6323 */
6324#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6325RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6326#else
6327DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6328{
6329# if RT_INLINE_ASM_USES_INTRIN
6330 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6331
6332# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6333# if RT_INLINE_ASM_GNU_STYLE
6334 __asm__ __volatile__("btrl %1, %0"
6335 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6336 : "Ir" (iBit)
6337 , "m" (*(volatile long RT_FAR *)pvBitmap)
6338 : "memory"
6339 , "cc");
6340# else
6341 __asm
6342 {
6343# ifdef RT_ARCH_AMD64
6344 mov rax, [pvBitmap]
6345 mov edx, [iBit]
6346 btr [rax], edx
6347# else
6348 mov eax, [pvBitmap]
6349 mov edx, [iBit]
6350 btr [eax], edx
6351# endif
6352 }
6353# endif
6354
6355# else
6356 int32_t offBitmap = iBit / 32;
6357 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6358 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6359# endif
6360}
6361#endif
6362
6363
6364/**
6365 * Atomically clears a bit in a bitmap, ordered.
6366 *
6367 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6368 * aligned, otherwise the memory access isn't atomic!
6369 * @param iBit The bit to clear.
6370 *
6371 * @remarks No memory barrier, take care on smp.
6372 * @remarks x86: Requires a 386 or later.
6373 */
6374#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6375RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6376#else
6377DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6378{
6379 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6380# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6381# if RT_INLINE_ASM_GNU_STYLE
6382 __asm__ __volatile__("lock; btrl %1, %0"
6383 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6384 : "Ir" (iBit)
6385 , "m" (*(volatile long RT_FAR *)pvBitmap)
6386 : "memory"
6387 , "cc");
6388# else
6389 __asm
6390 {
6391# ifdef RT_ARCH_AMD64
6392 mov rax, [pvBitmap]
6393 mov edx, [iBit]
6394 lock btr [rax], edx
6395# else
6396 mov eax, [pvBitmap]
6397 mov edx, [iBit]
6398 lock btr [eax], edx
6399# endif
6400 }
6401# endif
6402# else
6403 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6404# endif
6405}
6406#endif
6407
6408
6409/**
6410 * Toggles a bit in a bitmap.
6411 *
6412 * @param pvBitmap Pointer to the bitmap (little endian).
6413 * @param iBit The bit to toggle.
6414 *
6415 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6416 * However, doing so will yield better performance as well as avoiding
6417 * traps accessing the last bits in the bitmap.
6418 */
6419#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6420RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6421#else
6422DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6423{
6424# if RT_INLINE_ASM_USES_INTRIN
6425 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6426# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6427# if RT_INLINE_ASM_GNU_STYLE
6428 __asm__ __volatile__("btcl %1, %0"
6429 : "=m" (*(volatile long *)pvBitmap)
6430 : "Ir" (iBit)
6431 , "m" (*(volatile long *)pvBitmap)
6432 : "memory"
6433 , "cc");
6434# else
6435 __asm
6436 {
6437# ifdef RT_ARCH_AMD64
6438 mov rax, [pvBitmap]
6439 mov edx, [iBit]
6440 btc [rax], edx
6441# else
6442 mov eax, [pvBitmap]
6443 mov edx, [iBit]
6444 btc [eax], edx
6445# endif
6446 }
6447# endif
6448# else
6449 int32_t offBitmap = iBit / 32;
6450 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6451 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6452# endif
6453}
6454#endif
6455
6456
6457/**
6458 * Atomically toggles a bit in a bitmap, ordered.
6459 *
6460 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6461 * aligned, otherwise the memory access isn't atomic!
6462 * @param iBit The bit to toggle.
6463 *
6464 * @remarks x86: Requires a 386 or later.
6465 */
6466#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6467RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6468#else
6469DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6470{
6471 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6472# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6473# if RT_INLINE_ASM_GNU_STYLE
6474 __asm__ __volatile__("lock; btcl %1, %0"
6475 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6476 : "Ir" (iBit)
6477 , "m" (*(volatile long RT_FAR *)pvBitmap)
6478 : "memory"
6479 , "cc");
6480# else
6481 __asm
6482 {
6483# ifdef RT_ARCH_AMD64
6484 mov rax, [pvBitmap]
6485 mov edx, [iBit]
6486 lock btc [rax], edx
6487# else
6488 mov eax, [pvBitmap]
6489 mov edx, [iBit]
6490 lock btc [eax], edx
6491# endif
6492 }
6493# endif
6494# else
6495 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6496# endif
6497}
6498#endif
6499
6500
6501/**
6502 * Tests and sets a bit in a bitmap.
6503 *
6504 * @returns true if the bit was set.
6505 * @returns false if the bit was clear.
6506 *
6507 * @param pvBitmap Pointer to the bitmap (little endian).
6508 * @param iBit The bit to test and set.
6509 *
6510 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6511 * However, doing so will yield better performance as well as avoiding
6512 * traps accessing the last bits in the bitmap.
6513 */
6514#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6515RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6516#else
6517DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6518{
6519 union { bool f; uint32_t u32; uint8_t u8; } rc;
6520# if RT_INLINE_ASM_USES_INTRIN
6521 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6522
6523# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6524# if RT_INLINE_ASM_GNU_STYLE
6525 __asm__ __volatile__("btsl %2, %1\n\t"
6526 "setc %b0\n\t"
6527 "andl $1, %0\n\t"
6528 : "=q" (rc.u32)
6529 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6530 : "Ir" (iBit)
6531 , "m" (*(volatile long RT_FAR *)pvBitmap)
6532 : "memory"
6533 , "cc");
6534# else
6535 __asm
6536 {
6537 mov edx, [iBit]
6538# ifdef RT_ARCH_AMD64
6539 mov rax, [pvBitmap]
6540 bts [rax], edx
6541# else
6542 mov eax, [pvBitmap]
6543 bts [eax], edx
6544# endif
6545 setc al
6546 and eax, 1
6547 mov [rc.u32], eax
6548 }
6549# endif
6550
6551# else
6552 int32_t offBitmap = iBit / 32;
6553 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6554 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6555 >> (iBit & 31);
6556 rc.u32 &= 1;
6557# endif
6558 return rc.f;
6559}
6560#endif
6561
6562
6563/**
6564 * Atomically tests and sets a bit in a bitmap, ordered.
6565 *
6566 * @returns true if the bit was set.
6567 * @returns false if the bit was clear.
6568 *
6569 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6570 * aligned, otherwise the memory access isn't atomic!
6571 * @param iBit The bit to test and set.
6572 *
6573 * @remarks x86: Requires a 386 or later.
6574 */
6575#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6576RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6577#else
6578DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6579{
6580 union { bool f; uint32_t u32; uint8_t u8; } rc;
6581 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6582# if RT_INLINE_ASM_USES_INTRIN
6583 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6584# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6585# if RT_INLINE_ASM_GNU_STYLE
6586 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6587 "setc %b0\n\t"
6588 "andl $1, %0\n\t"
6589 : "=q" (rc.u32)
6590 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6591 : "Ir" (iBit)
6592 , "m" (*(volatile long RT_FAR *)pvBitmap)
6593 : "memory"
6594 , "cc");
6595# else
6596 __asm
6597 {
6598 mov edx, [iBit]
6599# ifdef RT_ARCH_AMD64
6600 mov rax, [pvBitmap]
6601 lock bts [rax], edx
6602# else
6603 mov eax, [pvBitmap]
6604 lock bts [eax], edx
6605# endif
6606 setc al
6607 and eax, 1
6608 mov [rc.u32], eax
6609 }
6610# endif
6611
6612# else
6613 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6614 >> (iBit & 31);
6615 rc.u32 &= 1;
6616# endif
6617 return rc.f;
6618}
6619#endif
6620
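/*
 * Usage sketch (illustrative only; au32Bitmap and idxSlot stand for any
 * 32-bit aligned bitmap and a valid bit index): ASMAtomicBitTestAndSet
 * enables a simple claim/release scheme because it reports whether someone
 * else already owned the bit.
 *
 *      if (!ASMAtomicBitTestAndSet(au32Bitmap, idxSlot))
 *      {
 *          // The slot was free and is now ours.
 *          // ... use the slot ...
 *          ASMAtomicBitClear(au32Bitmap, idxSlot);
 *      }
 */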
6621
6622/**
6623 * Tests and clears a bit in a bitmap.
6624 *
6625 * @returns true if the bit was set.
6626 * @returns false if the bit was clear.
6627 *
6628 * @param pvBitmap Pointer to the bitmap (little endian).
6629 * @param iBit The bit to test and clear.
6630 *
6631 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6632 * However, doing so will yield better performance as well as avoiding
6633 * traps accessing the last bits in the bitmap.
6634 */
6635#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6636RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6637#else
6638DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6639{
6640 union { bool f; uint32_t u32; uint8_t u8; } rc;
6641# if RT_INLINE_ASM_USES_INTRIN
6642 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6643
6644# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6645# if RT_INLINE_ASM_GNU_STYLE
6646 __asm__ __volatile__("btrl %2, %1\n\t"
6647 "setc %b0\n\t"
6648 "andl $1, %0\n\t"
6649 : "=q" (rc.u32)
6650 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6651 : "Ir" (iBit)
6652 , "m" (*(volatile long RT_FAR *)pvBitmap)
6653 : "memory"
6654 , "cc");
6655# else
6656 __asm
6657 {
6658 mov edx, [iBit]
6659# ifdef RT_ARCH_AMD64
6660 mov rax, [pvBitmap]
6661 btr [rax], edx
6662# else
6663 mov eax, [pvBitmap]
6664 btr [eax], edx
6665# endif
6666 setc al
6667 and eax, 1
6668 mov [rc.u32], eax
6669 }
6670# endif
6671
6672# else
6673 int32_t offBitmap = iBit / 32;
6674 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6675 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6676 >> (iBit & 31);
6677 rc.u32 &= 1;
6678# endif
6679 return rc.f;
6680}
6681#endif
6682
6683
6684/**
6685 * Atomically tests and clears a bit in a bitmap, ordered.
6686 *
6687 * @returns true if the bit was set.
6688 * @returns false if the bit was clear.
6689 *
6690 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6691 * aligned, otherwise the memory access isn't atomic!
6692 * @param iBit The bit to test and clear.
6693 *
6694 * @remarks No memory barrier, take care on smp.
6695 * @remarks x86: Requires a 386 or later.
6696 */
6697#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6698RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6699#else
6700DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6701{
6702 union { bool f; uint32_t u32; uint8_t u8; } rc;
6703 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6704# if RT_INLINE_ASM_USES_INTRIN
6705 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6706
6707# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6708# if RT_INLINE_ASM_GNU_STYLE
6709 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6710 "setc %b0\n\t"
6711 "andl $1, %0\n\t"
6712 : "=q" (rc.u32)
6713 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6714 : "Ir" (iBit)
6715 , "m" (*(volatile long RT_FAR *)pvBitmap)
6716 : "memory"
6717 , "cc");
6718# else
6719 __asm
6720 {
6721 mov edx, [iBit]
6722# ifdef RT_ARCH_AMD64
6723 mov rax, [pvBitmap]
6724 lock btr [rax], edx
6725# else
6726 mov eax, [pvBitmap]
6727 lock btr [eax], edx
6728# endif
6729 setc al
6730 and eax, 1
6731 mov [rc.u32], eax
6732 }
6733# endif
6734
6735# else
6736 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6737 >> (iBit & 31);
6738 rc.u32 &= 1;
6739# endif
6740 return rc.f;
6741}
6742#endif
6743
6744
6745/**
6746 * Tests and toggles a bit in a bitmap.
6747 *
6748 * @returns true if the bit was set.
6749 * @returns false if the bit was clear.
6750 *
6751 * @param pvBitmap Pointer to the bitmap (little endian).
6752 * @param iBit The bit to test and toggle.
6753 *
6754 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6755 * However, doing so will yield better performance as well as avoiding
6756 * traps accessing the last bits in the bitmap.
6757 */
6758#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6759RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6760#else
6761DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6762{
6763 union { bool f; uint32_t u32; uint8_t u8; } rc;
6764# if RT_INLINE_ASM_USES_INTRIN
6765 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6766
6767# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6768# if RT_INLINE_ASM_GNU_STYLE
6769 __asm__ __volatile__("btcl %2, %1\n\t"
6770 "setc %b0\n\t"
6771 "andl $1, %0\n\t"
6772 : "=q" (rc.u32)
6773 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6774 : "Ir" (iBit)
6775 , "m" (*(volatile long RT_FAR *)pvBitmap)
6776 : "memory"
6777 , "cc");
6778# else
6779 __asm
6780 {
6781 mov edx, [iBit]
6782# ifdef RT_ARCH_AMD64
6783 mov rax, [pvBitmap]
6784 btc [rax], edx
6785# else
6786 mov eax, [pvBitmap]
6787 btc [eax], edx
6788# endif
6789 setc al
6790 and eax, 1
6791 mov [rc.u32], eax
6792 }
6793# endif
6794
6795# else
6796 int32_t offBitmap = iBit / 32;
6797 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6798 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6799 >> (iBit & 31);
6800 rc.u32 &= 1;
6801# endif
6802 return rc.f;
6803}
6804#endif
6805
6806
6807/**
6808 * Atomically tests and toggles a bit in a bitmap, ordered.
6809 *
6810 * @returns true if the bit was set.
6811 * @returns false if the bit was clear.
6812 *
6813 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6814 * aligned, otherwise the memory access isn't atomic!
6815 * @param iBit The bit to test and toggle.
6816 *
6817 * @remarks x86: Requires a 386 or later.
6818 */
6819#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6820RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6821#else
6822DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6823{
6824 union { bool f; uint32_t u32; uint8_t u8; } rc;
6825 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6826# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6827# if RT_INLINE_ASM_GNU_STYLE
6828 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6829 "setc %b0\n\t"
6830 "andl $1, %0\n\t"
6831 : "=q" (rc.u32)
6832 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6833 : "Ir" (iBit)
6834 , "m" (*(volatile long RT_FAR *)pvBitmap)
6835 : "memory"
6836 , "cc");
6837# else
6838 __asm
6839 {
6840 mov edx, [iBit]
6841# ifdef RT_ARCH_AMD64
6842 mov rax, [pvBitmap]
6843 lock btc [rax], edx
6844# else
6845 mov eax, [pvBitmap]
6846 lock btc [eax], edx
6847# endif
6848 setc al
6849 and eax, 1
6850 mov [rc.u32], eax
6851 }
6852# endif
6853
6854# else
6855 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6856 >> (iBit & 31);
6857 rc.u32 &= 1;
6858# endif
6859 return rc.f;
6860}
6861#endif
6862
6863
6864/**
6865 * Tests if a bit in a bitmap is set.
6866 *
6867 * @returns true if the bit is set.
6868 * @returns false if the bit is clear.
6869 *
6870 * @param pvBitmap Pointer to the bitmap (little endian).
6871 * @param iBit The bit to test.
6872 *
6873 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6874 * However, doing so will yield better performance as well as avoiding
6875 * traps accessing the last bits in the bitmap.
6876 */
6877#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6878RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6879#else
6880DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6881{
6882 union { bool f; uint32_t u32; uint8_t u8; } rc;
6883# if RT_INLINE_ASM_USES_INTRIN
6884 rc.u32 = _bittest((long *)pvBitmap, iBit);
6885
6886# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6887# if RT_INLINE_ASM_GNU_STYLE
6888
6889 __asm__ __volatile__("btl %2, %1\n\t"
6890 "setc %b0\n\t"
6891 "andl $1, %0\n\t"
6892 : "=q" (rc.u32)
6893 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6894 , "Ir" (iBit)
6895 : "memory"
6896 , "cc");
6897# else
6898 __asm
6899 {
6900 mov edx, [iBit]
6901# ifdef RT_ARCH_AMD64
6902 mov rax, [pvBitmap]
6903 bt [rax], edx
6904# else
6905 mov eax, [pvBitmap]
6906 bt [eax], edx
6907# endif
6908 setc al
6909 and eax, 1
6910 mov [rc.u32], eax
6911 }
6912# endif
6913
6914# else
6915 int32_t offBitmap = iBit / 32;
6916 AssertRelease(!((uintptr_t)pvBitmap & (sizeof(uint32_t) - 1)));
6917 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6918 rc.u32 &= 1;
6919# endif
6920 return rc.f;
6921}
6922#endif
6923
6924
6925/**
6926 * Clears a bit range within a bitmap.
6927 *
6928 * @param pvBitmap Pointer to the bitmap (little endian).
6929 * @param iBitStart The first bit to clear.
6930 * @param iBitEnd The first bit not to clear.
6931 */
6932DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6933{
6934 if (iBitStart < iBitEnd)
6935 {
6936 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6937 size_t iStart = iBitStart & ~(size_t)31;
6938 size_t iEnd = iBitEnd & ~(size_t)31;
6939 if (iStart == iEnd)
6940 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6941 else
6942 {
6943 /* bits in first dword. */
6944 if (iBitStart & 31)
6945 {
6946 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6947 pu32++;
6948 iBitStart = iStart + 32;
6949 }
6950
6951 /* whole dwords. */
6952 if (iBitStart != iEnd)
6953 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6954
6955 /* bits in last dword. */
6956 if (iBitEnd & 31)
6957 {
6958 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6959 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6960 }
6961 }
6962 }
6963}
6964
6965
6966/**
6967 * Sets a bit range within a bitmap.
6968 *
6969 * @param pvBitmap Pointer to the bitmap (little endian).
6970 * @param iBitStart The first bit to set.
6971 * @param iBitEnd The first bit not to set.
6972 */
6973DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, size_t iBitStart, size_t iBitEnd) RT_NOTHROW_DEF
6974{
6975 if (iBitStart < iBitEnd)
6976 {
6977 uint32_t volatile RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6978 size_t iStart = iBitStart & ~(size_t)31;
6979 size_t iEnd = iBitEnd & ~(size_t)31;
6980 if (iStart == iEnd)
6981 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6982 else
6983 {
6984 /* bits in first dword. */
6985 if (iBitStart & 31)
6986 {
6987 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6988 pu32++;
6989 iBitStart = iStart + 32;
6990 }
6991
6992 /* whole dwords. */
6993 if (iBitStart != iEnd)
6994 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
6995
6996 /* bits in last dword. */
6997 if (iBitEnd & 31)
6998 {
6999 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
7000 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
7001 }
7002 }
7003 }
7004}
7005
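/*
 * Usage sketch (illustrative only, reusing the hypothetical 256-bit
 * au32Bitmap): the range variants take a half open range, i.e. iBitEnd is
 * the first bit NOT touched.
 *
 *      ASMBitSetRange(au32Bitmap, 8, 24);     // sets bits 8 thru 23
 *      ASMBitClearRange(au32Bitmap, 0, 256);  // clears the whole bitmap
 */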
7006
7007/**
7008 * Finds the first clear bit in a bitmap.
7009 *
7010 * @returns Index of the first zero bit.
7011 * @returns -1 if no clear bit was found.
7012 * @param pvBitmap Pointer to the bitmap (little endian).
7013 * @param cBits The number of bits in the bitmap. Multiple of 32.
7014 */
7015#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7016DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
7017#else
7018DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
7019{
7020 if (cBits)
7021 {
7022 int32_t iBit;
7023# if RT_INLINE_ASM_GNU_STYLE
7024 RTCCUINTREG uEAX, uECX, uEDI;
7025 cBits = RT_ALIGN_32(cBits, 32);
7026 __asm__ __volatile__("repe; scasl\n\t"
7027 "je 1f\n\t"
7028# ifdef RT_ARCH_AMD64
7029 "lea -4(%%rdi), %%rdi\n\t"
7030 "xorl (%%rdi), %%eax\n\t"
7031 "subq %5, %%rdi\n\t"
7032# else
7033 "lea -4(%%edi), %%edi\n\t"
7034 "xorl (%%edi), %%eax\n\t"
7035 "subl %5, %%edi\n\t"
7036# endif
7037 "shll $3, %%edi\n\t"
7038 "bsfl %%eax, %%edx\n\t"
7039 "addl %%edi, %%edx\n\t"
7040 "1:\t\n"
7041 : "=d" (iBit)
7042 , "=&c" (uECX)
7043 , "=&D" (uEDI)
7044 , "=&a" (uEAX)
7045 : "0" (0xffffffff)
7046 , "mr" (pvBitmap)
7047 , "1" (cBits >> 5)
7048 , "2" (pvBitmap)
7049 , "3" (0xffffffff)
7050 : "cc");
7051# else
7052 cBits = RT_ALIGN_32(cBits, 32);
7053 __asm
7054 {
7055# ifdef RT_ARCH_AMD64
7056 mov rdi, [pvBitmap]
7057 mov rbx, rdi
7058# else
7059 mov edi, [pvBitmap]
7060 mov ebx, edi
7061# endif
7062 mov edx, 0ffffffffh
7063 mov eax, edx
7064 mov ecx, [cBits]
7065 shr ecx, 5
7066 repe scasd
7067 je done
7068
7069# ifdef RT_ARCH_AMD64
7070 lea rdi, [rdi - 4]
7071 xor eax, [rdi]
7072 sub rdi, rbx
7073# else
7074 lea edi, [edi - 4]
7075 xor eax, [edi]
7076 sub edi, ebx
7077# endif
7078 shl edi, 3
7079 bsf edx, eax
7080 add edx, edi
7081 done:
7082 mov [iBit], edx
7083 }
7084# endif
7085 return iBit;
7086 }
7087 return -1;
7088}
7089#endif
7090
7091
7092/**
7093 * Finds the next clear bit in a bitmap.
7094 *
7095 * @returns Index of the next zero bit.
7096 * @returns -1 if no clear bit was found.
7097 * @param pvBitmap Pointer to the bitmap (little endian).
7098 * @param cBits The number of bits in the bitmap. Multiple of 32.
7099 * @param iBitPrev The bit returned from the last search.
7100 * The search will start at iBitPrev + 1.
7101 */
7102#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7103DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7104#else
7105DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7106{
7107 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7108 int iBit = ++iBitPrev & 31;
7109 if (iBit)
7110 {
7111 /*
7112 * Inspect the 32-bit word containing the unaligned bit.
7113 */
7114 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
7115
7116# if RT_INLINE_ASM_USES_INTRIN
7117 unsigned long ulBit = 0;
7118 if (_BitScanForward(&ulBit, u32))
7119 return ulBit + iBitPrev;
7120# else
7121# if RT_INLINE_ASM_GNU_STYLE
7122 __asm__ __volatile__("bsf %1, %0\n\t"
7123 "jnz 1f\n\t"
7124 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
7125 "1:\n\t"
7126 : "=r" (iBit)
7127 : "r" (u32)
7128 : "cc");
7129# else
7130 __asm
7131 {
7132 mov edx, [u32]
7133 bsf eax, edx
7134 jnz done
7135 mov eax, 0ffffffffh
7136 done:
7137 mov [iBit], eax
7138 }
7139# endif
7140 if (iBit >= 0)
7141 return iBit + (int)iBitPrev;
7142# endif
7143
7144 /*
7145 * Skip ahead and see if there is anything left to search.
7146 */
7147 iBitPrev |= 31;
7148 iBitPrev++;
7149 if (cBits <= (uint32_t)iBitPrev)
7150 return -1;
7151 }
7152
7153 /*
7154 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
7155 */
7156 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7157 if (iBit >= 0)
7158 iBit += iBitPrev;
7159 return iBit;
7160}
7161#endif
7162
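/*
 * Usage sketch (illustrative only, again assuming a 256-bit au32Bitmap):
 * combining the scan helpers with the atomic test-and-set to grab a free
 * slot in an allocation bitmap.  A real allocator would also deal with
 * wrap-around and starvation.
 *
 *      int32_t iBit = ASMBitFirstClear(au32Bitmap, 256);
 *      while (iBit >= 0 && ASMAtomicBitTestAndSet(au32Bitmap, iBit))
 *          iBit = ASMBitNextClear(au32Bitmap, 256, (uint32_t)iBit);
 *      if (iBit >= 0)
 *      {
 *          // Bit iBit is now claimed by us.
 *      }
 */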
7163
7164/**
7165 * Finds the first set bit in a bitmap.
7166 *
7167 * @returns Index of the first set bit.
7168 * @returns -1 if no set bit was found.
7169 * @param pvBitmap Pointer to the bitmap (little endian).
7170 * @param cBits The number of bits in the bitmap. Multiple of 32.
7171 */
7172#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7173DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
7174#else
7175DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
7176{
7177 if (cBits)
7178 {
7179 int32_t iBit;
7180# if RT_INLINE_ASM_GNU_STYLE
7181 RTCCUINTREG uEAX, uECX, uEDI;
7182 cBits = RT_ALIGN_32(cBits, 32);
7183 __asm__ __volatile__("repe; scasl\n\t"
7184 "je 1f\n\t"
7185# ifdef RT_ARCH_AMD64
7186 "lea -4(%%rdi), %%rdi\n\t"
7187 "movl (%%rdi), %%eax\n\t"
7188 "subq %5, %%rdi\n\t"
7189# else
7190 "lea -4(%%edi), %%edi\n\t"
7191 "movl (%%edi), %%eax\n\t"
7192 "subl %5, %%edi\n\t"
7193# endif
7194 "shll $3, %%edi\n\t"
7195 "bsfl %%eax, %%edx\n\t"
7196 "addl %%edi, %%edx\n\t"
7197 "1:\t\n"
7198 : "=d" (iBit)
7199 , "=&c" (uECX)
7200 , "=&D" (uEDI)
7201 , "=&a" (uEAX)
7202 : "0" (0xffffffff)
7203 , "mr" (pvBitmap)
7204 , "1" (cBits >> 5)
7205 , "2" (pvBitmap)
7206 , "3" (0)
7207 : "cc");
7208# else
7209 cBits = RT_ALIGN_32(cBits, 32);
7210 __asm
7211 {
7212# ifdef RT_ARCH_AMD64
7213 mov rdi, [pvBitmap]
7214 mov rbx, rdi
7215# else
7216 mov edi, [pvBitmap]
7217 mov ebx, edi
7218# endif
7219 mov edx, 0ffffffffh
7220 xor eax, eax
7221 mov ecx, [cBits]
7222 shr ecx, 5
7223 repe scasd
7224 je done
7225# ifdef RT_ARCH_AMD64
7226 lea rdi, [rdi - 4]
7227 mov eax, [rdi]
7228 sub rdi, rbx
7229# else
7230 lea edi, [edi - 4]
7231 mov eax, [edi]
7232 sub edi, ebx
7233# endif
7234 shl edi, 3
7235 bsf edx, eax
7236 add edx, edi
7237 done:
7238 mov [iBit], edx
7239 }
7240# endif
7241 return iBit;
7242 }
7243 return -1;
7244}
7245#endif
7246
7247
7248/**
7249 * Finds the next set bit in a bitmap.
7250 *
7251 * @returns Index of the next set bit.
7252 * @returns -1 if no set bit was found.
7253 * @param pvBitmap Pointer to the bitmap (little endian).
7254 * @param cBits The number of bits in the bitmap. Multiple of 32.
7255 * @param iBitPrev The bit returned from the last search.
7256 * The search will start at iBitPrev + 1.
7257 */
7258#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7259DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7260#else
7261DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7262{
7263 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7264 int iBit = ++iBitPrev & 31;
7265 if (iBit)
7266 {
7267 /*
7268 * Inspect the 32-bit word containing the unaligned bit.
7269 */
7270 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7271
7272# if RT_INLINE_ASM_USES_INTRIN
7273 unsigned long ulBit = 0;
7274 if (_BitScanForward(&ulBit, u32))
7275 return ulBit + iBitPrev;
7276# else
7277# if RT_INLINE_ASM_GNU_STYLE
7278 __asm__ __volatile__("bsf %1, %0\n\t"
7279 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7280 "movl $-1, %0\n\t"
7281 "1:\n\t"
7282 : "=r" (iBit)
7283 : "r" (u32)
7284 : "cc");
7285# else
7286 __asm
7287 {
7288 mov edx, [u32]
7289 bsf eax, edx
7290 jnz done
7291 mov eax, 0ffffffffh
7292 done:
7293 mov [iBit], eax
7294 }
7295# endif
7296 if (iBit >= 0)
7297 return iBit + (int)iBitPrev;
7298# endif
7299
7300 /*
7301 * Skip ahead and see if there is anything left to search.
7302 */
7303 iBitPrev |= 31;
7304 iBitPrev++;
7305 if (cBits <= (uint32_t)iBitPrev)
7306 return -1;
7307 }
7308
7309 /*
7310 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
7311 */
7312 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7313 if (iBit >= 0)
7314 iBit += iBitPrev;
7315 return iBit;
7316}
7317#endif
7318
7319
7320/**
7321 * Finds the first bit which is set in the given 32-bit integer.
7322 * Bits are numbered from 1 (least significant) to 32.
7323 *
7324 * @returns index [1..32] of the first set bit.
7325 * @returns 0 if all bits are cleared.
7326 * @param u32 Integer to search for set bits.
7327 * @remarks Similar to ffs() in BSD.
7328 */
7329#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7330RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7331#else
7332DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7333{
7334# if RT_INLINE_ASM_USES_INTRIN
7335 unsigned long iBit;
7336 if (_BitScanForward(&iBit, u32))
7337 iBit++;
7338 else
7339 iBit = 0;
7340
7341# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7342# if RT_INLINE_ASM_GNU_STYLE
7343 uint32_t iBit;
7344 __asm__ __volatile__("bsf %1, %0\n\t"
7345 "jnz 1f\n\t"
7346 "xorl %0, %0\n\t"
7347 "jmp 2f\n"
7348 "1:\n\t"
7349 "incl %0\n"
7350 "2:\n\t"
7351 : "=r" (iBit)
7352 : "rm" (u32)
7353 : "cc");
7354# else
7355 uint32_t iBit;
7356 _asm
7357 {
7358 bsf eax, [u32]
7359 jnz found
7360 xor eax, eax
7361 jmp done
7362 found:
7363 inc eax
7364 done:
7365 mov [iBit], eax
7366 }
7367# endif
7368
7369# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7370 /*
7371 * Using the "count leading zeros (clz)" instruction here because there
7372 * is no dedicated instruction to get the first set bit.
7373 * Need to reverse the bits in the value with "rbit" first because
7374 * "clz" starts counting from the most significant bit.
7375 */
7376 uint32_t iBit;
7377 __asm__ __volatile__(
7378# if defined(RT_ARCH_ARM64)
7379 "rbit %w[uVal], %w[uVal]\n\t"
7380 "clz %w[iBit], %w[uVal]\n\t"
7381# else
7382 "rbit %[uVal], %[uVal]\n\t"
7383 "clz %[iBit], %[uVal]\n\t"
7384# endif
7385 : [uVal] "=r" (u32)
7386 , [iBit] "=r" (iBit)
7387 : "[uVal]" (u32));
7388 if (iBit != 32)
7389 iBit++;
7390 else
7391 iBit = 0; /* No bit set. */
7392
7393# else
7394# error "Port me"
7395# endif
7396 return iBit;
7397}
7398#endif
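
/* Illustrative expected values (not part of the original header; the example
   function name is made up), showing the 1-based ffs()-style numbering: */
#if 0 /* example only */
DECLINLINE(void) asmExampleBitFirstSetU32(void)
{
    Assert(ASMBitFirstSetU32(0)                    ==  0); /* all bits clear */
    Assert(ASMBitFirstSetU32(1)                    ==  1); /* bit 0 is reported as 1 */
    Assert(ASMBitFirstSetU32(UINT32_C(0x00000060)) ==  6); /* lowest set bit is bit 5 */
    Assert(ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32); /* bit 31 is reported as 32 */
}
#endif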
7399
7400
7401/**
7402 * Finds the first bit which is set in the given 32-bit integer.
7403 * Bits are numbered from 1 (least significant) to 32.
7404 *
7405 * @returns index [1..32] of the first set bit.
7406 * @returns 0 if all bits are cleared.
7407 * @param i32 Integer to search for set bits.
7408 * @remark Similar to ffs() in BSD.
7409 */
7410DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7411{
7412 return ASMBitFirstSetU32((uint32_t)i32);
7413}
7414
7415
7416/**
7417 * Finds the first bit which is set in the given 64-bit integer.
7418 *
7419 * Bits are numbered from 1 (least significant) to 64.
7420 *
7421 * @returns index [1..64] of the first set bit.
7422 * @returns 0 if all bits are cleared.
7423 * @param u64 Integer to search for set bits.
7424 * @remarks Similar to ffs() in BSD.
7425 */
7426#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7427RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7428#else
7429DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7430{
7431# if RT_INLINE_ASM_USES_INTRIN
7432 unsigned long iBit;
7433# if ARCH_BITS == 64
7434 if (_BitScanForward64(&iBit, u64))
7435 iBit++;
7436 else
7437 iBit = 0;
7438# else
7439 if (_BitScanForward(&iBit, (uint32_t)u64))
7440 iBit++;
7441 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7442 iBit += 33;
7443 else
7444 iBit = 0;
7445# endif
7446
7447# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7448 uint64_t iBit;
7449 __asm__ __volatile__("bsfq %1, %0\n\t"
7450 "jnz 1f\n\t"
7451 "xorl %k0, %k0\n\t"
7452 "jmp 2f\n"
7453 "1:\n\t"
7454 "incl %k0\n"
7455 "2:\n\t"
7456 : "=r" (iBit)
7457 : "rm" (u64)
7458 : "cc");
7459
7460# elif defined(RT_ARCH_ARM64)
7461 uint64_t iBit;
7462 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7463 "clz %[iBit], %[uVal]\n\t"
7464 : [uVal] "=r" (u64)
7465 , [iBit] "=r" (iBit)
7466 : "[uVal]" (u64));
7467 if (iBit != 64)
7468 iBit++;
7469 else
7470 iBit = 0; /* No bit set. */
7471
7472# else
7473 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7474 if (!iBit)
7475 {
7476 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7477 if (iBit)
7478 iBit += 32;
7479 }
7480# endif
7481 return (unsigned)iBit;
7482}
7483#endif
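
/* Illustrative expected values for the 64-bit variant (not part of the original
   header; the example function name is made up): */
#if 0 /* example only */
DECLINLINE(void) asmExampleBitFirstSetU64(void)
{
    Assert(ASMBitFirstSetU64(0)                            ==  0); /* all bits clear */
    Assert(ASMBitFirstSetU64(UINT64_C(0x0000000100000000)) == 33); /* bit 32 is reported as 33 */
}
#endif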
7484
7485
7486/**
7487 * Finds the first bit which is set in the given 16-bit integer.
7488 *
7489 * Bits are numbered from 1 (least significant) to 16.
7490 *
7491 * @returns index [1..16] of the first set bit.
7492 * @returns 0 if all bits are cleared.
7493 * @param u16 Integer to search for set bits.
7494 * @remarks For 16-bit bs3kit code.
7495 */
7496#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7497RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7498#else
7499DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7500{
7501 return ASMBitFirstSetU32((uint32_t)u16);
7502}
7503#endif
7504
7505
7506/**
7507 * Finds the last bit which is set in the given 32-bit integer.
7508 * Bits are numbered from 1 (least significant) to 32.
7509 *
7510 * @returns index [1..32] of the last set bit.
7511 * @returns 0 if all bits are cleared.
7512 * @param u32 Integer to search for set bits.
7513 * @remark Similar to fls() in BSD.
7514 */
7515#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7516RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7517#else
7518DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7519{
7520# if RT_INLINE_ASM_USES_INTRIN
7521 unsigned long iBit;
7522 if (_BitScanReverse(&iBit, u32))
7523 iBit++;
7524 else
7525 iBit = 0;
7526
7527# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7528# if RT_INLINE_ASM_GNU_STYLE
7529 uint32_t iBit;
7530 __asm__ __volatile__("bsrl %1, %0\n\t"
7531 "jnz 1f\n\t"
7532 "xorl %0, %0\n\t"
7533 "jmp 2f\n"
7534 "1:\n\t"
7535 "incl %0\n"
7536 "2:\n\t"
7537 : "=r" (iBit)
7538 : "rm" (u32)
7539 : "cc");
7540# else
7541 uint32_t iBit;
7542 _asm
7543 {
7544 bsr eax, [u32]
7545 jnz found
7546 xor eax, eax
7547 jmp done
7548 found:
7549 inc eax
7550 done:
7551 mov [iBit], eax
7552 }
7553# endif
7554
7555# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7556 uint32_t iBit;
7557 __asm__ __volatile__(
7558# if defined(RT_ARCH_ARM64)
7559 "clz %w[iBit], %w[uVal]\n\t"
7560# else
7561 "clz %[iBit], %[uVal]\n\t"
7562# endif
7563 : [iBit] "=r" (iBit)
7564 : [uVal] "r" (u32));
7565 iBit = 32 - iBit;
7566
7567# else
7568# error "Port me"
7569# endif
7570 return iBit;
7571}
7572#endif
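
/* Illustrative expected values (not part of the original header; the example
   function name is made up), showing the 1-based fls()-style numbering: */
#if 0 /* example only */
DECLINLINE(void) asmExampleBitLastSetU32(void)
{
    Assert(ASMBitLastSetU32(0)                    ==  0); /* all bits clear */
    Assert(ASMBitLastSetU32(1)                    ==  1);
    Assert(ASMBitLastSetU32(UINT32_C(0x00000060)) ==  7); /* highest set bit is bit 6 */
    Assert(ASMBitLastSetU32(UINT32_C(0x80000000)) == 32);
}
#endif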
7573
7574
7575/**
7576 * Finds the last bit which is set in the given 32-bit integer.
7577 * Bits are numbered from 1 (least significant) to 32.
7578 *
7579 * @returns index [1..32] of the last set bit.
7580 * @returns 0 if all bits are cleared.
7581 * @param i32 Integer to search for set bits.
7582 * @remark Similar to fls() in BSD.
7583 */
7584DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7585{
7586 return ASMBitLastSetU32((uint32_t)i32);
7587}
7588
7589
7590/**
7591 * Finds the last bit which is set in the given 64-bit integer.
7592 *
7593 * Bits are numbered from 1 (least significant) to 64.
7594 *
7595 * @returns index [1..64] of the last set bit.
7596 * @returns 0 if all bits are cleared.
7597 * @param u64 Integer to search for set bits.
7598 * @remark Similar to fls() in BSD.
7599 */
7600#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7601RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7602#else
7603DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7604{
7605# if RT_INLINE_ASM_USES_INTRIN
7606 unsigned long iBit;
7607# if ARCH_BITS == 64
7608 if (_BitScanReverse64(&iBit, u64))
7609 iBit++;
7610 else
7611 iBit = 0;
7612# else
7613 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7614 iBit += 33;
7615 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7616 iBit++;
7617 else
7618 iBit = 0;
7619# endif
7620
7621# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7622 uint64_t iBit;
7623 __asm__ __volatile__("bsrq %1, %0\n\t"
7624 "jnz 1f\n\t"
7625 "xorl %k0, %k0\n\t"
7626 "jmp 2f\n"
7627 "1:\n\t"
7628 "incl %k0\n"
7629 "2:\n\t"
7630 : "=r" (iBit)
7631 : "rm" (u64)
7632 : "cc");
7633
7634# elif defined(RT_ARCH_ARM64)
7635 uint64_t iBit;
7636 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7637 : [iBit] "=r" (iBit)
7638 : [uVal] "r" (u64));
7639 iBit = 64 - iBit;
7640
7641# else
7642 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7643 if (iBit)
7644 iBit += 32;
7645 else
7646 iBit = ASMBitLastSetU32((uint32_t)u64);
7647# endif
7648 return (unsigned)iBit;
7649}
7650#endif
7651
7652
7653/**
7654 * Finds the last bit which is set in the given 16-bit integer.
7655 *
7656 * Bits are numbered from 1 (least significant) to 16.
7657 *
7658 * @returns index [1..16] of the last set bit.
7659 * @returns 0 if all bits are cleared.
7660 * @param u16 Integer to search for set bits.
7661 * @remarks For 16-bit bs3kit code.
7662 */
7663#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7664RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7665#else
7666DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7667{
7668 return ASMBitLastSetU32((uint32_t)u16);
7669}
7670#endif
7671
7672
7673/**
7674 * Count the number of leading zero bits in the given 32-bit integer.
7675 *
7676 * The counting starts with the most significant bit.
7677 *
7678 * @returns Number of most significant zero bits.
7679 * @returns 32 if all bits are cleared.
7680 * @param u32 Integer to consider.
7681 * @remarks Similar to __builtin_clz() in gcc, except the result for zero input is defined.
7682 */
7683#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7684RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7685#else
7686DECLINLINE(unsigned) ASMCountLeadingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7687{
7688# if RT_INLINE_ASM_USES_INTRIN
7689 unsigned long iBit;
7690 if (!_BitScanReverse(&iBit, u32))
7691 return 32;
7692 return 31 - (unsigned)iBit;
7693
7694# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7695 uint32_t iBit;
7696# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 929 vs 237 ps/call */
7697 __asm__ __volatile__("bsrl %1, %0\n\t"
7698 "cmovzl %2, %0\n\t"
7699 : "=&r" (iBit)
7700 : "rm" (u32)
7701 , "rm" ((int32_t)-1)
7702 : "cc");
7703# elif RT_INLINE_ASM_GNU_STYLE
7704 __asm__ __volatile__("bsr %1, %0\n\t"
7705 "jnz 1f\n\t"
7706 "mov $-1, %0\n\t"
7707 "1:\n\t"
7708 : "=r" (iBit)
7709 : "rm" (u32)
7710 : "cc");
7711# else
7712 _asm
7713 {
7714 bsr eax, [u32]
7715 jnz found
7716 mov eax, -1
7717 found:
7718 mov [iBit], eax
7719 }
7720# endif
7721 return 31 - iBit;
7722
7723# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7724 uint32_t iBit;
7725 __asm__ __volatile__(
7726# if defined(RT_ARCH_ARM64)
7727 "clz %w[iBit], %w[uVal]\n\t"
7728# else
7729 "clz %[iBit], %[uVal]\n\t"
7730# endif
7731 : [uVal] "=r" (u32)
7732 , [iBit] "=r" (iBit)
7733 : "[uVal]" (u32));
7734 return iBit;
7735
7736# elif defined(__GNUC__)
7737 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7738 return u32 ? __builtin_clz(u32) : 32;
7739
7740# else
7741# error "Port me"
7742# endif
7743}
7744#endif
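
/* Illustrative expected values (not part of the original header; the example
   function name is made up); for non-zero input this equals 32 - ASMBitLastSetU32(u32): */
#if 0 /* example only */
DECLINLINE(void) asmExampleCountLeadingZerosU32(void)
{
    Assert(ASMCountLeadingZerosU32(0)                    == 32);
    Assert(ASMCountLeadingZerosU32(1)                    == 31);
    Assert(ASMCountLeadingZerosU32(UINT32_C(0x80000000)) ==  0);
}
#endif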
7745
7746
7747/**
7748 * Count the number of leading zero bits in the given 64-bit integer.
7749 *
7750 * The counting starts with the most significant bit.
7751 *
7752 * @returns Number of most significant zero bits.
7753 * @returns 64 if all bits are cleared.
7754 * @param u64 Integer to consider.
7755 * @remarks Similar to __builtin_clzl() in gcc, except the result for zero
7756 * input is defined.
7757 */
7758#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7759RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7760#else
7761DECLINLINE(unsigned) ASMCountLeadingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7762{
7763# if RT_INLINE_ASM_USES_INTRIN
7764 unsigned long iBit;
7765# if ARCH_BITS == 64
7766 if (_BitScanReverse64(&iBit, u64))
7767 return 63 - (unsigned)iBit;
7768# else
7769 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7770 return 31 - (unsigned)iBit;
7771 if (_BitScanReverse(&iBit, (uint32_t)u64))
7772 return 63 - (unsigned)iBit;
7773# endif
7774 return 64;
7775
7776# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7777 uint64_t iBit;
7778# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7779 __asm__ __volatile__("bsrq %1, %0\n\t"
7780 "cmovzq %2, %0\n\t"
7781 : "=&r" (iBit)
7782 : "rm" (u64)
7783 , "rm" ((int64_t)-1)
7784 : "cc");
7785# else /* 10980xe benchmark: 262 ps/call */
7786 __asm__ __volatile__("bsrq %1, %0\n\t"
7787 "jnz 1f\n\t"
7788 "mov $-1, %0\n\t"
7789 "1:\n\t"
7790 : "=&r" (iBit)
7791 : "rm" (u64)
7792 : "cc");
7793# endif
7794 return 63 - (unsigned)iBit;
7795
7796# elif defined(RT_ARCH_ARM64)
7797 uint64_t iBit;
7798 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7799 : [uVal] "=r" (u64)
7800 , [iBit] "=r" (iBit)
7801 : "[uVal]" (u64));
7802 return (unsigned)iBit;
7803
7804# elif defined(__GNUC__) && ARCH_BITS == 64
7805 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7806 return u64 ? __builtin_clzl(u64) : 64;
7807
7808# else
7809 unsigned iBit = ASMCountLeadingZerosU32((uint32_t)(u64 >> 32));
7810 if (iBit == 32)
7811 iBit = ASMCountLeadingZerosU32((uint32_t)u64) + 32;
7812 return iBit;
7813# endif
7814}
7815#endif
7816
7817
7818/**
7819 * Count the number of leading zero bits in the given 16-bit integer.
7820 *
7821 * The counting starts with the most significant bit.
7822 *
7823 * @returns Number of most significant zero bits.
7824 * @returns 16 if all bits are cleared.
7825 * @param u16 Integer to consider.
7826 */
7827#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7828RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
7829#else
7830DECLINLINE(unsigned) ASMCountLeadingZerosU16(uint16_t u16) RT_NOTHROW_DEF
7831{
7832# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 987 vs 292 ps/call) */
7833 uint16_t iBit;
7834 __asm__ __volatile__("bsrw %1, %0\n\t"
7835 "jnz 1f\n\t"
7836 "mov $-1, %0\n\t"
7837 "1:\n\t"
7838 : "=r" (iBit)
7839 : "rm" (u16)
7840 : "cc");
7841 return 15 - (int16_t)iBit;
7842# else
7843 return ASMCountLeadingZerosU32((uint32_t)u16) - 16;
7844# endif
7845}
7846#endif
7847
7848
7849/**
7850 * Count the number of trailing zero bits in the given 32-bit integer.
7851 *
7852 * The counting starts with the least significant bit, i.e. bit 0.
7853 *
7854 * @returns Number of least significant zero bits.
7855 * @returns 32 if all bits are cleared.
7856 * @param u32 Integer to consider.
7857 * @remarks Similar to __builtin_ctz() in gcc, except the result for zero input is defined.
7858 */
7859#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7860RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_PROTO;
7861#else
7862DECLINLINE(unsigned) ASMCountTrailingZerosU32(uint32_t u32) RT_NOTHROW_DEF
7863{
7864# if RT_INLINE_ASM_USES_INTRIN
7865 unsigned long iBit;
7866 if (!_BitScanForward(&iBit, u32))
7867 return 32;
7868 return (unsigned)iBit;
7869
7870# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7871 uint32_t iBit;
7872# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64) && 0 /* significantly slower on 10980xe; 932 vs 240 ps/call */
7873 __asm__ __volatile__("bsfl %1, %0\n\t"
7874 "cmovzl %2, %0\n\t"
7875 : "=&r" (iBit)
7876 : "rm" (u32)
7877 , "rm" ((int32_t)32)
7878 : "cc");
7879# elif RT_INLINE_ASM_GNU_STYLE
7880 __asm__ __volatile__("bsfl %1, %0\n\t"
7881 "jnz 1f\n\t"
7882 "mov $32, %0\n\t"
7883 "1:\n\t"
7884 : "=r" (iBit)
7885 : "rm" (u32)
7886 : "cc");
7887# else
7888 _asm
7889 {
7890 bsf eax, [u32]
7891 jnz found
7892 mov eax, 32
7893 found:
7894 mov [iBit], eax
7895 }
7896# endif
7897 return iBit;
7898
7899# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7900 /* Invert the bits and use clz. */
7901 uint32_t iBit;
7902 __asm__ __volatile__(
7903# if defined(RT_ARCH_ARM64)
7904 "rbit %w[uVal], %w[uVal]\n\t"
7905 "clz %w[iBit], %w[uVal]\n\t"
7906# else
7907 "rbit %[uVal], %[uVal]\n\t"
7908 "clz %[iBit], %[uVal]\n\t"
7909# endif
7910 : [uVal] "=r" (u32)
7911 , [iBit] "=r" (iBit)
7912 : "[uVal]" (u32));
7913 return iBit;
7914
7915# elif defined(__GNUC__)
7916 AssertCompile(sizeof(u32) == sizeof(unsigned int));
7917 return u32 ? __builtin_ctz(u32) : 32;
7918
7919# else
7920# error "Port me"
7921# endif
7922}
7923#endif
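
/* Illustrative expected values (not part of the original header; the example
   function name is made up); for non-zero input this equals ASMBitFirstSetU32(u32) - 1: */
#if 0 /* example only */
DECLINLINE(void) asmExampleCountTrailingZerosU32(void)
{
    Assert(ASMCountTrailingZerosU32(0)                    == 32);
    Assert(ASMCountTrailingZerosU32(1)                    ==  0);
    Assert(ASMCountTrailingZerosU32(UINT32_C(0x80000000)) == 31);
}
#endif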
7924
7925
7926/**
7927 * Count the number of trailing zero bits in the given 64-bit integer.
7928 *
7929 * The counting starts with the least significant bit.
7930 *
7931 * @returns Number of least significant zero bits.
7932 * @returns 64 if all bits are cleared.
7933 * @param u64 Integer to consider.
7934 * @remarks Similar to __builtin_ctzl() in gcc, except the result for zero
7935 * input is defined.
7936 */
7937#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7938RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_PROTO;
7939#else
7940DECLINLINE(unsigned) ASMCountTrailingZerosU64(uint64_t u64) RT_NOTHROW_DEF
7941{
7942# if RT_INLINE_ASM_USES_INTRIN
7943 unsigned long iBit;
7944# if ARCH_BITS == 64
7945 if (_BitScanForward64(&iBit, u64))
7946 return (unsigned)iBit;
7947# else
7948 if (_BitScanForward(&iBit, (uint32_t)u64))
7949 return (unsigned)iBit;
7950 if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7951 return (unsigned)iBit + 32;
7952# endif
7953 return 64;
7954
7955# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7956 uint64_t iBit;
7957# if 0 /* 10980xe benchmark: 932 ps/call - the slower variant */
7958 __asm__ __volatile__("bsfq %1, %0\n\t"
7959 "cmovzq %2, %0\n\t"
7960 : "=&r" (iBit)
7961 : "rm" (u64)
7962 , "rm" ((int64_t)64)
7963 : "cc");
7964# else /* 10980xe benchmark: 262 ps/call */
7965 __asm__ __volatile__("bsfq %1, %0\n\t"
7966 "jnz 1f\n\t"
7967 "mov $64, %0\n\t"
7968 "1:\n\t"
7969 : "=&r" (iBit)
7970 : "rm" (u64)
7971 : "cc");
7972# endif
7973 return (unsigned)iBit;
7974
7975# elif defined(RT_ARCH_ARM64)
7976 /* Invert the bits and use clz. */
7977 uint64_t iBit;
7978 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7979 "clz %[iBit], %[uVal]\n\t"
7980 : [uVal] "=r" (u64)
7981 , [iBit] "=r" (iBit)
7982 : "[uVal]" (u64));
7983 return (unsigned)iBit;
7984
7985# elif defined(__GNUC__) && ARCH_BITS == 64
7986 AssertCompile(sizeof(u64) == sizeof(unsigned long));
7987 return u64 ? __builtin_ctzl(u64) : 64;
7988
7989# else
7990 unsigned iBit = ASMCountTrailingZerosU32((uint32_t)u64);
7991 if (iBit == 32)
7992 iBit = ASMCountTrailingZerosU32((uint32_t)(u64 >> 32)) + 32;
7993 return iBit;
7994# endif
7995}
7996#endif
7997
7998
7999/**
8000 * Count the number of trailing zero bits in the given 16-bit integer.
8001 *
8002 * The counting starts with the least significant bit.
8003 *
8004 * @returns Number of least significant zero bits.
8005 * @returns 16 if all bits are cleared.
8006 * @param u16 Integer to consider.
8007 */
8008#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
8009RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_PROTO;
8010#else
8011DECLINLINE(unsigned) ASMCountTrailingZerosU16(uint16_t u16) RT_NOTHROW_DEF
8012{
8013# if RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && 0 /* slower (10980xe: 992 vs 349 ps/call) */
8014 uint16_t iBit;
8015 __asm__ __volatile__("bsfw %1, %0\n\t"
8016 "jnz 1f\n\t"
8017 "mov $16, %0\n\t"
8018 "1:\n\t"
8019 : "=r" (iBit)
8020 : "rm" (u16)
8021 : "cc");
8022 return iBit;
8023# else
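    /* OR in bit 16 so the 32-bit worker always finds a set bit; a zero input
       therefore yields 16 rather than 32. */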
8024 return ASMCountTrailingZerosU32((uint32_t)u16 | UINT32_C(0x10000));
8025# endif
8026}
8027#endif
8028
8029
8030/**
8031 * Rotate 32-bit unsigned value to the left by @a cShift.
8032 *
8033 * @returns Rotated value.
8034 * @param u32 The value to rotate.
8035 * @param cShift How many bits to rotate by.
8036 */
8037#ifdef __WATCOMC__
8038RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
8039#else
8040DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
8041{
8042# if RT_INLINE_ASM_USES_INTRIN
8043 return _rotl(u32, cShift);
8044
8045# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
8046 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
8047 return u32;
8048
8049# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
8050 __asm__ __volatile__(
8051# if defined(RT_ARCH_ARM64)
8052 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
8053# else
8054 "ror %[uRet], %[uVal], %[cShift]\n\t"
8055# endif
8056 : [uRet] "=r" (u32)
8057 : [uVal] "[uRet]" (u32)
8058 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
8059 return u32;
8060
8061# else
8062 cShift &= 31;
8063 return cShift ? (u32 << cShift) | (u32 >> (32 - cShift)) : u32; /* guard cShift == 0: shifting a 32-bit value by 32 is undefined in C */
8064# endif
8065}
8066#endif
8067
8068
8069/**
8070 * Rotate 32-bit unsigned value to the right by @a cShift.
8071 *
8072 * @returns Rotated value.
8073 * @param u32 The value to rotate.
8074 * @param cShift How many bits to rotate by.
8075 */
8076#ifdef __WATCOMC__
8077RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
8078#else
8079DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
8080{
8081# if RT_INLINE_ASM_USES_INTRIN
8082 return _rotr(u32, cShift);
8083
8084# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
8085 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
8086 return u32;
8087
8088# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
8089 __asm__ __volatile__(
8090# if defined(RT_ARCH_ARM64)
8091 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
8092# else
8093 "ror %[uRet], %[uVal], %[cShift]\n\t"
8094# endif
8095 : [uRet] "=r" (u32)
8096 : [uVal] "[uRet]" (u32)
8097 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
8098 return u32;
8099
8100# else
8101 cShift &= 31;
8102 return cShift ? (u32 >> cShift) | (u32 << (32 - cShift)) : u32; /* guard cShift == 0: shifting a 32-bit value by 32 is undefined in C */
8103# endif
8104}
8105#endif
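
/* Illustrative expected values (not part of the original header; the example
   function name is made up); for a given shift count the two rotates are inverses: */
#if 0 /* example only */
DECLINLINE(void) asmExampleRotateU32(void)
{
    Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 4)  == UINT32_C(0x00000018));
    Assert(ASMRotateRightU32(UINT32_C(0x00000018), 4) == UINT32_C(0x80000001));
}
#endif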
8106
8107
8108/**
8109 * Rotate 64-bit unsigned value to the left by @a cShift.
8110 *
8111 * @returns Rotated value.
8112 * @param u64 The value to rotate.
8113 * @param cShift How many bits to rotate by.
8114 */
8115DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8116{
8117#if RT_INLINE_ASM_USES_INTRIN
8118 return _rotl64(u64, cShift);
8119
8120#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8121 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8122 return u64;
8123
8124#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8125 uint32_t uSpill;
8126 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
8127 "jz 1f\n\t"
8128 "xchgl %%eax, %%edx\n\t"
8129 "1:\n\t"
8130 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8131 "jz 2f\n\t"
8132 "movl %%edx, %2\n\t" /* save the hi value in %2. */
8133 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
8134 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
8135 "2:\n\t" /* } */
8136 : "=A" (u64)
8137 , "=c" (cShift)
8138 , "=r" (uSpill)
8139 : "0" (u64)
8140 , "1" (cShift)
8141 : "cc");
8142 return u64;
8143
8144#elif defined(RT_ARCH_ARM64)
8145 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8146 : [uRet] "=r" (u64)
8147 : [uVal] "[uRet]" (u64)
8148 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
8149 return u64;
8150
8151#else
8152 cShift &= 63;
8153 return cShift ? (u64 << cShift) | (u64 >> (64 - cShift)) : u64; /* guard cShift == 0: shifting a 64-bit value by 64 is undefined in C */
8154#endif
8155}
8156
8157
8158/**
8159 * Rotate 64-bit unsigned value to the right by @a cShift.
8160 *
8161 * @returns Rotated value.
8162 * @param u64 The value to rotate.
8163 * @param cShift How many bits to rotate by.
8164 */
8165DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
8166{
8167#if RT_INLINE_ASM_USES_INTRIN
8168 return _rotr64(u64, cShift);
8169
8170#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
8171 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
8172 return u64;
8173
8174#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
8175 uint32_t uSpill;
8176 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
8177 "jz 1f\n\t"
8178 "xchgl %%eax, %%edx\n\t"
8179 "1:\n\t"
8180 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
8181 "jz 2f\n\t"
8182 "movl %%edx, %2\n\t" /* save the hi value in %2. */
8183 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
8184 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
8185 "2:\n\t" /* } */
8186 : "=A" (u64)
8187 , "=c" (cShift)
8188 , "=r" (uSpill)
8189 : "0" (u64)
8190 , "1" (cShift)
8191 : "cc");
8192 return u64;
8193
8194#elif defined(RT_ARCH_ARM64)
8195 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
8196 : [uRet] "=r" (u64)
8197 : [uVal] "[uRet]" (u64)
8198 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
8199 return u64;
8200
8201#else
8202 cShift &= 63;
8203 return cShift ? (u64 >> cShift) | (u64 << (64 - cShift)) : u64; /* guard cShift == 0: shifting a 64-bit value by 64 is undefined in C */
8204#endif
8205}
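
/* Illustrative expected values for the 64-bit rotates (not part of the original
   header; the example function name is made up): */
#if 0 /* example only */
DECLINLINE(void) asmExampleRotateU64(void)
{
    Assert(ASMRotateLeftU64(UINT64_C(0x8000000000000001), 8)  == UINT64_C(0x0000000000000180));
    Assert(ASMRotateRightU64(UINT64_C(0x0000000000000180), 8) == UINT64_C(0x8000000000000001));
}
#endif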
8206
8207/** @} */
8208
8209
8210/** @} */
8211
8212/*
8213 * Include #pragma aux definitions for Watcom C/C++.
8214 */
8215#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
8216# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
8217# undef IPRT_INCLUDED_asm_watcom_x86_16_h
8218# include "asm-watcom-x86-16.h"
8219#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
8220# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
8221# undef IPRT_INCLUDED_asm_watcom_x86_32_h
8222# include "asm-watcom-x86-32.h"
8223#endif
8224
8225#endif /* !IPRT_INCLUDED_asm_h */
8226