VirtualBox

source: vbox/trunk/include/iprt/asm.h@93752

Last change on this file since 93752 was 93752, checked in by vboxsync, 3 years ago

IPRT/asm.h,tstRTInlineAsm: Added 8-bit and 16-bit extended cmpxchg functions (needed for IEM). bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 242.7 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler and its intrinsics.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange8)
72# pragma intrinsic(_InterlockedCompareExchange16)
73# pragma intrinsic(_InterlockedCompareExchange64)
74# pragma intrinsic(_rotl)
75# pragma intrinsic(_rotr)
76# pragma intrinsic(_rotl64)
77# pragma intrinsic(_rotr64)
78# ifdef RT_ARCH_AMD64
79# pragma intrinsic(__stosq)
80# pragma intrinsic(_byteswap_uint64)
81# pragma intrinsic(_InterlockedCompareExchange128)
82# pragma intrinsic(_InterlockedExchange64)
83# pragma intrinsic(_InterlockedExchangeAdd64)
84# pragma intrinsic(_InterlockedAnd64)
85# pragma intrinsic(_InterlockedOr64)
86# pragma intrinsic(_InterlockedIncrement64)
87# pragma intrinsic(_InterlockedDecrement64)
88# endif
89#endif
90
91/*
92 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
93 */
94#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-16.h"
96#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
97# include "asm-watcom-x86-32.h"
98#endif
99
100
101/** @defgroup grp_rt_asm ASM - Assembly Routines
102 * @ingroup grp_rt
103 *
104 * @remarks The difference between ordered and unordered atomic operations is
105 * that the former will complete outstanding reads and writes before
106 * continuing, while the latter make no promises about the
107 * order. Ordered operations do not, it seems, make any 100% promise
108 * with regard to whether the operation will complete before any
109 * subsequent memory access. (Please correct if wrong.)
110 *
111 * ASMAtomicSomething operations are all ordered, while
112 * ASMAtomicUoSomething operations are unordered (note the Uo).
113 *
114 * Please note that ordered operations do not necessarily imply a
115 * compiler (memory) barrier. The user has to use the
116 * ASMCompilerBarrier() macro when that is deemed necessary.
117 *
118 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
119 * to reorder or even optimize assembler instructions away. For
120 * instance, in the following code the second rdmsr instruction is
121 * optimized away because gcc treats that instruction as deterministic:
122 *
123 * @code
124 * static inline uint32_t rdmsr_low(int idx)
125 * {
126 * uint32_t low;
127 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
128 * return low; }
129 * ...
130 * uint32_t msr1 = rdmsr_low(1);
131 * foo(msr1);
132 * msr1 = rdmsr_low(1);
133 * bar(msr1);
134 * @endcode
135 *
136 * The input parameter of rdmsr_low is the same for both calls and
137 * therefore gcc will use the result of the first call as input
138 * parameter for bar() as well. For rdmsr this is not acceptable as
139 * this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
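/*
 * Example (illustrative sketch, not from the original sources): the ordered
 * naming convention in practice. The variables are made up for the example;
 * the functions used are defined further down in this header.
 * @code
 *      static volatile uint32_t g_uPayload;   // hypothetical data published by a producer
 *      static volatile bool     g_fReady;     // hypothetical "data is ready" flag
 *
 *      g_uPayload = 42;                       // plain store of the payload
 *      ASMCompilerBarrier();                  // keep the compiler from reordering the store
 *      ASMAtomicXchgBool(&g_fReady, true);    // ordered operation flips the flag last
 * @endcode
 */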
144
145
146/** @def RT_INLINE_ASM_GCC_4_3_X_X86
147 * Used to work around some 4.3.x register allocation issues in this version of
148 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
149 * definitely not for 5.x */
150#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
151# define RT_INLINE_ASM_GCC_4_3_X_X86 1
152#else
153# define RT_INLINE_ASM_GCC_4_3_X_X86 0
154#endif
155
156/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
157 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
158 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds (PIC
159 * mode, x86).
160 *
161 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
162 * when in PIC mode on x86.
163 */
164#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
165# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# elif ( (defined(PIC) || defined(__PIC__)) \
170 && defined(RT_ARCH_X86) \
171 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
172 || defined(RT_OS_DARWIN)) )
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
174# else
175# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
176# endif
177#endif
178
179
180/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
181 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
182#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
184#else
185# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
186#endif
187
188/*
189 * ARM is great fun.
190 */
191#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
192
193# define RTASM_ARM_NO_BARRIER
194# ifdef RT_ARCH_ARM64
195# define RTASM_ARM_NO_BARRIER_IN_REG
196# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
197# define RTASM_ARM_DSB_SY "dsb sy\n\t"
198# define RTASM_ARM_DSB_SY_IN_REG
199# define RTASM_ARM_DSB_SY_COMMA_IN_REG
200# define RTASM_ARM_DMB_SY "dmb sy\n\t"
201# define RTASM_ARM_DMB_SY_IN_REG
202# define RTASM_ARM_DMB_SY_COMMA_IN_REG
203# define RTASM_ARM_DMB_ST "dmb st\n\t"
204# define RTASM_ARM_DMB_ST_IN_REG
205# define RTASM_ARM_DMB_ST_COMMA_IN_REG
206# define RTASM_ARM_DMB_LD "dmb ld\n\t"
207# define RTASM_ARM_DMB_LD_IN_REG
208# define RTASM_ARM_DMB_LD_COMMA_IN_REG
209# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
210# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
211 uint32_t rcSpill; \
212 uint32_t u32NewRet; \
213 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
214 RTASM_ARM_##barrier_type /* before label? */ \
215 "ldaxr %w[uNew], %[pMem]\n\t" \
216 modify64 \
217 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
218 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
219 : [pMem] "+m" (*a_pu32Mem) \
220 , [uNew] "=&r" (u32NewRet) \
221 , [rc] "=&r" (rcSpill) \
222 : in_reg \
223 : "cc")
224# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
225 uint32_t rcSpill; \
226 uint32_t u32OldRet; \
227 uint32_t u32NewSpill; \
228 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
229 RTASM_ARM_##barrier_type /* before label? */ \
230 "ldaxr %w[uOld], %[pMem]\n\t" \
231 modify64 \
232 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
233 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
234 : [pMem] "+m" (*a_pu32Mem) \
235 , [uOld] "=&r" (u32OldRet) \
236 , [uNew] "=&r" (u32NewSpill) \
237 , [rc] "=&r" (rcSpill) \
238 : in_reg \
239 : "cc")
240# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
241 uint32_t rcSpill; \
242 uint64_t u64NewRet; \
243 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
244 RTASM_ARM_##barrier_type /* before label? */ \
245 "ldaxr %[uNew], %[pMem]\n\t" \
246 modify64 \
247 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
248 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
249 : [pMem] "+m" (*a_pu64Mem) \
250 , [uNew] "=&r" (u64NewRet) \
251 , [rc] "=&r" (rcSpill) \
252 : in_reg \
253 : "cc")
254# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
255 uint32_t rcSpill; \
256 uint64_t u64OldRet; \
257 uint64_t u64NewSpill; \
258 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
259 RTASM_ARM_##barrier_type /* before label? */ \
260 "ldaxr %[uOld], %[pMem]\n\t" \
261 modify64 \
262 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
263 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
264 : [pMem] "+m" (*a_pu64Mem) \
265 , [uOld] "=&r" (u64OldRet) \
266 , [uNew] "=&r" (u64NewSpill) \
267 , [rc] "=&r" (rcSpill) \
268 : in_reg \
269 : "cc")
270
271# else /* RT_ARCH_ARM32 */
272# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
273# if RT_ARCH_ARM32 >= 7
274# warning armv7
275# define RTASM_ARM_NO_BARRIER_IN_REG
276# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
277# define RTASM_ARM_DSB_SY "dsb sy\n\t"
278# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_SY "dmb sy\n\t"
280# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_ST "dmb st\n\t"
282# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
283# define RTASM_ARM_DMB_LD "dmb ld\n\t"
284# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
285
286# elif RT_ARCH_ARM32 >= 6
287# warning armv6
288# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
289# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
291# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
292# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
295# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
296# elif RT_ARCH_ARM32 >= 4
297# warning armv5 or older
298# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
299# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
300# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
305# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
306# else
307# error "huh? Odd RT_ARCH_ARM32 value!"
308# endif
309# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
310# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
311# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
312# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
313# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
314 uint32_t rcSpill; \
315 uint32_t u32NewRet; \
316 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
317 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
318 "ldrex %[uNew], %[pMem]\n\t" \
319 modify32 \
320 "strex %[rc], %[uNew], %[pMem]\n\t" \
321 "cmp %[rc], #0\n\t" \
322 "bne .Ltry_again_" #name "_%=\n\t" \
323 : [pMem] "+m" (*a_pu32Mem) \
324 , [uNew] "=&r" (u32NewRet) \
325 , [rc] "=&r" (rcSpill) \
326 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
327 , in_reg \
328 : "cc")
329# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
330 uint32_t rcSpill; \
331 uint32_t u32OldRet; \
332 uint32_t u32NewSpill; \
333 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
334 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
335 "ldrex %[uOld], %[pMem]\n\t" \
336 modify32 \
337 "strex %[rc], %[uNew], %[pMem]\n\t" \
338 "cmp %[rc], #0\n\t" \
339 "bne .Ltry_again_" #name "_%=\n\t" \
340 : [pMem] "+m" (*a_pu32Mem) \
341 , [uOld] "=&r" (u32OldRet) \
342 , [uNew] "=&r" (u32NewSpill) \
343 , [rc] "=&r" (rcSpill) \
344 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
345 , in_reg \
346 : "cc")
347# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
348 uint32_t rcSpill; \
349 uint64_t u64NewRet; \
350 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
351 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
352 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
353 modify32 \
354 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
355 "cmp %[rc], #0\n\t" \
356 "bne .Ltry_again_" #name "_%=\n\t" \
357 : [pMem] "+m" (*a_pu64Mem), \
358 [uNew] "=&r" (u64NewRet), \
359 [rc] "=&r" (rcSpill) \
360 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
361 , in_reg \
362 : "cc")
363# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
364 uint32_t rcSpill; \
365 uint64_t u64OldRet; \
366 uint64_t u64NewSpill; \
367 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
368 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
369 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
370 modify32 \
371 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
372 "cmp %[rc], #0\n\t" \
373 "bne .Ltry_again_" #name "_%=\n\t" \
374 : [pMem] "+m" (*a_pu64Mem), \
375 [uOld] "=&r" (u64OldRet), \
376 [uNew] "=&r" (u64NewSpill), \
377 [rc] "=&r" (rcSpill) \
378 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
379 , in_reg \
380 : "cc")
381# endif /* RT_ARCH_ARM32 */
382#endif
383
384
385/** @def ASMReturnAddress
386 * Gets the return address of the current (or calling if you like) function or method.
387 */
388#ifdef _MSC_VER
389# ifdef __cplusplus
390extern "C"
391# endif
392void * _ReturnAddress(void);
393# pragma intrinsic(_ReturnAddress)
394# define ASMReturnAddress() _ReturnAddress()
395#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
396# define ASMReturnAddress() __builtin_return_address(0)
397#elif defined(__WATCOMC__)
398# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
399#else
400# error "Unsupported compiler."
401#endif
402
403
404/**
405 * Compiler memory barrier.
406 *
407 * Ensure that the compiler does not use any cached (register/tmp stack) memory
408 * values or any outstanding writes when returning from this function.
409 *
410 * This function must be used if non-volatile data is modified by a
411 * device or the VMM. Typical cases are port access, MMIO access,
412 * trapping instruction, etc.
413 */
414#if RT_INLINE_ASM_GNU_STYLE
415# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
416#elif RT_INLINE_ASM_USES_INTRIN
417# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
418#elif defined(__WATCOMC__)
419void ASMCompilerBarrier(void);
420#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
421DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
422{
423 __asm
424 {
425 }
426}
427#endif
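/*
 * Example (illustrative sketch, not from the original sources): polling a
 * non-volatile status block that a device fills in behind the compiler's
 * back. MYSTATUSBLOCK and myStartRequest() are made-up names.
 * @code
 *      MYSTATUSBLOCK StatusBlk = { 0 };       // hypothetical, completed by the device via DMA
 *      myStartRequest(&StatusBlk);            // hypothetical helper kicking off the I/O
 *      while (!StatusBlk.fDone)
 *          ASMCompilerBarrier();              // force a fresh read of StatusBlk.fDone each time round
 * @endcode
 */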
428
429
430/** @def ASMBreakpoint
431 * Debugger Breakpoint.
432 * @deprecated Use RT_BREAKPOINT instead.
433 * @internal
434 */
435#define ASMBreakpoint() RT_BREAKPOINT()
436
437
438/**
439 * Spinloop hint for platforms that have one; an empty function on the other
440 * platforms.
441 *
442 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
443 * spin locks.
444 */
445#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
446RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
447#else
448DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
449{
450# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
451# if RT_INLINE_ASM_GNU_STYLE
452 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
453# else
454 __asm {
455 _emit 0f3h
456 _emit 090h
457 }
458# endif
459
460# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
461 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
462
463# else
464 /* dummy */
465# endif
466}
467#endif
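/*
 * Example (illustrative sketch, not from the original sources): a bare-bones
 * test-and-set spin using the pause hint together with ASMAtomicXchgU32()
 * below. g_uMyLock is a made-up name.
 * @code
 *      static volatile uint32_t g_uMyLock;            // hypothetical: 0 = free, 1 = taken
 *      while (ASMAtomicXchgU32(&g_uMyLock, 1) != 0)
 *          ASMNopPause();                             // be polite to the sibling hyperthread
 *      // ... critical section ...
 *      ASMAtomicXchgU32(&g_uMyLock, 0);               // release
 * @endcode
 */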
468
469
470/**
471 * Atomically Exchange an unsigned 8-bit value, ordered.
472 *
473 * @returns Current *pu8 value
474 * @param pu8 Pointer to the 8-bit variable to update.
475 * @param u8 The 8-bit value to assign to *pu8.
476 */
477#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
478RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
479#else
480DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
481{
482# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
483# if RT_INLINE_ASM_GNU_STYLE
484 __asm__ __volatile__("xchgb %0, %1\n\t"
485 : "=m" (*pu8)
486 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
487 : "1" (u8)
488 , "m" (*pu8));
489# else
490 __asm
491 {
492# ifdef RT_ARCH_AMD64
493 mov rdx, [pu8]
494 mov al, [u8]
495 xchg [rdx], al
496 mov [u8], al
497# else
498 mov edx, [pu8]
499 mov al, [u8]
500 xchg [edx], al
501 mov [u8], al
502# endif
503 }
504# endif
505 return u8;
506
507# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
508 uint32_t uOld;
509 uint32_t rcSpill;
510 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
511 RTASM_ARM_DMB_SY
512# if defined(RT_ARCH_ARM64)
513 "ldaxrb %w[uOld], %[pMem]\n\t"
514 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
515 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
516# else
517 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
518 "strexb %[rc], %[uNew], %[pMem]\n\t"
519 "cmp %[rc], #0\n\t"
520 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
521# endif
522 : [pMem] "+m" (*pu8)
523 , [uOld] "=&r" (uOld)
524 , [rc] "=&r" (rcSpill)
525 : [uNew] "r" ((uint32_t)u8)
526 RTASM_ARM_DMB_SY_COMMA_IN_REG
527 : "cc");
528 return (uint8_t)uOld;
529
530# else
531# error "Port me"
532# endif
533}
534#endif
535
536
537/**
538 * Atomically Exchange a signed 8-bit value, ordered.
539 *
540 * @returns Current *pi8 value
541 * @param pi8 Pointer to the 8-bit variable to update.
542 * @param i8 The 8-bit value to assign to *pi8.
543 */
544DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
545{
546 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
547}
548
549
550/**
551 * Atomically Exchange a bool value, ordered.
552 *
553 * @returns Current *pf value
554 * @param pf Pointer to the boolean variable to update.
555 * @param f The boolean value to assign to *pf.
556 */
557DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
558{
559#ifdef _MSC_VER
560 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#else
562 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
563#endif
564}
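/*
 * Example (illustrative sketch, not from the original sources): run one-time
 * initialization exactly once. g_fInitStarted and myDoInitOnce() are made-up
 * names.
 * @code
 *      static volatile bool g_fInitStarted;   // hypothetical
 *      if (!ASMAtomicXchgBool(&g_fInitStarted, true))
 *          myDoInitOnce();                    // hypothetical; only the first caller gets here
 * @endcode
 */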
565
566
567/**
568 * Atomically Exchange an unsigned 16-bit value, ordered.
569 *
570 * @returns Current *pu16 value
571 * @param pu16 Pointer to the 16-bit variable to update.
572 * @param u16 The 16-bit value to assign to *pu16.
573 */
574#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
575RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
576#else
577DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
578{
579# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
580# if RT_INLINE_ASM_GNU_STYLE
581 __asm__ __volatile__("xchgw %0, %1\n\t"
582 : "=m" (*pu16)
583 , "=r" (u16)
584 : "1" (u16)
585 , "m" (*pu16));
586# else
587 __asm
588 {
589# ifdef RT_ARCH_AMD64
590 mov rdx, [pu16]
591 mov ax, [u16]
592 xchg [rdx], ax
593 mov [u16], ax
594# else
595 mov edx, [pu16]
596 mov ax, [u16]
597 xchg [edx], ax
598 mov [u16], ax
599# endif
600 }
601# endif
602 return u16;
603
604# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
605 uint32_t uOld;
606 uint32_t rcSpill;
607 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
608 RTASM_ARM_DMB_SY
609# if defined(RT_ARCH_ARM64)
610 "ldaxrh %w[uOld], %[pMem]\n\t"
611 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
612 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
613# else
614 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
615 "strexh %[rc], %[uNew], %[pMem]\n\t"
616 "cmp %[rc], #0\n\t"
617 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
618# endif
619 : [pMem] "+m" (*pu16)
620 , [uOld] "=&r" (uOld)
621 , [rc] "=&r" (rcSpill)
622 : [uNew] "r" ((uint32_t)u16)
623 RTASM_ARM_DMB_SY_COMMA_IN_REG
624 : "cc");
625 return (uint16_t)uOld;
626
627# else
628# error "Port me"
629# endif
630}
631#endif
632
633
634/**
635 * Atomically Exchange a signed 16-bit value, ordered.
636 *
637 * @returns Current *pi16 value
638 * @param pi16 Pointer to the 16-bit variable to update.
639 * @param i16 The 16-bit value to assign to *pi16.
640 */
641DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
642{
643 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
644}
645
646
647/**
648 * Atomically Exchange an unsigned 32-bit value, ordered.
649 *
650 * @returns Current *pu32 value
651 * @param pu32 Pointer to the 32-bit variable to update.
652 * @param u32 The 32-bit value to assign to *pu32.
653 *
654 * @remarks Does not work on 286 and earlier.
655 */
656#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
657RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
658#else
659DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
660{
661# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
662# if RT_INLINE_ASM_GNU_STYLE
663 __asm__ __volatile__("xchgl %0, %1\n\t"
664 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
665 , "=r" (u32)
666 : "1" (u32)
667 , "m" (*pu32));
668
669# elif RT_INLINE_ASM_USES_INTRIN
670 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
671
672# else
673 __asm
674 {
675# ifdef RT_ARCH_AMD64
676 mov rdx, [pu32]
677 mov eax, u32
678 xchg [rdx], eax
679 mov [u32], eax
680# else
681 mov edx, [pu32]
682 mov eax, u32
683 xchg [edx], eax
684 mov [u32], eax
685# endif
686 }
687# endif
688 return u32;
689
690# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
691 uint32_t uOld;
692 uint32_t rcSpill;
693 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
694 RTASM_ARM_DMB_SY
695# if defined(RT_ARCH_ARM64)
696 "ldaxr %w[uOld], %[pMem]\n\t"
697 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
698 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
699# else
700 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
701 "strex %[rc], %[uNew], %[pMem]\n\t"
702 "cmp %[rc], #0\n\t"
703 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
704# endif
705 : [pMem] "+m" (*pu32)
706 , [uOld] "=&r" (uOld)
707 , [rc] "=&r" (rcSpill)
708 : [uNew] "r" (u32)
709 RTASM_ARM_DMB_SY_COMMA_IN_REG
710 : "cc");
711 return uOld;
712
713# else
714# error "Port me"
715# endif
716}
717#endif
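/*
 * Example (illustrative sketch, not from the original sources): swap in a new
 * state value and act on whatever was there before. The state constants and
 * helpers are made up.
 * @code
 *      static volatile uint32_t g_uMyState;                       // hypothetical
 *      uint32_t uPrev = ASMAtomicXchgU32(&g_uMyState, MYSTATE_SHUTDOWN);
 *      if (uPrev == MYSTATE_RUNNING)                              // MYSTATE_* are hypothetical
 *          myStopWorkers();                                       // hypothetical
 * @endcode
 */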
718
719
720/**
721 * Atomically Exchange a signed 32-bit value, ordered.
722 *
723 * @returns Current *pi32 value
724 * @param pi32 Pointer to the 32-bit variable to update.
725 * @param i32 The 32-bit value to assign to *pi32.
726 */
727DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
728{
729 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
730}
731
732
733/**
734 * Atomically Exchange an unsigned 64-bit value, ordered.
735 *
736 * @returns Current *pu64 value
737 * @param pu64 Pointer to the 64-bit variable to update.
738 * @param u64 The 64-bit value to assign to *pu64.
739 *
740 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
741 */
742#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
743 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
744RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
745#else
746DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
747{
748# if defined(RT_ARCH_AMD64)
749# if RT_INLINE_ASM_USES_INTRIN
750 return _InterlockedExchange64((__int64 *)pu64, u64);
751
752# elif RT_INLINE_ASM_GNU_STYLE
753 __asm__ __volatile__("xchgq %0, %1\n\t"
754 : "=m" (*pu64)
755 , "=r" (u64)
756 : "1" (u64)
757 , "m" (*pu64));
758 return u64;
759# else
760 __asm
761 {
762 mov rdx, [pu64]
763 mov rax, [u64]
764 xchg [rdx], rax
765 mov [u64], rax
766 }
767 return u64;
768# endif
769
770# elif defined(RT_ARCH_X86)
771# if RT_INLINE_ASM_GNU_STYLE
772# if defined(PIC) || defined(__PIC__)
773 uint32_t u32EBX = (uint32_t)u64;
774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
775 "xchgl %%ebx, %3\n\t"
776 "1:\n\t"
777 "lock; cmpxchg8b (%5)\n\t"
778 "jnz 1b\n\t"
779 "movl %3, %%ebx\n\t"
780 /*"xchgl %%esi, %5\n\t"*/
781 : "=A" (u64)
782 , "=m" (*pu64)
783 : "0" (*pu64)
784 , "m" ( u32EBX )
785 , "c" ( (uint32_t)(u64 >> 32) )
786 , "S" (pu64)
787 : "cc");
788# else /* !PIC */
789 __asm__ __volatile__("1:\n\t"
790 "lock; cmpxchg8b %1\n\t"
791 "jnz 1b\n\t"
792 : "=A" (u64)
793 , "=m" (*pu64)
794 : "0" (*pu64)
795 , "b" ( (uint32_t)u64 )
796 , "c" ( (uint32_t)(u64 >> 32) )
797 : "cc");
798# endif
799# else
800 __asm
801 {
802 mov ebx, dword ptr [u64]
803 mov ecx, dword ptr [u64 + 4]
804 mov edi, pu64
805 mov eax, dword ptr [edi]
806 mov edx, dword ptr [edi + 4]
807 retry:
808 lock cmpxchg8b [edi]
809 jnz retry
810 mov dword ptr [u64], eax
811 mov dword ptr [u64 + 4], edx
812 }
813# endif
814 return u64;
815
816# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
817 uint32_t rcSpill;
818 uint64_t uOld;
819 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
820 RTASM_ARM_DMB_SY
821# if defined(RT_ARCH_ARM64)
822 "ldaxr %[uOld], %[pMem]\n\t"
823 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
824 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
825# else
826 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
827 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
828 "cmp %[rc], #0\n\t"
829 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
830# endif
831 : [pMem] "+m" (*pu64)
832 , [uOld] "=&r" (uOld)
833 , [rc] "=&r" (rcSpill)
834 : [uNew] "r" (u64)
835 RTASM_ARM_DMB_SY_COMMA_IN_REG
836 : "cc");
837 return uOld;
838
839# else
840# error "Port me"
841# endif
842}
843#endif
844
845
846/**
847 * Atomically Exchange a signed 64-bit value, ordered.
848 *
849 * @returns Current *pi64 value
850 * @param pi64 Pointer to the 64-bit variable to update.
851 * @param i64 The 64-bit value to assign to *pi64.
852 */
853DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
854{
855 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
856}
857
858
859/**
860 * Atomically Exchange a size_t value, ordered.
861 *
862 * @returns Current *puDst value
863 * @param puDst Pointer to the size_t variable to update.
864 * @param uNew The new value to assign to *puDst.
865 */
866DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
867{
868#if ARCH_BITS == 16
869 AssertCompile(sizeof(size_t) == 2);
870 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 32
872 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
873#elif ARCH_BITS == 64
874 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
875#else
876# error "ARCH_BITS is bogus"
877#endif
878}
879
880
881/**
882 * Atomically Exchange a pointer value, ordered.
883 *
884 * @returns Current *ppv value
885 * @param ppv Pointer to the pointer variable to update.
886 * @param pv The pointer value to assign to *ppv.
887 */
888DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
889{
890#if ARCH_BITS == 32 || ARCH_BITS == 16
891 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
892#elif ARCH_BITS == 64
893 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
894#else
895# error "ARCH_BITS is bogus"
896#endif
897}
898
899
900/**
901 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
902 *
903 * @returns Current *ppv value
904 * @param ppv Pointer to the pointer variable to update.
905 * @param pv The pointer value to assign to *ppv.
906 * @param Type The type of *ppv, sans volatile.
907 */
908#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
909# define ASMAtomicXchgPtrT(ppv, pv, Type) \
910 __extension__ \
911 ({\
912 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
913 Type const pvTypeChecked = (pv); \
914 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
915 pvTypeCheckedRet; \
916 })
917#else
918# define ASMAtomicXchgPtrT(ppv, pv, Type) \
919 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
920#endif
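/*
 * Example (illustrative sketch, not from the original sources): detach a
 * producer-built list in one shot while keeping the pointer type. MYNODE and
 * g_pMyHead are made-up names.
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;    // hypothetical
 *      static MYNODE * volatile g_pMyHead;                        // hypothetical
 *
 *      MYNODE *pList = ASMAtomicXchgPtrT(&g_pMyHead, NULL, MYNODE *);
 *      // pList now owns the whole chain and can be walked without further atomics.
 * @endcode
 */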
921
922
923/**
924 * Atomically Exchange a raw-mode context pointer value, ordered.
925 *
926 * @returns Current *ppvRC value
927 * @param ppvRC Pointer to the pointer variable to update.
928 * @param pvRC The pointer value to assign to *ppvRC.
929 */
930DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
931{
932 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
933}
934
935
936/**
937 * Atomically Exchange a ring-0 pointer value, ordered.
938 *
939 * @returns Current *ppvR0 value
940 * @param ppvR0 Pointer to the pointer variable to update.
941 * @param pvR0 The pointer value to assign to *ppvR0.
942 */
943DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
944{
945#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
946 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
947#elif R0_ARCH_BITS == 64
948 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
949#else
950# error "R0_ARCH_BITS is bogus"
951#endif
952}
953
954
955/**
956 * Atomically Exchange a ring-3 pointer value, ordered.
957 *
958 * @returns Current *ppvR3 value
959 * @param ppvR3 Pointer to the pointer variable to update.
960 * @param pvR3 The pointer value to assign to *ppvR3.
961 */
962DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
963{
964#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
965 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
966#elif R3_ARCH_BITS == 64
967 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
968#else
969# error "R3_ARCH_BITS is bogus"
970#endif
971}
972
973
974/** @def ASMAtomicXchgHandle
975 * Atomically Exchange a typical IPRT handle value, ordered.
976 *
977 * @param ph Pointer to the value to update.
978 * @param hNew The new value to assign to *ph.
979 * @param phRes Where to store the current *ph value.
980 *
981 * @remarks This doesn't currently work for all handles (like RTFILE).
982 */
983#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
984# define ASMAtomicXchgHandle(ph, hNew, phRes) \
985 do { \
986 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
987 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
988 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
989 } while (0)
990#elif HC_ARCH_BITS == 64
991# define ASMAtomicXchgHandle(ph, hNew, phRes) \
992 do { \
993 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
994 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
995 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
996 } while (0)
997#else
998# error HC_ARCH_BITS
999#endif
1000
1001
1002/**
1003 * Atomically Exchange a value whose size might differ
1004 * between platforms or compilers, ordered.
1005 *
1006 * @param pu Pointer to the variable to update.
1007 * @param uNew The value to assign to *pu.
1008 * @todo This is busted as it's missing the result argument.
1009 */
1010#define ASMAtomicXchgSize(pu, uNew) \
1011 do { \
1012 switch (sizeof(*(pu))) { \
1013 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1014 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1015 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1016 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1017 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1018 } \
1019 } while (0)
1020
1021/**
1022 * Atomically Exchange a value whose size might differ
1023 * between platforms or compilers, ordered.
1024 *
1025 * @param pu Pointer to the variable to update.
1026 * @param uNew The value to assign to *pu.
1027 * @param puRes Where to store the current *pu value.
1028 */
1029#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1030 do { \
1031 switch (sizeof(*(pu))) { \
1032 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1033 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1034 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1035 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1036 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
1037 } \
1038 } while (0)
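/*
 * Example (illustrative sketch, not from the original sources): the
 * size-correct variant picks the right width from the operand type, here a
 * pointer-sized counter. The variable names are made up.
 * @code
 *      static volatile size_t g_cMyUsers;     // hypothetical
 *      size_t cOldUsers;
 *      ASMAtomicXchgSizeCorrect(&g_cMyUsers, 0, &cOldUsers);  // reset and capture the old count
 * @endcode
 */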
1039
1040
1041
1042/**
1043 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1044 *
1045 * @returns true if xchg was done.
1046 * @returns false if xchg wasn't done.
1047 *
1048 * @param pu8 Pointer to the value to update.
1049 * @param u8New The new value to assign to *pu8.
1050 * @param u8Old The old value to compare *pu8 with.
1051 *
1052 * @remarks x86: Requires a 486 or later.
1053 * @todo Rename ASMAtomicCmpWriteU8
1054 */
1055#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1056RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1057#else
1058DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1059{
1060# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1061 uint8_t u8Ret;
1062 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1063 "setz %1\n\t"
1064 : "=m" (*pu8)
1065 , "=qm" (u8Ret)
1066 , "=a" (u8Old)
1067 : "q" (u8New)
1068 , "2" (u8Old)
1069 , "m" (*pu8)
1070 : "cc");
1071 return (bool)u8Ret;
1072
1073# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1074 union { uint32_t u; bool f; } fXchg;
1075 uint32_t u32Spill;
1076 uint32_t rcSpill;
1077 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1078 RTASM_ARM_DMB_SY
1079# if defined(RT_ARCH_ARM64)
1080 "ldaxrb %w[uOld], %[pMem]\n\t"
1081 "cmp %w[uOld], %w[uCmp]\n\t"
1082 "bne 1f\n\t" /* stop here if not equal */
1083 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1084 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1085 "mov %w[fXchg], #1\n\t"
1086# else
1087 "ldrexb %[uOld], %[pMem]\n\t"
1088 "teq %[uOld], %[uCmp]\n\t"
1089 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1090 "bne 1f\n\t" /* stop here if not equal */
1091 "cmp %[rc], #0\n\t"
1092 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1093 "mov %[fXchg], #1\n\t"
1094# endif
1095 "1:\n\t"
1096 : [pMem] "+m" (*pu8)
1097 , [uOld] "=&r" (u32Spill)
1098 , [rc] "=&r" (rcSpill)
1099 , [fXchg] "=&r" (fXchg.u)
1100 : [uCmp] "r" ((uint32_t)u8Old)
1101 , [uNew] "r" ((uint32_t)u8New)
1102 , "[fXchg]" (0)
1103 RTASM_ARM_DMB_SY_COMMA_IN_REG
1104 : "cc");
1105 return fXchg.f;
1106
1107# else
1108# error "Port me"
1109# endif
1110}
1111#endif
1112
1113
1114/**
1115 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1116 *
1117 * @returns true if xchg was done.
1118 * @returns false if xchg wasn't done.
1119 *
1120 * @param pi8 Pointer to the value to update.
1121 * @param i8New The new value to assign to *pi8.
1122 * @param i8Old The old value to compare *pi8 with.
1123 *
1124 * @remarks x86: Requires a 486 or later.
1125 * @todo Rename ASMAtomicCmpWriteS8
1126 */
1127DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1128{
1129 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1130}
1131
1132
1133/**
1134 * Atomically Compare and Exchange a bool value, ordered.
1135 *
1136 * @returns true if xchg was done.
1137 * @returns false if xchg wasn't done.
1138 *
1139 * @param pf Pointer to the value to update.
1140 * @param fNew The new value to assign to *pf.
1141 * @param fOld The old value to compare *pf with.
1142 *
1143 * @remarks x86: Requires a 486 or later.
1144 * @todo Rename ASMAtomicCmpWriteBool
1145 */
1146DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1147{
1148 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1149}
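/*
 * Example (illustrative sketch, not from the original sources): claim
 * ownership only if nobody else has. g_fMyOwner is a made-up name.
 * @code
 *      static volatile bool g_fMyOwner;       // hypothetical
 *      if (ASMAtomicCmpXchgBool(&g_fMyOwner, true, false))
 *      {
 *          // we won the race and are now the owner
 *      }
 * @endcode
 */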
1150
1151
1152/**
1153 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1154 *
1155 * @returns true if xchg was done.
1156 * @returns false if xchg wasn't done.
1157 *
1158 * @param pu32 Pointer to the value to update.
1159 * @param u32New The new value to assign to *pu32.
1160 * @param u32Old The old value to compare *pu32 with.
1161 *
1162 * @remarks x86: Requires a 486 or later.
1163 * @todo Rename ASMAtomicCmpWriteU32
1164 */
1165#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1166RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1167#else
1168DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1169{
1170# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1171# if RT_INLINE_ASM_GNU_STYLE
1172 uint8_t u8Ret;
1173 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1174 "setz %1\n\t"
1175 : "=m" (*pu32)
1176 , "=qm" (u8Ret)
1177 , "=a" (u32Old)
1178 : "r" (u32New)
1179 , "2" (u32Old)
1180 , "m" (*pu32)
1181 : "cc");
1182 return (bool)u8Ret;
1183
1184# elif RT_INLINE_ASM_USES_INTRIN
1185 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1186
1187# else
1188 uint32_t u32Ret;
1189 __asm
1190 {
1191# ifdef RT_ARCH_AMD64
1192 mov rdx, [pu32]
1193# else
1194 mov edx, [pu32]
1195# endif
1196 mov eax, [u32Old]
1197 mov ecx, [u32New]
1198# ifdef RT_ARCH_AMD64
1199 lock cmpxchg [rdx], ecx
1200# else
1201 lock cmpxchg [edx], ecx
1202# endif
1203 setz al
1204 movzx eax, al
1205 mov [u32Ret], eax
1206 }
1207 return !!u32Ret;
1208# endif
1209
1210# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1211 union { uint32_t u; bool f; } fXchg;
1212 uint32_t u32Spill;
1213 uint32_t rcSpill;
1214 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1215 RTASM_ARM_DMB_SY
1216# if defined(RT_ARCH_ARM64)
1217 "ldaxr %w[uOld], %[pMem]\n\t"
1218 "cmp %w[uOld], %w[uCmp]\n\t"
1219 "bne 1f\n\t" /* stop here if not equal */
1220 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1221 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1222 "mov %w[fXchg], #1\n\t"
1223# else
1224 "ldrex %[uOld], %[pMem]\n\t"
1225 "teq %[uOld], %[uCmp]\n\t"
1226 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1227 "bne 1f\n\t" /* stop here if not equal */
1228 "cmp %[rc], #0\n\t"
1229 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1230 "mov %[fXchg], #1\n\t"
1231# endif
1232 "1:\n\t"
1233 : [pMem] "+m" (*pu32)
1234 , [uOld] "=&r" (u32Spill)
1235 , [rc] "=&r" (rcSpill)
1236 , [fXchg] "=&r" (fXchg.u)
1237 : [uCmp] "r" (u32Old)
1238 , [uNew] "r" (u32New)
1239 , "[fXchg]" (0)
1240 RTASM_ARM_DMB_SY_COMMA_IN_REG
1241 : "cc");
1242 return fXchg.f;
1243
1244# else
1245# error "Port me"
1246# endif
1247}
1248#endif
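/*
 * Example (illustrative sketch, not from the original sources): the usual
 * compare-exchange update loop, here incrementing a counter with a made-up
 * upper bound. ASMAtomicReadU32() is declared further down in the full header.
 * @code
 *      static volatile uint32_t g_cMyItems;   // hypothetical
 *      uint32_t cOld;
 *      do
 *          cOld = ASMAtomicReadU32(&g_cMyItems);
 *      while (   cOld < MY_MAX_ITEMS          // MY_MAX_ITEMS is hypothetical
 *             && !ASMAtomicCmpXchgU32(&g_cMyItems, cOld + 1, cOld));
 * @endcode
 */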
1249
1250
1251/**
1252 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1253 *
1254 * @returns true if xchg was done.
1255 * @returns false if xchg wasn't done.
1256 *
1257 * @param pi32 Pointer to the value to update.
1258 * @param i32New The new value to assign to *pi32.
1259 * @param i32Old The old value to compare *pi32 with.
1260 *
1261 * @remarks x86: Requires a 486 or later.
1262 * @todo Rename ASMAtomicCmpWriteS32
1263 */
1264DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1265{
1266 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1267}
1268
1269
1270/**
1271 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1272 *
1273 * @returns true if xchg was done.
1274 * @returns false if xchg wasn't done.
1275 *
1276 * @param pu64 Pointer to the 64-bit variable to update.
1277 * @param u64New The 64-bit value to assign to *pu64.
1278 * @param u64Old The value to compare with.
1279 *
1280 * @remarks x86: Requires a Pentium or later.
1281 * @todo Rename ASMAtomicCmpWriteU64
1282 */
1283#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1284 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1285RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1286#else
1287DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1288{
1289# if RT_INLINE_ASM_USES_INTRIN
1290 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1291
1292# elif defined(RT_ARCH_AMD64)
1293# if RT_INLINE_ASM_GNU_STYLE
1294 uint8_t u8Ret;
1295 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1296 "setz %1\n\t"
1297 : "=m" (*pu64)
1298 , "=qm" (u8Ret)
1299 , "=a" (u64Old)
1300 : "r" (u64New)
1301 , "2" (u64Old)
1302 , "m" (*pu64)
1303 : "cc");
1304 return (bool)u8Ret;
1305# else
1306 bool fRet;
1307 __asm
1308 {
1309 mov rdx, [pu64]
1310 mov rax, [u64Old]
1311 mov rcx, [u64New]
1312 lock cmpxchg [rdx], rcx
1313 setz al
1314 mov [fRet], al
1315 }
1316 return fRet;
1317# endif
1318
1319# elif defined(RT_ARCH_X86)
1320 uint32_t u32Ret;
1321# if RT_INLINE_ASM_GNU_STYLE
1322# if defined(PIC) || defined(__PIC__)
1323 uint32_t u32EBX = (uint32_t)u64New;
1324 uint32_t u32Spill;
1325 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1326 "lock; cmpxchg8b (%6)\n\t"
1327 "setz %%al\n\t"
1328 "movl %4, %%ebx\n\t"
1329 "movzbl %%al, %%eax\n\t"
1330 : "=a" (u32Ret)
1331 , "=d" (u32Spill)
1332# if RT_GNUC_PREREQ(4, 3)
1333 , "+m" (*pu64)
1334# else
1335 , "=m" (*pu64)
1336# endif
1337 : "A" (u64Old)
1338 , "m" ( u32EBX )
1339 , "c" ( (uint32_t)(u64New >> 32) )
1340 , "S" (pu64)
1341 : "cc");
1342# else /* !PIC */
1343 uint32_t u32Spill;
1344 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1345 "setz %%al\n\t"
1346 "movzbl %%al, %%eax\n\t"
1347 : "=a" (u32Ret)
1348 , "=d" (u32Spill)
1349 , "+m" (*pu64)
1350 : "A" (u64Old)
1351 , "b" ( (uint32_t)u64New )
1352 , "c" ( (uint32_t)(u64New >> 32) )
1353 : "cc");
1354# endif
1355 return (bool)u32Ret;
1356# else
1357 __asm
1358 {
1359 mov ebx, dword ptr [u64New]
1360 mov ecx, dword ptr [u64New + 4]
1361 mov edi, [pu64]
1362 mov eax, dword ptr [u64Old]
1363 mov edx, dword ptr [u64Old + 4]
1364 lock cmpxchg8b [edi]
1365 setz al
1366 movzx eax, al
1367 mov dword ptr [u32Ret], eax
1368 }
1369 return !!u32Ret;
1370# endif
1371
1372# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1373 union { uint32_t u; bool f; } fXchg;
1374 uint64_t u64Spill;
1375 uint32_t rcSpill;
1376 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1377 RTASM_ARM_DMB_SY
1378# if defined(RT_ARCH_ARM64)
1379 "ldaxr %[uOld], %[pMem]\n\t"
1380 "cmp %[uOld], %[uCmp]\n\t"
1381 "bne 1f\n\t" /* stop here if not equal */
1382 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1383 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1384 "mov %w[fXchg], #1\n\t"
1385# else
1386 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1387 "teq %[uOld], %[uCmp]\n\t"
1388 "teqeq %H[uOld], %H[uCmp]\n\t"
1389 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1390 "bne 1f\n\t" /* stop here if not equal */
1391 "cmp %[rc], #0\n\t"
1392 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1393 "mov %[fXchg], #1\n\t"
1394# endif
1395 "1:\n\t"
1396 : [pMem] "+m" (*pu64)
1397 , [uOld] "=&r" (u64Spill)
1398 , [rc] "=&r" (rcSpill)
1399 , [fXchg] "=&r" (fXchg.u)
1400 : [uCmp] "r" (u64Old)
1401 , [uNew] "r" (u64New)
1402 , "[fXchg]" (0)
1403 RTASM_ARM_DMB_SY_COMMA_IN_REG
1404 : "cc");
1405 return fXchg.f;
1406
1407# else
1408# error "Port me"
1409# endif
1410}
1411#endif
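/*
 * Example (illustrative sketch, not from the original sources): publish a new
 * 64-bit "pointer + epoch" word only if it is still the one we sampled. The
 * names are made up.
 * @code
 *      uint64_t u64Seen = *pu64MyShared;              // pu64MyShared is a hypothetical volatile uint64_t pointer
 *      uint64_t u64Next = myComputeNext(u64Seen);     // hypothetical
 *      if (!ASMAtomicCmpXchgU64(pu64MyShared, u64Next, u64Seen))
 *      {
 *          // somebody else updated it first; re-sample and retry
 *      }
 * @endcode
 */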
1412
1413
1414/**
1415 * Atomically Compare and exchange a signed 64-bit value, ordered.
1416 *
1417 * @returns true if xchg was done.
1418 * @returns false if xchg wasn't done.
1419 *
1420 * @param pi64 Pointer to the 64-bit variable to update.
1421 * @param i64 The 64-bit value to assign to *pi64.
1422 * @param i64Old The value to compare with.
1423 *
1424 * @remarks x86: Requires a Pentium or later.
1425 * @todo Rename ASMAtomicCmpWriteS64
1426 */
1427DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1428{
1429 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1430}
1431
1432#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
1433
1434/** @def RTASM_HAVE_CMP_WRITE_U128
1435 * Indicates that we've got ASMAtomicCmpWriteU128() available. */
1436# define RTASM_HAVE_CMP_WRITE_U128 1
1437
1438
1439/**
1440 * Atomically compare and write an unsigned 128-bit value, ordered.
1441 *
1442 * @returns true if write was done.
1443 * @returns false if write wasn't done.
1444 *
1445 * @param pu128 Pointer to the 128-bit variable to update.
1446 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1447 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1448 * @param u64OldHi The high 64 bits of the value to compare with.
1449 * @param u64OldLo The low 64 bits of the value to compare with.
1450 *
1451 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1452 */
1453# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1454DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1455 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1456# else
1457DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1458 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1459{
1460# if RT_INLINE_ASM_USES_INTRIN
1461 __int64 ai64Cmp[2];
1462 ai64Cmp[0] = u64OldLo;
1463 ai64Cmp[1] = u64OldHi;
1464 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1465
1466# elif defined(RT_ARCH_AMD64)
1467# if RT_INLINE_ASM_GNU_STYLE
1468 uint64_t u64Ret;
1469 uint64_t u64Spill;
1470 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1471 "setz %%al\n\t"
1472 "movzbl %%al, %%eax\n\t"
1473 : "=a" (u64Ret)
1474 , "=d" (u64Spill)
1475 , "+m" (*pu128)
1476 : "a" (u64OldLo)
1477 , "d" (u64OldHi)
1478 , "b" (u64NewLo)
1479 , "c" (u64NewHi)
1480 : "cc");
1481
1482 return (bool)u64Ret;
1483# else
1484# error "Port me"
1485# endif
1486# else
1487# error "Port me"
1488# endif
1489}
1490# endif
1491
1492
1493/**
1494 * Atomically compare and write an unsigned 128-bit value, ordered.
1495 *
1496 * @returns true if write was done.
1497 * @returns false if write wasn't done.
1498 *
1499 * @param pu128 Pointer to the 128-bit variable to update.
1500 * @param u128New The 128-bit value to assign to *pu128.
1501 * @param u128Old The value to compare with.
1502 *
1503 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1504 */
1505DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1506{
1507# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1508 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1509 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1510# else
1511 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1512# endif
1513}
1514
1515
1516/**
1517 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1518 */
1519DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1520 const RTUINT128U u128Old) RT_NOTHROW_DEF
1521{
1522 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1523}
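/*
 * Example (illustrative sketch, not from the original sources): update a
 * 16-byte descriptor (pointer plus generation counter) in one shot. The field
 * layout and variable names are made up.
 * @code
 *      RTUINT128U uOld, uNew;
 *      uOld.s.Lo = (uint64_t)(uintptr_t)pMyCurNode;   // hypothetical current value
 *      uOld.s.Hi = uMyCurGeneration;                  // hypothetical
 *      uNew.s.Lo = (uint64_t)(uintptr_t)pMyNewNode;   // hypothetical
 *      uNew.s.Hi = uMyCurGeneration + 1;
 *      if (ASMAtomicCmpWriteU128U(&pMyShared->u128Desc, uNew, uOld))  // pMyShared is hypothetical
 *      {
 *          // both halves were swapped atomically
 *      }
 * @endcode
 */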
1524
1525#endif /* RT_ARCH_AMD64 */
1526
1527
1528/**
1529 * Atomically Compare and Exchange a pointer value, ordered.
1530 *
1531 * @returns true if xchg was done.
1532 * @returns false if xchg wasn't done.
1533 *
1534 * @param ppv Pointer to the value to update.
1535 * @param pvNew The new value to assign to *ppv.
1536 * @param pvOld The old value to compare *ppv with.
1537 *
1538 * @remarks x86: Requires a 486 or later.
1539 * @todo Rename ASMAtomicCmpWritePtrVoid
1540 */
1541DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1542{
1543#if ARCH_BITS == 32 || ARCH_BITS == 16
1544 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1545#elif ARCH_BITS == 64
1546 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1547#else
1548# error "ARCH_BITS is bogus"
1549#endif
1550}
1551
1552
1553/**
1554 * Atomically Compare and Exchange a pointer value, ordered.
1555 *
1556 * @returns true if xchg was done.
1557 * @returns false if xchg wasn't done.
1558 *
1559 * @param ppv Pointer to the value to update.
1560 * @param pvNew The new value to assign to *ppv.
1561 * @param pvOld The old value to compare *ppv with.
1562 *
1563 * @remarks This is relatively type safe on GCC platforms.
1564 * @remarks x86: Requires a 486 or later.
1565 * @todo Rename ASMAtomicCmpWritePtr
1566 */
1567#ifdef __GNUC__
1568# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1569 __extension__ \
1570 ({\
1571 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1572 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1573 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1574 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1575 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1576 fMacroRet; \
1577 })
1578#else
1579# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1580 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1581#endif
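/*
 * Example (illustrative sketch, not from the original sources): lock-free
 * single-linked list push built on the typed pointer compare-exchange. MYNODE
 * and g_pMyHead are made-up names.
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;    // hypothetical
 *      static MYNODE * volatile g_pMyHead;                        // hypothetical
 *
 *      void myPush(MYNODE *pNode)
 *      {
 *          MYNODE *pOldHead;
 *          do
 *          {
 *              pOldHead = g_pMyHead;
 *              pNode->pNext = pOldHead;
 *          } while (!ASMAtomicCmpXchgPtr(&g_pMyHead, pNode, pOldHead));
 *      }
 * @endcode
 */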
1582
1583
1584/** @def ASMAtomicCmpXchgHandle
1585 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1586 *
1587 * @param ph Pointer to the value to update.
1588 * @param hNew The new value to assign to *ph.
1589 * @param hOld The old value to compare *ph with.
1590 * @param fRc Where to store the result.
1591 *
1592 * @remarks This doesn't currently work for all handles (like RTFILE).
1593 * @remarks x86: Requires a 486 or later.
1594 * @todo Rename ASMAtomicCmpWriteHandle
1595 */
1596#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1597# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1598 do { \
1599 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1600 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1601 } while (0)
1602#elif HC_ARCH_BITS == 64
1603# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1604 do { \
1605 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1606 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1607 } while (0)
1608#else
1609# error HC_ARCH_BITS
1610#endif
1611
1612
1613/** @def ASMAtomicCmpXchgSize
1614 * Atomically Compare and Exchange a value whose size might differ
1615 * between platforms or compilers, ordered.
1616 *
1617 * @param pu Pointer to the value to update.
1618 * @param uNew The new value to assign to *pu.
1619 * @param uOld The old value to compare *pu with.
1620 * @param fRc Where to store the result.
1621 *
1622 * @remarks x86: Requires a 486 or later.
1623 * @todo Rename ASMAtomicCmpWriteSize
1624 */
1625#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1626 do { \
1627 switch (sizeof(*(pu))) { \
1628 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1629 break; \
1630 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1631 break; \
1632 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1633 (fRc) = false; \
1634 break; \
1635 } \
1636 } while (0)
1637
1638
1639/**
1640 * Atomically Compare and Exchange an unsigned 8-bit value, additionally passes
1641 * back old value, ordered.
1642 *
1643 * @returns true if xchg was done.
1644 * @returns false if xchg wasn't done.
1645 *
1646 * @param pu8 Pointer to the value to update.
1647 * @param u8New The new value to assign to *pu8.
1648 * @param u8Old The old value to compare *pu8 with.
1649 * @param pu8Old Pointer to store the old value at.
1650 *
1651 * @remarks x86: Requires a 486 or later.
1652 */
1653#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1654RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_PROTO;
1655#else
1656DECLINLINE(bool) ASMAtomicCmpXchgExU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old, uint8_t RT_FAR *pu8Old) RT_NOTHROW_DEF
1657{
1658# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1659# if RT_INLINE_ASM_GNU_STYLE
1660 uint8_t u8Ret;
1661 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1662 "setz %1\n\t"
1663 : "=m" (*pu8)
1664 , "=qm" (u8Ret)
1665 , "=a" (*pu8Old)
1666 : "r" (u8New)
1667 , "a" (u8Old)
1668 , "m" (*pu8)
1669 : "cc");
1670 return (bool)u8Ret;
1671
1672# elif RT_INLINE_ASM_USES_INTRIN
1673 return (*pu8Old = _InterlockedCompareExchange8((char RT_FAR *)pu8, u8New, u8Old)) == u8Old;
1674
1675# else
1676 uint8_t u8Ret;
1677 __asm
1678 {
1679# ifdef RT_ARCH_AMD64
1680 mov rdx, [pu8]
1681# else
1682 mov edx, [pu8]
1683# endif
1684 mov eax, [u8Old]
1685 mov ecx, [u8New]
1686# ifdef RT_ARCH_AMD64
1687 lock cmpxchg [rdx], ecx
1688 mov rdx, [pu8Old]
1689 mov [rdx], eax
1690# else
1691 lock cmpxchg [edx], ecx
1692 mov edx, [pu8Old]
1693 mov [edx], eax
1694# endif
1695 setz al
1696 movzx eax, al
1697 mov [u8Ret], eax
1698 }
1699 return !!u8Ret;
1700# endif
1701
1702# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1703 union { uint8_t u; bool f; } fXchg;
1704 uint8_t u8ActualOld;
1705 uint8_t rcSpill;
1706 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU8_%=:\n\t"
1707 RTASM_ARM_DMB_SY
1708# if defined(RT_ARCH_ARM64)
1709 "ldaxrb %w[uOld], %[pMem]\n\t"
1710 "cmp %w[uOld], %w[uCmp]\n\t"
1711 "bne 1f\n\t" /* stop here if not equal */
1712 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1713 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1714 "mov %w[fXchg], #1\n\t"
1715# else
1716 "ldrexb %[uOld], %[pMem]\n\t"
1717 "teq %[uOld], %[uCmp]\n\t"
1718 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1719 "bne 1f\n\t" /* stop here if not equal */
1720 "cmp %[rc], #0\n\t"
1721 "bne .Ltry_again_ASMAtomicCmpXchgExU8_%=\n\t"
1722 "mov %[fXchg], #1\n\t"
1723# endif
1724 "1:\n\t"
1725 : [pMem] "+m" (*pu8)
1726 , [uOld] "=&r" (u8ActualOld)
1727 , [rc] "=&r" (rcSpill)
1728 , [fXchg] "=&r" (fXchg.u)
1729 : [uCmp] "r" (u8Old)
1730 , [uNew] "r" (u8New)
1731 , "[fXchg]" (0)
1732 RTASM_ARM_DMB_SY_COMMA_IN_REG
1733 : "cc");
1734 *pu8Old = u8ActualOld;
1735 return fXchg.f;
1736
1737# else
1738# error "Port me"
1739# endif
1740}
1741#endif
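
/* Editor's example (not part of the original header): a sketch of a CAS loop
 * that saturates an 8-bit counter at 0xff, relying on ASMAtomicCmpXchgExU8 to
 * hand back the current value on failure so no extra read is needed per
 * retry.  The helper name is made up for illustration. */
DECLINLINE(uint8_t) rtAsmExampleIncU8Saturate(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
{
    uint8_t u8Old = *pu8;   /* plain volatile read just to seed the loop */
    uint8_t u8New;
    do
    {
        if (u8Old == 0xff)
            return u8Old;                   /* already saturated, nothing to do */
        u8New = (uint8_t)(u8Old + 1);
    } while (!ASMAtomicCmpXchgExU8(pu8, u8New, u8Old, &u8Old)); /* failure refreshes u8Old */
    return u8New;
}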
1742
1743
1744/**
1745 * Atomically Compare and Exchange a signed 8-bit value, additionally
1746 * passes back old value, ordered.
1747 *
1748 * @returns true if xchg was done.
1749 * @returns false if xchg wasn't done.
1750 *
1751 * @param pi8 Pointer to the value to update.
1752 * @param i8New The new value to assign to *pi8.
1753 * @param i8Old The old value to compare *pi8 with.
1754 * @param pi8Old Pointer to store the old value at.
1755 *
1756 * @remarks x86: Requires a 486 or later.
1757 */
1758DECLINLINE(bool) ASMAtomicCmpXchgExS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old, int8_t RT_FAR *pi8Old) RT_NOTHROW_DEF
1759{
1760 return ASMAtomicCmpXchgExU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old, (uint8_t RT_FAR *)pi8Old);
1761}
1762
1763
1764/**
1765 * Atomically Compare and Exchange an unsigned 16-bit value, additionally passes
1766 * back old value, ordered.
1767 *
1768 * @returns true if xchg was done.
1769 * @returns false if xchg wasn't done.
1770 *
1771 * @param pu16 Pointer to the value to update.
1772 * @param u16New The new value to assign to *pu16.
1773 * @param u16Old The old value to compare *pu16 with.
1774 * @param pu16Old Pointer to store the old value at.
1775 *
1776 * @remarks x86: Requires a 486 or later.
1777 */
1778#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1779RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_PROTO;
1780#else
1781DECLINLINE(bool) ASMAtomicCmpXchgExU16(volatile uint16_t RT_FAR *pu16, const uint16_t u16New, const uint16_t u16Old, uint16_t RT_FAR *pu16Old) RT_NOTHROW_DEF
1782{
1783# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1784# if RT_INLINE_ASM_GNU_STYLE
1785 uint8_t u8Ret;
1786 __asm__ __volatile__("lock; cmpxchgw %3, %0\n\t"
1787 "setz %1\n\t"
1788 : "=m" (*pu16)
1789 , "=qm" (u8Ret)
1790 , "=a" (*pu16Old)
1791 : "r" (u16New)
1792 , "a" (u16Old)
1793 , "m" (*pu16)
1794 : "cc");
1795 return (bool)u8Ret;
1796
1797# elif RT_INLINE_ASM_USES_INTRIN
1798 return (*pu16Old = _InterlockedCompareExchange16((short RT_FAR *)pu16, u16New, u16Old)) == u16Old;
1799
1800# else
1801 uint16_t u16Ret;
1802 __asm
1803 {
1804# ifdef RT_ARCH_AMD64
1805 mov rdx, [pu16]
1806# else
1807 mov edx, [pu16]
1808# endif
1809 mov ax, [u16Old]
1810 mov cx, [u16New]
1811# ifdef RT_ARCH_AMD64
1812 lock cmpxchg [rdx], cx
1813 mov rdx, [pu16Old]
1814 mov [rdx], ax
1815# else
1816 lock cmpxchg [edx], cx
1817 mov edx, [pu16Old]
1818 mov [edx], ax
1819# endif
1820 setz al
1821 movzx eax, al
1822 mov [u16Ret], ax
1823 }
1824 return !!u16Ret;
1825# endif
1826
1827# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1828 union { uint16_t u; bool f; } fXchg;
1829 uint16_t u16ActualOld;
1830 uint16_t rcSpill;
1831 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU16_%=:\n\t"
1832 RTASM_ARM_DMB_SY
1833# if defined(RT_ARCH_ARM64)
1834 "ldaxrh %w[uOld], %[pMem]\n\t"
1835 "cmp %w[uOld], %w[uCmp]\n\t"
1836 "bne 1f\n\t" /* stop here if not equal */
1837 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
1838 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1839 "mov %w[fXchg], #1\n\t"
1840# else
1841 "ldrexh %[uOld], %[pMem]\n\t"
1842 "teq %[uOld], %[uCmp]\n\t"
1843 "strexheq %[rc], %[uNew], %[pMem]\n\t"
1844 "bne 1f\n\t" /* stop here if not equal */
1845 "cmp %[rc], #0\n\t"
1846 "bne .Ltry_again_ASMAtomicCmpXchgExU16_%=\n\t"
1847 "mov %[fXchg], #1\n\t"
1848# endif
1849 "1:\n\t"
1850 : [pMem] "+m" (*pu16)
1851 , [uOld] "=&r" (u16ActualOld)
1852 , [rc] "=&r" (rcSpill)
1853 , [fXchg] "=&r" (fXchg.u)
1854 : [uCmp] "r" (u16Old)
1855 , [uNew] "r" (u16New)
1856 , "[fXchg]" (0)
1857 RTASM_ARM_DMB_SY_COMMA_IN_REG
1858 : "cc");
1859 *pu16Old = u16ActualOld;
1860 return fXchg.f;
1861
1862# else
1863# error "Port me"
1864# endif
1865}
1866#endif
1867
1868
1869/**
1870 * Atomically Compare and Exchange a signed 16-bit value, additionally
1871 * passes back old value, ordered.
1872 *
1873 * @returns true if xchg was done.
1874 * @returns false if xchg wasn't done.
1875 *
1876 * @param pi16 Pointer to the value to update.
1877 * @param i16New The new value to assigned to *pi16.
1878 * @param i16Old The old value to *pi16 compare with.
1879 * @param pi16Old Pointer store the old value at.
1880 *
1881 * @remarks x86: Requires a 486 or later.
1882 */
1883DECLINLINE(bool) ASMAtomicCmpXchgExS16(volatile int16_t RT_FAR *pi16, const int16_t i16New, const int16_t i16Old, int16_t RT_FAR *pi16Old) RT_NOTHROW_DEF
1884{
1885 return ASMAtomicCmpXchgExU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16New, (uint16_t)i16Old, (uint16_t RT_FAR *)pi16Old);
1886}
1887
1888
1889/**
1890 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1891 * passes back old value, ordered.
1892 *
1893 * @returns true if xchg was done.
1894 * @returns false if xchg wasn't done.
1895 *
1896 * @param pu32 Pointer to the value to update.
1897 * @param u32New The new value to assign to *pu32.
1898 * @param u32Old The old value to compare *pu32 with.
1899 * @param pu32Old Pointer to store the old value at.
1900 *
1901 * @remarks x86: Requires a 486 or later.
1902 */
1903#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1904RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1905#else
1906DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1907{
1908# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1909# if RT_INLINE_ASM_GNU_STYLE
1910 uint8_t u8Ret;
1911 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1912 "setz %1\n\t"
1913 : "=m" (*pu32)
1914 , "=qm" (u8Ret)
1915 , "=a" (*pu32Old)
1916 : "r" (u32New)
1917 , "a" (u32Old)
1918 , "m" (*pu32)
1919 : "cc");
1920 return (bool)u8Ret;
1921
1922# elif RT_INLINE_ASM_USES_INTRIN
1923 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1924
1925# else
1926 uint32_t u32Ret;
1927 __asm
1928 {
1929# ifdef RT_ARCH_AMD64
1930 mov rdx, [pu32]
1931# else
1932 mov edx, [pu32]
1933# endif
1934 mov eax, [u32Old]
1935 mov ecx, [u32New]
1936# ifdef RT_ARCH_AMD64
1937 lock cmpxchg [rdx], ecx
1938 mov rdx, [pu32Old]
1939 mov [rdx], eax
1940# else
1941 lock cmpxchg [edx], ecx
1942 mov edx, [pu32Old]
1943 mov [edx], eax
1944# endif
1945 setz al
1946 movzx eax, al
1947 mov [u32Ret], eax
1948 }
1949 return !!u32Ret;
1950# endif
1951
1952# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1953 union { uint32_t u; bool f; } fXchg;
1954 uint32_t u32ActualOld;
1955 uint32_t rcSpill;
1956 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1957 RTASM_ARM_DMB_SY
1958# if defined(RT_ARCH_ARM64)
1959 "ldaxr %w[uOld], %[pMem]\n\t"
1960 "cmp %w[uOld], %w[uCmp]\n\t"
1961 "bne 1f\n\t" /* stop here if not equal */
1962 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1963 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1964 "mov %w[fXchg], #1\n\t"
1965# else
1966 "ldrex %[uOld], %[pMem]\n\t"
1967 "teq %[uOld], %[uCmp]\n\t"
1968 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1969 "bne 1f\n\t" /* stop here if not equal */
1970 "cmp %[rc], #0\n\t"
1971 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1972 "mov %[fXchg], #1\n\t"
1973# endif
1974 "1:\n\t"
1975 : [pMem] "+m" (*pu32)
1976 , [uOld] "=&r" (u32ActualOld)
1977 , [rc] "=&r" (rcSpill)
1978 , [fXchg] "=&r" (fXchg.u)
1979 : [uCmp] "r" (u32Old)
1980 , [uNew] "r" (u32New)
1981 , "[fXchg]" (0)
1982 RTASM_ARM_DMB_SY_COMMA_IN_REG
1983 : "cc");
1984 *pu32Old = u32ActualOld;
1985 return fXchg.f;
1986
1987# else
1988# error "Port me"
1989# endif
1990}
1991#endif
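
/* Editor's example (not part of the original header): a sketch showing the
 * point of the Ex variant - when the compare-exchange fails, the caller gets
 * the conflicting value for free.  Here an ownership word is claimed by
 * CAS'ing a (non-zero) id over zero; all names are made up for illustration. */
DECLINLINE(bool) rtAsmExampleTryClaimU32(volatile uint32_t RT_FAR *pu32Owner, uint32_t idSelf, uint32_t RT_FAR *pidCurOwner) RT_NOTHROW_DEF
{
    return ASMAtomicCmpXchgExU32(pu32Owner, idSelf, 0, pidCurOwner); /* *pidCurOwner = previous owner (0 on success) */
}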
1992
1993
1994/**
1995 * Atomically Compare and Exchange a signed 32-bit value, additionally
1996 * passes back old value, ordered.
1997 *
1998 * @returns true if xchg was done.
1999 * @returns false if xchg wasn't done.
2000 *
2001 * @param pi32 Pointer to the value to update.
2002 * @param i32New The new value to assign to *pi32.
2003 * @param i32Old The old value to compare *pi32 with.
2004 * @param pi32Old Pointer to store the old value at.
2005 *
2006 * @remarks x86: Requires a 486 or later.
2007 */
2008DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
2009{
2010 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
2011}
2012
2013
2014/**
2015 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2016 * passing back old value, ordered.
2017 *
2018 * @returns true if xchg was done.
2019 * @returns false if xchg wasn't done.
2020 *
2021 * @param pu64 Pointer to the 64-bit variable to update.
2022 * @param u64New The 64-bit value to assign to *pu64.
2023 * @param u64Old The value to compare with.
2024 * @param pu64Old Pointer to store the old value at.
2025 *
2026 * @remarks x86: Requires a Pentium or later.
2027 */
2028#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2029 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2030RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
2031#else
2032DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
2033{
2034# if RT_INLINE_ASM_USES_INTRIN
2035 return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
2036
2037# elif defined(RT_ARCH_AMD64)
2038# if RT_INLINE_ASM_GNU_STYLE
2039 uint8_t u8Ret;
2040 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2041 "setz %1\n\t"
2042 : "=m" (*pu64)
2043 , "=qm" (u8Ret)
2044 , "=a" (*pu64Old)
2045 : "r" (u64New)
2046 , "a" (u64Old)
2047 , "m" (*pu64)
2048 : "cc");
2049 return (bool)u8Ret;
2050# else
2051 bool fRet;
2052 __asm
2053 {
2054 mov rdx, [pu64]
2055 mov rax, [u64Old]
2056 mov rcx, [u64New]
2057 lock cmpxchg [rdx], rcx
2058 mov rdx, [pu64Old]
2059 mov [rdx], rax
2060 setz al
2061 mov [fRet], al
2062 }
2063 return fRet;
2064# endif
2065
2066# elif defined(RT_ARCH_X86)
2067# if RT_INLINE_ASM_GNU_STYLE
2068 uint64_t u64Ret;
2069# if defined(PIC) || defined(__PIC__)
2070 /* NB: this code uses a memory clobber description, because the clean
2071 * solution with an output value for *pu64 makes gcc run out of registers.
2072 * This will cause suboptimal code, and anyone with a better solution is
2073 * welcome to improve this. */
2074 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2075 "lock; cmpxchg8b %3\n\t"
2076 "xchgl %%ebx, %1\n\t"
2077 : "=A" (u64Ret)
2078 : "DS" ((uint32_t)u64New)
2079 , "c" ((uint32_t)(u64New >> 32))
2080 , "m" (*pu64)
2081 , "0" (u64Old)
2082 : "memory"
2083 , "cc" );
2084# else /* !PIC */
2085 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2086 : "=A" (u64Ret)
2087 , "=m" (*pu64)
2088 : "b" ((uint32_t)u64New)
2089 , "c" ((uint32_t)(u64New >> 32))
2090 , "m" (*pu64)
2091 , "0" (u64Old)
2092 : "cc");
2093# endif
2094 *pu64Old = u64Ret;
2095 return u64Ret == u64Old;
2096# else
2097 uint32_t u32Ret;
2098 __asm
2099 {
2100 mov ebx, dword ptr [u64New]
2101 mov ecx, dword ptr [u64New + 4]
2102 mov edi, [pu64]
2103 mov eax, dword ptr [u64Old]
2104 mov edx, dword ptr [u64Old + 4]
2105 lock cmpxchg8b [edi]
2106 mov ebx, [pu64Old]
2107 mov [ebx], eax
2108 setz al
2109 movzx eax, al
2110 add ebx, 4
2111 mov [ebx], edx
2112 mov dword ptr [u32Ret], eax
2113 }
2114 return !!u32Ret;
2115# endif
2116
2117# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2118 union { uint32_t u; bool f; } fXchg;
2119 uint64_t u64ActualOld;
2120 uint32_t rcSpill;
2121 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
2122 RTASM_ARM_DMB_SY
2123# if defined(RT_ARCH_ARM64)
2124 "ldaxr %[uOld], %[pMem]\n\t"
2125 "cmp %[uOld], %[uCmp]\n\t"
2126 "bne 1f\n\t" /* stop here if not equal */
2127 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
2128 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2129 "mov %w[fXchg], #1\n\t"
2130# else
2131 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
2132 "teq %[uOld], %[uCmp]\n\t"
2133 "teqeq %H[uOld], %H[uCmp]\n\t"
2134 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
2135 "bne 1f\n\t" /* stop here if not equal */
2136 "cmp %[rc], #0\n\t"
2137 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
2138 "mov %[fXchg], #1\n\t"
2139# endif
2140 "1:\n\t"
2141 : [pMem] "+m" (*pu64)
2142 , [uOld] "=&r" (u64ActualOld)
2143 , [rc] "=&r" (rcSpill)
2144 , [fXchg] "=&r" (fXchg.u)
2145 : [uCmp] "r" (u64Old)
2146 , [uNew] "r" (u64New)
2147 , "[fXchg]" (0)
2148 RTASM_ARM_DMB_SY_COMMA_IN_REG
2149 : "cc");
2150 *pu64Old = u64ActualOld;
2151 return fXchg.f;
2152
2153# else
2154# error "Port me"
2155# endif
2156}
2157#endif
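
/* Editor's example (not part of the original header): a sketch of advancing a
 * shared 64-bit high-water mark.  Seeding uOld with zero is deliberate: the
 * first failing CAS fetches the current value atomically, which matters on
 * 32-bit hosts where a plain 64-bit load could tear.  Names are made up. */
DECLINLINE(bool) rtAsmExampleRaiseMaxU64(volatile uint64_t RT_FAR *puMax, uint64_t uCandidate) RT_NOTHROW_DEF
{
    uint64_t uOld = 0;
    while (uCandidate > uOld)
        if (ASMAtomicCmpXchgExU64(puMax, uCandidate, uOld, &uOld))
            return true;    /* we raised the mark */
    return false;           /* somebody else already stored something >= uCandidate */
}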
2158
2159
2160/**
2161 * Atomically Compare and exchange a signed 64-bit value, additionally
2162 * passing back old value, ordered.
2163 *
2164 * @returns true if xchg was done.
2165 * @returns false if xchg wasn't done.
2166 *
2167 * @param pi64 Pointer to the 64-bit variable to update.
2168 * @param i64 The 64-bit value to assign to *pi64.
2169 * @param i64Old The value to compare with.
2170 * @param pi64Old Pointer to store the old value at.
2171 *
2172 * @remarks x86: Requires a Pentium or later.
2173 */
2174DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
2175{
2176 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
2177}
2178
2179/** @def ASMAtomicCmpXchgExHandle
2180 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2181 *
2182 * @param ph Pointer to the value to update.
2183 * @param hNew The new value to assign to *ph.
2184 * @param hOld The old value to compare *ph with.
2185 * @param fRc Where to store the result.
2186 * @param phOldVal Pointer to where to store the old value.
2187 *
2188 * @remarks This doesn't currently work for all handles (like RTFILE).
2189 */
2190#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2191# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2192 do { \
2193 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
2194 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
2195 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
2196 } while (0)
2197#elif HC_ARCH_BITS == 64
2198# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2199 do { \
2200 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2201 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
2202 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
2203 } while (0)
2204#else
2205# error HC_ARCH_BITS
2206#endif
2207
2208
2209/** @def ASMAtomicCmpXchgExSize
2210 * Atomically Compare and Exchange a value whose size might differ
2211 * between platforms or compilers. Additionally passes back old value.
2212 *
2213 * @param pu Pointer to the value to update.
2214 * @param uNew The new value to assign to *pu.
2215 * @param uOld The old value to compare *pu with.
2216 * @param fRc Where to store the result.
2217 * @param puOldVal Pointer to where to store the old value.
2218 *
2219 * @remarks x86: Requires a 486 or later.
2220 */
2221#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2222 do { \
2223 switch (sizeof(*(pu))) { \
2224 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
2225 break; \
2226 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
2227 break; \
2228 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2229 (fRc) = false; \
2230 *(puOldVal) = 0; \
2231 break; \
2232 } \
2233 } while (0)
2234
2235
2236/**
2237 * Atomically Compare and Exchange a pointer value, additionally
2238 * passing back old value, ordered.
2239 *
2240 * @returns true if xchg was done.
2241 * @returns false if xchg wasn't done.
2242 *
2243 * @param ppv Pointer to the value to update.
2244 * @param pvNew The new value to assign to *ppv.
2245 * @param pvOld The old value to compare *ppv with.
2246 * @param ppvOld Pointer to store the old value at.
2247 *
2248 * @remarks x86: Requires a 486 or later.
2249 */
2250DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
2251 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2252{
2253#if ARCH_BITS == 32 || ARCH_BITS == 16
2254 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2255#elif ARCH_BITS == 64
2256 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2257#else
2258# error "ARCH_BITS is bogus"
2259#endif
2260}
2261
2262
2263/**
2264 * Atomically Compare and Exchange a pointer value, additionally
2265 * passing back old value, ordered.
2266 *
2267 * @returns true if xchg was done.
2268 * @returns false if xchg wasn't done.
2269 *
2270 * @param ppv Pointer to the value to update.
2271 * @param pvNew The new value to assign to *ppv.
2272 * @param pvOld The old value to compare *ppv with.
2273 * @param ppvOld Pointer to store the old value at.
2274 *
2275 * @remarks This is relatively type safe on GCC platforms.
2276 * @remarks x86: Requires a 486 or later.
2277 */
2278#ifdef __GNUC__
2279# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2280 __extension__ \
2281 ({\
2282 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2283 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2284 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2285 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2286 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2287 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2288 (void **)ppvOldTypeChecked); \
2289 fMacroRet; \
2290 })
2291#else
2292# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2293 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2294#endif
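
/* Editor's example (not part of the original header): a sketch of a lock-free
 * LIFO push built on the pointer compare-exchange above.  The node type and
 * helper name are made up for illustration; on failure the Ex variant
 * refreshes pvHead, so the loop never has to re-read the head separately. */
typedef struct RTASMEXAMPLENODE
{
    struct RTASMEXAMPLENODE RT_FAR *pNext;
} RTASMEXAMPLENODE;

DECLINLINE(void) rtAsmExampleLifoPush(void RT_FAR * volatile RT_FAR *ppvHead, RTASMEXAMPLENODE RT_FAR *pNode) RT_NOTHROW_DEF
{
    void RT_FAR *pvHead = *ppvHead;
    do
        pNode->pNext = (RTASMEXAMPLENODE RT_FAR *)pvHead;
    while (!ASMAtomicCmpXchgExPtrVoid(ppvHead, pNode, pvHead, &pvHead));
}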
2295
2296
2297/**
2298 * Virtualization unfriendly serializing instruction, always exits.
2299 */
2300#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2301RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2302#else
2303DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2304{
2305# if RT_INLINE_ASM_GNU_STYLE
2306 RTCCUINTREG xAX = 0;
2307# ifdef RT_ARCH_AMD64
2308 __asm__ __volatile__ ("cpuid"
2309 : "=a" (xAX)
2310 : "0" (xAX)
2311 : "rbx", "rcx", "rdx", "memory");
2312# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2313 __asm__ __volatile__ ("push %%ebx\n\t"
2314 "cpuid\n\t"
2315 "pop %%ebx\n\t"
2316 : "=a" (xAX)
2317 : "0" (xAX)
2318 : "ecx", "edx", "memory");
2319# else
2320 __asm__ __volatile__ ("cpuid"
2321 : "=a" (xAX)
2322 : "0" (xAX)
2323 : "ebx", "ecx", "edx", "memory");
2324# endif
2325
2326# elif RT_INLINE_ASM_USES_INTRIN
2327 int aInfo[4];
2328 _ReadWriteBarrier();
2329 __cpuid(aInfo, 0);
2330
2331# else
2332 __asm
2333 {
2334 push ebx
2335 xor eax, eax
2336 cpuid
2337 pop ebx
2338 }
2339# endif
2340}
2341#endif
2342
2343/**
2344 * Virtualization friendly serializing instruction, though more expensive.
2345 */
2346#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2347RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2348#else
2349DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2350{
2351# if RT_INLINE_ASM_GNU_STYLE
2352# ifdef RT_ARCH_AMD64
2353 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2354 "subq $128, %%rsp\n\t" /*redzone*/
2355 "mov %%ss, %%eax\n\t"
2356 "pushq %%rax\n\t"
2357 "pushq %%r10\n\t"
2358 "pushfq\n\t"
2359 "movl %%cs, %%eax\n\t"
2360 "pushq %%rax\n\t"
2361 "leaq 1f(%%rip), %%rax\n\t"
2362 "pushq %%rax\n\t"
2363 "iretq\n\t"
2364 "1:\n\t"
2365 ::: "rax", "r10", "memory", "cc");
2366# else
2367 __asm__ __volatile__ ("pushfl\n\t"
2368 "pushl %%cs\n\t"
2369 "pushl $1f\n\t"
2370 "iretl\n\t"
2371 "1:\n\t"
2372 ::: "memory");
2373# endif
2374
2375# else
2376 __asm
2377 {
2378 pushfd
2379 push cs
2380 push la_ret
2381 iretd
2382 la_ret:
2383 }
2384# endif
2385}
2386#endif
2387
2388/**
2389 * Virtualization friendlier serializing instruction, may still cause exits.
2390 */
2391#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2392RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2393#else
2394DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2395{
2396# if RT_INLINE_ASM_GNU_STYLE
2397 /* rdtscp is not supported by ancient linux build VM of course :-( */
2398# ifdef RT_ARCH_AMD64
2399 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2400 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2401# else
2402 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2403 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2404# endif
2405# else
2406# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2407 uint32_t uIgnore;
2408 _ReadWriteBarrier();
2409 (void)__rdtscp(&uIgnore);
2410 (void)uIgnore;
2411# else
2412 __asm
2413 {
2414 rdtscp
2415 }
2416# endif
2417# endif
2418}
2419#endif
2420
2421
2422/**
2423 * Serialize Instruction (both data store and instruction flush).
2424 */
2425#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2426# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2427#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2428# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2429#elif defined(RT_ARCH_SPARC64)
2430RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2431#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2432DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2433{
2434 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2435}
2436#else
2437# error "Port me"
2438#endif
2439
2440
2441/**
2442 * Memory fence, waits for any pending writes and reads to complete.
2443 * @note No implicit compiler barrier (which is probably stupid).
2444 */
2445DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2446{
2447#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2448# if RT_INLINE_ASM_GNU_STYLE
2449 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2450# elif RT_INLINE_ASM_USES_INTRIN
2451 _mm_mfence();
2452# else
2453 __asm
2454 {
2455 _emit 0x0f
2456 _emit 0xae
2457 _emit 0xf0
2458 }
2459# endif
2460#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2461 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2462#elif ARCH_BITS == 16
2463 uint16_t volatile u16;
2464 ASMAtomicXchgU16(&u16, 0);
2465#else
2466 uint32_t volatile u32;
2467 ASMAtomicXchgU32(&u32, 0);
2468#endif
2469}
2470
2471
2472/**
2473 * Write fence, waits for any pending writes to complete.
2474 * @note No implicit compiler barrier (which is probably stupid).
2475 */
2476DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2477{
2478#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2479# if RT_INLINE_ASM_GNU_STYLE
2480 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2481# elif RT_INLINE_ASM_USES_INTRIN
2482 _mm_sfence();
2483# else
2484 __asm
2485 {
2486 _emit 0x0f
2487 _emit 0xae
2488 _emit 0xf8
2489 }
2490# endif
2491#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2492 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2493#else
2494 ASMMemoryFence();
2495#endif
2496}
2497
2498
2499/**
2500 * Read fence, waits for any pending reads to complete.
2501 * @note No implicit compiler barrier (which is probably stupid).
2502 */
2503DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2504{
2505#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2506# if RT_INLINE_ASM_GNU_STYLE
2507 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 _mm_lfence();
2510# else
2511 __asm
2512 {
2513 _emit 0x0f
2514 _emit 0xae
2515 _emit 0xe8
2516 }
2517# endif
2518#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2519 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2520#else
2521 ASMMemoryFence();
2522#endif
2523}
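
/* Editor's example (not part of the original header): a sketch of hand-rolled
 * message passing with the explicit fences above.  The producer publishes the
 * payload before raising the flag; the consumer checks the flag before
 * touching the payload.  As the notes point out, these fences are not
 * compiler barriers, hence the volatile members.  All names are made up. */
typedef struct RTASMEXAMPLEMAILBOX
{
    uint32_t volatile uPayload;
    bool volatile     fReady;
} RTASMEXAMPLEMAILBOX;

DECLINLINE(void) rtAsmExamplePost(RTASMEXAMPLEMAILBOX RT_FAR *pBox, uint32_t uValue) RT_NOTHROW_DEF
{
    pBox->uPayload = uValue;    /* 1. store the data, */
    ASMWriteFence();            /* 2. make sure it is visible, */
    pBox->fReady   = true;      /* 3. then raise the flag. */
}

DECLINLINE(bool) rtAsmExampleTryCollect(RTASMEXAMPLEMAILBOX RT_FAR *pBox, uint32_t RT_FAR *puValue) RT_NOTHROW_DEF
{
    if (!pBox->fReady)
        return false;
    ASMReadFence();             /* don't let the payload read pass the flag check */
    *puValue = pBox->uPayload;
    return true;
}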
2524
2525
2526/**
2527 * Atomically reads an unsigned 8-bit value, ordered.
2528 *
2529 * @returns Current *pu8 value
2530 * @param pu8 Pointer to the 8-bit variable to read.
2531 */
2532DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2533{
2534#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2535 uint32_t u32;
2536 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2537 RTASM_ARM_DMB_SY
2538# if defined(RT_ARCH_ARM64)
2539 "ldxrb %w[uDst], %[pMem]\n\t"
2540# else
2541 "ldrexb %[uDst], %[pMem]\n\t"
2542# endif
2543 : [uDst] "=&r" (u32)
2544 : [pMem] "m" (*pu8)
2545 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2546 return (uint8_t)u32;
2547#else
2548 ASMMemoryFence();
2549 return *pu8; /* byte reads are atomic on x86 */
2550#endif
2551}
2552
2553
2554/**
2555 * Atomically reads an unsigned 8-bit value, unordered.
2556 *
2557 * @returns Current *pu8 value
2558 * @param pu8 Pointer to the 8-bit variable to read.
2559 */
2560DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2561{
2562#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2563 uint32_t u32;
2564 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2565# if defined(RT_ARCH_ARM64)
2566 "ldxrb %w[uDst], %[pMem]\n\t"
2567# else
2568 "ldrexb %[uDst], %[pMem]\n\t"
2569# endif
2570 : [uDst] "=&r" (u32)
2571 : [pMem] "m" (*pu8));
2572 return (uint8_t)u32;
2573#else
2574 return *pu8; /* byte reads are atomic on x86 */
2575#endif
2576}
2577
2578
2579/**
2580 * Atomically reads a signed 8-bit value, ordered.
2581 *
2582 * @returns Current *pi8 value
2583 * @param pi8 Pointer to the 8-bit variable to read.
2584 */
2585DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2586{
2587 ASMMemoryFence();
2588#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2589 int32_t i32;
2590 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2591 RTASM_ARM_DMB_SY
2592# if defined(RT_ARCH_ARM64)
2593 "ldxrb %w[iDst], %[pMem]\n\t"
2594# else
2595 "ldrexb %[iDst], %[pMem]\n\t"
2596# endif
2597 : [iDst] "=&r" (i32)
2598 : [pMem] "m" (*pi8)
2599 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2600 return (int8_t)i32;
2601#else
2602 return *pi8; /* byte reads are atomic on x86 */
2603#endif
2604}
2605
2606
2607/**
2608 * Atomically reads a signed 8-bit value, unordered.
2609 *
2610 * @returns Current *pi8 value
2611 * @param pi8 Pointer to the 8-bit variable to read.
2612 */
2613DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2614{
2615#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2616 int32_t i32;
2617 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2618# if defined(RT_ARCH_ARM64)
2619 "ldxrb %w[iDst], %[pMem]\n\t"
2620# else
2621 "ldrexb %[iDst], %[pMem]\n\t"
2622# endif
2623 : [iDst] "=&r" (i32)
2624 : [pMem] "m" (*pi8));
2625 return (int8_t)i32;
2626#else
2627 return *pi8; /* byte reads are atomic on x86 */
2628#endif
2629}
2630
2631
2632/**
2633 * Atomically reads an unsigned 16-bit value, ordered.
2634 *
2635 * @returns Current *pu16 value
2636 * @param pu16 Pointer to the 16-bit variable to read.
2637 */
2638DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2639{
2640 Assert(!((uintptr_t)pu16 & 1));
2641#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2642 uint32_t u32;
2643 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2644 RTASM_ARM_DMB_SY
2645# if defined(RT_ARCH_ARM64)
2646 "ldxrh %w[uDst], %[pMem]\n\t"
2647# else
2648 "ldrexh %[uDst], %[pMem]\n\t"
2649# endif
2650 : [uDst] "=&r" (u32)
2651 : [pMem] "m" (*pu16)
2652 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2653 return (uint16_t)u32;
2654#else
2655 ASMMemoryFence();
2656 return *pu16;
2657#endif
2658}
2659
2660
2661/**
2662 * Atomically reads an unsigned 16-bit value, unordered.
2663 *
2664 * @returns Current *pu16 value
2665 * @param pu16 Pointer to the 16-bit variable to read.
2666 */
2667DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2668{
2669 Assert(!((uintptr_t)pu16 & 1));
2670#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2671 uint32_t u32;
2672 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2673# if defined(RT_ARCH_ARM64)
2674 "ldxrh %w[uDst], %[pMem]\n\t"
2675# else
2676 "ldrexh %[uDst], %[pMem]\n\t"
2677# endif
2678 : [uDst] "=&r" (u32)
2679 : [pMem] "m" (*pu16));
2680 return (uint16_t)u32;
2681#else
2682 return *pu16;
2683#endif
2684}
2685
2686
2687/**
2688 * Atomically reads a signed 16-bit value, ordered.
2689 *
2690 * @returns Current *pi16 value
2691 * @param pi16 Pointer to the 16-bit variable to read.
2692 */
2693DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2694{
2695 Assert(!((uintptr_t)pi16 & 1));
2696#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2697 int32_t i32;
2698 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2699 RTASM_ARM_DMB_SY
2700# if defined(RT_ARCH_ARM64)
2701 "ldxrh %w[iDst], %[pMem]\n\t"
2702# else
2703 "ldrexh %[iDst], %[pMem]\n\t"
2704# endif
2705 : [iDst] "=&r" (i32)
2706 : [pMem] "m" (*pi16)
2707 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2708 return (int16_t)i32;
2709#else
2710 ASMMemoryFence();
2711 return *pi16;
2712#endif
2713}
2714
2715
2716/**
2717 * Atomically reads a signed 16-bit value, unordered.
2718 *
2719 * @returns Current *pi16 value
2720 * @param pi16 Pointer to the 16-bit variable to read.
2721 */
2722DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2723{
2724 Assert(!((uintptr_t)pi16 & 1));
2725#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2726 int32_t i32;
2727 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2728# if defined(RT_ARCH_ARM64)
2729 "ldxrh %w[iDst], %[pMem]\n\t"
2730# else
2731 "ldrexh %[iDst], %[pMem]\n\t"
2732# endif
2733 : [iDst] "=&r" (i32)
2734 : [pMem] "m" (*pi16));
2735 return (int16_t)i32;
2736#else
2737 return *pi16;
2738#endif
2739}
2740
2741
2742/**
2743 * Atomically reads an unsigned 32-bit value, ordered.
2744 *
2745 * @returns Current *pu32 value
2746 * @param pu32 Pointer to the 32-bit variable to read.
2747 */
2748DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2749{
2750 Assert(!((uintptr_t)pu32 & 3));
2751#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2752 uint32_t u32;
2753 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2754 RTASM_ARM_DMB_SY
2755# if defined(RT_ARCH_ARM64)
2756 "ldxr %w[uDst], %[pMem]\n\t"
2757# else
2758 "ldrex %[uDst], %[pMem]\n\t"
2759# endif
2760 : [uDst] "=&r" (u32)
2761 : [pMem] "m" (*pu32)
2762 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2763 return u32;
2764#else
2765 ASMMemoryFence();
2766# if ARCH_BITS == 16
2767 AssertFailed(); /** @todo 16-bit */
2768# endif
2769 return *pu32;
2770#endif
2771}
2772
2773
2774/**
2775 * Atomically reads an unsigned 32-bit value, unordered.
2776 *
2777 * @returns Current *pu32 value
2778 * @param pu32 Pointer to the 32-bit variable to read.
2779 */
2780DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2781{
2782 Assert(!((uintptr_t)pu32 & 3));
2783#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2784 uint32_t u32;
2785 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2786# if defined(RT_ARCH_ARM64)
2787 "ldxr %w[uDst], %[pMem]\n\t"
2788# else
2789 "ldrex %[uDst], %[pMem]\n\t"
2790# endif
2791 : [uDst] "=&r" (u32)
2792 : [pMem] "m" (*pu32));
2793 return u32;
2794#else
2795# if ARCH_BITS == 16
2796 AssertFailed(); /** @todo 16-bit */
2797# endif
2798 return *pu32;
2799#endif
2800}
2801
2802
2803/**
2804 * Atomically reads a signed 32-bit value, ordered.
2805 *
2806 * @returns Current *pi32 value
2807 * @param pi32 Pointer to the 32-bit variable to read.
2808 */
2809DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2810{
2811 Assert(!((uintptr_t)pi32 & 3));
2812#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2813 int32_t i32;
2814 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2815 RTASM_ARM_DMB_SY
2816# if defined(RT_ARCH_ARM64)
2817 "ldxr %w[iDst], %[pMem]\n\t"
2818# else
2819 "ldrex %[iDst], %[pMem]\n\t"
2820# endif
2821 : [iDst] "=&r" (i32)
2822 : [pMem] "m" (*pi32)
2823 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2824 return i32;
2825#else
2826 ASMMemoryFence();
2827# if ARCH_BITS == 16
2828 AssertFailed(); /** @todo 16-bit */
2829# endif
2830 return *pi32;
2831#endif
2832}
2833
2834
2835/**
2836 * Atomically reads a signed 32-bit value, unordered.
2837 *
2838 * @returns Current *pi32 value
2839 * @param pi32 Pointer to the 32-bit variable to read.
2840 */
2841DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2842{
2843 Assert(!((uintptr_t)pi32 & 3));
2844#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2845 int32_t i32;
2846 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2847# if defined(RT_ARCH_ARM64)
2848 "ldxr %w[iDst], %[pMem]\n\t"
2849# else
2850 "ldrex %[iDst], %[pMem]\n\t"
2851# endif
2852 : [iDst] "=&r" (i32)
2853 : [pMem] "m" (*pi32));
2854 return i32;
2855
2856#else
2857# if ARCH_BITS == 16
2858 AssertFailed(); /** @todo 16-bit */
2859# endif
2860 return *pi32;
2861#endif
2862}
2863
2864
2865/**
2866 * Atomically reads an unsigned 64-bit value, ordered.
2867 *
2868 * @returns Current *pu64 value
2869 * @param pu64 Pointer to the 64-bit variable to read.
2870 * The memory pointed to must be writable.
2871 *
2872 * @remarks This may fault if the memory is read-only!
2873 * @remarks x86: Requires a Pentium or later.
2874 */
2875#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2876 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2877RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2878#else
2879DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2880{
2881 uint64_t u64;
2882# ifdef RT_ARCH_AMD64
2883 Assert(!((uintptr_t)pu64 & 7));
2884/*# if RT_INLINE_ASM_GNU_STYLE
2885 __asm__ __volatile__( "mfence\n\t"
2886 "movq %1, %0\n\t"
2887 : "=r" (u64)
2888 : "m" (*pu64));
2889# else
2890 __asm
2891 {
2892 mfence
2893 mov rdx, [pu64]
2894 mov rax, [rdx]
2895 mov [u64], rax
2896 }
2897# endif*/
2898 ASMMemoryFence();
2899 u64 = *pu64;
2900
2901# elif defined(RT_ARCH_X86)
2902# if RT_INLINE_ASM_GNU_STYLE
2903# if defined(PIC) || defined(__PIC__)
2904 uint32_t u32EBX = 0;
2905 Assert(!((uintptr_t)pu64 & 7));
2906 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2907 "lock; cmpxchg8b (%5)\n\t"
2908 "movl %3, %%ebx\n\t"
2909 : "=A" (u64)
2910# if RT_GNUC_PREREQ(4, 3)
2911 , "+m" (*pu64)
2912# else
2913 , "=m" (*pu64)
2914# endif
2915 : "0" (0ULL)
2916 , "m" (u32EBX)
2917 , "c" (0)
2918 , "S" (pu64)
2919 : "cc");
2920# else /* !PIC */
2921 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2922 : "=A" (u64)
2923 , "+m" (*pu64)
2924 : "0" (0ULL)
2925 , "b" (0)
2926 , "c" (0)
2927 : "cc");
2928# endif
2929# else
2930 Assert(!((uintptr_t)pu64 & 7));
2931 __asm
2932 {
2933 xor eax, eax
2934 xor edx, edx
2935 mov edi, pu64
2936 xor ecx, ecx
2937 xor ebx, ebx
2938 lock cmpxchg8b [edi]
2939 mov dword ptr [u64], eax
2940 mov dword ptr [u64 + 4], edx
2941 }
2942# endif
2943
2944# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2945 Assert(!((uintptr_t)pu64 & 7));
2946 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2947 RTASM_ARM_DMB_SY
2948# if defined(RT_ARCH_ARM64)
2949 "ldxr %[uDst], %[pMem]\n\t"
2950# else
2951 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2952# endif
2953 : [uDst] "=&r" (u64)
2954 : [pMem] "m" (*pu64)
2955 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2956
2957# else
2958# error "Port me"
2959# endif
2960 return u64;
2961}
2962#endif
2963
2964
2965/**
2966 * Atomically reads an unsigned 64-bit value, unordered.
2967 *
2968 * @returns Current *pu64 value
2969 * @param pu64 Pointer to the 64-bit variable to read.
2970 * The memory pointed to must be writable.
2971 *
2972 * @remarks This may fault if the memory is read-only!
2973 * @remarks x86: Requires a Pentium or later.
2974 */
2975#if !defined(RT_ARCH_AMD64) \
2976 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2977 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2978RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2979#else
2980DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2981{
2982 uint64_t u64;
2983# ifdef RT_ARCH_AMD64
2984 Assert(!((uintptr_t)pu64 & 7));
2985/*# if RT_INLINE_ASM_GNU_STYLE
2986 Assert(!((uintptr_t)pu64 & 7));
2987 __asm__ __volatile__("movq %1, %0\n\t"
2988 : "=r" (u64)
2989 : "m" (*pu64));
2990# else
2991 __asm
2992 {
2993 mov rdx, [pu64]
2994 mov rax, [rdx]
2995 mov [u64], rax
2996 }
2997# endif */
2998 u64 = *pu64;
2999
3000# elif defined(RT_ARCH_X86)
3001# if RT_INLINE_ASM_GNU_STYLE
3002# if defined(PIC) || defined(__PIC__)
3003 uint32_t u32EBX = 0;
3004 uint32_t u32Spill;
3005 Assert(!((uintptr_t)pu64 & 7));
3006 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3007 "xor %%ecx,%%ecx\n\t"
3008 "xor %%edx,%%edx\n\t"
3009 "xchgl %%ebx, %3\n\t"
3010 "lock; cmpxchg8b (%4)\n\t"
3011 "movl %3, %%ebx\n\t"
3012 : "=A" (u64)
3013# if RT_GNUC_PREREQ(4, 3)
3014 , "+m" (*pu64)
3015# else
3016 , "=m" (*pu64)
3017# endif
3018 , "=c" (u32Spill)
3019 : "m" (u32EBX)
3020 , "S" (pu64)
3021 : "cc");
3022# else /* !PIC */
3023 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3024 : "=A" (u64)
3025 , "+m" (*pu64)
3026 : "0" (0ULL)
3027 , "b" (0)
3028 , "c" (0)
3029 : "cc");
3030# endif
3031# else
3032 Assert(!((uintptr_t)pu64 & 7));
3033 __asm
3034 {
3035 xor eax, eax
3036 xor edx, edx
3037 mov edi, pu64
3038 xor ecx, ecx
3039 xor ebx, ebx
3040 lock cmpxchg8b [edi]
3041 mov dword ptr [u64], eax
3042 mov dword ptr [u64 + 4], edx
3043 }
3044# endif
3045
3046# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3047 Assert(!((uintptr_t)pu64 & 7));
3048 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
3049# if defined(RT_ARCH_ARM64)
3050 "ldxr %[uDst], %[pMem]\n\t"
3051# else
3052 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
3053# endif
3054 : [uDst] "=&r" (u64)
3055 : [pMem] "m" (*pu64));
3056
3057# else
3058# error "Port me"
3059# endif
3060 return u64;
3061}
3062#endif
3063
3064
3065/**
3066 * Atomically reads a signed 64-bit value, ordered.
3067 *
3068 * @returns Current *pi64 value
3069 * @param pi64 Pointer to the 64-bit variable to read.
3070 * The memory pointed to must be writable.
3071 *
3072 * @remarks This may fault if the memory is read-only!
3073 * @remarks x86: Requires a Pentium or later.
3074 */
3075DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3076{
3077 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
3078}
3079
3080
3081/**
3082 * Atomically reads a signed 64-bit value, unordered.
3083 *
3084 * @returns Current *pi64 value
3085 * @param pi64 Pointer to the 64-bit variable to read.
3086 * The memory pointed to must be writable.
3087 *
3088 * @remarks This will fault if the memory is read-only!
3089 * @remarks x86: Requires a Pentium or later.
3090 */
3091DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
3092{
3093 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
3094}
3095
3096
3097/**
3098 * Atomically reads a size_t value, ordered.
3099 *
3100 * @returns Current *pcb value
3101 * @param pcb Pointer to the size_t variable to read.
3102 */
3103DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3104{
3105#if ARCH_BITS == 64
3106 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
3107#elif ARCH_BITS == 32
3108 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
3109#elif ARCH_BITS == 16
3110 AssertCompileSize(size_t, 2);
3111 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
3112#else
3113# error "Unsupported ARCH_BITS value"
3114#endif
3115}
3116
3117
3118/**
3119 * Atomically reads a size_t value, unordered.
3120 *
3121 * @returns Current *pcb value
3122 * @param pcb Pointer to the size_t variable to read.
3123 */
3124DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3125{
3126#if ARCH_BITS == 64
3127 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
3128#elif ARCH_BITS == 32
3129 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
3130#elif ARCH_BITS == 16
3131 AssertCompileSize(size_t, 2);
3132 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
3133#else
3134# error "Unsupported ARCH_BITS value"
3135#endif
3136}
3137
3138
3139/**
3140 * Atomically reads a pointer value, ordered.
3141 *
3142 * @returns Current *pv value
3143 * @param ppv Pointer to the pointer variable to read.
3144 *
3145 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
3146 * requires less typing (no casts).
3147 */
3148DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3149{
3150#if ARCH_BITS == 32 || ARCH_BITS == 16
3151 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3152#elif ARCH_BITS == 64
3153 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3154#else
3155# error "ARCH_BITS is bogus"
3156#endif
3157}
3158
3159/**
3160 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
3161 *
3162 * @returns Current *pv value
3163 * @param ppv Pointer to the pointer variable to read.
3164 * @param Type The type of *ppv, sans volatile.
3165 */
3166#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3167# define ASMAtomicReadPtrT(ppv, Type) \
3168 __extension__ \
3169 ({\
3170 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
3171 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
3172 pvTypeChecked; \
3173 })
3174#else
3175# define ASMAtomicReadPtrT(ppv, Type) \
3176 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3177#endif
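
/* Editor's example (not part of the original header): reading a shared head
 * pointer of the illustrative RTASMEXAMPLENODE type sketched further up, using
 * ASMAtomicReadPtrT so no casts are needed at the call site.  The helper name
 * is made up for illustration. */
DECLINLINE(RTASMEXAMPLENODE RT_FAR *) rtAsmExampleLifoPeek(RTASMEXAMPLENODE RT_FAR * volatile RT_FAR *ppHead) RT_NOTHROW_DEF
{
    return ASMAtomicReadPtrT(ppHead, RTASMEXAMPLENODE RT_FAR *);
}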
3178
3179
3180/**
3181 * Atomically reads a pointer value, unordered.
3182 *
3183 * @returns Current *pv value
3184 * @param ppv Pointer to the pointer variable to read.
3185 *
3186 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
3187 * requires less typing (no casts).
3188 */
3189DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
3190{
3191#if ARCH_BITS == 32 || ARCH_BITS == 16
3192 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
3193#elif ARCH_BITS == 64
3194 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
3195#else
3196# error "ARCH_BITS is bogus"
3197#endif
3198}
3199
3200
3201/**
3202 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
3203 *
3204 * @returns Current *pv value
3205 * @param ppv Pointer to the pointer variable to read.
3206 * @param Type The type of *ppv, sans volatile.
3207 */
3208#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
3209# define ASMAtomicUoReadPtrT(ppv, Type) \
3210 __extension__ \
3211 ({\
3212 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3213 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
3214 pvTypeChecked; \
3215 })
3216#else
3217# define ASMAtomicUoReadPtrT(ppv, Type) \
3218 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
3219#endif
3220
3221
3222/**
3223 * Atomically reads a boolean value, ordered.
3224 *
3225 * @returns Current *pf value
3226 * @param pf Pointer to the boolean variable to read.
3227 */
3228DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3229{
3230 ASMMemoryFence();
3231 return *pf; /* byte reads are atomic on x86 */
3232}
3233
3234
3235/**
3236 * Atomically reads a boolean value, unordered.
3237 *
3238 * @returns Current *pf value
3239 * @param pf Pointer to the boolean variable to read.
3240 */
3241DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
3242{
3243 return *pf; /* byte reads are atomic on x86 */
3244}
3245
3246
3247/**
3248 * Atomically read a typical IPRT handle value, ordered.
3249 *
3250 * @param ph Pointer to the handle variable to read.
3251 * @param phRes Where to store the result.
3252 *
3253 * @remarks This doesn't currently work for all handles (like RTFILE).
3254 */
3255#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3256# define ASMAtomicReadHandle(ph, phRes) \
3257 do { \
3258 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3259 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3260 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3261 } while (0)
3262#elif HC_ARCH_BITS == 64
3263# define ASMAtomicReadHandle(ph, phRes) \
3264 do { \
3265 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3266 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3267 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3268 } while (0)
3269#else
3270# error HC_ARCH_BITS
3271#endif
3272
3273
3274/**
3275 * Atomically read a typical IPRT handle value, unordered.
3276 *
3277 * @param ph Pointer to the handle variable to read.
3278 * @param phRes Where to store the result.
3279 *
3280 * @remarks This doesn't currently work for all handles (like RTFILE).
3281 */
3282#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3283# define ASMAtomicUoReadHandle(ph, phRes) \
3284 do { \
3285 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3286 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3287 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3288 } while (0)
3289#elif HC_ARCH_BITS == 64
3290# define ASMAtomicUoReadHandle(ph, phRes) \
3291 do { \
3292 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3293 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3294 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3295 } while (0)
3296#else
3297# error HC_ARCH_BITS
3298#endif
3299
3300
3301/**
3302 * Atomically read a value whose size might differ
3303 * between platforms or compilers, ordered.
3304 *
3305 * @param pu Pointer to the variable to read.
3306 * @param puRes Where to store the result.
3307 */
3308#define ASMAtomicReadSize(pu, puRes) \
3309 do { \
3310 switch (sizeof(*(pu))) { \
3311 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3312 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3313 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3314 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3315 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3316 } \
3317 } while (0)
3318
3319
3320/**
3321 * Atomically read a value whose size might differ
3322 * between platforms or compilers, unordered.
3323 *
3324 * @param pu Pointer to the variable to read.
3325 * @param puRes Where to store the result.
3326 */
3327#define ASMAtomicUoReadSize(pu, puRes) \
3328 do { \
3329 switch (sizeof(*(pu))) { \
3330 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3331 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3332 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3333 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3334 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3335 } \
3336 } while (0)
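
/* Editor's example (not part of the original header): a sketch of reading a
 * field whose width follows the host pointer size (uintptr_t here) without
 * spelling out the size at the call site.  Names are made up. */
DECLINLINE(uintptr_t) rtAsmExampleReadCookie(uintptr_t volatile RT_FAR *puCookie) RT_NOTHROW_DEF
{
    uintptr_t uCookie;
    ASMAtomicReadSize(puCookie, &uCookie); /* dispatches on sizeof(*puCookie) */
    return uCookie;
}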
3337
3338
3339/**
3340 * Atomically writes an unsigned 8-bit value, ordered.
3341 *
3342 * @param pu8 Pointer to the 8-bit variable.
3343 * @param u8 The 8-bit value to assign to *pu8.
3344 */
3345DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3346{
3347 /** @todo Any possible ARM32/ARM64 optimizations here? */
3348 ASMAtomicXchgU8(pu8, u8);
3349}
3350
3351
3352/**
3353 * Atomically writes an unsigned 8-bit value, unordered.
3354 *
3355 * @param pu8 Pointer to the 8-bit variable.
3356 * @param u8 The 8-bit value to assign to *pu8.
3357 */
3358DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3359{
3360 /** @todo Any possible ARM32/ARM64 improvements here? */
3361 *pu8 = u8; /* byte writes are atomic on x86 */
3362}
3363
3364
3365/**
3366 * Atomically writes a signed 8-bit value, ordered.
3367 *
3368 * @param pi8 Pointer to the 8-bit variable to read.
3369 * @param i8 The 8-bit value to assign to *pi8.
3370 */
3371DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3372{
3373 /** @todo Any possible ARM32/ARM64 optimizations here? */
3374 ASMAtomicXchgS8(pi8, i8);
3375}
3376
3377
3378/**
3379 * Atomically writes a signed 8-bit value, unordered.
3380 *
3381 * @param pi8 Pointer to the 8-bit variable to write.
3382 * @param i8 The 8-bit value to assign to *pi8.
3383 */
3384DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3385{
3386 *pi8 = i8; /* byte writes are atomic on x86 */
3387}
3388
3389
3390/**
3391 * Atomically writes an unsigned 16-bit value, ordered.
3392 *
3393 * @param pu16 Pointer to the 16-bit variable to write.
3394 * @param u16 The 16-bit value to assign to *pu16.
3395 */
3396DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3397{
3398 /** @todo Any possible ARM32/ARM64 optimizations here? */
3399 ASMAtomicXchgU16(pu16, u16);
3400}
3401
3402
3403/**
3404 * Atomically writes an unsigned 16-bit value, unordered.
3405 *
3406 * @param pu16 Pointer to the 16-bit variable to write.
3407 * @param u16 The 16-bit value to assign to *pu16.
3408 */
3409DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3410{
3411 Assert(!((uintptr_t)pu16 & 1));
3412 *pu16 = u16;
3413}
3414
3415
3416/**
3417 * Atomically writes a signed 16-bit value, ordered.
3418 *
3419 * @param pi16 Pointer to the 16-bit variable to write.
3420 * @param i16 The 16-bit value to assign to *pi16.
3421 */
3422DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3423{
3424 /** @todo Any possible ARM32/ARM64 optimizations here? */
3425 ASMAtomicXchgS16(pi16, i16);
3426}
3427
3428
3429/**
3430 * Atomically writes a signed 16-bit value, unordered.
3431 *
3432 * @param pi16 Pointer to the 16-bit variable to write.
3433 * @param i16 The 16-bit value to assign to *pi16.
3434 */
3435DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3436{
3437 Assert(!((uintptr_t)pi16 & 1));
3438 *pi16 = i16;
3439}
3440
3441
3442/**
3443 * Atomically writes an unsigned 32-bit value, ordered.
3444 *
3445 * @param pu32 Pointer to the 32-bit variable to write.
3446 * @param u32 The 32-bit value to assign to *pu32.
3447 */
3448DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3449{
3450 /** @todo Any possible ARM32/ARM64 optimizations here? */
3451 ASMAtomicXchgU32(pu32, u32);
3452}
3453
3454
3455/**
3456 * Atomically writes an unsigned 32-bit value, unordered.
3457 *
3458 * @param pu32 Pointer to the 32-bit variable to write.
3459 * @param u32 The 32-bit value to assign to *pu32.
3460 */
3461DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3462{
3463 Assert(!((uintptr_t)pu32 & 3));
3464#if ARCH_BITS >= 32
3465 *pu32 = u32;
3466#else
3467 ASMAtomicXchgU32(pu32, u32);
3468#endif
3469}
3470
3471
3472/**
3473 * Atomically writes a signed 32-bit value, ordered.
3474 *
3475 * @param pi32 Pointer to the 32-bit variable to write.
3476 * @param i32 The 32-bit value to assign to *pi32.
3477 */
3478DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3479{
3480 ASMAtomicXchgS32(pi32, i32);
3481}
3482
3483
3484/**
3485 * Atomically writes a signed 32-bit value, unordered.
3486 *
3487 * @param pi32 Pointer to the 32-bit variable to write.
3488 * @param i32 The 32-bit value to assign to *pi32.
3489 */
3490DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3491{
3492 Assert(!((uintptr_t)pi32 & 3));
3493#if ARCH_BITS >= 32
3494 *pi32 = i32;
3495#else
3496 ASMAtomicXchgS32(pi32, i32);
3497#endif
3498}
3499
3500
3501/**
3502 * Atomically writes an unsigned 64-bit value, ordered.
3503 *
3504 * @param pu64 Pointer to the 64-bit variable to write.
3505 * @param u64 The 64-bit value to assign to *pu64.
3506 */
3507DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3508{
3509 /** @todo Any possible ARM32/ARM64 optimizations here? */
3510 ASMAtomicXchgU64(pu64, u64);
3511}
3512
3513
3514/**
3515 * Atomically writes an unsigned 64-bit value, unordered.
3516 *
3517 * @param pu64 Pointer to the 64-bit variable to write.
3518 * @param u64 The 64-bit value to assign to *pu64.
3519 */
3520DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3521{
3522 Assert(!((uintptr_t)pu64 & 7));
3523#if ARCH_BITS == 64
3524 *pu64 = u64;
3525#else
3526 ASMAtomicXchgU64(pu64, u64);
3527#endif
3528}
3529
3530
3531/**
3532 * Atomically writes a signed 64-bit value, ordered.
3533 *
3534 * @param pi64 Pointer to the 64-bit variable to write.
3535 * @param i64 The 64-bit value to assign to *pi64.
3536 */
3537DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3538{
3539 /** @todo Any possible ARM32/ARM64 optimizations here? */
3540 ASMAtomicXchgS64(pi64, i64);
3541}
3542
3543
3544/**
3545 * Atomically writes a signed 64-bit value, unordered.
3546 *
3547 * @param pi64 Pointer to the 64-bit variable to write.
3548 * @param i64 The 64-bit value to assign to *pi64.
3549 */
3550DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3551{
3552 Assert(!((uintptr_t)pi64 & 7));
3553#if ARCH_BITS == 64
3554 *pi64 = i64;
3555#else
3556 ASMAtomicXchgS64(pi64, i64);
3557#endif
3558}
3559
3560
3561/**
3562 * Atomically writes a size_t value, ordered.
3563 *
3564 * @returns nothing.
3565 * @param pcb Pointer to the size_t variable to write.
3566 * @param cb The value to assign to *pcb.
3567 */
3568DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3569{
3570#if ARCH_BITS == 64
3571 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3572#elif ARCH_BITS == 32
3573 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3574#elif ARCH_BITS == 16
3575 AssertCompileSize(size_t, 2);
3576 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3577#else
3578# error "Unsupported ARCH_BITS value"
3579#endif
3580}
3581
3582
3583/**
3584 * Atomically writes a size_t value, unordered.
3585 *
3586 * @returns nothing.
3587 * @param pcb Pointer to the size_t variable to write.
3588 * @param cb The value to assign to *pcb.
3589 */
3590DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3591{
3592#if ARCH_BITS == 64
3593 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3594#elif ARCH_BITS == 32
3595 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3596#elif ARCH_BITS == 16
3597 AssertCompileSize(size_t, 2);
3598 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3599#else
3600# error "Unsupported ARCH_BITS value"
3601#endif
3602}
3603
3604
3605/**
3606 * Atomically writes a boolean value, ordered.
3607 *
3608 * @param pf Pointer to the boolean variable to write.
3609 * @param f The boolean value to assign to *pf.
3610 */
3611DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3612{
3613 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3614}
3615
3616
3617/**
3618 * Atomically writes a boolean value, unordered.
3619 *
3620 * @param pf Pointer to the boolean variable to write.
3621 * @param f The boolean value to assign to *pf.
3622 */
3623DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3624{
3625 *pf = f; /* byte writes are atomic on x86 */
3626}
3627
3628
3629/**
3630 * Atomically writes a pointer value, ordered.
3631 *
3632 * @param ppv Pointer to the pointer variable to write.
3633 * @param pv The pointer value to assign to *ppv.
3634 */
3635DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3636{
3637#if ARCH_BITS == 32 || ARCH_BITS == 16
3638 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3639#elif ARCH_BITS == 64
3640 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3641#else
3642# error "ARCH_BITS is bogus"
3643#endif
3644}
3645
3646
3647/**
3648 * Atomically writes a pointer value, unordered.
3649 *
3650 * @param ppv Pointer to the pointer variable to write.
3651 * @param pv The pointer value to assign to *ppv.
3652 */
3653DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3654{
3655#if ARCH_BITS == 32 || ARCH_BITS == 16
3656 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3657#elif ARCH_BITS == 64
3658 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3659#else
3660# error "ARCH_BITS is bogus"
3661#endif
3662}
3663
3664
3665/**
3666 * Atomically writes a pointer value, ordered.
3667 *
3668 * @param ppv Pointer to the pointer variable to write.
3669 * @param pv The pointer value to assign to *ppv. If NULL use
3670 * ASMAtomicWriteNullPtr or you'll land in trouble.
3671 *
3672 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3673 * NULL.
3674 */
3675#ifdef __GNUC__
3676# define ASMAtomicWritePtr(ppv, pv) \
3677 do \
3678 { \
3679 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3680 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3681 \
3682 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3683 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3684 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3685 \
3686 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3687 } while (0)
3688#else
3689# define ASMAtomicWritePtr(ppv, pv) \
3690 do \
3691 { \
3692 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3693 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3694 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3695 \
3696 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3697 } while (0)
3698#endif
3699
3700
3701/**
3702 * Atomically sets a pointer to NULL, ordered.
3703 *
3704 * @param ppv Pointer to the pointer variable that should be set to NULL.
3705 *
3706 * @remarks This is relatively type safe on GCC platforms.
3707 */
3708#if RT_GNUC_PREREQ(4, 2)
3709# define ASMAtomicWriteNullPtr(ppv) \
3710 do \
3711 { \
3712 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3713 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3714 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3715 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3716 } while (0)
3717#else
3718# define ASMAtomicWriteNullPtr(ppv) \
3719 do \
3720 { \
3721 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3722 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3723 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3724 } while (0)
3725#endif
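/* Usage sketch for the two macros above (MYNODE and g_pHead are hypothetical):
 * publish a pointer with ASMAtomicWritePtr and clear it again with the
 * dedicated NULL variant, as the documentation requires.
 *
 *     MYNODE * volatile g_pHead;
 *     MYNODE           *pNode = (MYNODE *)RTMemAllocZ(sizeof(*pNode));
 *     ASMAtomicWritePtr(&g_pHead, pNode);
 *     ASMAtomicWriteNullPtr(&g_pHead);
 */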
3726
3727
3728/**
3729 * Atomically writes a pointer value, unordered.
3730 *
3732 * @param ppv Pointer to the pointer variable.
3733 * @param pv The pointer value to assign to *ppv. If NULL use
3734 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3735 *
3736 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3737 * NULL.
3738 */
3739#if RT_GNUC_PREREQ(4, 2)
3740# define ASMAtomicUoWritePtr(ppv, pv) \
3741 do \
3742 { \
3743 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3744 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3745 \
3746 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3747 AssertCompile(sizeof(pv) == sizeof(void *)); \
3748 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3749 \
3750 *(ppvTypeChecked) = pvTypeChecked; \
3751 } while (0)
3752#else
3753# define ASMAtomicUoWritePtr(ppv, pv) \
3754 do \
3755 { \
3756 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3757 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3758 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3759 *(ppv) = pv; \
3760 } while (0)
3761#endif
3762
3763
3764/**
3765 * Atomically sets a pointer to NULL, unordered.
3766 *
3767 * @param ppv Pointer to the pointer variable that should be set to NULL.
3768 *
3769 * @remarks This is relatively type safe on GCC platforms.
3770 */
3771#ifdef __GNUC__
3772# define ASMAtomicUoWriteNullPtr(ppv) \
3773 do \
3774 { \
3775 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3776 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3777 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3778 *(ppvTypeChecked) = NULL; \
3779 } while (0)
3780#else
3781# define ASMAtomicUoWriteNullPtr(ppv) \
3782 do \
3783 { \
3784 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3785 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3786 *(ppv) = NULL; \
3787 } while (0)
3788#endif
3789
3790
3791/**
3792 * Atomically write a typical IPRT handle value, ordered.
3793 *
3794 * @param ph Pointer to the variable to update.
3795 * @param hNew The value to assign to *ph.
3796 *
3797 * @remarks This doesn't currently work for all handles (like RTFILE).
3798 */
3799#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3800# define ASMAtomicWriteHandle(ph, hNew) \
3801 do { \
3802 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3803 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3804 } while (0)
3805#elif HC_ARCH_BITS == 64
3806# define ASMAtomicWriteHandle(ph, hNew) \
3807 do { \
3808 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3809 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3810 } while (0)
3811#else
3812# error HC_ARCH_BITS
3813#endif
3814
3815
3816/**
3817 * Atomically write a typical IPRT handle value, unordered.
3818 *
3819 * @param ph Pointer to the variable to update.
3820 * @param hNew The value to assign to *ph.
3821 *
3822 * @remarks This doesn't currently work for all handles (like RTFILE).
3823 */
3824#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3825# define ASMAtomicUoWriteHandle(ph, hNew) \
3826 do { \
3827 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3828 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3829 } while (0)
3830#elif HC_ARCH_BITS == 64
3831# define ASMAtomicUoWriteHandle(ph, hNew) \
3832 do { \
3833 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3834 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3835 } while (0)
3836#else
3837# error HC_ARCH_BITS
3838#endif
3839
3840
3841/**
3842 * Atomically write a value whose size might differ
3843 * between platforms or compilers, ordered.
3844 *
3845 * @param pu Pointer to the variable to update.
3846 * @param uNew The value to assign to *pu.
3847 */
3848#define ASMAtomicWriteSize(pu, uNew) \
3849 do { \
3850 switch (sizeof(*(pu))) { \
3851 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3852 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3853 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3854 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3855 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3856 } \
3857 } while (0)
3858
3859/**
3860 * Atomically write a value whose size might differ
3861 * between platforms or compilers, unordered.
3862 *
3863 * @param pu Pointer to the variable to update.
3864 * @param uNew The value to assign to *pu.
3865 */
3866#define ASMAtomicUoWriteSize(pu, uNew) \
3867 do { \
3868 switch (sizeof(*(pu))) { \
3869 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3870 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3871 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3872 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3873 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3874 } \
3875 } while (0)
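/* Usage sketch (g_cPending is hypothetical): the Size variants dispatch on
 * sizeof(*(pu)) at compile time, which helps with types like 'long' that are
 * 4 bytes on some platforms and 8 on others.
 *
 *     unsigned long volatile g_cPending;
 *     ASMAtomicWriteSize(&g_cPending, 0);
 */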
3876
3877
3878
3879/**
3880 * Atomically exchanges and adds to a 16-bit value, ordered.
3881 *
3882 * @returns The old value.
3883 * @param pu16 Pointer to the value.
3884 * @param u16 Number to add.
3885 *
3886 * @remarks Currently not implemented, just to make 16-bit code happy.
3887 * @remarks x86: Requires a 486 or later.
3888 */
3889RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3890
3891
3892/**
3893 * Atomically exchanges and adds to a 32-bit value, ordered.
3894 *
3895 * @returns The old value.
3896 * @param pu32 Pointer to the value.
3897 * @param u32 Number to add.
3898 *
3899 * @remarks x86: Requires a 486 or later.
3900 */
3901#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3902RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3903#else
3904DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3905{
3906# if RT_INLINE_ASM_USES_INTRIN
3907 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3908 return u32;
3909
3910# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3911# if RT_INLINE_ASM_GNU_STYLE
3912 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3913 : "=r" (u32)
3914 , "=m" (*pu32)
3915 : "0" (u32)
3916 , "m" (*pu32)
3917 : "memory"
3918 , "cc");
3919 return u32;
3920# else
3921 __asm
3922 {
3923 mov eax, [u32]
3924# ifdef RT_ARCH_AMD64
3925 mov rdx, [pu32]
3926 lock xadd [rdx], eax
3927# else
3928 mov edx, [pu32]
3929 lock xadd [edx], eax
3930# endif
3931 mov [u32], eax
3932 }
3933 return u32;
3934# endif
3935
3936# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3937 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3938 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3939 "add %[uNew], %[uOld], %[uVal]\n\t",
3940 [uVal] "r" (u32));
3941 return u32OldRet;
3942
3943# else
3944# error "Port me"
3945# endif
3946}
3947#endif
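/* Illustrative sketch (s_uNextId is hypothetical): ASMAtomicAddU32 returns the
 * value from before the addition, so it can hand out unique sequence numbers.
 *
 *     static uint32_t volatile s_uNextId = 0;
 *     uint32_t const uMyId = ASMAtomicAddU32(&s_uNextId, 1);
 */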
3948
3949
3950/**
3951 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3952 *
3953 * @returns The old value.
3954 * @param pi32 Pointer to the value.
3955 * @param i32 Number to add.
3956 *
3957 * @remarks x86: Requires a 486 or later.
3958 */
3959DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3960{
3961 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3962}
3963
3964
3965/**
3966 * Atomically exchanges and adds to a 64-bit value, ordered.
3967 *
3968 * @returns The old value.
3969 * @param pu64 Pointer to the value.
3970 * @param u64 Number to add.
3971 *
3972 * @remarks x86: Requires a Pentium or later.
3973 */
3974#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3975DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3976#else
3977DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3978{
3979# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3980 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3981 return u64;
3982
3983# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3984 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3985 : "=r" (u64)
3986 , "=m" (*pu64)
3987 : "0" (u64)
3988 , "m" (*pu64)
3989 : "memory"
3990 , "cc");
3991 return u64;
3992
3993# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3994 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3995 "add %[uNew], %[uOld], %[uVal]\n\t"
3996 ,
3997 "add %[uNew], %[uOld], %[uVal]\n\t"
3998 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3999 [uVal] "r" (u64));
4000 return u64OldRet;
4001
4002# else
4003 uint64_t u64Old;
4004 for (;;)
4005 {
4006 uint64_t u64New;
4007 u64Old = ASMAtomicUoReadU64(pu64);
4008 u64New = u64Old + u64;
4009 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4010 break;
4011 ASMNopPause();
4012 }
4013 return u64Old;
4014# endif
4015}
4016#endif
4017
4018
4019/**
4020 * Atomically exchanges and adds to a signed 64-bit value, ordered.
4021 *
4022 * @returns The old value.
4023 * @param pi64 Pointer to the value.
4024 * @param i64 Number to add.
4025 *
4026 * @remarks x86: Requires a Pentium or later.
4027 */
4028DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4029{
4030 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4031}
4032
4033
4034/**
4035 * Atomically exchanges and adds to a size_t value, ordered.
4036 *
4037 * @returns The old value.
4038 * @param pcb Pointer to the size_t value.
4039 * @param cb Number to add.
4040 */
4041DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4042{
4043#if ARCH_BITS == 64
4044 AssertCompileSize(size_t, 8);
4045 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
4046#elif ARCH_BITS == 32
4047 AssertCompileSize(size_t, 4);
4048 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
4049#elif ARCH_BITS == 16
4050 AssertCompileSize(size_t, 2);
4051 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
4052#else
4053# error "Unsupported ARCH_BITS value"
4054#endif
4055}
4056
4057
4058/**
4059 * Atomically exchanges and adds a value whose size might differ between
4060 * platforms or compilers, ordered.
4061 *
4062 * @param pu Pointer to the variable to update.
4063 * @param uNew The value to add to *pu.
4064 * @param puOld Where to store the old value.
4065 */
4066#define ASMAtomicAddSize(pu, uNew, puOld) \
4067 do { \
4068 switch (sizeof(*(pu))) { \
4069 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4070 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4071 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
4072 } \
4073 } while (0)
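/* Usage sketch (hypothetical counter): unlike the fixed-size functions, the
 * old value comes back through the third argument of ASMAtomicAddSize.
 *
 *     unsigned long volatile g_cAllocs;
 *     unsigned long          cAllocsOld;
 *     ASMAtomicAddSize(&g_cAllocs, 1, &cAllocsOld);
 */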
4074
4075
4076
4077/**
4078 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
4079 *
4080 * @returns The old value.
4081 * @param pu16 Pointer to the value.
4082 * @param u16 Number to subtract.
4083 *
4084 * @remarks x86: Requires a 486 or later.
4085 */
4086DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
4087{
4088 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
4089}
4090
4091
4092/**
4093 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
4094 *
4095 * @returns The old value.
4096 * @param pi16 Pointer to the value.
4097 * @param i16 Number to subtract.
4098 *
4099 * @remarks x86: Requires a 486 or later.
4100 */
4101DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
4102{
4103 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
4104}
4105
4106
4107/**
4108 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
4109 *
4110 * @returns The old value.
4111 * @param pu32 Pointer to the value.
4112 * @param u32 Number to subtract.
4113 *
4114 * @remarks x86: Requires a 486 or later.
4115 */
4116DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4117{
4118 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
4119}
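/* Worked example of the negation trick used above: subtracting 1 is the same
 * as adding 0xffffffff in two's complement, and the value returned is still
 * the one from before the operation (uOld becomes 5, u ends up as 4).
 *
 *     uint32_t volatile u    = 5;
 *     uint32_t const    uOld = ASMAtomicSubU32(&u, 1);
 */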
4120
4121
4122/**
4123 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
4124 *
4125 * @returns The old value.
4126 * @param pi32 Pointer to the value.
4127 * @param i32 Number to subtract.
4128 *
4129 * @remarks x86: Requires a 486 or later.
4130 */
4131DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4132{
4133 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
4134}
4135
4136
4137/**
4138 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
4139 *
4140 * @returns The old value.
4141 * @param pu64 Pointer to the value.
4142 * @param u64 Number to subtract.
4143 *
4144 * @remarks x86: Requires a Pentium or later.
4145 */
4146DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4147{
4148 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
4149}
4150
4151
4152/**
4153 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
4154 *
4155 * @returns The old value.
4156 * @param pi64 Pointer to the value.
4157 * @param i64 Number to subtract.
4158 *
4159 * @remarks x86: Requires a Pentium or later.
4160 */
4161DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4162{
4163 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
4164}
4165
4166
4167/**
4168 * Atomically exchanges and subtracts from a size_t value, ordered.
4169 *
4170 * @returns The old value.
4171 * @param pcb Pointer to the size_t value.
4172 * @param cb Number to subtract.
4173 *
4174 * @remarks x86: Requires a 486 or later.
4175 */
4176DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
4177{
4178#if ARCH_BITS == 64
4179 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
4180#elif ARCH_BITS == 32
4181 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
4182#elif ARCH_BITS == 16
4183 AssertCompileSize(size_t, 2);
4184 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
4185#else
4186# error "Unsupported ARCH_BITS value"
4187#endif
4188}
4189
4190
4191/**
4192 * Atomically exchanges and subtracts a value whose size might differ between
4193 * platforms or compilers, ordered.
4194 *
4195 * @param pu Pointer to the variable to update.
4196 * @param uNew The value to subtract from *pu.
4197 * @param puOld Where to store the old value.
4198 *
4199 * @remarks x86: Requires a 486 or later.
4200 */
4201#define ASMAtomicSubSize(pu, uNew, puOld) \
4202 do { \
4203 switch (sizeof(*(pu))) { \
4204 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
4205 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
4206 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
4207 } \
4208 } while (0)
4209
4210
4211
4212/**
4213 * Atomically increment a 16-bit value, ordered.
4214 *
4215 * @returns The new value.
4216 * @param pu16 Pointer to the value to increment.
4217 * @remarks Not implemented. Just to make 16-bit code happy.
4218 *
4219 * @remarks x86: Requires a 486 or later.
4220 */
4221RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4222
4223
4224/**
4225 * Atomically increment a 32-bit value, ordered.
4226 *
4227 * @returns The new value.
4228 * @param pu32 Pointer to the value to increment.
4229 *
4230 * @remarks x86: Requires a 486 or later.
4231 */
4232#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4233RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4234#else
4235DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4236{
4237# if RT_INLINE_ASM_USES_INTRIN
4238 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
4239
4240# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4241# if RT_INLINE_ASM_GNU_STYLE
4242 uint32_t u32;
4243 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4244 : "=r" (u32)
4245 , "=m" (*pu32)
4246 : "0" (1)
4247 , "m" (*pu32)
4248 : "memory"
4249 , "cc");
4250 return u32+1;
4251# else
 uint32_t u32;
4252 __asm
4253 {
4254 mov eax, 1
4255# ifdef RT_ARCH_AMD64
4256 mov rdx, [pu32]
4257 lock xadd [rdx], eax
4258# else
4259 mov edx, [pu32]
4260 lock xadd [edx], eax
4261# endif
4262 mov u32, eax
4263 }
4264 return u32+1;
4265# endif
4266
4267# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4268 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4269 "add %w[uNew], %w[uNew], #1\n\t",
4270 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4271 "X" (0) /* dummy */);
4272 return u32NewRet;
4273
4274# else
4275 return ASMAtomicAddU32(pu32, 1) + 1;
4276# endif
4277}
4278#endif
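/* Usage sketch (s_cUsers is hypothetical): the increment functions return the
 * new value, unlike the Add functions which return the old one, so the first
 * caller can be detected directly.
 *
 *     static uint32_t volatile s_cUsers = 0;
 *     bool const fFirstUser = ASMAtomicIncU32(&s_cUsers) == 1;
 */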
4279
4280
4281/**
4282 * Atomically increment a signed 32-bit value, ordered.
4283 *
4284 * @returns The new value.
4285 * @param pi32 Pointer to the value to increment.
4286 *
4287 * @remarks x86: Requires a 486 or later.
4288 */
4289DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4290{
4291 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4292}
4293
4294
4295/**
4296 * Atomically increment a 64-bit value, ordered.
4297 *
4298 * @returns The new value.
4299 * @param pu64 Pointer to the value to increment.
4300 *
4301 * @remarks x86: Requires a Pentium or later.
4302 */
4303#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4304DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4305#else
4306DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4307{
4308# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4309 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4310
4311# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4312 uint64_t u64;
4313 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4314 : "=r" (u64)
4315 , "=m" (*pu64)
4316 : "0" (1)
4317 , "m" (*pu64)
4318 : "memory"
4319 , "cc");
4320 return u64 + 1;
4321
4322# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4323 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4324 "add %[uNew], %[uNew], #1\n\t"
4325 ,
4326 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4327 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4328 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4329 return u64NewRet;
4330
4331# else
4332 return ASMAtomicAddU64(pu64, 1) + 1;
4333# endif
4334}
4335#endif
4336
4337
4338/**
4339 * Atomically increment a signed 64-bit value, ordered.
4340 *
4341 * @returns The new value.
4342 * @param pi64 Pointer to the value to increment.
4343 *
4344 * @remarks x86: Requires a Pentium or later.
4345 */
4346DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4347{
4348 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4349}
4350
4351
4352/**
4353 * Atomically increment a size_t value, ordered.
4354 *
4355 * @returns The new value.
4356 * @param pcb Pointer to the value to increment.
4357 *
4358 * @remarks x86: Requires a 486 or later.
4359 */
4360DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4361{
4362#if ARCH_BITS == 64
4363 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4364#elif ARCH_BITS == 32
4365 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4366#elif ARCH_BITS == 16
4367 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4368#else
4369# error "Unsupported ARCH_BITS value"
4370#endif
4371}
4372
4373
4374
4375/**
4376 * Atomically decrement an unsigned 16-bit value, ordered.
4377 *
4378 * @returns The new value.
4379 * @param pu16 Pointer to the value to decrement.
4380 * @remarks Not implemented. Just to make 16-bit code happy.
4381 *
4382 * @remarks x86: Requires a 486 or later.
4383 */
4384RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4385
4386
4387/**
4388 * Atomically decrement an unsigned 32-bit value, ordered.
4389 *
4390 * @returns The new value.
4391 * @param pu32 Pointer to the value to decrement.
4392 *
4393 * @remarks x86: Requires a 486 or later.
4394 */
4395#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4396RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4397#else
4398DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4399{
4400# if RT_INLINE_ASM_USES_INTRIN
4401 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4402
4403# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4404# if RT_INLINE_ASM_GNU_STYLE
4405 uint32_t u32;
4406 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4407 : "=r" (u32)
4408 , "=m" (*pu32)
4409 : "0" (-1)
4410 , "m" (*pu32)
4411 : "memory"
4412 , "cc");
4413 return u32-1;
4414# else
4415 uint32_t u32;
4416 __asm
4417 {
4418 mov eax, -1
4419# ifdef RT_ARCH_AMD64
4420 mov rdx, [pu32]
4421 lock xadd [rdx], eax
4422# else
4423 mov edx, [pu32]
4424 lock xadd [edx], eax
4425# endif
4426 mov u32, eax
4427 }
4428 return u32-1;
4429# endif
4430
4431# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4432 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4433 "sub %w[uNew], %w[uNew], #1\n\t",
4434 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4435 "X" (0) /* dummy */);
4436 return u32NewRet;
4437
4438# else
4439 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4440# endif
4441}
4442#endif
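/* Usage sketch (pObj and its cRefs member are hypothetical): a typical release
 * path frees the object when the decrement brings the reference count to zero.
 *
 *     if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *         RTMemFree(pObj);
 */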
4443
4444
4445/**
4446 * Atomically decrement a signed 32-bit value, ordered.
4447 *
4448 * @returns The new value.
4449 * @param pi32 Pointer to the value to decrement.
4450 *
4451 * @remarks x86: Requires a 486 or later.
4452 */
4453DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4454{
4455 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4456}
4457
4458
4459/**
4460 * Atomically decrement an unsigned 64-bit value, ordered.
4461 *
4462 * @returns The new value.
4463 * @param pu64 Pointer to the value to decrement.
4464 *
4465 * @remarks x86: Requires a Pentium or later.
4466 */
4467#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4468RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4469#else
4470DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4471{
4472# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4473 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4474
4475# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4476 uint64_t u64;
4477 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4478 : "=r" (u64)
4479 , "=m" (*pu64)
4480 : "0" (~(uint64_t)0)
4481 , "m" (*pu64)
4482 : "memory"
4483 , "cc");
4484 return u64-1;
4485
4486# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4487 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4488 "sub %[uNew], %[uNew], #1\n\t"
4489 ,
4490 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4491 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4492 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4493 return u64NewRet;
4494
4495# else
4496 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4497# endif
4498}
4499#endif
4500
4501
4502/**
4503 * Atomically decrement a signed 64-bit value, ordered.
4504 *
4505 * @returns The new value.
4506 * @param pi64 Pointer to the value to decrement.
4507 *
4508 * @remarks x86: Requires a Pentium or later.
4509 */
4510DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4511{
4512 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4513}
4514
4515
4516/**
4517 * Atomically decrement a size_t value, ordered.
4518 *
4519 * @returns The new value.
4520 * @param pcb Pointer to the value to decrement.
4521 *
4522 * @remarks x86: Requires a 486 or later.
4523 */
4524DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4525{
4526#if ARCH_BITS == 64
4527 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4528#elif ARCH_BITS == 32
4529 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4530#elif ARCH_BITS == 16
4531 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4532#else
4533# error "Unsupported ARCH_BITS value"
4534#endif
4535}
4536
4537
4538/**
4539 * Atomically Or an unsigned 32-bit value, ordered.
4540 *
4541 * @param pu32 Pointer to the pointer variable to OR u32 with.
4542 * @param u32 The value to OR *pu32 with.
4543 *
4544 * @remarks x86: Requires a 386 or later.
4545 */
4546#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4547RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4548#else
4549DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4550{
4551# if RT_INLINE_ASM_USES_INTRIN
4552 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4553
4554# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4555# if RT_INLINE_ASM_GNU_STYLE
4556 __asm__ __volatile__("lock; orl %1, %0\n\t"
4557 : "=m" (*pu32)
4558 : "ir" (u32)
4559 , "m" (*pu32)
4560 : "cc");
4561# else
4562 __asm
4563 {
4564 mov eax, [u32]
4565# ifdef RT_ARCH_AMD64
4566 mov rdx, [pu32]
4567 lock or [rdx], eax
4568# else
4569 mov edx, [pu32]
4570 lock or [edx], eax
4571# endif
4572 }
4573# endif
4574
4575# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4576 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4577 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4578 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4579 "orr %[uNew], %[uNew], %[uVal]\n\t",
4580 [uVal] "r" (u32));
4581
4582# else
4583# error "Port me"
4584# endif
4585}
4586#endif
4587
4588
4589/**
4590 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4591 * fallback).
4592 *
4593 * @returns Old value.
4594 * @param pu32 Pointer to the variable to OR @a u32 with.
4595 * @param u32 The value to OR @a *pu32 with.
4596 */
4597DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4598{
4599#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4600 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4601 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4602 "orr %[uNew], %[uOld], %[uVal]\n\t",
4603 [uVal] "r" (u32));
4604 return u32OldRet;
4605
4606#else
4607 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4608 uint32_t u32New;
4609 do
4610 u32New = u32RetOld | u32;
4611 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4612 return u32RetOld;
4613#endif
4614}
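/* Usage sketch (s_fFlags is hypothetical): the old value returned by the
 * extended OR tells whether the bit was already set, so only one caller wins
 * the race to do any one-time work behind it.
 *
 *     static uint32_t volatile s_fFlags = 0;
 *     bool const fWonRace = !(ASMAtomicOrExU32(&s_fFlags, RT_BIT_32(0)) & RT_BIT_32(0));
 */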
4615
4616
4617/**
4618 * Atomically Or a signed 32-bit value, ordered.
4619 *
4620 * @param pi32 Pointer to the variable to OR i32 with.
4621 * @param i32 The value to OR *pi32 with.
4622 *
4623 * @remarks x86: Requires a 386 or later.
4624 */
4625DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4626{
4627 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4628}
4629
4630
4631/**
4632 * Atomically Or an unsigned 64-bit value, ordered.
4633 *
4634 * @param pu64 Pointer to the pointer variable to OR u64 with.
4635 * @param u64 The value to OR *pu64 with.
4636 *
4637 * @remarks x86: Requires a Pentium or later.
4638 */
4639#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4640DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4641#else
4642DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4643{
4644# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4645 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4646
4647# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4648 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4649 : "=m" (*pu64)
4650 : "r" (u64)
4651 , "m" (*pu64)
4652 : "cc");
4653
4654# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4655 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4656 "orr %[uNew], %[uNew], %[uVal]\n\t"
4657 ,
4658 "orr %[uNew], %[uNew], %[uVal]\n\t"
4659 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4660 [uVal] "r" (u64));
4661
4662# else
4663 for (;;)
4664 {
4665 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4666 uint64_t u64New = u64Old | u64;
4667 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4668 break;
4669 ASMNopPause();
4670 }
4671# endif
4672}
4673#endif
4674
4675
4676/**
4677 * Atomically Or a signed 64-bit value, ordered.
4678 *
4679 * @param pi64 Pointer to the variable to OR i64 with.
4680 * @param i64 The value to OR *pi64 with.
4681 *
4682 * @remarks x86: Requires a Pentium or later.
4683 */
4684DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4685{
4686 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4687}
4688
4689
4690/**
4691 * Atomically And an unsigned 32-bit value, ordered.
4692 *
4693 * @param pu32 Pointer to the pointer variable to AND u32 with.
4694 * @param u32 The value to AND *pu32 with.
4695 *
4696 * @remarks x86: Requires a 386 or later.
4697 */
4698#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4699RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4700#else
4701DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4702{
4703# if RT_INLINE_ASM_USES_INTRIN
4704 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4705
4706# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4707# if RT_INLINE_ASM_GNU_STYLE
4708 __asm__ __volatile__("lock; andl %1, %0\n\t"
4709 : "=m" (*pu32)
4710 : "ir" (u32)
4711 , "m" (*pu32)
4712 : "cc");
4713# else
4714 __asm
4715 {
4716 mov eax, [u32]
4717# ifdef RT_ARCH_AMD64
4718 mov rdx, [pu32]
4719 lock and [rdx], eax
4720# else
4721 mov edx, [pu32]
4722 lock and [edx], eax
4723# endif
4724 }
4725# endif
4726
4727# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4728 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4729 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4730 "and %[uNew], %[uNew], %[uVal]\n\t",
4731 [uVal] "r" (u32));
4732
4733# else
4734# error "Port me"
4735# endif
4736}
4737#endif
4738
4739
4740/**
4741 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4742 *
4743 * @returns Old value.
4744 * @param pu32 Pointer to the variable to AND @a u32 with.
4745 * @param u32 The value to AND @a *pu32 with.
4746 */
4747DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4748{
4749#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4750 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4751 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4752 "and %[uNew], %[uOld], %[uVal]\n\t",
4753 [uVal] "r" (u32));
4754 return u32OldRet;
4755
4756#else
4757 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4758 uint32_t u32New;
4759 do
4760 u32New = u32RetOld & u32;
4761 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4762 return u32RetOld;
4763#endif
4764}
4765
4766
4767/**
4768 * Atomically And a signed 32-bit value, ordered.
4769 *
4770 * @param pi32 Pointer to the pointer variable to AND i32 with.
4771 * @param i32 The value to AND *pi32 with.
4772 *
4773 * @remarks x86: Requires a 386 or later.
4774 */
4775DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4776{
4777 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4778}
4779
4780
4781/**
4782 * Atomically And an unsigned 64-bit value, ordered.
4783 *
4784 * @param pu64 Pointer to the pointer variable to AND u64 with.
4785 * @param u64 The value to AND *pu64 with.
4786 *
4787 * @remarks x86: Requires a Pentium or later.
4788 */
4789#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4790DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4791#else
4792DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4793{
4794# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4795 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4796
4797# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4798 __asm__ __volatile__("lock; andq %1, %0\n\t"
4799 : "=m" (*pu64)
4800 : "r" (u64)
4801 , "m" (*pu64)
4802 : "cc");
4803
4804# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4805 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4806 "and %[uNew], %[uNew], %[uVal]\n\t"
4807 ,
4808 "and %[uNew], %[uNew], %[uVal]\n\t"
4809 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4810 [uVal] "r" (u64));
4811
4812# else
4813 for (;;)
4814 {
4815 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4816 uint64_t u64New = u64Old & u64;
4817 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4818 break;
4819 ASMNopPause();
4820 }
4821# endif
4822}
4823#endif
4824
4825
4826/**
4827 * Atomically And a signed 64-bit value, ordered.
4828 *
4829 * @param pi64 Pointer to the pointer variable to AND i64 with.
4830 * @param i64 The value to AND *pi64 with.
4831 *
4832 * @remarks x86: Requires a Pentium or later.
4833 */
4834DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4835{
4836 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4837}
4838
4839
4840/**
4841 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4842 *
4843 * @param pu32 Pointer to the variable to XOR @a u32 with.
4844 * @param u32 The value to XOR @a *pu32 with.
4845 *
4846 * @remarks x86: Requires a 386 or later.
4847 */
4848#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4849RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4850#else
4851DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4852{
4853# if RT_INLINE_ASM_USES_INTRIN
4854 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4855
4856# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4857# if RT_INLINE_ASM_GNU_STYLE
4858 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4859 : "=m" (*pu32)
4860 : "ir" (u32)
4861 , "m" (*pu32)
4862 : "cc");
4863# else
4864 __asm
4865 {
4866 mov eax, [u32]
4867# ifdef RT_ARCH_AMD64
4868 mov rdx, [pu32]
4869 lock xor [rdx], eax
4870# else
4871 mov edx, [pu32]
4872 lock xor [edx], eax
4873# endif
4874 }
4875# endif
4876
4877# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4878 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4879 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4880 "eor %[uNew], %[uNew], %[uVal]\n\t",
4881 [uVal] "r" (u32));
4882
4883# else
4884# error "Port me"
4885# endif
4886}
4887#endif
4888
4889
4890/**
4891 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4892 * extended version (for bitmaps).
4893 *
4894 * @returns Old value.
4895 * @param pu32 Pointer to the variable to XOR @a u32 with.
4896 * @param u32 The value to XOR @a *pu32 with.
4897 */
4898DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4899{
4900#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4901 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4902 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4903 "eor %[uNew], %[uOld], %[uVal]\n\t",
4904 [uVal] "r" (u32));
4905 return u32OldRet;
4906
4907#else
4908 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4909 uint32_t u32New;
4910 do
4911 u32New = u32RetOld ^ u32;
4912 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4913 return u32RetOld;
4914#endif
4915}
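/* Usage sketch (s_fState is hypothetical): XOR toggles the bit, and the old
 * value returned by the extended variant reveals the state before the toggle.
 *
 *     static uint32_t volatile s_fState = 0;
 *     bool const fWasSet = RT_BOOL(ASMAtomicXorExU32(&s_fState, RT_BIT_32(1)) & RT_BIT_32(1));
 */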
4916
4917
4918/**
4919 * Atomically XOR a signed 32-bit value, ordered.
4920 *
4921 * @param pi32 Pointer to the variable to XOR i32 with.
4922 * @param i32 The value to XOR *pi32 with.
4923 *
4924 * @remarks x86: Requires a 386 or later.
4925 */
4926DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4927{
4928 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4929}
4930
4931
4932/**
4933 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4934 *
4935 * @param pu32 Pointer to the pointer variable to OR u32 with.
4936 * @param u32 The value to OR *pu32 with.
4937 *
4938 * @remarks x86: Requires a 386 or later.
4939 */
4940#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4941RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4942#else
4943DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4944{
4945# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4946# if RT_INLINE_ASM_GNU_STYLE
4947 __asm__ __volatile__("orl %1, %0\n\t"
4948 : "=m" (*pu32)
4949 : "ir" (u32)
4950 , "m" (*pu32)
4951 : "cc");
4952# else
4953 __asm
4954 {
4955 mov eax, [u32]
4956# ifdef RT_ARCH_AMD64
4957 mov rdx, [pu32]
4958 or [rdx], eax
4959# else
4960 mov edx, [pu32]
4961 or [edx], eax
4962# endif
4963 }
4964# endif
4965
4966# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4967 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4968 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4969 "orr %[uNew], %[uNew], %[uVal]\n\t",
4970 [uVal] "r" (u32));
4971
4972# else
4973# error "Port me"
4974# endif
4975}
4976#endif
4977
4978
4979/**
4980 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4981 * extended version (for bitmap fallback).
4982 *
4983 * @returns Old value.
4984 * @param pu32 Pointer to the variable to OR @a u32 with.
4985 * @param u32 The value to OR @a *pu32 with.
4986 */
4987DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4988{
4989#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4990 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4991 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4992 "orr %[uNew], %[uOld], %[uVal]\n\t",
4993 [uVal] "r" (u32));
4994 return u32OldRet;
4995
4996#else
4997 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4998#endif
4999}
5000
5001
5002/**
5003 * Atomically OR a signed 32-bit value, unordered.
5004 *
5005 * @param pi32 Pointer to the variable to OR i32 with.
5006 * @param i32 The value to OR *pi32 with.
5007 *
5008 * @remarks x86: Requires a 386 or later.
5009 */
5010DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5011{
5012 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5013}
5014
5015
5016/**
5017 * Atomically OR an unsigned 64-bit value, unordered.
5018 *
5019 * @param pu64 Pointer to the pointer variable to OR u64 with.
5020 * @param u64 The value to OR *pu64 with.
5021 *
5022 * @remarks x86: Requires a Pentium or later.
5023 */
5024#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5025DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5026#else
5027DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5028{
5029# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5030 __asm__ __volatile__("orq %1, %q0\n\t"
5031 : "=m" (*pu64)
5032 : "r" (u64)
5033 , "m" (*pu64)
5034 : "cc");
5035
5036# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5037 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
5038 "orr %[uNew], %[uNew], %[uVal]\n\t"
5039 ,
5040 "orr %[uNew], %[uNew], %[uVal]\n\t"
5041 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
5042 [uVal] "r" (u64));
5043
5044# else
5045 for (;;)
5046 {
5047 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5048 uint64_t u64New = u64Old | u64;
5049 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5050 break;
5051 ASMNopPause();
5052 }
5053# endif
5054}
5055#endif
5056
5057
5058/**
5059 * Atomically Or a signed 64-bit value, unordered.
5060 *
5061 * @param pi64 Pointer to the variable to OR i64 with.
5062 * @param i64 The value to OR *pi64 with.
5063 *
5064 * @remarks x86: Requires a Pentium or later.
5065 */
5066DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5067{
5068 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5069}
5070
5071
5072/**
5073 * Atomically And an unsigned 32-bit value, unordered.
5074 *
5075 * @param pu32 Pointer to the pointer variable to AND u32 with.
5076 * @param u32 The value to AND *pu32 with.
5077 *
5078 * @remarks x86: Requires a 386 or later.
5079 */
5080#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5081RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5082#else
5083DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5084{
5085# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5086# if RT_INLINE_ASM_GNU_STYLE
5087 __asm__ __volatile__("andl %1, %0\n\t"
5088 : "=m" (*pu32)
5089 : "ir" (u32)
5090 , "m" (*pu32)
5091 : "cc");
5092# else
5093 __asm
5094 {
5095 mov eax, [u32]
5096# ifdef RT_ARCH_AMD64
5097 mov rdx, [pu32]
5098 and [rdx], eax
5099# else
5100 mov edx, [pu32]
5101 and [edx], eax
5102# endif
5103 }
5104# endif
5105
5106# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5107 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
5108 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
5109 "and %[uNew], %[uNew], %[uVal]\n\t",
5110 [uVal] "r" (u32));
5111
5112# else
5113# error "Port me"
5114# endif
5115}
5116#endif
5117
5118
5119/**
5120 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
5121 * bitmap fallback).
5122 *
5123 * @returns Old value.
5124 * @param pu32 Pointer to the pointer to AND @a u32 with.
5125 * @param u32 The value to AND @a *pu32 with.
5126 */
5127DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5128{
5129#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5130 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
5131 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
5132 "and %[uNew], %[uOld], %[uVal]\n\t",
5133 [uVal] "r" (u32));
5134 return u32OldRet;
5135
5136#else
5137 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5138#endif
5139}
5140
5141
5142/**
5143 * Atomically And a signed 32-bit value, unordered.
5144 *
5145 * @param pi32 Pointer to the pointer variable to AND i32 with.
5146 * @param i32 The value to AND *pi32 with.
5147 *
5148 * @remarks x86: Requires a 386 or later.
5149 */
5150DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5151{
5152 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5153}
5154
5155
5156/**
5157 * Atomically And an unsigned 64-bit value, unordered.
5158 *
5159 * @param pu64 Pointer to the pointer variable to AND u64 with.
5160 * @param u64 The value to AND *pu64 with.
5161 *
5162 * @remarks x86: Requires a Pentium or later.
5163 */
5164#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5165DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
5166#else
5167DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
5168{
5169# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5170 __asm__ __volatile__("andq %1, %0\n\t"
5171 : "=m" (*pu64)
5172 : "r" (u64)
5173 , "m" (*pu64)
5174 : "cc");
5175
5176# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5177 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
5178 "and %[uNew], %[uNew], %[uVal]\n\t"
5179 ,
5180 "and %[uNew], %[uNew], %[uVal]\n\t"
5181 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
5182 [uVal] "r" (u64));
5183
5184# else
5185 for (;;)
5186 {
5187 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
5188 uint64_t u64New = u64Old & u64;
5189 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
5190 break;
5191 ASMNopPause();
5192 }
5193# endif
5194}
5195#endif
5196
5197
5198/**
5199 * Atomically And a signed 64-bit value, unordered.
5200 *
5201 * @param pi64 Pointer to the pointer variable to AND i64 with.
5202 * @param i64 The value to AND *pi64 with.
5203 *
5204 * @remarks x86: Requires a Pentium or later.
5205 */
5206DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
5207{
5208 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
5209}
5210
5211
5212/**
5213 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
5214 *
5215 * @param pu32 Pointer to the variable to XOR @a u32 with.
5216 * @param u32 The value to XOR @a *pu32 with.
5217 *
5218 * @remarks x86: Requires a 386 or later.
5219 */
5220#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5221RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
5222#else
5223DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5224{
5225# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5226# if RT_INLINE_ASM_GNU_STYLE
5227 __asm__ __volatile__("xorl %1, %0\n\t"
5228 : "=m" (*pu32)
5229 : "ir" (u32)
5230 , "m" (*pu32)
5231 : "cc");
5232# else
5233 __asm
5234 {
5235 mov eax, [u32]
5236# ifdef RT_ARCH_AMD64
5237 mov rdx, [pu32]
5238 xor [rdx], eax
5239# else
5240 mov edx, [pu32]
5241 xor [edx], eax
5242# endif
5243 }
5244# endif
5245
5246# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5247 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
5248 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
5249 "eor %[uNew], %[uNew], %[uVal]\n\t",
5250 [uVal] "r" (u32));
5251
5252# else
5253# error "Port me"
5254# endif
5255}
5256#endif
5257
5258
5259/**
5260 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5261 * extended version (for bitmap fallback).
5262 *
5263 * @returns Old value.
5264 * @param pu32 Pointer to the variable to XOR @a u32 with.
5265 * @param u32 The value to XOR @a *pu32 with.
5266 */
5267DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5268{
5269#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5270 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5271 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5272 "eor %[uNew], %[uOld], %[uVal]\n\t",
5273 [uVal] "r" (u32));
5274 return u32OldRet;
5275
5276#else
5277 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5278#endif
5279}
5280
5281
5282/**
5283 * Atomically XOR a signed 32-bit value, unordered.
5284 *
5285 * @param pi32 Pointer to the variable to XOR @a i32 with.
5286 * @param i32 The value to XOR @a *pi32 with.
5287 *
5288 * @remarks x86: Requires a 386 or later.
5289 */
5290DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5291{
5292 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5293}
5294
5295
5296/**
5297 * Atomically increment an unsigned 32-bit value, unordered.
5298 *
5299 * @returns the new value.
5300 * @param pu32 Pointer to the variable to increment.
5301 *
5302 * @remarks x86: Requires a 486 or later.
5303 */
5304#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5305RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5306#else
5307DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5308{
5309# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5310 uint32_t u32;
5311# if RT_INLINE_ASM_GNU_STYLE
5312 __asm__ __volatile__("xaddl %0, %1\n\t"
5313 : "=r" (u32)
5314 , "=m" (*pu32)
5315 : "0" (1)
5316 , "m" (*pu32)
5317 : "memory" /** @todo why 'memory'? */
5318 , "cc");
5319 return u32 + 1;
5320# else
5321 __asm
5322 {
5323 mov eax, 1
5324# ifdef RT_ARCH_AMD64
5325 mov rdx, [pu32]
5326 xadd [rdx], eax
5327# else
5328 mov edx, [pu32]
5329 xadd [edx], eax
5330# endif
5331 mov u32, eax
5332 }
5333 return u32 + 1;
5334# endif
5335
5336# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5337 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5338 "add %w[uNew], %w[uNew], #1\n\t",
5339 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5340 "X" (0) /* dummy */);
5341 return u32NewRet;
5342
5343# else
5344# error "Port me"
5345# endif
5346}
5347#endif
5348
5349
5350/**
5351 * Atomically decrement an unsigned 32-bit value, unordered.
5352 *
5353 * @returns the new value.
5354 * @param pu32 Pointer to the variable to decrement.
5355 *
5356 * @remarks x86: Requires a 486 or later.
5357 */
5358#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5359RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5360#else
5361DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5362{
5363# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5364 uint32_t u32;
5365# if RT_INLINE_ASM_GNU_STYLE
5366 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5367 : "=r" (u32)
5368 , "=m" (*pu32)
5369 : "0" (-1)
5370 , "m" (*pu32)
5371 : "memory"
5372 , "cc");
5373 return u32 - 1;
5374# else
5375 __asm
5376 {
5377 mov eax, -1
5378# ifdef RT_ARCH_AMD64
5379 mov rdx, [pu32]
5380 xadd [rdx], eax
5381# else
5382 mov edx, [pu32]
5383 xadd [edx], eax
5384# endif
5385 mov u32, eax
5386 }
5387 return u32 - 1;
5388# endif
5389
5390# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5391 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5392 "sub %w[uNew], %w[uNew], #1\n\t",
5393 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5394 "X" (0) /* dummy */);
5395 return u32NewRet;
5396
5397# else
5398# error "Port me"
5399# endif
5400}
5401#endif
5402
5403
5404/** @def RT_ASM_PAGE_SIZE
5405 * We try to avoid dragging in iprt/param.h here.
5406 * @internal
5407 */
5408#if defined(RT_ARCH_SPARC64)
5409# define RT_ASM_PAGE_SIZE 0x2000
5410# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5411# if PAGE_SIZE != 0x2000
5412# error "PAGE_SIZE is not 0x2000!"
5413# endif
5414# endif
5415#elif defined(RT_ARCH_ARM64)
5416# define RT_ASM_PAGE_SIZE 0x4000
5417# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5418# if PAGE_SIZE != 0x4000
5419# error "PAGE_SIZE is not 0x4000!"
5420# endif
5421# endif
5422#else
5423# define RT_ASM_PAGE_SIZE 0x1000
5424# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5425# if PAGE_SIZE != 0x1000
5426# error "PAGE_SIZE is not 0x1000!"
5427# endif
5428# endif
5429#endif
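/* Minimal sketch of checking the alignment requirement of the page routines
 * below against RT_ASM_PAGE_SIZE (pvPage is a hypothetical page-aligned buffer).
 *
 *     Assert(!((uintptr_t)pvPage & (RT_ASM_PAGE_SIZE - 1)));
 *     ASMMemZeroPage(pvPage);
 */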
5430
5431/**
5432 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes; 4K on x86).
5433 *
5434 * @param pv Pointer to the memory block. This must be page aligned.
5435 */
5436#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5437RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5438#else
5439DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5440{
5441# if RT_INLINE_ASM_USES_INTRIN
5442# ifdef RT_ARCH_AMD64
5443 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5444# else
5445 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5446# endif
5447
5448# elif RT_INLINE_ASM_GNU_STYLE
5449 RTCCUINTREG uDummy;
5450# ifdef RT_ARCH_AMD64
5451 __asm__ __volatile__("rep stosq"
5452 : "=D" (pv),
5453 "=c" (uDummy)
5454 : "0" (pv),
5455 "c" (RT_ASM_PAGE_SIZE >> 3),
5456 "a" (0)
5457 : "memory");
5458# else
5459 __asm__ __volatile__("rep stosl"
5460 : "=D" (pv),
5461 "=c" (uDummy)
5462 : "0" (pv),
5463 "c" (RT_ASM_PAGE_SIZE >> 2),
5464 "a" (0)
5465 : "memory");
5466# endif
5467# else
5468 __asm
5469 {
5470# ifdef RT_ARCH_AMD64
5471 xor rax, rax
5472 mov ecx, 0200h
5473 mov rdi, [pv]
5474 rep stosq
5475# else
5476 xor eax, eax
5477 mov ecx, 0400h
5478 mov edi, [pv]
5479 rep stosd
5480# endif
5481 }
5482# endif
5483}
5484#endif
5485
5486
5487/**
5488 * Zeros a memory block with a 32-bit aligned size.
5489 *
5490 * @param pv Pointer to the memory block.
5491 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5492 */
5493#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5494RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5495#else
5496DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5497{
5498# if RT_INLINE_ASM_USES_INTRIN
5499# ifdef RT_ARCH_AMD64
5500 if (!(cb & 7))
5501 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5502 else
5503# endif
5504 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5505
5506# elif RT_INLINE_ASM_GNU_STYLE
5507 __asm__ __volatile__("rep stosl"
5508 : "=D" (pv),
5509 "=c" (cb)
5510 : "0" (pv),
5511 "1" (cb >> 2),
5512 "a" (0)
5513 : "memory");
5514# else
5515 __asm
5516 {
5517 xor eax, eax
5518# ifdef RT_ARCH_AMD64
5519 mov rcx, [cb]
5520 shr rcx, 2
5521 mov rdi, [pv]
5522# else
5523 mov ecx, [cb]
5524 shr ecx, 2
5525 mov edi, [pv]
5526# endif
5527 rep stosd
5528 }
5529# endif
5530}
5531#endif
5532
5533
5534/**
5535 * Fills a memory block with a 32-bit aligned size.
5536 *
5537 * @param pv Pointer to the memory block.
5538 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5539 * @param u32 The value to fill with.
5540 */
5541#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5542RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5543#else
5544DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5545{
5546# if RT_INLINE_ASM_USES_INTRIN
5547# ifdef RT_ARCH_AMD64
5548 if (!(cb & 7))
5549 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5550 else
5551# endif
5552 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5553
5554# elif RT_INLINE_ASM_GNU_STYLE
5555 __asm__ __volatile__("rep stosl"
5556 : "=D" (pv),
5557 "=c" (cb)
5558 : "0" (pv),
5559 "1" (cb >> 2),
5560 "a" (u32)
5561 : "memory");
5562# else
5563 __asm
5564 {
5565# ifdef RT_ARCH_AMD64
5566 mov rcx, [cb]
5567 shr rcx, 2
5568 mov rdi, [pv]
5569# else
5570 mov ecx, [cb]
5571 shr ecx, 2
5572 mov edi, [pv]
5573# endif
5574 mov eax, [u32]
5575 rep stosd
5576 }
5577# endif
5578}
5579#endif
5580
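/* Illustrative sketch (not part of the original header): clearing and then
 * pattern-filling a 32-bit aligned table with the two helpers above.  The
 * table name and size are made up; the byte counts must be multiples of 4 as
 * documented.
 *
 *      static uint32_t s_au32Table[64];
 *      ASMMemZero32(s_au32Table, sizeof(s_au32Table));
 *      ASMMemFill32(s_au32Table, sizeof(s_au32Table), UINT32_C(0xdeadbeef));
 */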
5581
5582/**
5583 * Checks if a memory block is all zeros.
5584 *
5585 * @returns Pointer to the first non-zero byte.
5586 * @returns NULL if all zero.
5587 *
5588 * @param pv Pointer to the memory block.
5589 * @param cb Number of bytes in the block.
5590 */
5591#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5592DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5593#else
5594DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5595{
5596/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5597 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5598 for (; cb; cb--, pb++)
5599 if (RT_LIKELY(*pb == 0))
5600 { /* likely */ }
5601 else
5602 return (void RT_FAR *)pb;
5603 return NULL;
5604}
5605#endif
5606
5607
5608/**
5609 * Checks if a memory block is all zeros.
5610 *
5611 * @returns true if zero, false if not.
5612 *
5613 * @param pv Pointer to the memory block.
5614 * @param cb Number of bytes in the block.
5615 *
5616 * @sa ASMMemFirstNonZero
5617 */
5618DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5619{
5620 return ASMMemFirstNonZero(pv, cb) == NULL;
5621}
5622
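/* Illustrative sketch (not part of the original header): locating the first
 * dirty (non-zero) byte in a buffer.  pvBuf and cbBuf are placeholders for
 * caller supplied values.
 *
 *      if (!ASMMemIsZero(pvBuf, cbBuf))
 *      {
 *          uint8_t RT_FAR *pbDirty = (uint8_t RT_FAR *)ASMMemFirstNonZero(pvBuf, cbBuf);
 *          size_t offDirty = (uintptr_t)pbDirty - (uintptr_t)pvBuf;
 *          // ... report or handle the non-zero byte at offDirty ...
 *      }
 */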
5623
5624/**
5625 * Checks if a memory page is all zeros.
5626 *
5627 * @returns true / false.
5628 *
5629 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5630 * boundary.
5631 */
5632DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5633{
5634# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5635 union { RTCCUINTREG r; bool f; } uAX;
5636 RTCCUINTREG xCX, xDI;
5637 Assert(!((uintptr_t)pvPage & 15));
5638 __asm__ __volatile__("repe; "
5639# ifdef RT_ARCH_AMD64
5640 "scasq\n\t"
5641# else
5642 "scasl\n\t"
5643# endif
5644 "setnc %%al\n\t"
5645 : "=&c" (xCX)
5646 , "=&D" (xDI)
5647 , "=&a" (uAX.r)
5648 : "mr" (pvPage)
5649# ifdef RT_ARCH_AMD64
5650 , "0" (RT_ASM_PAGE_SIZE/8)
5651# else
5652 , "0" (RT_ASM_PAGE_SIZE/4)
5653# endif
5654 , "1" (pvPage)
5655 , "2" (0)
5656 : "cc");
5657 return uAX.f;
5658# else
5659 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5660 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5661 Assert(!((uintptr_t)pvPage & 15));
5662 for (;;)
5663 {
5664 if (puPtr[0]) return false;
5665 if (puPtr[4]) return false;
5666
5667 if (puPtr[2]) return false;
5668 if (puPtr[6]) return false;
5669
5670 if (puPtr[1]) return false;
5671 if (puPtr[5]) return false;
5672
5673 if (puPtr[3]) return false;
5674 if (puPtr[7]) return false;
5675
5676 if (!--cLeft)
5677 return true;
5678 puPtr += 8;
5679 }
5680# endif
5681}
5682
5683
5684/**
5685 * Checks if a memory block is filled with the specified byte, returning the
5686 * first mismatch.
5687 *
5688 * This is sort of an inverted memchr.
5689 *
5690 * @returns Pointer to the byte which doesn't equal u8.
5691 * @returns NULL if all equal to u8.
5692 *
5693 * @param pv Pointer to the memory block.
5694 * @param cb Number of bytes in the block.
5695 * @param u8 The value it's supposed to be filled with.
5696 *
5697 * @remarks No alignment requirements.
5698 */
5699#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5700 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5701DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5702#else
5703DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5704{
5705/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5706 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5707 for (; cb; cb--, pb++)
5708 if (RT_LIKELY(*pb == u8))
5709 { /* likely */ }
5710 else
5711 return (void *)pb;
5712 return NULL;
5713}
5714#endif
5715
5716
5717/**
5718 * Checks if a memory block is filled with the specified byte.
5719 *
5720 * @returns true if all matching, false if not.
5721 *
5722 * @param pv Pointer to the memory block.
5723 * @param cb Number of bytes in the block.
5724 * @param u8 The value it's supposed to be filled with.
5725 *
5726 * @remarks No alignment requirements.
5727 */
5728DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5729{
5730 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5731}
5732
5733
5734/**
5735 * Checks if a memory block is filled with the specified 32-bit value.
5736 *
5737 * This is a sort of inverted memchr.
5738 *
5739 * @returns Pointer to the first value which doesn't equal u32.
5740 * @returns NULL if all equal to u32.
5741 *
5742 * @param pv Pointer to the memory block.
5743 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5744 * @param u32 The value it's supposed to be filled with.
5745 */
5746DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5747{
5748/** @todo rewrite this in inline assembly? */
5749 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5750 for (; cb; cb -= 4, pu32++)
5751 if (RT_LIKELY(*pu32 == u32))
5752 { /* likely */ }
5753 else
5754 return (uint32_t RT_FAR *)pu32;
5755 return NULL;
5756}
5757
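/* Illustrative sketch (not part of the original header): verifying that a
 * guard region still holds its fill byte / pattern word.  The names and the
 * 0xcc / 0xdeadbeef values are made up; the U32 variant needs a byte count
 * that is a multiple of 4.
 *
 *      AssertMsg(ASMMemIsAllU8(pvGuard, cbGuard, 0xcc), ("guard area trashed\n"));
 *      uint32_t RT_FAR *pu32Bad = ASMMemFirstMismatchingU32(pvPattern, cbPattern, UINT32_C(0xdeadbeef));
 *      // NULL means the whole block still carries the pattern.
 */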
5758
5759/**
5760 * Probes a byte pointer for read access.
5761 *
5762 * While the function will fault if the byte is not read accessible,
5763 * the idea is to do this in a safe place like before acquiring locks
5764 * and such like.
5765 *
5766 * Also, this function guarantees that an eager compiler is not going
5767 * to optimize the probing away.
5768 *
5769 * @param pvByte Pointer to the byte.
5770 */
5771#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5772RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5773#else
5774DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5775{
5776# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5777 uint8_t u8;
5778# if RT_INLINE_ASM_GNU_STYLE
5779 __asm__ __volatile__("movb %1, %0\n\t"
5780 : "=q" (u8)
5781 : "m" (*(const uint8_t *)pvByte));
5782# else
5783 __asm
5784 {
5785# ifdef RT_ARCH_AMD64
5786 mov rax, [pvByte]
5787 mov al, [rax]
5788# else
5789 mov eax, [pvByte]
5790 mov al, [eax]
5791# endif
5792 mov [u8], al
5793 }
5794# endif
5795 return u8;
5796
5797# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5798 uint32_t u32;
5799 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5800# if defined(RT_ARCH_ARM64)
5801 "ldxrb %w[uDst], %[pMem]\n\t"
5802# else
5803 "ldrexb %[uDst], %[pMem]\n\t"
5804# endif
5805 : [uDst] "=&r" (u32)
5806 : [pMem] "m" (*(uint8_t const *)pvByte));
5807 return (uint8_t)u32;
5808
5809# else
5810# error "Port me"
5811# endif
5812}
5813#endif
5814
5815/**
5816 * Probes a buffer for read access page by page.
5817 *
5818 * While the function will fault if the buffer is not fully read
5819 * accessible, the idea is to do this in a safe place like before
5820 * acquiring locks and such like.
5821 *
5822 * Also, this function guarantees that an eager compiler is not going
5823 * to optimize the probing away.
5824 *
5825 * @param pvBuf Pointer to the buffer.
5826 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5827 */
5828DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5829{
5830 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5831 /* the first byte */
5832 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5833 ASMProbeReadByte(pu8);
5834
5835    /* the whole pages in between. */
5836 while (cbBuf > RT_ASM_PAGE_SIZE)
5837 {
5838 ASMProbeReadByte(pu8);
5839 cbBuf -= RT_ASM_PAGE_SIZE;
5840 pu8 += RT_ASM_PAGE_SIZE;
5841 }
5842
5843 /* the last byte */
5844 ASMProbeReadByte(pu8 + cbBuf - 1);
5845}
5846
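/* Illustrative sketch (not part of the original header): probing a caller
 * supplied request buffer up front so that any page fault happens here and
 * not while a spinlock is held.  The buffer and lock names are placeholders.
 *
 *      ASMProbeReadBuffer(pvReq, cbReq);
 *      RTSpinlockAcquire(hSpinlock);
 *      // ... read pvReq without risking a fault inside the critical section ...
 *      RTSpinlockRelease(hSpinlock);
 */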
5847
5848/**
5849 * Reverse the byte order of the given 16-bit integer.
5850 *
5851 * @returns The byte swapped value.
5852 * @param u16 16-bit integer value.
5853 */
5854#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5855RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
5856#else
5857DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
5858{
5859# if RT_INLINE_ASM_USES_INTRIN
5860 return _byteswap_ushort(u16);
5861
5862# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5863# if RT_INLINE_ASM_GNU_STYLE
5864 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
5865# else
5866 _asm
5867 {
5868 mov ax, [u16]
5869 ror ax, 8
5870 mov [u16], ax
5871 }
5872# endif
5873 return u16;
5874
5875# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5876 uint32_t u32Ret;
5877 __asm__ __volatile__(
5878# if defined(RT_ARCH_ARM64)
5879 "rev16 %w[uRet], %w[uVal]\n\t"
5880# else
5881 "rev16 %[uRet], %[uVal]\n\t"
5882# endif
5883 : [uRet] "=r" (u32Ret)
5884 : [uVal] "r" (u16));
5885 return (uint16_t)u32Ret;
5886
5887# else
5888# error "Port me"
5889# endif
5890}
5891#endif
5892
5893
5894/**
5895 * Reverse the byte order of the given 32-bit integer.
5896 *
5897 * @returns The byte swapped value.
5898 * @param u32 32-bit integer value.
5899 */
5900#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5901RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
5902#else
5903DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
5904{
5905# if RT_INLINE_ASM_USES_INTRIN
5906 return _byteswap_ulong(u32);
5907
5908# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5909# if RT_INLINE_ASM_GNU_STYLE
5910 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5911# else
5912 _asm
5913 {
5914 mov eax, [u32]
5915 bswap eax
5916 mov [u32], eax
5917 }
5918# endif
5919 return u32;
5920
5921# elif defined(RT_ARCH_ARM64)
5922 uint64_t u64Ret;
5923 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
5924 : [uRet] "=r" (u64Ret)
5925 : [uVal] "r" ((uint64_t)u32));
5926 return (uint32_t)u64Ret;
5927
5928# elif defined(RT_ARCH_ARM32)
5929 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5930 : [uRet] "=r" (u32)
5931 : [uVal] "[uRet]" (u32));
5932 return u32;
5933
5934# else
5935# error "Port me"
5936# endif
5937}
5938#endif
5939
5940
5941/**
5942 * Reverse the byte order of the given 64-bit integer.
5943 *
5944 * @returns The byte swapped value.
5945 * @param u64 64-bit integer value.
5946 */
5947DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
5948{
5949#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5950 return _byteswap_uint64(u64);
5951
5952# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5953 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
5954 return u64;
5955
5956# elif defined(RT_ARCH_ARM64)
5957 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5958 : [uRet] "=r" (u64)
5959 : [uVal] "[uRet]" (u64));
5960 return u64;
5961
5962#else
5963 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5964 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5965#endif
5966}
5967
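/* Illustrative sketch (not part of the original header): turning a big-endian
 * on-the-wire value into host order on a little-endian machine.  In portable
 * code the endianness conversion macros such as RT_H2LE_U32 (used elsewhere
 * in this header) are usually preferable, since they compile to nothing when
 * no swap is needed; this just shows the raw swap.
 *
 *      uint16_t const uPort = ASMByteSwapU16(UINT16_C(0x1f90));       // 0x901f
 *      uint32_t const uAddr = ASMByteSwapU32(UINT32_C(0x12345678));   // 0x78563412
 */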
5968
5969
5970/** @defgroup grp_inline_bits Bit Operations
5971 * @{
5972 */
5973
5974
5975/**
5976 * Sets a bit in a bitmap.
5977 *
5978 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5979 * 32-bit aligned.
5980 * @param iBit The bit to set.
5981 *
5982 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5983 * However, doing so will yield better performance as well as avoiding
5984 * traps accessing the last bits in the bitmap.
5985 */
5986#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5987RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5988#else
5989DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5990{
5991# if RT_INLINE_ASM_USES_INTRIN
5992 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5993
5994# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5995# if RT_INLINE_ASM_GNU_STYLE
5996 __asm__ __volatile__("btsl %1, %0"
5997 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5998 : "Ir" (iBit)
5999 , "m" (*(volatile long RT_FAR *)pvBitmap)
6000 : "memory"
6001 , "cc");
6002# else
6003 __asm
6004 {
6005# ifdef RT_ARCH_AMD64
6006 mov rax, [pvBitmap]
6007 mov edx, [iBit]
6008 bts [rax], edx
6009# else
6010 mov eax, [pvBitmap]
6011 mov edx, [iBit]
6012 bts [eax], edx
6013# endif
6014 }
6015# endif
6016
6017# else
6018 int32_t offBitmap = iBit / 32;
6019 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6020 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6021# endif
6022}
6023#endif
6024
6025
6026/**
6027 * Atomically sets a bit in a bitmap, ordered.
6028 *
6029 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6030 * aligned, otherwise the memory access isn't atomic!
6031 * @param iBit The bit to set.
6032 *
6033 * @remarks x86: Requires a 386 or later.
6034 */
6035#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6036RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6037#else
6038DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6039{
6040 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6041# if RT_INLINE_ASM_USES_INTRIN
6042 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6043# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6044# if RT_INLINE_ASM_GNU_STYLE
6045 __asm__ __volatile__("lock; btsl %1, %0"
6046 : "=m" (*(volatile long *)pvBitmap)
6047 : "Ir" (iBit)
6048 , "m" (*(volatile long *)pvBitmap)
6049 : "memory"
6050 , "cc");
6051# else
6052 __asm
6053 {
6054# ifdef RT_ARCH_AMD64
6055 mov rax, [pvBitmap]
6056 mov edx, [iBit]
6057 lock bts [rax], edx
6058# else
6059 mov eax, [pvBitmap]
6060 mov edx, [iBit]
6061 lock bts [eax], edx
6062# endif
6063 }
6064# endif
6065
6066# else
6067 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6068# endif
6069}
6070#endif
6071
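/* Illustrative sketch (not part of the original header): a small allocation
 * bitmap.  The array is made up for the example; note that it is naturally
 * 32-bit aligned, which the atomic variants require.
 *
 *      static volatile uint32_t s_au32AllocBm[256 / 32];       // 256 bits
 *      ASMBitSet(s_au32AllocBm, 5);         // non-atomic, e.g. during init
 *      ASMAtomicBitSet(s_au32AllocBm, 42);  // safe against concurrent updates
 */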
6072
6073/**
6074 * Clears a bit in a bitmap.
6075 *
6076 * @param pvBitmap Pointer to the bitmap (little endian).
6077 * @param iBit The bit to clear.
6078 *
6079 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6080 * However, doing so will yield better performance as well as avoiding
6081 * traps accessing the last bits in the bitmap.
6082 */
6083#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6084RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6085#else
6086DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6087{
6088# if RT_INLINE_ASM_USES_INTRIN
6089 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6090
6091# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6092# if RT_INLINE_ASM_GNU_STYLE
6093 __asm__ __volatile__("btrl %1, %0"
6094 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6095 : "Ir" (iBit)
6096 , "m" (*(volatile long RT_FAR *)pvBitmap)
6097 : "memory"
6098 , "cc");
6099# else
6100 __asm
6101 {
6102# ifdef RT_ARCH_AMD64
6103 mov rax, [pvBitmap]
6104 mov edx, [iBit]
6105 btr [rax], edx
6106# else
6107 mov eax, [pvBitmap]
6108 mov edx, [iBit]
6109 btr [eax], edx
6110# endif
6111 }
6112# endif
6113
6114# else
6115 int32_t offBitmap = iBit / 32;
6116 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6117 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6118# endif
6119}
6120#endif
6121
6122
6123/**
6124 * Atomically clears a bit in a bitmap, ordered.
6125 *
6126 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6127 * aligned, otherwise the memory access isn't atomic!
6128 * @param iBit The bit to clear.
6129 *
6130 * @remarks No memory barrier, take care on smp.
6131 * @remarks x86: Requires a 386 or later.
6132 */
6133#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6134RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6135#else
6136DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6137{
6138 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6139# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6140# if RT_INLINE_ASM_GNU_STYLE
6141 __asm__ __volatile__("lock; btrl %1, %0"
6142 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6143 : "Ir" (iBit)
6144 , "m" (*(volatile long RT_FAR *)pvBitmap)
6145 : "memory"
6146 , "cc");
6147# else
6148 __asm
6149 {
6150# ifdef RT_ARCH_AMD64
6151 mov rax, [pvBitmap]
6152 mov edx, [iBit]
6153 lock btr [rax], edx
6154# else
6155 mov eax, [pvBitmap]
6156 mov edx, [iBit]
6157 lock btr [eax], edx
6158# endif
6159 }
6160# endif
6161# else
6162 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
6163# endif
6164}
6165#endif
6166
6167
6168/**
6169 * Toggles a bit in a bitmap.
6170 *
6171 * @param pvBitmap Pointer to the bitmap (little endian).
6172 * @param iBit The bit to toggle.
6173 *
6174 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6175 * However, doing so will yield better performance as well as avoiding
6176 * traps accessing the last bits in the bitmap.
6177 */
6178#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6179RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6180#else
6181DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6182{
6183# if RT_INLINE_ASM_USES_INTRIN
6184 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6185# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6186# if RT_INLINE_ASM_GNU_STYLE
6187 __asm__ __volatile__("btcl %1, %0"
6188 : "=m" (*(volatile long *)pvBitmap)
6189 : "Ir" (iBit)
6190 , "m" (*(volatile long *)pvBitmap)
6191 : "memory"
6192 , "cc");
6193# else
6194 __asm
6195 {
6196# ifdef RT_ARCH_AMD64
6197 mov rax, [pvBitmap]
6198 mov edx, [iBit]
6199 btc [rax], edx
6200# else
6201 mov eax, [pvBitmap]
6202 mov edx, [iBit]
6203 btc [eax], edx
6204# endif
6205 }
6206# endif
6207# else
6208 int32_t offBitmap = iBit / 32;
6209 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6210 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6211# endif
6212}
6213#endif
6214
6215
6216/**
6217 * Atomically toggles a bit in a bitmap, ordered.
6218 *
6219 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6220 * aligned, otherwise the memory access isn't atomic!
6221 * @param iBit The bit to toggle.
6222 *
6223 * @remarks x86: Requires a 386 or later.
6224 */
6225#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6226RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6227#else
6228DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6229{
6230 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6232# if RT_INLINE_ASM_GNU_STYLE
6233 __asm__ __volatile__("lock; btcl %1, %0"
6234 : "=m" (*(volatile long RT_FAR *)pvBitmap)
6235 : "Ir" (iBit)
6236 , "m" (*(volatile long RT_FAR *)pvBitmap)
6237 : "memory"
6238 , "cc");
6239# else
6240 __asm
6241 {
6242# ifdef RT_ARCH_AMD64
6243 mov rax, [pvBitmap]
6244 mov edx, [iBit]
6245 lock btc [rax], edx
6246# else
6247 mov eax, [pvBitmap]
6248 mov edx, [iBit]
6249 lock btc [eax], edx
6250# endif
6251 }
6252# endif
6253# else
6254 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6255# endif
6256}
6257#endif
6258
6259
6260/**
6261 * Tests and sets a bit in a bitmap.
6262 *
6263 * @returns true if the bit was set.
6264 * @returns false if the bit was clear.
6265 *
6266 * @param pvBitmap Pointer to the bitmap (little endian).
6267 * @param iBit The bit to test and set.
6268 *
6269 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6270 * However, doing so will yield better performance as well as avoiding
6271 * traps accessing the last bits in the bitmap.
6272 */
6273#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6274RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6275#else
6276DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6277{
6278 union { bool f; uint32_t u32; uint8_t u8; } rc;
6279# if RT_INLINE_ASM_USES_INTRIN
6280 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6281
6282# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6283# if RT_INLINE_ASM_GNU_STYLE
6284 __asm__ __volatile__("btsl %2, %1\n\t"
6285 "setc %b0\n\t"
6286 "andl $1, %0\n\t"
6287 : "=q" (rc.u32)
6288 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6289 : "Ir" (iBit)
6290 , "m" (*(volatile long RT_FAR *)pvBitmap)
6291 : "memory"
6292 , "cc");
6293# else
6294 __asm
6295 {
6296 mov edx, [iBit]
6297# ifdef RT_ARCH_AMD64
6298 mov rax, [pvBitmap]
6299 bts [rax], edx
6300# else
6301 mov eax, [pvBitmap]
6302 bts [eax], edx
6303# endif
6304 setc al
6305 and eax, 1
6306 mov [rc.u32], eax
6307 }
6308# endif
6309
6310# else
6311 int32_t offBitmap = iBit / 32;
6312 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6313 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6314 >> (iBit & 31);
6315 rc.u32 &= 1;
6316# endif
6317 return rc.f;
6318}
6319#endif
6320
6321
6322/**
6323 * Atomically tests and sets a bit in a bitmap, ordered.
6324 *
6325 * @returns true if the bit was set.
6326 * @returns false if the bit was clear.
6327 *
6328 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6329 * aligned, otherwise the memory access isn't atomic!
6330 * @param iBit The bit to test and set.
6331 *
6332 * @remarks x86: Requires a 386 or later.
6333 */
6334#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6335RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6336#else
6337DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6338{
6339 union { bool f; uint32_t u32; uint8_t u8; } rc;
6340 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6341# if RT_INLINE_ASM_USES_INTRIN
6342 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6343# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6344# if RT_INLINE_ASM_GNU_STYLE
6345 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6346 "setc %b0\n\t"
6347 "andl $1, %0\n\t"
6348 : "=q" (rc.u32)
6349 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6350 : "Ir" (iBit)
6351 , "m" (*(volatile long RT_FAR *)pvBitmap)
6352 : "memory"
6353 , "cc");
6354# else
6355 __asm
6356 {
6357 mov edx, [iBit]
6358# ifdef RT_ARCH_AMD64
6359 mov rax, [pvBitmap]
6360 lock bts [rax], edx
6361# else
6362 mov eax, [pvBitmap]
6363 lock bts [eax], edx
6364# endif
6365 setc al
6366 and eax, 1
6367 mov [rc.u32], eax
6368 }
6369# endif
6370
6371# else
6372 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6373 >> (iBit & 31);
6374 rc.u32 &= 1;
6375# endif
6376 return rc.f;
6377}
6378#endif
6379
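/* Illustrative sketch (not part of the original header): claiming and later
 * releasing a slot in a shared bitmap.  Since ASMAtomicBitTestAndSet returns
 * the previous bit value, 'false' means this caller won the slot.  The
 * bitmap and index names are placeholders.
 *
 *      if (!ASMAtomicBitTestAndSet(pau32AllocBm, idxSlot))
 *      {
 *          // ... the slot is ours, use it ...
 *          ASMAtomicBitClear(pau32AllocBm, idxSlot);   // release it again
 *      }
 */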
6380
6381/**
6382 * Tests and clears a bit in a bitmap.
6383 *
6384 * @returns true if the bit was set.
6385 * @returns false if the bit was clear.
6386 *
6387 * @param pvBitmap Pointer to the bitmap (little endian).
6388 * @param iBit The bit to test and clear.
6389 *
6390 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6391 * However, doing so will yield better performance as well as avoiding
6392 * traps accessing the last bits in the bitmap.
6393 */
6394#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6395RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6396#else
6397DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6398{
6399 union { bool f; uint32_t u32; uint8_t u8; } rc;
6400# if RT_INLINE_ASM_USES_INTRIN
6401 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6402
6403# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6404# if RT_INLINE_ASM_GNU_STYLE
6405 __asm__ __volatile__("btrl %2, %1\n\t"
6406 "setc %b0\n\t"
6407 "andl $1, %0\n\t"
6408 : "=q" (rc.u32)
6409 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6410 : "Ir" (iBit)
6411 , "m" (*(volatile long RT_FAR *)pvBitmap)
6412 : "memory"
6413 , "cc");
6414# else
6415 __asm
6416 {
6417 mov edx, [iBit]
6418# ifdef RT_ARCH_AMD64
6419 mov rax, [pvBitmap]
6420 btr [rax], edx
6421# else
6422 mov eax, [pvBitmap]
6423 btr [eax], edx
6424# endif
6425 setc al
6426 and eax, 1
6427 mov [rc.u32], eax
6428 }
6429# endif
6430
6431# else
6432 int32_t offBitmap = iBit / 32;
6433 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6434 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6435 >> (iBit & 31);
6436 rc.u32 &= 1;
6437# endif
6438 return rc.f;
6439}
6440#endif
6441
6442
6443/**
6444 * Atomically tests and clears a bit in a bitmap, ordered.
6445 *
6446 * @returns true if the bit was set.
6447 * @returns false if the bit was clear.
6448 *
6449 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6450 * aligned, otherwise the memory access isn't atomic!
6451 * @param iBit The bit to test and clear.
6452 *
6453 * @remarks No memory barrier, take care on smp.
6454 * @remarks x86: Requires a 386 or later.
6455 */
6456#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6457RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6458#else
6459DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6460{
6461 union { bool f; uint32_t u32; uint8_t u8; } rc;
6462 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6463# if RT_INLINE_ASM_USES_INTRIN
6464 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6465
6466# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6467# if RT_INLINE_ASM_GNU_STYLE
6468 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6469 "setc %b0\n\t"
6470 "andl $1, %0\n\t"
6471 : "=q" (rc.u32)
6472 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6473 : "Ir" (iBit)
6474 , "m" (*(volatile long RT_FAR *)pvBitmap)
6475 : "memory"
6476 , "cc");
6477# else
6478 __asm
6479 {
6480 mov edx, [iBit]
6481# ifdef RT_ARCH_AMD64
6482 mov rax, [pvBitmap]
6483 lock btr [rax], edx
6484# else
6485 mov eax, [pvBitmap]
6486 lock btr [eax], edx
6487# endif
6488 setc al
6489 and eax, 1
6490 mov [rc.u32], eax
6491 }
6492# endif
6493
6494# else
6495 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6496 >> (iBit & 31);
6497 rc.u32 &= 1;
6498# endif
6499 return rc.f;
6500}
6501#endif
6502
6503
6504/**
6505 * Tests and toggles a bit in a bitmap.
6506 *
6507 * @returns true if the bit was set.
6508 * @returns false if the bit was clear.
6509 *
6510 * @param pvBitmap Pointer to the bitmap (little endian).
6511 * @param iBit The bit to test and toggle.
6512 *
6513 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6514 * However, doing so will yield better performance as well as avoiding
6515 * traps accessing the last bits in the bitmap.
6516 */
6517#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6518RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6519#else
6520DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6521{
6522 union { bool f; uint32_t u32; uint8_t u8; } rc;
6523# if RT_INLINE_ASM_USES_INTRIN
6524 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6525
6526# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6527# if RT_INLINE_ASM_GNU_STYLE
6528 __asm__ __volatile__("btcl %2, %1\n\t"
6529 "setc %b0\n\t"
6530 "andl $1, %0\n\t"
6531 : "=q" (rc.u32)
6532 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6533 : "Ir" (iBit)
6534 , "m" (*(volatile long RT_FAR *)pvBitmap)
6535 : "memory"
6536 , "cc");
6537# else
6538 __asm
6539 {
6540 mov edx, [iBit]
6541# ifdef RT_ARCH_AMD64
6542 mov rax, [pvBitmap]
6543 btc [rax], edx
6544# else
6545 mov eax, [pvBitmap]
6546 btc [eax], edx
6547# endif
6548 setc al
6549 and eax, 1
6550 mov [rc.u32], eax
6551 }
6552# endif
6553
6554# else
6555 int32_t offBitmap = iBit / 32;
6556 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6557 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6558 >> (iBit & 31);
6559 rc.u32 &= 1;
6560# endif
6561 return rc.f;
6562}
6563#endif
6564
6565
6566/**
6567 * Atomically tests and toggles a bit in a bitmap, ordered.
6568 *
6569 * @returns true if the bit was set.
6570 * @returns false if the bit was clear.
6571 *
6572 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6573 * aligned, otherwise the memory access isn't atomic!
6574 * @param iBit The bit to test and toggle.
6575 *
6576 * @remarks x86: Requires a 386 or later.
6577 */
6578#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6579RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6580#else
6581DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6582{
6583 union { bool f; uint32_t u32; uint8_t u8; } rc;
6584 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6585# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6586# if RT_INLINE_ASM_GNU_STYLE
6587 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6588 "setc %b0\n\t"
6589 "andl $1, %0\n\t"
6590 : "=q" (rc.u32)
6591 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6592 : "Ir" (iBit)
6593 , "m" (*(volatile long RT_FAR *)pvBitmap)
6594 : "memory"
6595 , "cc");
6596# else
6597 __asm
6598 {
6599 mov edx, [iBit]
6600# ifdef RT_ARCH_AMD64
6601 mov rax, [pvBitmap]
6602 lock btc [rax], edx
6603# else
6604 mov eax, [pvBitmap]
6605 lock btc [eax], edx
6606# endif
6607 setc al
6608 and eax, 1
6609 mov [rc.u32], eax
6610 }
6611# endif
6612
6613# else
6614 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6615 >> (iBit & 31);
6616 rc.u32 &= 1;
6617# endif
6618 return rc.f;
6619}
6620#endif
6621
6622
6623/**
6624 * Tests if a bit in a bitmap is set.
6625 *
6626 * @returns true if the bit is set.
6627 * @returns false if the bit is clear.
6628 *
6629 * @param pvBitmap Pointer to the bitmap (little endian).
6630 * @param iBit The bit to test.
6631 *
6632 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6633 * However, doing so will yield better performance as well as avoiding
6634 * traps accessing the last bits in the bitmap.
6635 */
6636#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6637RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6638#else
6639DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6640{
6641 union { bool f; uint32_t u32; uint8_t u8; } rc;
6642# if RT_INLINE_ASM_USES_INTRIN
6643 rc.u32 = _bittest((long *)pvBitmap, iBit);
6644
6645# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6646# if RT_INLINE_ASM_GNU_STYLE
6647
6648 __asm__ __volatile__("btl %2, %1\n\t"
6649 "setc %b0\n\t"
6650 "andl $1, %0\n\t"
6651 : "=q" (rc.u32)
6652 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6653 , "Ir" (iBit)
6654 : "memory"
6655 , "cc");
6656# else
6657 __asm
6658 {
6659 mov edx, [iBit]
6660# ifdef RT_ARCH_AMD64
6661 mov rax, [pvBitmap]
6662 bt [rax], edx
6663# else
6664 mov eax, [pvBitmap]
6665 bt [eax], edx
6666# endif
6667 setc al
6668 and eax, 1
6669 mov [rc.u32], eax
6670 }
6671# endif
6672
6673# else
6674 int32_t offBitmap = iBit / 32;
6675 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6676 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6677 rc.u32 &= 1;
6678# endif
6679 return rc.f;
6680}
6681#endif
6682
6683
6684/**
6685 * Clears a bit range within a bitmap.
6686 *
6687 * @param pvBitmap Pointer to the bitmap (little endian).
6688 * @param iBitStart The first bit to clear.
6689 * @param iBitEnd The first bit not to clear.
6690 */
6691DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6692{
6693 if (iBitStart < iBitEnd)
6694 {
6695 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6696 int32_t iStart = iBitStart & ~31;
6697 int32_t iEnd = iBitEnd & ~31;
6698 if (iStart == iEnd)
6699 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6700 else
6701 {
6702 /* bits in first dword. */
6703 if (iBitStart & 31)
6704 {
6705 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6706 pu32++;
6707 iBitStart = iStart + 32;
6708 }
6709
6710 /* whole dwords. */
6711 if (iBitStart != iEnd)
6712 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6713
6714 /* bits in last dword. */
6715 if (iBitEnd & 31)
6716 {
6717 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6718 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6719 }
6720 }
6721 }
6722}
6723
6724
6725/**
6726 * Sets a bit range within a bitmap.
6727 *
6728 * @param pvBitmap Pointer to the bitmap (little endian).
6729 * @param iBitStart The first bit to set.
6730 * @param iBitEnd The first bit not to set.
6731 */
6732DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6733{
6734 if (iBitStart < iBitEnd)
6735 {
6736 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6737 int32_t iStart = iBitStart & ~31;
6738 int32_t iEnd = iBitEnd & ~31;
6739 if (iStart == iEnd)
6740 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6741 else
6742 {
6743 /* bits in first dword. */
6744 if (iBitStart & 31)
6745 {
6746 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6747 pu32++;
6748 iBitStart = iStart + 32;
6749 }
6750
6751            /* whole dwords. */
6752 if (iBitStart != iEnd)
6753 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6754
6755 /* bits in last dword. */
6756 if (iBitEnd & 31)
6757 {
6758 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6759 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6760 }
6761 }
6762 }
6763}
6764
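/* Illustrative sketch (not part of the original header): initializing a
 * bitmap so the first cBitsUsed bits read as allocated and the remainder as
 * free.  cBitsUsed and cBitsTotal are placeholders; iBitEnd is exclusive in
 * both range helpers.
 *
 *      ASMBitSetRange(pau32AllocBm, 0, cBitsUsed);              // [0, cBitsUsed)
 *      ASMBitClearRange(pau32AllocBm, cBitsUsed, cBitsTotal);   // [cBitsUsed, cBitsTotal)
 */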
6765
6766/**
6767 * Finds the first clear bit in a bitmap.
6768 *
6769 * @returns Index of the first zero bit.
6770 * @returns -1 if no clear bit was found.
6771 * @param pvBitmap Pointer to the bitmap (little endian).
6772 * @param cBits The number of bits in the bitmap. Multiple of 32.
6773 */
6774#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6775DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6776#else
6777DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6778{
6779 if (cBits)
6780 {
6781 int32_t iBit;
6782# if RT_INLINE_ASM_GNU_STYLE
6783 RTCCUINTREG uEAX, uECX, uEDI;
6784 cBits = RT_ALIGN_32(cBits, 32);
6785 __asm__ __volatile__("repe; scasl\n\t"
6786 "je 1f\n\t"
6787# ifdef RT_ARCH_AMD64
6788 "lea -4(%%rdi), %%rdi\n\t"
6789 "xorl (%%rdi), %%eax\n\t"
6790 "subq %5, %%rdi\n\t"
6791# else
6792 "lea -4(%%edi), %%edi\n\t"
6793 "xorl (%%edi), %%eax\n\t"
6794 "subl %5, %%edi\n\t"
6795# endif
6796 "shll $3, %%edi\n\t"
6797 "bsfl %%eax, %%edx\n\t"
6798 "addl %%edi, %%edx\n\t"
6799 "1:\t\n"
6800 : "=d" (iBit)
6801 , "=&c" (uECX)
6802 , "=&D" (uEDI)
6803 , "=&a" (uEAX)
6804 : "0" (0xffffffff)
6805 , "mr" (pvBitmap)
6806 , "1" (cBits >> 5)
6807 , "2" (pvBitmap)
6808 , "3" (0xffffffff)
6809 : "cc");
6810# else
6811 cBits = RT_ALIGN_32(cBits, 32);
6812 __asm
6813 {
6814# ifdef RT_ARCH_AMD64
6815 mov rdi, [pvBitmap]
6816 mov rbx, rdi
6817# else
6818 mov edi, [pvBitmap]
6819 mov ebx, edi
6820# endif
6821 mov edx, 0ffffffffh
6822 mov eax, edx
6823 mov ecx, [cBits]
6824 shr ecx, 5
6825 repe scasd
6826 je done
6827
6828# ifdef RT_ARCH_AMD64
6829 lea rdi, [rdi - 4]
6830 xor eax, [rdi]
6831 sub rdi, rbx
6832# else
6833 lea edi, [edi - 4]
6834 xor eax, [edi]
6835 sub edi, ebx
6836# endif
6837 shl edi, 3
6838 bsf edx, eax
6839 add edx, edi
6840 done:
6841 mov [iBit], edx
6842 }
6843# endif
6844 return iBit;
6845 }
6846 return -1;
6847}
6848#endif
6849
6850
6851/**
6852 * Finds the next clear bit in a bitmap.
6853 *
6854 * @returns Index of the next clear bit.
6855 * @returns -1 if no clear bit was found.
6856 * @param pvBitmap Pointer to the bitmap (little endian).
6857 * @param cBits The number of bits in the bitmap. Multiple of 32.
6858 * @param iBitPrev The bit returned from the last search.
6859 * The search will start at iBitPrev + 1.
6860 */
6861#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6862DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6863#else
6864DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6865{
6866 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6867 int iBit = ++iBitPrev & 31;
6868 if (iBit)
6869 {
6870 /*
6871 * Inspect the 32-bit word containing the unaligned bit.
6872 */
6873 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6874
6875# if RT_INLINE_ASM_USES_INTRIN
6876 unsigned long ulBit = 0;
6877 if (_BitScanForward(&ulBit, u32))
6878 return ulBit + iBitPrev;
6879# else
6880# if RT_INLINE_ASM_GNU_STYLE
6881 __asm__ __volatile__("bsf %1, %0\n\t"
6882 "jnz 1f\n\t"
6883 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6884 "1:\n\t"
6885 : "=r" (iBit)
6886 : "r" (u32)
6887 : "cc");
6888# else
6889 __asm
6890 {
6891 mov edx, [u32]
6892 bsf eax, edx
6893 jnz done
6894 mov eax, 0ffffffffh
6895 done:
6896 mov [iBit], eax
6897 }
6898# endif
6899 if (iBit >= 0)
6900 return iBit + (int)iBitPrev;
6901# endif
6902
6903 /*
6904 * Skip ahead and see if there is anything left to search.
6905 */
6906 iBitPrev |= 31;
6907 iBitPrev++;
6908 if (cBits <= (uint32_t)iBitPrev)
6909 return -1;
6910 }
6911
6912 /*
6913 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6914 */
6915 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6916 if (iBit >= 0)
6917 iBit += iBitPrev;
6918 return iBit;
6919}
6920#endif
6921
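/* Illustrative sketch (not part of the original header): scanning for a free
 * (clear) bit.  The 256-bit size is made up and, as documented, must be a
 * multiple of 32; fIsSuitable() is a hypothetical caller predicate.
 *
 *      int32_t iBit = ASMBitFirstClear(pau32AllocBm, 256);
 *      while (iBit >= 0 && !fIsSuitable(iBit))
 *          iBit = ASMBitNextClear(pau32AllocBm, 256, iBit);
 *      // iBit now holds the chosen free bit, or -1 if the bitmap is exhausted.
 */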
6922
6923/**
6924 * Finds the first set bit in a bitmap.
6925 *
6926 * @returns Index of the first set bit.
6927 * @returns -1 if no set bit was found.
6928 * @param pvBitmap Pointer to the bitmap (little endian).
6929 * @param cBits The number of bits in the bitmap. Multiple of 32.
6930 */
6931#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6932DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6933#else
6934DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6935{
6936 if (cBits)
6937 {
6938 int32_t iBit;
6939# if RT_INLINE_ASM_GNU_STYLE
6940 RTCCUINTREG uEAX, uECX, uEDI;
6941 cBits = RT_ALIGN_32(cBits, 32);
6942 __asm__ __volatile__("repe; scasl\n\t"
6943 "je 1f\n\t"
6944# ifdef RT_ARCH_AMD64
6945 "lea -4(%%rdi), %%rdi\n\t"
6946 "movl (%%rdi), %%eax\n\t"
6947 "subq %5, %%rdi\n\t"
6948# else
6949 "lea -4(%%edi), %%edi\n\t"
6950 "movl (%%edi), %%eax\n\t"
6951 "subl %5, %%edi\n\t"
6952# endif
6953 "shll $3, %%edi\n\t"
6954 "bsfl %%eax, %%edx\n\t"
6955 "addl %%edi, %%edx\n\t"
6956 "1:\t\n"
6957 : "=d" (iBit)
6958 , "=&c" (uECX)
6959 , "=&D" (uEDI)
6960 , "=&a" (uEAX)
6961 : "0" (0xffffffff)
6962 , "mr" (pvBitmap)
6963 , "1" (cBits >> 5)
6964 , "2" (pvBitmap)
6965 , "3" (0)
6966 : "cc");
6967# else
6968 cBits = RT_ALIGN_32(cBits, 32);
6969 __asm
6970 {
6971# ifdef RT_ARCH_AMD64
6972 mov rdi, [pvBitmap]
6973 mov rbx, rdi
6974# else
6975 mov edi, [pvBitmap]
6976 mov ebx, edi
6977# endif
6978 mov edx, 0ffffffffh
6979 xor eax, eax
6980 mov ecx, [cBits]
6981 shr ecx, 5
6982 repe scasd
6983 je done
6984# ifdef RT_ARCH_AMD64
6985 lea rdi, [rdi - 4]
6986 mov eax, [rdi]
6987 sub rdi, rbx
6988# else
6989 lea edi, [edi - 4]
6990 mov eax, [edi]
6991 sub edi, ebx
6992# endif
6993 shl edi, 3
6994 bsf edx, eax
6995 add edx, edi
6996 done:
6997 mov [iBit], edx
6998 }
6999# endif
7000 return iBit;
7001 }
7002 return -1;
7003}
7004#endif
7005
7006
7007/**
7008 * Finds the next set bit in a bitmap.
7009 *
7010 * @returns Index of the next set bit.
7011 * @returns -1 if no set bit was found.
7012 * @param pvBitmap Pointer to the bitmap (little endian).
7013 * @param cBits The number of bits in the bitmap. Multiple of 32.
7014 * @param iBitPrev The bit returned from the last search.
7015 * The search will start at iBitPrev + 1.
7016 */
7017#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
7018DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
7019#else
7020DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
7021{
7022 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
7023 int iBit = ++iBitPrev & 31;
7024 if (iBit)
7025 {
7026 /*
7027 * Inspect the 32-bit word containing the unaligned bit.
7028 */
7029 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
7030
7031# if RT_INLINE_ASM_USES_INTRIN
7032 unsigned long ulBit = 0;
7033 if (_BitScanForward(&ulBit, u32))
7034 return ulBit + iBitPrev;
7035# else
7036# if RT_INLINE_ASM_GNU_STYLE
7037 __asm__ __volatile__("bsf %1, %0\n\t"
7038 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
7039 "movl $-1, %0\n\t"
7040 "1:\n\t"
7041 : "=r" (iBit)
7042 : "r" (u32)
7043 : "cc");
7044# else
7045 __asm
7046 {
7047 mov edx, [u32]
7048 bsf eax, edx
7049 jnz done
7050 mov eax, 0ffffffffh
7051 done:
7052 mov [iBit], eax
7053 }
7054# endif
7055 if (iBit >= 0)
7056 return iBit + (int)iBitPrev;
7057# endif
7058
7059 /*
7060 * Skip ahead and see if there is anything left to search.
7061 */
7062 iBitPrev |= 31;
7063 iBitPrev++;
7064 if (cBits <= (uint32_t)iBitPrev)
7065 return -1;
7066 }
7067
7068 /*
7069 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
7070 */
7071 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
7072 if (iBit >= 0)
7073 iBit += iBitPrev;
7074 return iBit;
7075}
7076#endif
7077
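/* Illustrative sketch (not part of the original header): walking every set
 * bit in a bitmap, e.g. to visit all allocated slots.  Bitmap name and size
 * are placeholders; the size must be a multiple of 32.
 *
 *      for (int32_t iBit = ASMBitFirstSet(pau32AllocBm, 256);
 *           iBit >= 0;
 *           iBit = ASMBitNextSet(pau32AllocBm, 256, iBit))
 *      {
 *          // ... process slot iBit ...
 *      }
 */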
7078
7079/**
7080 * Finds the first bit which is set in the given 32-bit integer.
7081 * Bits are numbered from 1 (least significant) to 32.
7082 *
7083 * @returns index [1..32] of the first set bit.
7084 * @returns 0 if all bits are cleared.
7085 * @param u32 Integer to search for set bits.
7086 * @remarks Similar to ffs() in BSD.
7087 */
7088#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7089RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7090#else
7091DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
7092{
7093# if RT_INLINE_ASM_USES_INTRIN
7094 unsigned long iBit;
7095 if (_BitScanForward(&iBit, u32))
7096 iBit++;
7097 else
7098 iBit = 0;
7099
7100# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7101# if RT_INLINE_ASM_GNU_STYLE
7102 uint32_t iBit;
7103 __asm__ __volatile__("bsf %1, %0\n\t"
7104 "jnz 1f\n\t"
7105 "xorl %0, %0\n\t"
7106 "jmp 2f\n"
7107 "1:\n\t"
7108 "incl %0\n"
7109 "2:\n\t"
7110 : "=r" (iBit)
7111 : "rm" (u32)
7112 : "cc");
7113# else
7114 uint32_t iBit;
7115 _asm
7116 {
7117 bsf eax, [u32]
7118 jnz found
7119 xor eax, eax
7120 jmp done
7121 found:
7122 inc eax
7123 done:
7124 mov [iBit], eax
7125 }
7126# endif
7127
7128# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7129 /*
7130 * Using the "count leading zeros (clz)" instruction here because there
7131 * is no dedicated instruction to get the first set bit.
7132 * Need to reverse the bits in the value with "rbit" first because
7133 * "clz" starts counting from the most significant bit.
7134 */
7135 uint32_t iBit;
7136 __asm__ __volatile__(
7137# if defined(RT_ARCH_ARM64)
7138 "rbit %w[uVal], %w[uVal]\n\t"
7139 "clz %w[iBit], %w[uVal]\n\t"
7140# else
7141 "rbit %[uVal], %[uVal]\n\t"
7142 "clz %[iBit], %[uVal]\n\t"
7143# endif
7144 : [uVal] "=r" (u32)
7145 , [iBit] "=r" (iBit)
7146 : "[uVal]" (u32));
7147 if (iBit != 32)
7148 iBit++;
7149 else
7150 iBit = 0; /* No bit set. */
7151
7152# else
7153# error "Port me"
7154# endif
7155 return iBit;
7156}
7157#endif
7158
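/* Illustrative sketch (not part of the original header): ffs()-style use of
 * ASMBitFirstSetU32.  The return value is 1-based, so subtract one to get a
 * bit index; the flag value below is made up.
 *
 *      uint32_t fPending = UINT32_C(0x00280000);              // bits 19 and 21
 *      unsigned iFirst   = ASMBitFirstSetU32(fPending);       // 20, i.e. bit 19
 *      if (iFirst)
 *          fPending &= ~RT_BIT_32(iFirst - 1);                // leaves 0x00200000
 */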
7159
7160/**
7161 * Finds the first bit which is set in the given 32-bit integer.
7162 * Bits are numbered from 1 (least significant) to 32.
7163 *
7164 * @returns index [1..32] of the first set bit.
7165 * @returns 0 if all bits are cleared.
7166 * @param i32 Integer to search for set bits.
7167 * @remark Similar to ffs() in BSD.
7168 */
7169DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
7170{
7171 return ASMBitFirstSetU32((uint32_t)i32);
7172}
7173
7174
7175/**
7176 * Finds the first bit which is set in the given 64-bit integer.
7177 *
7178 * Bits are numbered from 1 (least significant) to 64.
7179 *
7180 * @returns index [1..64] of the first set bit.
7181 * @returns 0 if all bits are cleared.
7182 * @param u64 Integer to search for set bits.
7183 * @remarks Similar to ffs() in BSD.
7184 */
7185#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7186RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7187#else
7188DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
7189{
7190# if RT_INLINE_ASM_USES_INTRIN
7191 unsigned long iBit;
7192# if ARCH_BITS == 64
7193 if (_BitScanForward64(&iBit, u64))
7194 iBit++;
7195 else
7196 iBit = 0;
7197# else
7198 if (_BitScanForward(&iBit, (uint32_t)u64))
7199 iBit++;
7200 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
7201 iBit += 33;
7202 else
7203 iBit = 0;
7204# endif
7205
7206# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7207 uint64_t iBit;
7208 __asm__ __volatile__("bsfq %1, %0\n\t"
7209 "jnz 1f\n\t"
7210 "xorl %k0, %k0\n\t"
7211 "jmp 2f\n"
7212 "1:\n\t"
7213 "incl %k0\n"
7214 "2:\n\t"
7215 : "=r" (iBit)
7216 : "rm" (u64)
7217 : "cc");
7218
7219# elif defined(RT_ARCH_ARM64)
7220 uint64_t iBit;
7221 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
7222 "clz %[iBit], %[uVal]\n\t"
7223 : [uVal] "=r" (u64)
7224 , [iBit] "=r" (iBit)
7225 : "[uVal]" (u64));
7226 if (iBit != 64)
7227 iBit++;
7228 else
7229 iBit = 0; /* No bit set. */
7230
7231# else
7232 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
7233 if (!iBit)
7234 {
7235 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
7236 if (iBit)
7237 iBit += 32;
7238 }
7239# endif
7240 return (unsigned)iBit;
7241}
7242#endif
7243
7244
7245/**
7246 * Finds the first bit which is set in the given 16-bit integer.
7247 *
7248 * Bits are numbered from 1 (least significant) to 16.
7249 *
7250 * @returns index [1..16] of the first set bit.
7251 * @returns 0 if all bits are cleared.
7252 * @param u16 Integer to search for set bits.
7253 * @remarks For 16-bit bs3kit code.
7254 */
7255#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7256RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7257#else
7258DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7259{
7260 return ASMBitFirstSetU32((uint32_t)u16);
7261}
7262#endif
7263
7264
7265/**
7266 * Finds the last bit which is set in the given 32-bit integer.
7267 * Bits are numbered from 1 (least significant) to 32.
7268 *
7269 * @returns index [1..32] of the last set bit.
7270 * @returns 0 if all bits are cleared.
7271 * @param u32 Integer to search for set bits.
7272 * @remark Similar to fls() in BSD.
7273 */
7274#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7275RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7276#else
7277DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7278{
7279# if RT_INLINE_ASM_USES_INTRIN
7280 unsigned long iBit;
7281 if (_BitScanReverse(&iBit, u32))
7282 iBit++;
7283 else
7284 iBit = 0;
7285
7286# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7287# if RT_INLINE_ASM_GNU_STYLE
7288 uint32_t iBit;
7289 __asm__ __volatile__("bsrl %1, %0\n\t"
7290 "jnz 1f\n\t"
7291 "xorl %0, %0\n\t"
7292 "jmp 2f\n"
7293 "1:\n\t"
7294 "incl %0\n"
7295 "2:\n\t"
7296 : "=r" (iBit)
7297 : "rm" (u32)
7298 : "cc");
7299# else
7300 uint32_t iBit;
7301 _asm
7302 {
7303 bsr eax, [u32]
7304 jnz found
7305 xor eax, eax
7306 jmp done
7307 found:
7308 inc eax
7309 done:
7310 mov [iBit], eax
7311 }
7312# endif
7313
7314# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7315 uint32_t iBit;
7316 __asm__ __volatile__(
7317# if defined(RT_ARCH_ARM64)
7318 "clz %w[iBit], %w[uVal]\n\t"
7319# else
7320 "clz %[iBit], %[uVal]\n\t"
7321# endif
7322 : [iBit] "=r" (iBit)
7323 : [uVal] "r" (u32));
7324 iBit = 32 - iBit;
7325
7326# else
7327# error "Port me"
7328# endif
7329 return iBit;
7330}
7331#endif
7332
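/* Illustrative sketch (not part of the original header): for a non-zero value
 * ASMBitLastSetU32(u32) - 1 is floor(log2(u32)), i.e. the index of the most
 * significant set bit.  The input below is made up.
 *
 *      unsigned iMsb  = ASMBitLastSetU32(UINT32_C(0x0001c000));  // 17 -> bit 16 is the highest set bit
 *      unsigned iLog2 = iMsb - 1;                                // floor(log2(0x1c000)) == 16
 */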
7333
7334/**
7335 * Finds the last bit which is set in the given 32-bit integer.
7336 * Bits are numbered from 1 (least significant) to 32.
7337 *
7338 * @returns index [1..32] of the last set bit.
7339 * @returns 0 if all bits are cleared.
7340 * @param i32 Integer to search for set bits.
7341 * @remark Similar to fls() in BSD.
7342 */
7343DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7344{
7345 return ASMBitLastSetU32((uint32_t)i32);
7346}
7347
7348
7349/**
7350 * Finds the last bit which is set in the given 64-bit integer.
7351 *
7352 * Bits are numbered from 1 (least significant) to 64.
7353 *
7354 * @returns index [1..64] of the last set bit.
7355 * @returns 0 if all bits are cleared.
7356 * @param u64 Integer to search for set bits.
7357 * @remark Similar to fls() in BSD.
7358 */
7359#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7360RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7361#else
7362DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7363{
7364# if RT_INLINE_ASM_USES_INTRIN
7365 unsigned long iBit;
7366# if ARCH_BITS == 64
7367 if (_BitScanReverse64(&iBit, u64))
7368 iBit++;
7369 else
7370 iBit = 0;
7371# else
7372 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7373 iBit += 33;
7374 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7375 iBit++;
7376 else
7377 iBit = 0;
7378# endif
7379
7380# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7381 uint64_t iBit;
7382 __asm__ __volatile__("bsrq %1, %0\n\t"
7383 "jnz 1f\n\t"
7384 "xorl %k0, %k0\n\t"
7385 "jmp 2f\n"
7386 "1:\n\t"
7387 "incl %k0\n"
7388 "2:\n\t"
7389 : "=r" (iBit)
7390 : "rm" (u64)
7391 : "cc");
7392
7393# elif defined(RT_ARCH_ARM64)
7394 uint64_t iBit;
7395 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7396 : [iBit] "=r" (iBit)
7397 : [uVal] "r" (u64));
7398 iBit = 64 - iBit;
7399
7400# else
7401 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7402 if (iBit)
7403 iBit += 32;
7404 else
7405 iBit = ASMBitLastSetU32((uint32_t)u64);
7406# endif
7407 return (unsigned)iBit;
7408}
7409#endif
7410
7411
7412/**
7413 * Finds the last bit which is set in the given 16-bit integer.
7414 *
7415 * Bits are numbered from 1 (least significant) to 16.
7416 *
7417 * @returns index [1..16] of the last set bit.
7418 * @returns 0 if all bits are cleared.
7419 * @param u16 Integer to search for set bits.
7420 * @remarks For 16-bit bs3kit code.
7421 */
7422#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7423RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7424#else
7425DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7426{
7427 return ASMBitLastSetU32((uint32_t)u16);
7428}
7429#endif
7430
7431
7432/**
7433 * Rotate 32-bit unsigned value to the left by @a cShift.
7434 *
7435 * @returns Rotated value.
7436 * @param u32 The value to rotate.
7437 * @param cShift How many bits to rotate by.
7438 */
7439#ifdef __WATCOMC__
7440RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7441#else
7442DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7443{
7444# if RT_INLINE_ASM_USES_INTRIN
7445 return _rotl(u32, cShift);
7446
7447# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7448 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7449 return u32;
7450
7451# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7452 __asm__ __volatile__(
7453# if defined(RT_ARCH_ARM64)
7454 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7455# else
7456 "ror %[uRet], %[uVal], %[cShift]\n\t"
7457# endif
7458 : [uRet] "=r" (u32)
7459 : [uVal] "[uRet]" (u32)
7460 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7461 return u32;
7462
7463# else
7464 cShift &= 31;
7465 return (u32 << cShift) | (u32 >> ((32 - cShift) & 31)); /* mask avoids the undefined 32-bit shift when cShift is 0 */
7466# endif
7467}
7468#endif
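/*
 * Usage sketch (illustration only, not part of the upstream header): the
 * paths above are expected to agree with the portable fallback, i.e. a
 * rotate left by cShift modulo 32.  Hypothetical helper name, compiled out.
 */
#if 0 /* example only */
static void rtAsmExampleRotateLeftU32(void)
{
    Assert(ASMRotateLeftU32(UINT32_C(0x80000001), 1) == UINT32_C(0x00000003)); /* MSB wraps to bit 0 */
    Assert(ASMRotateLeftU32(UINT32_C(0x12345678), 4) == UINT32_C(0x23456781)); /* top nibble wraps around */
}
#endif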
7469
7470
7471/**
7472 * Rotate 32-bit unsigned value to the right by @a cShift.
7473 *
7474 * @returns Rotated value.
7475 * @param u32 The value to rotate.
7476 * @param cShift How many bits to rotate by.
7477 */
7478#ifdef __WATCOMC__
7479RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7480#else
7481DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7482{
7483# if RT_INLINE_ASM_USES_INTRIN
7484 return _rotr(u32, cShift);
7485
7486# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7487 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7488 return u32;
7489
7490# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7491 __asm__ __volatile__(
7492# if defined(RT_ARCH_ARM64)
7493 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7494# else
7495 "ror %[uRet], %[uVal], %[cShift]\n\t"
7496# endif
7497 : [uRet] "=r" (u32)
7498 : [uVal] "[uRet]" (u32)
7499 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7500 return u32;
7501
7502# else
7503 cShift &= 31;
7504 return (u32 >> cShift) | (u32 << ((32 - cShift) & 31)); /* mask avoids the undefined 32-bit shift when cShift is 0 */
7505# endif
7506}
7507#endif
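/*
 * Usage sketch (illustration only, not part of the upstream header):
 * rotating right is the inverse of rotating left by the same count.
 * Hypothetical helper name, compiled out.
 */
#if 0 /* example only */
static void rtAsmExampleRotateRightU32(void)
{
    Assert(ASMRotateRightU32(UINT32_C(0x80000001), 1) == UINT32_C(0xC0000000)); /* bit 0 wraps to the MSB */
    Assert(ASMRotateRightU32(ASMRotateLeftU32(UINT32_C(0x12345678), 7), 7) == UINT32_C(0x12345678));
}
#endif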
7508
7509
7510/**
7511 * Rotate 64-bit unsigned value to the left by @a cShift.
7512 *
7513 * @returns Rotated value.
7514 * @param u64 The value to rotate.
7515 * @param cShift How many bits to rotate by.
7516 */
7517DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7518{
7519#if RT_INLINE_ASM_USES_INTRIN
7520 return _rotl64(u64, cShift);
7521
7522#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7523 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7524 return u64;
7525
7526#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7527 uint32_t uSpill;
7528 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7529 "jz 1f\n\t"
7530 "xchgl %%eax, %%edx\n\t"
7531 "1:\n\t"
7532 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7533 "jz 2f\n\t"
7534 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7535 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7536 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7537 "2:\n\t" /* } */
7538 : "=A" (u64)
7539 , "=c" (cShift)
7540 , "=r" (uSpill)
7541 : "0" (u64)
7542 , "1" (cShift)
7543 : "cc");
7544 return u64;
7545
7546#elif defined(RT_ARCH_ARM64)
7547 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7548 : [uRet] "=r" (u64)
7549 : [uVal] "[uRet]" (u64)
7550 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7551 return u64;
7552
7553#else
7554 cShift &= 63;
7555 return (u64 << cShift) | (u64 >> ((64 - cShift) & 63)); /* mask avoids the undefined 64-bit shift when cShift is 0 */
7556#endif
7557}
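/*
 * Usage sketch (illustration only, not part of the upstream header): on
 * 32-bit x86 the inline assembly above swaps the dword halves for counts
 * >= 32 and then uses a shld pair for the remaining bits; the net effect is
 * a plain 64-bit rotate left.  Hypothetical helper name, compiled out.
 */
#if 0 /* example only */
static void rtAsmExampleRotateLeftU64(void)
{
    Assert(ASMRotateLeftU64(UINT64_C(1), 33) == UINT64_C(0x0000000200000000)); /* crosses the dword boundary */
    Assert(ASMRotateLeftU64(UINT64_C(0x8000000000000000), 1) == UINT64_C(1));  /* MSB wraps to bit 0 */
}
#endif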
7558
7559
7560/**
7561 * Rotate 64-bit unsigned value to the right by @a cShift.
7562 *
7563 * @returns Rotated value.
7564 * @param u64 The value to rotate.
7565 * @param cShift How many bits to rotate by.
7566 */
7567DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7568{
7569#if RT_INLINE_ASM_USES_INTRIN
7570 return _rotr64(u64, cShift);
7571
7572#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7573 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7574 return u64;
7575
7576#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7577 uint32_t uSpill;
7578 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7579 "jz 1f\n\t"
7580 "xchgl %%eax, %%edx\n\t"
7581 "1:\n\t"
7582 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7583 "jz 2f\n\t"
7584 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7585 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7586 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7587 "2:\n\t" /* } */
7588 : "=A" (u64)
7589 , "=c" (cShift)
7590 , "=r" (uSpill)
7591 : "0" (u64)
7592 , "1" (cShift)
7593 : "cc");
7594 return u64;
7595
7596#elif defined(RT_ARCH_ARM64)
7597 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7598 : [uRet] "=r" (u64)
7599 : [uVal] "[uRet]" (u64)
7600 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7601 return u64;
7602
7603#else
7604 cShift &= 63;
7605 return (u64 >> cShift) | (u64 << ((64 - cShift) & 63)); /* mask avoids the undefined 64-bit shift when cShift is 0 */
7606#endif
7607}
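/*
 * Usage sketch (illustration only, not part of the upstream header): the
 * mirror image of the rotate-left case above, using shrd on 32-bit x86.
 * Hypothetical helper name, compiled out.
 */
#if 0 /* example only */
static void rtAsmExampleRotateRightU64(void)
{
    Assert(ASMRotateRightU64(UINT64_C(1), 1) == UINT64_C(0x8000000000000000));  /* bit 0 wraps to bit 63 */
    Assert(ASMRotateRightU64(UINT64_C(0x0000000200000000), 33) == UINT64_C(1)); /* inverse of the left rotate */
}
#endif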
7608
7609/** @} */
7610
7611
7612/** @} */
7613
7614/*
7615 * Include #pragma aux definitions for Watcom C/C++.
7616 */
7617#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7618# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7619# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7620# include "asm-watcom-x86-16.h"
7621#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7622# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7623# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7624# include "asm-watcom-x86-32.h"
7625#endif
7626
7627#endif /* !IPRT_INCLUDED_asm_h */
7628