VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 89236

Last change on this file since 89236 was 87402, checked in by vboxsync, 4 years ago

iprt/cdefs.h,asm*.h,hmvmxinline.h: Changed the RT_INLINE_ASM_USES_INTRIN value to RT_MSC_VER_XXX to make it more readable.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 230.4 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400 or later compiler.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98/** @defgroup grp_rt_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 *
101 * @remarks The difference between ordered and unordered atomic operations is
102 * that the former will complete outstanding reads and writes before
103 * continuing, while the latter make no promises about the
104 * order. Ordered operations do not, it seems, make any 100% promise
105 * wrt whether the operation will complete before any subsequent
106 * memory access. (please, correct if wrong.)
107 *
108 * ASMAtomicSomething operations are all ordered, while
109 * ASMAtomicUoSomething are unordered (note the Uo).
110 *
111 * Please note that ordered operations do not necessarily imply a
112 * compiler (memory) barrier. The user has to use the
113 * ASMCompilerBarrier() macro when that is deemed necessary.
114 *
115 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
116 * to reorder or even optimize assembler instructions away. For
117 * instance, in the following code the second rdmsr instruction is
118 * optimized away because gcc treats that instruction as deterministic:
119 *
120 * @code
121 * static inline uint64_t rdmsr_low(int idx)
122 * {
123 * uint32_t low;
124 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
125 * }
126 * ...
127 * uint32_t msr1 = rdmsr_low(1);
128 * foo(msr1);
129 * msr1 = rdmsr_low(1);
130 * bar(msr1);
131 * @endcode
132 *
133 * The input parameter of rdmsr_low is the same for both calls and
134 * therefore gcc will use the result of the first call as input
135 * parameter for bar() as well. For rdmsr this is not acceptable as
136 * this instruction is _not_ deterministic. This applies to reading
137 * machine status information in general.
138 *
139 * @{
140 */
141
142
143/** @def RT_INLINE_ASM_GCC_4_3_X_X86
144 * Used to work around some 4.3.x register allocation issues in this version of
145 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
146 * definitely not for 5.x */
147#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
148# define RT_INLINE_ASM_GCC_4_3_X_X86 1
149#else
150# define RT_INLINE_ASM_GCC_4_3_X_X86 0
151#endif
152
153/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
155 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
156 * mode, x86.
157 *
158 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
159 * when in PIC mode on x86.
160 */
161#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
162# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
163# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
164# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
165# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
166# elif ( (defined(PIC) || defined(__PIC__)) \
167 && defined(RT_ARCH_X86) \
168 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
169 || defined(RT_OS_DARWIN)) )
170# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
171# else
172# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
173# endif
174#endif
175
176
177/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
178 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
179#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
180# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
181#else
182# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
183#endif
184
185/*
186 * ARM is great fun.
187 */
188#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
189
190# define RTASM_ARM_NO_BARRIER
191# ifdef RT_ARCH_ARM64
192# define RTASM_ARM_NO_BARRIER_IN_REG
193# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
194# define RTASM_ARM_DSB_SY "dsb sy\n\t"
195# define RTASM_ARM_DSB_SY_IN_REG
196# define RTASM_ARM_DSB_SY_COMMA_IN_REG
197# define RTASM_ARM_DMB_SY "dmb sy\n\t"
198# define RTASM_ARM_DMB_SY_IN_REG
199# define RTASM_ARM_DMB_SY_COMMA_IN_REG
200# define RTASM_ARM_DMB_ST "dmb st\n\t"
201# define RTASM_ARM_DMB_ST_IN_REG
202# define RTASM_ARM_DMB_ST_COMMA_IN_REG
203# define RTASM_ARM_DMB_LD "dmb ld\n\t"
204# define RTASM_ARM_DMB_LD_IN_REG
205# define RTASM_ARM_DMB_LD_COMMA_IN_REG
206# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
207# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
208 uint32_t rcSpill; \
209 uint32_t u32NewRet; \
210 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
211 RTASM_ARM_##barrier_type /* before label? */ \
212 "ldaxr %w[uNew], %[pMem]\n\t" \
213 modify64 \
214 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
215 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
216 : [pMem] "+m" (*a_pu32Mem) \
217 , [uNew] "=&r" (u32NewRet) \
218 , [rc] "=&r" (rcSpill) \
219 : in_reg \
220 : "cc")
221# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
222 uint32_t rcSpill; \
223 uint32_t u32OldRet; \
224 uint32_t u32NewSpill; \
225 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
226 RTASM_ARM_##barrier_type /* before label? */ \
227 "ldaxr %w[uOld], %[pMem]\n\t" \
228 modify64 \
229 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
230 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
231 : [pMem] "+m" (*a_pu32Mem) \
232 , [uOld] "=&r" (u32OldRet) \
233 , [uNew] "=&r" (u32NewSpill) \
234 , [rc] "=&r" (rcSpill) \
235 : in_reg \
236 : "cc")
237# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
238 uint32_t rcSpill; \
239 uint64_t u64NewRet; \
240 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
241 RTASM_ARM_##barrier_type /* before label? */ \
242 "ldaxr %[uNew], %[pMem]\n\t" \
243 modify64 \
244 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
245 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
246 : [pMem] "+m" (*a_pu64Mem) \
247 , [uNew] "=&r" (u64NewRet) \
248 , [rc] "=&r" (rcSpill) \
249 : in_reg \
250 : "cc")
251# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
252 uint32_t rcSpill; \
253 uint64_t u64OldRet; \
254 uint64_t u64NewSpill; \
255 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
256 RTASM_ARM_##barrier_type /* before label? */ \
257 "ldaxr %[uOld], %[pMem]\n\t" \
258 modify64 \
259 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
260 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
261 : [pMem] "+m" (*a_pu64Mem) \
262 , [uOld] "=&r" (u64OldRet) \
263 , [uNew] "=&r" (u64NewSpill) \
264 , [rc] "=&r" (rcSpill) \
265 : in_reg \
266 : "cc")
267
268# else /* RT_ARCH_ARM32 */
269# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
270# if RT_ARCH_ARM32 >= 7
271# warning armv7
272# define RTASM_ARM_NO_BARRIER_IN_REG
273# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
274# define RTASM_ARM_DSB_SY "dsb sy\n\t"
275# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
276# define RTASM_ARM_DMB_SY "dmb sy\n\t"
277# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
278# define RTASM_ARM_DMB_ST "dmb st\n\t"
279# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
280# define RTASM_ARM_DMB_LD "dmb ld\n\t"
281# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
282
283# elif RT_ARCH_ARM32 >= 6
284# warning armv6
285# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
286# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
287# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
288# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
289# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
290# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
291# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
292# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
293# elif RT_ARCH_ARM32 >= 4
294# warning armv5 or older
295# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
296# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
297# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
298# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
299# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
300# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
301# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
302# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
303# else
304# error "huh? Odd RT_ARCH_ARM32 value!"
305# endif
306# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
307# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
308# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
309# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
310# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
311 uint32_t rcSpill; \
312 uint32_t u32NewRet; \
313 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
314 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
315 "ldrex %[uNew], %[pMem]\n\t" \
316 modify32 \
317 "strex %[rc], %[uNew], %[pMem]\n\t" \
318 "cmp %[rc], #0\n\t" \
319 "bne .Ltry_again_" #name "_%=\n\t" \
320 : [pMem] "+m" (*a_pu32Mem) \
321 , [uNew] "=&r" (u32NewRet) \
322 , [rc] "=&r" (rcSpill) \
323 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
324 , in_reg \
325 : "cc")
326# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
327 uint32_t rcSpill; \
328 uint32_t u32OldRet; \
329 uint32_t u32NewSpill; \
330 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
331 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
332 "ldrex %[uOld], %[pMem]\n\t" \
333 modify32 \
334 "strex %[rc], %[uNew], %[pMem]\n\t" \
335 "cmp %[rc], #0\n\t" \
336 "bne .Ltry_again_" #name "_%=\n\t" \
337 : [pMem] "+m" (*a_pu32Mem) \
338 , [uOld] "=&r" (u32OldRet) \
339 , [uNew] "=&r" (u32NewSpill) \
340 , [rc] "=&r" (rcSpill) \
341 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
342 , in_reg \
343 : "cc")
344# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
345 uint32_t rcSpill; \
346 uint64_t u64NewRet; \
347 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
348 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
349 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
350 modify32 \
351 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
352 "cmp %[rc], #0\n\t" \
353 "bne .Ltry_again_" #name "_%=\n\t" \
354 : [pMem] "+m" (*a_pu64Mem), \
355 [uNew] "=&r" (u64NewRet), \
356 [rc] "=&r" (rcSpill) \
357 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
358 , in_reg \
359 : "cc")
360# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
361 uint32_t rcSpill; \
362 uint64_t u64OldRet; \
363 uint64_t u64NewSpill; \
364 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
365 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
366 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
367 modify32 \
368 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
369 "cmp %[rc], #0\n\t" \
370 "bne .Ltry_again_" #name "_%=\n\t" \
371 : [pMem] "+m" (*a_pu64Mem), \
372 [uOld] "=&r" (u64OldRet), \
373 [uNew] "=&r" (u64NewSpill), \
374 [rc] "=&r" (rcSpill) \
375 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
376 , in_reg \
377 : "cc")
378# endif /* RT_ARCH_ARM32 */
379#endif
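/* Illustrative sketch: how the 32-bit load/modify/store helper above is meant
 * to be instantiated, here for a hypothetical atomic OR that returns the new
 * value.  The modify64/modify32 arguments are the AArch64 and AArch32
 * instruction strings applied between the exclusive load and store, and any
 * extra operands are supplied through the trailing in_reg argument.
 * @code
 *      DECLINLINE(uint32_t) MyAtomicOrRetNewU32(uint32_t volatile *pu32, uint32_t fBits)
 *      {
 *          RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(MyAtomicOrRetNewU32, pu32, DMB_SY,
 *                                                 "orr %w[uNew], %w[uNew], %w[fBits]\n\t",
 *                                                 "orr %[uNew], %[uNew], %[fBits]\n\t",
 *                                                 [fBits] "r" (fBits));
 *          return u32NewRet;
 *      }
 * @endcode
 */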
380
381
382/** @def ASMReturnAddress
383 * Gets the return address of the current (or calling if you like) function or method.
384 */
385#ifdef _MSC_VER
386# ifdef __cplusplus
387extern "C"
388# endif
389void * _ReturnAddress(void);
390# pragma intrinsic(_ReturnAddress)
391# define ASMReturnAddress() _ReturnAddress()
392#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
393# define ASMReturnAddress() __builtin_return_address(0)
394#elif defined(__WATCOMC__)
395# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
396#else
397# error "Unsupported compiler."
398#endif
399
400
401/**
402 * Compiler memory barrier.
403 *
404 * Ensure that the compiler does not use any cached (register/tmp stack) memory
405 * values or any outstanding writes when returning from this function.
406 *
407 * This function must be used if non-volatile data is modified by a
408 * device or the VMM. Typical cases are port access, MMIO access,
409 * trapping instruction, etc.
410 */
411#if RT_INLINE_ASM_GNU_STYLE
412# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
413#elif RT_INLINE_ASM_USES_INTRIN
414# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
415#elif defined(__WATCOMC__)
416void ASMCompilerBarrier(void);
417#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
418DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
419{
420 __asm
421 {
422 }
423}
424#endif
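/* Illustrative sketch: a flag that is updated behind the compiler's back (by a
 * device, another CPU or the VMM) but is not declared volatile must be re-read
 * from memory on every iteration; the compiler barrier prevents it from being
 * cached in a register.  g_fDone is a hypothetical variable.
 * @code
 *      extern bool g_fDone;
 *      while (!g_fDone)
 *          ASMCompilerBarrier();
 * @endcode
 */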
425
426
427/** @def ASMBreakpoint
428 * Debugger Breakpoint.
429 * @deprecated Use RT_BREAKPOINT instead.
430 * @internal
431 */
432#define ASMBreakpoint() RT_BREAKPOINT()
433
434
435/**
436 * Spinloop hint for platforms that have these, empty function on the other
437 * platforms.
438 *
439 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
440 * spin locks.
441 */
442#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
443RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
444#else
445DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
446{
447# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
448# if RT_INLINE_ASM_GNU_STYLE
449 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
450# else
451 __asm {
452 _emit 0f3h
453 _emit 090h
454 }
455# endif
456
457# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
458 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
459
460# else
461 /* dummy */
462# endif
463}
464#endif
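/* Illustrative sketch: a simple test-and-set spin lock that issues the
 * pause/yield hint while waiting.  g_uSpinLock is a hypothetical variable;
 * ASMAtomicXchgU32 is declared further down in this header.
 * @code
 *      static uint32_t volatile g_uSpinLock = 0;
 *
 *      while (ASMAtomicXchgU32(&g_uSpinLock, 1) != 0)
 *          ASMNopPause();
 *      // ... critical section ...
 *      ASMAtomicXchgU32(&g_uSpinLock, 0);
 * @endcode
 */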
465
466
467/**
468 * Atomically Exchange an unsigned 8-bit value, ordered.
469 *
470 * @returns Current *pu8 value
471 * @param pu8 Pointer to the 8-bit variable to update.
472 * @param u8 The 8-bit value to assign to *pu8.
473 */
474#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
475RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
476#else
477DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
478{
479# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
480# if RT_INLINE_ASM_GNU_STYLE
481 __asm__ __volatile__("xchgb %0, %1\n\t"
482 : "=m" (*pu8)
483 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
484 : "1" (u8)
485 , "m" (*pu8));
486# else
487 __asm
488 {
489# ifdef RT_ARCH_AMD64
490 mov rdx, [pu8]
491 mov al, [u8]
492 xchg [rdx], al
493 mov [u8], al
494# else
495 mov edx, [pu8]
496 mov al, [u8]
497 xchg [edx], al
498 mov [u8], al
499# endif
500 }
501# endif
502 return u8;
503
504# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
505 uint32_t uOld;
506 uint32_t rcSpill;
507 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
508 RTASM_ARM_DMB_SY
509# if defined(RT_ARCH_ARM64)
510 "ldaxrb %w[uOld], %[pMem]\n\t"
511 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
512 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
513# else
514 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
515 "strexb %[rc], %[uNew], %[pMem]\n\t"
516 "cmp %[rc], #0\n\t"
517 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
518# endif
519 : [pMem] "+m" (*pu8)
520 , [uOld] "=&r" (uOld)
521 , [rc] "=&r" (rcSpill)
522 : [uNew] "r" ((uint32_t)u8)
523 RTASM_ARM_DMB_SY_COMMA_IN_REG
524 : "cc");
525 return (uint8_t)uOld;
526
527# else
528# error "Port me"
529# endif
530}
531#endif
532
533
534/**
535 * Atomically Exchange a signed 8-bit value, ordered.
536 *
537 * @returns Current *pi8 value
538 * @param pi8 Pointer to the 8-bit variable to update.
539 * @param i8 The 8-bit value to assign to *pi8.
540 */
541DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
542{
543 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
544}
545
546
547/**
548 * Atomically Exchange a bool value, ordered.
549 *
550 * @returns Current *pf value
551 * @param pf Pointer to the boolean variable to update.
552 * @param f The boolean value to assign to *pf.
553 */
554DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
555{
556#ifdef _MSC_VER
557 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
558#else
559 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
560#endif
561}
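/* Illustrative sketch: a one-shot trigger where only the first caller acts;
 * the exchange returns the previous value, so exactly one thread sees false.
 * g_fSignalled and rtDemoDoOnce are hypothetical names.
 * @code
 *      static bool volatile g_fSignalled = false;
 *
 *      if (!ASMAtomicXchgBool(&g_fSignalled, true))
 *          rtDemoDoOnce();
 * @endcode
 */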
562
563
564/**
565 * Atomically Exchange an unsigned 16-bit value, ordered.
566 *
567 * @returns Current *pu16 value
568 * @param pu16 Pointer to the 16-bit variable to update.
569 * @param u16 The 16-bit value to assign to *pu16.
570 */
571#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
572RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
573#else
574DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
575{
576# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
577# if RT_INLINE_ASM_GNU_STYLE
578 __asm__ __volatile__("xchgw %0, %1\n\t"
579 : "=m" (*pu16)
580 , "=r" (u16)
581 : "1" (u16)
582 , "m" (*pu16));
583# else
584 __asm
585 {
586# ifdef RT_ARCH_AMD64
587 mov rdx, [pu16]
588 mov ax, [u16]
589 xchg [rdx], ax
590 mov [u16], ax
591# else
592 mov edx, [pu16]
593 mov ax, [u16]
594 xchg [edx], ax
595 mov [u16], ax
596# endif
597 }
598# endif
599 return u16;
600
601# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
602 uint32_t uOld;
603 uint32_t rcSpill;
604 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
605 RTASM_ARM_DMB_SY
606# if defined(RT_ARCH_ARM64)
607 "ldaxrh %w[uOld], %[pMem]\n\t"
608 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
609 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
610# else
611 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
612 "strexh %[rc], %[uNew], %[pMem]\n\t"
613 "cmp %[rc], #0\n\t"
614 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
615# endif
616 : [pMem] "+m" (*pu16)
617 , [uOld] "=&r" (uOld)
618 , [rc] "=&r" (rcSpill)
619 : [uNew] "r" ((uint32_t)u16)
620 RTASM_ARM_DMB_SY_COMMA_IN_REG
621 : "cc");
622 return (uint16_t)uOld;
623
624# else
625# error "Port me"
626# endif
627}
628#endif
629
630
631/**
632 * Atomically Exchange a signed 16-bit value, ordered.
633 *
634 * @returns Current *pi16 value
635 * @param pi16 Pointer to the 16-bit variable to update.
636 * @param i16 The 16-bit value to assign to *pi16.
637 */
638DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
639{
640 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
641}
642
643
644/**
645 * Atomically Exchange an unsigned 32-bit value, ordered.
646 *
647 * @returns Current *pu32 value
648 * @param pu32 Pointer to the 32-bit variable to update.
649 * @param u32 The 32-bit value to assign to *pu32.
650 *
651 * @remarks Does not work on 286 and earlier.
652 */
653#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
654RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
655#else
656DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
657{
658# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
659# if RT_INLINE_ASM_GNU_STYLE
660 __asm__ __volatile__("xchgl %0, %1\n\t"
661 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
662 , "=r" (u32)
663 : "1" (u32)
664 , "m" (*pu32));
665
666# elif RT_INLINE_ASM_USES_INTRIN
667 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
668
669# else
670 __asm
671 {
672# ifdef RT_ARCH_AMD64
673 mov rdx, [pu32]
674 mov eax, u32
675 xchg [rdx], eax
676 mov [u32], eax
677# else
678 mov edx, [pu32]
679 mov eax, u32
680 xchg [edx], eax
681 mov [u32], eax
682# endif
683 }
684# endif
685 return u32;
686
687# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
688 uint32_t uOld;
689 uint32_t rcSpill;
690 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
691 RTASM_ARM_DMB_SY
692# if defined(RT_ARCH_ARM64)
693 "ldaxr %w[uOld], %[pMem]\n\t"
694 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
695 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
696# else
697 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
698 "strex %[rc], %[uNew], %[pMem]\n\t"
699 "cmp %[rc], #0\n\t"
700 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
701# endif
702 : [pMem] "+m" (*pu32)
703 , [uOld] "=&r" (uOld)
704 , [rc] "=&r" (rcSpill)
705 : [uNew] "r" (u32)
706 RTASM_ARM_DMB_SY_COMMA_IN_REG
707 : "cc");
708 return uOld;
709
710# else
711# error "Port me"
712# endif
713}
714#endif
715
716
717/**
718 * Atomically Exchange a signed 32-bit value, ordered.
719 *
720 * @returns Current *pi32 value
721 * @param pi32 Pointer to the 32-bit variable to update.
722 * @param i32 The 32-bit value to assign to *pi32.
723 */
724DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
725{
726 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
727}
728
729
730/**
731 * Atomically Exchange an unsigned 64-bit value, ordered.
732 *
733 * @returns Current *pu64 value
734 * @param pu64 Pointer to the 64-bit variable to update.
735 * @param u64 The 64-bit value to assign to *pu64.
736 *
737 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
738 */
739#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
740 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
741RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
742#else
743DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
744{
745# if defined(RT_ARCH_AMD64)
746# if RT_INLINE_ASM_USES_INTRIN
747 return _InterlockedExchange64((__int64 *)pu64, u64);
748
749# elif RT_INLINE_ASM_GNU_STYLE
750 __asm__ __volatile__("xchgq %0, %1\n\t"
751 : "=m" (*pu64)
752 , "=r" (u64)
753 : "1" (u64)
754 , "m" (*pu64));
755 return u64;
756# else
757 __asm
758 {
759 mov rdx, [pu64]
760 mov rax, [u64]
761 xchg [rdx], rax
762 mov [u64], rax
763 }
764 return u64;
765# endif
766
767# elif defined(RT_ARCH_X86)
768# if RT_INLINE_ASM_GNU_STYLE
769# if defined(PIC) || defined(__PIC__)
770 uint32_t u32EBX = (uint32_t)u64;
771 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
772 "xchgl %%ebx, %3\n\t"
773 "1:\n\t"
774 "lock; cmpxchg8b (%5)\n\t"
775 "jnz 1b\n\t"
776 "movl %3, %%ebx\n\t"
777 /*"xchgl %%esi, %5\n\t"*/
778 : "=A" (u64)
779 , "=m" (*pu64)
780 : "0" (*pu64)
781 , "m" ( u32EBX )
782 , "c" ( (uint32_t)(u64 >> 32) )
783 , "S" (pu64)
784 : "cc");
785# else /* !PIC */
786 __asm__ __volatile__("1:\n\t"
787 "lock; cmpxchg8b %1\n\t"
788 "jnz 1b\n\t"
789 : "=A" (u64)
790 , "=m" (*pu64)
791 : "0" (*pu64)
792 , "b" ( (uint32_t)u64 )
793 , "c" ( (uint32_t)(u64 >> 32) )
794 : "cc");
795# endif
796# else
797 __asm
798 {
799 mov ebx, dword ptr [u64]
800 mov ecx, dword ptr [u64 + 4]
801 mov edi, pu64
802 mov eax, dword ptr [edi]
803 mov edx, dword ptr [edi + 4]
804 retry:
805 lock cmpxchg8b [edi]
806 jnz retry
807 mov dword ptr [u64], eax
808 mov dword ptr [u64 + 4], edx
809 }
810# endif
811 return u64;
812
813# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
814 uint32_t rcSpill;
815 uint64_t uOld;
816 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
817 RTASM_ARM_DMB_SY
818# if defined(RT_ARCH_ARM64)
819 "ldaxr %[uOld], %[pMem]\n\t"
820 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
821 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
822# else
823 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
824 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
825 "cmp %[rc], #0\n\t"
826 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
827# endif
828 : [pMem] "+m" (*pu64)
829 , [uOld] "=&r" (uOld)
830 , [rc] "=&r" (rcSpill)
831 : [uNew] "r" (u64)
832 RTASM_ARM_DMB_SY_COMMA_IN_REG
833 : "cc");
834 return uOld;
835
836# else
837# error "Port me"
838# endif
839}
840#endif
841
842
843/**
844 * Atomically Exchange a signed 64-bit value, ordered.
845 *
846 * @returns Current *pi64 value
847 * @param pi64 Pointer to the 64-bit variable to update.
848 * @param i64 The 64-bit value to assign to *pi64.
849 */
850DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
851{
852 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
853}
854
855
856/**
857 * Atomically Exchange a size_t value, ordered.
858 *
859 * @returns Current *puDst value
860 * @param puDst Pointer to the size_t variable to update.
861 * @param uNew The new value to assign to *puDst.
862 */
863DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
864{
865#if ARCH_BITS == 16
866 AssertCompile(sizeof(size_t) == 2);
867 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
868#elif ARCH_BITS == 32
869 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
870#elif ARCH_BITS == 64
871 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
872#else
873# error "ARCH_BITS is bogus"
874#endif
875}
876
877
878/**
879 * Atomically Exchange a pointer value, ordered.
880 *
881 * @returns Current *ppv value
882 * @param ppv Pointer to the pointer variable to update.
883 * @param pv The pointer value to assign to *ppv.
884 */
885DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
886{
887#if ARCH_BITS == 32 || ARCH_BITS == 16
888 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
889#elif ARCH_BITS == 64
890 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
891#else
892# error "ARCH_BITS is bogus"
893#endif
894}
895
896
897/**
898 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
899 *
900 * @returns Current *ppv value
901 * @param ppv Pointer to the pointer variable to update.
902 * @param pv The pointer value to assign to *ppv.
903 * @param Type The type of *ppv, sans volatile.
904 */
905#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
906# define ASMAtomicXchgPtrT(ppv, pv, Type) \
907 __extension__ \
908 ({\
909 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
910 Type const pvTypeChecked = (pv); \
911 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
912 pvTypeCheckedRet; \
913 })
914#else
915# define ASMAtomicXchgPtrT(ppv, pv, Type) \
916 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
917#endif
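/* Illustrative sketch: swapping in a new list head and getting the old head
 * back with the pointer type preserved.  MYNODE, g_pHead and pNewHead are
 * hypothetical names.
 * @code
 *      struct MYNODE * volatile g_pHead;
 *
 *      struct MYNODE *pOldHead = ASMAtomicXchgPtrT(&g_pHead, pNewHead, struct MYNODE *);
 * @endcode
 */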
918
919
920/**
921 * Atomically Exchange a raw-mode context pointer value, ordered.
922 *
923 * @returns Current *ppvRC value
924 * @param ppvRC Pointer to the pointer variable to update.
925 * @param pvRC The pointer value to assign to *ppvRC.
926 */
927DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
928{
929 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
930}
931
932
933/**
934 * Atomically Exchange a ring-0 pointer value, ordered.
935 *
936 * @returns Current *ppvR0 value
937 * @param ppvR0 Pointer to the pointer variable to update.
938 * @param pvR0 The pointer value to assign to *ppvR0.
939 */
940DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
941{
942#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
943 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
944#elif R0_ARCH_BITS == 64
945 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
946#else
947# error "R0_ARCH_BITS is bogus"
948#endif
949}
950
951
952/**
953 * Atomically Exchange a ring-3 pointer value, ordered.
954 *
955 * @returns Current *ppvR3 value
956 * @param ppvR3 Pointer to the pointer variable to update.
957 * @param pvR3 The pointer value to assign to *ppvR3.
958 */
959DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
960{
961#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
962 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
963#elif R3_ARCH_BITS == 64
964 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
965#else
966# error "R3_ARCH_BITS is bogus"
967#endif
968}
969
970
971/** @def ASMAtomicXchgHandle
972 * Atomically Exchange a typical IPRT handle value, ordered.
973 *
974 * @param ph Pointer to the value to update.
975 * @param hNew The new value to assign to *ph.
976 * @param phRes Where to store the current *ph value.
977 *
978 * @remarks This doesn't currently work for all handles (like RTFILE).
979 */
980#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
981# define ASMAtomicXchgHandle(ph, hNew, phRes) \
982 do { \
983 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
984 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
985 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
986 } while (0)
987#elif HC_ARCH_BITS == 64
988# define ASMAtomicXchgHandle(ph, hNew, phRes) \
989 do { \
990 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
991 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
992 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
993 } while (0)
994#else
995# error HC_ARCH_BITS
996#endif
997
998
999/**
1000 * Atomically Exchange a value whose size might differ
1001 * between platforms or compilers, ordered.
1002 *
1003 * @param pu Pointer to the variable to update.
1004 * @param uNew The value to assign to *pu.
1005 * @todo This is busted as it's missing the result argument.
1006 */
1007#define ASMAtomicXchgSize(pu, uNew) \
1008 do { \
1009 switch (sizeof(*(pu))) { \
1010 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1011 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1012 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1013 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1014 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1015 } \
1016 } while (0)
1017
1018/**
1019 * Atomically Exchange a value whose size might differ
1020 * between platforms or compilers, ordered.
1021 *
1022 * @param pu Pointer to the variable to update.
1023 * @param uNew The value to assign to *pu.
1024 * @param puRes Where to store the current *pu value.
1025 */
1026#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1027 do { \
1028 switch (sizeof(*(pu))) { \
1029 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1030 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1031 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1032 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1033 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1034 } \
1035 } while (0)
1036
1037
1038
1039/**
1040 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1041 *
1042 * @returns true if xchg was done.
1043 * @returns false if xchg wasn't done.
1044 *
1045 * @param pu8 Pointer to the value to update.
1046 * @param u8New The new value to assign to *pu8.
1047 * @param u8Old The old value to compare *pu8 with.
1048 *
1049 * @remarks x86: Requires a 486 or later.
1050 * @todo Rename ASMAtomicCmpWriteU8
1051 */
1052#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1053RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1054#else
1055DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1056{
1057# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1058 uint8_t u8Ret;
1059 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1060 "setz %1\n\t"
1061 : "=m" (*pu8)
1062 , "=qm" (u8Ret)
1063 , "=a" (u8Old)
1064 : "q" (u8New)
1065 , "2" (u8Old)
1066 , "m" (*pu8)
1067 : "cc");
1068 return (bool)u8Ret;
1069
1070# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1071 union { uint32_t u; bool f; } fXchg;
1072 uint32_t u32Spill;
1073 uint32_t rcSpill;
1074 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1075 RTASM_ARM_DMB_SY
1076# if defined(RT_ARCH_ARM64)
1077 "ldaxrb %w[uOld], %[pMem]\n\t"
1078 "cmp %w[uOld], %w[uCmp]\n\t"
1079 "bne 1f\n\t" /* stop here if not equal */
1080 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1081 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1082 "mov %w[fXchg], #1\n\t"
1083# else
1084 "ldrexb %[uOld], %[pMem]\n\t"
1085 "teq %[uOld], %[uCmp]\n\t"
1086 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1087 "bne 1f\n\t" /* stop here if not equal */
1088 "cmp %[rc], #0\n\t"
1089 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1090 "mov %[fXchg], #1\n\t"
1091# endif
1092 "1:\n\t"
1093 : [pMem] "+m" (*pu8)
1094 , [uOld] "=&r" (u32Spill)
1095 , [rc] "=&r" (rcSpill)
1096 , [fXchg] "=&r" (fXchg.u)
1097 : [uCmp] "r" ((uint32_t)u8Old)
1098 , [uNew] "r" ((uint32_t)u8New)
1099 , "[fXchg]" (0)
1100 RTASM_ARM_DMB_SY_COMMA_IN_REG
1101 : "cc");
1102 return fXchg.f;
1103
1104# else
1105# error "Port me"
1106# endif
1107}
1108#endif
1109
1110
1111/**
1112 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1113 *
1114 * @returns true if xchg was done.
1115 * @returns false if xchg wasn't done.
1116 *
1117 * @param pi8 Pointer to the value to update.
1118 * @param i8New The new value to assign to *pi8.
1119 * @param i8Old The old value to compare *pi8 with.
1120 *
1121 * @remarks x86: Requires a 486 or later.
1122 * @todo Rename ASMAtomicCmpWriteS8
1123 */
1124DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1125{
1126 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1127}
1128
1129
1130/**
1131 * Atomically Compare and Exchange a bool value, ordered.
1132 *
1133 * @returns true if xchg was done.
1134 * @returns false if xchg wasn't done.
1135 *
1136 * @param pf Pointer to the value to update.
1137 * @param fNew The new value to assign to *pf.
1138 * @param fOld The old value to compare *pf with.
1139 *
1140 * @remarks x86: Requires a 486 or later.
1141 * @todo Rename ASMAtomicCmpWriteBool
1142 */
1143DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1144{
1145 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1146}
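/* Illustrative sketch: letting exactly one thread win the right to perform an
 * initialization.  g_fInitStarted is a hypothetical variable.
 * @code
 *      static bool volatile g_fInitStarted = false;
 *
 *      if (ASMAtomicCmpXchgBool(&g_fInitStarted, true, false))
 *      {
 *          // only the winning thread gets here
 *      }
 * @endcode
 */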
1147
1148
1149/**
1150 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1151 *
1152 * @returns true if xchg was done.
1153 * @returns false if xchg wasn't done.
1154 *
1155 * @param pu32 Pointer to the value to update.
1156 * @param u32New The new value to assign to *pu32.
1157 * @param u32Old The old value to compare *pu32 with.
1158 *
1159 * @remarks x86: Requires a 486 or later.
1160 * @todo Rename ASMAtomicCmpWriteU32
1161 */
1162#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1163RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1164#else
1165DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1166{
1167# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1168# if RT_INLINE_ASM_GNU_STYLE
1169 uint8_t u8Ret;
1170 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1171 "setz %1\n\t"
1172 : "=m" (*pu32)
1173 , "=qm" (u8Ret)
1174 , "=a" (u32Old)
1175 : "r" (u32New)
1176 , "2" (u32Old)
1177 , "m" (*pu32)
1178 : "cc");
1179 return (bool)u8Ret;
1180
1181# elif RT_INLINE_ASM_USES_INTRIN
1182 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1183
1184# else
1185 uint32_t u32Ret;
1186 __asm
1187 {
1188# ifdef RT_ARCH_AMD64
1189 mov rdx, [pu32]
1190# else
1191 mov edx, [pu32]
1192# endif
1193 mov eax, [u32Old]
1194 mov ecx, [u32New]
1195# ifdef RT_ARCH_AMD64
1196 lock cmpxchg [rdx], ecx
1197# else
1198 lock cmpxchg [edx], ecx
1199# endif
1200 setz al
1201 movzx eax, al
1202 mov [u32Ret], eax
1203 }
1204 return !!u32Ret;
1205# endif
1206
1207# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1208 union { uint32_t u; bool f; } fXchg;
1209 uint32_t u32Spill;
1210 uint32_t rcSpill;
1211 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1212 RTASM_ARM_DMB_SY
1213# if defined(RT_ARCH_ARM64)
1214 "ldaxr %w[uOld], %[pMem]\n\t"
1215 "cmp %w[uOld], %w[uCmp]\n\t"
1216 "bne 1f\n\t" /* stop here if not equal */
1217 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1218 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1219 "mov %w[fXchg], #1\n\t"
1220# else
1221 "ldrex %[uOld], %[pMem]\n\t"
1222 "teq %[uOld], %[uCmp]\n\t"
1223 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1224 "bne 1f\n\t" /* stop here if not equal */
1225 "cmp %[rc], #0\n\t"
1226 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1227 "mov %[fXchg], #1\n\t"
1228# endif
1229 "1:\n\t"
1230 : [pMem] "+m" (*pu32)
1231 , [uOld] "=&r" (u32Spill)
1232 , [rc] "=&r" (rcSpill)
1233 , [fXchg] "=&r" (fXchg.u)
1234 : [uCmp] "r" (u32Old)
1235 , [uNew] "r" (u32New)
1236 , "[fXchg]" (0)
1237 RTASM_ARM_DMB_SY_COMMA_IN_REG
1238 : "cc");
1239 return fXchg.f;
1240
1241# else
1242# error "Port me"
1243# endif
1244}
1245#endif
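/* Illustrative sketch: the usual compare-and-exchange retry loop for applying
 * a transformation that a plain atomic add/or/and cannot express.  The initial
 * read may race; the compare-and-exchange validates it and the loop retries on
 * failure.  cbChunk and cbMax are hypothetical values.
 * @code
 *      uint32_t uOld, uNew;
 *      do
 *      {
 *          uOld = *pu32;
 *          uNew = RT_MIN(uOld + cbChunk, cbMax);   // saturating add
 *      } while (!ASMAtomicCmpXchgU32(pu32, uNew, uOld));
 * @endcode
 */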
1246
1247
1248/**
1249 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1250 *
1251 * @returns true if xchg was done.
1252 * @returns false if xchg wasn't done.
1253 *
1254 * @param pi32 Pointer to the value to update.
1255 * @param i32New The new value to assign to *pi32.
1256 * @param i32Old The old value to compare *pi32 with.
1257 *
1258 * @remarks x86: Requires a 486 or later.
1259 * @todo Rename ASMAtomicCmpWriteS32
1260 */
1261DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1262{
1263 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1264}
1265
1266
1267/**
1268 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1269 *
1270 * @returns true if xchg was done.
1271 * @returns false if xchg wasn't done.
1272 *
1273 * @param pu64 Pointer to the 64-bit variable to update.
1274 * @param u64New The 64-bit value to assign to *pu64.
1275 * @param u64Old The value to compare with.
1276 *
1277 * @remarks x86: Requires a Pentium or later.
1278 * @todo Rename ASMAtomicCmpWriteU64
1279 */
1280#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1281 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1282RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1283#else
1284DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1285{
1286# if RT_INLINE_ASM_USES_INTRIN
1287 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1288
1289# elif defined(RT_ARCH_AMD64)
1290# if RT_INLINE_ASM_GNU_STYLE
1291 uint8_t u8Ret;
1292 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1293 "setz %1\n\t"
1294 : "=m" (*pu64)
1295 , "=qm" (u8Ret)
1296 , "=a" (u64Old)
1297 : "r" (u64New)
1298 , "2" (u64Old)
1299 , "m" (*pu64)
1300 : "cc");
1301 return (bool)u8Ret;
1302# else
1303 bool fRet;
1304 __asm
1305 {
1306 mov rdx, [pu64]
1307 mov rax, [u64Old]
1308 mov rcx, [u64New]
1309 lock cmpxchg [rdx], rcx
1310 setz al
1311 mov [fRet], al
1312 }
1313 return fRet;
1314# endif
1315
1316# elif defined(RT_ARCH_X86)
1317 uint32_t u32Ret;
1318# if RT_INLINE_ASM_GNU_STYLE
1319# if defined(PIC) || defined(__PIC__)
1320 uint32_t u32EBX = (uint32_t)u64New;
1321 uint32_t u32Spill;
1322 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1323 "lock; cmpxchg8b (%6)\n\t"
1324 "setz %%al\n\t"
1325 "movl %4, %%ebx\n\t"
1326 "movzbl %%al, %%eax\n\t"
1327 : "=a" (u32Ret)
1328 , "=d" (u32Spill)
1329# if RT_GNUC_PREREQ(4, 3)
1330 , "+m" (*pu64)
1331# else
1332 , "=m" (*pu64)
1333# endif
1334 : "A" (u64Old)
1335 , "m" ( u32EBX )
1336 , "c" ( (uint32_t)(u64New >> 32) )
1337 , "S" (pu64)
1338 : "cc");
1339# else /* !PIC */
1340 uint32_t u32Spill;
1341 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1342 "setz %%al\n\t"
1343 "movzbl %%al, %%eax\n\t"
1344 : "=a" (u32Ret)
1345 , "=d" (u32Spill)
1346 , "+m" (*pu64)
1347 : "A" (u64Old)
1348 , "b" ( (uint32_t)u64New )
1349 , "c" ( (uint32_t)(u64New >> 32) )
1350 : "cc");
1351# endif
1352 return (bool)u32Ret;
1353# else
1354 __asm
1355 {
1356 mov ebx, dword ptr [u64New]
1357 mov ecx, dword ptr [u64New + 4]
1358 mov edi, [pu64]
1359 mov eax, dword ptr [u64Old]
1360 mov edx, dword ptr [u64Old + 4]
1361 lock cmpxchg8b [edi]
1362 setz al
1363 movzx eax, al
1364 mov dword ptr [u32Ret], eax
1365 }
1366 return !!u32Ret;
1367# endif
1368
1369# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1370 union { uint32_t u; bool f; } fXchg;
1371 uint64_t u64Spill;
1372 uint32_t rcSpill;
1373 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1374 RTASM_ARM_DMB_SY
1375# if defined(RT_ARCH_ARM64)
1376 "ldaxr %[uOld], %[pMem]\n\t"
1377 "cmp %[uOld], %[uCmp]\n\t"
1378 "bne 1f\n\t" /* stop here if not equal */
1379 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1380 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1381 "mov %w[fXchg], #1\n\t"
1382# else
1383 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1384 "teq %[uOld], %[uCmp]\n\t"
1385 "teqeq %H[uOld], %H[uCmp]\n\t"
1386 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1387 "bne 1f\n\t" /* stop here if not equal */
1388 "cmp %[rc], #0\n\t"
1389 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1390 "mov %[fXchg], #1\n\t"
1391# endif
1392 "1:\n\t"
1393 : [pMem] "+m" (*pu64)
1394 , [uOld] "=&r" (u64Spill)
1395 , [rc] "=&r" (rcSpill)
1396 , [fXchg] "=&r" (fXchg.u)
1397 : [uCmp] "r" (u64Old)
1398 , [uNew] "r" (u64New)
1399 , "[fXchg]" (0)
1400 RTASM_ARM_DMB_SY_COMMA_IN_REG
1401 : "cc");
1402 return fXchg.f;
1403
1404# else
1405# error "Port me"
1406# endif
1407}
1408#endif
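/* Illustrative sketch: since the 32-bit x86 path uses cmpxchg8b, a 64-bit
 * compare-and-exchange can atomically update two packed 32-bit fields, e.g. a
 * value in the low dword paired with a generation counter in the high dword
 * (hypothetical layout; u32NewValue is a hypothetical variable).
 * @code
 *      uint64_t uOld, uNew;
 *      do
 *      {
 *          uOld = *pu64;
 *          uNew = ((uOld >> 32) + 1) << 32 | u32NewValue;
 *      } while (!ASMAtomicCmpXchgU64(pu64, uNew, uOld));
 * @endcode
 */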
1409
1410
1411/**
1412 * Atomically Compare and exchange a signed 64-bit value, ordered.
1413 *
1414 * @returns true if xchg was done.
1415 * @returns false if xchg wasn't done.
1416 *
1417 * @param pi64 Pointer to the 64-bit variable to update.
1418 * @param i64 The 64-bit value to assign to *pi64.
1419 * @param i64Old The value to compare with.
1420 *
1421 * @remarks x86: Requires a Pentium or later.
1422 * @todo Rename ASMAtomicCmpWriteS64
1423 */
1424DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1425{
1426 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1427}
1428
1429
1430/**
1431 * Atomically Compare and Exchange a pointer value, ordered.
1432 *
1433 * @returns true if xchg was done.
1434 * @returns false if xchg wasn't done.
1435 *
1436 * @param ppv Pointer to the value to update.
1437 * @param pvNew The new value to assign to *ppv.
1438 * @param pvOld The old value to compare *ppv with.
1439 *
1440 * @remarks x86: Requires a 486 or later.
1441 * @todo Rename ASMAtomicCmpWritePtrVoid
1442 */
1443DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1444{
1445#if ARCH_BITS == 32 || ARCH_BITS == 16
1446 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1447#elif ARCH_BITS == 64
1448 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1449#else
1450# error "ARCH_BITS is bogus"
1451#endif
1452}
1453
1454
1455/**
1456 * Atomically Compare and Exchange a pointer value, ordered.
1457 *
1458 * @returns true if xchg was done.
1459 * @returns false if xchg wasn't done.
1460 *
1461 * @param ppv Pointer to the value to update.
1462 * @param pvNew The new value to assign to *ppv.
1463 * @param pvOld The old value to compare *ppv with.
1464 *
1465 * @remarks This is relatively type safe on GCC platforms.
1466 * @remarks x86: Requires a 486 or later.
1467 * @todo Rename ASMAtomicCmpWritePtr
1468 */
1469#ifdef __GNUC__
1470# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1471 __extension__ \
1472 ({\
1473 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1474 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1475 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1476 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1477 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1478 fMacroRet; \
1479 })
1480#else
1481# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1482 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1483#endif
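/* Illustrative sketch: lock-free push onto a singly linked list using the
 * typed compare-and-exchange.  MYNODE, g_pHead and pNode are hypothetical
 * names; a corresponding pop would additionally have to deal with the ABA
 * problem.
 * @code
 *      struct MYNODE { struct MYNODE *pNext; };
 *      struct MYNODE * volatile g_pHead;
 *
 *      struct MYNODE *pOldHead;
 *      do
 *      {
 *          pOldHead = g_pHead;
 *          pNode->pNext = pOldHead;
 *      } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pOldHead));
 * @endcode
 */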
1484
1485
1486/** @def ASMAtomicCmpXchgHandle
1487 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1488 *
1489 * @param ph Pointer to the value to update.
1490 * @param hNew The new value to assign to *ph.
1491 * @param hOld The old value to compare *ph with.
1492 * @param fRc Where to store the result.
1493 *
1494 * @remarks This doesn't currently work for all handles (like RTFILE).
1495 * @remarks x86: Requires a 486 or later.
1496 * @todo Rename ASMAtomicCmpWriteHandle
1497 */
1498#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1499# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1500 do { \
1501 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1502 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1503 } while (0)
1504#elif HC_ARCH_BITS == 64
1505# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1506 do { \
1507 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1508 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1509 } while (0)
1510#else
1511# error HC_ARCH_BITS
1512#endif
1513
1514
1515/** @def ASMAtomicCmpXchgSize
1516 * Atomically Compare and Exchange a value whose size might differ
1517 * between platforms or compilers, ordered.
1518 *
1519 * @param pu Pointer to the value to update.
1520 * @param uNew The new value to assign to *pu.
1521 * @param uOld The old value to compare *pu with.
1522 * @param fRc Where to store the result.
1523 *
1524 * @remarks x86: Requires a 486 or later.
1525 * @todo Rename ASMAtomicCmpWriteSize
1526 */
1527#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1528 do { \
1529 switch (sizeof(*(pu))) { \
1530 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1531 break; \
1532 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1533 break; \
1534 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1535 (fRc) = false; \
1536 break; \
1537 } \
1538 } while (0)
1539
1540
1541/**
1542 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1543 * passing back the old value, ordered.
1544 *
1545 * @returns true if xchg was done.
1546 * @returns false if xchg wasn't done.
1547 *
1548 * @param pu32 Pointer to the value to update.
1549 * @param u32New The new value to assign to *pu32.
1550 * @param u32Old The old value to compare *pu32 with.
1551 * @param pu32Old Pointer to store the old value at.
1552 *
1553 * @remarks x86: Requires a 486 or later.
1554 */
1555#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1556RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1557#else
1558DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1559{
1560# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1561# if RT_INLINE_ASM_GNU_STYLE
1562 uint8_t u8Ret;
1563 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1564 "setz %1\n\t"
1565 : "=m" (*pu32)
1566 , "=qm" (u8Ret)
1567 , "=a" (*pu32Old)
1568 : "r" (u32New)
1569 , "a" (u32Old)
1570 , "m" (*pu32)
1571 : "cc");
1572 return (bool)u8Ret;
1573
1574# elif RT_INLINE_ASM_USES_INTRIN
1575 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1576
1577# else
1578 uint32_t u32Ret;
1579 __asm
1580 {
1581# ifdef RT_ARCH_AMD64
1582 mov rdx, [pu32]
1583# else
1584 mov edx, [pu32]
1585# endif
1586 mov eax, [u32Old]
1587 mov ecx, [u32New]
1588# ifdef RT_ARCH_AMD64
1589 lock cmpxchg [rdx], ecx
1590 mov rdx, [pu32Old]
1591 mov [rdx], eax
1592# else
1593 lock cmpxchg [edx], ecx
1594 mov edx, [pu32Old]
1595 mov [edx], eax
1596# endif
1597 setz al
1598 movzx eax, al
1599 mov [u32Ret], eax
1600 }
1601 return !!u32Ret;
1602# endif
1603
1604# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1605 union { uint32_t u; bool f; } fXchg;
1606 uint32_t u32ActualOld;
1607 uint32_t rcSpill;
1608 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1609 RTASM_ARM_DMB_SY
1610# if defined(RT_ARCH_ARM64)
1611 "ldaxr %w[uOld], %[pMem]\n\t"
1612 "cmp %w[uOld], %w[uCmp]\n\t"
1613 "bne 1f\n\t" /* stop here if not equal */
1614 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1615 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1616 "mov %w[fXchg], #1\n\t"
1617# else
1618 "ldrex %[uOld], %[pMem]\n\t"
1619 "teq %[uOld], %[uCmp]\n\t"
1620 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1621 "bne 1f\n\t" /* stop here if not equal */
1622 "cmp %[rc], #0\n\t"
1623 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1624 "mov %[fXchg], #1\n\t"
1625# endif
1626 "1:\n\t"
1627 : [pMem] "+m" (*pu32)
1628 , [uOld] "=&r" (u32ActualOld)
1629 , [rc] "=&r" (rcSpill)
1630 , [fXchg] "=&r" (fXchg.u)
1631 : [uCmp] "r" (u32Old)
1632 , [uNew] "r" (u32New)
1633 , "[fXchg]" (0)
1634 RTASM_ARM_DMB_SY_COMMA_IN_REG
1635 : "cc");
1636 *pu32Old = u32ActualOld;
1637 return fXchg.f;
1638
1639# else
1640# error "Port me"
1641# endif
1642}
1643#endif
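/* Illustrative sketch: the Ex variant hands back the value actually found, so
 * a retry loop does not need to re-read the variable after a failed attempt.
 * RT_BIT_32 comes from iprt/cdefs.h; the flag bit chosen here is arbitrary.
 * @code
 *      uint32_t uOld = *pu32;
 *      uint32_t uNew;
 *      do
 *          uNew = uOld | RT_BIT_32(7);
 *      while (!ASMAtomicCmpXchgExU32(pu32, uNew, uOld, &uOld));
 * @endcode
 */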
1644
1645
1646/**
1647 * Atomically Compare and Exchange a signed 32-bit value, additionally
1648 * passing back the old value, ordered.
1649 *
1650 * @returns true if xchg was done.
1651 * @returns false if xchg wasn't done.
1652 *
1653 * @param pi32 Pointer to the value to update.
1654 * @param i32New The new value to assign to *pi32.
1655 * @param i32Old The old value to compare *pi32 with.
1656 * @param pi32Old Pointer to store the old value at.
1657 *
1658 * @remarks x86: Requires a 486 or later.
1659 */
1660DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1661{
1662 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1663}
1664
1665
1666/**
1667 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1668 * passing back old value, ordered.
1669 *
1670 * @returns true if xchg was done.
1671 * @returns false if xchg wasn't done.
1672 *
1673 * @param pu64 Pointer to the 64-bit variable to update.
1674 * @param u64New The 64-bit value to assign to *pu64.
1675 * @param u64Old The value to compare with.
1676 * @param pu64Old Pointer to store the old value at.
1677 *
1678 * @remarks x86: Requires a Pentium or later.
1679 */
1680#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1681 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1682RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1683#else
1684DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1685{
1686# if RT_INLINE_ASM_USES_INTRIN
1687 return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1688
1689# elif defined(RT_ARCH_AMD64)
1690# if RT_INLINE_ASM_GNU_STYLE
1691 uint8_t u8Ret;
1692 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1693 "setz %1\n\t"
1694 : "=m" (*pu64)
1695 , "=qm" (u8Ret)
1696 , "=a" (*pu64Old)
1697 : "r" (u64New)
1698 , "a" (u64Old)
1699 , "m" (*pu64)
1700 : "cc");
1701 return (bool)u8Ret;
1702# else
1703 bool fRet;
1704 __asm
1705 {
1706 mov rdx, [pu64]
1707 mov rax, [u64Old]
1708 mov rcx, [u64New]
1709 lock cmpxchg [rdx], rcx
1710 mov rdx, [pu64Old]
1711 mov [rdx], rax
1712 setz al
1713 mov [fRet], al
1714 }
1715 return fRet;
1716# endif
1717
1718# elif defined(RT_ARCH_X86)
1719# if RT_INLINE_ASM_GNU_STYLE
1720 uint64_t u64Ret;
1721# if defined(PIC) || defined(__PIC__)
1722 /* NB: this code uses a memory clobber description, because the clean
1723 * solution with an output value for *pu64 makes gcc run out of registers.
1724 * This will cause suboptimal code, and anyone with a better solution is
1725 * welcome to improve this. */
1726 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1727 "lock; cmpxchg8b %3\n\t"
1728 "xchgl %%ebx, %1\n\t"
1729 : "=A" (u64Ret)
1730 : "DS" ((uint32_t)u64New)
1731 , "c" ((uint32_t)(u64New >> 32))
1732 , "m" (*pu64)
1733 , "0" (u64Old)
1734 : "memory"
1735 , "cc" );
1736# else /* !PIC */
1737 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1738 : "=A" (u64Ret)
1739 , "=m" (*pu64)
1740 : "b" ((uint32_t)u64New)
1741 , "c" ((uint32_t)(u64New >> 32))
1742 , "m" (*pu64)
1743 , "0" (u64Old)
1744 : "cc");
1745# endif
1746 *pu64Old = u64Ret;
1747 return u64Ret == u64Old;
1748# else
1749 uint32_t u32Ret;
1750 __asm
1751 {
1752 mov ebx, dword ptr [u64New]
1753 mov ecx, dword ptr [u64New + 4]
1754 mov edi, [pu64]
1755 mov eax, dword ptr [u64Old]
1756 mov edx, dword ptr [u64Old + 4]
1757 lock cmpxchg8b [edi]
1758 mov ebx, [pu64Old]
1759 mov [ebx], eax
1760 setz al
1761 movzx eax, al
1762 add ebx, 4
1763 mov [ebx], edx
1764 mov dword ptr [u32Ret], eax
1765 }
1766 return !!u32Ret;
1767# endif
1768
1769# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1770 union { uint32_t u; bool f; } fXchg;
1771 uint64_t u64ActualOld;
1772 uint32_t rcSpill;
1773 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1774 RTASM_ARM_DMB_SY
1775# if defined(RT_ARCH_ARM64)
1776 "ldaxr %[uOld], %[pMem]\n\t"
1777 "cmp %[uOld], %[uCmp]\n\t"
1778 "bne 1f\n\t" /* stop here if not equal */
1779 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1780 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1781 "mov %w[fXchg], #1\n\t"
1782# else
1783 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1784 "teq %[uOld], %[uCmp]\n\t"
1785 "teqeq %H[uOld], %H[uCmp]\n\t"
1786 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1787 "bne 1f\n\t" /* stop here if not equal */
1788 "cmp %[rc], #0\n\t"
1789 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1790 "mov %[fXchg], #1\n\t"
1791# endif
1792 "1:\n\t"
1793 : [pMem] "+m" (*pu64)
1794 , [uOld] "=&r" (u64ActualOld)
1795 , [rc] "=&r" (rcSpill)
1796 , [fXchg] "=&r" (fXchg.u)
1797 : [uCmp] "r" (u64Old)
1798 , [uNew] "r" (u64New)
1799 , "[fXchg]" (0)
1800 RTASM_ARM_DMB_SY_COMMA_IN_REG
1801 : "cc");
1802 *pu64Old = u64ActualOld;
1803 return fXchg.f;
1804
1805# else
1806# error "Port me"
1807# endif
1808}
1809#endif
1810
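/* A minimal sketch of a running-maximum update, assuming caller-provided pu64Max
 * (uint64_t volatile *) and u64Sample (illustrative names): the actual value handed
 * back on failure lets the comparison be redone without another volatile read.
 * @code
 *      uint64_t u64Cur = ASMAtomicReadU64(pu64Max);
 *      while (u64Sample > u64Cur)
 *      {
 *          uint64_t u64Actual;
 *          if (ASMAtomicCmpXchgExU64(pu64Max, u64Sample, u64Cur, &u64Actual))
 *              break;              // installed the new maximum
 *          u64Cur = u64Actual;     // somebody else won; re-check against their value
 *      }
 * @endcode
 */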
1811
1812/**
1813 * Atomically Compare and exchange a signed 64-bit value, additionally
1814 * passing back old value, ordered.
1815 *
1816 * @returns true if xchg was done.
1817 * @returns false if xchg wasn't done.
1818 *
1819 * @param pi64 Pointer to the 64-bit variable to update.
1820 * @param i64 The 64-bit value to assign to *pi64.
1821 * @param i64Old The value to compare with.
1822 * @param pi64Old Pointer to store the old value at.
1823 *
1824 * @remarks x86: Requires a Pentium or later.
1825 */
1826DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1827{
1828 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1829}
1830
1831/** @def ASMAtomicCmpXchgExHandle
1832 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1833 *
1834 * @param ph Pointer to the value to update.
1835 * @param hNew The new value to assign to *ph.
1836 * @param hOld The old value to compare *ph with.
1837 * @param fRc Where to store the result.
1838 * @param phOldVal Pointer to where to store the old value.
1839 *
1840 * @remarks This doesn't currently work for all handles (like RTFILE).
1841 */
1842#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1843# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1844 do { \
1845 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1846 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1847 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1848 } while (0)
1849#elif HC_ARCH_BITS == 64
1850# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1851 do { \
1852 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1853 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1854 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1855 } while (0)
1856#else
1857# error HC_ARCH_BITS
1858#endif
1859
1860
1861/** @def ASMAtomicCmpXchgExSize
1862 * Atomically Compare and Exchange a value whose size might differ
1863 * between platforms or compilers. Additionally passes back old value.
1864 *
1865 * @param pu Pointer to the value to update.
1866 * @param uNew The new value to assign to *pu.
1867 * @param uOld The old value to compare *pu with.
1868 * @param fRc Where to store the result.
1869 * @param puOldVal Pointer to where to store the old value.
1870 *
1871 * @remarks x86: Requires a 486 or later.
1872 */
1873#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1874 do { \
1875 switch (sizeof(*(pu))) { \
1876 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1877 break; \
1878 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1879 break; \
1880 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1881 (fRc) = false; \
1882 *(puOldVal) = 0; \
1883 break; \
1884 } \
1885 } while (0)
1886
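/* A minimal sketch of the size-dispatching form, assuming an illustrative pThis->uState
 * field of type RTHCUINTPTR and caller-provided uNewState/uOldState: fRc receives the
 * success indicator and uOldVal the value actually found.
 * @code
 *      bool        fRc;
 *      RTHCUINTPTR uOldVal;
 *      ASMAtomicCmpXchgExSize(&pThis->uState, uNewState, uOldState, fRc, &uOldVal);
 *      if (!fRc)
 *      {
 *          // lost the race; uOldVal holds the value currently in pThis->uState
 *      }
 * @endcode
 */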
1887
1888/**
1889 * Atomically Compare and Exchange a pointer value, additionally
1890 * passing back old value, ordered.
1891 *
1892 * @returns true if xchg was done.
1893 * @returns false if xchg wasn't done.
1894 *
1895 * @param ppv Pointer to the value to update.
1896 * @param pvNew The new value to assign to *ppv.
1897 * @param pvOld The old value to compare *ppv with.
1898 * @param ppvOld Pointer to store the old value at.
1899 *
1900 * @remarks x86: Requires a 486 or later.
1901 */
1902DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1903 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
1904{
1905#if ARCH_BITS == 32 || ARCH_BITS == 16
1906 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1907#elif ARCH_BITS == 64
1908 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1909#else
1910# error "ARCH_BITS is bogus"
1911#endif
1912}
1913
1914
1915/**
1916 * Atomically Compare and Exchange a pointer value, additionally
1917 * passing back old value, ordered.
1918 *
1919 * @returns true if xchg was done.
1920 * @returns false if xchg wasn't done.
1921 *
1922 * @param ppv Pointer to the value to update.
1923 * @param pvNew The new value to assign to *ppv.
1924 * @param pvOld The old value to compare *ppv with.
1925 * @param ppvOld Pointer to store the old value at.
1926 *
1927 * @remarks This is relatively type safe on GCC platforms.
1928 * @remarks x86: Requires a 486 or later.
1929 */
1930#ifdef __GNUC__
1931# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1932 __extension__ \
1933 ({\
1934 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1935 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1936 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1937 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1938 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1939 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1940 (void **)ppvOldTypeChecked); \
1941 fMacroRet; \
1942 })
1943#else
1944# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1945 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1946#endif
1947
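/* A minimal lock-free LIFO push sketch, assuming illustrative MYNODE / myLifoPush
 * names (not part of IPRT): the colliding head value reported by the Ex form seeds
 * the next attempt.
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *
 *      void myLifoPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 *      {
 *          MYNODE *pOld = ASMAtomicReadPtrT(ppHead, MYNODE *);
 *          for (;;)
 *          {
 *              MYNODE *pActual;
 *              pNode->pNext = pOld;
 *              if (ASMAtomicCmpXchgExPtr(ppHead, pNode, pOld, &pActual))
 *                  break;
 *              pOld = pActual;     // head moved under us; re-link and retry
 *          }
 *      }
 * @endcode
 */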
1948
1949/**
1950 * Virtualization unfriendly serializing instruction, always exits.
1951 */
1952#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1953RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
1954#else
1955DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
1956{
1957# if RT_INLINE_ASM_GNU_STYLE
1958 RTCCUINTREG xAX = 0;
1959# ifdef RT_ARCH_AMD64
1960 __asm__ __volatile__ ("cpuid"
1961 : "=a" (xAX)
1962 : "0" (xAX)
1963 : "rbx", "rcx", "rdx", "memory");
1964# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1965 __asm__ __volatile__ ("push %%ebx\n\t"
1966 "cpuid\n\t"
1967 "pop %%ebx\n\t"
1968 : "=a" (xAX)
1969 : "0" (xAX)
1970 : "ecx", "edx", "memory");
1971# else
1972 __asm__ __volatile__ ("cpuid"
1973 : "=a" (xAX)
1974 : "0" (xAX)
1975 : "ebx", "ecx", "edx", "memory");
1976# endif
1977
1978# elif RT_INLINE_ASM_USES_INTRIN
1979 int aInfo[4];
1980 _ReadWriteBarrier();
1981 __cpuid(aInfo, 0);
1982
1983# else
1984 __asm
1985 {
1986 push ebx
1987 xor eax, eax
1988 cpuid
1989 pop ebx
1990 }
1991# endif
1992}
1993#endif
1994
1995/**
1996 * Virtualization friendly serializing instruction, though more expensive.
1997 */
1998#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1999RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2000#else
2001DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2002{
2003# if RT_INLINE_ASM_GNU_STYLE
2004# ifdef RT_ARCH_AMD64
2005 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2006 "subq $128, %%rsp\n\t" /*redzone*/
2007 "mov %%ss, %%eax\n\t"
2008 "pushq %%rax\n\t"
2009 "pushq %%r10\n\t"
2010 "pushfq\n\t"
2011 "movl %%cs, %%eax\n\t"
2012 "pushq %%rax\n\t"
2013 "leaq 1f(%%rip), %%rax\n\t"
2014 "pushq %%rax\n\t"
2015 "iretq\n\t"
2016 "1:\n\t"
2017 ::: "rax", "r10", "memory", "cc");
2018# else
2019 __asm__ __volatile__ ("pushfl\n\t"
2020 "pushl %%cs\n\t"
2021 "pushl $1f\n\t"
2022 "iretl\n\t"
2023 "1:\n\t"
2024 ::: "memory");
2025# endif
2026
2027# else
2028 __asm
2029 {
2030 pushfd
2031 push cs
2032 push la_ret
2033 iretd
2034 la_ret:
2035 }
2036# endif
2037}
2038#endif
2039
2040/**
2041 * Virtualization friendlier serializing instruction, may still cause exits.
2042 */
2043#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2044RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2045#else
2046DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2047{
2048# if RT_INLINE_ASM_GNU_STYLE
2049 /* rdtscp is not supported by ancient linux build VM of course :-( */
2050# ifdef RT_ARCH_AMD64
2051 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2052 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2053# else
2054 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2055 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2056# endif
2057# else
2058# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2059 uint32_t uIgnore;
2060 _ReadWriteBarrier();
2061 (void)__rdtscp(&uIgnore);
2062 (void)uIgnore;
2063# else
2064 __asm
2065 {
2066 rdtscp
2067 }
2068# endif
2069# endif
2070}
2071#endif
2072
2073
2074/**
2075 * Serialize Instruction (both data store and instruction flush).
2076 */
2077#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2078# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2079#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2080# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2081#elif defined(RT_ARCH_SPARC64)
2082RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2083#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2084DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2085{
2086 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2087}
2088#else
2089# error "Port me"
2090#endif
2091
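/* A minimal timing sketch: bracketing a measurement so earlier instructions cannot
 * drift into the timed region. ASMReadTSC() is assumed to be available from
 * iprt/asm-amd64-x86.h; substitute any other TSC/clock read if it is not.
 * @code
 *      uint64_t uStart, cTicks;
 *      ASMSerializeInstruction();
 *      uStart = ASMReadTSC();
 *      // ... code being measured ...
 *      ASMSerializeInstruction();
 *      cTicks = ASMReadTSC() - uStart;
 * @endcode
 */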
2092
2093/**
2094 * Memory fence, waits for any pending writes and reads to complete.
2095 * @note No implicit compiler barrier (which is probably stupid).
2096 */
2097DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2098{
2099#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2100# if RT_INLINE_ASM_GNU_STYLE
2101 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2102# elif RT_INLINE_ASM_USES_INTRIN
2103 _mm_mfence();
2104# else
2105 __asm
2106 {
2107 _emit 0x0f
2108 _emit 0xae
2109 _emit 0xf0
2110 }
2111# endif
2112#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2113 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2114#elif ARCH_BITS == 16
2115 uint16_t volatile u16;
2116 ASMAtomicXchgU16(&u16, 0);
2117#else
2118 uint32_t volatile u32;
2119 ASMAtomicXchgU32(&u32, 0);
2120#endif
2121}
2122
2123
2124/**
2125 * Write fence, waits for any pending writes to complete.
2126 * @note No implicit compiler barrier (which is probably stupid).
2127 */
2128DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2129{
2130#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2131# if RT_INLINE_ASM_GNU_STYLE
2132 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2133# elif RT_INLINE_ASM_USES_INTRIN
2134 _mm_sfence();
2135# else
2136 __asm
2137 {
2138 _emit 0x0f
2139 _emit 0xae
2140 _emit 0xf8
2141 }
2142# endif
2143#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2144 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2145#else
2146 ASMMemoryFence();
2147#endif
2148}
2149
2150
2151/**
2152 * Read fence, waits for any pending reads to complete.
2153 * @note No implicit compiler barrier (which is probably stupid).
2154 */
2155DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2156{
2157#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2158# if RT_INLINE_ASM_GNU_STYLE
2159 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2160# elif RT_INLINE_ASM_USES_INTRIN
2161 _mm_lfence();
2162# else
2163 __asm
2164 {
2165 _emit 0x0f
2166 _emit 0xae
2167 _emit 0xe8
2168 }
2169# endif
2170#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2171 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2172#else
2173 ASMMemoryFence();
2174#endif
2175}
2176
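/* A minimal publish/consume sketch, assuming illustrative uint32_t volatile globals
 * g_uPayload and g_fReady: the write fence keeps the payload store ahead of the flag
 * store, the read fence keeps the flag check ahead of the payload load. Since these
 * fences are not compiler barriers (see the notes above), the shared fields are
 * accessed through the atomic helpers.
 * @code
 *      // producer
 *      ASMAtomicUoWriteU32(&g_uPayload, uValue);
 *      ASMWriteFence();
 *      ASMAtomicUoWriteU32(&g_fReady, 1);
 *
 *      // consumer
 *      if (ASMAtomicUoReadU32(&g_fReady))
 *      {
 *          ASMReadFence();
 *          uValue = ASMAtomicUoReadU32(&g_uPayload);
 *      }
 * @endcode
 */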
2177
2178/**
2179 * Atomically reads an unsigned 8-bit value, ordered.
2180 *
2181 * @returns Current *pu8 value
2182 * @param pu8 Pointer to the 8-bit variable to read.
2183 */
2184DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2185{
2186#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2187 uint32_t u32;
2188 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2189 RTASM_ARM_DMB_SY
2190# if defined(RT_ARCH_ARM64)
2191 "ldxrb %w[uDst], %[pMem]\n\t"
2192# else
2193 "ldrexb %[uDst], %[pMem]\n\t"
2194# endif
2195 : [uDst] "=&r" (u32)
2196 : [pMem] "m" (*pu8)
2197 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2198 return (uint8_t)u32;
2199#else
2200 ASMMemoryFence();
2201 return *pu8; /* byte reads are atomic on x86 */
2202#endif
2203}
2204
2205
2206/**
2207 * Atomically reads an unsigned 8-bit value, unordered.
2208 *
2209 * @returns Current *pu8 value
2210 * @param pu8 Pointer to the 8-bit variable to read.
2211 */
2212DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2213{
2214#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2215 uint32_t u32;
2216 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2217# if defined(RT_ARCH_ARM64)
2218 "ldxrb %w[uDst], %[pMem]\n\t"
2219# else
2220 "ldrexb %[uDst], %[pMem]\n\t"
2221# endif
2222 : [uDst] "=&r" (u32)
2223 : [pMem] "m" (*pu8));
2224 return (uint8_t)u32;
2225#else
2226 return *pu8; /* byte reads are atomic on x86 */
2227#endif
2228}
2229
2230
2231/**
2232 * Atomically reads a signed 8-bit value, ordered.
2233 *
2234 * @returns Current *pi8 value
2235 * @param pi8 Pointer to the 8-bit variable to read.
2236 */
2237DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2238{
2239 ASMMemoryFence();
2240#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2241 int32_t i32;
2242 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2243 RTASM_ARM_DMB_SY
2244# if defined(RT_ARCH_ARM64)
2245 "ldxrb %w[iDst], %[pMem]\n\t"
2246# else
2247 "ldrexb %[iDst], %[pMem]\n\t"
2248# endif
2249 : [iDst] "=&r" (i32)
2250 : [pMem] "m" (*pi8)
2251 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2252 return (int8_t)i32;
2253#else
2254 return *pi8; /* byte reads are atomic on x86 */
2255#endif
2256}
2257
2258
2259/**
2260 * Atomically reads a signed 8-bit value, unordered.
2261 *
2262 * @returns Current *pi8 value
2263 * @param pi8 Pointer to the 8-bit variable to read.
2264 */
2265DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2266{
2267#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2268 int32_t i32;
2269 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2270# if defined(RT_ARCH_ARM64)
2271 "ldxrb %w[iDst], %[pMem]\n\t"
2272# else
2273 "ldrexb %[iDst], %[pMem]\n\t"
2274# endif
2275 : [iDst] "=&r" (i32)
2276 : [pMem] "m" (*pi8));
2277 return (int8_t)i32;
2278#else
2279 return *pi8; /* byte reads are atomic on x86 */
2280#endif
2281}
2282
2283
2284/**
2285 * Atomically reads an unsigned 16-bit value, ordered.
2286 *
2287 * @returns Current *pu16 value
2288 * @param pu16 Pointer to the 16-bit variable to read.
2289 */
2290DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2291{
2292 Assert(!((uintptr_t)pu16 & 1));
2293#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2294 uint32_t u32;
2295 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2296 RTASM_ARM_DMB_SY
2297# if defined(RT_ARCH_ARM64)
2298 "ldxrh %w[uDst], %[pMem]\n\t"
2299# else
2300 "ldrexh %[uDst], %[pMem]\n\t"
2301# endif
2302 : [uDst] "=&r" (u32)
2303 : [pMem] "m" (*pu16)
2304 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2305 return (uint16_t)u32;
2306#else
2307 ASMMemoryFence();
2308 return *pu16;
2309#endif
2310}
2311
2312
2313/**
2314 * Atomically reads an unsigned 16-bit value, unordered.
2315 *
2316 * @returns Current *pu16 value
2317 * @param pu16 Pointer to the 16-bit variable to read.
2318 */
2319DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2320{
2321 Assert(!((uintptr_t)pu16 & 1));
2322#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2323 uint32_t u32;
2324 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2325# if defined(RT_ARCH_ARM64)
2326 "ldxrh %w[uDst], %[pMem]\n\t"
2327# else
2328 "ldrexh %[uDst], %[pMem]\n\t"
2329# endif
2330 : [uDst] "=&r" (u32)
2331 : [pMem] "m" (*pu16));
2332 return (uint16_t)u32;
2333#else
2334 return *pu16;
2335#endif
2336}
2337
2338
2339/**
2340 * Atomically reads a signed 16-bit value, ordered.
2341 *
2342 * @returns Current *pi16 value
2343 * @param pi16 Pointer to the 16-bit variable to read.
2344 */
2345DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2346{
2347 Assert(!((uintptr_t)pi16 & 1));
2348#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2349 int32_t i32;
2350 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2351 RTASM_ARM_DMB_SY
2352# if defined(RT_ARCH_ARM64)
2353 "ldxrh %w[iDst], %[pMem]\n\t"
2354# else
2355 "ldrexh %[iDst], %[pMem]\n\t"
2356# endif
2357 : [iDst] "=&r" (i32)
2358 : [pMem] "m" (*pi16)
2359 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2360 return (int16_t)i32;
2361#else
2362 ASMMemoryFence();
2363 return *pi16;
2364#endif
2365}
2366
2367
2368/**
2369 * Atomically reads a signed 16-bit value, unordered.
2370 *
2371 * @returns Current *pi16 value
2372 * @param pi16 Pointer to the 16-bit variable to read.
2373 */
2374DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2375{
2376 Assert(!((uintptr_t)pi16 & 1));
2377#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2378 int32_t i32;
2379 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2380# if defined(RT_ARCH_ARM64)
2381 "ldxrh %w[iDst], %[pMem]\n\t"
2382# else
2383 "ldrexh %[iDst], %[pMem]\n\t"
2384# endif
2385 : [iDst] "=&r" (i32)
2386 : [pMem] "m" (*pi16));
2387 return (int16_t)i32;
2388#else
2389 return *pi16;
2390#endif
2391}
2392
2393
2394/**
2395 * Atomically reads an unsigned 32-bit value, ordered.
2396 *
2397 * @returns Current *pu32 value
2398 * @param pu32 Pointer to the 32-bit variable to read.
2399 */
2400DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2401{
2402 Assert(!((uintptr_t)pu32 & 3));
2403#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2404 uint32_t u32;
2405 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2406 RTASM_ARM_DMB_SY
2407# if defined(RT_ARCH_ARM64)
2408 "ldxr %w[uDst], %[pMem]\n\t"
2409# else
2410 "ldrex %[uDst], %[pMem]\n\t"
2411# endif
2412 : [uDst] "=&r" (u32)
2413 : [pMem] "m" (*pu32)
2414 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2415 return u32;
2416#else
2417 ASMMemoryFence();
2418# if ARCH_BITS == 16
2419 AssertFailed(); /** @todo 16-bit */
2420# endif
2421 return *pu32;
2422#endif
2423}
2424
2425
2426/**
2427 * Atomically reads an unsigned 32-bit value, unordered.
2428 *
2429 * @returns Current *pu32 value
2430 * @param pu32 Pointer to the 32-bit variable to read.
2431 */
2432DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2433{
2434 Assert(!((uintptr_t)pu32 & 3));
2435#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2436 uint32_t u32;
2437 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2438# if defined(RT_ARCH_ARM64)
2439 "ldxr %w[uDst], %[pMem]\n\t"
2440# else
2441 "ldrex %[uDst], %[pMem]\n\t"
2442# endif
2443 : [uDst] "=&r" (u32)
2444 : [pMem] "m" (*pu32));
2445 return u32;
2446#else
2447# if ARCH_BITS == 16
2448 AssertFailed(); /** @todo 16-bit */
2449# endif
2450 return *pu32;
2451#endif
2452}
2453
2454
2455/**
2456 * Atomically reads a signed 32-bit value, ordered.
2457 *
2458 * @returns Current *pi32 value
2459 * @param pi32 Pointer to the 32-bit variable to read.
2460 */
2461DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2462{
2463 Assert(!((uintptr_t)pi32 & 3));
2464#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2465 int32_t i32;
2466 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2467 RTASM_ARM_DMB_SY
2468# if defined(RT_ARCH_ARM64)
2469 "ldxr %w[iDst], %[pMem]\n\t"
2470# else
2471 "ldrex %[iDst], %[pMem]\n\t"
2472# endif
2473 : [iDst] "=&r" (i32)
2474 : [pMem] "m" (*pi32)
2475 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2476 return i32;
2477#else
2478 ASMMemoryFence();
2479# if ARCH_BITS == 16
2480 AssertFailed(); /** @todo 16-bit */
2481# endif
2482 return *pi32;
2483#endif
2484}
2485
2486
2487/**
2488 * Atomically reads a signed 32-bit value, unordered.
2489 *
2490 * @returns Current *pi32 value
2491 * @param pi32 Pointer to the 32-bit variable to read.
2492 */
2493DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2494{
2495 Assert(!((uintptr_t)pi32 & 3));
2496#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2497 int32_t i32;
2498 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2499# if defined(RT_ARCH_ARM64)
2500 "ldxr %w[iDst], %[pMem]\n\t"
2501# else
2502 "ldrex %[iDst], %[pMem]\n\t"
2503# endif
2504 : [iDst] "=&r" (i32)
2505 : [pMem] "m" (*pi32));
2506 return i32;
2507
2508#else
2509# if ARCH_BITS == 16
2510 AssertFailed(); /** @todo 16-bit */
2511# endif
2512 return *pi32;
2513#endif
2514}
2515
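/* A minimal polling sketch, assuming illustrative pu32State / MYSTATE_READY names:
 * the unordered read is enough while merely spinning; the ASMReadFence() after the
 * loop keeps the data loads that follow from being satisfied ahead of the flag check.
 * @code
 *      while (ASMAtomicUoReadU32(pu32State) != MYSTATE_READY)
 *          ASMNopPause();
 *      ASMReadFence();     // order the data loads that follow after the check
 * @endcode
 */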
2516
2517/**
2518 * Atomically reads an unsigned 64-bit value, ordered.
2519 *
2520 * @returns Current *pu64 value
2521 * @param pu64 Pointer to the 64-bit variable to read.
2522 * The memory pointed to must be writable.
2523 *
2524 * @remarks This may fault if the memory is read-only!
2525 * @remarks x86: Requires a Pentium or later.
2526 */
2527#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2528 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2529RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2530#else
2531DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2532{
2533 uint64_t u64;
2534# ifdef RT_ARCH_AMD64
2535 Assert(!((uintptr_t)pu64 & 7));
2536/*# if RT_INLINE_ASM_GNU_STYLE
2537 __asm__ __volatile__( "mfence\n\t"
2538 "movq %1, %0\n\t"
2539 : "=r" (u64)
2540 : "m" (*pu64));
2541# else
2542 __asm
2543 {
2544 mfence
2545 mov rdx, [pu64]
2546 mov rax, [rdx]
2547 mov [u64], rax
2548 }
2549# endif*/
2550 ASMMemoryFence();
2551 u64 = *pu64;
2552
2553# elif defined(RT_ARCH_X86)
2554# if RT_INLINE_ASM_GNU_STYLE
2555# if defined(PIC) || defined(__PIC__)
2556 uint32_t u32EBX = 0;
2557 Assert(!((uintptr_t)pu64 & 7));
2558 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2559 "lock; cmpxchg8b (%5)\n\t"
2560 "movl %3, %%ebx\n\t"
2561 : "=A" (u64)
2562# if RT_GNUC_PREREQ(4, 3)
2563 , "+m" (*pu64)
2564# else
2565 , "=m" (*pu64)
2566# endif
2567 : "0" (0ULL)
2568 , "m" (u32EBX)
2569 , "c" (0)
2570 , "S" (pu64)
2571 : "cc");
2572# else /* !PIC */
2573 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2574 : "=A" (u64)
2575 , "+m" (*pu64)
2576 : "0" (0ULL)
2577 , "b" (0)
2578 , "c" (0)
2579 : "cc");
2580# endif
2581# else
2582 Assert(!((uintptr_t)pu64 & 7));
2583 __asm
2584 {
2585 xor eax, eax
2586 xor edx, edx
2587 mov edi, pu64
2588 xor ecx, ecx
2589 xor ebx, ebx
2590 lock cmpxchg8b [edi]
2591 mov dword ptr [u64], eax
2592 mov dword ptr [u64 + 4], edx
2593 }
2594# endif
2595
2596# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2597 Assert(!((uintptr_t)pu64 & 7));
2598 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2599 RTASM_ARM_DMB_SY
2600# if defined(RT_ARCH_ARM64)
2601 "ldxr %[uDst], %[pMem]\n\t"
2602# else
2603 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2604# endif
2605 : [uDst] "=&r" (u64)
2606 : [pMem] "m" (*pu64)
2607 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2608
2609# else
2610# error "Port me"
2611# endif
2612 return u64;
2613}
2614#endif
2615
2616
2617/**
2618 * Atomically reads an unsigned 64-bit value, unordered.
2619 *
2620 * @returns Current *pu64 value
2621 * @param pu64 Pointer to the 64-bit variable to read.
2622 * The memory pointed to must be writable.
2623 *
2624 * @remarks This may fault if the memory is read-only!
2625 * @remarks x86: Requires a Pentium or later.
2626 */
2627#if !defined(RT_ARCH_AMD64) \
2628 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2629 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2630RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2631#else
2632DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2633{
2634 uint64_t u64;
2635# ifdef RT_ARCH_AMD64
2636 Assert(!((uintptr_t)pu64 & 7));
2637/*# if RT_INLINE_ASM_GNU_STYLE
2638 Assert(!((uintptr_t)pu64 & 7));
2639 __asm__ __volatile__("movq %1, %0\n\t"
2640 : "=r" (u64)
2641 : "m" (*pu64));
2642# else
2643 __asm
2644 {
2645 mov rdx, [pu64]
2646 mov rax, [rdx]
2647 mov [u64], rax
2648 }
2649# endif */
2650 u64 = *pu64;
2651
2652# elif defined(RT_ARCH_X86)
2653# if RT_INLINE_ASM_GNU_STYLE
2654# if defined(PIC) || defined(__PIC__)
2655 uint32_t u32EBX = 0;
2656 uint32_t u32Spill;
2657 Assert(!((uintptr_t)pu64 & 7));
2658 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2659 "xor %%ecx,%%ecx\n\t"
2660 "xor %%edx,%%edx\n\t"
2661 "xchgl %%ebx, %3\n\t"
2662 "lock; cmpxchg8b (%4)\n\t"
2663 "movl %3, %%ebx\n\t"
2664 : "=A" (u64)
2665# if RT_GNUC_PREREQ(4, 3)
2666 , "+m" (*pu64)
2667# else
2668 , "=m" (*pu64)
2669# endif
2670 , "=c" (u32Spill)
2671 : "m" (u32EBX)
2672 , "S" (pu64)
2673 : "cc");
2674# else /* !PIC */
2675 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2676 : "=A" (u64)
2677 , "+m" (*pu64)
2678 : "0" (0ULL)
2679 , "b" (0)
2680 , "c" (0)
2681 : "cc");
2682# endif
2683# else
2684 Assert(!((uintptr_t)pu64 & 7));
2685 __asm
2686 {
2687 xor eax, eax
2688 xor edx, edx
2689 mov edi, pu64
2690 xor ecx, ecx
2691 xor ebx, ebx
2692 lock cmpxchg8b [edi]
2693 mov dword ptr [u64], eax
2694 mov dword ptr [u64 + 4], edx
2695 }
2696# endif
2697
2698# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2699 Assert(!((uintptr_t)pu64 & 7));
2700 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
2701# if defined(RT_ARCH_ARM64)
2702 "ldxr %[uDst], %[pMem]\n\t"
2703# else
2704 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2705# endif
2706 : [uDst] "=&r" (u64)
2707 : [pMem] "m" (*pu64));
2708
2709# else
2710# error "Port me"
2711# endif
2712 return u64;
2713}
2714#endif
2715
2716
2717/**
2718 * Atomically reads a signed 64-bit value, ordered.
2719 *
2720 * @returns Current *pi64 value
2721 * @param pi64 Pointer to the 64-bit variable to read.
2722 * The memory pointed to must be writable.
2723 *
2724 * @remarks This may fault if the memory is read-only!
2725 * @remarks x86: Requires a Pentium or later.
2726 */
2727DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2728{
2729 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2730}
2731
2732
2733/**
2734 * Atomically reads a signed 64-bit value, unordered.
2735 *
2736 * @returns Current *pi64 value
2737 * @param pi64 Pointer to the 64-bit variable to read.
2738 * The memory pointed to must be writable.
2739 *
2740 * @remarks This will fault if the memory is read-only!
2741 * @remarks x86: Requires a Pentium or later.
2742 */
2743DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2744{
2745 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2746}
2747
2748
2749/**
2750 * Atomically reads a size_t value, ordered.
2751 *
2752 * @returns Current *pcb value
2753 * @param pcb Pointer to the size_t variable to read.
2754 */
2755DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2756{
2757#if ARCH_BITS == 64
2758 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2759#elif ARCH_BITS == 32
2760 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2761#elif ARCH_BITS == 16
2762 AssertCompileSize(size_t, 2);
2763 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2764#else
2765# error "Unsupported ARCH_BITS value"
2766#endif
2767}
2768
2769
2770/**
2771 * Atomically reads a size_t value, unordered.
2772 *
2773 * @returns Current *pcb value
2774 * @param pcb Pointer to the size_t variable to read.
2775 */
2776DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2777{
2778#if ARCH_BITS == 64
2779 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2780#elif ARCH_BITS == 32
2781 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2782#elif ARCH_BITS == 16
2783 AssertCompileSize(size_t, 2);
2784 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2785#else
2786# error "Unsupported ARCH_BITS value"
2787#endif
2788}
2789
2790
2791/**
2792 * Atomically reads a pointer value, ordered.
2793 *
2794 * @returns Current *pv value
2795 * @param ppv Pointer to the pointer variable to read.
2796 *
2797 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2798 * requires less typing (no casts).
2799 */
2800DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2801{
2802#if ARCH_BITS == 32 || ARCH_BITS == 16
2803 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2804#elif ARCH_BITS == 64
2805 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2806#else
2807# error "ARCH_BITS is bogus"
2808#endif
2809}
2810
2811/**
2812 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2813 *
2814 * @returns Current *pv value
2815 * @param ppv Pointer to the pointer variable to read.
2816 * @param Type The type of *ppv, sans volatile.
2817 */
2818#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2819# define ASMAtomicReadPtrT(ppv, Type) \
2820 __extension__ \
2821 ({\
2822 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2823 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2824 pvTypeChecked; \
2825 })
2826#else
2827# define ASMAtomicReadPtrT(ppv, Type) \
2828 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2829#endif
2830
2831
2832/**
2833 * Atomically reads a pointer value, unordered.
2834 *
2835 * @returns Current *pv value
2836 * @param ppv Pointer to the pointer variable to read.
2837 *
2838 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2839 * requires less typing (no casts).
2840 */
2841DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2842{
2843#if ARCH_BITS == 32 || ARCH_BITS == 16
2844 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2845#elif ARCH_BITS == 64
2846 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2847#else
2848# error "ARCH_BITS is bogus"
2849#endif
2850}
2851
2852
2853/**
2854 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2855 *
2856 * @returns Current *pv value
2857 * @param ppv Pointer to the pointer variable to read.
2858 * @param Type The type of *ppv, sans volatile.
2859 */
2860#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2861# define ASMAtomicUoReadPtrT(ppv, Type) \
2862 __extension__ \
2863 ({\
2864 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2865 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2866 pvTypeChecked; \
2867 })
2868#else
2869# define ASMAtomicUoReadPtrT(ppv, Type) \
2870 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2871#endif
2872
2873
2874/**
2875 * Atomically reads a boolean value, ordered.
2876 *
2877 * @returns Current *pf value
2878 * @param pf Pointer to the boolean variable to read.
2879 */
2880DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2881{
2882 ASMMemoryFence();
2883 return *pf; /* byte reads are atomic on x86 */
2884}
2885
2886
2887/**
2888 * Atomically reads a boolean value, unordered.
2889 *
2890 * @returns Current *pf value
2891 * @param pf Pointer to the boolean variable to read.
2892 */
2893DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2894{
2895 return *pf; /* byte reads are atomic on x86 */
2896}
2897
2898
2899/**
2900 * Atomically read a typical IPRT handle value, ordered.
2901 *
2902 * @param ph Pointer to the handle variable to read.
2903 * @param phRes Where to store the result.
2904 *
2905 * @remarks This doesn't currently work for all handles (like RTFILE).
2906 */
2907#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2908# define ASMAtomicReadHandle(ph, phRes) \
2909 do { \
2910 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2911 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2912 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2913 } while (0)
2914#elif HC_ARCH_BITS == 64
2915# define ASMAtomicReadHandle(ph, phRes) \
2916 do { \
2917 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2918 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2919 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2920 } while (0)
2921#else
2922# error HC_ARCH_BITS
2923#endif
2924
2925
2926/**
2927 * Atomically read a typical IPRT handle value, unordered.
2928 *
2929 * @param ph Pointer to the handle variable to read.
2930 * @param phRes Where to store the result.
2931 *
2932 * @remarks This doesn't currently work for all handles (like RTFILE).
2933 */
2934#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2935# define ASMAtomicUoReadHandle(ph, phRes) \
2936 do { \
2937 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2938 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2939 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2940 } while (0)
2941#elif HC_ARCH_BITS == 64
2942# define ASMAtomicUoReadHandle(ph, phRes) \
2943 do { \
2944 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2945 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2946 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2947 } while (0)
2948#else
2949# error HC_ARCH_BITS
2950#endif
2951
2952
2953/**
2954 * Atomically read a value whose size might differ
2955 * between platforms or compilers, ordered.
2956 *
2957 * @param pu Pointer to the variable to read.
2958 * @param puRes Where to store the result.
2959 */
2960#define ASMAtomicReadSize(pu, puRes) \
2961 do { \
2962 switch (sizeof(*(pu))) { \
2963 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2964 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2965 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2966 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2967 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2968 } \
2969 } while (0)
2970
2971
2972/**
2973 * Atomically read a value whose size might differ
2974 * between platforms or compilers, unordered.
2975 *
2976 * @param pu Pointer to the variable to read.
2977 * @param puRes Where to store the result.
2978 */
2979#define ASMAtomicUoReadSize(pu, puRes) \
2980 do { \
2981 switch (sizeof(*(pu))) { \
2982 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2983 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2984 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2985 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2986 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2987 } \
2988 } while (0)
2989
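/* A minimal sketch of reading a field whose width depends on the host without
 * spelling the width out at the call site, assuming an illustrative pThis->uStamp
 * member of type RTHCUINTPTR.
 * @code
 *      RTHCUINTPTR uStamp;
 *      ASMAtomicReadSize(&pThis->uStamp, &uStamp);
 * @endcode
 */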
2990
2991/**
2992 * Atomically writes an unsigned 8-bit value, ordered.
2993 *
2994 * @param pu8 Pointer to the 8-bit variable.
2995 * @param u8 The 8-bit value to assign to *pu8.
2996 */
2997DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2998{
2999 /** @todo Any possible ARM32/ARM64 optimizations here? */
3000 ASMAtomicXchgU8(pu8, u8);
3001}
3002
3003
3004/**
3005 * Atomically writes an unsigned 8-bit value, unordered.
3006 *
3007 * @param pu8 Pointer to the 8-bit variable.
3008 * @param u8 The 8-bit value to assign to *pu8.
3009 */
3010DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3011{
3012 /** @todo Any possible ARM32/ARM64 improvements here? */
3013 *pu8 = u8; /* byte writes are atomic on x86 */
3014}
3015
3016
3017/**
3018 * Atomically writes a signed 8-bit value, ordered.
3019 *
3020 * @param pi8 Pointer to the 8-bit variable to write.
3021 * @param i8 The 8-bit value to assign to *pi8.
3022 */
3023DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3024{
3025 /** @todo Any possible ARM32/ARM64 optimizations here? */
3026 ASMAtomicXchgS8(pi8, i8);
3027}
3028
3029
3030/**
3031 * Atomically writes a signed 8-bit value, unordered.
3032 *
3033 * @param pi8 Pointer to the 8-bit variable to write.
3034 * @param i8 The 8-bit value to assign to *pi8.
3035 */
3036DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3037{
3038 *pi8 = i8; /* byte writes are atomic on x86 */
3039}
3040
3041
3042/**
3043 * Atomically writes an unsigned 16-bit value, ordered.
3044 *
3045 * @param pu16 Pointer to the 16-bit variable to write.
3046 * @param u16 The 16-bit value to assign to *pu16.
3047 */
3048DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3049{
3050 /** @todo Any possible ARM32/ARM64 optimizations here? */
3051 ASMAtomicXchgU16(pu16, u16);
3052}
3053
3054
3055/**
3056 * Atomically writes an unsigned 16-bit value, unordered.
3057 *
3058 * @param pu16 Pointer to the 16-bit variable to write.
3059 * @param u16 The 16-bit value to assign to *pu16.
3060 */
3061DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3062{
3063 Assert(!((uintptr_t)pu16 & 1));
3064 *pu16 = u16;
3065}
3066
3067
3068/**
3069 * Atomically writes a signed 16-bit value, ordered.
3070 *
3071 * @param pi16 Pointer to the 16-bit variable to write.
3072 * @param i16 The 16-bit value to assign to *pi16.
3073 */
3074DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3075{
3076 /** @todo Any possible ARM32/ARM64 optimizations here? */
3077 ASMAtomicXchgS16(pi16, i16);
3078}
3079
3080
3081/**
3082 * Atomically writes a signed 16-bit value, unordered.
3083 *
3084 * @param pi16 Pointer to the 16-bit variable to write.
3085 * @param i16 The 16-bit value to assign to *pi16.
3086 */
3087DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3088{
3089 Assert(!((uintptr_t)pi16 & 1));
3090 *pi16 = i16;
3091}
3092
3093
3094/**
3095 * Atomically writes an unsigned 32-bit value, ordered.
3096 *
3097 * @param pu32 Pointer to the 32-bit variable to write.
3098 * @param u32 The 32-bit value to assign to *pu32.
3099 */
3100DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3101{
3102 /** @todo Any possible ARM32/ARM64 optimizations here? */
3103 ASMAtomicXchgU32(pu32, u32);
3104}
3105
3106
3107/**
3108 * Atomically writes an unsigned 32-bit value, unordered.
3109 *
3110 * @param pu32 Pointer to the 32-bit variable to write.
3111 * @param u32 The 32-bit value to assign to *pu32.
3112 */
3113DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3114{
3115 Assert(!((uintptr_t)pu32 & 3));
3116#if ARCH_BITS >= 32
3117 *pu32 = u32;
3118#else
3119 ASMAtomicXchgU32(pu32, u32);
3120#endif
3121}
3122
3123
3124/**
3125 * Atomically writes a signed 32-bit value, ordered.
3126 *
3127 * @param pi32 Pointer to the 32-bit variable to write.
3128 * @param i32 The 32-bit value to assign to *pi32.
3129 */
3130DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3131{
3132 ASMAtomicXchgS32(pi32, i32);
3133}
3134
3135
3136/**
3137 * Atomically writes a signed 32-bit value, unordered.
3138 *
3139 * @param pi32 Pointer to the 32-bit variable to write.
3140 * @param i32 The 32-bit value to assign to *pi32.
3141 */
3142DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3143{
3144 Assert(!((uintptr_t)pi32 & 3));
3145#if ARCH_BITS >= 32
3146 *pi32 = i32;
3147#else
3148 ASMAtomicXchgS32(pi32, i32);
3149#endif
3150}
3151
3152
3153/**
3154 * Atomically writes an unsigned 64-bit value, ordered.
3155 *
3156 * @param pu64 Pointer to the 64-bit variable to write.
3157 * @param u64 The 64-bit value to assign to *pu64.
3158 */
3159DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3160{
3161 /** @todo Any possible ARM32/ARM64 optimizations here? */
3162 ASMAtomicXchgU64(pu64, u64);
3163}
3164
3165
3166/**
3167 * Atomically writes an unsigned 64-bit value, unordered.
3168 *
3169 * @param pu64 Pointer to the 64-bit variable to write.
3170 * @param u64 The 64-bit value to assign to *pu64.
3171 */
3172DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3173{
3174 Assert(!((uintptr_t)pu64 & 7));
3175#if ARCH_BITS == 64
3176 *pu64 = u64;
3177#else
3178 ASMAtomicXchgU64(pu64, u64);
3179#endif
3180}
3181
3182
3183/**
3184 * Atomically writes a signed 64-bit value, ordered.
3185 *
3186 * @param pi64 Pointer to the 64-bit variable to write.
3187 * @param i64 The 64-bit value to assign to *pi64.
3188 */
3189DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3190{
3191 /** @todo Any possible ARM32/ARM64 optimizations here? */
3192 ASMAtomicXchgS64(pi64, i64);
3193}
3194
3195
3196/**
3197 * Atomically writes a signed 64-bit value, unordered.
3198 *
3199 * @param pi64 Pointer to the 64-bit variable to write.
3200 * @param i64 The 64-bit value to assign to *pi64.
3201 */
3202DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3203{
3204 Assert(!((uintptr_t)pi64 & 7));
3205#if ARCH_BITS == 64
3206 *pi64 = i64;
3207#else
3208 ASMAtomicXchgS64(pi64, i64);
3209#endif
3210}
3211
3212
3213/**
3214 * Atomically writes a size_t value, ordered.
3215 *
3216 * @returns nothing.
3217 * @param pcb Pointer to the size_t variable to write.
3218 * @param cb The value to assign to *pcb.
3219 */
3220DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3221{
3222#if ARCH_BITS == 64
3223 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3224#elif ARCH_BITS == 32
3225 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3226#elif ARCH_BITS == 16
3227 AssertCompileSize(size_t, 2);
3228 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3229#else
3230# error "Unsupported ARCH_BITS value"
3231#endif
3232}
3233
3234
3235/**
3236 * Atomically writes a size_t value, unordered.
3237 *
3238 * @returns nothing.
3239 * @param pcb Pointer to the size_t variable to write.
3240 * @param cb The value to assign to *pcb.
3241 */
3242DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3243{
3244#if ARCH_BITS == 64
3245 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3246#elif ARCH_BITS == 32
3247 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3248#elif ARCH_BITS == 16
3249 AssertCompileSize(size_t, 2);
3250 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3251#else
3252# error "Unsupported ARCH_BITS value"
3253#endif
3254}
3255
3256
3257/**
3258 * Atomically writes a boolean value, ordered.
3259 *
3260 * @param pf Pointer to the boolean variable to write.
3261 * @param f The boolean value to assign to *pf.
3262 */
3263DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3264{
3265 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3266}
3267
3268
3269/**
3270 * Atomically writes a boolean value, unordered.
3271 *
3272 * @param pf Pointer to the boolean variable to write.
3273 * @param f The boolean value to assign to *pf.
3274 */
3275DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3276{
3277 *pf = f; /* byte writes are atomic on x86 */
3278}
3279
3280
3281/**
3282 * Atomically writes a pointer value, ordered.
3283 *
3284 * @param ppv Pointer to the pointer variable to write.
3285 * @param pv The pointer value to assign to *ppv.
3286 */
3287DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3288{
3289#if ARCH_BITS == 32 || ARCH_BITS == 16
3290 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3291#elif ARCH_BITS == 64
3292 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3293#else
3294# error "ARCH_BITS is bogus"
3295#endif
3296}
3297
3298
3299/**
3300 * Atomically writes a pointer value, unordered.
3301 *
3302 * @param ppv Pointer to the pointer variable to write.
3303 * @param pv The pointer value to assign to *ppv.
3304 */
3305DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3306{
3307#if ARCH_BITS == 32 || ARCH_BITS == 16
3308 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3309#elif ARCH_BITS == 64
3310 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3311#else
3312# error "ARCH_BITS is bogus"
3313#endif
3314}
3315
3316
3317/**
3318 * Atomically writes a pointer value, ordered.
3319 *
3320 * @param ppv Pointer to the pointer variable to write.
3321 * @param pv The pointer value to assign to *ppv. If NULL use
3322 * ASMAtomicWriteNullPtr or you'll land in trouble.
3323 *
3324 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3325 * NULL.
3326 */
3327#ifdef __GNUC__
3328# define ASMAtomicWritePtr(ppv, pv) \
3329 do \
3330 { \
3331 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3332 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3333 \
3334 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3335 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3336 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3337 \
3338 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3339 } while (0)
3340#else
3341# define ASMAtomicWritePtr(ppv, pv) \
3342 do \
3343 { \
3344 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3345 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3346 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3347 \
3348 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3349 } while (0)
3350#endif
3351
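/* A minimal publish sketch, assuming illustrative MYDEV and g_pDev (declared as
 * MYDEV * volatile) names: the structure is fully initialised first and then made
 * visible with one ordered pointer store; the consumer picks it up with a typed read.
 * @code
 *      // producer: initialise everything first, publish last
 *      pDev->cRefs  = 1;
 *      pDev->fFlags = 0;
 *      ASMAtomicWritePtr(&g_pDev, pDev);
 *
 *      // consumer
 *      MYDEV *pDev = ASMAtomicReadPtrT(&g_pDev, MYDEV *);
 *      if (pDev)
 *          ...
 * @endcode
 */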
3352
3353/**
3354 * Atomically sets a pointer to NULL, ordered.
3355 *
3356 * @param ppv Pointer to the pointer variable that should be set to NULL.
3357 *
3358 * @remarks This is relatively type safe on GCC platforms.
3359 */
3360#if RT_GNUC_PREREQ(4, 2)
3361# define ASMAtomicWriteNullPtr(ppv) \
3362 do \
3363 { \
3364 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3365 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3366 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3367 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3368 } while (0)
3369#else
3370# define ASMAtomicWriteNullPtr(ppv) \
3371 do \
3372 { \
3373 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3374 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3375 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3376 } while (0)
3377#endif
3378
3379
3380/**
3381 * Atomically writes a pointer value, unordered.
3382 *
3383 *
3384 * @param ppv Pointer to the pointer variable.
3385 * @param pv The pointer value to assign to *ppv. If NULL use
3386 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3387 *
3388 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3389 * NULL.
3390 */
3391#if RT_GNUC_PREREQ(4, 2)
3392# define ASMAtomicUoWritePtr(ppv, pv) \
3393 do \
3394 { \
3395 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3396 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3397 \
3398 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3399 AssertCompile(sizeof(pv) == sizeof(void *)); \
3400 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3401 \
3402 *(ppvTypeChecked) = pvTypeChecked; \
3403 } while (0)
3404#else
3405# define ASMAtomicUoWritePtr(ppv, pv) \
3406 do \
3407 { \
3408 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3409 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3410 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3411 *(ppv) = pv; \
3412 } while (0)
3413#endif
3414
3415
3416/**
3417 * Atomically sets a pointer to NULL, unordered.
3418 *
3419 * @param ppv Pointer to the pointer variable that should be set to NULL.
3420 *
3421 * @remarks This is relatively type safe on GCC platforms.
3422 */
3423#ifdef __GNUC__
3424# define ASMAtomicUoWriteNullPtr(ppv) \
3425 do \
3426 { \
3427 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3428 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3429 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3430 *(ppvTypeChecked) = NULL; \
3431 } while (0)
3432#else
3433# define ASMAtomicUoWriteNullPtr(ppv) \
3434 do \
3435 { \
3436 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3437 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3438 *(ppv) = NULL; \
3439 } while (0)
3440#endif
3441
3442
3443/**
3444 * Atomically write a typical IPRT handle value, ordered.
3445 *
3446 * @param ph Pointer to the variable to update.
3447 * @param hNew The value to assign to *ph.
3448 *
3449 * @remarks This doesn't currently work for all handles (like RTFILE).
3450 */
3451#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3452# define ASMAtomicWriteHandle(ph, hNew) \
3453 do { \
3454 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3455 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3456 } while (0)
3457#elif HC_ARCH_BITS == 64
3458# define ASMAtomicWriteHandle(ph, hNew) \
3459 do { \
3460 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3461 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3462 } while (0)
3463#else
3464# error HC_ARCH_BITS
3465#endif
3466
3467
3468/**
3469 * Atomically write a typical IPRT handle value, unordered.
3470 *
3471 * @param ph Pointer to the variable to update.
3472 * @param hNew The value to assign to *ph.
3473 *
3474 * @remarks This doesn't currently work for all handles (like RTFILE).
3475 */
3476#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3477# define ASMAtomicUoWriteHandle(ph, hNew) \
3478 do { \
3479 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3480 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3481 } while (0)
3482#elif HC_ARCH_BITS == 64
3483# define ASMAtomicUoWriteHandle(ph, hNew) \
3484 do { \
3485 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3486 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3487 } while (0)
3488#else
3489# error HC_ARCH_BITS
3490#endif
3491
3492
3493/**
3494 * Atomically write a value whose size might differ
3495 * between platforms or compilers, ordered.
3496 *
3497 * @param pu Pointer to the variable to update.
3498 * @param uNew The value to assign to *pu.
3499 */
3500#define ASMAtomicWriteSize(pu, uNew) \
3501 do { \
3502 switch (sizeof(*(pu))) { \
3503 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3504 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3505 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3506 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3507 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3508 } \
3509 } while (0)
3510
3511/**
3512 * Atomically write a value whose size might differ
3513 * between platforms or compilers, unordered.
3514 *
3515 * @param pu Pointer to the variable to update.
3516 * @param uNew The value to assign to *pu.
3517 */
3518#define ASMAtomicUoWriteSize(pu, uNew) \
3519 do { \
3520 switch (sizeof(*(pu))) { \
3521 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3522 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3523 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3524 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3525 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3526 } \
3527 } while (0)
3528
3529
3530
3531/**
3532 * Atomically exchanges and adds to a 16-bit value, ordered.
3533 *
3534 * @returns The old value.
3535 * @param pu16 Pointer to the value.
3536 * @param u16 Number to add.
3537 *
3538 * @remarks Currently not implemented, just to make 16-bit code happy.
3539 * @remarks x86: Requires a 486 or later.
3540 */
3541RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3542
3543
3544/**
3545 * Atomically exchanges and adds to a 32-bit value, ordered.
3546 *
3547 * @returns The old value.
3548 * @param pu32 Pointer to the value.
3549 * @param u32 Number to add.
3550 *
3551 * @remarks x86: Requires a 486 or later.
3552 */
3553#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3554RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3555#else
3556DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3557{
3558# if RT_INLINE_ASM_USES_INTRIN
3559 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3560 return u32;
3561
3562# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3563# if RT_INLINE_ASM_GNU_STYLE
3564 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3565 : "=r" (u32)
3566 , "=m" (*pu32)
3567 : "0" (u32)
3568 , "m" (*pu32)
3569 : "memory"
3570 , "cc");
3571 return u32;
3572# else
3573 __asm
3574 {
3575 mov eax, [u32]
3576# ifdef RT_ARCH_AMD64
3577 mov rdx, [pu32]
3578 lock xadd [rdx], eax
3579# else
3580 mov edx, [pu32]
3581 lock xadd [edx], eax
3582# endif
3583 mov [u32], eax
3584 }
3585 return u32;
3586# endif
3587
3588# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3589 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3590 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3591 "add %[uNew], %[uOld], %[uVal]\n\t",
3592 [uVal] "r" (u32));
3593 return u32OldRet;
3594
3595# else
3596# error "Port me"
3597# endif
3598}
3599#endif
3600
3601
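/*
 * A minimal usage sketch: since ASMAtomicAddU32 returns the value before the
 * addition, it can hand out unique indexes from a shared cursor without any
 * locking. The s_idxNext variable is assumed for the example.
 *
 *      static uint32_t volatile s_idxNext = 0;
 *
 *      uint32_t idxMine = ASMAtomicAddU32(&s_idxNext, 1);  // value prior to the add
 */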
3602/**
3603 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3604 *
3605 * @returns The old value.
3606 * @param pi32 Pointer to the value.
3607 * @param i32 Number to add.
3608 *
3609 * @remarks x86: Requires a 486 or later.
3610 */
3611DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3612{
3613 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3614}
3615
3616
3617/**
3618 * Atomically exchanges and adds to a 64-bit value, ordered.
3619 *
3620 * @returns The old value.
3621 * @param pu64 Pointer to the value.
3622 * @param u64 Number to add.
3623 *
3624 * @remarks x86: Requires a Pentium or later.
3625 */
3626#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3627DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3628#else
3629DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3630{
3631# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3632 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3633 return u64;
3634
3635# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3636 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3637 : "=r" (u64)
3638 , "=m" (*pu64)
3639 : "0" (u64)
3640 , "m" (*pu64)
3641 : "memory"
3642 , "cc");
3643 return u64;
3644
3645# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3646 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3647 "add %[uNew], %[uOld], %[uVal]\n\t"
3648 ,
3649 "add %[uNew], %[uOld], %[uVal]\n\t"
3650 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3651 [uVal] "r" (u64));
3652 return u64OldRet;
3653
3654# else
3655 uint64_t u64Old;
3656 for (;;)
3657 {
3658 uint64_t u64New;
3659 u64Old = ASMAtomicUoReadU64(pu64);
3660 u64New = u64Old + u64;
3661 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3662 break;
3663 ASMNopPause();
3664 }
3665 return u64Old;
3666# endif
3667}
3668#endif
3669
3670
3671/**
3672 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3673 *
3674 * @returns The old value.
3675 * @param pi64 Pointer to the value.
3676 * @param i64 Number to add.
3677 *
3678 * @remarks x86: Requires a Pentium or later.
3679 */
3680DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3681{
3682 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3683}
3684
3685
3686/**
3687 * Atomically exchanges and adds to a size_t value, ordered.
3688 *
3689 * @returns The old value.
3690 * @param pcb Pointer to the size_t value.
3691 * @param cb Number to add.
3692 */
3693DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3694{
3695#if ARCH_BITS == 64
3696 AssertCompileSize(size_t, 8);
3697 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3698#elif ARCH_BITS == 32
3699 AssertCompileSize(size_t, 4);
3700 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3701#elif ARCH_BITS == 16
3702 AssertCompileSize(size_t, 2);
3703 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3704#else
3705# error "Unsupported ARCH_BITS value"
3706#endif
3707}
3708
3709
3710/**
3711 * Atomically exchanges and adds a value whose size might differ between
3712 * platforms or compilers, ordered.
3713 *
3714 * @param pu Pointer to the variable to update.
3715 * @param uNew The value to add to *pu.
3716 * @param puOld Where to store the old value.
3717 */
3718#define ASMAtomicAddSize(pu, uNew, puOld) \
3719 do { \
3720 switch (sizeof(*(pu))) { \
3721 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3722 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3723 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3724 } \
3725 } while (0)
3726
3727
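/*
 * A minimal usage sketch: the size-dispatching macro hands the old value back
 * through an out parameter instead of a return value. The cbInFlight,
 * cbPacket and cbOld variables are assumptions for the example; only 32-bit
 * and 64-bit operands are supported.
 *
 *      size_t volatile cbInFlight = 0;
 *      size_t          cbOld;
 *      ASMAtomicAddSize(&cbInFlight, cbPacket, &cbOld);  // cbOld = value before the add
 */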
3728
3729/**
3730 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3731 *
3732 * @returns The old value.
3733 * @param pu16 Pointer to the value.
3734 * @param u16 Number to subtract.
3735 *
3736 * @remarks x86: Requires a 486 or later.
3737 */
3738DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3739{
3740 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3741}
3742
3743
3744/**
3745 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3746 *
3747 * @returns The old value.
3748 * @param pi16 Pointer to the value.
3749 * @param i16 Number to subtract.
3750 *
3751 * @remarks x86: Requires a 486 or later.
3752 */
3753DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3754{
3755 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3756}
3757
3758
3759/**
3760 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3761 *
3762 * @returns The old value.
3763 * @param pu32 Pointer to the value.
3764 * @param u32 Number to subtract.
3765 *
3766 * @remarks x86: Requires a 486 or later.
3767 */
3768DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3769{
3770 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3771}
3772
3773
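/*
 * A minimal usage sketch: the subtraction variants also return the value
 * before the operation, so a caller can detect the moment a counter reaches
 * zero. The cPending counter and signalCompletion() helper are assumptions
 * for the example.
 *
 *      if (ASMAtomicSubU32(&cPending, 1) == 1)  // old value was 1, now zero
 *          signalCompletion();
 */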
3774/**
3775 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3776 *
3777 * @returns The old value.
3778 * @param pi32 Pointer to the value.
3779 * @param i32 Number to subtract.
3780 *
3781 * @remarks x86: Requires a 486 or later.
3782 */
3783DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3784{
3785 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3786}
3787
3788
3789/**
3790 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3791 *
3792 * @returns The old value.
3793 * @param pu64 Pointer to the value.
3794 * @param u64 Number to subtract.
3795 *
3796 * @remarks x86: Requires a Pentium or later.
3797 */
3798DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3799{
3800 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3801}
3802
3803
3804/**
3805 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3806 *
3807 * @returns The old value.
3808 * @param pi64 Pointer to the value.
3809 * @param i64 Number to subtract.
3810 *
3811 * @remarks x86: Requires a Pentium or later.
3812 */
3813DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3814{
3815 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3816}
3817
3818
3819/**
3820 * Atomically exchanges and subtracts from a size_t value, ordered.
3821 *
3822 * @returns The old value.
3823 * @param pcb Pointer to the size_t value.
3824 * @param cb Number to subtract.
3825 *
3826 * @remarks x86: Requires a 486 or later.
3827 */
3828DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3829{
3830#if ARCH_BITS == 64
3831 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3832#elif ARCH_BITS == 32
3833 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3834#elif ARCH_BITS == 16
3835 AssertCompileSize(size_t, 2);
3836 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3837#else
3838# error "Unsupported ARCH_BITS value"
3839#endif
3840}
3841
3842
3843/**
3844 * Atomically exchanges and subtracts a value whose size might differ between
3845 * platforms or compilers, ordered.
3846 *
3847 * @param pu Pointer to the variable to update.
3848 * @param uNew The value to subtract from *pu.
3849 * @param puOld Where to store the old value.
3850 *
3851 * @remarks x86: Requires a 486 or later.
3852 */
3853#define ASMAtomicSubSize(pu, uNew, puOld) \
3854 do { \
3855 switch (sizeof(*(pu))) { \
3856 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3857 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3858 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3859 } \
3860 } while (0)
3861
3862
3863
3864/**
3865 * Atomically increment a 16-bit value, ordered.
3866 *
3867 * @returns The new value.
3868 * @param pu16 Pointer to the value to increment.
3869 * @remarks Not implemented. Just to make 16-bit code happy.
3870 *
3871 * @remarks x86: Requires a 486 or later.
3872 */
3873RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3874
3875
3876/**
3877 * Atomically increment a 32-bit value, ordered.
3878 *
3879 * @returns The new value.
3880 * @param pu32 Pointer to the value to increment.
3881 *
3882 * @remarks x86: Requires a 486 or later.
3883 */
3884#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3885RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3886#else
3887DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3888{
3889# if RT_INLINE_ASM_USES_INTRIN
3890 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3891
3892# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3893# if RT_INLINE_ASM_GNU_STYLE
3894 uint32_t u32;
3895 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3896 : "=r" (u32)
3897 , "=m" (*pu32)
3898 : "0" (1)
3899 , "m" (*pu32)
3900 : "memory"
3901 , "cc");
3902 return u32+1;
3903# else
3904 __asm
3905 {
3906 mov eax, 1
3907# ifdef RT_ARCH_AMD64
3908 mov rdx, [pu32]
3909 lock xadd [rdx], eax
3910# else
3911 mov edx, [pu32]
3912 lock xadd [edx], eax
3913# endif
3914 mov u32, eax
3915 }
3916 return u32+1;
3917# endif
3918
3919# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3920 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
3921 "add %w[uNew], %w[uNew], #1\n\t",
3922 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
3923 "X" (0) /* dummy */);
3924 return u32NewRet;
3925
3926# else
3927 return ASMAtomicAddU32(pu32, 1) + 1;
3928# endif
3929}
3930#endif
3931
3932
3933/**
3934 * Atomically increment a signed 32-bit value, ordered.
3935 *
3936 * @returns The new value.
3937 * @param pi32 Pointer to the value to increment.
3938 *
3939 * @remarks x86: Requires a 486 or later.
3940 */
3941DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3942{
3943 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3944}
3945
3946
3947/**
3948 * Atomically increment a 64-bit value, ordered.
3949 *
3950 * @returns The new value.
3951 * @param pu64 Pointer to the value to increment.
3952 *
3953 * @remarks x86: Requires a Pentium or later.
3954 */
3955#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3956DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3957#else
3958DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3959{
3960# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3961 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
3962
3963# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3964 uint64_t u64;
3965 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3966 : "=r" (u64)
3967 , "=m" (*pu64)
3968 : "0" (1)
3969 , "m" (*pu64)
3970 : "memory"
3971 , "cc");
3972 return u64 + 1;
3973
3974# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3975 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
3976 "add %[uNew], %[uNew], #1\n\t"
3977 ,
3978 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
3979 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
3980 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
3981 return u64NewRet;
3982
3983# else
3984 return ASMAtomicAddU64(pu64, 1) + 1;
3985# endif
3986}
3987#endif
3988
3989
3990/**
3991 * Atomically increment a signed 64-bit value, ordered.
3992 *
3993 * @returns The new value.
3994 * @param pi64 Pointer to the value to increment.
3995 *
3996 * @remarks x86: Requires a Pentium or later.
3997 */
3998DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3999{
4000 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4001}
4002
4003
4004/**
4005 * Atomically increment a size_t value, ordered.
4006 *
4007 * @returns The new value.
4008 * @param pcb Pointer to the value to increment.
4009 *
4010 * @remarks x86: Requires a 486 or later.
4011 */
4012DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4013{
4014#if ARCH_BITS == 64
4015 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4016#elif ARCH_BITS == 32
4017 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4018#elif ARCH_BITS == 16
4019 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4020#else
4021# error "Unsupported ARCH_BITS value"
4022#endif
4023}
4024
4025
4026
4027/**
4028 * Atomically decrement an unsigned 16-bit value, ordered.
4029 *
4030 * @returns The new value.
4031 * @param pu16 Pointer to the value to decrement.
4032 * @remarks Not implemented. Just to make 16-bit code happy.
4033 *
4034 * @remarks x86: Requires a 486 or later.
4035 */
4036RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4037
4038
4039/**
4040 * Atomically decrement an unsigned 32-bit value, ordered.
4041 *
4042 * @returns The new value.
4043 * @param pu32 Pointer to the value to decrement.
4044 *
4045 * @remarks x86: Requires a 486 or later.
4046 */
4047#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4048RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4049#else
4050DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4051{
4052# if RT_INLINE_ASM_USES_INTRIN
4053 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4054
4055# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4056# if RT_INLINE_ASM_GNU_STYLE
4057 uint32_t u32;
4058 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4059 : "=r" (u32)
4060 , "=m" (*pu32)
4061 : "0" (-1)
4062 , "m" (*pu32)
4063 : "memory"
4064 , "cc");
4065 return u32-1;
4066# else
4067 uint32_t u32;
4068 __asm
4069 {
4070 mov eax, -1
4071# ifdef RT_ARCH_AMD64
4072 mov rdx, [pu32]
4073 lock xadd [rdx], eax
4074# else
4075 mov edx, [pu32]
4076 lock xadd [edx], eax
4077# endif
4078 mov u32, eax
4079 }
4080 return u32-1;
4081# endif
4082
4083# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4084 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4085 "sub %w[uNew], %w[uNew], #1\n\t",
4086 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4087 "X" (0) /* dummy */);
4088 return u32NewRet;
4089
4090# else
4091 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4092# endif
4093}
4094#endif
4095
4096
4097/**
4098 * Atomically decrement a signed 32-bit value, ordered.
4099 *
4100 * @returns The new value.
4101 * @param pi32 Pointer to the value to decrement.
4102 *
4103 * @remarks x86: Requires a 486 or later.
4104 */
4105DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4106{
4107 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4108}
4109
4110
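/*
 * A minimal usage sketch: the increment/decrement pairs return the NEW value,
 * which is exactly what a plain reference counter needs. The MYOBJ type (with
 * a 'uint32_t volatile cRefs' member) and the myObjDestroy() helper are
 * assumptions for the example.
 *
 *      void myObjRetain(MYOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      void myObjRelease(MYOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              myObjDestroy(pObj);
 *      }
 */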
4111/**
4112 * Atomically decrement an unsigned 64-bit value, ordered.
4113 *
4114 * @returns The new value.
4115 * @param pu64 Pointer to the value to decrement.
4116 *
4117 * @remarks x86: Requires a Pentium or later.
4118 */
4119#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4120RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4121#else
4122DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4123{
4124# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4125 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4126
4127# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4128 uint64_t u64;
4129 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4130 : "=r" (u64)
4131 , "=m" (*pu64)
4132 : "0" (~(uint64_t)0)
4133 , "m" (*pu64)
4134 : "memory"
4135 , "cc");
4136 return u64-1;
4137
4138# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4139 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4140 "sub %[uNew], %[uNew], #1\n\t"
4141 ,
4142 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4143 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4144 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4145 return u64NewRet;
4146
4147# else
4148 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4149# endif
4150}
4151#endif
4152
4153
4154/**
4155 * Atomically decrement a signed 64-bit value, ordered.
4156 *
4157 * @returns The new value.
4158 * @param pi64 Pointer to the value to decrement.
4159 *
4160 * @remarks x86: Requires a Pentium or later.
4161 */
4162DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4163{
4164 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4165}
4166
4167
4168/**
4169 * Atomically decrement a size_t value, ordered.
4170 *
4171 * @returns The new value.
4172 * @param pcb Pointer to the value to decrement.
4173 *
4174 * @remarks x86: Requires a 486 or later.
4175 */
4176DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4177{
4178#if ARCH_BITS == 64
4179 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4180#elif ARCH_BITS == 32
4181 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4182#elif ARCH_BITS == 16
4183 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4184#else
4185# error "Unsupported ARCH_BITS value"
4186#endif
4187}
4188
4189
4190/**
4191 * Atomically Or an unsigned 32-bit value, ordered.
4192 *
4193 * @param pu32 Pointer to the variable to OR u32 with.
4194 * @param u32 The value to OR *pu32 with.
4195 *
4196 * @remarks x86: Requires a 386 or later.
4197 */
4198#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4199RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4200#else
4201DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4202{
4203# if RT_INLINE_ASM_USES_INTRIN
4204 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4205
4206# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4207# if RT_INLINE_ASM_GNU_STYLE
4208 __asm__ __volatile__("lock; orl %1, %0\n\t"
4209 : "=m" (*pu32)
4210 : "ir" (u32)
4211 , "m" (*pu32)
4212 : "cc");
4213# else
4214 __asm
4215 {
4216 mov eax, [u32]
4217# ifdef RT_ARCH_AMD64
4218 mov rdx, [pu32]
4219 lock or [rdx], eax
4220# else
4221 mov edx, [pu32]
4222 lock or [edx], eax
4223# endif
4224 }
4225# endif
4226
4227# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4228 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4229 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4230 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4231 "orr %[uNew], %[uNew], %[uVal]\n\t",
4232 [uVal] "r" (u32));
4233
4234# else
4235# error "Port me"
4236# endif
4237}
4238#endif
4239
4240
4241/**
4242 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4243 * fallback).
4244 *
4245 * @returns Old value.
4246 * @param pu32 Pointer to the variable to OR @a u32 with.
4247 * @param u32 The value to OR @a *pu32 with.
4248 */
4249DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4250{
4251#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4252 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4253 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4254 "orr %[uNew], %[uOld], %[uVal]\n\t",
4255 [uVal] "r" (u32));
4256 return u32OldRet;
4257
4258#else
4259 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4260 uint32_t u32New;
4261 do
4262 u32New = u32RetOld | u32;
4263 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4264 return u32RetOld;
4265#endif
4266}
4267
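/*
 * A minimal usage sketch: because the extended variant returns the old value,
 * it can raise a flag and report whether it was already raised in a single
 * atomic step. The pThis->fFlags field and MYDEV_F_SHUTDOWN bit are
 * assumptions for the example.
 *
 *      if (!(ASMAtomicOrExU32(&pThis->fFlags, MYDEV_F_SHUTDOWN) & MYDEV_F_SHUTDOWN))
 *      {
 *          // we set the bit first, so we perform the actual shutdown work
 *      }
 */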
4268
4269/**
4270 * Atomically Or a signed 32-bit value, ordered.
4271 *
4272 * @param pi32 Pointer to the variable to OR i32 with.
4273 * @param i32 The value to OR *pi32 with.
4274 *
4275 * @remarks x86: Requires a 386 or later.
4276 */
4277DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4278{
4279 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4280}
4281
4282
4283/**
4284 * Atomically Or an unsigned 64-bit value, ordered.
4285 *
4286 * @param pu64 Pointer to the variable to OR u64 with.
4287 * @param u64 The value to OR *pu64 with.
4288 *
4289 * @remarks x86: Requires a Pentium or later.
4290 */
4291#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4292DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4293#else
4294DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4295{
4296# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4297 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4298
4299# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4300 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4301 : "=m" (*pu64)
4302 : "r" (u64)
4303 , "m" (*pu64)
4304 : "cc");
4305
4306# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4307 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4308 "orr %[uNew], %[uNew], %[uVal]\n\t"
4309 ,
4310 "orr %[uNew], %[uNew], %[uVal]\n\t"
4311 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4312 [uVal] "r" (u64));
4313
4314# else
4315 for (;;)
4316 {
4317 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4318 uint64_t u64New = u64Old | u64;
4319 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4320 break;
4321 ASMNopPause();
4322 }
4323# endif
4324}
4325#endif
4326
4327
4328/**
4329 * Atomically Or a signed 64-bit value, ordered.
4330 *
4331 * @param pi64 Pointer to the variable to OR i64 with.
4332 * @param i64 The value to OR *pi64 with.
4333 *
4334 * @remarks x86: Requires a Pentium or later.
4335 */
4336DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4337{
4338 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4339}
4340
4341
4342/**
4343 * Atomically And an unsigned 32-bit value, ordered.
4344 *
4345 * @param pu32 Pointer to the variable to AND u32 with.
4346 * @param u32 The value to AND *pu32 with.
4347 *
4348 * @remarks x86: Requires a 386 or later.
4349 */
4350#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4351RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4352#else
4353DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4354{
4355# if RT_INLINE_ASM_USES_INTRIN
4356 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4357
4358# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4359# if RT_INLINE_ASM_GNU_STYLE
4360 __asm__ __volatile__("lock; andl %1, %0\n\t"
4361 : "=m" (*pu32)
4362 : "ir" (u32)
4363 , "m" (*pu32)
4364 : "cc");
4365# else
4366 __asm
4367 {
4368 mov eax, [u32]
4369# ifdef RT_ARCH_AMD64
4370 mov rdx, [pu32]
4371 lock and [rdx], eax
4372# else
4373 mov edx, [pu32]
4374 lock and [edx], eax
4375# endif
4376 }
4377# endif
4378
4379# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4380 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4381 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4382 "and %[uNew], %[uNew], %[uVal]\n\t",
4383 [uVal] "r" (u32));
4384
4385# else
4386# error "Port me"
4387# endif
4388}
4389#endif
4390
4391
4392/**
4393 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4394 *
4395 * @returns Old value.
4396 * @param pu32 Pointer to the variable to AND @a u32 with.
4397 * @param u32 The value to AND @a *pu32 with.
4398 */
4399DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4400{
4401#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4402 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4403 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4404 "and %[uNew], %[uOld], %[uVal]\n\t",
4405 [uVal] "r" (u32));
4406 return u32OldRet;
4407
4408#else
4409 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4410 uint32_t u32New;
4411 do
4412 u32New = u32RetOld & u32;
4413 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4414 return u32RetOld;
4415#endif
4416}
4417
4418
4419/**
4420 * Atomically And a signed 32-bit value, ordered.
4421 *
4422 * @param pi32 Pointer to the variable to AND i32 with.
4423 * @param i32 The value to AND *pi32 with.
4424 *
4425 * @remarks x86: Requires a 386 or later.
4426 */
4427DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4428{
4429 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4430}
4431
4432
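/*
 * A minimal usage sketch: the OR/AND pair is the usual way of setting and
 * clearing individual bits in a shared 32-bit flag word. The pThis->fFlags
 * field and MYDEV_F_BUSY bit are assumptions for the example; RT_BIT_32 comes
 * from iprt/cdefs.h.
 *
 *      #define MYDEV_F_BUSY    RT_BIT_32(0)
 *
 *      ASMAtomicOrU32(&pThis->fFlags, MYDEV_F_BUSY);    // set the bit
 *      ASMAtomicAndU32(&pThis->fFlags, ~MYDEV_F_BUSY);  // clear the bit
 */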
4433/**
4434 * Atomically And an unsigned 64-bit value, ordered.
4435 *
4436 * @param pu64 Pointer to the variable to AND u64 with.
4437 * @param u64 The value to AND *pu64 with.
4438 *
4439 * @remarks x86: Requires a Pentium or later.
4440 */
4441#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4442DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4443#else
4444DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4445{
4446# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4447 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4448
4449# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4450 __asm__ __volatile__("lock; andq %1, %0\n\t"
4451 : "=m" (*pu64)
4452 : "r" (u64)
4453 , "m" (*pu64)
4454 : "cc");
4455
4456# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4457 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4458 "and %[uNew], %[uNew], %[uVal]\n\t"
4459 ,
4460 "and %[uNew], %[uNew], %[uVal]\n\t"
4461 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4462 [uVal] "r" (u64));
4463
4464# else
4465 for (;;)
4466 {
4467 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4468 uint64_t u64New = u64Old & u64;
4469 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4470 break;
4471 ASMNopPause();
4472 }
4473# endif
4474}
4475#endif
4476
4477
4478/**
4479 * Atomically And a signed 64-bit value, ordered.
4480 *
4481 * @param pi64 Pointer to the variable to AND i64 with.
4482 * @param i64 The value to AND *pi64 with.
4483 *
4484 * @remarks x86: Requires a Pentium or later.
4485 */
4486DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4487{
4488 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4489}
4490
4491
4492/**
4493 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4494 *
4495 * @param pu32 Pointer to the variable to XOR @a u32 with.
4496 * @param u32 The value to XOR @a *pu32 with.
4497 *
4498 * @remarks x86: Requires a 386 or later.
4499 */
4500#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4501RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4502#else
4503DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4504{
4505# if RT_INLINE_ASM_USES_INTRIN
4506 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4507
4508# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4509# if RT_INLINE_ASM_GNU_STYLE
4510 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4511 : "=m" (*pu32)
4512 : "ir" (u32)
4513 , "m" (*pu32)
4514 : "cc");
4515# else
4516 __asm
4517 {
4518 mov eax, [u32]
4519# ifdef RT_ARCH_AMD64
4520 mov rdx, [pu32]
4521 lock xor [rdx], eax
4522# else
4523 mov edx, [pu32]
4524 lock xor [edx], eax
4525# endif
4526 }
4527# endif
4528
4529# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4530 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4531 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4532 "eor %[uNew], %[uNew], %[uVal]\n\t",
4533 [uVal] "r" (u32));
4534
4535# else
4536# error "Port me"
4537# endif
4538}
4539#endif
4540
4541
4542/**
4543 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4544 * extended version (for bitmaps).
4545 *
4546 * @returns Old value.
4547 * @param pu32 Pointer to the variable to XOR @a u32 with.
4548 * @param u32 The value to XOR @a *pu32 with.
4549 */
4550DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4551{
4552#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4553 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4554 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4555 "eor %[uNew], %[uOld], %[uVal]\n\t",
4556 [uVal] "r" (u32));
4557 return u32OldRet;
4558
4559#else
4560 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4561 uint32_t u32New;
4562 do
4563 u32New = u32RetOld ^ u32;
4564 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4565 return u32RetOld;
4566#endif
4567}
4568
4569
4570/**
4571 * Atomically XOR a signed 32-bit value, ordered.
4572 *
4573 * @param pi32 Pointer to the variable to XOR i32 with.
4574 * @param i32 The value to XOR *pi32 with.
4575 *
4576 * @remarks x86: Requires a 386 or later.
4577 */
4578DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4579{
4580 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4581}
4582
4583
4584/**
4585 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4586 *
4587 * @param pu32 Pointer to the variable to OR u32 with.
4588 * @param u32 The value to OR *pu32 with.
4589 *
4590 * @remarks x86: Requires a 386 or later.
4591 */
4592#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4593RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4594#else
4595DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4596{
4597# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4598# if RT_INLINE_ASM_GNU_STYLE
4599 __asm__ __volatile__("orl %1, %0\n\t"
4600 : "=m" (*pu32)
4601 : "ir" (u32)
4602 , "m" (*pu32)
4603 : "cc");
4604# else
4605 __asm
4606 {
4607 mov eax, [u32]
4608# ifdef RT_ARCH_AMD64
4609 mov rdx, [pu32]
4610 or [rdx], eax
4611# else
4612 mov edx, [pu32]
4613 or [edx], eax
4614# endif
4615 }
4616# endif
4617
4618# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4619 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4620 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4621 "orr %[uNew], %[uNew], %[uVal]\n\t",
4622 [uVal] "r" (u32));
4623
4624# else
4625# error "Port me"
4626# endif
4627}
4628#endif
4629
4630
4631/**
4632 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4633 * extended version (for bitmap fallback).
4634 *
4635 * @returns Old value.
4636 * @param pu32 Pointer to the variable to OR @a u32 with.
4637 * @param u32 The value to OR @a *pu32 with.
4638 */
4639DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4640{
4641#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4642 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4643 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4644 "orr %[uNew], %[uOld], %[uVal]\n\t",
4645 [uVal] "r" (u32));
4646 return u32OldRet;
4647
4648#else
4649 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4650#endif
4651}
4652
4653
4654/**
4655 * Atomically OR a signed 32-bit value, unordered.
4656 *
4657 * @param pi32 Pointer to the variable to OR i32 with.
4658 * @param i32 The value to OR *pi32 with.
4659 *
4660 * @remarks x86: Requires a 386 or later.
4661 */
4662DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4663{
4664 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4665}
4666
4667
4668/**
4669 * Atomically OR an unsigned 64-bit value, unordered.
4670 *
4671 * @param pu64 Pointer to the variable to OR u64 with.
4672 * @param u64 The value to OR *pu64 with.
4673 *
4674 * @remarks x86: Requires a Pentium or later.
4675 */
4676#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4677DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4678#else
4679DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4680{
4681# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4682 __asm__ __volatile__("orq %1, %q0\n\t"
4683 : "=m" (*pu64)
4684 : "r" (u64)
4685 , "m" (*pu64)
4686 : "cc");
4687
4688# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4689 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
4690 "orr %[uNew], %[uNew], %[uVal]\n\t"
4691 ,
4692 "orr %[uNew], %[uNew], %[uVal]\n\t"
4693 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4694 [uVal] "r" (u64));
4695
4696# else
4697 for (;;)
4698 {
4699 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4700 uint64_t u64New = u64Old | u64;
4701 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4702 break;
4703 ASMNopPause();
4704 }
4705# endif
4706}
4707#endif
4708
4709
4710/**
4711 * Atomically Or a signed 64-bit value, unordered.
4712 *
4713 * @param pi64 Pointer to the variable to OR i64 with.
4714 * @param i64 The value to OR *pi64 with.
4715 *
4716 * @remarks x86: Requires a Pentium or later.
4717 */
4718DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4719{
4720 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4721}
4722
4723
4724/**
4725 * Atomically And an unsigned 32-bit value, unordered.
4726 *
4727 * @param pu32 Pointer to the variable to AND u32 with.
4728 * @param u32 The value to AND *pu32 with.
4729 *
4730 * @remarks x86: Requires a 386 or later.
4731 */
4732#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4733RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4734#else
4735DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4736{
4737# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4738# if RT_INLINE_ASM_GNU_STYLE
4739 __asm__ __volatile__("andl %1, %0\n\t"
4740 : "=m" (*pu32)
4741 : "ir" (u32)
4742 , "m" (*pu32)
4743 : "cc");
4744# else
4745 __asm
4746 {
4747 mov eax, [u32]
4748# ifdef RT_ARCH_AMD64
4749 mov rdx, [pu32]
4750 and [rdx], eax
4751# else
4752 mov edx, [pu32]
4753 and [edx], eax
4754# endif
4755 }
4756# endif
4757
4758# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4759 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
4760 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4761 "and %[uNew], %[uNew], %[uVal]\n\t",
4762 [uVal] "r" (u32));
4763
4764# else
4765# error "Port me"
4766# endif
4767}
4768#endif
4769
4770
4771/**
4772 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
4773 * bitmap fallback).
4774 *
4775 * @returns Old value.
4776 * @param pu32 Pointer to the variable to AND @a u32 with.
4777 * @param u32 The value to AND @a *pu32 with.
4778 */
4779DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4780{
4781#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4782 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
4783 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4784 "and %[uNew], %[uOld], %[uVal]\n\t",
4785 [uVal] "r" (u32));
4786 return u32OldRet;
4787
4788#else
4789 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4790#endif
4791}
4792
4793
4794/**
4795 * Atomically And a signed 32-bit value, unordered.
4796 *
4797 * @param pi32 Pointer to the variable to AND i32 with.
4798 * @param i32 The value to AND *pi32 with.
4799 *
4800 * @remarks x86: Requires a 386 or later.
4801 */
4802DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4803{
4804 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4805}
4806
4807
4808/**
4809 * Atomically And an unsigned 64-bit value, unordered.
4810 *
4811 * @param pu64 Pointer to the variable to AND u64 with.
4812 * @param u64 The value to AND *pu64 with.
4813 *
4814 * @remarks x86: Requires a Pentium or later.
4815 */
4816#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4817DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4818#else
4819DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4820{
4821# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4822 __asm__ __volatile__("andq %1, %0\n\t"
4823 : "=m" (*pu64)
4824 : "r" (u64)
4825 , "m" (*pu64)
4826 : "cc");
4827
4828# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4829 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
4830 "and %[uNew], %[uNew], %[uVal]\n\t"
4831 ,
4832 "and %[uNew], %[uNew], %[uVal]\n\t"
4833 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4834 [uVal] "r" (u64));
4835
4836# else
4837 for (;;)
4838 {
4839 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4840 uint64_t u64New = u64Old & u64;
4841 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4842 break;
4843 ASMNopPause();
4844 }
4845# endif
4846}
4847#endif
4848
4849
4850/**
4851 * Atomically And a signed 64-bit value, unordered.
4852 *
4853 * @param pi64 Pointer to the variable to AND i64 with.
4854 * @param i64 The value to AND *pi64 with.
4855 *
4856 * @remarks x86: Requires a Pentium or later.
4857 */
4858DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4859{
4860 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4861}
4862
4863
4864/**
4865 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
4866 *
4867 * @param pu32 Pointer to the variable to XOR @a u32 with.
4868 * @param u32 The value to XOR @a *pu32 with.
4869 *
4870 * @remarks x86: Requires a 386 or later.
4871 */
4872#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4873RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4874#else
4875DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4876{
4877# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4878# if RT_INLINE_ASM_GNU_STYLE
4879 __asm__ __volatile__("xorl %1, %0\n\t"
4880 : "=m" (*pu32)
4881 : "ir" (u32)
4882 , "m" (*pu32)
4883 : "cc");
4884# else
4885 __asm
4886 {
4887 mov eax, [u32]
4888# ifdef RT_ARCH_AMD64
4889 mov rdx, [pu32]
4890 xor [rdx], eax
4891# else
4892 mov edx, [pu32]
4893 xor [edx], eax
4894# endif
4895 }
4896# endif
4897
4898# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4899 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
4900 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4901 "eor %[uNew], %[uNew], %[uVal]\n\t",
4902 [uVal] "r" (u32));
4903
4904# else
4905# error "Port me"
4906# endif
4907}
4908#endif
4909
4910
4911/**
4912 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
4913 * extended version (for bitmap fallback).
4914 *
4915 * @returns Old value.
4916 * @param pu32 Pointer to the variable to XOR @a u32 with.
4917 * @param u32 The value to XOR @a *pu32 with.
4918 */
4919DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4920{
4921#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4922 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
4923 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4924 "eor %[uNew], %[uOld], %[uVal]\n\t",
4925 [uVal] "r" (u32));
4926 return u32OldRet;
4927
4928#else
4929 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4930#endif
4931}
4932
4933
4934/**
4935 * Atomically XOR a signed 32-bit value, unordered.
4936 *
4937 * @param pi32 Pointer to the variable to XOR @a i32 with.
4938 * @param i32 The value to XOR @a *pi32 with.
4939 *
4940 * @remarks x86: Requires a 386 or later.
4941 */
4942DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4943{
4944 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4945}
4946
4947
4948/**
4949 * Atomically increment an unsigned 32-bit value, unordered.
4950 *
4951 * @returns the new value.
4952 * @param pu32 Pointer to the variable to increment.
4953 *
4954 * @remarks x86: Requires a 486 or later.
4955 */
4956#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4957RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4958#else
4959DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4960{
4961# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4962 uint32_t u32;
4963# if RT_INLINE_ASM_GNU_STYLE
4964 __asm__ __volatile__("xaddl %0, %1\n\t"
4965 : "=r" (u32)
4966 , "=m" (*pu32)
4967 : "0" (1)
4968 , "m" (*pu32)
4969 : "memory" /** @todo why 'memory'? */
4970 , "cc");
4971 return u32 + 1;
4972# else
4973 __asm
4974 {
4975 mov eax, 1
4976# ifdef RT_ARCH_AMD64
4977 mov rdx, [pu32]
4978 xadd [rdx], eax
4979# else
4980 mov edx, [pu32]
4981 xadd [edx], eax
4982# endif
4983 mov u32, eax
4984 }
4985 return u32 + 1;
4986# endif
4987
4988# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4989 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
4990 "add %w[uNew], %w[uNew], #1\n\t",
4991 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4992 "X" (0) /* dummy */);
4993 return u32NewRet;
4994
4995# else
4996# error "Port me"
4997# endif
4998}
4999#endif
5000
5001
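/*
 * A minimal usage sketch: the Uo variants drop the lock prefix (x86) or
 * barrier (ARM), trading SMP ordering and, on x86, read-modify-write
 * atomicity for speed, which suits per-CPU or best-effort statistics
 * counters. The pStats->cHits field is an assumption for the example.
 *
 *      ASMAtomicUoIncU32(&pStats->cHits);  // cheap: no cross-CPU guarantee
 */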
5002/**
5003 * Atomically decrement an unsigned 32-bit value, unordered.
5004 *
5005 * @returns the new value.
5006 * @param pu32 Pointer to the variable to decrement.
5007 *
5008 * @remarks x86: Requires a 486 or later.
5009 */
5010#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5011RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5012#else
5013DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5014{
5015# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5016 uint32_t u32;
5017# if RT_INLINE_ASM_GNU_STYLE
5018 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5019 : "=r" (u32)
5020 , "=m" (*pu32)
5021 : "0" (-1)
5022 , "m" (*pu32)
5023 : "memory"
5024 , "cc");
5025 return u32 - 1;
5026# else
5027 __asm
5028 {
5029 mov eax, -1
5030# ifdef RT_ARCH_AMD64
5031 mov rdx, [pu32]
5032 xadd [rdx], eax
5033# else
5034 mov edx, [pu32]
5035 xadd [edx], eax
5036# endif
5037 mov u32, eax
5038 }
5039 return u32 - 1;
5040# endif
5041
5042# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5043 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5044 "sub %w[uNew], %w[uNew], #1\n\t",
5045 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5046 "X" (0) /* dummy */);
5047 return u32NewRet;
5048
5049# else
5050# error "Port me"
5051# endif
5052}
5053#endif
5054
5055
5056/** @def RT_ASM_PAGE_SIZE
5057 * We try to avoid dragging in iprt/param.h here.
5058 * @internal
5059 */
5060#if defined(RT_ARCH_SPARC64)
5061# define RT_ASM_PAGE_SIZE 0x2000
5062# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5063# if PAGE_SIZE != 0x2000
5064# error "PAGE_SIZE is not 0x2000!"
5065# endif
5066# endif
5067#elif defined(RT_ARCH_ARM64)
5068# define RT_ASM_PAGE_SIZE 0x4000
5069# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5070# if PAGE_SIZE != 0x4000
5071# error "PAGE_SIZE is not 0x4000!"
5072# endif
5073# endif
5074#else
5075# define RT_ASM_PAGE_SIZE 0x1000
5076# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5077# if PAGE_SIZE != 0x1000
5078# error "PAGE_SIZE is not 0x1000!"
5079# endif
5080# endif
5081#endif
5082
5083/**
5084 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5085 *
5086 * @param pv Pointer to the memory block. This must be page aligned.
5087 */
5088#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5089RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5090# else
5091DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5092{
5093# if RT_INLINE_ASM_USES_INTRIN
5094# ifdef RT_ARCH_AMD64
5095 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5096# else
5097 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5098# endif
5099
5100# elif RT_INLINE_ASM_GNU_STYLE
5101 RTCCUINTREG uDummy;
5102# ifdef RT_ARCH_AMD64
5103 __asm__ __volatile__("rep stosq"
5104 : "=D" (pv),
5105 "=c" (uDummy)
5106 : "0" (pv),
5107 "c" (RT_ASM_PAGE_SIZE >> 3),
5108 "a" (0)
5109 : "memory");
5110# else
5111 __asm__ __volatile__("rep stosl"
5112 : "=D" (pv),
5113 "=c" (uDummy)
5114 : "0" (pv),
5115 "c" (RT_ASM_PAGE_SIZE >> 2),
5116 "a" (0)
5117 : "memory");
5118# endif
5119# else
5120 __asm
5121 {
5122# ifdef RT_ARCH_AMD64
5123 xor rax, rax
5124 mov ecx, 0200h
5125 mov rdi, [pv]
5126 rep stosq
5127# else
5128 xor eax, eax
5129 mov ecx, 0400h
5130 mov edi, [pv]
5131 rep stosd
5132# endif
5133 }
5134# endif
5135}
5136# endif
5137
5138
5139/**
5140 * Zeros a memory block with a 32-bit aligned size.
5141 *
5142 * @param pv Pointer to the memory block.
5143 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5144 */
5145#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5146RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5147#else
5148DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5149{
5150# if RT_INLINE_ASM_USES_INTRIN
5151# ifdef RT_ARCH_AMD64
5152 if (!(cb & 7))
5153 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5154 else
5155# endif
5156 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5157
5158# elif RT_INLINE_ASM_GNU_STYLE
5159 __asm__ __volatile__("rep stosl"
5160 : "=D" (pv),
5161 "=c" (cb)
5162 : "0" (pv),
5163 "1" (cb >> 2),
5164 "a" (0)
5165 : "memory");
5166# else
5167 __asm
5168 {
5169 xor eax, eax
5170# ifdef RT_ARCH_AMD64
5171 mov rcx, [cb]
5172 shr rcx, 2
5173 mov rdi, [pv]
5174# else
5175 mov ecx, [cb]
5176 shr ecx, 2
5177 mov edi, [pv]
5178# endif
5179 rep stosd
5180 }
5181# endif
5182}
5183#endif
5184
5185
5186/**
5187 * Fills a memory block with a 32-bit aligned size.
5188 *
5189 * @param pv Pointer to the memory block.
5190 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5191 * @param u32 The value to fill with.
5192 */
5193#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5194RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5195#else
5196DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5197{
5198# if RT_INLINE_ASM_USES_INTRIN
5199# ifdef RT_ARCH_AMD64
5200 if (!(cb & 7))
5201 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5202 else
5203# endif
5204 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5205
5206# elif RT_INLINE_ASM_GNU_STYLE
5207 __asm__ __volatile__("rep stosl"
5208 : "=D" (pv),
5209 "=c" (cb)
5210 : "0" (pv),
5211 "1" (cb >> 2),
5212 "a" (u32)
5213 : "memory");
5214# else
5215 __asm
5216 {
5217# ifdef RT_ARCH_AMD64
5218 mov rcx, [cb]
5219 shr rcx, 2
5220 mov rdi, [pv]
5221# else
5222 mov ecx, [cb]
5223 shr ecx, 2
5224 mov edi, [pv]
5225# endif
5226 mov eax, [u32]
5227 rep stosd
5228 }
5229# endif
5230}
5231#endif
5232
5233
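/*
 * A minimal usage sketch: both helpers require the byte count to be a
 * multiple of four, which fits descriptor tables and similar structures
 * sized in 32-bit units. The aDesc array is an assumption for the example.
 *
 *      uint32_t aDesc[64];
 *      ASMMemZero32(aDesc, sizeof(aDesc));
 *      ASMMemFill32(aDesc, sizeof(aDesc), UINT32_C(0xdeadbeef));
 */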
5234/**
5235 * Checks if a memory block is all zeros.
5236 *
5237 * @returns Pointer to the first non-zero byte.
5238 * @returns NULL if all zero.
5239 *
5240 * @param pv Pointer to the memory block.
5241 * @param cb Number of bytes in the block.
5242 */
5243#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5244DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5245#else
5246DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5247{
5248/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5249 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5250 for (; cb; cb--, pb++)
5251 if (RT_LIKELY(*pb == 0))
5252 { /* likely */ }
5253 else
5254 return (void RT_FAR *)pb;
5255 return NULL;
5256}
5257#endif
5258
5259
5260/**
5261 * Checks if a memory block is all zeros.
5262 *
5263 * @returns true if zero, false if not.
5264 *
5265 * @param pv Pointer to the memory block.
5266 * @param cb Number of bytes in the block.
5267 *
5268 * @sa ASMMemFirstNonZero
5269 */
5270DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5271{
5272 return ASMMemFirstNonZero(pv, cb) == NULL;
5273}
5274
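/*
 * A minimal usage sketch: ASMMemFirstNonZero reports where the check failed,
 * which makes for a more useful assertion than a plain boolean. The pvPage
 * and cbPage values are assumptions for the example.
 *
 *      void const *pvDirty = ASMMemFirstNonZero(pvPage, cbPage);
 *      AssertMsg(!pvDirty, ("first non-zero byte at %p\n", pvDirty));
 */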
5275
5276/**
5277 * Checks if a memory page is all zeros.
5278 *
5279 * @returns true / false.
5280 *
5281 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5282 * boundary.
5283 */
5284DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5285{
5286# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5287 union { RTCCUINTREG r; bool f; } uAX;
5288 RTCCUINTREG xCX, xDI;
5289 Assert(!((uintptr_t)pvPage & 15));
5290 __asm__ __volatile__("repe; "
5291# ifdef RT_ARCH_AMD64
5292 "scasq\n\t"
5293# else
5294 "scasl\n\t"
5295# endif
5296 "setnc %%al\n\t"
5297 : "=&c" (xCX)
5298 , "=&D" (xDI)
5299 , "=&a" (uAX.r)
5300 : "mr" (pvPage)
5301# ifdef RT_ARCH_AMD64
5302 , "0" (RT_ASM_PAGE_SIZE/8)
5303# else
5304 , "0" (RT_ASM_PAGE_SIZE/4)
5305# endif
5306 , "1" (pvPage)
5307 , "2" (0)
5308 : "cc");
5309 return uAX.f;
5310# else
5311 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5312 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5313 Assert(!((uintptr_t)pvPage & 15));
5314 for (;;)
5315 {
5316 if (puPtr[0]) return false;
5317 if (puPtr[4]) return false;
5318
5319 if (puPtr[2]) return false;
5320 if (puPtr[6]) return false;
5321
5322 if (puPtr[1]) return false;
5323 if (puPtr[5]) return false;
5324
5325 if (puPtr[3]) return false;
5326 if (puPtr[7]) return false;
5327
5328 if (!--cLeft)
5329 return true;
5330 puPtr += 8;
5331 }
5332# endif
5333}
5334
5335
5336/**
5337 * Checks if a memory block is filled with the specified byte, returning the
5338 * first mismatch.
5339 *
5340 * This is sort of an inverted memchr.
5341 *
5342 * @returns Pointer to the byte which doesn't equal u8.
5343 * @returns NULL if all equal to u8.
5344 *
5345 * @param pv Pointer to the memory block.
5346 * @param cb Number of bytes in the block.
5347 * @param u8 The value it's supposed to be filled with.
5348 *
5349 * @remarks No alignment requirements.
5350 */
5351#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5352 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5353DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5354#else
5355DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5356{
5357/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5358 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5359 for (; cb; cb--, pb++)
5360 if (RT_LIKELY(*pb == u8))
5361 { /* likely */ }
5362 else
5363 return (void *)pb;
5364 return NULL;
5365}
5366#endif
5367
5368
5369/**
5370 * Checks if a memory block is filled with the specified byte.
5371 *
5372 * @returns true if all matching, false if not.
5373 *
5374 * @param pv Pointer to the memory block.
5375 * @param cb Number of bytes in the block.
5376 * @param u8 The value it's supposed to be filled with.
5377 *
5378 * @remarks No alignment requirements.
5379 */
5380DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5381{
5382 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5383}
5384
5385
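/*
 * A minimal usage sketch: a typical use is verifying that a buffer still
 * carries its fill pattern, e.g. a guard area poisoned with 0xcc. The
 * pvGuard/cbGuard values are assumptions for the example, and the status
 * code needs iprt/err.h.
 *
 *      AssertReturn(ASMMemIsAllU8(pvGuard, cbGuard, 0xcc), VERR_INTERNAL_ERROR);
 */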
5386/**
5387 * Checks if a memory block is filled with the specified 32-bit value.
5388 *
5389 * This is a sort of inverted memchr.
5390 *
5391 * @returns Pointer to the first value which doesn't equal u32.
5392 * @returns NULL if all equal to u32.
5393 *
5394 * @param pv Pointer to the memory block.
5395 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5396 * @param u32 The value it's supposed to be filled with.
5397 */
5398DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5399{
5400/** @todo rewrite this in inline assembly? */
5401 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5402 for (; cb; cb -= 4, pu32++)
5403 if (RT_LIKELY(*pu32 == u32))
5404 { /* likely */ }
5405 else
5406 return (uint32_t RT_FAR *)pu32;
5407 return NULL;
5408}
5409
5410
5411/**
5412 * Probes a byte pointer for read access.
5413 *
5414 * While the function will fault if the byte is not read accessible,
5415 * the idea is to do this in a safe place like before acquiring locks
5416 * and such like.
5417 *
5418 * Also, this function guarantees that an eager compiler is not going
5419 * to optimize the probing away.
5420 *
5421 * @param pvByte Pointer to the byte.
5422 */
5423#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5424RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5425#else
5426DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5427{
5428# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5429 uint8_t u8;
5430# if RT_INLINE_ASM_GNU_STYLE
5431 __asm__ __volatile__("movb %1, %0\n\t"
5432 : "=q" (u8)
5433 : "m" (*(const uint8_t *)pvByte));
5434# else
5435 __asm
5436 {
5437# ifdef RT_ARCH_AMD64
5438 mov rax, [pvByte]
5439 mov al, [rax]
5440# else
5441 mov eax, [pvByte]
5442 mov al, [eax]
5443# endif
5444 mov [u8], al
5445 }
5446# endif
5447 return u8;
5448
5449# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5450 uint32_t u32;
5451 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5452# if defined(RT_ARCH_ARM64)
5453 "ldxrb %w[uDst], %[pMem]\n\t"
5454# else
5455 "ldrexb %[uDst], %[pMem]\n\t"
5456# endif
5457 : [uDst] "=&r" (u32)
5458 : [pMem] "m" (*(uint8_t const *)pvByte));
5459 return (uint8_t)u32;
5460
5461# else
5462# error "Port me"
5463# endif
5464}
5465#endif
5466
5467/**
5468 * Probes a buffer for read access page by page.
5469 *
5470 * While the function will fault if the buffer is not fully read
5471 * accessible, the idea is to do this in a safe place like before
5472 * acquiring locks and such like.
5473 *
5474 * Also, this function guarantees that an eager compiler is not going
5475 * to optimize the probing away.
5476 *
5477 * @param pvBuf Pointer to the buffer.
5478 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5479 */
5480DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5481{
5482 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5483 /* the first byte */
5484 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5485 ASMProbeReadByte(pu8);
5486
5487 /* the pages in between. */
5488 while (cbBuf > RT_ASM_PAGE_SIZE)
5489 {
5490 ASMProbeReadByte(pu8);
5491 cbBuf -= RT_ASM_PAGE_SIZE;
5492 pu8 += RT_ASM_PAGE_SIZE;
5493 }
5494
5495 /* the last byte */
5496 ASMProbeReadByte(pu8 + cbBuf - 1);
5497}
5498
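/*
 * Usage sketch (illustrative only, not part of this header): touching a
 * caller supplied request buffer up front so that a potential page fault is
 * taken here rather than while holding a lock.  pReq, cbReq, pThis->CritSect
 * and workerDoRequest are hypothetical names.
 *
 *      ASMProbeReadBuffer(pReq, cbReq);        // may fault here, in a safe context
 *      RTCritSectEnter(&pThis->CritSect);
 *      rc = workerDoRequest(pThis, pReq, cbReq);
 *      RTCritSectLeave(&pThis->CritSect);
 */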
5499
5500
5501/** @defgroup grp_inline_bits Bit Operations
5502 * @{
5503 */
5504
5505
5506/**
5507 * Sets a bit in a bitmap.
5508 *
5509 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5510 * 32-bit aligned.
5511 * @param iBit The bit to set.
5512 *
5513 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5514 * However, doing so will yield better performance as well as avoiding
5515 * traps accessing the last bits in the bitmap.
5516 */
5517#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5518RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5519#else
5520DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5521{
5522# if RT_INLINE_ASM_USES_INTRIN
5523 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5524
5525# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5526# if RT_INLINE_ASM_GNU_STYLE
5527 __asm__ __volatile__("btsl %1, %0"
5528 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5529 : "Ir" (iBit)
5530 , "m" (*(volatile long RT_FAR *)pvBitmap)
5531 : "memory"
5532 , "cc");
5533# else
5534 __asm
5535 {
5536# ifdef RT_ARCH_AMD64
5537 mov rax, [pvBitmap]
5538 mov edx, [iBit]
5539 bts [rax], edx
5540# else
5541 mov eax, [pvBitmap]
5542 mov edx, [iBit]
5543 bts [eax], edx
5544# endif
5545 }
5546# endif
5547
5548# else
5549 int32_t offBitmap = iBit / 32;
5550 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5551 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5552# endif
5553}
5554#endif
5555
5556
5557/**
5558 * Atomically sets a bit in a bitmap, ordered.
5559 *
5560 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5561 * aligned, otherwise the memory access isn't atomic!
5562 * @param iBit The bit to set.
5563 *
5564 * @remarks x86: Requires a 386 or later.
5565 */
5566#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5567RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5568#else
5569DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5570{
5571 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5572# if RT_INLINE_ASM_USES_INTRIN
5573 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5574# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5575# if RT_INLINE_ASM_GNU_STYLE
5576 __asm__ __volatile__("lock; btsl %1, %0"
5577 : "=m" (*(volatile long *)pvBitmap)
5578 : "Ir" (iBit)
5579 , "m" (*(volatile long *)pvBitmap)
5580 : "memory"
5581 , "cc");
5582# else
5583 __asm
5584 {
5585# ifdef RT_ARCH_AMD64
5586 mov rax, [pvBitmap]
5587 mov edx, [iBit]
5588 lock bts [rax], edx
5589# else
5590 mov eax, [pvBitmap]
5591 mov edx, [iBit]
5592 lock bts [eax], edx
5593# endif
5594 }
5595# endif
5596
5597# else
5598 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5599# endif
5600}
5601#endif
5602
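/*
 * Usage sketch (illustrative only, not part of this header): a small slot
 * allocation bitmap shared between threads.  s_bmBusy is a hypothetical,
 * 32-bit aligned array holding 256 bits.
 *
 *      static uint32_t volatile s_bmBusy[256 / 32];  // zero initialized
 *      ASMAtomicBitSet(&s_bmBusy[0], 42);            // mark slot 42 busy (atomic, ordered)
 *      ASMBitSet(&s_bmBusy[0], 43);                  // non-atomic variant, single writer only
 */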
5603
5604/**
5605 * Clears a bit in a bitmap.
5606 *
5607 * @param pvBitmap Pointer to the bitmap (little endian).
5608 * @param iBit The bit to clear.
5609 *
5610 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5611 * However, doing so will yield better performance as well as avoiding
5612 * traps accessing the last bits in the bitmap.
5613 */
5614#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5615RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5616#else
5617DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5618{
5619# if RT_INLINE_ASM_USES_INTRIN
5620 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5621
5622# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5623# if RT_INLINE_ASM_GNU_STYLE
5624 __asm__ __volatile__("btrl %1, %0"
5625 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5626 : "Ir" (iBit)
5627 , "m" (*(volatile long RT_FAR *)pvBitmap)
5628 : "memory"
5629 , "cc");
5630# else
5631 __asm
5632 {
5633# ifdef RT_ARCH_AMD64
5634 mov rax, [pvBitmap]
5635 mov edx, [iBit]
5636 btr [rax], edx
5637# else
5638 mov eax, [pvBitmap]
5639 mov edx, [iBit]
5640 btr [eax], edx
5641# endif
5642 }
5643# endif
5644
5645# else
5646 int32_t offBitmap = iBit / 32;
5647 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5648 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5649# endif
5650}
5651#endif
5652
5653
5654/**
5655 * Atomically clears a bit in a bitmap, ordered.
5656 *
5657 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5658 * aligned, otherwise the memory access isn't atomic!
5659 * @param iBit The bit to clear.
5660 *
5661 * @remarks No memory barrier, take care on SMP.
5662 * @remarks x86: Requires a 386 or later.
5663 */
5664#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5665RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5666#else
5667DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5668{
5669 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5670# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5671# if RT_INLINE_ASM_GNU_STYLE
5672 __asm__ __volatile__("lock; btrl %1, %0"
5673 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5674 : "Ir" (iBit)
5675 , "m" (*(volatile long RT_FAR *)pvBitmap)
5676 : "memory"
5677 , "cc");
5678# else
5679 __asm
5680 {
5681# ifdef RT_ARCH_AMD64
5682 mov rax, [pvBitmap]
5683 mov edx, [iBit]
5684 lock btr [rax], edx
5685# else
5686 mov eax, [pvBitmap]
5687 mov edx, [iBit]
5688 lock btr [eax], edx
5689# endif
5690 }
5691# endif
5692# else
5693 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5694# endif
5695}
5696#endif
5697
5698
5699/**
5700 * Toggles a bit in a bitmap.
5701 *
5702 * @param pvBitmap Pointer to the bitmap (little endian).
5703 * @param iBit The bit to toggle.
5704 *
5705 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5706 * However, doing so will yield better performance as well as avoiding
5707 * traps accessing the last bits in the bitmap.
5708 */
5709#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5710RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5711#else
5712DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5713{
5714# if RT_INLINE_ASM_USES_INTRIN
5715 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5716# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5717# if RT_INLINE_ASM_GNU_STYLE
5718 __asm__ __volatile__("btcl %1, %0"
5719 : "=m" (*(volatile long *)pvBitmap)
5720 : "Ir" (iBit)
5721 , "m" (*(volatile long *)pvBitmap)
5722 : "memory"
5723 , "cc");
5724# else
5725 __asm
5726 {
5727# ifdef RT_ARCH_AMD64
5728 mov rax, [pvBitmap]
5729 mov edx, [iBit]
5730 btc [rax], edx
5731# else
5732 mov eax, [pvBitmap]
5733 mov edx, [iBit]
5734 btc [eax], edx
5735# endif
5736 }
5737# endif
5738# else
5739 int32_t offBitmap = iBit / 32;
5740 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5741 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5742# endif
5743}
5744#endif
5745
5746
5747/**
5748 * Atomically toggles a bit in a bitmap, ordered.
5749 *
5750 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5751 * aligned, otherwise the memory access isn't atomic!
5752 * @param iBit The bit to toggle.
5753 *
5754 * @remarks x86: Requires a 386 or later.
5755 */
5756#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5757RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5758#else
5759DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5760{
5761 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5762# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5763# if RT_INLINE_ASM_GNU_STYLE
5764 __asm__ __volatile__("lock; btcl %1, %0"
5765 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5766 : "Ir" (iBit)
5767 , "m" (*(volatile long RT_FAR *)pvBitmap)
5768 : "memory"
5769 , "cc");
5770# else
5771 __asm
5772 {
5773# ifdef RT_ARCH_AMD64
5774 mov rax, [pvBitmap]
5775 mov edx, [iBit]
5776 lock btc [rax], edx
5777# else
5778 mov eax, [pvBitmap]
5779 mov edx, [iBit]
5780 lock btc [eax], edx
5781# endif
5782 }
5783# endif
5784# else
5785 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5786# endif
5787}
5788#endif
5789
5790
5791/**
5792 * Tests and sets a bit in a bitmap.
5793 *
5794 * @returns true if the bit was set.
5795 * @returns false if the bit was clear.
5796 *
5797 * @param pvBitmap Pointer to the bitmap (little endian).
5798 * @param iBit The bit to test and set.
5799 *
5800 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5801 * However, doing so will yield better performance as well as avoiding
5802 * traps accessing the last bits in the bitmap.
5803 */
5804#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5805RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5806#else
5807DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5808{
5809 union { bool f; uint32_t u32; uint8_t u8; } rc;
5810# if RT_INLINE_ASM_USES_INTRIN
5811 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
5812
5813# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5814# if RT_INLINE_ASM_GNU_STYLE
5815 __asm__ __volatile__("btsl %2, %1\n\t"
5816 "setc %b0\n\t"
5817 "andl $1, %0\n\t"
5818 : "=q" (rc.u32)
5819 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5820 : "Ir" (iBit)
5821 , "m" (*(volatile long RT_FAR *)pvBitmap)
5822 : "memory"
5823 , "cc");
5824# else
5825 __asm
5826 {
5827 mov edx, [iBit]
5828# ifdef RT_ARCH_AMD64
5829 mov rax, [pvBitmap]
5830 bts [rax], edx
5831# else
5832 mov eax, [pvBitmap]
5833 bts [eax], edx
5834# endif
5835 setc al
5836 and eax, 1
5837 mov [rc.u32], eax
5838 }
5839# endif
5840
5841# else
5842 int32_t offBitmap = iBit / 32;
5843 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5844 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
5845 >> (iBit & 31);
5846 rc.u32 &= 1;
5847# endif
5848 return rc.f;
5849}
5850#endif
5851
5852
5853/**
5854 * Atomically tests and sets a bit in a bitmap, ordered.
5855 *
5856 * @returns true if the bit was set.
5857 * @returns false if the bit was clear.
5858 *
5859 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5860 * aligned, otherwise the memory access isn't atomic!
5861 * @param iBit The bit to test and set.
5862 *
5863 * @remarks x86: Requires a 386 or later.
5864 */
5865#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5866RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5867#else
5868DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5869{
5870 union { bool f; uint32_t u32; uint8_t u8; } rc;
5871 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5872# if RT_INLINE_ASM_USES_INTRIN
5873 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5874# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5875# if RT_INLINE_ASM_GNU_STYLE
5876 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5877 "setc %b0\n\t"
5878 "andl $1, %0\n\t"
5879 : "=q" (rc.u32)
5880 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5881 : "Ir" (iBit)
5882 , "m" (*(volatile long RT_FAR *)pvBitmap)
5883 : "memory"
5884 , "cc");
5885# else
5886 __asm
5887 {
5888 mov edx, [iBit]
5889# ifdef RT_ARCH_AMD64
5890 mov rax, [pvBitmap]
5891 lock bts [rax], edx
5892# else
5893 mov eax, [pvBitmap]
5894 lock bts [eax], edx
5895# endif
5896 setc al
5897 and eax, 1
5898 mov [rc.u32], eax
5899 }
5900# endif
5901
5902# else
5903 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
5904 >> (iBit & 31);
5905 rc.u32 &= 1;
5906# endif
5907 return rc.f;
5908}
5909#endif
5910
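/*
 * Usage sketch (illustrative only, not part of this header): claiming a free
 * slot with the atomic test-and-set variant.  The return value is the
 * previous state of the bit, so false means this caller won the race.
 * s_bmBusy and iSlot are hypothetical names.
 *
 *      if (!ASMAtomicBitTestAndSet(&s_bmBusy[0], iSlot))
 *      {
 *          // The bit was clear before the call: the slot is ours.
 *      }
 *      else
 *      {
 *          // Somebody else already owns the slot.
 *      }
 */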
5911
5912/**
5913 * Tests and clears a bit in a bitmap.
5914 *
5915 * @returns true if the bit was set.
5916 * @returns false if the bit was clear.
5917 *
5918 * @param pvBitmap Pointer to the bitmap (little endian).
5919 * @param iBit The bit to test and clear.
5920 *
5921 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5922 * However, doing so will yield better performance as well as avoiding
5923 * traps accessing the last bits in the bitmap.
5924 */
5925#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5926RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5927#else
5928DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5929{
5930 union { bool f; uint32_t u32; uint8_t u8; } rc;
5931# if RT_INLINE_ASM_USES_INTRIN
5932 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5933
5934# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5935# if RT_INLINE_ASM_GNU_STYLE
5936 __asm__ __volatile__("btrl %2, %1\n\t"
5937 "setc %b0\n\t"
5938 "andl $1, %0\n\t"
5939 : "=q" (rc.u32)
5940 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5941 : "Ir" (iBit)
5942 , "m" (*(volatile long RT_FAR *)pvBitmap)
5943 : "memory"
5944 , "cc");
5945# else
5946 __asm
5947 {
5948 mov edx, [iBit]
5949# ifdef RT_ARCH_AMD64
5950 mov rax, [pvBitmap]
5951 btr [rax], edx
5952# else
5953 mov eax, [pvBitmap]
5954 btr [eax], edx
5955# endif
5956 setc al
5957 and eax, 1
5958 mov [rc.u32], eax
5959 }
5960# endif
5961
5962# else
5963 int32_t offBitmap = iBit / 32;
5964 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5965 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
5966 >> (iBit & 31);
5967 rc.u32 &= 1;
5968# endif
5969 return rc.f;
5970}
5971#endif
5972
5973
5974/**
5975 * Atomically tests and clears a bit in a bitmap, ordered.
5976 *
5977 * @returns true if the bit was set.
5978 * @returns false if the bit was clear.
5979 *
5980 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5981 * aligned, otherwise the memory access isn't atomic!
5982 * @param iBit The bit to test and clear.
5983 *
5984 * @remarks No memory barrier, take care on SMP.
5985 * @remarks x86: Requires a 386 or later.
5986 */
5987#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5988RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5989#else
5990DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5991{
5992 union { bool f; uint32_t u32; uint8_t u8; } rc;
5993 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5994# if RT_INLINE_ASM_USES_INTRIN
5995 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
5996
5997# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5998# if RT_INLINE_ASM_GNU_STYLE
5999 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6000 "setc %b0\n\t"
6001 "andl $1, %0\n\t"
6002 : "=q" (rc.u32)
6003 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6004 : "Ir" (iBit)
6005 , "m" (*(volatile long RT_FAR *)pvBitmap)
6006 : "memory"
6007 , "cc");
6008# else
6009 __asm
6010 {
6011 mov edx, [iBit]
6012# ifdef RT_ARCH_AMD64
6013 mov rax, [pvBitmap]
6014 lock btr [rax], edx
6015# else
6016 mov eax, [pvBitmap]
6017 lock btr [eax], edx
6018# endif
6019 setc al
6020 and eax, 1
6021 mov [rc.u32], eax
6022 }
6023# endif
6024
6025# else
6026 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6027 >> (iBit & 31);
6028 rc.u32 &= 1;
6029# endif
6030 return rc.f;
6031}
6032#endif
6033
6034
6035/**
6036 * Tests and toggles a bit in a bitmap.
6037 *
6038 * @returns true if the bit was set.
6039 * @returns false if the bit was clear.
6040 *
6041 * @param pvBitmap Pointer to the bitmap (little endian).
6042 * @param iBit The bit to test and toggle.
6043 *
6044 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6045 * However, doing so will yield better performance as well as avoiding
6046 * traps accessing the last bits in the bitmap.
6047 */
6048#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6049RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6050#else
6051DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6052{
6053 union { bool f; uint32_t u32; uint8_t u8; } rc;
6054# if RT_INLINE_ASM_USES_INTRIN
6055 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6056
6057# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6058# if RT_INLINE_ASM_GNU_STYLE
6059 __asm__ __volatile__("btcl %2, %1\n\t"
6060 "setc %b0\n\t"
6061 "andl $1, %0\n\t"
6062 : "=q" (rc.u32)
6063 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6064 : "Ir" (iBit)
6065 , "m" (*(volatile long RT_FAR *)pvBitmap)
6066 : "memory"
6067 , "cc");
6068# else
6069 __asm
6070 {
6071 mov edx, [iBit]
6072# ifdef RT_ARCH_AMD64
6073 mov rax, [pvBitmap]
6074 btc [rax], edx
6075# else
6076 mov eax, [pvBitmap]
6077 btc [eax], edx
6078# endif
6079 setc al
6080 and eax, 1
6081 mov [rc.u32], eax
6082 }
6083# endif
6084
6085# else
6086 int32_t offBitmap = iBit / 32;
6087 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6088 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6089 >> (iBit & 31);
6090 rc.u32 &= 1;
6091# endif
6092 return rc.f;
6093}
6094#endif
6095
6096
6097/**
6098 * Atomically tests and toggles a bit in a bitmap, ordered.
6099 *
6100 * @returns true if the bit was set.
6101 * @returns false if the bit was clear.
6102 *
6103 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6104 * aligned, otherwise the memory access isn't atomic!
6105 * @param iBit The bit to test and toggle.
6106 *
6107 * @remarks x86: Requires a 386 or later.
6108 */
6109#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6110RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6111#else
6112DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6113{
6114 union { bool f; uint32_t u32; uint8_t u8; } rc;
6115 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6116# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6117# if RT_INLINE_ASM_GNU_STYLE
6118 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6119 "setc %b0\n\t"
6120 "andl $1, %0\n\t"
6121 : "=q" (rc.u32)
6122 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6123 : "Ir" (iBit)
6124 , "m" (*(volatile long RT_FAR *)pvBitmap)
6125 : "memory"
6126 , "cc");
6127# else
6128 __asm
6129 {
6130 mov edx, [iBit]
6131# ifdef RT_ARCH_AMD64
6132 mov rax, [pvBitmap]
6133 lock btc [rax], edx
6134# else
6135 mov eax, [pvBitmap]
6136 lock btc [eax], edx
6137# endif
6138 setc al
6139 and eax, 1
6140 mov [rc.u32], eax
6141 }
6142# endif
6143
6144# else
6145 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6146 >> (iBit & 31);
6147 rc.u32 &= 1;
6148# endif
6149 return rc.f;
6150}
6151#endif
6152
6153
6154/**
6155 * Tests if a bit in a bitmap is set.
6156 *
6157 * @returns true if the bit is set.
6158 * @returns false if the bit is clear.
6159 *
6160 * @param pvBitmap Pointer to the bitmap (little endian).
6161 * @param iBit The bit to test.
6162 *
6163 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6164 * However, doing so will yield better performance as well as avoiding
6165 * traps accessing the last bits in the bitmap.
6166 */
6167#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6168RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6169#else
6170DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6171{
6172 union { bool f; uint32_t u32; uint8_t u8; } rc;
6173# if RT_INLINE_ASM_USES_INTRIN
6174 rc.u32 = _bittest((long *)pvBitmap, iBit);
6175
6176# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6177# if RT_INLINE_ASM_GNU_STYLE
6178
6179 __asm__ __volatile__("btl %2, %1\n\t"
6180 "setc %b0\n\t"
6181 "andl $1, %0\n\t"
6182 : "=q" (rc.u32)
6183 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6184 , "Ir" (iBit)
6185 : "memory"
6186 , "cc");
6187# else
6188 __asm
6189 {
6190 mov edx, [iBit]
6191# ifdef RT_ARCH_AMD64
6192 mov rax, [pvBitmap]
6193 bt [rax], edx
6194# else
6195 mov eax, [pvBitmap]
6196 bt [eax], edx
6197# endif
6198 setc al
6199 and eax, 1
6200 mov [rc.u32], eax
6201 }
6202# endif
6203
6204# else
6205 int32_t offBitmap = iBit / 32;
6206 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6207 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6208 rc.u32 &= 1;
6209# endif
6210 return rc.f;
6211}
6212#endif
6213
6214
6215/**
6216 * Clears a bit range within a bitmap.
6217 *
6218 * @param pvBitmap Pointer to the bitmap (little endian).
6219 * @param iBitStart The first bit to clear.
6220 * @param iBitEnd The first bit not to clear.
6221 */
6222DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6223{
6224 if (iBitStart < iBitEnd)
6225 {
6226 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6227 int32_t iStart = iBitStart & ~31;
6228 int32_t iEnd = iBitEnd & ~31;
6229 if (iStart == iEnd)
6230 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6231 else
6232 {
6233 /* bits in first dword. */
6234 if (iBitStart & 31)
6235 {
6236 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6237 pu32++;
6238 iBitStart = iStart + 32;
6239 }
6240
6241 /* whole dwords. */
6242 if (iBitStart != iEnd)
6243 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6244
6245 /* bits in last dword. */
6246 if (iBitEnd & 31)
6247 {
6248 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6249 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6250 }
6251 }
6252 }
6253}
6254
6255
6256/**
6257 * Sets a bit range within a bitmap.
6258 *
6259 * @param pvBitmap Pointer to the bitmap (little endian).
6260 * @param iBitStart The first bit to set.
6261 * @param iBitEnd The first bit not to set.
6262 */
6263DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6264{
6265 if (iBitStart < iBitEnd)
6266 {
6267 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6268 int32_t iStart = iBitStart & ~31;
6269 int32_t iEnd = iBitEnd & ~31;
6270 if (iStart == iEnd)
6271 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6272 else
6273 {
6274 /* bits in first dword. */
6275 if (iBitStart & 31)
6276 {
6277 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6278 pu32++;
6279 iBitStart = iStart + 32;
6280 }
6281
6282 /* whole dwords. */
6283 if (iBitStart != iEnd)
6284 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6285
6286 /* bits in last dword. */
6287 if (iBitEnd & 31)
6288 {
6289 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6290 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6291 }
6292 }
6293 }
6294}
6295
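/*
 * Usage sketch (illustrative only, not part of this header): the range
 * functions take a half-open interval [iBitStart, iBitEnd), i.e. iBitEnd
 * itself is not modified.  au32Bitmap is a hypothetical 128 bit bitmap.
 *
 *      uint32_t au32Bitmap[4] = {0};
 *      ASMBitSetRange(au32Bitmap, 8, 24);      // sets bits 8 thru 23
 *      ASMBitClearRange(au32Bitmap, 12, 16);   // clears bits 12 thru 15 again
 *      // au32Bitmap[0] now reads as 0x00FF0F00 on a little endian host.
 */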
6296
6297/**
6298 * Finds the first clear bit in a bitmap.
6299 *
6300 * @returns Index of the first zero bit.
6301 * @returns -1 if no clear bit was found.
6302 * @param pvBitmap Pointer to the bitmap (little endian).
6303 * @param cBits The number of bits in the bitmap. Multiple of 32.
6304 */
6305#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6306DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6307#else
6308DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6309{
6310 if (cBits)
6311 {
6312 int32_t iBit;
6313# if RT_INLINE_ASM_GNU_STYLE
6314 RTCCUINTREG uEAX, uECX, uEDI;
6315 cBits = RT_ALIGN_32(cBits, 32);
6316 __asm__ __volatile__("repe; scasl\n\t"
6317 "je 1f\n\t"
6318# ifdef RT_ARCH_AMD64
6319 "lea -4(%%rdi), %%rdi\n\t"
6320 "xorl (%%rdi), %%eax\n\t"
6321 "subq %5, %%rdi\n\t"
6322# else
6323 "lea -4(%%edi), %%edi\n\t"
6324 "xorl (%%edi), %%eax\n\t"
6325 "subl %5, %%edi\n\t"
6326# endif
6327 "shll $3, %%edi\n\t"
6328 "bsfl %%eax, %%edx\n\t"
6329 "addl %%edi, %%edx\n\t"
6330 "1:\t\n"
6331 : "=d" (iBit)
6332 , "=&c" (uECX)
6333 , "=&D" (uEDI)
6334 , "=&a" (uEAX)
6335 : "0" (0xffffffff)
6336 , "mr" (pvBitmap)
6337 , "1" (cBits >> 5)
6338 , "2" (pvBitmap)
6339 , "3" (0xffffffff)
6340 : "cc");
6341# else
6342 cBits = RT_ALIGN_32(cBits, 32);
6343 __asm
6344 {
6345# ifdef RT_ARCH_AMD64
6346 mov rdi, [pvBitmap]
6347 mov rbx, rdi
6348# else
6349 mov edi, [pvBitmap]
6350 mov ebx, edi
6351# endif
6352 mov edx, 0ffffffffh
6353 mov eax, edx
6354 mov ecx, [cBits]
6355 shr ecx, 5
6356 repe scasd
6357 je done
6358
6359# ifdef RT_ARCH_AMD64
6360 lea rdi, [rdi - 4]
6361 xor eax, [rdi]
6362 sub rdi, rbx
6363# else
6364 lea edi, [edi - 4]
6365 xor eax, [edi]
6366 sub edi, ebx
6367# endif
6368 shl edi, 3
6369 bsf edx, eax
6370 add edx, edi
6371 done:
6372 mov [iBit], edx
6373 }
6374# endif
6375 return iBit;
6376 }
6377 return -1;
6378}
6379#endif
6380
6381
6382/**
6383 * Finds the next clear bit in a bitmap.
6384 *
6385 * @returns Index of the next clear bit.
6386 * @returns -1 if no clear bit was found.
6387 * @param pvBitmap Pointer to the bitmap (little endian).
6388 * @param cBits The number of bits in the bitmap. Multiple of 32.
6389 * @param iBitPrev The bit returned from the last search.
6390 * The search will start at iBitPrev + 1.
6391 */
6392#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6393DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6394#else
6395DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6396{
6397 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6398 int iBit = ++iBitPrev & 31;
6399 if (iBit)
6400 {
6401 /*
6402 * Inspect the 32-bit word containing the unaligned bit.
6403 */
6404 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6405
6406# if RT_INLINE_ASM_USES_INTRIN
6407 unsigned long ulBit = 0;
6408 if (_BitScanForward(&ulBit, u32))
6409 return ulBit + iBitPrev;
6410# else
6411# if RT_INLINE_ASM_GNU_STYLE
6412 __asm__ __volatile__("bsf %1, %0\n\t"
6413 "jnz 1f\n\t"
6414 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6415 "1:\n\t"
6416 : "=r" (iBit)
6417 : "r" (u32)
6418 : "cc");
6419# else
6420 __asm
6421 {
6422 mov edx, [u32]
6423 bsf eax, edx
6424 jnz done
6425 mov eax, 0ffffffffh
6426 done:
6427 mov [iBit], eax
6428 }
6429# endif
6430 if (iBit >= 0)
6431 return iBit + (int)iBitPrev;
6432# endif
6433
6434 /*
6435 * Skip ahead and see if there is anything left to search.
6436 */
6437 iBitPrev |= 31;
6438 iBitPrev++;
6439 if (cBits <= (uint32_t)iBitPrev)
6440 return -1;
6441 }
6442
6443 /*
6444 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6445 */
6446 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6447 if (iBit >= 0)
6448 iBit += iBitPrev;
6449 return iBit;
6450}
6451#endif
6452
6453
6454/**
6455 * Finds the first set bit in a bitmap.
6456 *
6457 * @returns Index of the first set bit.
6458 * @returns -1 if no set bit was found.
6459 * @param pvBitmap Pointer to the bitmap (little endian).
6460 * @param cBits The number of bits in the bitmap. Multiple of 32.
6461 */
6462#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6463DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6464#else
6465DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6466{
6467 if (cBits)
6468 {
6469 int32_t iBit;
6470# if RT_INLINE_ASM_GNU_STYLE
6471 RTCCUINTREG uEAX, uECX, uEDI;
6472 cBits = RT_ALIGN_32(cBits, 32);
6473 __asm__ __volatile__("repe; scasl\n\t"
6474 "je 1f\n\t"
6475# ifdef RT_ARCH_AMD64
6476 "lea -4(%%rdi), %%rdi\n\t"
6477 "movl (%%rdi), %%eax\n\t"
6478 "subq %5, %%rdi\n\t"
6479# else
6480 "lea -4(%%edi), %%edi\n\t"
6481 "movl (%%edi), %%eax\n\t"
6482 "subl %5, %%edi\n\t"
6483# endif
6484 "shll $3, %%edi\n\t"
6485 "bsfl %%eax, %%edx\n\t"
6486 "addl %%edi, %%edx\n\t"
6487 "1:\t\n"
6488 : "=d" (iBit)
6489 , "=&c" (uECX)
6490 , "=&D" (uEDI)
6491 , "=&a" (uEAX)
6492 : "0" (0xffffffff)
6493 , "mr" (pvBitmap)
6494 , "1" (cBits >> 5)
6495 , "2" (pvBitmap)
6496 , "3" (0)
6497 : "cc");
6498# else
6499 cBits = RT_ALIGN_32(cBits, 32);
6500 __asm
6501 {
6502# ifdef RT_ARCH_AMD64
6503 mov rdi, [pvBitmap]
6504 mov rbx, rdi
6505# else
6506 mov edi, [pvBitmap]
6507 mov ebx, edi
6508# endif
6509 mov edx, 0ffffffffh
6510 xor eax, eax
6511 mov ecx, [cBits]
6512 shr ecx, 5
6513 repe scasd
6514 je done
6515# ifdef RT_ARCH_AMD64
6516 lea rdi, [rdi - 4]
6517 mov eax, [rdi]
6518 sub rdi, rbx
6519# else
6520 lea edi, [edi - 4]
6521 mov eax, [edi]
6522 sub edi, ebx
6523# endif
6524 shl edi, 3
6525 bsf edx, eax
6526 add edx, edi
6527 done:
6528 mov [iBit], edx
6529 }
6530# endif
6531 return iBit;
6532 }
6533 return -1;
6534}
6535#endif
6536
6537
6538/**
6539 * Finds the next set bit in a bitmap.
6540 *
6541 * @returns Index of the next set bit.
6542 * @returns -1 if no set bit was found.
6543 * @param pvBitmap Pointer to the bitmap (little endian).
6544 * @param cBits The number of bits in the bitmap. Multiple of 32.
6545 * @param iBitPrev The bit returned from the last search.
6546 * The search will start at iBitPrev + 1.
6547 */
6548#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6549DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6550#else
6551DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6552{
6553 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6554 int iBit = ++iBitPrev & 31;
6555 if (iBit)
6556 {
6557 /*
6558 * Inspect the 32-bit word containing the unaligned bit.
6559 */
6560 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6561
6562# if RT_INLINE_ASM_USES_INTRIN
6563 unsigned long ulBit = 0;
6564 if (_BitScanForward(&ulBit, u32))
6565 return ulBit + iBitPrev;
6566# else
6567# if RT_INLINE_ASM_GNU_STYLE
6568 __asm__ __volatile__("bsf %1, %0\n\t"
6569 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
6570 "movl $-1, %0\n\t"
6571 "1:\n\t"
6572 : "=r" (iBit)
6573 : "r" (u32)
6574 : "cc");
6575# else
6576 __asm
6577 {
6578 mov edx, [u32]
6579 bsf eax, edx
6580 jnz done
6581 mov eax, 0ffffffffh
6582 done:
6583 mov [iBit], eax
6584 }
6585# endif
6586 if (iBit >= 0)
6587 return iBit + (int)iBitPrev;
6588# endif
6589
6590 /*
6591 * Skip ahead and see if there is anything left to search.
6592 */
6593 iBitPrev |= 31;
6594 iBitPrev++;
6595 if (cBits <= (uint32_t)iBitPrev)
6596 return -1;
6597 }
6598
6599 /*
6600 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6601 */
6602 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6603 if (iBit >= 0)
6604 iBit += iBitPrev;
6605 return iBit;
6606}
6607#endif
6608
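/*
 * Usage sketch (illustrative only, not part of this header): walking all set
 * bits in a bitmap with the first/next pair.  au32Bitmap, cBits and
 * processBit are hypothetical; cBits must be a multiple of 32.
 *
 *      int32_t iBit = ASMBitFirstSet(au32Bitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          processBit(iBit);
 *          iBit = ASMBitNextSet(au32Bitmap, cBits, (uint32_t)iBit);
 *      }
 */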
6609
6610/**
6611 * Finds the first bit which is set in the given 32-bit integer.
6612 * Bits are numbered from 1 (least significant) to 32.
6613 *
6614 * @returns index [1..32] of the first set bit.
6615 * @returns 0 if all bits are cleared.
6616 * @param u32 Integer to search for set bits.
6617 * @remarks Similar to ffs() in BSD.
6618 */
6619#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6620RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6621#else
6622DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
6623{
6624# if RT_INLINE_ASM_USES_INTRIN
6625 unsigned long iBit;
6626 if (_BitScanForward(&iBit, u32))
6627 iBit++;
6628 else
6629 iBit = 0;
6630
6631# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6632# if RT_INLINE_ASM_GNU_STYLE
6633 uint32_t iBit;
6634 __asm__ __volatile__("bsf %1, %0\n\t"
6635 "jnz 1f\n\t"
6636 "xorl %0, %0\n\t"
6637 "jmp 2f\n"
6638 "1:\n\t"
6639 "incl %0\n"
6640 "2:\n\t"
6641 : "=r" (iBit)
6642 : "rm" (u32)
6643 : "cc");
6644# else
6645 uint32_t iBit;
6646 _asm
6647 {
6648 bsf eax, [u32]
6649 jnz found
6650 xor eax, eax
6651 jmp done
6652 found:
6653 inc eax
6654 done:
6655 mov [iBit], eax
6656 }
6657# endif
6658
6659# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6660 /*
6661 * Using the "count leading zeros (clz)" instruction here because there
6662 * is no dedicated instruction to get the first set bit.
6663 * Need to reverse the bits in the value with "rbit" first because
6664 * "clz" starts counting from the most significant bit.
6665 */
6666 uint32_t iBit;
6667 __asm__ __volatile__(
6668# if defined(RT_ARCH_ARM64)
6669 "rbit %w[uVal], %w[uVal]\n\t"
6670 "clz %w[iBit], %w[uVal]\n\t"
6671# else
6672 "rbit %[uVal], %[uVal]\n\t"
6673 "clz %[iBit], %[uVal]\n\t"
6674# endif
6675 : [uVal] "=r" (u32)
6676 , [iBit] "=r" (iBit)
6677 : "[uVal]" (u32));
6678 if (iBit != 32)
6679 iBit++;
6680 else
6681 iBit = 0; /* No bit set. */
6682
6683# else
6684# error "Port me"
6685# endif
6686 return iBit;
6687}
6688#endif
6689
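/*
 * Usage sketch (illustrative only, not part of this header): unlike the
 * bitmap searches above, the U16/U32/U64 scans use 1-based bit indexes so
 * that 0 can mean "no bit set":
 *
 *      ASMBitFirstSetU32(UINT32_C(0x00000001)) == 1
 *      ASMBitFirstSetU32(UINT32_C(0x80000000)) == 32
 *      ASMBitFirstSetU32(0)                    == 0   // no bit set
 */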
6690
6691/**
6692 * Finds the first bit which is set in the given 32-bit integer.
6693 * Bits are numbered from 1 (least significant) to 32.
6694 *
6695 * @returns index [1..32] of the first set bit.
6696 * @returns 0 if all bits are cleared.
6697 * @param i32 Integer to search for set bits.
6698 * @remark Similar to ffs() in BSD.
6699 */
6700DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
6701{
6702 return ASMBitFirstSetU32((uint32_t)i32);
6703}
6704
6705
6706/**
6707 * Finds the first bit which is set in the given 64-bit integer.
6708 *
6709 * Bits are numbered from 1 (least significant) to 64.
6710 *
6711 * @returns index [1..64] of the first set bit.
6712 * @returns 0 if all bits are cleared.
6713 * @param u64 Integer to search for set bits.
6714 * @remarks Similar to ffs() in BSD.
6715 */
6716#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6717RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6718#else
6719DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
6720{
6721# if RT_INLINE_ASM_USES_INTRIN
6722 unsigned long iBit;
6723# if ARCH_BITS == 64
6724 if (_BitScanForward64(&iBit, u64))
6725 iBit++;
6726 else
6727 iBit = 0;
6728# else
6729 if (_BitScanForward(&iBit, (uint32_t)u64))
6730 iBit++;
6731 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
6732 iBit += 33;
6733 else
6734 iBit = 0;
6735# endif
6736
6737# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6738 uint64_t iBit;
6739 __asm__ __volatile__("bsfq %1, %0\n\t"
6740 "jnz 1f\n\t"
6741 "xorl %k0, %k0\n\t"
6742 "jmp 2f\n"
6743 "1:\n\t"
6744 "incl %k0\n"
6745 "2:\n\t"
6746 : "=r" (iBit)
6747 : "rm" (u64)
6748 : "cc");
6749
6750# elif defined(RT_ARCH_ARM64)
6751 uint64_t iBit;
6752 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
6753 "clz %[iBit], %[uVal]\n\t"
6754 : [uVal] "=r" (u64)
6755 , [iBit] "=r" (iBit)
6756 : "[uVal]" (u64));
6757 if (iBit != 64)
6758 iBit++;
6759 else
6760 iBit = 0; /* No bit set. */
6761
6762# else
6763 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
6764 if (!iBit)
6765 {
6766 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
6767 if (iBit)
6768 iBit += 32;
6769 }
6770# endif
6771 return (unsigned)iBit;
6772}
6773#endif
6774
6775
6776/**
6777 * Finds the first bit which is set in the given 16-bit integer.
6778 *
6779 * Bits are numbered from 1 (least significant) to 16.
6780 *
6781 * @returns index [1..16] of the first set bit.
6782 * @returns 0 if all bits are cleared.
6783 * @param u16 Integer to search for set bits.
6784 * @remarks For 16-bit bs3kit code.
6785 */
6786#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6787RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6788#else
6789DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
6790{
6791 return ASMBitFirstSetU32((uint32_t)u16);
6792}
6793#endif
6794
6795
6796/**
6797 * Finds the last bit which is set in the given 32-bit integer.
6798 * Bits are numbered from 1 (least significant) to 32.
6799 *
6800 * @returns index [1..32] of the last set bit.
6801 * @returns 0 if all bits are cleared.
6802 * @param u32 Integer to search for set bits.
6803 * @remark Similar to fls() in BSD.
6804 */
6805#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6806RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6807#else
6808DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
6809{
6810# if RT_INLINE_ASM_USES_INTRIN
6811 unsigned long iBit;
6812 if (_BitScanReverse(&iBit, u32))
6813 iBit++;
6814 else
6815 iBit = 0;
6816
6817# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6818# if RT_INLINE_ASM_GNU_STYLE
6819 uint32_t iBit;
6820 __asm__ __volatile__("bsrl %1, %0\n\t"
6821 "jnz 1f\n\t"
6822 "xorl %0, %0\n\t"
6823 "jmp 2f\n"
6824 "1:\n\t"
6825 "incl %0\n"
6826 "2:\n\t"
6827 : "=r" (iBit)
6828 : "rm" (u32)
6829 : "cc");
6830# else
6831 uint32_t iBit;
6832 _asm
6833 {
6834 bsr eax, [u32]
6835 jnz found
6836 xor eax, eax
6837 jmp done
6838 found:
6839 inc eax
6840 done:
6841 mov [iBit], eax
6842 }
6843# endif
6844
6845# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6846 uint32_t iBit;
6847 __asm__ __volatile__(
6848# if defined(RT_ARCH_ARM64)
6849 "clz %w[iBit], %w[uVal]\n\t"
6850# else
6851 "clz %[iBit], %[uVal]\n\t"
6852# endif
6853 : [iBit] "=r" (iBit)
6854 : [uVal] "r" (u32));
6855 iBit = 32 - iBit;
6856
6857# else
6858# error "Port me"
6859# endif
6860 return iBit;
6861}
6862#endif
6863
6864
6865/**
6866 * Finds the last bit which is set in the given 32-bit integer.
6867 * Bits are numbered from 1 (least significant) to 32.
6868 *
6869 * @returns index [1..32] of the last set bit.
6870 * @returns 0 if all bits are cleared.
6871 * @param i32 Integer to search for set bits.
6872 * @remark Similar to fls() in BSD.
6873 */
6874DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
6875{
6876 return ASMBitLastSetU32((uint32_t)i32);
6877}
6878
6879
6880/**
6881 * Finds the last bit which is set in the given 64-bit integer.
6882 *
6883 * Bits are numbered from 1 (least significant) to 64.
6884 *
6885 * @returns index [1..64] of the last set bit.
6886 * @returns 0 if all bits are cleared.
6887 * @param u64 Integer to search for set bits.
6888 * @remark Similar to fls() in BSD.
6889 */
6890#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6891RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6892#else
6893DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
6894{
6895# if RT_INLINE_ASM_USES_INTRIN
6896 unsigned long iBit;
6897# if ARCH_BITS == 64
6898 if (_BitScanReverse64(&iBit, u64))
6899 iBit++;
6900 else
6901 iBit = 0;
6902# else
6903 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
6904 iBit += 33;
6905 else if (_BitScanReverse(&iBit, (uint32_t)u64))
6906 iBit++;
6907 else
6908 iBit = 0;
6909# endif
6910
6911# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6912 uint64_t iBit;
6913 __asm__ __volatile__("bsrq %1, %0\n\t"
6914 "jnz 1f\n\t"
6915 "xorl %k0, %k0\n\t"
6916 "jmp 2f\n"
6917 "1:\n\t"
6918 "incl %k0\n"
6919 "2:\n\t"
6920 : "=r" (iBit)
6921 : "rm" (u64)
6922 : "cc");
6923
6924# elif defined(RT_ARCH_ARM64)
6925 uint64_t iBit;
6926 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
6927 : [iBit] "=r" (iBit)
6928 : [uVal] "r" (u64));
6929 iBit = 64 - iBit;
6930
6931# else
6932 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
6933 if (iBit)
6934 iBit += 32;
6935 else
6936 iBit = ASMBitLastSetU32((uint32_t)u64);
6937# endif
6938 return (unsigned)iBit;
6939}
6940#endif
6941
6942
6943/**
6944 * Finds the last bit which is set in the given 16-bit integer.
6945 *
6946 * Bits are numbered from 1 (least significant) to 16.
6947 *
6948 * @returns index [1..16] of the last set bit.
6949 * @returns 0 if all bits are cleared.
6950 * @param u16 Integer to search for set bits.
6951 * @remarks For 16-bit bs3kit code.
6952 */
6953#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6954RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6955#else
6956DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
6957{
6958 return ASMBitLastSetU32((uint32_t)u16);
6959}
6960#endif
6961
6962
6963/**
6964 * Reverse the byte order of the given 16-bit integer.
6965 *
6966 * @returns The value with the byte order reversed.
6967 * @param u16 16-bit integer value.
6968 */
6969#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6970RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6971#else
6972DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6973{
6974# if RT_INLINE_ASM_USES_INTRIN
6975 return _byteswap_ushort(u16);
6976
6977# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6978# if RT_INLINE_ASM_GNU_STYLE
6979 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6980# else
6981 _asm
6982 {
6983 mov ax, [u16]
6984 ror ax, 8
6985 mov [u16], ax
6986 }
6987# endif
6988 return u16;
6989
6990# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6991 uint32_t u32Ret;
6992 __asm__ __volatile__(
6993# if defined(RT_ARCH_ARM64)
6994 "rev16 %w[uRet], %w[uVal]\n\t"
6995# else
6996 "rev16 %[uRet], %[uVal]\n\t"
6997# endif
6998 : [uRet] "=r" (u32Ret)
6999 : [uVal] "r" (u16));
7000 return (uint16_t)u32Ret;
7001
7002# else
7003# error "Port me"
7004# endif
7005}
7006#endif
7007
7008
7009/**
7010 * Reverse the byte order of the given 32-bit integer.
7011 *
7012 * @returns The value with the byte order reversed.
7013 * @param u32 32-bit integer value.
7014 */
7015#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7016RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
7017#else
7018DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
7019{
7020# if RT_INLINE_ASM_USES_INTRIN
7021 return _byteswap_ulong(u32);
7022
7023# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7024# if RT_INLINE_ASM_GNU_STYLE
7025 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
7026# else
7027 _asm
7028 {
7029 mov eax, [u32]
7030 bswap eax
7031 mov [u32], eax
7032 }
7033# endif
7034 return u32;
7035
7036# elif defined(RT_ARCH_ARM64)
7037 uint64_t u64Ret;
7038 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
7039 : [uRet] "=r" (u64Ret)
7040 : [uVal] "r" ((uint64_t)u32));
7041 return (uint32_t)u64Ret;
7042
7043# elif defined(RT_ARCH_ARM32)
7044 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7045 : [uRet] "=r" (u32)
7046 : [uVal] "[uRet]" (u32));
7047 return u32;
7048
7049# else
7050# error "Port me"
7051# endif
7052}
7053#endif
7054
7055
7056/**
7057 * Reverse the byte order of the given 64-bit integer.
7058 *
7059 * @returns The value with the byte order reversed.
7060 * @param u64 64-bit integer value.
7061 */
7062DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
7063{
7064#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
7065 return _byteswap_uint64(u64);
7066
7067# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7068 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
7069 return u64;
7070
7071# elif defined(RT_ARCH_ARM64)
7072 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7073 : [uRet] "=r" (u64)
7074 : [uVal] "[uRet]" (u64));
7075 return u64;
7076
7077#else
7078 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
7079 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
7080#endif
7081}
7082
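/*
 * Usage sketch (illustrative only, not part of this header): plain byte order
 * reversal, e.g. when reading a big endian on-disk field on a little endian
 * host (real code would normally prefer the RT_BE2H_U32 style macros, which
 * compile to nothing on big endian hosts):
 *
 *      ASMByteSwapU16(0x1234)                        == 0x3412
 *      ASMByteSwapU32(UINT32_C(0x12345678))          == UINT32_C(0x78563412)
 *      ASMByteSwapU64(UINT64_C(0x0123456789abcdef))  == UINT64_C(0xefcdab8967452301)
 */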
7083
7084/**
7085 * Rotate 32-bit unsigned value to the left by @a cShift.
7086 *
7087 * @returns Rotated value.
7088 * @param u32 The value to rotate.
7089 * @param cShift How many bits to rotate by.
7090 */
7091#ifdef __WATCOMC__
7092RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7093#else
7094DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7095{
7096# if RT_INLINE_ASM_USES_INTRIN
7097 return _rotl(u32, cShift);
7098
7099# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7100 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7101 return u32;
7102
7103# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7104 __asm__ __volatile__(
7105# if defined(RT_ARCH_ARM64)
7106 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7107# else
7108 "ror %[uRet], %[uVal], %[cShift]\n\t"
7109# endif
7110 : [uRet] "=r" (u32)
7111 : [uVal] "[uRet]" (u32)
7112 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7113 return u32;
7114
7115# else
7116 cShift &= 31;
7117 return (u32 << cShift) | (u32 >> (32 - cShift));
7118# endif
7119}
7120#endif
7121
7122
7123/**
7124 * Rotate 32-bit unsigned value to the right by @a cShift.
7125 *
7126 * @returns Rotated value.
7127 * @param u32 The value to rotate.
7128 * @param cShift How many bits to rotate by.
7129 */
7130#ifdef __WATCOMC__
7131RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7132#else
7133DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7134{
7135# if RT_INLINE_ASM_USES_INTRIN
7136 return _rotr(u32, cShift);
7137
7138# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7139 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7140 return u32;
7141
7142# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7143 __asm__ __volatile__(
7144# if defined(RT_ARCH_ARM64)
7145 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7146# else
7147 "ror %[uRet], %[uVal], %[cShift]\n\t"
7148# endif
7149 : [uRet] "=r" (u32)
7150 : [uVal] "[uRet]" (u32)
7151 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7152 return u32;
7153
7154# else
7155 cShift &= 31;
7156 return (u32 >> cShift) | (u32 << (32 - cShift));
7157# endif
7158}
7159#endif
7160
7161
7162/**
7163 * Rotate 64-bit unsigned value to the left by @a cShift.
7164 *
7165 * @returns Rotated value.
7166 * @param u64 The value to rotate.
7167 * @param cShift How many bits to rotate by.
7168 */
7169DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7170{
7171#if RT_INLINE_ASM_USES_INTRIN
7172 return _rotl64(u64, cShift);
7173
7174#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7175 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7176 return u64;
7177
7178#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7179 uint32_t uSpill;
7180 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7181 "jz 1f\n\t"
7182 "xchgl %%eax, %%edx\n\t"
7183 "1:\n\t"
7184 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7185 "jz 2f\n\t"
7186 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7187 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7188 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7189 "2:\n\t" /* } */
7190 : "=A" (u64)
7191 , "=c" (cShift)
7192 , "=r" (uSpill)
7193 : "0" (u64)
7194 , "1" (cShift)
7195 : "cc");
7196 return u64;
7197
7198# elif defined(RT_ARCH_ARM64)
7199 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7200 : [uRet] "=r" (u64)
7201 : [uVal] "[uRet]" (u64)
7202 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7203 return u64;
7204
7205#else
7206 cShift &= 63;
7207 return (u64 << cShift) | (u64 >> (64 - cShift));
7208#endif
7209}
7210
7211
7212/**
7213 * Rotate 64-bit unsigned value to the right by @a cShift.
7214 *
7215 * @returns Rotated value.
7216 * @param u64 The value to rotate.
7217 * @param cShift How many bits to rotate by.
7218 */
7219DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7220{
7221#if RT_INLINE_ASM_USES_INTRIN
7222 return _rotr64(u64, cShift);
7223
7224#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7225 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7226 return u64;
7227
7228#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7229 uint32_t uSpill;
7230 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7231 "jz 1f\n\t"
7232 "xchgl %%eax, %%edx\n\t"
7233 "1:\n\t"
7234 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7235 "jz 2f\n\t"
7236 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7237 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7238 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7239 "2:\n\t" /* } */
7240 : "=A" (u64)
7241 , "=c" (cShift)
7242 , "=r" (uSpill)
7243 : "0" (u64)
7244 , "1" (cShift)
7245 : "cc");
7246 return u64;
7247
7248# elif defined(RT_ARCH_ARM64)
7249 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7250 : [uRet] "=r" (u64)
7251 : [uVal] "[uRet]" (u64)
7252 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7253 return u64;
7254
7255#else
7256 cShift &= 63;
7257 return (u64 >> cShift) | (u64 << (64 - cShift));
7258#endif
7259}
7260
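/*
 * Usage sketch (illustrative only, not part of this header): rotating left by
 * n is equivalent to rotating right by the operand width minus n (shown here
 * for shift counts in the 1..31 / 1..63 range):
 *
 *      ASMRotateLeftU32(UINT32_C(0x80000001), 1)  == UINT32_C(0x00000003)
 *      ASMRotateRightU32(UINT32_C(0x00000003), 1) == UINT32_C(0x80000001)
 *      ASMRotateLeftU64(u64, 13)                  == ASMRotateRightU64(u64, 64 - 13)
 */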
7261/** @} */
7262
7263
7264/** @} */
7265
7266/*
7267 * Include #pragma aux definitions for Watcom C/C++.
7268 */
7269#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7270# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7271# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7272# include "asm-watcom-x86-16.h"
7273#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7274# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7275# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7276# include "asm-watcom-x86-32.h"
7277#endif
7278
7279#endif /* !IPRT_INCLUDED_asm_h */
7280