VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 87189

Last change on this file since 87189 was 87189, checked in by vboxsync, 4 years ago

iprt/asm.h: More fun. bugref:9898

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using the compiler intrinsics of _MSC_VER >= 1400.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedIncrement)
66# pragma intrinsic(_InterlockedDecrement)
67# pragma intrinsic(_InterlockedExchange)
68# pragma intrinsic(_InterlockedExchangeAdd)
69# pragma intrinsic(_InterlockedCompareExchange)
70# pragma intrinsic(_InterlockedCompareExchange64)
71# pragma intrinsic(_rotl)
72# pragma intrinsic(_rotr)
73# pragma intrinsic(_rotl64)
74# pragma intrinsic(_rotr64)
75# ifdef RT_ARCH_AMD64
76# pragma intrinsic(__stosq)
77# pragma intrinsic(_byteswap_uint64)
78# pragma intrinsic(_InterlockedExchange64)
79# pragma intrinsic(_InterlockedExchangeAdd64)
80# pragma intrinsic(_InterlockedAnd64)
81# pragma intrinsic(_InterlockedOr64)
82# pragma intrinsic(_InterlockedIncrement64)
83# pragma intrinsic(_InterlockedDecrement64)
84# endif
85#endif
86
87/*
88 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
89 */
90#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
91# include "asm-watcom-x86-16.h"
92#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-32.h"
94#endif
95
96
97/** @defgroup grp_rt_asm ASM - Assembly Routines
98 * @ingroup grp_rt
99 *
100 * @remarks The difference between ordered and unordered atomic operations is
101 * that the former will complete outstanding reads and writes before
102 * continuing, while the latter doesn't make any promises about the
103 * order. Ordered operations don't, it seems, make any 100% promise
104 * with regard to whether the operation will complete before any
105 * subsequent memory access. (Please correct if this is wrong.)
106 *
107 * ASMAtomicSomething operations are all ordered, while
108 * ASMAtomicUoSomething are unordered (note the Uo).
109 *
110 * Please note that ordered operations do not necessarily imply a
111 * compiler (memory) barrier. The user has to use the
112 * ASMCompilerBarrier() macro when that is deemed necessary.
113 *
114 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
115 * to reorder or even optimize assembler instructions away. For
116 * instance, in the following code the second rdmsr instruction is
117 * optimized away because gcc treats that instruction as deterministic:
118 *
119 * @code
120 * static inline uint32_t rdmsr_low(int idx)
121 * {
122 * uint32_t low;
123 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
124 * }
125 * ...
126 * uint32_t msr1 = rdmsr_low(1);
127 * foo(msr1);
128 * msr1 = rdmsr_low(1);
129 * bar(msr1);
130 * @endcode
131 *
132 * The input parameter of rdmsr_low is the same for both calls and
133 * therefore gcc will use the result of the first call as input
134 * parameter for bar() as well. For rdmsr this is not acceptable as
135 * this instruction is _not_ deterministic. This applies to reading
136 * machine status information in general.
137 *
138 * @{
139 */
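/* To make the naming scheme above concrete, a small usage sketch (the
 * g_uPayload/g_fReady globals are made up; ASMAtomicUoWriteU32 and
 * ASMAtomicWriteU32 are the unordered/ordered write variants declared
 * further down in this header):
 *
 *     static volatile uint32_t g_uPayload;
 *     static volatile uint32_t g_fReady;
 *
 *     static void publishPayload(uint32_t uValue)
 *     {
 *         ASMAtomicUoWriteU32(&g_uPayload, uValue);  // unordered: no ordering promise
 *         ASMCompilerBarrier();                      // stop the compiler from reordering the stores
 *         ASMAtomicWriteU32(&g_fReady, 1);           // ordered: completes outstanding writes first
 *     }
 */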
140
141
142/** @def RT_INLINE_ASM_GCC_4_3_X_X86
143 * Used to work around some 4.3.x register allocation issues in this version of
144 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
145 * definitely not for 5.x */
146#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
147# define RT_INLINE_ASM_GCC_4_3_X_X86 1
148#else
149# define RT_INLINE_ASM_GCC_4_3_X_X86 0
150#endif
151
152/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
153 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
154 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
155 * mode, x86.
156 *
157 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
158 * when in PIC mode on x86.
159 */
160#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
161# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
162# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
163# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
165# elif ( (defined(PIC) || defined(__PIC__)) \
166 && defined(RT_ARCH_X86) \
167 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
168 || defined(RT_OS_DARWIN)) )
169# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
170# else
171# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
172# endif
173#endif
174
175
176/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
177 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
178#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
179# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
180#else
181# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
182#endif
183
184/*
185 * ARM is great fun.
186 */
187#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
188
189# define RTASM_ARM_NO_BARRIER
190# ifdef RT_ARCH_ARM64
191# define RTASM_ARM_NO_BARRIER_IN_REG
192# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
193# define RTASM_ARM_DSB_SY "dsb sy\n\t"
194# define RTASM_ARM_DSB_SY_IN_REG
195# define RTASM_ARM_DSB_SY_COMMA_IN_REG
196# define RTASM_ARM_DMB_SY "dmb sy\n\t"
197# define RTASM_ARM_DMB_SY_IN_REG
198# define RTASM_ARM_DMB_SY_COMMA_IN_REG
199# define RTASM_ARM_DMB_ST "dmb st\n\t"
200# define RTASM_ARM_DMB_ST_IN_REG
201# define RTASM_ARM_DMB_ST_COMMA_IN_REG
202# define RTASM_ARM_DMB_LD "dmb ld\n\t"
203# define RTASM_ARM_DMB_LD_IN_REG
204# define RTASM_ARM_DMB_LD_COMMA_IN_REG
205# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
206# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
207 uint32_t rcSpill; \
208 uint32_t u32NewRet; \
209 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
210 RTASM_ARM_##barrier_type /* before label? */ \
211 "ldaxr %w[uNew], %[pMem]\n\t" \
212 modify64 \
213 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
214 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
215 : [pMem] "+m" (*a_pu32Mem) \
216 , [uNew] "=&r" (u32NewRet) \
217 , [rc] "=&r" (rcSpill) \
218 : in_reg \
219 : "cc")
220# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
221 uint32_t rcSpill; \
222 uint32_t u32OldRet; \
223 uint32_t u32NewSpill; \
224 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
225 RTASM_ARM_##barrier_type /* before label? */ \
226 "ldaxr %w[uOld], %[pMem]\n\t" \
227 modify64 \
228 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
229 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
230 : [pMem] "+m" (*a_pu32Mem) \
231 , [uOld] "=&r" (u32OldRet) \
232 , [uNew] "=&r" (u32NewSpill) \
233 , [rc] "=&r" (rcSpill) \
234 : in_reg \
235 : "cc")
236# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
237 uint32_t rcSpill; \
238 uint64_t u64NewRet; \
239 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
240 RTASM_ARM_##barrier_type /* before label? */ \
241 "ldaxr %[uNew], %[pMem]\n\t" \
242 modify64 \
243 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
244 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
245 : [pMem] "+m" (*a_pu64Mem) \
246 , [uNew] "=&r" (u64NewRet) \
247 , [rc] "=&r" (rcSpill) \
248 : in_reg \
249 : "cc")
250# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
251 uint32_t rcSpill; \
252 uint64_t u64OldRet; \
253 uint64_t u64NewSpill; \
254 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
255 RTASM_ARM_##barrier_type /* before label? */ \
256 "ldaxr %[uOld], %[pMem]\n\t" \
257 modify64 \
258 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
259 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
260 : [pMem] "+m" (*a_pu64Mem) \
261 , [uOld] "=&r" (u64OldRet) \
262 , [uNew] "=&r" (u64NewSpill) \
263 , [rc] "=&r" (rcSpill) \
264 : in_reg \
265 : "cc")
266
267# else /* RT_ARCH_ARM32 */
268# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
269# if RT_ARCH_ARM32 >= 7
270# warning armv7
271# define RTASM_ARM_NO_BARRIER_IN_REG
272# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
273# define RTASM_ARM_DSB_SY "dsb sy\n\t"
274# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
275# define RTASM_ARM_DMB_SY "dmb sy\n\t"
276# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
277# define RTASM_ARM_DMB_ST "dmb st\n\t"
278# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_LD "dmb ld\n\t"
280# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
281
282# elif RT_ARCH_ARM32 >= 6
283# warning armv6
284# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
285# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
286# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
287# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
288# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
289# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
290# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
291# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
292# elif RT_ARCH_ARM32 >= 4
293# warning armv5 or older
294# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
295# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
296# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
297# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
298# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
299# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
300# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# else
303# error "huh? Odd RT_ARCH_ARM32 value!"
304# endif
305# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
306# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
307# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
308# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
309# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
310 uint32_t rcSpill; \
311 uint32_t u32NewRet; \
312 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
313 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
314 "ldrex %[uNew], %[pMem]\n\t" \
315 modify32 \
316 "strex %[rc], %[uNew], %[pMem]\n\t" \
317 "cmp %[rc], #0\n\t" \
318 "bne .Ltry_again_" #name "_%=\n\t" \
319 : [pMem] "+m" (*a_pu32Mem) \
320 , [uNew] "=&r" (u32NewRet) \
321 , [rc] "=&r" (rcSpill) \
322 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
323 , in_reg \
324 : "cc")
325# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
326 uint32_t rcSpill; \
327 uint32_t u32OldRet; \
328 uint32_t u32NewSpill; \
329 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
330 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
331 "ldrex %[uOld], %[pMem]\n\t" \
332 modify32 \
333 "strex %[rc], %[uNew], %[pMem]\n\t" \
334 "cmp %[rc], #0\n\t" \
335 "bne .Ltry_again_" #name "_%=\n\t" \
336 : [pMem] "+m" (*a_pu32Mem) \
337 , [uOld] "=&r" (u32OldRet) \
338 , [uNew] "=&r" (u32NewSpill) \
339 , [rc] "=&r" (rcSpill) \
340 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
341 , in_reg \
342 : "cc")
343# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
344 uint32_t rcSpill; \
345 uint64_t u64NewRet; \
346 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
347 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
348 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
349 modify32 \
350 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
351 "cmp %[rc], #0\n\t" \
352 "bne .Ltry_again_" #name "_%=\n\t" \
353 : [pMem] "+m" (*a_pu64Mem), \
354 [uNew] "=&r" (u64NewRet), \
355 [rc] "=&r" (rcSpill) \
356 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
357 , in_reg \
358 : "cc")
359# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
360 uint32_t rcSpill; \
361 uint64_t u64OldRet; \
362 uint64_t u64NewSpill; \
363 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
364 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
365 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
366 modify32 \
367 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
368 "cmp %[rc], #0\n\t" \
369 "bne .Ltry_again_" #name "_%=\n\t" \
370 : [pMem] "+m" (*a_pu64Mem), \
371 [uOld] "=&r" (u64OldRet), \
372 [uNew] "=&r" (u64NewSpill), \
373 [rc] "=&r" (rcSpill) \
374 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
375 , in_reg \
376 : "cc")
377# endif /* RT_ARCH_ARM32 */
378#endif
379
380
381/** @def ASMReturnAddress
382 * Gets the return address of the current (or calling if you like) function or method.
383 */
384#ifdef _MSC_VER
385# ifdef __cplusplus
386extern "C"
387# endif
388void * _ReturnAddress(void);
389# pragma intrinsic(_ReturnAddress)
390# define ASMReturnAddress() _ReturnAddress()
391#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
392# define ASMReturnAddress() __builtin_return_address(0)
393#elif defined(__WATCOMC__)
394# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
395#else
396# error "Unsupported compiler."
397#endif
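/* A minimal usage sketch (the logging call is illustrative; RTPrintf comes
 * from iprt/stream.h, which is not included by this header):
 *
 *     static void myWorker(void)
 *     {
 *         void *pvCaller = ASMReturnAddress();   // address we will return to
 *         RTPrintf("myWorker called from %p\n", pvCaller);
 *     }
 */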
398
399
400/**
401 * Compiler memory barrier.
402 *
403 * Ensure that the compiler does not use any cached (register/tmp stack) memory
404 * values or any outstanding writes when returning from this function.
405 *
406 * This function must be used if non-volatile data is modified by a
407 * device or the VMM. Typical cases are port access, MMIO access,
408 * trapping instruction, etc.
409 */
410#if RT_INLINE_ASM_GNU_STYLE
411# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
412#elif RT_INLINE_ASM_USES_INTRIN
413# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
414#elif defined(__WATCOMC__)
415void ASMCompilerBarrier(void);
416#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
417DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
418{
419 __asm
420 {
421 }
422}
423#endif
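/* A usage sketch for the device/MMIO case mentioned above (the status
 * register layout and waitForDeviceReady are made up for illustration):
 *
 *     static void waitForDeviceReady(uint32_t *pu32Status)  // deliberately not volatile
 *     {
 *         while (!(*pu32Status & UINT32_C(0x1)))
 *         {
 *             ASMCompilerBarrier();   // force a fresh read of *pu32Status each iteration
 *             ASMNopPause();          // defined just below
 *         }
 *     }
 */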
424
425
426/** @def ASMBreakpoint
427 * Debugger Breakpoint.
428 * @deprecated Use RT_BREAKPOINT instead.
429 * @internal
430 */
431#define ASMBreakpoint() RT_BREAKPOINT()
432
433
434/**
435 * Spinloop hint for platforms that have these, empty function on the other
436 * platforms.
437 *
438 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
439 * spin locks.
440 */
441#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
442RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
443#else
444DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
445{
446# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
447# if RT_INLINE_ASM_GNU_STYLE
448 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
449# else
450 __asm {
451 _emit 0f3h
452 _emit 090h
453 }
454# endif
455
456# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
457 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
458
459# else
460 /* dummy */
461# endif
462}
463#endif
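/* A typical spin-wait sketch (g_fLocked and spinLockAcquire are made up;
 * ASMAtomicCmpXchgU32 is defined later in this file):
 *
 *     static volatile uint32_t g_fLocked;
 *
 *     static void spinLockAcquire(void)
 *     {
 *         while (!ASMAtomicCmpXchgU32(&g_fLocked, 1, 0))
 *             ASMNopPause();   // be nice to the sibling hyperthread while spinning
 *     }
 */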
464
465
466/**
467 * Atomically Exchange an unsigned 8-bit value, ordered.
468 *
469 * @returns Current *pu8 value
470 * @param pu8 Pointer to the 8-bit variable to update.
471 * @param u8 The 8-bit value to assign to *pu8.
472 */
473#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
474RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
475#else
476DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
477{
478# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
479# if RT_INLINE_ASM_GNU_STYLE
480 __asm__ __volatile__("xchgb %0, %1\n\t"
481 : "=m" (*pu8)
482 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
483 : "1" (u8)
484 , "m" (*pu8));
485# else
486 __asm
487 {
488# ifdef RT_ARCH_AMD64
489 mov rdx, [pu8]
490 mov al, [u8]
491 xchg [rdx], al
492 mov [u8], al
493# else
494 mov edx, [pu8]
495 mov al, [u8]
496 xchg [edx], al
497 mov [u8], al
498# endif
499 }
500# endif
501 return u8;
502
503# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
504 uint32_t uOld;
505 uint32_t rcSpill;
506 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
507 RTASM_ARM_DMB_SY
508# if defined(RT_ARCH_ARM64)
509 "ldaxrb %w[uOld], %[pMem]\n\t"
510 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
511 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
512# else
513 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
514 "strexb %[rc], %[uNew], %[pMem]\n\t"
515 "cmp %[rc], #0\n\t"
516 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
517# endif
518 : [pMem] "+m" (*pu8)
519 , [uOld] "=&r" (uOld)
520 , [rc] "=&r" (rcSpill)
521 : [uNew] "r" ((uint32_t)u8)
522 RTASM_ARM_DMB_SY_COMMA_IN_REG
523 : "cc");
524 return (uint8_t)uOld;
525
526# else
527# error "Port me"
528# endif
529}
530#endif
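/* Usage sketch: a one-shot flag implemented with the 8-bit exchange
 * (g_fInitDone and doInitOnce are made up for illustration):
 *
 *     static volatile uint8_t g_fInitDone;
 *
 *     static void doInitOnce(void)
 *     {
 *         if (ASMAtomicXchgU8(&g_fInitDone, 1) == 0)  // only the first caller sees the old 0
 *         {
 *             // ... perform the one-time initialization ...
 *         }
 *     }
 */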
531
532
533/**
534 * Atomically Exchange a signed 8-bit value, ordered.
535 *
536 * @returns Current *pi8 value
537 * @param pi8 Pointer to the 8-bit variable to update.
538 * @param i8 The 8-bit value to assign to *pi8.
539 */
540DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
541{
542 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
543}
544
545
546/**
547 * Atomically Exchange a bool value, ordered.
548 *
549 * @returns Current *pf value
550 * @param pf Pointer to the boolean variable to update.
551 * @param f The boolean value to assign to *pf.
552 */
553DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
554{
555#ifdef _MSC_VER
556 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
557#else
558 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
559#endif
560}
561
562
563/**
564 * Atomically Exchange an unsigned 16-bit value, ordered.
565 *
566 * @returns Current *pu16 value
567 * @param pu16 Pointer to the 16-bit variable to update.
568 * @param u16 The 16-bit value to assign to *pu16.
569 */
570#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
571RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
572#else
573DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
574{
575# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
576# if RT_INLINE_ASM_GNU_STYLE
577 __asm__ __volatile__("xchgw %0, %1\n\t"
578 : "=m" (*pu16)
579 , "=r" (u16)
580 : "1" (u16)
581 , "m" (*pu16));
582# else
583 __asm
584 {
585# ifdef RT_ARCH_AMD64
586 mov rdx, [pu16]
587 mov ax, [u16]
588 xchg [rdx], ax
589 mov [u16], ax
590# else
591 mov edx, [pu16]
592 mov ax, [u16]
593 xchg [edx], ax
594 mov [u16], ax
595# endif
596 }
597# endif
598 return u16;
599
600# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
601 uint32_t uOld;
602 uint32_t rcSpill;
603 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
604 RTASM_ARM_DMB_SY
605# if defined(RT_ARCH_ARM64)
606 "ldaxrh %w[uOld], %[pMem]\n\t"
607 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
608 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
609# else
610 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
611 "strexh %[rc], %[uNew], %[pMem]\n\t"
612 "cmp %[rc], #0\n\t"
613 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
614# endif
615 : [pMem] "+m" (*pu16)
616 , [uOld] "=&r" (uOld)
617 , [rc] "=&r" (rcSpill)
618 : [uNew] "r" ((uint32_t)u16)
619 RTASM_ARM_DMB_SY_COMMA_IN_REG
620 : "cc");
621 return (uint16_t)uOld;
622
623# else
624# error "Port me"
625# endif
626}
627#endif
628
629
630/**
631 * Atomically Exchange a signed 16-bit value, ordered.
632 *
633 * @returns Current *pi16 value
634 * @param pi16 Pointer to the 16-bit variable to update.
635 * @param i16 The 16-bit value to assign to *pi16.
636 */
637DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
638{
639 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
640}
641
642
643/**
644 * Atomically Exchange an unsigned 32-bit value, ordered.
645 *
646 * @returns Current *pu32 value
647 * @param pu32 Pointer to the 32-bit variable to update.
648 * @param u32 The 32-bit value to assign to *pu32.
649 *
650 * @remarks Does not work on 286 and earlier.
651 */
652#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
653RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
654#else
655DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
656{
657# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
658# if RT_INLINE_ASM_GNU_STYLE
659 __asm__ __volatile__("xchgl %0, %1\n\t"
660 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
661 , "=r" (u32)
662 : "1" (u32)
663 , "m" (*pu32));
664
665# elif RT_INLINE_ASM_USES_INTRIN
666 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
667
668# else
669 __asm
670 {
671# ifdef RT_ARCH_AMD64
672 mov rdx, [pu32]
673 mov eax, u32
674 xchg [rdx], eax
675 mov [u32], eax
676# else
677 mov edx, [pu32]
678 mov eax, u32
679 xchg [edx], eax
680 mov [u32], eax
681# endif
682 }
683# endif
684 return u32;
685
686# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
687 uint32_t uOld;
688 uint32_t rcSpill;
689 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
690 RTASM_ARM_DMB_SY
691# if defined(RT_ARCH_ARM64)
692 "ldaxr %w[uOld], %[pMem]\n\t"
693 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
694 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
695# else
696 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
697 "strex %[rc], %[uNew], %[pMem]\n\t"
698 "cmp %[rc], #0\n\t"
699 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
700# endif
701 : [pMem] "+m" (*pu32)
702 , [uOld] "=&r" (uOld)
703 , [rc] "=&r" (rcSpill)
704 : [uNew] "r" (u32)
705 RTASM_ARM_DMB_SY_COMMA_IN_REG
706 : "cc");
707 return uOld;
708
709# else
710# error "Port me"
711# endif
712}
713#endif
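/* Usage sketch: atomically draining a counter that other threads increment
 * (g_cPendingEvents and drainPendingEvents are made up for illustration):
 *
 *     static volatile uint32_t g_cPendingEvents;
 *
 *     static uint32_t drainPendingEvents(void)
 *     {
 *         return ASMAtomicXchgU32(&g_cPendingEvents, 0); // returns the old count, leaves zero behind
 *     }
 */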
714
715
716/**
717 * Atomically Exchange a signed 32-bit value, ordered.
718 *
719 * @returns Current *pi32 value
720 * @param pi32 Pointer to the 32-bit variable to update.
721 * @param i32 The 32-bit value to assign to *pi32.
722 */
723DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
724{
725 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
726}
727
728
729/**
730 * Atomically Exchange an unsigned 64-bit value, ordered.
731 *
732 * @returns Current *pu64 value
733 * @param pu64 Pointer to the 64-bit variable to update.
734 * @param u64 The 64-bit value to assign to *pu64.
735 *
736 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
737 */
738#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
739 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
740RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
741#else
742DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
743{
744# if defined(RT_ARCH_AMD64)
745# if RT_INLINE_ASM_USES_INTRIN
746 return _InterlockedExchange64((__int64 *)pu64, u64);
747
748# elif RT_INLINE_ASM_GNU_STYLE
749 __asm__ __volatile__("xchgq %0, %1\n\t"
750 : "=m" (*pu64)
751 , "=r" (u64)
752 : "1" (u64)
753 , "m" (*pu64));
754 return u64;
755# else
756 __asm
757 {
758 mov rdx, [pu64]
759 mov rax, [u64]
760 xchg [rdx], rax
761 mov [u64], rax
762 }
763 return u64;
764# endif
765
766# elif defined(RT_ARCH_X86)
767# if RT_INLINE_ASM_GNU_STYLE
768# if defined(PIC) || defined(__PIC__)
769 uint32_t u32EBX = (uint32_t)u64;
770 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
771 "xchgl %%ebx, %3\n\t"
772 "1:\n\t"
773 "lock; cmpxchg8b (%5)\n\t"
774 "jnz 1b\n\t"
775 "movl %3, %%ebx\n\t"
776 /*"xchgl %%esi, %5\n\t"*/
777 : "=A" (u64)
778 , "=m" (*pu64)
779 : "0" (*pu64)
780 , "m" ( u32EBX )
781 , "c" ( (uint32_t)(u64 >> 32) )
782 , "S" (pu64)
783 : "cc");
784# else /* !PIC */
785 __asm__ __volatile__("1:\n\t"
786 "lock; cmpxchg8b %1\n\t"
787 "jnz 1b\n\t"
788 : "=A" (u64)
789 , "=m" (*pu64)
790 : "0" (*pu64)
791 , "b" ( (uint32_t)u64 )
792 , "c" ( (uint32_t)(u64 >> 32) )
793 : "cc");
794# endif
795# else
796 __asm
797 {
798 mov ebx, dword ptr [u64]
799 mov ecx, dword ptr [u64 + 4]
800 mov edi, pu64
801 mov eax, dword ptr [edi]
802 mov edx, dword ptr [edi + 4]
803 retry:
804 lock cmpxchg8b [edi]
805 jnz retry
806 mov dword ptr [u64], eax
807 mov dword ptr [u64 + 4], edx
808 }
809# endif
810 return u64;
811
812# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
813 uint32_t rcSpill;
814 uint64_t uOld;
815 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
816 RTASM_ARM_DMB_SY
817# if defined(RT_ARCH_ARM64)
818 "ldaxr %[uOld], %[pMem]\n\t"
819 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
820 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
821# else
822 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
823 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
824 "cmp %[rc], #0\n\t"
825 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
826# endif
827 : [pMem] "+m" (*pu64)
828 , [uOld] "=&r" (uOld)
829 , [rc] "=&r" (rcSpill)
830 : [uNew] "r" (u64)
831 RTASM_ARM_DMB_SY_COMMA_IN_REG
832 : "cc");
833 return uOld;
834
835# else
836# error "Port me"
837# endif
838}
839#endif
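/* Usage sketch: publishing a 64-bit timestamp (g_u64LastSeenTS and
 * noteTimestamp are made up for illustration; per the remark above this is
 * safe on 32-bit x86 as well, thanks to the cmpxchg8b loop):
 *
 *     static volatile uint64_t g_u64LastSeenTS;
 *
 *     static void noteTimestamp(uint64_t u64Now)
 *     {
 *         ASMAtomicXchgU64(&g_u64LastSeenTS, u64Now);
 *     }
 */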
840
841
842/**
843 * Atomically Exchange a signed 64-bit value, ordered.
844 *
845 * @returns Current *pi64 value
846 * @param pi64 Pointer to the 64-bit variable to update.
847 * @param i64 The 64-bit value to assign to *pi64.
848 */
849DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
850{
851 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
852}
853
854
855/**
856 * Atomically Exchange a size_t value, ordered.
857 *
858 * @returns Current *puDst value
859 * @param puDst Pointer to the size_t variable to update.
860 * @param uNew The new value to assign to *puDst.
861 */
862DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
863{
864#if ARCH_BITS == 16
865 AssertCompile(sizeof(size_t) == 2);
866 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
867#elif ARCH_BITS == 32
868 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
869#elif ARCH_BITS == 64
870 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
871#else
872# error "ARCH_BITS is bogus"
873#endif
874}
875
876
877/**
878 * Atomically Exchange a pointer value, ordered.
879 *
880 * @returns Current *ppv value
881 * @param ppv Pointer to the pointer variable to update.
882 * @param pv The pointer value to assign to *ppv.
883 */
884DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
885{
886#if ARCH_BITS == 32 || ARCH_BITS == 16
887 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
888#elif ARCH_BITS == 64
889 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
890#else
891# error "ARCH_BITS is bogus"
892#endif
893}
894
895
896/**
897 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
898 *
899 * @returns Current *ppv value
900 * @param ppv Pointer to the pointer variable to update.
901 * @param pv The pointer value to assign to *ppv.
902 * @param Type The type of *ppv, sans volatile.
903 */
904#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
905# define ASMAtomicXchgPtrT(ppv, pv, Type) \
906 __extension__ \
907 ({\
908 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
909 Type const pvTypeChecked = (pv); \
910 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
911 pvTypeCheckedRet; \
912 })
913#else
914# define ASMAtomicXchgPtrT(ppv, pv, Type) \
915 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
916#endif
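/* Usage sketch for the typed variant (MYSTATE/PMYSTATE and g_pCurState are
 * made-up types and variables for illustration):
 *
 *     typedef struct MYSTATE *PMYSTATE;
 *     static PMYSTATE volatile g_pCurState;
 *
 *     static PMYSTATE setCurrentState(PMYSTATE pNewState)
 *     {
 *         return ASMAtomicXchgPtrT(&g_pCurState, pNewState, PMYSTATE); // returns the previous state
 *     }
 */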
917
918
919/**
920 * Atomically Exchange a raw-mode context pointer value, ordered.
921 *
922 * @returns Current *ppvRC value
923 * @param ppvRC Pointer to the pointer variable to update.
924 * @param pvRC The pointer value to assign to *ppvRC.
925 */
926DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
927{
928 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
929}
930
931
932/**
933 * Atomically Exchange a ring-0 pointer value, ordered.
934 *
935 * @returns Current *ppvR0 value
936 * @param ppvR0 Pointer to the pointer variable to update.
937 * @param pvR0 The pointer value to assign to *ppvR0.
938 */
939DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
940{
941#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
942 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
943#elif R0_ARCH_BITS == 64
944 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
945#else
946# error "R0_ARCH_BITS is bogus"
947#endif
948}
949
950
951/**
952 * Atomically Exchange a ring-3 pointer value, ordered.
953 *
954 * @returns Current *ppvR3 value
955 * @param ppvR3 Pointer to the pointer variable to update.
956 * @param pvR3 The pointer value to assign to *ppvR3.
957 */
958DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
959{
960#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
961 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
962#elif R3_ARCH_BITS == 64
963 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
964#else
965# error "R3_ARCH_BITS is bogus"
966#endif
967}
968
969
970/** @def ASMAtomicXchgHandle
971 * Atomically Exchange a typical IPRT handle value, ordered.
972 *
973 * @param ph Pointer to the value to update.
974 * @param hNew The new value to assign to *ph.
975 * @param phRes Where to store the current *ph value.
976 *
977 * @remarks This doesn't currently work for all handles (like RTFILE).
978 */
979#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
980# define ASMAtomicXchgHandle(ph, hNew, phRes) \
981 do { \
982 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
983 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
984 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
985 } while (0)
986#elif HC_ARCH_BITS == 64
987# define ASMAtomicXchgHandle(ph, hNew, phRes) \
988 do { \
989 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
990 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
991 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
992 } while (0)
993#else
994# error HC_ARCH_BITS
995#endif
996
997
998/**
999 * Atomically Exchange a value whose size might differ
1000 * between platforms or compilers, ordered.
1001 *
1002 * @param pu Pointer to the variable to update.
1003 * @param uNew The value to assign to *pu.
1004 * @todo This is busted as it's missing the result argument.
1005 */
1006#define ASMAtomicXchgSize(pu, uNew) \
1007 do { \
1008 switch (sizeof(*(pu))) { \
1009 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1010 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1011 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1012 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1013 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1014 } \
1015 } while (0)
1016
1017/**
1018 * Atomically Exchange a value whose size might differ
1019 * between platforms or compilers, ordered.
1020 *
1021 * @param pu Pointer to the variable to update.
1022 * @param uNew The value to assign to *pu.
1023 * @param puRes Where to store the current *pu value.
1024 */
1025#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1026 do { \
1027 switch (sizeof(*(pu))) { \
1028 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1029 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1030 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1031 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1032 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
1033 } \
1034 } while (0)
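/* Usage sketch: exchanging a variable whose width differs between hosts
 * (g_uCookie and swapCookie are made up; RTHCUINTPTR is 32 or 64 bits
 * depending on the host):
 *
 *     static volatile RTHCUINTPTR g_uCookie;
 *
 *     static RTHCUINTPTR swapCookie(RTHCUINTPTR uNew)
 *     {
 *         RTHCUINTPTR uOld;
 *         ASMAtomicXchgSizeCorrect(&g_uCookie, uNew, &uOld);
 *         return uOld;
 *     }
 */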
1035
1036
1037
1038/**
1039 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1040 *
1041 * @returns true if xchg was done.
1042 * @returns false if xchg wasn't done.
1043 *
1044 * @param pu8 Pointer to the value to update.
1045 * @param u8New The new value to assign to *pu8.
1046 * @param u8Old The old value to compare *pu8 with.
1047 *
1048 * @remarks x86: Requires a 486 or later.
1049 */
1050#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1051RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1052#else
1053DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1054{
1055# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1056 uint8_t u8Ret;
1057 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1058 "setz %1\n\t"
1059 : "=m" (*pu8)
1060 , "=qm" (u8Ret)
1061 , "=a" (u8Old)
1062 : "q" (u8New)
1063 , "2" (u8Old)
1064 , "m" (*pu8)
1065 : "cc");
1066 return (bool)u8Ret;
1067
1068# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1069 union { uint32_t u; bool f; } fXchg;
1070 uint32_t u32Spill;
1071 uint32_t rcSpill;
1072 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1073 RTASM_ARM_DMB_SY
1074# if defined(RT_ARCH_ARM64)
1075 "ldaxrb %w[uOld], %[pMem]\n\t"
1076 "cmp %w[uOld], %w[uCmp]\n\t"
1077 "bne 1f\n\t" /* stop here if not equal */
1078 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1079 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1080 "mov %w[fXchg], #1\n\t"
1081# else
1082 "ldrexb %[uOld], %[pMem]\n\t"
1083 "teq %[uOld], %[uCmp]\n\t"
1084 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1085 "bne 1f\n\t" /* stop here if not equal */
1086 "cmp %[rc], #0\n\t"
1087 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1088 "mov %[fXchg], #1\n\t"
1089# endif
1090 "1:\n\t"
1091 : [pMem] "+m" (*pu8)
1092 , [uOld] "=&r" (u32Spill)
1093 , [rc] "=&r" (rcSpill)
1094 , [fXchg] "=&r" (fXchg.u)
1095 : [uCmp] "r" ((uint32_t)u8Old)
1096 , [uNew] "r" ((uint32_t)u8New)
1097 , "[fXchg]" (0)
1098 RTASM_ARM_DMB_SY_COMMA_IN_REG
1099 : "cc");
1100 return fXchg.f;
1101
1102# else
1103# error "Port me"
1104# endif
1105}
1106#endif
1107
1108
1109/**
1110 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1111 *
1112 * @returns true if xchg was done.
1113 * @returns false if xchg wasn't done.
1114 *
1115 * @param pi8 Pointer to the value to update.
1116 * @param i8New The new value to assign to *pi8.
1117 * @param i8Old The old value to compare *pi8 with.
1118 *
1119 * @remarks x86: Requires a 486 or later.
1120 */
1121DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1122{
1123 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1124}
1125
1126
1127/**
1128 * Atomically Compare and Exchange a bool value, ordered.
1129 *
1130 * @returns true if xchg was done.
1131 * @returns false if xchg wasn't done.
1132 *
1133 * @param pf Pointer to the value to update.
1134 * @param fNew The new value to assign to *pf.
1135 * @param fOld The old value to compare *pf with.
1136 *
1137 * @remarks x86: Requires a 486 or later.
1138 */
1139DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1140{
1141 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1142}
1143
1144
1145/**
1146 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1147 *
1148 * @returns true if xchg was done.
1149 * @returns false if xchg wasn't done.
1150 *
1151 * @param pu32 Pointer to the value to update.
1152 * @param u32New The new value to assign to *pu32.
1153 * @param u32Old The old value to compare *pu32 with.
1154 *
1155 * @remarks x86: Requires a 486 or later.
1156 */
1157#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1158RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1161{
1162# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1163# if RT_INLINE_ASM_GNU_STYLE
1164 uint8_t u8Ret;
1165 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1166 "setz %1\n\t"
1167 : "=m" (*pu32)
1168 , "=qm" (u8Ret)
1169 , "=a" (u32Old)
1170 : "r" (u32New)
1171 , "2" (u32Old)
1172 , "m" (*pu32)
1173 : "cc");
1174 return (bool)u8Ret;
1175
1176# elif RT_INLINE_ASM_USES_INTRIN
1177 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1178
1179# else
1180 uint32_t u32Ret;
1181 __asm
1182 {
1183# ifdef RT_ARCH_AMD64
1184 mov rdx, [pu32]
1185# else
1186 mov edx, [pu32]
1187# endif
1188 mov eax, [u32Old]
1189 mov ecx, [u32New]
1190# ifdef RT_ARCH_AMD64
1191 lock cmpxchg [rdx], ecx
1192# else
1193 lock cmpxchg [edx], ecx
1194# endif
1195 setz al
1196 movzx eax, al
1197 mov [u32Ret], eax
1198 }
1199 return !!u32Ret;
1200# endif
1201
1202# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1203 union { uint32_t u; bool f; } fXchg;
1204 uint32_t u32Spill;
1205 uint32_t rcSpill;
1206 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1207 RTASM_ARM_DMB_SY
1208# if defined(RT_ARCH_ARM64)
1209 "ldaxr %w[uOld], %[pMem]\n\t"
1210 "cmp %w[uOld], %w[uCmp]\n\t"
1211 "bne 1f\n\t" /* stop here if not equal */
1212 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1213 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1214 "mov %w[fXchg], #1\n\t"
1215# else
1216 "ldrex %[uOld], %[pMem]\n\t"
1217 "teq %[uOld], %[uCmp]\n\t"
1218 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1219 "bne 1f\n\t" /* stop here if not equal */
1220 "cmp %[rc], #0\n\t"
1221 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1222 "mov %[fXchg], #1\n\t"
1223# endif
1224 "1:\n\t"
1225 : [pMem] "+m" (*pu32)
1226 , [uOld] "=&r" (u32Spill)
1227 , [rc] "=&r" (rcSpill)
1228 , [fXchg] "=&r" (fXchg.u)
1229 : [uCmp] "r" (u32Old)
1230 , [uNew] "r" (u32New)
1231 , "[fXchg]" (0)
1232 RTASM_ARM_DMB_SY_COMMA_IN_REG
1233 : "cc");
1234 return fXchg.f;
1235
1236# else
1237# error "Port me"
1238# endif
1239}
1240#endif
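/* Usage sketch: the classic compare-exchange retry loop, here keeping a
 * running maximum (atomicMaxU32 is made up; ASMAtomicReadU32 is declared
 * later in this header):
 *
 *     static void atomicMaxU32(volatile uint32_t *pu32, uint32_t u32New)
 *     {
 *         uint32_t u32Old;
 *         do
 *             u32Old = ASMAtomicReadU32(pu32);
 *         while (   u32New > u32Old
 *                && !ASMAtomicCmpXchgU32(pu32, u32New, u32Old));
 *     }
 */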
1241
1242
1243/**
1244 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1245 *
1246 * @returns true if xchg was done.
1247 * @returns false if xchg wasn't done.
1248 *
1249 * @param pi32 Pointer to the value to update.
1250 * @param i32New The new value to assign to *pi32.
1251 * @param i32Old The old value to compare *pi32 with.
1252 *
1253 * @remarks x86: Requires a 486 or later.
1254 */
1255DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1256{
1257 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1258}
1259
1260
1261/**
1262 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1263 *
1264 * @returns true if xchg was done.
1265 * @returns false if xchg wasn't done.
1266 *
1267 * @param pu64 Pointer to the 64-bit variable to update.
1268 * @param u64New The 64-bit value to assign to *pu64.
1269 * @param u64Old The value to compare with.
1270 *
1271 * @remarks x86: Requires a Pentium or later.
1272 */
1273#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1274 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1275RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1276#else
1277DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1278{
1279# if RT_INLINE_ASM_USES_INTRIN
1280 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1281
1282# elif defined(RT_ARCH_AMD64)
1283# if RT_INLINE_ASM_GNU_STYLE
1284 uint8_t u8Ret;
1285 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1286 "setz %1\n\t"
1287 : "=m" (*pu64)
1288 , "=qm" (u8Ret)
1289 , "=a" (u64Old)
1290 : "r" (u64New)
1291 , "2" (u64Old)
1292 , "m" (*pu64)
1293 : "cc");
1294 return (bool)u8Ret;
1295# else
1296 bool fRet;
1297 __asm
1298 {
1299 mov rdx, [pu64]
1300 mov rax, [u64Old]
1301 mov rcx, [u64New]
1302 lock cmpxchg [rdx], rcx
1303 setz al
1304 mov [fRet], al
1305 }
1306 return fRet;
1307# endif
1308
1309# elif defined(RT_ARCH_X86)
1310 uint32_t u32Ret;
1311# if RT_INLINE_ASM_GNU_STYLE
1312# if defined(PIC) || defined(__PIC__)
1313 uint32_t u32EBX = (uint32_t)u64New;
1314 uint32_t u32Spill;
1315 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1316 "lock; cmpxchg8b (%6)\n\t"
1317 "setz %%al\n\t"
1318 "movl %4, %%ebx\n\t"
1319 "movzbl %%al, %%eax\n\t"
1320 : "=a" (u32Ret)
1321 , "=d" (u32Spill)
1322# if RT_GNUC_PREREQ(4, 3)
1323 , "+m" (*pu64)
1324# else
1325 , "=m" (*pu64)
1326# endif
1327 : "A" (u64Old)
1328 , "m" ( u32EBX )
1329 , "c" ( (uint32_t)(u64New >> 32) )
1330 , "S" (pu64)
1331 : "cc");
1332# else /* !PIC */
1333 uint32_t u32Spill;
1334 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1335 "setz %%al\n\t"
1336 "movzbl %%al, %%eax\n\t"
1337 : "=a" (u32Ret)
1338 , "=d" (u32Spill)
1339 , "+m" (*pu64)
1340 : "A" (u64Old)
1341 , "b" ( (uint32_t)u64New )
1342 , "c" ( (uint32_t)(u64New >> 32) )
1343 : "cc");
1344# endif
1345 return (bool)u32Ret;
1346# else
1347 __asm
1348 {
1349 mov ebx, dword ptr [u64New]
1350 mov ecx, dword ptr [u64New + 4]
1351 mov edi, [pu64]
1352 mov eax, dword ptr [u64Old]
1353 mov edx, dword ptr [u64Old + 4]
1354 lock cmpxchg8b [edi]
1355 setz al
1356 movzx eax, al
1357 mov dword ptr [u32Ret], eax
1358 }
1359 return !!u32Ret;
1360# endif
1361
1362# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1363 union { uint32_t u; bool f; } fXchg;
1364 uint64_t u64Spill;
1365 uint32_t rcSpill;
1366 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1367 RTASM_ARM_DMB_SY
1368# if defined(RT_ARCH_ARM64)
1369 "ldaxr %[uOld], %[pMem]\n\t"
1370 "cmp %[uOld], %[uCmp]\n\t"
1371 "bne 1f\n\t" /* stop here if not equal */
1372 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1373 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1374 "mov %w[fXchg], #1\n\t"
1375# else
1376 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1377 "teq %[uOld], %[uCmp]\n\t"
1378 "teqeq %H[uOld], %H[uCmp]\n\t"
1379 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1380 "bne 1f\n\t" /* stop here if not equal */
1381 "cmp %[rc], #0\n\t"
1382 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1383 "mov %[fXchg], #1\n\t"
1384# endif
1385 "1:\n\t"
1386 : [pMem] "+m" (*pu64)
1387 , [uOld] "=&r" (u64Spill)
1388 , [rc] "=&r" (rcSpill)
1389 , [fXchg] "=&r" (fXchg.u)
1390 : [uCmp] "r" (u64Old)
1391 , [uNew] "r" (u64New)
1392 , "[fXchg]" (0)
1393 RTASM_ARM_DMB_SY_COMMA_IN_REG
1394 : "cc");
1395 return fXchg.f;
1396
1397# else
1398# error "Port me"
1399# endif
1400}
1401#endif
1402
1403
1404/**
1405 * Atomically Compare and exchange a signed 64-bit value, ordered.
1406 *
1407 * @returns true if xchg was done.
1408 * @returns false if xchg wasn't done.
1409 *
1410 * @param pi64 Pointer to the 64-bit variable to update.
1411 * @param i64 The 64-bit value to assign to *pi64.
1412 * @param i64Old The value to compare with.
1413 *
1414 * @remarks x86: Requires a Pentium or later.
1415 */
1416DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1417{
1418 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1419}
1420
1421
1422/**
1423 * Atomically Compare and Exchange a pointer value, ordered.
1424 *
1425 * @returns true if xchg was done.
1426 * @returns false if xchg wasn't done.
1427 *
1428 * @param ppv Pointer to the value to update.
1429 * @param pvNew The new value to assign to *ppv.
1430 * @param pvOld The old value to compare *ppv with.
1431 *
1432 * @remarks x86: Requires a 486 or later.
1433 */
1434DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1435{
1436#if ARCH_BITS == 32 || ARCH_BITS == 16
1437 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1438#elif ARCH_BITS == 64
1439 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1440#else
1441# error "ARCH_BITS is bogus"
1442#endif
1443}
1444
1445
1446/**
1447 * Atomically Compare and Exchange a pointer value, ordered.
1448 *
1449 * @returns true if xchg was done.
1450 * @returns false if xchg wasn't done.
1451 *
1452 * @param ppv Pointer to the value to update.
1453 * @param pvNew The new value to assign to *ppv.
1454 * @param pvOld The old value to compare *ppv with.
1455 *
1456 * @remarks This is relatively type safe on GCC platforms.
1457 * @remarks x86: Requires a 486 or later.
1458 */
1459#ifdef __GNUC__
1460# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1461 __extension__ \
1462 ({\
1463 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1464 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1465 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1466 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1467 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1468 fMacroRet; \
1469 })
1470#else
1471# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1472 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1473#endif
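/* Usage sketch: lock-free LIFO push built on the typed compare-exchange
 * (MYITEM and g_pHead are made up for illustration):
 *
 *     typedef struct MYITEM { struct MYITEM *pNext; } MYITEM;
 *     static MYITEM * volatile g_pHead;
 *
 *     static void pushItem(MYITEM *pItem)
 *     {
 *         MYITEM *pOldHead;
 *         do
 *         {
 *             pOldHead = g_pHead;
 *             pItem->pNext = pOldHead;
 *         } while (!ASMAtomicCmpXchgPtr(&g_pHead, pItem, pOldHead));
 *     }
 */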
1474
1475
1476/** @def ASMAtomicCmpXchgHandle
1477 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1478 *
1479 * @param ph Pointer to the value to update.
1480 * @param hNew The new value to assign to *ph.
1481 * @param hOld The old value to compare *ph with.
1482 * @param fRc Where to store the result.
1483 *
1484 * @remarks This doesn't currently work for all handles (like RTFILE).
1485 * @remarks x86: Requires a 486 or later.
1486 */
1487#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1488# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1489 do { \
1490 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1491 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1492 } while (0)
1493#elif HC_ARCH_BITS == 64
1494# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1495 do { \
1496 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1497 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1498 } while (0)
1499#else
1500# error HC_ARCH_BITS
1501#endif
1502
1503
1504/** @def ASMAtomicCmpXchgSize
1505 * Atomically Compare and Exchange a value whose size might differ
1506 * between platforms or compilers, ordered.
1507 *
1508 * @param pu Pointer to the value to update.
1509 * @param uNew The new value to assign to *pu.
1510 * @param uOld The old value to compare *pu with.
1511 * @param fRc Where to store the result.
1512 *
1513 * @remarks x86: Requires a 486 or later.
1514 */
1515#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1516 do { \
1517 switch (sizeof(*(pu))) { \
1518 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1519 break; \
1520 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1521 break; \
1522 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1523 (fRc) = false; \
1524 break; \
1525 } \
1526 } while (0)
1527
1528
1529/**
1530 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1531 * passes back old value, ordered.
1532 *
1533 * @returns true if xchg was done.
1534 * @returns false if xchg wasn't done.
1535 *
1536 * @param pu32 Pointer to the value to update.
1537 * @param u32New The new value to assign to *pu32.
1538 * @param u32Old The old value to compare *pu32 with.
1539 * @param pu32Old Pointer to store the old value at.
1540 *
1541 * @remarks x86: Requires a 486 or later.
1542 */
1543#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1544RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1545#else
1546DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1547{
1548# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1549# if RT_INLINE_ASM_GNU_STYLE
1550 uint8_t u8Ret;
1551 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1552 "setz %1\n\t"
1553 : "=m" (*pu32)
1554 , "=qm" (u8Ret)
1555 , "=a" (*pu32Old)
1556 : "r" (u32New)
1557 , "a" (u32Old)
1558 , "m" (*pu32)
1559 : "cc");
1560 return (bool)u8Ret;
1561
1562# elif RT_INLINE_ASM_USES_INTRIN
1563 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1564
1565# else
1566 uint32_t u32Ret;
1567 __asm
1568 {
1569# ifdef RT_ARCH_AMD64
1570 mov rdx, [pu32]
1571# else
1572 mov edx, [pu32]
1573# endif
1574 mov eax, [u32Old]
1575 mov ecx, [u32New]
1576# ifdef RT_ARCH_AMD64
1577 lock cmpxchg [rdx], ecx
1578 mov rdx, [pu32Old]
1579 mov [rdx], eax
1580# else
1581 lock cmpxchg [edx], ecx
1582 mov edx, [pu32Old]
1583 mov [edx], eax
1584# endif
1585 setz al
1586 movzx eax, al
1587 mov [u32Ret], eax
1588 }
1589 return !!u32Ret;
1590# endif
1591
1592# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1593 union { uint32_t u; bool f; } fXchg;
1594 uint32_t u32ActualOld;
1595 uint32_t rcSpill;
1596 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1597 RTASM_ARM_DMB_SY
1598# if defined(RT_ARCH_ARM64)
1599 "ldaxr %w[uOld], %[pMem]\n\t"
1600 "cmp %w[uOld], %w[uCmp]\n\t"
1601 "bne 1f\n\t" /* stop here if not equal */
1602 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1603 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1604 "mov %w[fXchg], #1\n\t"
1605# else
1606 "ldrex %[uOld], %[pMem]\n\t"
1607 "teq %[uOld], %[uCmp]\n\t"
1608 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1609 "bne 1f\n\t" /* stop here if not equal */
1610 "cmp %[rc], #0\n\t"
1611 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1612 "mov %[fXchg], #1\n\t"
1613# endif
1614 "1:\n\t"
1615 : [pMem] "+m" (*pu32)
1616 , [uOld] "=&r" (u32ActualOld)
1617 , [rc] "=&r" (rcSpill)
1618 , [fXchg] "=&r" (fXchg.u)
1619 : [uCmp] "r" (u32Old)
1620 , [uNew] "r" (u32New)
1621 , "[fXchg]" (0)
1622 RTASM_ARM_DMB_SY_COMMA_IN_REG
1623 : "cc");
1624 *pu32Old = u32ActualOld;
1625 return fXchg.f;
1626
1627# else
1628# error "Port me"
1629# endif
1630}
1631#endif
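/* Usage sketch: the Ex variant hands back the value it actually found, which
 * saves a separate re-read in retry loops (atomicSetBitsU32 is made up;
 * ASMAtomicReadU32 is declared later in this header):
 *
 *     static uint32_t atomicSetBitsU32(volatile uint32_t *pu32, uint32_t fBits)
 *     {
 *         uint32_t u32Old = ASMAtomicReadU32(pu32);
 *         while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBits, u32Old, &u32Old))
 *             ASMNopPause();   // u32Old was refreshed with the current value, retry
 *         return u32Old;       // the value before our bits were set
 *     }
 */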
1632
1633
1634/**
1635 * Atomically Compare and Exchange a signed 32-bit value, additionally
1636 * passes back old value, ordered.
1637 *
1638 * @returns true if xchg was done.
1639 * @returns false if xchg wasn't done.
1640 *
1641 * @param pi32 Pointer to the value to update.
1642 * @param i32New The new value to assign to *pi32.
1643 * @param i32Old The old value to compare *pi32 with.
1644 * @param pi32Old Pointer to store the old value at.
1645 *
1646 * @remarks x86: Requires a 486 or later.
1647 */
1648DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1649{
1650 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1651}
1652
1653
1654/**
1655 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1656 * passing back old value, ordered.
1657 *
1658 * @returns true if xchg was done.
1659 * @returns false if xchg wasn't done.
1660 *
1661 * @param pu64 Pointer to the 64-bit variable to update.
1662 * @param u64New The 64-bit value to assign to *pu64.
1663 * @param u64Old The value to compare with.
1664 * @param pu64Old Pointer to store the old value at.
1665 *
1666 * @remarks x86: Requires a Pentium or later.
1667 */
1668#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1669 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1670RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1671#else
1672DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1673{
1674# if RT_INLINE_ASM_USES_INTRIN
1675 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1676
1677# elif defined(RT_ARCH_AMD64)
1678# if RT_INLINE_ASM_GNU_STYLE
1679 uint8_t u8Ret;
1680 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1681 "setz %1\n\t"
1682 : "=m" (*pu64)
1683 , "=qm" (u8Ret)
1684 , "=a" (*pu64Old)
1685 : "r" (u64New)
1686 , "a" (u64Old)
1687 , "m" (*pu64)
1688 : "cc");
1689 return (bool)u8Ret;
1690# else
1691 bool fRet;
1692 __asm
1693 {
1694 mov rdx, [pu64]
1695 mov rax, [u64Old]
1696 mov rcx, [u64New]
1697 lock cmpxchg [rdx], rcx
1698 mov rdx, [pu64Old]
1699 mov [rdx], rax
1700 setz al
1701 mov [fRet], al
1702 }
1703 return fRet;
1704# endif
1705
1706# elif defined(RT_ARCH_X86)
1707# if RT_INLINE_ASM_GNU_STYLE
1708 uint64_t u64Ret;
1709# if defined(PIC) || defined(__PIC__)
1710 /* NB: this code uses a memory clobber description, because the clean
1711 * solution with an output value for *pu64 makes gcc run out of registers.
1712 * This will cause suboptimal code, and anyone with a better solution is
1713 * welcome to improve this. */
1714 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1715 "lock; cmpxchg8b %3\n\t"
1716 "xchgl %%ebx, %1\n\t"
1717 : "=A" (u64Ret)
1718 : "DS" ((uint32_t)u64New)
1719 , "c" ((uint32_t)(u64New >> 32))
1720 , "m" (*pu64)
1721 , "0" (u64Old)
1722 : "memory"
1723 , "cc" );
1724# else /* !PIC */
1725 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1726 : "=A" (u64Ret)
1727 , "=m" (*pu64)
1728 : "b" ((uint32_t)u64New)
1729 , "c" ((uint32_t)(u64New >> 32))
1730 , "m" (*pu64)
1731 , "0" (u64Old)
1732 : "cc");
1733# endif
1734 *pu64Old = u64Ret;
1735 return u64Ret == u64Old;
1736# else
1737 uint32_t u32Ret;
1738 __asm
1739 {
1740 mov ebx, dword ptr [u64New]
1741 mov ecx, dword ptr [u64New + 4]
1742 mov edi, [pu64]
1743 mov eax, dword ptr [u64Old]
1744 mov edx, dword ptr [u64Old + 4]
1745 lock cmpxchg8b [edi]
1746 mov ebx, [pu64Old]
1747 mov [ebx], eax
1748 setz al
1749 movzx eax, al
1750 add ebx, 4
1751 mov [ebx], edx
1752 mov dword ptr [u32Ret], eax
1753 }
1754 return !!u32Ret;
1755# endif
1756
1757# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1758 union { uint32_t u; bool f; } fXchg;
1759 uint64_t u64ActualOld;
1760 uint32_t rcSpill;
1761 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1762 RTASM_ARM_DMB_SY
1763# if defined(RT_ARCH_ARM64)
1764 "ldaxr %[uOld], %[pMem]\n\t"
1765 "cmp %[uOld], %[uCmp]\n\t"
1766 "bne 1f\n\t" /* stop here if not equal */
1767 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1768 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1769 "mov %w[fXchg], #1\n\t"
1770# else
1771 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1772 "teq %[uOld], %[uCmp]\n\t"
1773 "teqeq %H[uOld], %H[uCmp]\n\t"
1774 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1775 "bne 1f\n\t" /* stop here if not equal */
1776 "cmp %[rc], #0\n\t"
1777 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1778 "mov %[fXchg], #1\n\t"
1779# endif
1780 "1:\n\t"
1781 : [pMem] "+m" (*pu64)
1782 , [uOld] "=&r" (u64ActualOld)
1783 , [rc] "=&r" (rcSpill)
1784 , [fXchg] "=&r" (fXchg.u)
1785 : [uCmp] "r" (u64Old)
1786 , [uNew] "r" (u64New)
1787 , "[fXchg]" (0)
1788 RTASM_ARM_DMB_SY_COMMA_IN_REG
1789 : "cc");
1790 *pu64Old = u64ActualOld;
1791 return fXchg.f;
1792
1793# else
1794# error "Port me"
1795# endif
1796}
1797#endif
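
/* Illustrative usage sketch (hypothetical names, not part of the IPRT API): the old
 * value handed back by ASMAtomicCmpXchgExU64 on a failed exchange can seed the next
 * attempt of a retry loop, here maintaining a shared maximum. */
#if 0 /* example only */
DECLINLINE(void) ExampleAtomicUpdateMaxU64(volatile uint64_t RT_FAR *pu64Max, uint64_t uCandidate) RT_NOTHROW_DEF
{
    uint64_t uSeen = 0; /* deliberate guess; a failed compare-exchange hands back the real value */
    while (!ASMAtomicCmpXchgExU64(pu64Max, uCandidate, uSeen, &uSeen))
    {
        if (uSeen >= uCandidate)
            return;     /* the current maximum is already >= uCandidate, nothing to do */
        ASMNopPause();  /* brief pause before retrying with the value we actually observed */
    }
}
#endif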
1798
1799
1800/**
1801 * Atomically Compare and exchange a signed 64-bit value, additionally
1802 * passing back old value, ordered.
1803 *
1804 * @returns true if xchg was done.
1805 * @returns false if xchg wasn't done.
1806 *
1807 * @param pi64 Pointer to the 64-bit variable to update.
1808 * @param i64 The 64-bit value to assign to *pi64.
1809 * @param i64Old The value to compare with.
1810 * @param pi64Old Pointer to store the old value at.
1811 *
1812 * @remarks x86: Requires a Pentium or later.
1813 */
1814DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1815{
1816 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1817}
1818
1819/** @def ASMAtomicCmpXchgExHandle
1820 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1821 *
1822 * @param ph Pointer to the value to update.
1823 * @param hNew The new value to assign to *ph.
1824 * @param hOld The old value to compare *ph with.
1825 * @param fRc Where to store the result.
1826 * @param phOldVal Pointer to where to store the old value.
1827 *
1828 * @remarks This doesn't currently work for all handles (like RTFILE).
1829 */
1830#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1831# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1832 do { \
1833 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1834 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1835 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1836 } while (0)
1837#elif HC_ARCH_BITS == 64
1838# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1839 do { \
1840 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1841 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1842 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1843 } while (0)
1844#else
1845# error HC_ARCH_BITS
1846#endif
1847
1848
1849/** @def ASMAtomicCmpXchgExSize
1850 * Atomically Compare and Exchange a value which size might differ
1851 * between platforms or compilers. Additionally passes back old value.
1852 *
1853 * @param pu Pointer to the value to update.
1854 * @param uNew The new value to assign to *pu.
1855 * @param uOld The old value to compare *pu with.
1856 * @param fRc Where to store the result.
1857 * @param puOldVal Pointer to where to store the old value.
1858 *
1859 * @remarks x86: Requires a 486 or later.
1860 */
1861#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1862 do { \
1863 switch (sizeof(*(pu))) { \
1864 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1865 break; \
1866 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1867 break; \
1868 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1869 (fRc) = false; \
1870 *(puOldVal) = 0; \
1871 break; \
1872 } \
1873 } while (0)
1874
1875
1876/**
1877 * Atomically Compare and Exchange a pointer value, additionally
1878 * passing back old value, ordered.
1879 *
1880 * @returns true if xchg was done.
1881 * @returns false if xchg wasn't done.
1882 *
1883 * @param ppv Pointer to the value to update.
1884 * @param pvNew The new value to assign to *ppv.
1885 * @param pvOld The old value to compare *ppv with.
1886 * @param ppvOld Pointer to store the old value at.
1887 *
1888 * @remarks x86: Requires a 486 or later.
1889 */
1890DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1891 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
1892{
1893#if ARCH_BITS == 32 || ARCH_BITS == 16
1894 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1895#elif ARCH_BITS == 64
1896 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1897#else
1898# error "ARCH_BITS is bogus"
1899#endif
1900}
1901
1902
1903/**
1904 * Atomically Compare and Exchange a pointer value, additionally
1905 * passing back old value, ordered.
1906 *
1907 * @returns true if xchg was done.
1908 * @returns false if xchg wasn't done.
1909 *
1910 * @param ppv Pointer to the value to update.
1911 * @param pvNew The new value to assign to *ppv.
1912 * @param pvOld The old value to compare *ppv with.
1913 * @param ppvOld Pointer to store the old value at.
1914 *
1915 * @remarks This is relatively type safe on GCC platforms.
1916 * @remarks x86: Requires a 486 or later.
1917 */
1918#ifdef __GNUC__
1919# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1920 __extension__ \
1921 ({\
1922 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1923 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1924 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1925 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1926 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1927 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1928 (void **)ppvOldTypeChecked); \
1929 fMacroRet; \
1930 })
1931#else
1932# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1933 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1934#endif
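
/* Illustrative sketch (hypothetical EXAMPLENODE type, not part of the IPRT API):
 * lock-free push onto a singly linked list; on a failed compare-exchange the macro
 * hands back the head we raced with, so no separate re-read is needed. */
#if 0 /* example only */
typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; uint32_t uPayload; } EXAMPLENODE;

DECLINLINE(void) ExampleLockFreePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode) RT_NOTHROW_DEF
{
    EXAMPLENODE *pOldHead = NULL; /* guess; corrected by the failure path below */
    for (;;)
    {
        pNode->pNext = pOldHead;
        if (ASMAtomicCmpXchgExPtr(ppHead, pNode, pOldHead, &pOldHead))
            return;
        /* pOldHead now holds the head that was actually there; link against it and retry. */
    }
}
#endif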
1935
1936
1937/**
1938 * Virtualization unfriendly serializing instruction, always exits.
1939 */
1940#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1941RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
1942#else
1943DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
1944{
1945# if RT_INLINE_ASM_GNU_STYLE
1946 RTCCUINTREG xAX = 0;
1947# ifdef RT_ARCH_AMD64
1948 __asm__ __volatile__ ("cpuid"
1949 : "=a" (xAX)
1950 : "0" (xAX)
1951 : "rbx", "rcx", "rdx", "memory");
1952# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1953 __asm__ __volatile__ ("push %%ebx\n\t"
1954 "cpuid\n\t"
1955 "pop %%ebx\n\t"
1956 : "=a" (xAX)
1957 : "0" (xAX)
1958 : "ecx", "edx", "memory");
1959# else
1960 __asm__ __volatile__ ("cpuid"
1961 : "=a" (xAX)
1962 : "0" (xAX)
1963 : "ebx", "ecx", "edx", "memory");
1964# endif
1965
1966# elif RT_INLINE_ASM_USES_INTRIN
1967 int aInfo[4];
1968 _ReadWriteBarrier();
1969 __cpuid(aInfo, 0);
1970
1971# else
1972 __asm
1973 {
1974 push ebx
1975 xor eax, eax
1976 cpuid
1977 pop ebx
1978 }
1979# endif
1980}
1981#endif
1982
1983/**
1984 * Virtualization friendly serializing instruction, though more expensive.
1985 */
1986#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1987RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
1988#else
1989DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
1990{
1991# if RT_INLINE_ASM_GNU_STYLE
1992# ifdef RT_ARCH_AMD64
1993 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1994 "subq $128, %%rsp\n\t" /*redzone*/
1995 "mov %%ss, %%eax\n\t"
1996 "pushq %%rax\n\t"
1997 "pushq %%r10\n\t"
1998 "pushfq\n\t"
1999 "movl %%cs, %%eax\n\t"
2000 "pushq %%rax\n\t"
2001 "leaq 1f(%%rip), %%rax\n\t"
2002 "pushq %%rax\n\t"
2003 "iretq\n\t"
2004 "1:\n\t"
2005 ::: "rax", "r10", "memory", "cc");
2006# else
2007 __asm__ __volatile__ ("pushfl\n\t"
2008 "pushl %%cs\n\t"
2009 "pushl $1f\n\t"
2010 "iretl\n\t"
2011 "1:\n\t"
2012 ::: "memory");
2013# endif
2014
2015# else
2016 __asm
2017 {
2018 pushfd
2019 push cs
2020 push la_ret
2021 iretd
2022 la_ret:
2023 }
2024# endif
2025}
2026#endif
2027
2028/**
2029 * Virtualization friendlier serializing instruction, may still cause exits.
2030 */
2031#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2032RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2033#else
2034DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2035{
2036# if RT_INLINE_ASM_GNU_STYLE
2037 /* rdtscp is not supported by the ancient Linux build VMs, of course. :-( */
2038# ifdef RT_ARCH_AMD64
2039 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2040 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2041# else
2042 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2043 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2044# endif
2045# else
2046# if RT_INLINE_ASM_USES_INTRIN >= 15
2047 uint32_t uIgnore;
2048 _ReadWriteBarrier();
2049 (void)__rdtscp(&uIgnore);
2050 (void)uIgnore;
2051# else
2052 __asm
2053 {
2054 rdtscp
2055 }
2056# endif
2057# endif
2058}
2059#endif
2060
2061
2062/**
2063 * Serialize Instruction (both data store and instruction flush).
2064 */
2065#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2066# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2067#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2068# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2069#elif defined(RT_ARCH_SPARC64)
2070RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2071#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2072DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2073{
2074 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2075}
2076#else
2077# error "Port me"
2078#endif
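
/* Illustrative sketch (hypothetical patching helper, not part of the IPRT API): the
 * classic use of a serializing instruction is making freshly written code bytes safe
 * to execute on the same CPU. */
#if 0 /* example only */
DECLINLINE(void) ExampleApplyCodePatch(uint8_t volatile *pbDst, uint8_t const *pbPatch, size_t cbPatch) RT_NOTHROW_DEF
{
    size_t off;
    for (off = 0; off < cbPatch; off++)
        pbDst[off] = pbPatch[off];
    ASMSerializeInstruction();  /* flush stores and the instruction stream before executing pbDst */
}
#endif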
2079
2080
2081/**
2082 * Memory fence, waits for any pending writes and reads to complete.
2083 * @note No implicit compiler barrier (which is probably stupid).
2084 */
2085DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2086{
2087#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2088# if RT_INLINE_ASM_GNU_STYLE
2089 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2090# elif RT_INLINE_ASM_USES_INTRIN
2091 _mm_mfence();
2092# else
2093 __asm
2094 {
2095 _emit 0x0f
2096 _emit 0xae
2097 _emit 0xf0
2098 }
2099# endif
2100#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2101 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2102#elif ARCH_BITS == 16
2103 uint16_t volatile u16;
2104 ASMAtomicXchgU16(&u16, 0);
2105#else
2106 uint32_t volatile u32;
2107 ASMAtomicXchgU32(&u32, 0);
2108#endif
2109}
2110
2111
2112/**
2113 * Write fence, waits for any pending writes to complete.
2114 * @note No implicit compiler barrier (which is probably stupid).
2115 */
2116DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2117{
2118#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2119# if RT_INLINE_ASM_GNU_STYLE
2120 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2121# elif RT_INLINE_ASM_USES_INTRIN
2122 _mm_sfence();
2123# else
2124 __asm
2125 {
2126 _emit 0x0f
2127 _emit 0xae
2128 _emit 0xf8
2129 }
2130# endif
2131#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2132 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2133#else
2134 ASMMemoryFence();
2135#endif
2136}
2137
2138
2139/**
2140 * Read fence, waits for any pending reads to complete.
2141 * @note No implicit compiler barrier (which is probably stupid).
2142 */
2143DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2144{
2145#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2146# if RT_INLINE_ASM_GNU_STYLE
2147 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2148# elif RT_INLINE_ASM_USES_INTRIN
2149 _mm_lfence();
2150# else
2151 __asm
2152 {
2153 _emit 0x0f
2154 _emit 0xae
2155 _emit 0xe8
2156 }
2157# endif
2158#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2159 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2160#else
2161 ASMMemoryFence();
2162#endif
2163}
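
/* Illustrative sketch (hypothetical globals, not part of the IPRT API): a simple
 * publish/consume pair built from plain volatile accesses plus the explicit fences
 * above.  Since the fences carry no compiler barrier, the shared variables are kept
 * volatile so the compiler preserves the program order as well. */
#if 0 /* example only */
static uint32_t volatile g_uExamplePayload = 0;
static uint32_t volatile g_fExampleReady   = 0;

DECLINLINE(void) ExamplePublish(uint32_t uValue) RT_NOTHROW_DEF
{
    g_uExamplePayload = uValue;
    ASMWriteFence();            /* the payload must be globally visible before the flag */
    g_fExampleReady = 1;
}

DECLINLINE(uint32_t) ExampleConsume(void) RT_NOTHROW_DEF
{
    while (!g_fExampleReady)
        ASMNopPause();
    ASMReadFence();             /* don't let the payload load pass the flag load */
    return g_uExamplePayload;
}
#endif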
2164
2165
2166/**
2167 * Atomically reads an unsigned 8-bit value, ordered.
2168 *
2169 * @returns Current *pu8 value
2170 * @param pu8 Pointer to the 8-bit variable to read.
2171 */
2172DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2173{
2174#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2175 uint32_t u32;
2176 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2177 RTASM_ARM_DMB_SY
2178# if defined(RT_ARCH_ARM64)
2179 "ldxrb %w[uDst], %[pMem]\n\t"
2180# else
2181 "ldrexb %[uDst], %[pMem]\n\t"
2182# endif
2183 : [uDst] "=&r" (u32)
2184 : [pMem] "m" (*pu8)
2185 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2186 return (uint8_t)u32;
2187#else
2188 ASMMemoryFence();
2189 return *pu8; /* byte reads are atomic on x86 */
2190#endif
2191}
2192
2193
2194/**
2195 * Atomically reads an unsigned 8-bit value, unordered.
2196 *
2197 * @returns Current *pu8 value
2198 * @param pu8 Pointer to the 8-bit variable to read.
2199 */
2200DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2201{
2202#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2203 uint32_t u32;
2204 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2205# if defined(RT_ARCH_ARM64)
2206 "ldxrb %w[uDst], %[pMem]\n\t"
2207# else
2208 "ldrexb %[uDst], %[pMem]\n\t"
2209# endif
2210 : [uDst] "=&r" (u32)
2211 : [pMem] "m" (*pu8));
2212 return (uint8_t)u32;
2213#else
2214 return *pu8; /* byte reads are atomic on x86 */
2215#endif
2216}
2217
2218
2219/**
2220 * Atomically reads a signed 8-bit value, ordered.
2221 *
2222 * @returns Current *pi8 value
2223 * @param pi8 Pointer to the 8-bit variable to read.
2224 */
2225DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2226{
2227 ASMMemoryFence();
2228#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2229 int32_t i32;
2230 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2231 RTASM_ARM_DMB_SY
2232# if defined(RT_ARCH_ARM64)
2233 "ldxrb %w[iDst], %[pMem]\n\t"
2234# else
2235 "ldrexb %[iDst], %[pMem]\n\t"
2236# endif
2237 : [iDst] "=&r" (i32)
2238 : [pMem] "m" (*pi8)
2239 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2240 return (int8_t)i32;
2241#else
2242 return *pi8; /* byte reads are atomic on x86 */
2243#endif
2244}
2245
2246
2247/**
2248 * Atomically reads a signed 8-bit value, unordered.
2249 *
2250 * @returns Current *pi8 value
2251 * @param pi8 Pointer to the 8-bit variable to read.
2252 */
2253DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2254{
2255#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2256 int32_t i32;
2257 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2258# if defined(RT_ARCH_ARM64)
2259 "ldxrb %w[iDst], %[pMem]\n\t"
2260# else
2261 "ldrexb %[iDst], %[pMem]\n\t"
2262# endif
2263 : [iDst] "=&r" (i32)
2264 : [pMem] "m" (*pi8));
2265 return (int8_t)i32;
2266#else
2267 return *pi8; /* byte reads are atomic on x86 */
2268#endif
2269}
2270
2271
2272/**
2273 * Atomically reads an unsigned 16-bit value, ordered.
2274 *
2275 * @returns Current *pu16 value
2276 * @param pu16 Pointer to the 16-bit variable to read.
2277 */
2278DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2279{
2280 Assert(!((uintptr_t)pu16 & 1));
2281#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2282 uint32_t u32;
2283 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2284 RTASM_ARM_DMB_SY
2285# if defined(RT_ARCH_ARM64)
2286 "ldxrh %w[uDst], %[pMem]\n\t"
2287# else
2288 "ldrexh %[uDst], %[pMem]\n\t"
2289# endif
2290 : [uDst] "=&r" (u32)
2291 : [pMem] "m" (*pu16)
2292 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2293 return (uint16_t)u32;
2294#else
2295 ASMMemoryFence();
2296 return *pu16;
2297#endif
2298}
2299
2300
2301/**
2302 * Atomically reads an unsigned 16-bit value, unordered.
2303 *
2304 * @returns Current *pu16 value
2305 * @param pu16 Pointer to the 16-bit variable to read.
2306 */
2307DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2308{
2309 Assert(!((uintptr_t)pu16 & 1));
2310#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2311 uint32_t u32;
2312 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2313# if defined(RT_ARCH_ARM64)
2314 "ldxrh %w[uDst], %[pMem]\n\t"
2315# else
2316 "ldrexh %[uDst], %[pMem]\n\t"
2317# endif
2318 : [uDst] "=&r" (u32)
2319 : [pMem] "m" (*pu16));
2320 return (uint16_t)u32;
2321#else
2322 return *pu16;
2323#endif
2324}
2325
2326
2327/**
2328 * Atomically reads a signed 16-bit value, ordered.
2329 *
2330 * @returns Current *pi16 value
2331 * @param pi16 Pointer to the 16-bit variable to read.
2332 */
2333DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2334{
2335 Assert(!((uintptr_t)pi16 & 1));
2336#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2337 int32_t i32;
2338 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2339 RTASM_ARM_DMB_SY
2340# if defined(RT_ARCH_ARM64)
2341 "ldxrh %w[iDst], %[pMem]\n\t"
2342# else
2343 "ldrexh %[iDst], %[pMem]\n\t"
2344# endif
2345 : [iDst] "=&r" (i32)
2346 : [pMem] "m" (*pi16)
2347 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2348 return (int16_t)i32;
2349#else
2350 ASMMemoryFence();
2351 return *pi16;
2352#endif
2353}
2354
2355
2356/**
2357 * Atomically reads a signed 16-bit value, unordered.
2358 *
2359 * @returns Current *pi16 value
2360 * @param pi16 Pointer to the 16-bit variable to read.
2361 */
2362DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2363{
2364 Assert(!((uintptr_t)pi16 & 1));
2365#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2366 int32_t i32;
2367 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2368# if defined(RT_ARCH_ARM64)
2369 "ldxrh %w[iDst], %[pMem]\n\t"
2370# else
2371 "ldrexh %[iDst], %[pMem]\n\t"
2372# endif
2373 : [iDst] "=&r" (i32)
2374 : [pMem] "m" (*pi16));
2375 return (int16_t)i32;
2376#else
2377 return *pi16;
2378#endif
2379}
2380
2381
2382/**
2383 * Atomically reads an unsigned 32-bit value, ordered.
2384 *
2385 * @returns Current *pu32 value
2386 * @param pu32 Pointer to the 32-bit variable to read.
2387 */
2388DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2389{
2390 Assert(!((uintptr_t)pu32 & 3));
2391#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2392 uint32_t u32;
2393 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2394 RTASM_ARM_DMB_SY
2395# if defined(RT_ARCH_ARM64)
2396 "ldxr %w[uDst], %[pMem]\n\t"
2397# else
2398 "ldrex %[uDst], %[pMem]\n\t"
2399# endif
2400 : [uDst] "=&r" (u32)
2401 : [pMem] "m" (*pu32)
2402 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2403 return u32;
2404#else
2405 ASMMemoryFence();
2406# if ARCH_BITS == 16
2407 AssertFailed(); /** @todo 16-bit */
2408# endif
2409 return *pu32;
2410#endif
2411}
2412
2413
2414/**
2415 * Atomically reads an unsigned 32-bit value, unordered.
2416 *
2417 * @returns Current *pu32 value
2418 * @param pu32 Pointer to the 32-bit variable to read.
2419 */
2420DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2421{
2422 Assert(!((uintptr_t)pu32 & 3));
2423#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2424 uint32_t u32;
2425 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2426# if defined(RT_ARCH_ARM64)
2427 "ldxr %w[uDst], %[pMem]\n\t"
2428# else
2429 "ldrex %[uDst], %[pMem]\n\t"
2430# endif
2431 : [uDst] "=&r" (u32)
2432 : [pMem] "m" (*pu32));
2433 return u32;
2434#else
2435# if ARCH_BITS == 16
2436 AssertFailed(); /** @todo 16-bit */
2437# endif
2438 return *pu32;
2439#endif
2440}
2441
2442
2443/**
2444 * Atomically reads a signed 32-bit value, ordered.
2445 *
2446 * @returns Current *pi32 value
2447 * @param pi32 Pointer to the 32-bit variable to read.
2448 */
2449DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2450{
2451 Assert(!((uintptr_t)pi32 & 3));
2452#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2453 int32_t i32;
2454 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2455 RTASM_ARM_DMB_SY
2456# if defined(RT_ARCH_ARM64)
2457 "ldxr %w[iDst], %[pMem]\n\t"
2458# else
2459 "ldrex %[iDst], %[pMem]\n\t"
2460# endif
2461 : [iDst] "=&r" (i32)
2462 : [pMem] "m" (*pi32)
2463 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2464 return i32;
2465#else
2466 ASMMemoryFence();
2467# if ARCH_BITS == 16
2468 AssertFailed(); /** @todo 16-bit */
2469# endif
2470 return *pi32;
2471#endif
2472}
2473
2474
2475/**
2476 * Atomically reads a signed 32-bit value, unordered.
2477 *
2478 * @returns Current *pi32 value
2479 * @param pi32 Pointer to the 32-bit variable to read.
2480 */
2481DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2482{
2483 Assert(!((uintptr_t)pi32 & 3));
2484#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2486 int32_t i32;
2487 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2488# if defined(RT_ARCH_ARM64)
2489 "ldxr %w[iDst], %[pMem]\n\t"
2490# else
2491 "ldrex %[iDst], %[pMem]\n\t"
2492# endif
2493 : [iDst] "=&r" (i32)
2494 : [pMem] "m" (*pi32));
2495 return i32;
2496
2497#else
2498# if ARCH_BITS == 16
2499 AssertFailed(); /** @todo 16-bit */
2500# endif
2501 return *pi32;
2502#endif
2503}
2504
2505
2506/**
2507 * Atomically reads an unsigned 64-bit value, ordered.
2508 *
2509 * @returns Current *pu64 value
2510 * @param pu64 Pointer to the 64-bit variable to read.
2511 * The memory pointed to must be writable.
2512 *
2513 * @remarks This may fault if the memory is read-only!
2514 * @remarks x86: Requires a Pentium or later.
2515 */
2516#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2517 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2518RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2519#else
2520DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2521{
2522 uint64_t u64;
2523# ifdef RT_ARCH_AMD64
2524 Assert(!((uintptr_t)pu64 & 7));
2525/*# if RT_INLINE_ASM_GNU_STYLE
2526 __asm__ __volatile__( "mfence\n\t"
2527 "movq %1, %0\n\t"
2528 : "=r" (u64)
2529 : "m" (*pu64));
2530# else
2531 __asm
2532 {
2533 mfence
2534 mov rdx, [pu64]
2535 mov rax, [rdx]
2536 mov [u64], rax
2537 }
2538# endif*/
2539 ASMMemoryFence();
2540 u64 = *pu64;
2541
2542# elif defined(RT_ARCH_X86)
2543# if RT_INLINE_ASM_GNU_STYLE
2544# if defined(PIC) || defined(__PIC__)
2545 uint32_t u32EBX = 0;
2546 Assert(!((uintptr_t)pu64 & 7));
2547 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2548 "lock; cmpxchg8b (%5)\n\t"
2549 "movl %3, %%ebx\n\t"
2550 : "=A" (u64)
2551# if RT_GNUC_PREREQ(4, 3)
2552 , "+m" (*pu64)
2553# else
2554 , "=m" (*pu64)
2555# endif
2556 : "0" (0ULL)
2557 , "m" (u32EBX)
2558 , "c" (0)
2559 , "S" (pu64)
2560 : "cc");
2561# else /* !PIC */
2562 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2563 : "=A" (u64)
2564 , "+m" (*pu64)
2565 : "0" (0ULL)
2566 , "b" (0)
2567 , "c" (0)
2568 : "cc");
2569# endif
2570# else
2571 Assert(!((uintptr_t)pu64 & 7));
2572 __asm
2573 {
2574 xor eax, eax
2575 xor edx, edx
2576 mov edi, pu64
2577 xor ecx, ecx
2578 xor ebx, ebx
2579 lock cmpxchg8b [edi]
2580 mov dword ptr [u64], eax
2581 mov dword ptr [u64 + 4], edx
2582 }
2583# endif
2584
2585# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2586 Assert(!((uintptr_t)pu64 & 7));
2587 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2588 RTASM_ARM_DMB_SY
2589# if defined(RT_ARCH_ARM64)
2590 "ldxr %[uDst], %[pMem]\n\t"
2591# else
2592 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2593# endif
2594 : [uDst] "=&r" (u64)
2595 : [pMem] "m" (*pu64)
2596 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2597
2598# else
2599# error "Port me"
2600# endif
2601 return u64;
2602}
2603#endif
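
/* Illustrative sketch (hypothetical sequence counter, not part of the IPRT API): an
 * ordered 64-bit read of a counter another CPU is advancing.  On 32-bit x86 this is
 * implemented with LOCK CMPXCHG8B, which is also why the variable must be writable. */
#if 0 /* example only */
DECLINLINE(bool) ExampleHasAdvanced(volatile uint64_t RT_FAR *pu64Seq, uint64_t u64Last) RT_NOTHROW_DEF
{
    return ASMAtomicReadU64(pu64Seq) > u64Last;
}
#endif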
2604
2605
2606/**
2607 * Atomically reads an unsigned 64-bit value, unordered.
2608 *
2609 * @returns Current *pu64 value
2610 * @param pu64 Pointer to the 64-bit variable to read.
2611 * The memory pointed to must be writable.
2612 *
2613 * @remarks This may fault if the memory is read-only!
2614 * @remarks x86: Requires a Pentium or later.
2615 */
2616#if !defined(RT_ARCH_AMD64) \
2617 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2618 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2619RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2620#else
2621DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2622{
2623 uint64_t u64;
2624# ifdef RT_ARCH_AMD64
2625 Assert(!((uintptr_t)pu64 & 7));
2626/*# if RT_INLINE_ASM_GNU_STYLE
2627 Assert(!((uintptr_t)pu64 & 7));
2628 __asm__ __volatile__("movq %1, %0\n\t"
2629 : "=r" (u64)
2630 : "m" (*pu64));
2631# else
2632 __asm
2633 {
2634 mov rdx, [pu64]
2635 mov rax, [rdx]
2636 mov [u64], rax
2637 }
2638# endif */
2639 u64 = *pu64;
2640
2641# elif defined(RT_ARCH_X86)
2642# if RT_INLINE_ASM_GNU_STYLE
2643# if defined(PIC) || defined(__PIC__)
2644 uint32_t u32EBX = 0;
2645 uint32_t u32Spill;
2646 Assert(!((uintptr_t)pu64 & 7));
2647 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2648 "xor %%ecx,%%ecx\n\t"
2649 "xor %%edx,%%edx\n\t"
2650 "xchgl %%ebx, %3\n\t"
2651 "lock; cmpxchg8b (%4)\n\t"
2652 "movl %3, %%ebx\n\t"
2653 : "=A" (u64)
2654# if RT_GNUC_PREREQ(4, 3)
2655 , "+m" (*pu64)
2656# else
2657 , "=m" (*pu64)
2658# endif
2659 , "=c" (u32Spill)
2660 : "m" (u32EBX)
2661 , "S" (pu64)
2662 : "cc");
2663# else /* !PIC */
2664 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2665 : "=A" (u64)
2666 , "+m" (*pu64)
2667 : "0" (0ULL)
2668 , "b" (0)
2669 , "c" (0)
2670 : "cc");
2671# endif
2672# else
2673 Assert(!((uintptr_t)pu64 & 7));
2674 __asm
2675 {
2676 xor eax, eax
2677 xor edx, edx
2678 mov edi, pu64
2679 xor ecx, ecx
2680 xor ebx, ebx
2681 lock cmpxchg8b [edi]
2682 mov dword ptr [u64], eax
2683 mov dword ptr [u64 + 4], edx
2684 }
2685# endif
2686
2687# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2688 Assert(!((uintptr_t)pu64 & 7));
2689 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
2690# if defined(RT_ARCH_ARM64)
2691 "ldxr %[uDst], %[pMem]\n\t"
2692# else
2693 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2694# endif
2695 : [uDst] "=&r" (u64)
2696 : [pMem] "m" (*pu64));
2697
2698# else
2699# error "Port me"
2700# endif
2701 return u64;
2702}
2703#endif
2704
2705
2706/**
2707 * Atomically reads a signed 64-bit value, ordered.
2708 *
2709 * @returns Current *pi64 value
2710 * @param pi64 Pointer to the 64-bit variable to read.
2711 * The memory pointed to must be writable.
2712 *
2713 * @remarks This may fault if the memory is read-only!
2714 * @remarks x86: Requires a Pentium or later.
2715 */
2716DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2717{
2718 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2719}
2720
2721
2722/**
2723 * Atomically reads a signed 64-bit value, unordered.
2724 *
2725 * @returns Current *pi64 value
2726 * @param pi64 Pointer to the 64-bit variable to read.
2727 * The memory pointed to must be writable.
2728 *
2729 * @remarks This will fault if the memory is read-only!
2730 * @remarks x86: Requires a Pentium or later.
2731 */
2732DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2733{
2734 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2735}
2736
2737
2738/**
2739 * Atomically reads a size_t value, ordered.
2740 *
2741 * @returns Current *pcb value
2742 * @param pcb Pointer to the size_t variable to read.
2743 */
2744DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2745{
2746#if ARCH_BITS == 64
2747 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2748#elif ARCH_BITS == 32
2749 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2750#elif ARCH_BITS == 16
2751 AssertCompileSize(size_t, 2);
2752 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2753#else
2754# error "Unsupported ARCH_BITS value"
2755#endif
2756}
2757
2758
2759/**
2760 * Atomically reads a size_t value, unordered.
2761 *
2762 * @returns Current *pcb value
2763 * @param pcb Pointer to the size_t variable to read.
2764 */
2765DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2766{
2767#if ARCH_BITS == 64
2768 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2769#elif ARCH_BITS == 32
2770 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2771#elif ARCH_BITS == 16
2772 AssertCompileSize(size_t, 2);
2773 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2774#else
2775# error "Unsupported ARCH_BITS value"
2776#endif
2777}
2778
2779
2780/**
2781 * Atomically reads a pointer value, ordered.
2782 *
2783 * @returns Current *pv value
2784 * @param ppv Pointer to the pointer variable to read.
2785 *
2786 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2787 * requires less typing (no casts).
2788 */
2789DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2790{
2791#if ARCH_BITS == 32 || ARCH_BITS == 16
2792 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2793#elif ARCH_BITS == 64
2794 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2795#else
2796# error "ARCH_BITS is bogus"
2797#endif
2798}
2799
2800/**
2801 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2802 *
2803 * @returns Current *pv value
2804 * @param ppv Pointer to the pointer variable to read.
2805 * @param Type The type of *ppv, sans volatile.
2806 */
2807#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2808# define ASMAtomicReadPtrT(ppv, Type) \
2809 __extension__ \
2810 ({\
2811 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2812 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2813 pvTypeChecked; \
2814 })
2815#else
2816# define ASMAtomicReadPtrT(ppv, Type) \
2817 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2818#endif
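
/* Illustrative sketch (hypothetical EXAMPLESTATE type, not part of the IPRT API):
 * the typed read macro spares the caller the void-pointer casting dance. */
#if 0 /* example only */
typedef struct EXAMPLESTATE { uint32_t uVersion; } EXAMPLESTATE;
typedef EXAMPLESTATE *PEXAMPLESTATE;

DECLINLINE(uint32_t) ExampleReadVersion(PEXAMPLESTATE volatile *ppState) RT_NOTHROW_DEF
{
    PEXAMPLESTATE pState = ASMAtomicReadPtrT(ppState, PEXAMPLESTATE);
    return pState ? pState->uVersion : 0;
}
#endif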
2819
2820
2821/**
2822 * Atomically reads a pointer value, unordered.
2823 *
2824 * @returns Current *pv value
2825 * @param ppv Pointer to the pointer variable to read.
2826 *
2827 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2828 * requires less typing (no casts).
2829 */
2830DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2831{
2832#if ARCH_BITS == 32 || ARCH_BITS == 16
2833 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2834#elif ARCH_BITS == 64
2835 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2836#else
2837# error "ARCH_BITS is bogus"
2838#endif
2839}
2840
2841
2842/**
2843 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2844 *
2845 * @returns Current *pv value
2846 * @param ppv Pointer to the pointer variable to read.
2847 * @param Type The type of *ppv, sans volatile.
2848 */
2849#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2850# define ASMAtomicUoReadPtrT(ppv, Type) \
2851 __extension__ \
2852 ({\
2853 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2854 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2855 pvTypeChecked; \
2856 })
2857#else
2858# define ASMAtomicUoReadPtrT(ppv, Type) \
2859 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2860#endif
2861
2862
2863/**
2864 * Atomically reads a boolean value, ordered.
2865 *
2866 * @returns Current *pf value
2867 * @param pf Pointer to the boolean variable to read.
2868 */
2869DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2870{
2871 ASMMemoryFence();
2872 return *pf; /* byte reads are atomic on x86 */
2873}
2874
2875
2876/**
2877 * Atomically reads a boolean value, unordered.
2878 *
2879 * @returns Current *pf value
2880 * @param pf Pointer to the boolean variable to read.
2881 */
2882DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2883{
2884 return *pf; /* byte reads are atomic on x86 */
2885}
2886
2887
2888/**
2889 * Atomically read a typical IPRT handle value, ordered.
2890 *
2891 * @param ph Pointer to the handle variable to read.
2892 * @param phRes Where to store the result.
2893 *
2894 * @remarks This doesn't currently work for all handles (like RTFILE).
2895 */
2896#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2897# define ASMAtomicReadHandle(ph, phRes) \
2898 do { \
2899 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2900 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2901 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2902 } while (0)
2903#elif HC_ARCH_BITS == 64
2904# define ASMAtomicReadHandle(ph, phRes) \
2905 do { \
2906 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2907 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2908 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2909 } while (0)
2910#else
2911# error HC_ARCH_BITS
2912#endif
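
/* Illustrative sketch (not part of the IPRT API): reading a pointer-sized handle,
 * here an RTSEMEVENT, that another thread may replace or invalidate concurrently. */
#if 0 /* example only */
DECLINLINE(RTSEMEVENT) ExampleGetEventSem(RTSEMEVENT volatile *phEvt) RT_NOTHROW_DEF
{
    RTSEMEVENT hEvt;
    ASMAtomicReadHandle(phEvt, &hEvt);
    return hEvt;
}
#endif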
2913
2914
2915/**
2916 * Atomically read a typical IPRT handle value, unordered.
2917 *
2918 * @param ph Pointer to the handle variable to read.
2919 * @param phRes Where to store the result.
2920 *
2921 * @remarks This doesn't currently work for all handles (like RTFILE).
2922 */
2923#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2924# define ASMAtomicUoReadHandle(ph, phRes) \
2925 do { \
2926 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2927 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2928 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2929 } while (0)
2930#elif HC_ARCH_BITS == 64
2931# define ASMAtomicUoReadHandle(ph, phRes) \
2932 do { \
2933 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2934 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2935 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2936 } while (0)
2937#else
2938# error HC_ARCH_BITS
2939#endif
2940
2941
2942/**
2943 * Atomically read a value which size might differ
2944 * between platforms or compilers, ordered.
2945 *
2946 * @param pu Pointer to the variable to read.
2947 * @param puRes Where to store the result.
2948 */
2949#define ASMAtomicReadSize(pu, puRes) \
2950 do { \
2951 switch (sizeof(*(pu))) { \
2952 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2953 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2954 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2955 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2956 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2957 } \
2958 } while (0)
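
/* Illustrative sketch (not part of the IPRT API): ASMAtomicReadSize picks the right
 * fixed-width helper for a variable whose width differs between targets, here an
 * RTR3PTR, which is 4 or 8 bytes depending on the ring-3 architecture. */
#if 0 /* example only */
DECLINLINE(RTR3PTR) ExampleReadR3Ptr(RTR3PTR volatile *pPtr) RT_NOTHROW_DEF
{
    RTR3PTR Value;
    ASMAtomicReadSize(pPtr, &Value);
    return Value;
}
#endif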
2959
2960
2961/**
2962 * Atomically read a value which size might differ
2963 * between platforms or compilers, unordered.
2964 *
2965 * @param pu Pointer to the variable to read.
2966 * @param puRes Where to store the result.
2967 */
2968#define ASMAtomicUoReadSize(pu, puRes) \
2969 do { \
2970 switch (sizeof(*(pu))) { \
2971 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2972 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2973 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2974 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2975 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2976 } \
2977 } while (0)
2978
2979
2980/**
2981 * Atomically writes an unsigned 8-bit value, ordered.
2982 *
2983 * @param pu8 Pointer to the 8-bit variable.
2984 * @param u8 The 8-bit value to assign to *pu8.
2985 */
2986DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2987{
2988 /** @todo Any possible ARM32/ARM64 optimizations here? */
2989 ASMAtomicXchgU8(pu8, u8);
2990}
2991
2992
2993/**
2994 * Atomically writes an unsigned 8-bit value, unordered.
2995 *
2996 * @param pu8 Pointer to the 8-bit variable.
2997 * @param u8 The 8-bit value to assign to *pu8.
2998 */
2999DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3000{
3001 /** @todo Any possible ARM32/ARM64 improvements here? */
3002 *pu8 = u8; /* byte writes are atomic on x86 */
3003}
3004
3005
3006/**
3007 * Atomically writes a signed 8-bit value, ordered.
3008 *
3009 * @param pi8 Pointer to the 8-bit variable to read.
3010 * @param i8 The 8-bit value to assign to *pi8.
3011 */
3012DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3013{
3014 /** @todo Any possible ARM32/ARM64 optimizations here? */
3015 ASMAtomicXchgS8(pi8, i8);
3016}
3017
3018
3019/**
3020 * Atomically writes a signed 8-bit value, unordered.
3021 *
3022 * @param pi8 Pointer to the 8-bit variable to write.
3023 * @param i8 The 8-bit value to assign to *pi8.
3024 */
3025DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3026{
3027 *pi8 = i8; /* byte writes are atomic on x86 */
3028}
3029
3030
3031/**
3032 * Atomically writes an unsigned 16-bit value, ordered.
3033 *
3034 * @param pu16 Pointer to the 16-bit variable to write.
3035 * @param u16 The 16-bit value to assign to *pu16.
3036 */
3037DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3038{
3039 /** @todo Any possible ARM32/ARM64 optimizations here? */
3040 ASMAtomicXchgU16(pu16, u16);
3041}
3042
3043
3044/**
3045 * Atomically writes an unsigned 16-bit value, unordered.
3046 *
3047 * @param pu16 Pointer to the 16-bit variable to write.
3048 * @param u16 The 16-bit value to assign to *pu16.
3049 */
3050DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3051{
3052 Assert(!((uintptr_t)pu16 & 1));
3053 *pu16 = u16;
3054}
3055
3056
3057/**
3058 * Atomically writes a signed 16-bit value, ordered.
3059 *
3060 * @param pi16 Pointer to the 16-bit variable to write.
3061 * @param i16 The 16-bit value to assign to *pi16.
3062 */
3063DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3064{
3065 /** @todo Any possible ARM32/ARM64 optimizations here? */
3066 ASMAtomicXchgS16(pi16, i16);
3067}
3068
3069
3070/**
3071 * Atomically writes a signed 16-bit value, unordered.
3072 *
3073 * @param pi16 Pointer to the 16-bit variable to write.
3074 * @param i16 The 16-bit value to assign to *pi16.
3075 */
3076DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3077{
3078 Assert(!((uintptr_t)pi16 & 1));
3079 *pi16 = i16;
3080}
3081
3082
3083/**
3084 * Atomically writes an unsigned 32-bit value, ordered.
3085 *
3086 * @param pu32 Pointer to the 32-bit variable to write.
3087 * @param u32 The 32-bit value to assign to *pu32.
3088 */
3089DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3090{
3091 /** @todo Any possible ARM32/ARM64 optimizations here? */
3092 ASMAtomicXchgU32(pu32, u32);
3093}
3094
3095
3096/**
3097 * Atomically writes an unsigned 32-bit value, unordered.
3098 *
3099 * @param pu32 Pointer to the 32-bit variable to write.
3100 * @param u32 The 32-bit value to assign to *pu32.
3101 */
3102DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3103{
3104 Assert(!((uintptr_t)pu32 & 3));
3105#if ARCH_BITS >= 32
3106 *pu32 = u32;
3107#else
3108 ASMAtomicXchgU32(pu32, u32);
3109#endif
3110}
3111
3112
3113/**
3114 * Atomically writes a signed 32-bit value, ordered.
3115 *
3116 * @param pi32 Pointer to the 32-bit variable to write.
3117 * @param i32 The 32-bit value to assign to *pi32.
3118 */
3119DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3120{
3121 ASMAtomicXchgS32(pi32, i32);
3122}
3123
3124
3125/**
3126 * Atomically writes a signed 32-bit value, unordered.
3127 *
3128 * @param pi32 Pointer to the 32-bit variable to write.
3129 * @param i32 The 32-bit value to assign to *pi32.
3130 */
3131DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3132{
3133 Assert(!((uintptr_t)pi32 & 3));
3134#if ARCH_BITS >= 32
3135 *pi32 = i32;
3136#else
3137 ASMAtomicXchgS32(pi32, i32);
3138#endif
3139}
3140
3141
3142/**
3143 * Atomically writes an unsigned 64-bit value, ordered.
3144 *
3145 * @param pu64 Pointer to the 64-bit variable to write.
3146 * @param u64 The 64-bit value to assign to *pu64.
3147 */
3148DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3149{
3150 /** @todo Any possible ARM32/ARM64 optimizations here? */
3151 ASMAtomicXchgU64(pu64, u64);
3152}
3153
3154
3155/**
3156 * Atomically writes an unsigned 64-bit value, unordered.
3157 *
3158 * @param pu64 Pointer to the 64-bit variable to write.
3159 * @param u64 The 64-bit value to assign to *pu64.
3160 */
3161DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3162{
3163 Assert(!((uintptr_t)pu64 & 7));
3164#if ARCH_BITS == 64
3165 *pu64 = u64;
3166#else
3167 ASMAtomicXchgU64(pu64, u64);
3168#endif
3169}
3170
3171
3172/**
3173 * Atomically writes a signed 64-bit value, ordered.
3174 *
3175 * @param pi64 Pointer to the 64-bit variable to write.
3176 * @param i64 The 64-bit value to assign to *pi64.
3177 */
3178DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3179{
3180 /** @todo Any possible ARM32/ARM64 optimizations here? */
3181 ASMAtomicXchgS64(pi64, i64);
3182}
3183
3184
3185/**
3186 * Atomically writes a signed 64-bit value, unordered.
3187 *
3188 * @param pi64 Pointer to the 64-bit variable to write.
3189 * @param i64 The 64-bit value to assign to *pi64.
3190 */
3191DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3192{
3193 Assert(!((uintptr_t)pi64 & 7));
3194#if ARCH_BITS == 64
3195 *pi64 = i64;
3196#else
3197 ASMAtomicXchgS64(pi64, i64);
3198#endif
3199}
3200
3201
3202/**
3203 * Atomically writes a size_t value, ordered.
3204 *
3205 * @returns nothing.
3206 * @param pcb Pointer to the size_t variable to write.
3207 * @param cb The value to assign to *pcb.
3208 */
3209DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3210{
3211#if ARCH_BITS == 64
3212 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3213#elif ARCH_BITS == 32
3214 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3215#elif ARCH_BITS == 16
3216 AssertCompileSize(size_t, 2);
3217 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3218#else
3219# error "Unsupported ARCH_BITS value"
3220#endif
3221}
3222
3223
3224/**
3225 * Atomically writes a boolean value, ordered.
3226 *
3227 * @param pf Pointer to the boolean variable to write.
3228 * @param f The boolean value to assign to *pf.
3229 */
3230DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3231{
3232 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3233}
3234
3235
3236/**
3237 * Atomically writes a boolean value, unordered.
3238 *
3239 * @param pf Pointer to the boolean variable to write.
3240 * @param f The boolean value to assign to *pf.
3241 */
3242DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3243{
3244 *pf = f; /* byte writes are atomic on x86 */
3245}
3246
3247
3248/**
3249 * Atomically writes a pointer value, ordered.
3250 *
3251 * @param ppv Pointer to the pointer variable to write.
3252 * @param pv The pointer value to assign to *ppv.
3253 */
3254DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3255{
3256#if ARCH_BITS == 32 || ARCH_BITS == 16
3257 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3258#elif ARCH_BITS == 64
3259 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3260#else
3261# error "ARCH_BITS is bogus"
3262#endif
3263}
3264
3265
3266/**
3267 * Atomically writes a pointer value, ordered.
3268 *
3269 * @param ppv Pointer to the pointer variable to write.
3270 * @param pv The pointer value to assign to *ppv. If NULL use
3271 * ASMAtomicWriteNullPtr or you'll land in trouble.
3272 *
3273 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3274 * NULL.
3275 */
3276#ifdef __GNUC__
3277# define ASMAtomicWritePtr(ppv, pv) \
3278 do \
3279 { \
3280 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3281 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3282 \
3283 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3284 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3285 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3286 \
3287 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3288 } while (0)
3289#else
3290# define ASMAtomicWritePtr(ppv, pv) \
3291 do \
3292 { \
3293 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3294 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3295 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3296 \
3297 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3298 } while (0)
3299#endif
3300
3301
3302/**
3303 * Atomically sets a pointer to NULL, ordered.
3304 *
3305 * @param ppv Pointer to the pointer variable that should be set to NULL.
3306 *
3307 * @remarks This is relatively type safe on GCC platforms.
3308 */
3309#if RT_GNUC_PREREQ(4, 2)
3310# define ASMAtomicWriteNullPtr(ppv) \
3311 do \
3312 { \
3313 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3314 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3315 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3316 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3317 } while (0)
3318#else
3319# define ASMAtomicWriteNullPtr(ppv) \
3320 do \
3321 { \
3322 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3323 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3324 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3325 } while (0)
3326#endif
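
/* Illustrative sketch (hypothetical EXAMPLECFG type, not part of the IPRT API):
 * publishing a fully initialized object and retiring it again with the ordered,
 * type-checked pointer writes above. */
#if 0 /* example only */
typedef struct EXAMPLECFG { uint32_t cItems; } EXAMPLECFG;

static EXAMPLECFG * volatile g_pExampleCfg = NULL;

DECLINLINE(void) ExamplePublishCfg(EXAMPLECFG *pCfg) RT_NOTHROW_DEF
{
    /* Every field of *pCfg must be written before this point; the ordered pointer
       write ensures readers that see the pointer also see the fields. */
    ASMAtomicWritePtr(&g_pExampleCfg, pCfg);
}

DECLINLINE(void) ExampleRetireCfg(void) RT_NOTHROW_DEF
{
    ASMAtomicWriteNullPtr(&g_pExampleCfg);
}
#endif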
3327
3328
3329/**
3330 * Atomically writes a pointer value, unordered.
3331 *
3333 * @param ppv Pointer to the pointer variable.
3334 * @param pv The pointer value to assign to *ppv. If NULL use
3335 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3336 *
3337 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3338 * NULL.
3339 */
3340#if RT_GNUC_PREREQ(4, 2)
3341# define ASMAtomicUoWritePtr(ppv, pv) \
3342 do \
3343 { \
3344 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3345 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3346 \
3347 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3348 AssertCompile(sizeof(pv) == sizeof(void *)); \
3349 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3350 \
3351 *(ppvTypeChecked) = pvTypeChecked; \
3352 } while (0)
3353#else
3354# define ASMAtomicUoWritePtr(ppv, pv) \
3355 do \
3356 { \
3357 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3358 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3359 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3360 *(ppv) = pv; \
3361 } while (0)
3362#endif
3363
3364
3365/**
3366 * Atomically sets a pointer to NULL, unordered.
3367 *
3368 * @param ppv Pointer to the pointer variable that should be set to NULL.
3369 *
3370 * @remarks This is relatively type safe on GCC platforms.
3371 */
3372#ifdef __GNUC__
3373# define ASMAtomicUoWriteNullPtr(ppv) \
3374 do \
3375 { \
3376 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3377 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3378 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3379 *(ppvTypeChecked) = NULL; \
3380 } while (0)
3381#else
3382# define ASMAtomicUoWriteNullPtr(ppv) \
3383 do \
3384 { \
3385 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3386 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3387 *(ppv) = NULL; \
3388 } while (0)
3389#endif
3390
3391
3392/**
3393 * Atomically write a typical IPRT handle value, ordered.
3394 *
3395 * @param ph Pointer to the variable to update.
3396 * @param hNew The value to assign to *ph.
3397 *
3398 * @remarks This doesn't currently work for all handles (like RTFILE).
3399 */
3400#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3401# define ASMAtomicWriteHandle(ph, hNew) \
3402 do { \
3403 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3404 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3405 } while (0)
3406#elif HC_ARCH_BITS == 64
3407# define ASMAtomicWriteHandle(ph, hNew) \
3408 do { \
3409 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3410 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3411 } while (0)
3412#else
3413# error HC_ARCH_BITS
3414#endif
3415
3416
3417/**
3418 * Atomically write a typical IPRT handle value, unordered.
3419 *
3420 * @param ph Pointer to the variable to update.
3421 * @param hNew The value to assign to *ph.
3422 *
3423 * @remarks This doesn't currently work for all handles (like RTFILE).
3424 */
3425#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3426# define ASMAtomicUoWriteHandle(ph, hNew) \
3427 do { \
3428 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3429 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3430 } while (0)
3431#elif HC_ARCH_BITS == 64
3432# define ASMAtomicUoWriteHandle(ph, hNew) \
3433 do { \
3434 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3435 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3436 } while (0)
3437#else
3438# error HC_ARCH_BITS
3439#endif
3440
3441
3442/**
3443 * Atomically write a value which size might differ
3444 * between platforms or compilers, ordered.
3445 *
3446 * @param pu Pointer to the variable to update.
3447 * @param uNew The value to assign to *pu.
3448 */
3449#define ASMAtomicWriteSize(pu, uNew) \
3450 do { \
3451 switch (sizeof(*(pu))) { \
3452 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3453 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3454 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3455 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3456 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3457 } \
3458 } while (0)
3459
3460/**
3461 * Atomically write a value which size might differ
3462 * between platforms or compilers, unordered.
3463 *
3464 * @param pu Pointer to the variable to update.
3465 * @param uNew The value to assign to *pu.
3466 */
3467#define ASMAtomicUoWriteSize(pu, uNew) \
3468 do { \
3469 switch (sizeof(*(pu))) { \
3470 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3471 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3472 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3473 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3474 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3475 } \
3476 } while (0)
3477
3478
3479
3480/**
3481 * Atomically exchanges and adds to a 16-bit value, ordered.
3482 *
3483 * @returns The old value.
3484 * @param pu16 Pointer to the value.
3485 * @param u16 Number to add.
3486 *
3487 * @remarks Currently not implemented, just to make 16-bit code happy.
3488 * @remarks x86: Requires a 486 or later.
3489 */
3490RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3491
3492
3493/**
3494 * Atomically exchanges and adds to a 32-bit value, ordered.
3495 *
3496 * @returns The old value.
3497 * @param pu32 Pointer to the value.
3498 * @param u32 Number to add.
3499 *
3500 * @remarks x86: Requires a 486 or later.
3501 */
3502#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3503RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3504#else
3505DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3506{
3507# if RT_INLINE_ASM_USES_INTRIN
3508 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3509 return u32;
3510
3511# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3512# if RT_INLINE_ASM_GNU_STYLE
3513 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3514 : "=r" (u32)
3515 , "=m" (*pu32)
3516 : "0" (u32)
3517 , "m" (*pu32)
3518 : "memory"
3519 , "cc");
3520 return u32;
3521# else
3522 __asm
3523 {
3524 mov eax, [u32]
3525# ifdef RT_ARCH_AMD64
3526 mov rdx, [pu32]
3527 lock xadd [rdx], eax
3528# else
3529 mov edx, [pu32]
3530 lock xadd [edx], eax
3531# endif
3532 mov [u32], eax
3533 }
3534 return u32;
3535# endif
3536
3537# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3538 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3539 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3540 "add %[uNew], %[uOld], %[uVal]\n\t",
3541 [uVal] "r" (u32));
3542 return u32OldRet;
3543
3544# else
3545# error "Port me"
3546# endif
3547}
3548#endif
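
/* Illustrative sketch (hypothetical index allocator, not part of the IPRT API):
 * because the old value is returned, ASMAtomicAddU32 can hand out consecutive,
 * non-overlapping index ranges to concurrent callers. */
#if 0 /* example only */
DECLINLINE(uint32_t) ExampleAllocIndexRange(uint32_t volatile RT_FAR *pidxNext, uint32_t cIndexes) RT_NOTHROW_DEF
{
    return ASMAtomicAddU32(pidxNext, cIndexes); /* returns the first index of the caller's range */
}
#endif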
3549
3550
3551/**
3552 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3553 *
3554 * @returns The old value.
3555 * @param pi32 Pointer to the value.
3556 * @param i32 Number to add.
3557 *
3558 * @remarks x86: Requires a 486 or later.
3559 */
3560DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3561{
3562 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3563}
3564
3565
3566/**
3567 * Atomically exchanges and adds to a 64-bit value, ordered.
3568 *
3569 * @returns The old value.
3570 * @param pu64 Pointer to the value.
3571 * @param u64 Number to add.
3572 *
3573 * @remarks x86: Requires a Pentium or later.
3574 */
3575#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3576DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3577#else
3578DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3579{
3580# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3581 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3582 return u64;
3583
3584# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3585 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3586 : "=r" (u64)
3587 , "=m" (*pu64)
3588 : "0" (u64)
3589 , "m" (*pu64)
3590 : "memory"
3591 , "cc");
3592 return u64;
3593
3594# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3595 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3596 "add %[uNew], %[uOld], %[uVal]\n\t"
3597 ,
3598 "add %[uNew], %[uOld], %[uVal]\n\t"
3599 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3600 [uVal] "r" (u64));
3601 return u64OldRet;
3602
3603# else
3604 uint64_t u64Old;
3605 for (;;)
3606 {
3607 uint64_t u64New;
3608 u64Old = ASMAtomicUoReadU64(pu64);
3609 u64New = u64Old + u64;
3610 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3611 break;
3612 ASMNopPause();
3613 }
3614 return u64Old;
3615# endif
3616}
3617#endif
3618
3619
3620/**
3621 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3622 *
3623 * @returns The old value.
3624 * @param pi64 Pointer to the value.
3625 * @param i64 Number to add.
3626 *
3627 * @remarks x86: Requires a Pentium or later.
3628 */
3629DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3630{
3631 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3632}
3633
3634
3635/**
3636 * Atomically exchanges and adds to a size_t value, ordered.
3637 *
3638 * @returns The old value.
3639 * @param pcb Pointer to the size_t value.
3640 * @param cb Number to add.
3641 */
3642DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3643{
3644#if ARCH_BITS == 64
3645 AssertCompileSize(size_t, 8);
3646 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3647#elif ARCH_BITS == 32
3648 AssertCompileSize(size_t, 4);
3649 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3650#elif ARCH_BITS == 16
3651 AssertCompileSize(size_t, 2);
3652 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3653#else
3654# error "Unsupported ARCH_BITS value"
3655#endif
3656}
3657
3658
3659/**
3660 * Atomically exchanges and adds a value whose size might differ between
3661 * platforms or compilers, ordered.
3662 *
3663 * @param pu Pointer to the variable to update.
3664 * @param uNew The value to add to *pu.
3665 * @param puOld Where to store the old value.
3666 */
3667#define ASMAtomicAddSize(pu, uNew, puOld) \
3668 do { \
3669 switch (sizeof(*(pu))) { \
3670 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3671 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3672 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3673 } \
3674 } while (0)
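
/* Usage sketch for ASMAtomicAddSize: unlike the typed functions it hands back
 * the old value through the puOld argument, and only 32-bit and 64-bit
 * operands are supported.  The allocation tracker below is illustrative.
 *
 *      static size_t volatile g_cbAllocated = 0;
 *
 *      static size_t exampleTrackAlloc(size_t cbNew)
 *      {
 *          size_t cbOld;
 *          ASMAtomicAddSize(&g_cbAllocated, cbNew, &cbOld);
 *          return cbOld;
 *      }
 */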
3675
3676
3677
3678/**
3679 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3680 *
3681 * @returns The old value.
3682 * @param pu16 Pointer to the value.
3683 * @param u16 Number to subtract.
3684 *
3685 * @remarks x86: Requires a 486 or later.
3686 */
3687DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3688{
3689 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3690}
3691
3692
3693/**
3694 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3695 *
3696 * @returns The old value.
3697 * @param pi16 Pointer to the value.
3698 * @param i16 Number to subtract.
3699 *
3700 * @remarks x86: Requires a 486 or later.
3701 */
3702DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3703{
3704 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3705}
3706
3707
3708/**
3709 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3710 *
3711 * @returns The old value.
3712 * @param pu32 Pointer to the value.
3713 * @param u32 Number to subtract.
3714 *
3715 * @remarks x86: Requires a 486 or later.
3716 */
3717DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3718{
3719 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3720}
3721
3722
3723/**
3724 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3725 *
3726 * @returns The old value.
3727 * @param pi32 Pointer to the value.
3728 * @param i32 Number to subtract.
3729 *
3730 * @remarks x86: Requires a 486 or later.
3731 */
3732DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3733{
3734 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3735}
3736
3737
3738/**
3739 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3740 *
3741 * @returns The old value.
3742 * @param pu64 Pointer to the value.
3743 * @param u64 Number to subtract.
3744 *
3745 * @remarks x86: Requires a Pentium or later.
3746 */
3747DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3748{
3749 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3750}
3751
3752
3753/**
3754 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3755 *
3756 * @returns The old value.
3757 * @param pi64 Pointer to the value.
3758 * @param i64 Number to subtract.
3759 *
3760 * @remarks x86: Requires a Pentium or later.
3761 */
3762DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3763{
3764 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3765}
3766
3767
3768/**
3769 * Atomically exchanges and subtracts from a size_t value, ordered.
3770 *
3771 * @returns The old value.
3772 * @param pcb Pointer to the size_t value.
3773 * @param cb Number to subtract.
3774 *
3775 * @remarks x86: Requires a 486 or later.
3776 */
3777DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3778{
3779#if ARCH_BITS == 64
3780 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3781#elif ARCH_BITS == 32
3782 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3783#elif ARCH_BITS == 16
3784 AssertCompileSize(size_t, 2);
3785 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3786#else
3787# error "Unsupported ARCH_BITS value"
3788#endif
3789}
3790
3791
3792/**
3793 * Atomically exchanges and subtracts a value whose size might differ between
3794 * platforms or compilers, ordered.
3795 *
3796 * @param pu Pointer to the variable to update.
3797 * @param uNew The value to subtract from *pu.
3798 * @param puOld Where to store the old value.
3799 *
3800 * @remarks x86: Requires a 486 or later.
3801 */
3802#define ASMAtomicSubSize(pu, uNew, puOld) \
3803 do { \
3804 switch (sizeof(*(pu))) { \
3805 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3806 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3807 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3808 } \
3809 } while (0)
3810
3811
3812
3813/**
3814 * Atomically increment a 16-bit value, ordered.
3815 *
3816 * @returns The new value.
3817 * @param pu16 Pointer to the value to increment.
3818 * @remarks Not implemented. Just to make 16-bit code happy.
3819 *
3820 * @remarks x86: Requires a 486 or later.
3821 */
3822RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3823
3824
3825/**
3826 * Atomically increment a 32-bit value, ordered.
3827 *
3828 * @returns The new value.
3829 * @param pu32 Pointer to the value to increment.
3830 *
3831 * @remarks x86: Requires a 486 or later.
3832 */
3833#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3834RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3835#else
3836DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3837{
3838# if RT_INLINE_ASM_USES_INTRIN
3839 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3840
3841# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3842 uint32_t u32;
3843# if RT_INLINE_ASM_GNU_STYLE
3844 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3845 : "=r" (u32)
3846 , "=m" (*pu32)
3847 : "0" (1)
3848 , "m" (*pu32)
3849 : "memory"
3850 , "cc");
3851 return u32+1;
3852# else
3853 __asm
3854 {
3855 mov eax, 1
3856# ifdef RT_ARCH_AMD64
3857 mov rdx, [pu32]
3858 lock xadd [rdx], eax
3859# else
3860 mov edx, [pu32]
3861 lock xadd [edx], eax
3862# endif
3863 mov u32, eax
3864 }
3865 return u32+1;
3866# endif
3867
3868# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3869 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
3870 "add %w[uNew], %w[uNew], #1\n\t",
3871 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
3872 "X" (0) /* dummy */);
3873 return u32NewRet;
3874
3875# else
3876 return ASMAtomicAddU32(pu32, 1) + 1;
3877# endif
3878}
3879#endif
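
/* Usage sketch: the increment functions return the *new* value, which makes
 * them a natural fit for reference counting.  MYOBJ and its cRefs member are
 * hypothetical.
 *
 *      typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *      static uint32_t myObjRetain(MYOBJ *pObj)
 *      {
 *          uint32_t const cRefs = ASMAtomicIncU32(&pObj->cRefs);
 *          Assert(cRefs > 1);    // the caller must already hold a reference
 *          return cRefs;
 *      }
 */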
3880
3881
3882/**
3883 * Atomically increment a signed 32-bit value, ordered.
3884 *
3885 * @returns The new value.
3886 * @param pi32 Pointer to the value to increment.
3887 *
3888 * @remarks x86: Requires a 486 or later.
3889 */
3890DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3891{
3892 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3893}
3894
3895
3896/**
3897 * Atomically increment a 64-bit value, ordered.
3898 *
3899 * @returns The new value.
3900 * @param pu64 Pointer to the value to increment.
3901 *
3902 * @remarks x86: Requires a Pentium or later.
3903 */
3904#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3905DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3906#else
3907DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3908{
3909# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3910 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
3911
3912# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3913 uint64_t u64;
3914 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3915 : "=r" (u64)
3916 , "=m" (*pu64)
3917 : "0" (1)
3918 , "m" (*pu64)
3919 : "memory"
3920 , "cc");
3921 return u64 + 1;
3922
3923# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3924 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
3925 "add %[uNew], %[uNew], #1\n\t"
3926 ,
3927 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
3928 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
3929 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
3930 return u64NewRet;
3931
3932# else
3933 return ASMAtomicAddU64(pu64, 1) + 1;
3934# endif
3935}
3936#endif
3937
3938
3939/**
3940 * Atomically increment a signed 64-bit value, ordered.
3941 *
3942 * @returns The new value.
3943 * @param pi64 Pointer to the value to increment.
3944 *
3945 * @remarks x86: Requires a Pentium or later.
3946 */
3947DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3948{
3949 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3950}
3951
3952
3953/**
3954 * Atomically increment a size_t value, ordered.
3955 *
3956 * @returns The new value.
3957 * @param pcb Pointer to the value to increment.
3958 *
3959 * @remarks x86: Requires a 486 or later.
3960 */
3961DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3962{
3963#if ARCH_BITS == 64
3964 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3965#elif ARCH_BITS == 32
3966 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3967#elif ARCH_BITS == 16
3968 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3969#else
3970# error "Unsupported ARCH_BITS value"
3971#endif
3972}
3973
3974
3975
3976/**
3977 * Atomically decrement an unsigned 16-bit value, ordered.
3978 *
3979 * @returns The new value.
3980 * @param pu16 Pointer to the value to decrement.
3981 * @remarks Not implemented. Just to make 16-bit code happy.
3982 *
3983 * @remarks x86: Requires a 486 or later.
3984 */
3985RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3986
3987
3988/**
3989 * Atomically decrement an unsigned 32-bit value, ordered.
3990 *
3991 * @returns The new value.
3992 * @param pu32 Pointer to the value to decrement.
3993 *
3994 * @remarks x86: Requires a 486 or later.
3995 */
3996#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3997RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3998#else
3999DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4000{
4001# if RT_INLINE_ASM_USES_INTRIN
4002 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4003
4004# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4005# if RT_INLINE_ASM_GNU_STYLE
4006 uint32_t u32;
4007 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4008 : "=r" (u32)
4009 , "=m" (*pu32)
4010 : "0" (-1)
4011 , "m" (*pu32)
4012 : "memory"
4013 , "cc");
4014 return u32-1;
4015# else
4016 uint32_t u32;
4017 __asm
4018 {
4019 mov eax, -1
4020# ifdef RT_ARCH_AMD64
4021 mov rdx, [pu32]
4022 lock xadd [rdx], eax
4023# else
4024 mov edx, [pu32]
4025 lock xadd [edx], eax
4026# endif
4027 mov u32, eax
4028 }
4029 return u32-1;
4030# endif
4031
4032# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4033 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4034 "sub %w[uNew], %w[uNew], #1\n\t",
4035 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4036 "X" (0) /* dummy */);
4037 return u32NewRet;
4038
4039# else
4040 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4041# endif
4042}
4043#endif
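
/* Usage sketch: the release counterpart to the retain example further up.
 * The decrement returns the new count, so exactly one caller sees zero and
 * frees the object.  MYOBJ and myObjFree are hypothetical.
 *
 *      static uint32_t myObjRelease(MYOBJ *pObj)
 *      {
 *          uint32_t const cRefs = ASMAtomicDecU32(&pObj->cRefs);
 *          if (cRefs == 0)
 *              myObjFree(pObj);
 *          return cRefs;
 *      }
 */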
4044
4045
4046/**
4047 * Atomically decrement a signed 32-bit value, ordered.
4048 *
4049 * @returns The new value.
4050 * @param pi32 Pointer to the value to decrement.
4051 *
4052 * @remarks x86: Requires a 486 or later.
4053 */
4054DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4055{
4056 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4057}
4058
4059
4060/**
4061 * Atomically decrement an unsigned 64-bit value, ordered.
4062 *
4063 * @returns The new value.
4064 * @param pu64 Pointer to the value to decrement.
4065 *
4066 * @remarks x86: Requires a Pentium or later.
4067 */
4068#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4069RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4070#else
4071DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4072{
4073# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4074 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4075
4076# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4077 uint64_t u64;
4078 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4079 : "=r" (u64)
4080 , "=m" (*pu64)
4081 : "0" (~(uint64_t)0)
4082 , "m" (*pu64)
4083 : "memory"
4084 , "cc");
4085 return u64-1;
4086
4087# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4088 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4089 "sub %[uNew], %[uNew], #1\n\t"
4090 ,
4091 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4092 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4093 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4094 return u64NewRet;
4095
4096# else
4097 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4098# endif
4099}
4100#endif
4101
4102
4103/**
4104 * Atomically decrement a signed 64-bit value, ordered.
4105 *
4106 * @returns The new value.
4107 * @param pi64 Pointer to the value to decrement.
4108 *
4109 * @remarks x86: Requires a Pentium or later.
4110 */
4111DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4112{
4113 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4114}
4115
4116
4117/**
4118 * Atomically decrement a size_t value, ordered.
4119 *
4120 * @returns The new value.
4121 * @param pcb Pointer to the value to decrement.
4122 *
4123 * @remarks x86: Requires a 486 or later.
4124 */
4125DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4126{
4127#if ARCH_BITS == 64
4128 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4129#elif ARCH_BITS == 32
4130 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4131#elif ARCH_BITS == 16
4132 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4133#else
4134# error "Unsupported ARCH_BITS value"
4135#endif
4136}
4137
4138
4139/**
4140 * Atomically Or an unsigned 32-bit value, ordered.
4141 *
4142 * @param pu32 Pointer to the variable to OR u32 with.
4143 * @param u32 The value to OR *pu32 with.
4144 *
4145 * @remarks x86: Requires a 386 or later.
4146 */
4147#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4148RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4149#else
4150DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4151{
4152# if RT_INLINE_ASM_USES_INTRIN
4153 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4154
4155# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4156# if RT_INLINE_ASM_GNU_STYLE
4157 __asm__ __volatile__("lock; orl %1, %0\n\t"
4158 : "=m" (*pu32)
4159 : "ir" (u32)
4160 , "m" (*pu32)
4161 : "cc");
4162# else
4163 __asm
4164 {
4165 mov eax, [u32]
4166# ifdef RT_ARCH_AMD64
4167 mov rdx, [pu32]
4168 lock or [rdx], eax
4169# else
4170 mov edx, [pu32]
4171 lock or [edx], eax
4172# endif
4173 }
4174# endif
4175
4176# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4177 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4178 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOrU32, pu32, DMB_SY,
4179 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4180 "orr %[uNew], %[uNew], %[uVal]\n\t",
4181 [uVal] "r" (u32));
4182
4183# else
4184# error "Port me"
4185# endif
4186}
4187#endif
4188
4189
4190/**
4191 * Atomically Or a signed 32-bit value, ordered.
4192 *
4193 * @param pi32 Pointer to the variable to OR i32 with.
4194 * @param i32 The value to OR *pi32 with.
4195 *
4196 * @remarks x86: Requires a 386 or later.
4197 */
4198DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4199{
4200 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4201}
4202
4203
4204/**
4205 * Atomically Or an unsigned 64-bit value, ordered.
4206 *
4207 * @param pu64 Pointer to the variable to OR u64 with.
4208 * @param u64 The value to OR *pu64 with.
4209 *
4210 * @remarks x86: Requires a Pentium or later.
4211 */
4212#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4213DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4214#else
4215DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4216{
4217# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4218 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4219
4220# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4221 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4222 : "=m" (*pu64)
4223 : "r" (u64)
4224 , "m" (*pu64)
4225 : "cc");
4226
4227# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4228 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4229 "orr %[uNew], %[uNew], %[uVal]\n\t"
4230 ,
4231 "orr %[uNew], %[uNew], %[uVal]\n\t"
4232 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4233 [uVal] "r" (u64));
4234
4235# else
4236 for (;;)
4237 {
4238 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4239 uint64_t u64New = u64Old | u64;
4240 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4241 break;
4242 ASMNopPause();
4243 }
4244# endif
4245}
4246#endif
4247
4248
4249/**
4250 * Atomically Or a signed 64-bit value, ordered.
4251 *
4252 * @param pi64 Pointer to the variable to OR i64 with.
4253 * @param i64 The value to OR *pi64 with.
4254 *
4255 * @remarks x86: Requires a Pentium or later.
4256 */
4257DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4258{
4259 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4260}
4261
4262
4263/**
4264 * Atomically And an unsigned 32-bit value, ordered.
4265 *
4266 * @param pu32 Pointer to the variable to AND u32 with.
4267 * @param u32 The value to AND *pu32 with.
4268 *
4269 * @remarks x86: Requires a 386 or later.
4270 */
4271#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4272RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4273#else
4274DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4275{
4276# if RT_INLINE_ASM_USES_INTRIN
4277 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4278
4279# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4280# if RT_INLINE_ASM_GNU_STYLE
4281 __asm__ __volatile__("lock; andl %1, %0\n\t"
4282 : "=m" (*pu32)
4283 : "ir" (u32)
4284 , "m" (*pu32)
4285 : "cc");
4286# else
4287 __asm
4288 {
4289 mov eax, [u32]
4290# ifdef RT_ARCH_AMD64
4291 mov rdx, [pu32]
4292 lock and [rdx], eax
4293# else
4294 mov edx, [pu32]
4295 lock and [edx], eax
4296# endif
4297 }
4298# endif
4299
4300# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4301 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAndU32, pu32, DMB_SY,
4302 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4303 "and %[uNew], %[uNew], %[uVal]\n\t",
4304 [uVal] "r" (u32));
4305
4306# else
4307# error "Port me"
4308# endif
4309}
4310#endif
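
/* Usage sketch: ASMAtomicOrU32 and ASMAtomicAndU32 are the usual way of
 * setting and clearing flag bits shared between threads.  The flag value and
 * functions below are made up for illustration.
 *
 *      #define MYDEV_F_IRQ_PENDING RT_BIT_32(0)
 *
 *      static void myDevSignalIrq(uint32_t volatile *pfFlags)
 *      {
 *          ASMAtomicOrU32(pfFlags, MYDEV_F_IRQ_PENDING);    // set the bit
 *      }
 *
 *      static void myDevAckIrq(uint32_t volatile *pfFlags)
 *      {
 *          ASMAtomicAndU32(pfFlags, ~MYDEV_F_IRQ_PENDING);  // clear the bit
 *      }
 */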
4311
4312
4313/**
4314 * Atomically And a signed 32-bit value, ordered.
4315 *
4316 * @param pi32 Pointer to the variable to AND i32 with.
4317 * @param i32 The value to AND *pi32 with.
4318 *
4319 * @remarks x86: Requires a 386 or later.
4320 */
4321DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4322{
4323 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4324}
4325
4326
4327/**
4328 * Atomically And an unsigned 64-bit value, ordered.
4329 *
4330 * @param pu64 Pointer to the variable to AND u64 with.
4331 * @param u64 The value to AND *pu64 with.
4332 *
4333 * @remarks x86: Requires a Pentium or later.
4334 */
4335#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4336DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4337#else
4338DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4339{
4340# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4341 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4342
4343# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4344 __asm__ __volatile__("lock; andq %1, %0\n\t"
4345 : "=m" (*pu64)
4346 : "r" (u64)
4347 , "m" (*pu64)
4348 : "cc");
4349
4350# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4351 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4352 "and %[uNew], %[uNew], %[uVal]\n\t"
4353 ,
4354 "and %[uNew], %[uNew], %[uVal]\n\t"
4355 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4356 [uVal] "r" (u64));
4357
4358# else
4359 for (;;)
4360 {
4361 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4362 uint64_t u64New = u64Old & u64;
4363 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4364 break;
4365 ASMNopPause();
4366 }
4367# endif
4368}
4369#endif
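
/* The compare-exchange fallback loop used above is a general pattern: read
 * the current value, compute the new one, and retry with ASMAtomicCmpXchgU64
 * until no other CPU raced us.  As a sketch, the same shape yields an atomic
 * maximum (this helper is not part of the API):
 *
 *      static uint64_t exampleAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64New)
 *      {
 *          for (;;)
 *          {
 *              uint64_t const u64Old = ASMAtomicUoReadU64(pu64);
 *              if (u64Old >= u64New)
 *                  return u64Old;                           // already large enough
 *              if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                  return u64New;                           // we installed the new maximum
 *              ASMNopPause();                               // lost the race, try again
 *          }
 *      }
 */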
4370
4371
4372/**
4373 * Atomically And a signed 64-bit value, ordered.
4374 *
4375 * @param pi64 Pointer to the variable to AND i64 with.
4376 * @param i64 The value to AND *pi64 with.
4377 *
4378 * @remarks x86: Requires a Pentium or later.
4379 */
4380DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4381{
4382 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4383}
4384
4385
4386/**
4387 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4388 *
4389 * @param pu32 Pointer to the variable to OR u32 with.
4390 * @param u32 The value to OR *pu32 with.
4391 *
4392 * @remarks x86: Requires a 386 or later.
4393 */
4394#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4395RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4396#else
4397DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4398{
4399# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4400# if RT_INLINE_ASM_GNU_STYLE
4401 __asm__ __volatile__("orl %1, %0\n\t"
4402 : "=m" (*pu32)
4403 : "ir" (u32)
4404 , "m" (*pu32)
4405 : "cc");
4406# else
4407 __asm
4408 {
4409 mov eax, [u32]
4410# ifdef RT_ARCH_AMD64
4411 mov rdx, [pu32]
4412 or [rdx], eax
4413# else
4414 mov edx, [pu32]
4415 or [edx], eax
4416# endif
4417 }
4418# endif
4419
4420# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4421 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4422 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4423 "orr %[uNew], %[uNew], %[uVal]\n\t",
4424 [uVal] "r" (u32));
4425
4426# else
4427# error "Port me"
4428# endif
4429}
4430#endif
4431
4432
4433/**
4434 * Atomically OR a signed 32-bit value, unordered.
4435 *
4436 * @param pi32 Pointer to the variable to OR i32 with.
4437 * @param i32 The value to OR *pi32 with.
4438 *
4439 * @remarks x86: Requires a 386 or later.
4440 */
4441DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4442{
4443 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4444}
4445
4446
4447/**
4448 * Atomically OR an unsigned 64-bit value, unordered.
4449 *
4450 * @param pu64 Pointer to the variable to OR u64 with.
4451 * @param u64 The value to OR *pu64 with.
4452 *
4453 * @remarks x86: Requires a Pentium or later.
4454 */
4455#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4456DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4457#else
4458DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4459{
4460# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4461 __asm__ __volatile__("orq %1, %q0\n\t"
4462 : "=m" (*pu64)
4463 : "r" (u64)
4464 , "m" (*pu64)
4465 : "cc");
4466
4467# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4468 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
4469 "orr %[uNew], %[uNew], %[uVal]\n\t"
4470 ,
4471 "orr %[uNew], %[uNew], %[uVal]\n\t"
4472 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4473 [uVal] "r" (u64));
4474
4475# else
4476 for (;;)
4477 {
4478 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4479 uint64_t u64New = u64Old | u64;
4480 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4481 break;
4482 ASMNopPause();
4483 }
4484# endif
4485}
4486#endif
4487
4488
4489/**
4490 * Atomically Or a signed 64-bit value, unordered.
4491 *
4492 * @param pi64 Pointer to the variable to OR i64 with.
4493 * @param i64 The value to OR *pi64 with.
4494 *
4495 * @remarks x86: Requires a Pentium or later.
4496 */
4497DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4498{
4499 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4500}
4501
4502
4503/**
4504 * Atomically And an unsigned 32-bit value, unordered.
4505 *
4506 * @param pu32 Pointer to the variable to AND u32 with.
4507 * @param u32 The value to AND *pu32 with.
4508 *
4509 * @remarks x86: Requires a 386 or later.
4510 */
4511#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4512RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4513#else
4514DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4515{
4516# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4517# if RT_INLINE_ASM_GNU_STYLE
4518 __asm__ __volatile__("andl %1, %0\n\t"
4519 : "=m" (*pu32)
4520 : "ir" (u32)
4521 , "m" (*pu32)
4522 : "cc");
4523# else
4524 __asm
4525 {
4526 mov eax, [u32]
4527# ifdef RT_ARCH_AMD64
4528 mov rdx, [pu32]
4529 and [rdx], eax
4530# else
4531 mov edx, [pu32]
4532 and [edx], eax
4533# endif
4534 }
4535# endif
4536
4537# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4538 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAndU32, pu32, NO_BARRIER,
4539 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4540 "and %[uNew], %[uNew], %[uVal]\n\t",
4541 [uVal] "r" (u32));
4542
4543# else
4544# error "Port me"
4545# endif
4546}
4547#endif
4548
4549
4550/**
4551 * Atomically And a signed 32-bit value, unordered.
4552 *
4553 * @param pi32 Pointer to the variable to AND i32 with.
4554 * @param i32 The value to AND *pi32 with.
4555 *
4556 * @remarks x86: Requires a 386 or later.
4557 */
4558DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4559{
4560 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4561}
4562
4563
4564/**
4565 * Atomically And an unsigned 64-bit value, unordered.
4566 *
4567 * @param pu64 Pointer to the variable to AND u64 with.
4568 * @param u64 The value to AND *pu64 with.
4569 *
4570 * @remarks x86: Requires a Pentium or later.
4571 */
4572#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4573DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4574#else
4575DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4576{
4577# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4578 __asm__ __volatile__("andq %1, %0\n\t"
4579 : "=m" (*pu64)
4580 : "r" (u64)
4581 , "m" (*pu64)
4582 : "cc");
4583
4584# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4585 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
4586 "and %[uNew], %[uNew], %[uVal]\n\t"
4587 ,
4588 "and %[uNew], %[uNew], %[uVal]\n\t"
4589 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4590 [uVal] "r" (u64));
4591
4592# else
4593 for (;;)
4594 {
4595 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4596 uint64_t u64New = u64Old & u64;
4597 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4598 break;
4599 ASMNopPause();
4600 }
4601# endif
4602}
4603#endif
4604
4605
4606/**
4607 * Atomically And a signed 64-bit value, unordered.
4608 *
4609 * @param pi64 Pointer to the variable to AND i64 with.
4610 * @param i64 The value to AND *pi64 with.
4611 *
4612 * @remarks x86: Requires a Pentium or later.
4613 */
4614DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4615{
4616 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4617}
4618
4619
4620/**
4621 * Atomically increment an unsigned 32-bit value, unordered.
4622 *
4623 * @returns the new value.
4624 * @param pu32 Pointer to the variable to increment.
4625 *
4626 * @remarks x86: Requires a 486 or later.
4627 */
4628#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4629RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4630#else
4631DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4632{
4633# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4634 uint32_t u32;
4635# if RT_INLINE_ASM_GNU_STYLE
4636 __asm__ __volatile__("xaddl %0, %1\n\t"
4637 : "=r" (u32)
4638 , "=m" (*pu32)
4639 : "0" (1)
4640 , "m" (*pu32)
4641 : "memory" /** @todo why 'memory'? */
4642 , "cc");
4643 return u32 + 1;
4644# else
4645 __asm
4646 {
4647 mov eax, 1
4648# ifdef RT_ARCH_AMD64
4649 mov rdx, [pu32]
4650 xadd [rdx], eax
4651# else
4652 mov edx, [pu32]
4653 xadd [edx], eax
4654# endif
4655 mov u32, eax
4656 }
4657 return u32 + 1;
4658# endif
4659
4660# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4661 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
4662 "add %w[uNew], %w[uNew], #1\n\t",
4663 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4664 "X" (0) /* dummy */);
4665 return u32NewRet;
4666
4667# else
4668# error "Port me"
4669# endif
4670}
4671#endif
4672
4673
4674/**
4675 * Atomically decrement an unsigned 32-bit value, unordered.
4676 *
4677 * @returns the new value.
4678 * @param pu32 Pointer to the variable to decrement.
4679 *
4680 * @remarks x86: Requires a 486 or later.
4681 */
4682#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4683RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4684#else
4685DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4686{
4687# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4688 uint32_t u32;
4689# if RT_INLINE_ASM_GNU_STYLE
4690 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4691 : "=r" (u32)
4692 , "=m" (*pu32)
4693 : "0" (-1)
4694 , "m" (*pu32)
4695 : "memory"
4696 , "cc");
4697 return u32 - 1;
4698# else
4699 __asm
4700 {
4701 mov eax, -1
4702# ifdef RT_ARCH_AMD64
4703 mov rdx, [pu32]
4704 xadd [rdx], eax
4705# else
4706 mov edx, [pu32]
4707 xadd [edx], eax
4708# endif
4709 mov u32, eax
4710 }
4711 return u32 - 1;
4712# endif
4713
4714# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4715 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
4716 "sub %w[uNew], %w[uNew], #1\n\t",
4717 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4718 "X" (0) /* dummy */);
4719 return u32NewRet;
4720
4721# else
4722# error "Port me"
4723# endif
4724}
4725#endif
4726
4727
4728/** @def RT_ASM_PAGE_SIZE
4729 * We try to avoid dragging in iprt/param.h here.
4730 * @internal
4731 */
4732#if defined(RT_ARCH_SPARC64)
4733# define RT_ASM_PAGE_SIZE 0x2000
4734# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4735# if PAGE_SIZE != 0x2000
4736# error "PAGE_SIZE is not 0x2000!"
4737# endif
4738# endif
4739#elif defined(RT_ARCH_ARM64)
4740# define RT_ASM_PAGE_SIZE 0x4000
4741# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
4742# if PAGE_SIZE != 0x4000
4743# error "PAGE_SIZE is not 0x4000!"
4744# endif
4745# endif
4746#else
4747# define RT_ASM_PAGE_SIZE 0x1000
4748# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4749# if PAGE_SIZE != 0x1000
4750# error "PAGE_SIZE is not 0x1000!"
4751# endif
4752# endif
4753#endif
4754
4755/**
4756 * Zeros a 4K memory page.
4757 *
4758 * @param pv Pointer to the memory block. This must be page aligned.
4759 */
4760#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
4761RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
4762# else
4763DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
4764{
4765# if RT_INLINE_ASM_USES_INTRIN
4766# ifdef RT_ARCH_AMD64
4767 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
4768# else
4769 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
4770# endif
4771
4772# elif RT_INLINE_ASM_GNU_STYLE
4773 RTCCUINTREG uDummy;
4774# ifdef RT_ARCH_AMD64
4775 __asm__ __volatile__("rep stosq"
4776 : "=D" (pv),
4777 "=c" (uDummy)
4778 : "0" (pv),
4779 "c" (RT_ASM_PAGE_SIZE >> 3),
4780 "a" (0)
4781 : "memory");
4782# else
4783 __asm__ __volatile__("rep stosl"
4784 : "=D" (pv),
4785 "=c" (uDummy)
4786 : "0" (pv),
4787 "c" (RT_ASM_PAGE_SIZE >> 2),
4788 "a" (0)
4789 : "memory");
4790# endif
4791# else
4792 __asm
4793 {
4794# ifdef RT_ARCH_AMD64
4795 xor rax, rax
4796 mov ecx, 0200h
4797 mov rdi, [pv]
4798 rep stosq
4799# else
4800 xor eax, eax
4801 mov ecx, 0400h
4802 mov edi, [pv]
4803 rep stosd
4804# endif
4805 }
4806# endif
4807}
4808# endif
4809
4810
4811/**
4812 * Zeros a memory block with a 32-bit aligned size.
4813 *
4814 * @param pv Pointer to the memory block.
4815 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4816 */
4817#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
4818RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
4819#else
4820DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4821{
4822# if RT_INLINE_ASM_USES_INTRIN
4823# ifdef RT_ARCH_AMD64
4824 if (!(cb & 7))
4825 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
4826 else
4827# endif
4828 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
4829
4830# elif RT_INLINE_ASM_GNU_STYLE
4831 __asm__ __volatile__("rep stosl"
4832 : "=D" (pv),
4833 "=c" (cb)
4834 : "0" (pv),
4835 "1" (cb >> 2),
4836 "a" (0)
4837 : "memory");
4838# else
4839 __asm
4840 {
4841 xor eax, eax
4842# ifdef RT_ARCH_AMD64
4843 mov rcx, [cb]
4844 shr rcx, 2
4845 mov rdi, [pv]
4846# else
4847 mov ecx, [cb]
4848 shr ecx, 2
4849 mov edi, [pv]
4850# endif
4851 rep stosd
4852 }
4853# endif
4854}
4855#endif
4856
4857
4858/**
4859 * Fills a memory block with a 32-bit aligned size.
4860 *
4861 * @param pv Pointer to the memory block.
4862 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4863 * @param u32 The value to fill with.
4864 */
4865#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
4866RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
4867#else
4868DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
4869{
4870# if RT_INLINE_ASM_USES_INTRIN
4871# ifdef RT_ARCH_AMD64
4872 if (!(cb & 7))
4873 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4874 else
4875# endif
4876 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
4877
4878# elif RT_INLINE_ASM_GNU_STYLE
4879 __asm__ __volatile__("rep stosl"
4880 : "=D" (pv),
4881 "=c" (cb)
4882 : "0" (pv),
4883 "1" (cb >> 2),
4884 "a" (u32)
4885 : "memory");
4886# else
4887 __asm
4888 {
4889# ifdef RT_ARCH_AMD64
4890 mov rcx, [cb]
4891 shr rcx, 2
4892 mov rdi, [pv]
4893# else
4894 mov ecx, [cb]
4895 shr ecx, 2
4896 mov edi, [pv]
4897# endif
4898 mov eax, [u32]
4899 rep stosd
4900 }
4901# endif
4902}
4903#endif
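
/* Usage sketch: ASMMemFill32 requires the byte count to be a multiple of
 * four; a common use is stamping a buffer with a recognizable poison pattern.
 * The helper and pattern below are illustrative.
 *
 *      static void examplePoisonBuffer(void *pvBuf, size_t cbBuf)
 *      {
 *          Assert(!(cbBuf & 3));
 *          ASMMemFill32(pvBuf, cbBuf, UINT32_C(0xdeadbeef));
 *      }
 */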
4904
4905
4906/**
4907 * Checks if a memory block is all zeros.
4908 *
4909 * @returns Pointer to the first non-zero byte.
4910 * @returns NULL if all zero.
4911 *
4912 * @param pv Pointer to the memory block.
4913 * @param cb Number of bytes in the block.
4914 */
4915#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
4916DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
4917#else
4918DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4919{
4920/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
4921 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4922 for (; cb; cb--, pb++)
4923 if (RT_LIKELY(*pb == 0))
4924 { /* likely */ }
4925 else
4926 return (void RT_FAR *)pb;
4927 return NULL;
4928}
4929#endif
4930
4931
4932/**
4933 * Checks if a memory block is all zeros.
4934 *
4935 * @returns true if zero, false if not.
4936 *
4937 * @param pv Pointer to the memory block.
4938 * @param cb Number of bytes in the block.
4939 *
4940 * @sa ASMMemFirstNonZero
4941 */
4942DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4943{
4944 return ASMMemFirstNonZero(pv, cb) == NULL;
4945}
4946
4947
4948/**
4949 * Checks if a memory page is all zeros.
4950 *
4951 * @returns true / false.
4952 *
4953 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
4954 * boundary.
4955 */
4956DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
4957{
4958# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4959 union { RTCCUINTREG r; bool f; } uAX;
4960 RTCCUINTREG xCX, xDI;
4961 Assert(!((uintptr_t)pvPage & 15));
4962 __asm__ __volatile__("repe; "
4963# ifdef RT_ARCH_AMD64
4964 "scasq\n\t"
4965# else
4966 "scasl\n\t"
4967# endif
4968 "setnc %%al\n\t"
4969 : "=&c" (xCX)
4970 , "=&D" (xDI)
4971 , "=&a" (uAX.r)
4972 : "mr" (pvPage)
4973# ifdef RT_ARCH_AMD64
4974 , "0" (RT_ASM_PAGE_SIZE/8)
4975# else
4976 , "0" (RT_ASM_PAGE_SIZE/4)
4977# endif
4978 , "1" (pvPage)
4979 , "2" (0)
4980 : "cc");
4981 return uAX.f;
4982# else
4983 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4984 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4985 Assert(!((uintptr_t)pvPage & 15));
4986 for (;;)
4987 {
4988 if (puPtr[0]) return false;
4989 if (puPtr[4]) return false;
4990
4991 if (puPtr[2]) return false;
4992 if (puPtr[6]) return false;
4993
4994 if (puPtr[1]) return false;
4995 if (puPtr[5]) return false;
4996
4997 if (puPtr[3]) return false;
4998 if (puPtr[7]) return false;
4999
5000 if (!--cLeft)
5001 return true;
5002 puPtr += 8;
5003 }
5004# endif
5005}
5006
5007
5008/**
5009 * Checks if a memory block is filled with the specified byte, returning the
5010 * first mismatch.
5011 *
5012 * This is sort of an inverted memchr.
5013 *
5014 * @returns Pointer to the byte which doesn't equal u8.
5015 * @returns NULL if all equal to u8.
5016 *
5017 * @param pv Pointer to the memory block.
5018 * @param cb Number of bytes in the block.
5019 * @param u8 The value it's supposed to be filled with.
5020 *
5021 * @remarks No alignment requirements.
5022 */
5023#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5024 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5025DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5026#else
5027DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5028{
5029/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5030 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5031 for (; cb; cb--, pb++)
5032 if (RT_LIKELY(*pb == u8))
5033 { /* likely */ }
5034 else
5035 return (void *)pb;
5036 return NULL;
5037}
5038#endif
5039
5040
5041/**
5042 * Checks if a memory block is filled with the specified byte.
5043 *
5044 * @returns true if all matching, false if not.
5045 *
5046 * @param pv Pointer to the memory block.
5047 * @param cb Number of bytes in the block.
5048 * @param u8 The value it's supposed to be filled with.
5049 *
5050 * @remarks No alignment requirements.
5051 */
5052DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5053{
5054 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5055}
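
/* Usage sketch: the helpers above make "is this block still in its expected
 * state" checks cheap to write; the 0xff fill value tested here is arbitrary.
 *
 *      static bool exampleIsBufferUntouched(void const *pvBuf, size_t cbBuf)
 *      {
 *          return ASMMemIsZero(pvBuf, cbBuf)
 *              || ASMMemIsAllU8(pvBuf, cbBuf, 0xff);
 *      }
 */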
5056
5057
5058/**
5059 * Checks if a memory block is filled with the specified 32-bit value.
5060 *
5061 * This is a sort of inverted memchr.
5062 *
5063 * @returns Pointer to the first value which doesn't equal u32.
5064 * @returns NULL if all equal to u32.
5065 *
5066 * @param pv Pointer to the memory block.
5067 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5068 * @param u32 The value it's supposed to be filled with.
5069 */
5070DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5071{
5072/** @todo rewrite this in inline assembly? */
5073 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5074 for (; cb; cb -= 4, pu32++)
5075 if (RT_LIKELY(*pu32 == u32))
5076 { /* likely */ }
5077 else
5078 return (uint32_t RT_FAR *)pu32;
5079 return NULL;
5080}
5081
5082
5083/**
5084 * Probes a byte pointer for read access.
5085 *
5086 * While the function will fault if the byte is not read accessible,
5087 * the idea is to do this in a safe place like before acquiring locks
5088 * and such like.
5089 *
5090 * Also, this function guarantees that an eager compiler is not going
5091 * to optimize the probing away.
5092 *
5093 * @param pvByte Pointer to the byte.
5094 */
5095#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5096RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5097#else
5098DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5099{
5100# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5101 uint8_t u8;
5102# if RT_INLINE_ASM_GNU_STYLE
5103 __asm__ __volatile__("movb (%1), %0\n\t"
5104 : "=r" (u8)
5105 : "r" (pvByte));
5106# else
5107 __asm
5108 {
5109# ifdef RT_ARCH_AMD64
5110 mov rax, [pvByte]
5111 mov al, [rax]
5112# else
5113 mov eax, [pvByte]
5114 mov al, [eax]
5115# endif
5116 mov [u8], al
5117 }
5118# endif
5119 return u8;
5120
5121# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5122 uint32_t u32;
5123 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5124# if defined(RT_ARCH_ARM64)
5125 "ldxrb %w[uDst], %[pMem]\n\t"
5126# else
5127 "ldrexb %[uDst], %[pMem]\n\t"
5128# endif
5129 : [uDst] "=&r" (u32)
5130 : [pMem] "m" (*(uint8_t const *)pvByte));
5131 return (uint8_t)u32;
5132
5133# else
5134# error "Port me"
5135# endif
5136}
5137#endif
5138
5139/**
5140 * Probes a buffer for read access page by page.
5141 *
5142 * While the function will fault if the buffer is not fully read
5143 * accessible, the idea is to do this in a safe place like before
5144 * acquiring locks and such like.
5145 *
5146 * Also, this function guarantees that an eager compiler is not going
5147 * to optimize the probing away.
5148 *
5149 * @param pvBuf Pointer to the buffer.
5150 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5151 */
5152DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5153{
5154 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5155 /* the first byte */
5156 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5157 ASMProbeReadByte(pu8);
5158
5159 /* the pages in between. */
5160 while (cbBuf > RT_ASM_PAGE_SIZE)
5161 {
5162 ASMProbeReadByte(pu8);
5163 cbBuf -= RT_ASM_PAGE_SIZE;
5164 pu8 += RT_ASM_PAGE_SIZE;
5165 }
5166
5167 /* the last byte */
5168 ASMProbeReadByte(pu8 + cbBuf - 1);
5169}
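
/* Usage sketch: probe a caller supplied buffer up front, before any locks are
 * taken, so that an access fault happens in a harmless context.  The request
 * structure is hypothetical.
 *
 *      typedef struct MYREQ { void const *pvData; size_t cbData; } MYREQ;
 *
 *      static void exampleValidateRequest(MYREQ const *pReq)
 *      {
 *          if (pReq->cbData)
 *              ASMProbeReadBuffer(pReq->pvData, pReq->cbData);
 *          // ... now acquire locks and process the request ...
 *      }
 */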
5170
5171
5172
5173/** @defgroup grp_inline_bits Bit Operations
5174 * @{
5175 */
5176
5177
5178/**
5179 * Sets a bit in a bitmap.
5180 *
5181 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5182 * @param iBit The bit to set.
5183 *
5184 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5185 * However, doing so will yield better performance as well as avoiding
5186 * traps accessing the last bits in the bitmap.
5187 */
5188#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5189RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5190#else
5191DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5192{
5193# if RT_INLINE_ASM_USES_INTRIN
5194 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5195
5196# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5197# if RT_INLINE_ASM_GNU_STYLE
5198 __asm__ __volatile__("btsl %1, %0"
5199 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5200 : "Ir" (iBit)
5201 , "m" (*(volatile long RT_FAR *)pvBitmap)
5202 : "memory"
5203 , "cc");
5204# else
5205 __asm
5206 {
5207# ifdef RT_ARCH_AMD64
5208 mov rax, [pvBitmap]
5209 mov edx, [iBit]
5210 bts [rax], edx
5211# else
5212 mov eax, [pvBitmap]
5213 mov edx, [iBit]
5214 bts [eax], edx
5215# endif
5216 }
5217# endif
5218
5219# else
5220 int32_t offBitmap = iBit / 32;
5221 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5222 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_BIT_32(iBit & 31));
5223# endif
5224}
5225#endif
5226
5227
5228/**
5229 * Atomically sets a bit in a bitmap, ordered.
5230 *
5231 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5232 * the memory access isn't atomic!
5233 * @param iBit The bit to set.
5234 *
5235 * @remarks x86: Requires a 386 or later.
5236 */
5237#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5238RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5239#else
5240DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5241{
5242 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5243# if RT_INLINE_ASM_USES_INTRIN
5244 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5245# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5246# if RT_INLINE_ASM_GNU_STYLE
5247 __asm__ __volatile__("lock; btsl %1, %0"
5248 : "=m" (*(volatile long *)pvBitmap)
5249 : "Ir" (iBit)
5250 , "m" (*(volatile long *)pvBitmap)
5251 : "memory"
5252 , "cc");
5253# else
5254 __asm
5255 {
5256# ifdef RT_ARCH_AMD64
5257 mov rax, [pvBitmap]
5258 mov edx, [iBit]
5259 lock bts [rax], edx
5260# else
5261 mov eax, [pvBitmap]
5262 mov edx, [iBit]
5263 lock bts [eax], edx
5264# endif
5265 }
5266# endif
5267
5268# else
5269 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_BIT_32(iBit & 31));
5270# endif
5271}
5272#endif
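
/* Usage sketch: a small allocation bitmap.  Declaring it as an array of
 * uint32_t satisfies the 32-bit alignment requirement of the atomic bit
 * operations by construction; the 1024 bit size is arbitrary.
 *
 *      static uint32_t volatile g_bmInUse[1024 / 32];
 *
 *      static void exampleMarkSlotInUse(int32_t iSlot)
 *      {
 *          Assert(iSlot >= 0 && iSlot < 1024);
 *          ASMAtomicBitSet(&g_bmInUse[0], iSlot);
 *      }
 */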
5273
5274
5275/**
5276 * Clears a bit in a bitmap.
5277 *
5278 * @param pvBitmap Pointer to the bitmap.
5279 * @param iBit The bit to clear.
5280 *
5281 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5282 * However, doing so will yield better performance as well as avoiding
5283 * traps accessing the last bits in the bitmap.
5284 */
5285#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5286RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5287#else
5288DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5289{
5290# if RT_INLINE_ASM_USES_INTRIN
5291 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5292
5293# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5294# if RT_INLINE_ASM_GNU_STYLE
5295 __asm__ __volatile__("btrl %1, %0"
5296 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5297 : "Ir" (iBit)
5298 , "m" (*(volatile long RT_FAR *)pvBitmap)
5299 : "memory"
5300 , "cc");
5301# else
5302 __asm
5303 {
5304# ifdef RT_ARCH_AMD64
5305 mov rax, [pvBitmap]
5306 mov edx, [iBit]
5307 btr [rax], edx
5308# else
5309 mov eax, [pvBitmap]
5310 mov edx, [iBit]
5311 btr [eax], edx
5312# endif
5313 }
5314# endif
5315
5316# else
5317 int32_t offBitmap = iBit / 32;
5318 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5319 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], ~RT_BIT_32(iBit & 31));
5320# endif
5321}
5322#endif
5323
5324
5325/**
5326 * Atomically clears a bit in a bitmap, ordered.
5327 *
5328 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5329 * the memory access isn't atomic!
5330 * @param iBit The bit to clear.
5331 *
5332 * @remarks No memory barrier, take care on smp.
5333 * @remarks x86: Requires a 386 or later.
5334 */
5335#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5336RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5337#else
5338DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5339{
5340 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5341# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5342# if RT_INLINE_ASM_GNU_STYLE
5343 __asm__ __volatile__("lock; btrl %1, %0"
5344 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5345 : "Ir" (iBit)
5346 , "m" (*(volatile long RT_FAR *)pvBitmap)
5347 : "memory"
5348 , "cc");
5349# else
5350 __asm
5351 {
5352# ifdef RT_ARCH_AMD64
5353 mov rax, [pvBitmap]
5354 mov edx, [iBit]
5355 lock btr [rax], edx
5356# else
5357 mov eax, [pvBitmap]
5358 mov edx, [iBit]
5359 lock btr [eax], edx
5360# endif
5361 }
5362# endif
5363# else
5364 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], ~RT_BIT_32(iBit & 31));
5365# endif
5366}
5367#endif
5368
5369
5370/**
5371 * Toggles a bit in a bitmap.
5372 *
5373 * @param pvBitmap Pointer to the bitmap.
5374 * @param iBit The bit to toggle.
5375 *
5376 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5377 * However, doing so will yield better performance as well as avoiding
5378 * traps accessing the last bits in the bitmap.
5379 */
5380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5381RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5382#else
5383DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5384{
5385# if RT_INLINE_ASM_USES_INTRIN
5386 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5387# elif RT_INLINE_ASM_GNU_STYLE
5388 __asm__ __volatile__("btcl %1, %0"
5389 : "=m" (*(volatile long *)pvBitmap)
5390 : "Ir" (iBit)
5391 , "m" (*(volatile long *)pvBitmap)
5392 : "memory"
5393 , "cc");
5394# else
5395 __asm
5396 {
5397# ifdef RT_ARCH_AMD64
5398 mov rax, [pvBitmap]
5399 mov edx, [iBit]
5400 btc [rax], edx
5401# else
5402 mov eax, [pvBitmap]
5403 mov edx, [iBit]
5404 btc [eax], edx
5405# endif
5406 }
5407# endif
5408}
5409#endif
5410
5411
5412/**
5413 * Atomically toggles a bit in a bitmap, ordered.
5414 *
5415 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5416 * the memory access isn't atomic!
5417 * @param iBit The bit to toggle.
5418 *
5419 * @remarks x86: Requires a 386 or later.
5420 */
5421#if RT_INLINE_ASM_EXTERNAL
5422RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5423#else
5424DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5425{
5426 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5427# if RT_INLINE_ASM_GNU_STYLE
5428 __asm__ __volatile__("lock; btcl %1, %0"
5429 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5430 : "Ir" (iBit)
5431 , "m" (*(volatile long RT_FAR *)pvBitmap)
5432 : "memory"
5433 , "cc");
5434# else
5435 __asm
5436 {
5437# ifdef RT_ARCH_AMD64
5438 mov rax, [pvBitmap]
5439 mov edx, [iBit]
5440 lock btc [rax], edx
5441# else
5442 mov eax, [pvBitmap]
5443 mov edx, [iBit]
5444 lock btc [eax], edx
5445# endif
5446 }
5447# endif
5448}
5449#endif
5450
5451
5452/**
5453 * Tests and sets a bit in a bitmap.
5454 *
5455 * @returns true if the bit was set.
5456 * @returns false if the bit was clear.
5457 *
5458 * @param pvBitmap Pointer to the bitmap.
5459 * @param iBit The bit to test and set.
5460 *
5461 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5462 * However, doing so will yield better performance as well as avoiding
5463 * traps accessing the last bits in the bitmap.
5464 */
5465#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5466RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5467#else
5468DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5469{
5470 union { bool f; uint32_t u32; uint8_t u8; } rc;
5471# if RT_INLINE_ASM_USES_INTRIN
5472 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
5473
5474# elif RT_INLINE_ASM_GNU_STYLE
5475 __asm__ __volatile__("btsl %2, %1\n\t"
5476 "setc %b0\n\t"
5477 "andl $1, %0\n\t"
5478 : "=q" (rc.u32)
5479 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5480 : "Ir" (iBit)
5481 , "m" (*(volatile long RT_FAR *)pvBitmap)
5482 : "memory"
5483 , "cc");
5484# else
5485 __asm
5486 {
5487 mov edx, [iBit]
5488# ifdef RT_ARCH_AMD64
5489 mov rax, [pvBitmap]
5490 bts [rax], edx
5491# else
5492 mov eax, [pvBitmap]
5493 bts [eax], edx
5494# endif
5495 setc al
5496 and eax, 1
5497 mov [rc.u32], eax
5498 }
5499# endif
5500 return rc.f;
5501}
5502#endif
5503
5504
5505/**
5506 * Atomically tests and sets a bit in a bitmap, ordered.
5507 *
5508 * @returns true if the bit was set.
5509 * @returns false if the bit was clear.
5510 *
5511 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5512 * the memory access isn't atomic!
5513 * @param iBit The bit to test and set.
5514 *
5515 * @remarks x86: Requires a 386 or later.
5516 */
5517#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5518RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5519#else
5520DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5521{
5522 union { bool f; uint32_t u32; uint8_t u8; } rc;
5523 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5524# if RT_INLINE_ASM_USES_INTRIN
5525 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5526# elif RT_INLINE_ASM_GNU_STYLE
5527 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5528 "setc %b0\n\t"
5529 "andl $1, %0\n\t"
5530 : "=q" (rc.u32)
5531 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5532 : "Ir" (iBit)
5533 , "m" (*(volatile long RT_FAR *)pvBitmap)
5534 : "memory"
5535 , "cc");
5536# else
5537 __asm
5538 {
5539 mov edx, [iBit]
5540# ifdef RT_ARCH_AMD64
5541 mov rax, [pvBitmap]
5542 lock bts [rax], edx
5543# else
5544 mov eax, [pvBitmap]
5545 lock bts [eax], edx
5546# endif
5547 setc al
5548 and eax, 1
5549 mov [rc.u32], eax
5550 }
5551# endif
5552 return rc.f;
5553}
5554#endif
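
A minimal usage sketch: because the old bit value is returned, ASMAtomicBitTestAndSet can claim a slot in a shared bitmap without a separate read-then-set race. The bitmap variable and helper below are illustrative only, not part of IPRT.

#include <iprt/asm.h>

/* Hypothetical shared map of 64 resource slots; a uint32_t array keeps it 32-bit aligned. */
static uint32_t g_bmSlotsInUse[2];

/* Tries to claim slot iSlot; returns true on success, false if it was already taken. */
static bool ExampleTryClaimSlot(int32_t iSlot)
{
    /* A false return means the bit was previously clear, i.e. the slot is now ours. */
    return !ASMAtomicBitTestAndSet(&g_bmSlotsInUse[0], iSlot);
}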
5555
5556
5557/**
5558 * Tests and clears a bit in a bitmap.
5559 *
5560 * @returns true if the bit was set.
5561 * @returns false if the bit was clear.
5562 *
5563 * @param pvBitmap Pointer to the bitmap.
5564 * @param iBit The bit to test and clear.
5565 *
5566 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but aligning
5567 * it yields better performance and avoids traps when accessing the last
5568 * bits in the bitmap.
5569 */
5570#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5571RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5572#else
5573DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5574{
5575 union { bool f; uint32_t u32; uint8_t u8; } rc;
5576# if RT_INLINE_ASM_USES_INTRIN
5577 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5578
5579# elif RT_INLINE_ASM_GNU_STYLE
5580 __asm__ __volatile__("btrl %2, %1\n\t"
5581 "setc %b0\n\t"
5582 "andl $1, %0\n\t"
5583 : "=q" (rc.u32)
5584 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5585 : "Ir" (iBit)
5586 , "m" (*(volatile long RT_FAR *)pvBitmap)
5587 : "memory"
5588 , "cc");
5589# else
5590 __asm
5591 {
5592 mov edx, [iBit]
5593# ifdef RT_ARCH_AMD64
5594 mov rax, [pvBitmap]
5595 btr [rax], edx
5596# else
5597 mov eax, [pvBitmap]
5598 btr [eax], edx
5599# endif
5600 setc al
5601 and eax, 1
5602 mov [rc.u32], eax
5603 }
5604# endif
5605 return rc.f;
5606}
5607#endif
5608
5609
5610/**
5611 * Atomically tests and clears a bit in a bitmap, ordered.
5612 *
5613 * @returns true if the bit was set.
5614 * @returns false if the bit was clear.
5615 *
5616 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5617 * the memory access isn't atomic!
5618 * @param iBit The bit to test and clear.
5619 *
5620 * @remarks The lock prefix provides the ordering; no additional barrier is needed.
5621 * @remarks x86: Requires a 386 or later.
5622 */
5623#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5624RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5625#else
5626DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5627{
5628 union { bool f; uint32_t u32; uint8_t u8; } rc;
5629 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5630# if RT_INLINE_ASM_USES_INTRIN
5631 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
5632
5633# elif RT_INLINE_ASM_GNU_STYLE
5634 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5635 "setc %b0\n\t"
5636 "andl $1, %0\n\t"
5637 : "=q" (rc.u32)
5638 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5639 : "Ir" (iBit)
5640 , "m" (*(volatile long RT_FAR *)pvBitmap)
5641 : "memory"
5642 , "cc");
5643# else
5644 __asm
5645 {
5646 mov edx, [iBit]
5647# ifdef RT_ARCH_AMD64
5648 mov rax, [pvBitmap]
5649 lock btr [rax], edx
5650# else
5651 mov eax, [pvBitmap]
5652 lock btr [eax], edx
5653# endif
5654 setc al
5655 and eax, 1
5656 mov [rc.u32], eax
5657 }
5658# endif
5659 return rc.f;
5660}
5661#endif
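
A companion sketch for the consuming side, again with purely illustrative names: clearing a pending flag and learning in the same operation whether it was actually set.

#include <iprt/asm.h>

/* Hypothetical bitmap of 128 pending-work flags set by some producer. */
static uint32_t g_bmPendingWork[4];

/* Returns true if item iItem was pending; the flag is clear afterwards either way. */
static bool ExampleConsumePending(int32_t iItem)
{
    return ASMAtomicBitTestAndClear(&g_bmPendingWork[0], iItem);
}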
5662
5663
5664/**
5665 * Tests and toggles a bit in a bitmap.
5666 *
5667 * @returns true if the bit was set.
5668 * @returns false if the bit was clear.
5669 *
5670 * @param pvBitmap Pointer to the bitmap.
5671 * @param iBit The bit to test and toggle.
5672 *
5673 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but aligning
5674 * it yields better performance and avoids traps when accessing the last
5675 * bits in the bitmap.
5676 */
5677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5678RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5679#else
5680DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5681{
5682 union { bool f; uint32_t u32; uint8_t u8; } rc;
5683# if RT_INLINE_ASM_USES_INTRIN
5684 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5685
5686# elif RT_INLINE_ASM_GNU_STYLE
5687 __asm__ __volatile__("btcl %2, %1\n\t"
5688 "setc %b0\n\t"
5689 "andl $1, %0\n\t"
5690 : "=q" (rc.u32)
5691 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5692 : "Ir" (iBit)
5693 , "m" (*(volatile long RT_FAR *)pvBitmap)
5694 : "memory"
5695 , "cc");
5696# else
5697 __asm
5698 {
5699 mov edx, [iBit]
5700# ifdef RT_ARCH_AMD64
5701 mov rax, [pvBitmap]
5702 btc [rax], edx
5703# else
5704 mov eax, [pvBitmap]
5705 btc [eax], edx
5706# endif
5707 setc al
5708 and eax, 1
5709 mov [rc.u32], eax
5710 }
5711# endif
5712 return rc.f;
5713}
5714#endif
5715
5716
5717/**
5718 * Atomically tests and toggles a bit in a bitmap, ordered.
5719 *
5720 * @returns true if the bit was set.
5721 * @returns false if the bit was clear.
5722 *
5723 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5724 * the memory access isn't atomic!
5725 * @param iBit The bit to test and toggle.
5726 *
5727 * @remarks x86: Requires a 386 or later.
5728 */
5729#if RT_INLINE_ASM_EXTERNAL
5730RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5731#else
5732DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5733{
5734 union { bool f; uint32_t u32; uint8_t u8; } rc;
5735 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5736# if RT_INLINE_ASM_GNU_STYLE
5737 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5738 "setc %b0\n\t"
5739 "andl $1, %0\n\t"
5740 : "=q" (rc.u32)
5741 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5742 : "Ir" (iBit)
5743 , "m" (*(volatile long RT_FAR *)pvBitmap)
5744 : "memory"
5745 , "cc");
5746# else
5747 __asm
5748 {
5749 mov edx, [iBit]
5750# ifdef RT_ARCH_AMD64
5751 mov rax, [pvBitmap]
5752 lock btc [rax], edx
5753# else
5754 mov eax, [pvBitmap]
5755 lock btc [eax], edx
5756# endif
5757 setc al
5758 and eax, 1
5759 mov [rc.u32], eax
5760 }
5761# endif
5762 return rc.f;
5763}
5764#endif
5765
5766
5767/**
5768 * Tests if a bit in a bitmap is set.
5769 *
5770 * @returns true if the bit is set.
5771 * @returns false if the bit is clear.
5772 *
5773 * @param pvBitmap Pointer to the bitmap.
5774 * @param iBit The bit to test.
5775 *
5776 * @remarks 32-bit alignment of pvBitmap is not a strict requirement, but aligning
5777 * it yields better performance and avoids traps when accessing the last
5778 * bits in the bitmap.
5779 */
5780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5781RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5782#else
5783DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5784{
5785 union { bool f; uint32_t u32; uint8_t u8; } rc;
5786# if RT_INLINE_ASM_USES_INTRIN
5787 rc.u32 = _bittest((long *)pvBitmap, iBit);
5788# elif RT_INLINE_ASM_GNU_STYLE
5789
5790 __asm__ __volatile__("btl %2, %1\n\t"
5791 "setc %b0\n\t"
5792 "andl $1, %0\n\t"
5793 : "=q" (rc.u32)
5794 : "m" (*(const volatile long RT_FAR *)pvBitmap)
5795 , "Ir" (iBit)
5796 : "memory"
5797 , "cc");
5798# else
5799 __asm
5800 {
5801 mov edx, [iBit]
5802# ifdef RT_ARCH_AMD64
5803 mov rax, [pvBitmap]
5804 bt [rax], edx
5805# else
5806 mov eax, [pvBitmap]
5807 bt [eax], edx
5808# endif
5809 setc al
5810 and eax, 1
5811 mov [rc.u32], eax
5812 }
5813# endif
5814 return rc.f;
5815}
5816#endif
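
A short sketch showing that iBit indexes the bitmap as a whole rather than a single dword; the array contents below are illustrative.

#include <iprt/asm.h>

static void ExampleBitTest(void)
{
    uint32_t au32Bitmap[4] = { 0, UINT32_C(0x00000100), 0, 0 }; /* only bit 40 (32 + 8) is set */
    bool fSet   = ASMBitTest(&au32Bitmap[0], 40);               /* true  */
    bool fClear = ASMBitTest(&au32Bitmap[0], 41);               /* false */
    (void)fSet; (void)fClear;
}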
5817
5818
5819/**
5820 * Clears a bit range within a bitmap.
5821 *
5822 * @param pvBitmap Pointer to the bitmap.
5823 * @param iBitStart The first bit to clear.
5824 * @param iBitEnd The first bit not to clear.
5825 */
5826DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
5827{
5828 if (iBitStart < iBitEnd)
5829 {
5830 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
5831 int32_t iStart = iBitStart & ~31;
5832 int32_t iEnd = iBitEnd & ~31;
5833 if (iStart == iEnd)
5834 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
5835 else
5836 {
5837 /* bits in first dword. */
5838 if (iBitStart & 31)
5839 {
5840 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
5841 pu32++;
5842 iBitStart = iStart + 32;
5843 }
5844
5845 /* whole dwords. */
5846 if (iBitStart != iEnd)
5847 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
5848
5849 /* bits in last dword. */
5850 if (iBitEnd & 31)
5851 {
5852 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5853 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
5854 }
5855 }
5856 }
5857}
5858
5859
5860/**
5861 * Sets a bit range within a bitmap.
5862 *
5863 * @param pvBitmap Pointer to the bitmap.
5864 * @param iBitStart The first bit to set.
5865 * @param iBitEnd The first bit not to set.
5866 */
5867DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
5868{
5869 if (iBitStart < iBitEnd)
5870 {
5871 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
5872 int32_t iStart = iBitStart & ~31;
5873 int32_t iEnd = iBitEnd & ~31;
5874 if (iStart == iEnd)
5875 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5876 else
5877 {
5878 /* bits in first dword. */
5879 if (iBitStart & 31)
5880 {
5881 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
5882 pu32++;
5883 iBitStart = iStart + 32;
5884 }
5885
5886 /* whole dwords. */
5887 if (iBitStart != iEnd)
5888 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
5889
5890 /* bits in last dword. */
5891 if (iBitEnd & 31)
5892 {
5893 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
5894 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
5895 }
5896 }
5897 }
5898}
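
Both range functions treat [iBitStart, iBitEnd) as a half-open interval. A small sketch of the resulting dword contents, using an illustrative local bitmap:

#include <iprt/asm.h>

static void ExampleBitRanges(void)
{
    uint32_t au32Bitmap[2] = { 0, 0 };          /* a 64-bit bitmap */

    ASMBitSetRange(&au32Bitmap[0], 4, 36);      /* sets bits 4..35 */
    /* au32Bitmap[0] == 0xfffffff0, au32Bitmap[1] == 0x0000000f */

    ASMBitClearRange(&au32Bitmap[0], 8, 32);    /* clears bits 8..31 again */
    /* au32Bitmap[0] == 0x000000f0, au32Bitmap[1] == 0x0000000f */
}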
5899
5900
5901/**
5902 * Finds the first clear bit in a bitmap.
5903 *
5904 * @returns Index of the first zero bit.
5905 * @returns -1 if no clear bit was found.
5906 * @param pvBitmap Pointer to the bitmap.
5907 * @param cBits The number of bits in the bitmap. Multiple of 32.
5908 */
5909#if RT_INLINE_ASM_EXTERNAL
5910DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
5911#else
5912DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
5913{
5914 if (cBits)
5915 {
5916 int32_t iBit;
5917# if RT_INLINE_ASM_GNU_STYLE
5918 RTCCUINTREG uEAX, uECX, uEDI;
5919 cBits = RT_ALIGN_32(cBits, 32);
5920 __asm__ __volatile__("repe; scasl\n\t"
5921 "je 1f\n\t"
5922# ifdef RT_ARCH_AMD64
5923 "lea -4(%%rdi), %%rdi\n\t"
5924 "xorl (%%rdi), %%eax\n\t"
5925 "subq %5, %%rdi\n\t"
5926# else
5927 "lea -4(%%edi), %%edi\n\t"
5928 "xorl (%%edi), %%eax\n\t"
5929 "subl %5, %%edi\n\t"
5930# endif
5931 "shll $3, %%edi\n\t"
5932 "bsfl %%eax, %%edx\n\t"
5933 "addl %%edi, %%edx\n\t"
5934 "1:\t\n"
5935 : "=d" (iBit)
5936 , "=&c" (uECX)
5937 , "=&D" (uEDI)
5938 , "=&a" (uEAX)
5939 : "0" (0xffffffff)
5940 , "mr" (pvBitmap)
5941 , "1" (cBits >> 5)
5942 , "2" (pvBitmap)
5943 , "3" (0xffffffff)
5944 : "cc");
5945# else
5946 cBits = RT_ALIGN_32(cBits, 32);
5947 __asm
5948 {
5949# ifdef RT_ARCH_AMD64
5950 mov rdi, [pvBitmap]
5951 mov rbx, rdi
5952# else
5953 mov edi, [pvBitmap]
5954 mov ebx, edi
5955# endif
5956 mov edx, 0ffffffffh
5957 mov eax, edx
5958 mov ecx, [cBits]
5959 shr ecx, 5
5960 repe scasd
5961 je done
5962
5963# ifdef RT_ARCH_AMD64
5964 lea rdi, [rdi - 4]
5965 xor eax, [rdi]
5966 sub rdi, rbx
5967# else
5968 lea edi, [edi - 4]
5969 xor eax, [edi]
5970 sub edi, ebx
5971# endif
5972 shl edi, 3
5973 bsf edx, eax
5974 add edx, edi
5975 done:
5976 mov [iBit], edx
5977 }
5978# endif
5979 return iBit;
5980 }
5981 return -1;
5982}
5983#endif
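
A sketch combining the search with the atomic setter to allocate a free index from a shared bitmap; the names are hypothetical, and the retry loop covers the window between finding the clear bit and setting it.

#include <iprt/asm.h>

static uint32_t g_bmAllocated[8];   /* hypothetical 256-bit allocation map, 32-bit aligned */

/* Returns the allocated index, or -1 if the bitmap is full. */
static int32_t ExampleAllocIndex(void)
{
    for (;;)
    {
        int32_t iBit = ASMBitFirstClear(&g_bmAllocated[0], 256);
        if (iBit < 0)
            return -1;                                        /* every bit is set */
        if (!ASMAtomicBitTestAndSet(&g_bmAllocated[0], iBit))
            return iBit;                                      /* we set it first */
        /* Another thread took it between the scan and the lock bts; try again. */
    }
}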
5984
5985
5986/**
5987 * Finds the next clear bit in a bitmap.
5988 *
5989 * @returns Index of the next clear bit.
5990 * @returns -1 if no clear bit was found.
5991 * @param pvBitmap Pointer to the bitmap.
5992 * @param cBits The number of bits in the bitmap. Multiple of 32.
5993 * @param iBitPrev The bit returned from the last search.
5994 * The search will start at iBitPrev + 1.
5995 */
5996#if RT_INLINE_ASM_EXTERNAL
5997DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
5998#else
5999DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6000{
6001 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6002 int iBit = ++iBitPrev & 31;
6003 if (iBit)
6004 {
6005 /*
6006 * Inspect the 32-bit word containing the unaligned bit.
6007 */
6008 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6009
6010# if RT_INLINE_ASM_USES_INTRIN
6011 unsigned long ulBit = 0;
6012 if (_BitScanForward(&ulBit, u32))
6013 return ulBit + iBitPrev;
6014# else
6015# if RT_INLINE_ASM_GNU_STYLE
6016 __asm__ __volatile__("bsf %1, %0\n\t"
6017 "jnz 1f\n\t"
6018 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6019 "1:\n\t"
6020 : "=r" (iBit)
6021 : "r" (u32)
6022 : "cc");
6023# else
6024 __asm
6025 {
6026 mov edx, [u32]
6027 bsf eax, edx
6028 jnz done
6029 mov eax, 0ffffffffh
6030 done:
6031 mov [iBit], eax
6032 }
6033# endif
6034 if (iBit >= 0)
6035 return iBit + (int)iBitPrev;
6036# endif
6037
6038 /*
6039 * Skip ahead and see if there is anything left to search.
6040 */
6041 iBitPrev |= 31;
6042 iBitPrev++;
6043 if (cBits <= (uint32_t)iBitPrev)
6044 return -1;
6045 }
6046
6047 /*
6048 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6049 */
6050 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6051 if (iBit >= 0)
6052 iBit += iBitPrev;
6053 return iBit;
6054}
6055#endif
6056
6057
6058/**
6059 * Finds the first set bit in a bitmap.
6060 *
6061 * @returns Index of the first set bit.
6062 * @returns -1 if no set bit was found.
6063 * @param pvBitmap Pointer to the bitmap.
6064 * @param cBits The number of bits in the bitmap. Multiple of 32.
6065 */
6066#if RT_INLINE_ASM_EXTERNAL
6067DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6068#else
6069DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6070{
6071 if (cBits)
6072 {
6073 int32_t iBit;
6074# if RT_INLINE_ASM_GNU_STYLE
6075 RTCCUINTREG uEAX, uECX, uEDI;
6076 cBits = RT_ALIGN_32(cBits, 32);
6077 __asm__ __volatile__("repe; scasl\n\t"
6078 "je 1f\n\t"
6079# ifdef RT_ARCH_AMD64
6080 "lea -4(%%rdi), %%rdi\n\t"
6081 "movl (%%rdi), %%eax\n\t"
6082 "subq %5, %%rdi\n\t"
6083# else
6084 "lea -4(%%edi), %%edi\n\t"
6085 "movl (%%edi), %%eax\n\t"
6086 "subl %5, %%edi\n\t"
6087# endif
6088 "shll $3, %%edi\n\t"
6089 "bsfl %%eax, %%edx\n\t"
6090 "addl %%edi, %%edx\n\t"
6091 "1:\t\n"
6092 : "=d" (iBit)
6093 , "=&c" (uECX)
6094 , "=&D" (uEDI)
6095 , "=&a" (uEAX)
6096 : "0" (0xffffffff)
6097 , "mr" (pvBitmap)
6098 , "1" (cBits >> 5)
6099 , "2" (pvBitmap)
6100 , "3" (0)
6101 : "cc");
6102# else
6103 cBits = RT_ALIGN_32(cBits, 32);
6104 __asm
6105 {
6106# ifdef RT_ARCH_AMD64
6107 mov rdi, [pvBitmap]
6108 mov rbx, rdi
6109# else
6110 mov edi, [pvBitmap]
6111 mov ebx, edi
6112# endif
6113 mov edx, 0ffffffffh
6114 xor eax, eax
6115 mov ecx, [cBits]
6116 shr ecx, 5
6117 repe scasd
6118 je done
6119# ifdef RT_ARCH_AMD64
6120 lea rdi, [rdi - 4]
6121 mov eax, [rdi]
6122 sub rdi, rbx
6123# else
6124 lea edi, [edi - 4]
6125 mov eax, [edi]
6126 sub edi, ebx
6127# endif
6128 shl edi, 3
6129 bsf edx, eax
6130 add edx, edi
6131 done:
6132 mov [iBit], edx
6133 }
6134# endif
6135 return iBit;
6136 }
6137 return -1;
6138}
6139#endif
6140
6141
6142/**
6143 * Finds the next set bit in a bitmap.
6144 *
6145 * @returns Index of the next set bit.
6146 * @returns -1 if no set bit was found.
6147 * @param pvBitmap Pointer to the bitmap.
6148 * @param cBits The number of bits in the bitmap. Multiple of 32.
6149 * @param iBitPrev The bit returned from the last search.
6150 * The search will start at iBitPrev + 1.
6151 */
6152#if RT_INLINE_ASM_EXTERNAL
6153DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6154#else
6155DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6156{
6157 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6158 int iBit = ++iBitPrev & 31;
6159 if (iBit)
6160 {
6161 /*
6162 * Inspect the 32-bit word containing the unaligned bit.
6163 */
6164 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6165
6166# if RT_INLINE_ASM_USES_INTRIN
6167 unsigned long ulBit = 0;
6168 if (_BitScanForward(&ulBit, u32))
6169 return ulBit + iBitPrev;
6170# else
6171# if RT_INLINE_ASM_GNU_STYLE
6172 __asm__ __volatile__("bsf %1, %0\n\t"
6173 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
6174 "movl $-1, %0\n\t"
6175 "1:\n\t"
6176 : "=r" (iBit)
6177 : "r" (u32)
6178 : "cc");
6179# else
6180 __asm
6181 {
6182 mov edx, [u32]
6183 bsf eax, edx
6184 jnz done
6185 mov eax, 0ffffffffh
6186 done:
6187 mov [iBit], eax
6188 }
6189# endif
6190 if (iBit >= 0)
6191 return iBit + (int)iBitPrev;
6192# endif
6193
6194 /*
6195 * Skip ahead and see if there is anything left to search.
6196 */
6197 iBitPrev |= 31;
6198 iBitPrev++;
6199 if (cBits <= (uint32_t)iBitPrev)
6200 return -1;
6201 }
6202
6203 /*
6204 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6205 */
6206 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6207 if (iBit >= 0)
6208 iBit += iBitPrev;
6209 return iBit;
6210}
6211#endif
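
The First/Next pairs are meant for iteration. A minimal sketch that visits every set bit of a bitmap through a caller-supplied callback (both names are illustrative):

#include <iprt/asm.h>

static void ExampleForEachSetBit(const uint32_t *pau32Bitmap, uint32_t cBits,
                                 void (*pfnVisitor)(int32_t iBit))
{
    int32_t iBit = ASMBitFirstSet(pau32Bitmap, cBits);
    while (iBit >= 0)
    {
        pfnVisitor(iBit);
        iBit = ASMBitNextSet(pau32Bitmap, cBits, (uint32_t)iBit);
    }
}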
6212
6213
6214/**
6215 * Finds the first bit which is set in the given 32-bit integer.
6216 * Bits are numbered from 1 (least significant) to 32.
6217 *
6218 * @returns index [1..32] of the first set bit.
6219 * @returns 0 if all bits are cleared.
6220 * @param u32 Integer to search for set bits.
6221 * @remarks Similar to ffs() in BSD.
6222 */
6223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6224RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6225#else
6226DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
6227{
6228# if RT_INLINE_ASM_USES_INTRIN
6229 unsigned long iBit;
6230 if (_BitScanForward(&iBit, u32))
6231 iBit++;
6232 else
6233 iBit = 0;
6234# elif RT_INLINE_ASM_GNU_STYLE
6235 uint32_t iBit;
6236 __asm__ __volatile__("bsf %1, %0\n\t"
6237 "jnz 1f\n\t"
6238 "xorl %0, %0\n\t"
6239 "jmp 2f\n"
6240 "1:\n\t"
6241 "incl %0\n"
6242 "2:\n\t"
6243 : "=r" (iBit)
6244 : "rm" (u32)
6245 : "cc");
6246# else
6247 uint32_t iBit;
6248 _asm
6249 {
6250 bsf eax, [u32]
6251 jnz found
6252 xor eax, eax
6253 jmp done
6254 found:
6255 inc eax
6256 done:
6257 mov [iBit], eax
6258 }
6259# endif
6260 return iBit;
6261}
6262#endif
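
Note the 1-based numbering, unlike the 0-based bsf/_BitScanForward convention used internally. A few illustrative values:

#include <iprt/asm.h>

static void ExampleBitFirstSetU32(void)
{
    unsigned i1 = ASMBitFirstSetU32(UINT32_C(0x00000001)); /* 1  - bit 0 is numbered 1 */
    unsigned i2 = ASMBitFirstSetU32(UINT32_C(0x00000008)); /* 4  - bit 3 is numbered 4 */
    unsigned i3 = ASMBitFirstSetU32(UINT32_C(0x80000000)); /* 32 - the most significant bit */
    unsigned i4 = ASMBitFirstSetU32(0);                    /* 0  - no bit set */
    (void)i1; (void)i2; (void)i3; (void)i4;
}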
6263
6264
6265/**
6266 * Finds the first bit which is set in the given 32-bit integer.
6267 * Bits are numbered from 1 (least significant) to 32.
6268 *
6269 * @returns index [1..32] of the first set bit.
6270 * @returns 0 if all bits are cleared.
6271 * @param i32 Integer to search for set bits.
6272 * @remark Similar to ffs() in BSD.
6273 */
6274DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
6275{
6276 return ASMBitFirstSetU32((uint32_t)i32);
6277}
6278
6279
6280/**
6281 * Finds the first bit which is set in the given 64-bit integer.
6282 *
6283 * Bits are numbered from 1 (least significant) to 64.
6284 *
6285 * @returns index [1..64] of the first set bit.
6286 * @returns 0 if all bits are cleared.
6287 * @param u64 Integer to search for set bits.
6288 * @remarks Similar to ffs() in BSD.
6289 */
6290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6291RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6292#else
6293DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
6294{
6295# if RT_INLINE_ASM_USES_INTRIN
6296 unsigned long iBit;
6297# if ARCH_BITS == 64
6298 if (_BitScanForward64(&iBit, u64))
6299 iBit++;
6300 else
6301 iBit = 0;
6302# else
6303 if (_BitScanForward(&iBit, (uint32_t)u64))
6304 iBit++;
6305 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
6306 iBit += 33;
6307 else
6308 iBit = 0;
6309# endif
6310# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
6311 uint64_t iBit;
6312 __asm__ __volatile__("bsfq %1, %0\n\t"
6313 "jnz 1f\n\t"
6314 "xorl %k0, %k0\n\t"
6315 "jmp 2f\n"
6316 "1:\n\t"
6317 "incl %k0\n"
6318 "2:\n\t"
6319 : "=r" (iBit)
6320 : "rm" (u64)
6321 : "cc");
6322# else
6323 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
6324 if (!iBit)
6325 {
6326 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
6327 if (iBit)
6328 iBit += 32;
6329 }
6330# endif
6331 return (unsigned)iBit;
6332}
6333#endif
6334
6335
6336/**
6337 * Finds the first bit which is set in the given 16-bit integer.
6338 *
6339 * Bits are numbered from 1 (least significant) to 16.
6340 *
6341 * @returns index [1..16] of the first set bit.
6342 * @returns 0 if all bits are cleared.
6343 * @param u16 Integer to search for set bits.
6344 * @remarks For 16-bit bs3kit code.
6345 */
6346#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6347RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6348#else
6349DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
6350{
6351 return ASMBitFirstSetU32((uint32_t)u16);
6352}
6353#endif
6354
6355
6356/**
6357 * Finds the last bit which is set in the given 32-bit integer.
6358 * Bits are numbered from 1 (least significant) to 32.
6359 *
6360 * @returns index [1..32] of the last set bit.
6361 * @returns 0 if all bits are cleared.
6362 * @param u32 Integer to search for set bits.
6363 * @remark Similar to fls() in BSD.
6364 */
6365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6366RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6367#else
6368DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
6369{
6370# if RT_INLINE_ASM_USES_INTRIN
6371 unsigned long iBit;
6372 if (_BitScanReverse(&iBit, u32))
6373 iBit++;
6374 else
6375 iBit = 0;
6376# elif RT_INLINE_ASM_GNU_STYLE
6377 uint32_t iBit;
6378 __asm__ __volatile__("bsrl %1, %0\n\t"
6379 "jnz 1f\n\t"
6380 "xorl %0, %0\n\t"
6381 "jmp 2f\n"
6382 "1:\n\t"
6383 "incl %0\n"
6384 "2:\n\t"
6385 : "=r" (iBit)
6386 : "rm" (u32)
6387 : "cc");
6388# else
6389 uint32_t iBit;
6390 _asm
6391 {
6392 bsr eax, [u32]
6393 jnz found
6394 xor eax, eax
6395 jmp done
6396 found:
6397 inc eax
6398 done:
6399 mov [iBit], eax
6400 }
6401# endif
6402 return iBit;
6403}
6404#endif
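
Since the result is 1-based, ASMBitLastSetU32(u32) equals the bit width of u32, i.e. floor(log2(u32)) + 1 for non-zero input. An illustrative helper built on that property (not part of IPRT):

#include <iprt/asm.h>

/* Smallest power of two >= u32; only valid for inputs up to UINT32_C(0x80000000). */
static uint32_t ExampleRoundUpToPowerOfTwo(uint32_t u32)
{
    if (u32 <= 1)
        return 1;
    /* ASMBitLastSetU32(u32 - 1) is the bit width of u32 - 1; shifting 1 by it rounds up. */
    return UINT32_C(1) << ASMBitLastSetU32(u32 - 1);
}
/* ExampleRoundUpToPowerOfTwo(5) == 8, (8) == 8, (9) == 16. */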
6405
6406
6407/**
6408 * Finds the last bit which is set in the given 32-bit integer.
6409 * Bits are numbered from 1 (least significant) to 32.
6410 *
6411 * @returns index [1..32] of the last set bit.
6412 * @returns 0 if all bits are cleared.
6413 * @param i32 Integer to search for set bits.
6414 * @remark Similar to fls() in BSD.
6415 */
6416DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
6417{
6418 return ASMBitLastSetU32((uint32_t)i32);
6419}
6420
6421
6422/**
6423 * Finds the last bit which is set in the given 64-bit integer.
6424 *
6425 * Bits are numbered from 1 (least significant) to 64.
6426 *
6427 * @returns index [1..64] of the last set bit.
6428 * @returns 0 if all bits are cleared.
6429 * @param u64 Integer to search for set bits.
6430 * @remark Similar to fls() in BSD.
6431 */
6432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6433RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6434#else
6435DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
6436{
6437# if RT_INLINE_ASM_USES_INTRIN
6438 unsigned long iBit;
6439# if ARCH_BITS == 64
6440 if (_BitScanReverse64(&iBit, u64))
6441 iBit++;
6442 else
6443 iBit = 0;
6444# else
6445 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
6446 iBit += 33;
6447 else if (_BitScanReverse(&iBit, (uint32_t)u64))
6448 iBit++;
6449 else
6450 iBit = 0;
6451# endif
6452# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
6453 uint64_t iBit;
6454 __asm__ __volatile__("bsrq %1, %0\n\t"
6455 "jnz 1f\n\t"
6456 "xorl %k0, %k0\n\t"
6457 "jmp 2f\n"
6458 "1:\n\t"
6459 "incl %k0\n"
6460 "2:\n\t"
6461 : "=r" (iBit)
6462 : "rm" (u64)
6463 : "cc");
6464# else
6465 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
6466 if (iBit)
6467 iBit += 32;
6468 else
6469 iBit = ASMBitLastSetU32((uint32_t)u64);
6470# endif
6471 return (unsigned)iBit;
6472}
6473#endif
6474
6475
6476/**
6477 * Finds the last bit which is set in the given 16-bit integer.
6478 *
6479 * Bits are numbered from 1 (least significant) to 16.
6480 *
6481 * @returns index [1..16] of the last set bit.
6482 * @returns 0 if all bits are cleared.
6483 * @param u16 Integer to search for set bits.
6484 * @remarks For 16-bit bs3kit code.
6485 */
6486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6487RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6488#else
6489DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
6490{
6491 return ASMBitLastSetU32((uint32_t)u16);
6492}
6493#endif
6494
6495
6496/**
6497 * Reverse the byte order of the given 16-bit integer.
6498 *
6499 * @returns The 16-bit value with the byte order reversed.
6500 * @param u16 16-bit integer value.
6501 */
6502#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6503RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6504#else
6505DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6506{
6507# if RT_INLINE_ASM_USES_INTRIN
6508 u16 = _byteswap_ushort(u16);
6509# elif RT_INLINE_ASM_GNU_STYLE
6510 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6511# else
6512 _asm
6513 {
6514 mov ax, [u16]
6515 ror ax, 8
6516 mov [u16], ax
6517 }
6518# endif
6519 return u16;
6520}
6521#endif
6522
6523
6524/**
6525 * Reverse the byte order of the given 32-bit integer.
6526 *
6527 * @returns The 32-bit value with the byte order reversed.
6528 * @param u32 32-bit integer value.
6529 */
6530#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6531RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6532#else
6533DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6534{
6535# if RT_INLINE_ASM_USES_INTRIN
6536 u32 = _byteswap_ulong(u32);
6537# elif RT_INLINE_ASM_GNU_STYLE
6538 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6539# else
6540 _asm
6541 {
6542 mov eax, [u32]
6543 bswap eax
6544 mov [u32], eax
6545 }
6546# endif
6547 return u32;
6548}
6549#endif
6550
6551
6552/**
6553 * Reverse the byte order of the given 64-bit integer.
6554 *
6555 * @returns The 64-bit value with the byte order reversed.
6556 * @param u64 64-bit integer value.
6557 */
6558DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6559{
6560#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6561 u64 = _byteswap_uint64(u64);
6562#else
6563 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6564 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6565#endif
6566 return u64;
6567}
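
These are the primitives typically used for little-/big-endian conversions of on-disk or wire formats. A quick sketch of the expected values (illustrative only):

#include <iprt/asm.h>

static void ExampleByteSwaps(void)
{
    uint16_t u16 = ASMByteSwapU16(UINT16_C(0x1234));              /* 0x3412 */
    uint32_t u32 = ASMByteSwapU32(UINT32_C(0x12345678));          /* 0x78563412 */
    uint64_t u64 = ASMByteSwapU64(UINT64_C(0x0123456789abcdef));  /* 0xefcdab8967452301 */
    (void)u16; (void)u32; (void)u64;
}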
6568
6569
6570/**
6571 * Rotate 32-bit unsigned value to the left by @a cShift.
6572 *
6573 * @returns Rotated value.
6574 * @param u32 The value to rotate.
6575 * @param cShift How many bits to rotate by.
6576 */
6577#ifdef __WATCOMC__
6578RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
6579#else
6580DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
6581{
6582# if RT_INLINE_ASM_USES_INTRIN
6583 return _rotl(u32, cShift);
6584# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
6585 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
6586 return u32;
6587# else
6588 cShift &= 31;
6589 return (u32 << cShift) | (u32 >> ((32 - cShift) & 31)); /* mask the right shift too so cShift=0 doesn't shift by 32 (undefined) */
6590# endif
6591}
6592#endif
6593
6594
6595/**
6596 * Rotate 32-bit unsigned value to the right by @a cShift.
6597 *
6598 * @returns Rotated value.
6599 * @param u32 The value to rotate.
6600 * @param cShift How many bits to rotate by.
6601 */
6602#ifdef __WATCOMC__
6603RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
6604#else
6605DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
6606{
6607# if RT_INLINE_ASM_USES_INTRIN
6608 return _rotr(u32, cShift);
6609# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
6610 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
6611 return u32;
6612# else
6613 cShift &= 31;
6614 return (u32 >> cShift) | (u32 << ((32 - cShift) & 31)); /* mask the left shift too so cShift=0 doesn't shift by 32 (undefined) */
6615# endif
6616}
6617#endif
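
Rotates keep every bit, so rotating right by cShift is the same as rotating left by 32 - cShift. A short sketch with illustrative values:

#include <iprt/asm.h>

static void ExampleRotate32(void)
{
    uint32_t a = ASMRotateLeftU32(UINT32_C(0x80000001), 1);   /* 0x00000003 - the MSB wraps to bit 0 */
    uint32_t b = ASMRotateRightU32(UINT32_C(0x80000001), 1);  /* 0xc0000000 - bit 0 wraps to the MSB */
    uint32_t c = ASMRotateRightU32(UINT32_C(0x12345678), 8);  /* 0x78123456 */
    (void)a; (void)b; (void)c;
}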
6618
6619
6620/**
6621 * Rotate 64-bit unsigned value to the left by @a cShift.
6622 *
6623 * @returns Rotated value.
6624 * @param u64 The value to rotate.
6625 * @param cShift How many bits to rotate by.
6626 */
6627DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
6628{
6629#if RT_INLINE_ASM_USES_INTRIN
6630 return _rotl64(u64, cShift);
6631#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6632 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
6633 return u64;
6634#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
6635 uint32_t uSpill;
6636 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
6637 "jz 1f\n\t"
6638 "xchgl %%eax, %%edx\n\t"
6639 "1:\n\t"
6640 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
6641 "jz 2f\n\t"
6642 "movl %%edx, %2\n\t" /* save the hi value in %3. */
6643 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
6644 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
6645 "2:\n\t" /* } */
6646 : "=A" (u64)
6647 , "=c" (cShift)
6648 , "=r" (uSpill)
6649 : "0" (u64)
6650 , "1" (cShift)
6651 : "cc");
6652 return u64;
6653#else
6654 cShift &= 63;
6655 return (u64 << cShift) | (u64 >> ((64 - cShift) & 63)); /* mask the right shift too so cShift=0 doesn't shift by 64 (undefined) */
6656#endif
6657}
6658
6659
6660/**
6661 * Rotate 64-bit unsigned value to the right by @a cShift.
6662 *
6663 * @returns Rotated value.
6664 * @param u64 The value to rotate.
6665 * @param cShift How many bits to rotate by.
6666 */
6667DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
6668{
6669#if RT_INLINE_ASM_USES_INTRIN
6670 return _rotr64(u64, cShift);
6671#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6672 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
6673 return u64;
6674#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
6675 uint32_t uSpill;
6676 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
6677 "jz 1f\n\t"
6678 "xchgl %%eax, %%edx\n\t"
6679 "1:\n\t"
6680 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
6681 "jz 2f\n\t"
6682 "movl %%edx, %2\n\t" /* save the hi value in %3. */
6683 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
6684 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
6685 "2:\n\t" /* } */
6686 : "=A" (u64)
6687 , "=c" (cShift)
6688 , "=r" (uSpill)
6689 : "0" (u64)
6690 , "1" (cShift)
6691 : "cc");
6692 return u64;
6693#else
6694 cShift &= 63;
6695 return (u64 >> cShift) | (u64 << ((64 - cShift) & 63)); /* mask the left shift too so cShift=0 doesn't shift by 64 (undefined) */
6696#endif
6697}
6698
6699/** @} */
6700
6701
6702/** @} */
6703
6704/*
6705 * Include #pragma aux definitions for Watcom C/C++.
6706 */
6707#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
6708# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
6709# undef IPRT_INCLUDED_asm_watcom_x86_16_h
6710# include "asm-watcom-x86-16.h"
6711#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
6712# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
6713# undef IPRT_INCLUDED_asm_watcom_x86_32_h
6714# include "asm-watcom-x86-32.h"
6715#endif
6716
6717#endif /* !IPRT_INCLUDED_asm_h */
6718