VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 87177

Last change on this file was revision 87177, checked in by vboxsync, 4 years ago:

iprt/asm.h: add 'cc' to clobber list for x86/amd64 asm too (pedantic as it's implicitly done). Style.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedIncrement)
66# pragma intrinsic(_InterlockedDecrement)
67# pragma intrinsic(_InterlockedExchange)
68# pragma intrinsic(_InterlockedExchangeAdd)
69# pragma intrinsic(_InterlockedCompareExchange)
70# pragma intrinsic(_InterlockedCompareExchange64)
71# pragma intrinsic(_rotl)
72# pragma intrinsic(_rotr)
73# pragma intrinsic(_rotl64)
74# pragma intrinsic(_rotr64)
75# ifdef RT_ARCH_AMD64
76# pragma intrinsic(__stosq)
77# pragma intrinsic(_byteswap_uint64)
78# pragma intrinsic(_InterlockedExchange64)
79# pragma intrinsic(_InterlockedExchangeAdd64)
80# pragma intrinsic(_InterlockedAnd64)
81# pragma intrinsic(_InterlockedOr64)
82# pragma intrinsic(_InterlockedIncrement64)
83# pragma intrinsic(_InterlockedDecrement64)
84# endif
85#endif
86
87/*
88 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
89 */
90#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
91# include "asm-watcom-x86-16.h"
92#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-32.h"
94#endif
95
96
97/** @defgroup grp_rt_asm ASM - Assembly Routines
98 * @ingroup grp_rt
99 *
100 * @remarks The difference between ordered and unordered atomic operations is that
101 * the former will complete outstanding reads and writes before continuing,
102 * while the latter don't make any promises about the order. Even ordered
103 * operations don't, it seems, make any 100% promise with regard to whether
104 * the operation will complete before any subsequent memory access.
105 * (Please correct this if it is wrong.)
106 *
107 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
108 * are unordered (note the Uo).
109 *
110 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
111 * or even optimize assembler instructions away. For instance, in the following code
112 * the second rdmsr instruction is optimized away because gcc treats that instruction
113 * as deterministic:
114 *
115 * @code
116 * static inline uint32_t rdmsr_low(int idx)
117 * {
118 * uint32_t low;
119 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
120 * return low;
 * }
121 * ...
122 * uint32_t msr1 = rdmsr_low(1);
123 * foo(msr1);
124 * msr1 = rdmsr_low(1);
125 * bar(msr1);
126 * @endcode
127 *
128 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
129 * use the result of the first call as input parameter for bar() as well. For rdmsr this
130 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
131 * machine status information in general.
132 *
133 * @{
134 */
135
136
137/** @def RT_INLINE_ASM_GCC_4_3_X_X86
138 * Used to work around some 4.3.x register allocation issues in this version of
139 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
140 * definitely not for 5.x */
141#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
142# define RT_INLINE_ASM_GCC_4_3_X_X86 1
143#else
144# define RT_INLINE_ASM_GCC_4_3_X_X86 0
145#endif
146
147/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
148 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
149 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
150 * mode, x86.
151 *
152 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
153 * when in PIC mode on x86.
154 */
155#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
156# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
158# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
160# elif ( (defined(PIC) || defined(__PIC__)) \
161 && defined(RT_ARCH_X86) \
162 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
163 || defined(RT_OS_DARWIN)) )
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# else
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# endif
168#endif
169
170
171/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
172 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
173#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
174# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
175#else
176# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
177#endif
178
179
180/** @def ASMReturnAddress
181 * Gets the return address of the current (or calling if you like) function or method.
182 */
183#ifdef _MSC_VER
184# ifdef __cplusplus
185extern "C"
186# endif
187void * _ReturnAddress(void);
188# pragma intrinsic(_ReturnAddress)
189# define ASMReturnAddress() _ReturnAddress()
190#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
191# define ASMReturnAddress() __builtin_return_address(0)
192#elif defined(__WATCOMC__)
193# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
194#else
195# error "Unsupported compiler."
196#endif
197
198
199/**
200 * Compiler memory barrier.
201 *
202 * Ensure that the compiler does not use any cached (register/tmp stack) memory
203 * values or any outstanding writes when returning from this function.
204 *
205 * This function must be used if non-volatile data is modified by a
206 * device or the VMM. Typical cases are port access, MMIO access,
207 * trapping instruction, etc.
208 */
209#if RT_INLINE_ASM_GNU_STYLE
210# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
211#elif RT_INLINE_ASM_USES_INTRIN
212# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
213#elif defined(__WATCOMC__)
214void ASMCompilerBarrier(void);
215#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
216DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
217{
218 __asm
219 {
220 }
221}
222#endif
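/* Usage sketch (illustrative only; the device layout and names are made up):
 * polling a device status word through a plain pointer. Without the compiler
 * barrier the compiler may cache *pStatus in a register; the barrier forces a
 * fresh read on every iteration.
 * @code
 * static bool waitForDeviceReady(uint32_t const *pStatus)
 * {
 *     unsigned cTries = 1000;
 *     while (cTries-- > 0)
 *     {
 *         ASMCompilerBarrier();   // discard cached reads, re-fetch *pStatus
 *         if (*pStatus & 1)       // bit 0 = ready (assumed layout)
 *             return true;
 *     }
 *     return false;
 * }
 * @endcode
 */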
223
224
225/** @def ASMBreakpoint
226 * Debugger Breakpoint.
227 * @deprecated Use RT_BREAKPOINT instead.
228 * @internal
229 */
230#define ASMBreakpoint() RT_BREAKPOINT()
231
232
233/**
234 * Spinloop hint for platforms that have one; empty function on the other
235 * platforms.
236 *
237 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
238 * spin locks.
239 */
240#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
241RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
242#else
243DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
244{
245# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
246# if RT_INLINE_ASM_GNU_STYLE
247 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
248# else
249 __asm {
250 _emit 0f3h
251 _emit 090h
252 }
253# endif
254
255# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
256 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
257
258# else
259 /* dummy */
260# endif
261}
262#endif
263
264
265/**
266 * Atomically Exchange an unsigned 8-bit value, ordered.
267 *
268 * @returns Current *pu8 value
269 * @param pu8 Pointer to the 8-bit variable to update.
270 * @param u8 The 8-bit value to assign to *pu8.
271 */
272#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
273RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
274#else
275DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
276{
277# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
278# if RT_INLINE_ASM_GNU_STYLE
279 __asm__ __volatile__("xchgb %0, %1\n\t"
280 : "=m" (*pu8)
281 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
282 : "1" (u8)
283 , "m" (*pu8));
284# else
285 __asm
286 {
287# ifdef RT_ARCH_AMD64
288 mov rdx, [pu8]
289 mov al, [u8]
290 xchg [rdx], al
291 mov [u8], al
292# else
293 mov edx, [pu8]
294 mov al, [u8]
295 xchg [edx], al
296 mov [u8], al
297# endif
298 }
299# endif
300 return u8;
301
302# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
303 uint32_t uOld;
304 uint32_t rcSpill;
305 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
306 "dmb sy\n\t"
307# if defined(RT_ARCH_ARM64)
308 "ldaxrb %w0, [%3]\n\t"
309 "stlxrb %w1, %w2, [%3]\n\t"
310 "cbnz %w1, .Ltry_again_ASMAtomicXchgU8_%=\n\t"
311# else
312 "ldrexb %0, [%3]\n\t" /* ARMv6+ */
313 "strexb %1, %2, [%3]\n\t"
314 "cmp %1, #0\n\t"
315 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
316# endif
317 : "=&r" (uOld),
318 "=&r" (rcSpill)
319 : "r" ((uint32_t)u8),
320 "r" (pu8)
321 : "memory",
322 "cc");
323 return (uint8_t)uOld;
324
325# else
326# error "Port me"
327# endif
328}
329#endif
330
331
332/**
333 * Atomically Exchange a signed 8-bit value, ordered.
334 *
335 * @returns Current *pu8 value
336 * @param pi8 Pointer to the 8-bit variable to update.
337 * @param i8 The 8-bit value to assign to *pi8.
338 */
339DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
340{
341 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
342}
343
344
345/**
346 * Atomically Exchange a bool value, ordered.
347 *
348 * @returns Current *pf value
349 * @param pf Pointer to the boolean variable to update.
350 * @param f The boolean value to assign to *pf.
351 */
352DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
353{
354#ifdef _MSC_VER
355 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
356#else
357 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
358#endif
359}
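/* Usage sketch (illustrative only; g_fInitDone and the init work are made up):
 * ASMAtomicXchgBool as a run-once latch: only the caller that flips the flag
 * from false to true performs the one-time work.
 * @code
 * static volatile bool g_fInitDone = false;
 *
 * static void doInitOnce(void)
 * {
 *     if (!ASMAtomicXchgBool(&g_fInitDone, true))
 *     {
 *         // previous value was false, so this caller won: do the work here
 *     }
 * }
 * @endcode
 */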
360
361
362/**
363 * Atomically Exchange an unsigned 16-bit value, ordered.
364 *
365 * @returns Current *pu16 value
366 * @param pu16 Pointer to the 16-bit variable to update.
367 * @param u16 The 16-bit value to assign to *pu16.
368 */
369#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
370RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
371#else
372DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
373{
374# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
375# if RT_INLINE_ASM_GNU_STYLE
376 __asm__ __volatile__("xchgw %0, %1\n\t"
377 : "=m" (*pu16)
378 , "=r" (u16)
379 : "1" (u16)
380 , "m" (*pu16));
381# else
382 __asm
383 {
384# ifdef RT_ARCH_AMD64
385 mov rdx, [pu16]
386 mov ax, [u16]
387 xchg [rdx], ax
388 mov [u16], ax
389# else
390 mov edx, [pu16]
391 mov ax, [u16]
392 xchg [edx], ax
393 mov [u16], ax
394# endif
395 }
396# endif
397 return u16;
398
399# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
400 uint32_t uOld;
401 uint32_t rcSpill;
402 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
403 "dmb sy\n\t"
404# if defined(RT_ARCH_ARM64)
405 "ldaxrh %w0, [%3]\n\t"
406 "stlxrh %w1, %w2, [%3]\n\t"
407 "cbnz %w1, .Ltry_again_ASMAtomicXchgU16_%=\n\t"
408# else
409 "ldrexh %0, [%3]\n\t" /* ARMv6+ */
410 "strexh %1, %2, [%3]\n\t"
411 "cmp %1, #0\n\t"
412 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
413# endif
414 : "=&r" (uOld),
415 "=&r" (rcSpill)
416 : "r" ((uint32_t)u16),
417 "r" (pu16)
418 : "memory",
419 "cc");
420 return (uint16_t)uOld;
421
422# else
423# error "Port me"
424# endif
425}
426#endif
427
428
429/**
430 * Atomically Exchange a signed 16-bit value, ordered.
431 *
432 * @returns Current *pu16 value
433 * @param pi16 Pointer to the 16-bit variable to update.
434 * @param i16 The 16-bit value to assign to *pi16.
435 */
436DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
437{
438 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
439}
440
441
442/**
443 * Atomically Exchange an unsigned 32-bit value, ordered.
444 *
445 * @returns Current *pu32 value
446 * @param pu32 Pointer to the 32-bit variable to update.
447 * @param u32 The 32-bit value to assign to *pu32.
448 *
449 * @remarks Does not work on 286 and earlier.
450 */
451#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
452RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
453#else
454DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
455{
456# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
457# if RT_INLINE_ASM_GNU_STYLE
458 __asm__ __volatile__("xchgl %0, %1\n\t"
459 : "=m" (*pu32)
460 , "=r" (u32)
461 : "1" (u32)
462 , "m" (*pu32));
463
464# elif RT_INLINE_ASM_USES_INTRIN
465 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
466
467# else
468 __asm
469 {
470# ifdef RT_ARCH_AMD64
471 mov rdx, [pu32]
472 mov eax, u32
473 xchg [rdx], eax
474 mov [u32], eax
475# else
476 mov edx, [pu32]
477 mov eax, u32
478 xchg [edx], eax
479 mov [u32], eax
480# endif
481 }
482# endif
483 return u32;
484
485# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
486 uint32_t uOld;
487 uint32_t rcSpill;
488 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
489 "dmb sy\n\t"
490# if defined(RT_ARCH_ARM64)
491 "ldaxr %w0, [%3]\n\t"
492 "stlxr %w1, %w2, [%3]\n\t"
493 "cbnz %w1, .Ltry_again_ASMAtomicXchgU32_%=\n\t"
494# else
495 "ldrex %0, [%3]\n\t" /* ARMv6+ */
496 "strex %1, %2, [%3]\n\t"
497 "cmp %1, #0\n\t"
498 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
499# endif
500 : "=&r" (uOld),
501 "=&r" (rcSpill)
502 : "r" ((uint32_t)u32),
503 "r" (pu32)
504 : "memory",
505 "cc");
506 return (uint32_t)uOld;
507
508# else
509# error "Port me"
510# endif
511}
512#endif
513
514
515/**
516 * Atomically Exchange a signed 32-bit value, ordered.
517 *
518 * @returns Current *pu32 value
519 * @param pi32 Pointer to the 32-bit variable to update.
520 * @param i32 The 32-bit value to assign to *pi32.
521 */
522DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
523{
524 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
525}
526
527
528/**
529 * Atomically Exchange an unsigned 64-bit value, ordered.
530 *
531 * @returns Current *pu64 value
532 * @param pu64 Pointer to the 64-bit variable to update.
533 * @param u64 The 64-bit value to assign to *pu64.
534 *
535 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
536 */
537#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
538 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
539RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
540#else
541DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
542{
543# if defined(RT_ARCH_AMD64)
544# if RT_INLINE_ASM_USES_INTRIN
545 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
546
547# elif RT_INLINE_ASM_GNU_STYLE
548 __asm__ __volatile__("xchgq %0, %1\n\t"
549 : "=m" (*pu64)
550 , "=r" (u64)
551 : "1" (u64)
552 , "m" (*pu64));
553# else
554 __asm
555 {
556 mov rdx, [pu64]
557 mov rax, [u64]
558 xchg [rdx], rax
559 mov [u64], rax
560 }
561# endif
562
563# elif defined(RT_ARCH_X86)
564# if RT_INLINE_ASM_GNU_STYLE
565# if defined(PIC) || defined(__PIC__)
566 uint32_t u32EBX = (uint32_t)u64;
567 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
568 "xchgl %%ebx, %3\n\t"
569 "1:\n\t"
570 "lock; cmpxchg8b (%5)\n\t"
571 "jnz 1b\n\t"
572 "movl %3, %%ebx\n\t"
573 /*"xchgl %%esi, %5\n\t"*/
574 : "=A" (u64)
575 , "=m" (*pu64)
576 : "0" (*pu64)
577 , "m" ( u32EBX )
578 , "c" ( (uint32_t)(u64 >> 32) )
579 , "S" (pu64)
580 : "cc");
581# else /* !PIC */
582 __asm__ __volatile__("1:\n\t"
583 "lock; cmpxchg8b %1\n\t"
584 "jnz 1b\n\t"
585 : "=A" (u64)
586 , "=m" (*pu64)
587 : "0" (*pu64)
588 , "b" ( (uint32_t)u64 )
589 , "c" ( (uint32_t)(u64 >> 32) )
590 : "cc");
591# endif
592# else
593 __asm
594 {
595 mov ebx, dword ptr [u64]
596 mov ecx, dword ptr [u64 + 4]
597 mov edi, pu64
598 mov eax, dword ptr [edi]
599 mov edx, dword ptr [edi + 4]
600 retry:
601 lock cmpxchg8b [edi]
602 jnz retry
603 mov dword ptr [u64], eax
604 mov dword ptr [u64 + 4], edx
605 }
606# endif
607
608# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
609 uint32_t rcSpill;
610 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
611 "dmb sy\n\t"
612# if defined(RT_ARCH_ARM64)
613 "ldaxr %0, [%3]\n\t"
614 "stlxr %w1, %2, [%3]\n\t"
615 "cbnz %w1, .Ltry_again_ASMAtomicXchgU64_%=\n\t"
616# else
617 "ldrexd %H0, [%3]\n\t" /* ARMv6+ */
618 "strexd %1, %H2, [%3]\n\t"
619 "cmp %1, #0\n\t"
620 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
621# endif
622 : "=&r" (u64),
623 "=&r" (rcSpill)
624 : "r" (u64),
625 "r" (pu64)
626 : "memory",
627 "cc");
628
629# else
630# error "Port me"
631# endif
632 return u64;
633}
634#endif
635
636
637/**
638 * Atomically Exchange a signed 64-bit value, ordered.
639 *
640 * @returns Current *pi64 value
641 * @param pi64 Pointer to the 64-bit variable to update.
642 * @param i64 The 64-bit value to assign to *pi64.
643 */
644DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
645{
646 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
647}
648
649
650/**
651 * Atomically Exchange a size_t value, ordered.
652 *
653 * @returns Current *ppv value
654 * @param puDst Pointer to the size_t variable to update.
655 * @param uNew The new value to assign to *puDst.
656 */
657DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
658{
659#if ARCH_BITS == 16
660 AssertCompile(sizeof(size_t) == 2);
661 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
662#elif ARCH_BITS == 32
663 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
664#elif ARCH_BITS == 64
665 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
666#else
667# error "ARCH_BITS is bogus"
668#endif
669}
670
671
672/**
673 * Atomically Exchange a pointer value, ordered.
674 *
675 * @returns Current *ppv value
676 * @param ppv Pointer to the pointer variable to update.
677 * @param pv The pointer value to assign to *ppv.
678 */
679DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
680{
681#if ARCH_BITS == 32 || ARCH_BITS == 16
682 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
683#elif ARCH_BITS == 64
684 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
685#else
686# error "ARCH_BITS is bogus"
687#endif
688}
689
690
691/**
692 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
693 *
694 * @returns Current *pv value
695 * @param ppv Pointer to the pointer variable to update.
696 * @param pv The pointer value to assign to *ppv.
697 * @param Type The type of *ppv, sans volatile.
698 */
699#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
700# define ASMAtomicXchgPtrT(ppv, pv, Type) \
701 __extension__ \
702 ({\
703 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
704 Type const pvTypeChecked = (pv); \
705 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
706 pvTypeCheckedRet; \
707 })
708#else
709# define ASMAtomicXchgPtrT(ppv, pv, Type) \
710 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
711#endif
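/* Usage sketch (illustrative only; MYBUFFER and g_pCurBuf are made-up names):
 * swapping in a new buffer pointer and getting the previous one back, with the
 * pointer type checked by the macro instead of hand-written casts.
 * @code
 * typedef struct MYBUFFER { size_t cb; uint8_t ab[256]; } MYBUFFER;
 * static MYBUFFER * volatile g_pCurBuf = NULL;
 *
 * static MYBUFFER *replaceCurrentBuffer(MYBUFFER *pNew)
 * {
 *     return ASMAtomicXchgPtrT(&g_pCurBuf, pNew, MYBUFFER *);
 * }
 * @endcode
 */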
712
713
714/**
715 * Atomically Exchange a raw-mode context pointer value, ordered.
716 *
717 * @returns Current *ppv value
718 * @param ppvRC Pointer to the pointer variable to update.
719 * @param pvRC The pointer value to assign to *ppv.
720 */
721DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
722{
723 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
724}
725
726
727/**
728 * Atomically Exchange a ring-0 pointer value, ordered.
729 *
730 * @returns Current *ppv value
731 * @param ppvR0 Pointer to the pointer variable to update.
732 * @param pvR0 The pointer value to assign to *ppv.
733 */
734DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
735{
736#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
737 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
738#elif R0_ARCH_BITS == 64
739 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
740#else
741# error "R0_ARCH_BITS is bogus"
742#endif
743}
744
745
746/**
747 * Atomically Exchange a ring-3 pointer value, ordered.
748 *
749 * @returns Current *ppv value
750 * @param ppvR3 Pointer to the pointer variable to update.
751 * @param pvR3 The pointer value to assign to *ppv.
752 */
753DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
754{
755#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
756 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
757#elif R3_ARCH_BITS == 64
758 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
759#else
760# error "R3_ARCH_BITS is bogus"
761#endif
762}
763
764
765/** @def ASMAtomicXchgHandle
766 * Atomically Exchange a typical IPRT handle value, ordered.
767 *
768 * @param ph Pointer to the value to update.
769 * @param hNew The new value to assign to *ph.
770 * @param phRes Where to store the current *ph value.
771 *
772 * @remarks This doesn't currently work for all handles (like RTFILE).
773 */
774#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
775# define ASMAtomicXchgHandle(ph, hNew, phRes) \
776 do { \
777 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
778 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
779 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
780 } while (0)
781#elif HC_ARCH_BITS == 64
782# define ASMAtomicXchgHandle(ph, hNew, phRes) \
783 do { \
784 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
785 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
786 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
787 } while (0)
788#else
789# error HC_ARCH_BITS
790#endif
791
792
793/**
794 * Atomically Exchange a value whose size might differ
795 * between platforms or compilers, ordered.
796 *
797 * @param pu Pointer to the variable to update.
798 * @param uNew The value to assign to *pu.
799 * @todo This is busted as its missing the result argument.
800 */
801#define ASMAtomicXchgSize(pu, uNew) \
802 do { \
803 switch (sizeof(*(pu))) { \
804 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
805 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
806 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
807 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
808 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
809 } \
810 } while (0)
811
812/**
813 * Atomically Exchange a value whose size might differ
814 * between platforms or compilers, ordered.
815 *
816 * @param pu Pointer to the variable to update.
817 * @param uNew The value to assign to *pu.
818 * @param puRes Where to store the current *pu value.
819 */
820#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
821 do { \
822 switch (sizeof(*(pu))) { \
823 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
824 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
825 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
826 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
827 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
828 } \
829 } while (0)
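/* Usage sketch (illustrative only; the token variable is made up): exchanging
 * a variable whose width depends on the target (size_t here) and capturing the
 * previous value via the extra result argument.
 * @code
 * static volatile size_t g_uLastToken;
 *
 * static size_t publishToken(size_t uNewToken)
 * {
 *     size_t uOldToken;
 *     ASMAtomicXchgSizeCorrect(&g_uLastToken, uNewToken, &uOldToken);
 *     return uOldToken;
 * }
 * @endcode
 */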
830
831
832
833/**
834 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
835 *
836 * @returns true if xchg was done.
837 * @returns false if xchg wasn't done.
838 *
839 * @param pu8 Pointer to the value to update.
840 * @param u8New The new value to assign to *pu8.
841 * @param u8Old The old value to compare *pu8 with.
842 *
843 * @remarks x86: Requires a 486 or later.
844 */
845#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
846RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
849{
850# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
851 uint8_t u8Ret;
852 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
853 "setz %1\n\t"
854 : "=m" (*pu8)
855 , "=qm" (u8Ret)
856 , "=a" (u8Old)
857 : "q" (u8New)
858 , "2" (u8Old)
859 , "m" (*pu8)
860 : "cc");
861 return (bool)u8Ret;
862
863# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
864 union { uint32_t u; bool f; } fXchg;
865 uint32_t u32Spill;
866 uint32_t rcSpill;
867 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
868 "dmb sy\n\t"
869# if defined(RT_ARCH_ARM64)
870 "ldaxrb %w0, [%5]\n\t"
871 "cmp %w0, %w3\n\t"
872 "bne 1f\n\t" /* stop here if not equal */
873 "stlxrb %w1, %w4, [%5]\n\t"
874 "cbnz %w1, .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
875 "mov %w2, #1\n\t"
876# else
877 "ldrexb %0, [%5]\n\t"
878 "teq %0, %3\n\t"
879 "strexbeq %1, %4, [%5]\n\t"
880 "bne 1f\n\t" /* stop here if not equal */
881 "cmp %1, #0\n\t"
882 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
883 "mov %2, #1\n\t"
884# endif
885 "1:\n\t"
886 : "=&r" (u32Spill),
887 "=&r" (rcSpill),
888 "=&r" (fXchg.u)
889 : "r" ((uint32_t)u8Old),
890 "r" ((uint32_t)u8New),
891 "r" (pu8),
892 "2" (0) /*fXchg*/
893 : "memory",
894 "cc");
895 return fXchg.f;
896
897# else
898# error "Port me"
899# endif
900}
901#endif
902
903
904/**
905 * Atomically Compare and Exchange a signed 8-bit value, ordered.
906 *
907 * @returns true if xchg was done.
908 * @returns false if xchg wasn't done.
909 *
910 * @param pi8 Pointer to the value to update.
911 * @param i8New The new value to assign to *pi8.
912 * @param i8Old The old value to compare *pi8 with.
913 *
914 * @remarks x86: Requires a 486 or later.
915 */
916DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
917{
918 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
919}
920
921
922/**
923 * Atomically Compare and Exchange a bool value, ordered.
924 *
925 * @returns true if xchg was done.
926 * @returns false if xchg wasn't done.
927 *
928 * @param pf Pointer to the value to update.
929 * @param fNew The new value to assign to *pf.
930 * @param fOld The old value to compare *pf with.
931 *
932 * @remarks x86: Requires a 486 or later.
933 */
934DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
935{
936 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
937}
938
939
940/**
941 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
942 *
943 * @returns true if xchg was done.
944 * @returns false if xchg wasn't done.
945 *
946 * @param pu32 Pointer to the value to update.
947 * @param u32New The new value to assign to *pu32.
948 * @param u32Old The old value to compare *pu32 with.
949 *
950 * @remarks x86: Requires a 486 or later.
951 */
952#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
953RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
954#else
955DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
956{
957# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
958# if RT_INLINE_ASM_GNU_STYLE
959 uint8_t u8Ret;
960 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
961 "setz %1\n\t"
962 : "=m" (*pu32)
963 , "=qm" (u8Ret)
964 , "=a" (u32Old)
965 : "r" (u32New)
966 , "2" (u32Old)
967 , "m" (*pu32)
968 : "cc");
969 return (bool)u8Ret;
970
971# elif RT_INLINE_ASM_USES_INTRIN
972 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
973
974# else
975 uint32_t u32Ret;
976 __asm
977 {
978# ifdef RT_ARCH_AMD64
979 mov rdx, [pu32]
980# else
981 mov edx, [pu32]
982# endif
983 mov eax, [u32Old]
984 mov ecx, [u32New]
985# ifdef RT_ARCH_AMD64
986 lock cmpxchg [rdx], ecx
987# else
988 lock cmpxchg [edx], ecx
989# endif
990 setz al
991 movzx eax, al
992 mov [u32Ret], eax
993 }
994 return !!u32Ret;
995# endif
996
997# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
998 union { uint32_t u; bool f; } fXchg;
999 uint32_t u32Spill;
1000 uint32_t rcSpill;
1001 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1002 "dmb sy\n\t"
1003# if defined(RT_ARCH_ARM64)
1004 "ldaxr %w0, [%5]\n\t"
1005 "cmp %w0, %w3\n\t"
1006 "bne 1f\n\t" /* stop here if not equal */
1007 "stlxr %w1, %w4, [%5]\n\t"
1008 "cbnz %w1, .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1009 "mov %w2, #1\n\t"
1010# else
1011 "ldrex %0, [%5]\n\t"
1012 "teq %0, %3\n\t"
1013 "strexeq %1, %4, [%5]\n\t"
1014 "bne 1f\n\t" /* stop here if not equal */
1015 "cmp %1, #0\n\t"
1016 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1017 "mov %2, #1\n\t"
1018# endif
1019 "1:\n\t"
1020 : "=&r" (u32Spill),
1021 "=&r" (rcSpill),
1022 "=&r" (fXchg.u)
1023 : "r" (u32Old),
1024 "r" (u32New),
1025 "r" (pu32),
1026 "2" (0) /*fXchg*/
1027 : "memory",
1028 "cc");
1029 return fXchg.f;
1030
1031# else
1032# error "Port me"
1033# endif
1034}
1035#endif
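/* Usage sketch (illustrative only; real code would normally use the RTSpinlock
 * or RTCritSect APIs rather than rolling its own lock): a minimal test-and-set
 * spinlock built from ASMAtomicCmpXchgU32, with ASMNopPause in the contention
 * path. The lock variable and function names are made up.
 * @code
 * static volatile uint32_t g_fLockTaken = 0;
 *
 * static void acquireSketchLock(void)
 * {
 *     while (!ASMAtomicCmpXchgU32(&g_fLockTaken, 1, 0))  // 1 = new, 0 = expected old
 *         ASMNopPause();
 * }
 *
 * static void releaseSketchLock(void)
 * {
 *     ASMAtomicXchgU32(&g_fLockTaken, 0);
 * }
 * @endcode
 */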
1036
1037
1038/**
1039 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1040 *
1041 * @returns true if xchg was done.
1042 * @returns false if xchg wasn't done.
1043 *
1044 * @param pi32 Pointer to the value to update.
1045 * @param i32New The new value to assign to *pi32.
1046 * @param i32Old The old value to compare *pi32 with.
1047 *
1048 * @remarks x86: Requires a 486 or later.
1049 */
1050DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1051{
1052 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1053}
1054
1055
1056/**
1057 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1058 *
1059 * @returns true if xchg was done.
1060 * @returns false if xchg wasn't done.
1061 *
1062 * @param pu64 Pointer to the 64-bit variable to update.
1063 * @param u64New The 64-bit value to assign to *pu64.
1064 * @param u64Old The value to compare with.
1065 *
1066 * @remarks x86: Requires a Pentium or later.
1067 */
1068#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1069 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1070RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1071#else
1072DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1073{
1074# if RT_INLINE_ASM_USES_INTRIN
1075 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1076
1077# elif defined(RT_ARCH_AMD64)
1078# if RT_INLINE_ASM_GNU_STYLE
1079 uint8_t u8Ret;
1080 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1081 "setz %1\n\t"
1082 : "=m" (*pu64)
1083 , "=qm" (u8Ret)
1084 , "=a" (u64Old)
1085 : "r" (u64New)
1086 , "2" (u64Old)
1087 , "m" (*pu64)
1088 : "cc");
1089 return (bool)u8Ret;
1090# else
1091 bool fRet;
1092 __asm
1093 {
1094 mov rdx, [pu64]
1095 mov rax, [u64Old]
1096 mov rcx, [u64New]
1097 lock cmpxchg [rdx], rcx
1098 setz al
1099 mov [fRet], al
1100 }
1101 return fRet;
1102# endif
1103
1104# elif defined(RT_ARCH_X86)
1105 uint32_t u32Ret;
1106# if RT_INLINE_ASM_GNU_STYLE
1107# if defined(PIC) || defined(__PIC__)
1108 uint32_t u32EBX = (uint32_t)u64New;
1109 uint32_t u32Spill;
1110 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1111 "lock; cmpxchg8b (%6)\n\t"
1112 "setz %%al\n\t"
1113 "movl %4, %%ebx\n\t"
1114 "movzbl %%al, %%eax\n\t"
1115 : "=a" (u32Ret)
1116 , "=d" (u32Spill)
1117# if RT_GNUC_PREREQ(4, 3)
1118 , "+m" (*pu64)
1119# else
1120 , "=m" (*pu64)
1121# endif
1122 : "A" (u64Old),
1123 , "m" ( u32EBX )
1124 , "c" ( (uint32_t)(u64New >> 32) )
1125 , "S" (pu64)
1126 : "cc");
1127# else /* !PIC */
1128 uint32_t u32Spill;
1129 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1130 "setz %%al\n\t"
1131 "movzbl %%al, %%eax\n\t"
1132 : "=a" (u32Ret)
1133 , "=d" (u32Spill)
1134 , "+m" (*pu64)
1135 : "A" (u64Old)
1136 , "b" ( (uint32_t)u64New )
1137 , "c" ( (uint32_t)(u64New >> 32) )
1138 : "cc");
1139# endif
1140 return (bool)u32Ret;
1141# else
1142 __asm
1143 {
1144 mov ebx, dword ptr [u64New]
1145 mov ecx, dword ptr [u64New + 4]
1146 mov edi, [pu64]
1147 mov eax, dword ptr [u64Old]
1148 mov edx, dword ptr [u64Old + 4]
1149 lock cmpxchg8b [edi]
1150 setz al
1151 movzx eax, al
1152 mov dword ptr [u32Ret], eax
1153 }
1154 return !!u32Ret;
1155# endif
1156
1157# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1158 union { uint32_t u; bool f; } fXchg;
1159 uint64_t u64Spill;
1160 uint32_t rcSpill;
1161 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1162 "dmb sy\n\t"
1163# if defined(RT_ARCH_ARM64)
1164 "ldaxr %0, [%5]\n\t"
1165 "cmp %0, %3\n\t"
1166 "bne 1f\n\t" /* stop here if not equal */
1167 "stlxr %w1, %4, [%5]\n\t"
1168 "cbnz %w1, .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1169 "mov %w2, #1\n\t"
1170# else
1171 "ldrexd %0, %H0, [%5]\n\t"
1172 "teq %0, %3\n\t"
1173 "teqeq %H0, %H3\n\t"
1174 "strexdeq %1, %4, %H4, [%5]\n\t"
1175 "bne 1f\n\t" /* stop here if not equal */
1176 "cmp %1, #0\n\t"
1177 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1178 "mov %2, #1\n\t"
1179# endif
1180 "1:\n\t"
1181 : "=&r" (u64Spill),
1182 "=&r" (rcSpill),
1183 "=&r" (fXchg.u)
1184 : "r" (u64Old),
1185 "r" (u64New),
1186 "r" (pu64),
1187 "2" (0) /*fXchg*/
1188 : "memory",
1189 "cc");
1190 return fXchg.f;
1191
1192# else
1193# error "Port me"
1194# endif
1195}
1196#endif
1197
1198
1199/**
1200 * Atomically Compare and exchange a signed 64-bit value, ordered.
1201 *
1202 * @returns true if xchg was done.
1203 * @returns false if xchg wasn't done.
1204 *
1205 * @param pi64 Pointer to the 64-bit variable to update.
1206 * @param i64 The 64-bit value to assign to *pi64.
1207 * @param i64Old The value to compare with.
1208 *
1209 * @remarks x86: Requires a Pentium or later.
1210 */
1211DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1212{
1213 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1214}
1215
1216
1217/**
1218 * Atomically Compare and Exchange a pointer value, ordered.
1219 *
1220 * @returns true if xchg was done.
1221 * @returns false if xchg wasn't done.
1222 *
1223 * @param ppv Pointer to the value to update.
1224 * @param pvNew The new value to assign to *ppv.
1225 * @param pvOld The old value to compare *ppv with.
1226 *
1227 * @remarks x86: Requires a 486 or later.
1228 */
1229DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1230{
1231#if ARCH_BITS == 32 || ARCH_BITS == 16
1232 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1233#elif ARCH_BITS == 64
1234 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1235#else
1236# error "ARCH_BITS is bogus"
1237#endif
1238}
1239
1240
1241/**
1242 * Atomically Compare and Exchange a pointer value, ordered.
1243 *
1244 * @returns true if xchg was done.
1245 * @returns false if xchg wasn't done.
1246 *
1247 * @param ppv Pointer to the value to update.
1248 * @param pvNew The new value to assign to *ppv.
1249 * @param pvOld The old value to compare *ppv with.
1250 *
1251 * @remarks This is relatively type safe on GCC platforms.
1252 * @remarks x86: Requires a 486 or later.
1253 */
1254#ifdef __GNUC__
1255# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1256 __extension__ \
1257 ({\
1258 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1259 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1260 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1261 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1262 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1263 fMacroRet; \
1264 })
1265#else
1266# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1267 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1268#endif
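/* Usage sketch (illustrative only; MYOBJ and its create/destroy helpers are
 * hypothetical): publishing a lazily created singleton exactly once. Whoever
 * loses the race throws its instance away and uses the winner's.
 * @code
 * typedef struct MYOBJ MYOBJ;
 * MYOBJ *createMyObj(void);
 * void   destroyMyObj(MYOBJ *pObj);
 *
 * static MYOBJ * volatile g_pSingleton = NULL;
 *
 * static MYOBJ *getSingleton(void)
 * {
 *     MYOBJ *pObj = g_pSingleton;
 *     if (!pObj)
 *     {
 *         pObj = createMyObj();
 *         if (!ASMAtomicCmpXchgPtr(&g_pSingleton, pObj, NULL))
 *         {
 *             destroyMyObj(pObj);     // lost the race
 *             pObj = g_pSingleton;    // use the instance that won
 *         }
 *     }
 *     return pObj;
 * }
 * @endcode
 */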
1269
1270
1271/** @def ASMAtomicCmpXchgHandle
1272 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1273 *
1274 * @param ph Pointer to the value to update.
1275 * @param hNew The new value to assign to *ph.
1276 * @param hOld The old value to compare *ph with.
1277 * @param fRc Where to store the result.
1278 *
1279 * @remarks This doesn't currently work for all handles (like RTFILE).
1280 * @remarks x86: Requires a 486 or later.
1281 */
1282#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1283# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1286 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1287 } while (0)
1288#elif HC_ARCH_BITS == 64
1289# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1290 do { \
1291 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1292 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1293 } while (0)
1294#else
1295# error HC_ARCH_BITS
1296#endif
1297
1298
1299/** @def ASMAtomicCmpXchgSize
1300 * Atomically Compare and Exchange a value whose size might differ
1301 * between platforms or compilers, ordered.
1302 *
1303 * @param pu Pointer to the value to update.
1304 * @param uNew The new value to assign to *pu.
1305 * @param uOld The old value to compare *pu with.
1306 * @param fRc Where to store the result.
1307 *
1308 * @remarks x86: Requires a 486 or later.
1309 */
1310#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1311 do { \
1312 switch (sizeof(*(pu))) { \
1313 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1314 break; \
1315 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1316 break; \
1317 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1318 (fRc) = false; \
1319 break; \
1320 } \
1321 } while (0)
1322
1323
1324/**
1325 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1326 * passes back old value, ordered.
1327 *
1328 * @returns true if xchg was done.
1329 * @returns false if xchg wasn't done.
1330 *
1331 * @param pu32 Pointer to the value to update.
1332 * @param u32New The new value to assign to *pu32.
1333 * @param u32Old The old value to compare *pu32 with.
1334 * @param pu32Old Pointer to where to store the old value.
1335 *
1336 * @remarks x86: Requires a 486 or later.
1337 */
1338#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1339RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1340#else
1341DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1342{
1343# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1344# if RT_INLINE_ASM_GNU_STYLE
1345 uint8_t u8Ret;
1346 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1347 "setz %1\n\t"
1348 : "=m" (*pu32)
1349 , "=qm" (u8Ret)
1350 , "=a" (*pu32Old)
1351 : "r" (u32New)
1352 , "a" (u32Old)
1353 , "m" (*pu32)
1354 : "cc");
1355 return (bool)u8Ret;
1356
1357# elif RT_INLINE_ASM_USES_INTRIN
1358 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1359
1360# else
1361 uint32_t u32Ret;
1362 __asm
1363 {
1364# ifdef RT_ARCH_AMD64
1365 mov rdx, [pu32]
1366# else
1367 mov edx, [pu32]
1368# endif
1369 mov eax, [u32Old]
1370 mov ecx, [u32New]
1371# ifdef RT_ARCH_AMD64
1372 lock cmpxchg [rdx], ecx
1373 mov rdx, [pu32Old]
1374 mov [rdx], eax
1375# else
1376 lock cmpxchg [edx], ecx
1377 mov edx, [pu32Old]
1378 mov [edx], eax
1379# endif
1380 setz al
1381 movzx eax, al
1382 mov [u32Ret], eax
1383 }
1384 return !!u32Ret;
1385# endif
1386
1387# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1388 union { uint32_t u; bool f; } fXchg;
1389 uint32_t u32ActualOld;
1390 uint32_t rcSpill;
1391 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1392 "dmb sy\n\t"
1393# if defined(RT_ARCH_ARM64)
1394 "ldaxr %w0, [%5]\n\t"
1395 "cmp %w0, %w3\n\t"
1396 "bne 1f\n\t" /* stop here if not equal */
1397 "stlxr %w1, %w4, [%5]\n\t"
1398 "cbnz %w1, .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1399 "mov %w2, #1\n\t"
1400# else
1401 "ldrex %0, [%5]\n\t"
1402 "teq %0, %3\n\t"
1403 "strexeq %1, %4, [%5]\n\t"
1404 "bne 1f\n\t" /* stop here if not equal */
1405 "cmp %1, #0\n\t"
1406 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1407 "mov %2, #1\n\t"
1408# endif
1409 "1:\n\t"
1410 : "=&r" (u32ActualOld),
1411 "=&r" (rcSpill),
1412 "=&r" (fXchg.u)
1413 : "r" (u32Old),
1414 "r" (u32New),
1415 "r" (pu32),
1416 "2" (0) /*fXchg*/
1417 : "memory",
1418 "cc");
1419 *pu32Old = u32ActualOld;
1420 return fXchg.f;
1421
1422# else
1423# error "Port me"
1424# endif
1425}
1426#endif
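/* Usage sketch (illustrative only; the function name is made up): an atomic
 * "store maximum". The Ex variant returns the value actually found in *pu32,
 * so a failed attempt feeds straight into the next one without re-reading.
 * @code
 * static void atomicStoreMaxU32(volatile uint32_t *pu32, uint32_t uNew)
 * {
 *     uint32_t uOld = *pu32;
 *     while (uNew > uOld)
 *     {
 *         if (ASMAtomicCmpXchgExU32(pu32, uNew, uOld, &uOld))
 *             break;                  // stored; uOld was still current
 *         // otherwise uOld now holds the value we lost against, try again
 *     }
 * }
 * @endcode
 */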
1427
1428
1429/**
1430 * Atomically Compare and Exchange a signed 32-bit value, additionally
1431 * passes back old value, ordered.
1432 *
1433 * @returns true if xchg was done.
1434 * @returns false if xchg wasn't done.
1435 *
1436 * @param pi32 Pointer to the value to update.
1437 * @param i32New The new value to assign to *pi32.
1438 * @param i32Old The old value to compare *pi32 with.
1439 * @param pi32Old Pointer to where to store the old value.
1440 *
1441 * @remarks x86: Requires a 486 or later.
1442 */
1443DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1444{
1445 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1446}
1447
1448
1449/**
1450 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1451 * passing back old value, ordered.
1452 *
1453 * @returns true if xchg was done.
1454 * @returns false if xchg wasn't done.
1455 *
1456 * @param pu64 Pointer to the 64-bit variable to update.
1457 * @param u64New The 64-bit value to assign to *pu64.
1458 * @param u64Old The value to compare with.
1459 * @param pu64Old Pointer to where to store the old value.
1460 *
1461 * @remarks x86: Requires a Pentium or later.
1462 */
1463#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1464 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1465RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1466#else
1467DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1468{
1469# if RT_INLINE_ASM_USES_INTRIN
1470 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1471
1472# elif defined(RT_ARCH_AMD64)
1473# if RT_INLINE_ASM_GNU_STYLE
1474 uint8_t u8Ret;
1475 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1476 "setz %1\n\t"
1477 : "=m" (*pu64)
1478 , "=qm" (u8Ret)
1479 , "=a" (*pu64Old)
1480 : "r" (u64New)
1481 , "a" (u64Old)
1482 , "m" (*pu64)
1483 : "cc");
1484 return (bool)u8Ret;
1485# else
1486 bool fRet;
1487 __asm
1488 {
1489 mov rdx, [pu64]
1490 mov rax, [u64Old]
1491 mov rcx, [u64New]
1492 lock cmpxchg [rdx], rcx
1493 mov rdx, [pu64Old]
1494 mov [rdx], rax
1495 setz al
1496 mov [fRet], al
1497 }
1498 return fRet;
1499# endif
1500
1501# elif defined(RT_ARCH_X86)
1502# if RT_INLINE_ASM_GNU_STYLE
1503 uint64_t u64Ret;
1504# if defined(PIC) || defined(__PIC__)
1505 /* NB: this code uses a memory clobber description, because the clean
1506 * solution with an output value for *pu64 makes gcc run out of registers.
1507 * This will cause suboptimal code, and anyone with a better solution is
1508 * welcome to improve this. */
1509 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1510 "lock; cmpxchg8b %3\n\t"
1511 "xchgl %%ebx, %1\n\t"
1512 : "=A" (u64Ret)
1513 : "DS" ((uint32_t)u64New)
1514 , "c" ((uint32_t)(u64New >> 32))
1515 , "m" (*pu64)
1516 , "0" (u64Old)
1517 : "memory"
1518 , "cc" );
1519# else /* !PIC */
1520 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1521 : "=A" (u64Ret)
1522 , "=m" (*pu64)
1523 : "b" ((uint32_t)u64New)
1524 , "c" ((uint32_t)(u64New >> 32))
1525 , "m" (*pu64)
1526 , "0" (u64Old)
1527 : "cc");
1528# endif
1529 *pu64Old = u64Ret;
1530 return u64Ret == u64Old;
1531# else
1532 uint32_t u32Ret;
1533 __asm
1534 {
1535 mov ebx, dword ptr [u64New]
1536 mov ecx, dword ptr [u64New + 4]
1537 mov edi, [pu64]
1538 mov eax, dword ptr [u64Old]
1539 mov edx, dword ptr [u64Old + 4]
1540 lock cmpxchg8b [edi]
1541 mov ebx, [pu64Old]
1542 mov [ebx], eax
1543 setz al
1544 movzx eax, al
1545 add ebx, 4
1546 mov [ebx], edx
1547 mov dword ptr [u32Ret], eax
1548 }
1549 return !!u32Ret;
1550# endif
1551
1552# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1553 union { uint32_t u; bool f; } fXchg;
1554 uint64_t u64ActualOld;
1555 uint32_t rcSpill;
1556 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU64_%=:\n\t"
1557 "dmb sy\n\t"
1558# if defined(RT_ARCH_ARM64)
1559 "ldaxr %0, [%5]\n\t"
1560 "cmp %0, %3\n\t"
1561 "bne 1f\n\t" /* stop here if not equal */
1562 "stlxr %w1, %4, [%5]\n\t"
1563 "cbnz %w1, .Ltry_again_ASMAtomicCmpXchgExU64_%=\n\t"
1564 "mov %w2, #1\n\t"
1565# else
1566 "ldrexd %0, %H0, [%5]\n\t"
1567 "teq %0, %3\n\t"
1568 "teqeq %H0, %H3\n\t"
1569 "strexdeq %1, %4, %H4, [%5]\n\t"
1570 "bne 1f\n\t" /* stop here if not equal */
1571 "cmp %1, #0\n\t"
1572 "bne .Ltry_again_ASMAtomicCmpXchgExU64_%=\n\t"
1573 "mov %2, #1\n\t"
1574# endif
1575 "1:\n\t"
1576 : "=&r" (u64ActualOld),
1577 "=&r" (rcSpill),
1578 "=&r" (fXchg.u)
1579 : "r" (u64Old),
1580 "r" (u64New),
1581 "r" (pu64),
1582 "2" (0) /*fXchg*/
1583 : "memory",
1584 "cc");
1585 *pu64Old = u64ActualOld;
1586 return fXchg.f;
1587
1588# else
1589# error "Port me"
1590# endif
1591}
1592#endif
1593
1594
1595/**
1596 * Atomically Compare and exchange a signed 64-bit value, additionally
1597 * passing back old value, ordered.
1598 *
1599 * @returns true if xchg was done.
1600 * @returns false if xchg wasn't done.
1601 *
1602 * @param pi64 Pointer to the 64-bit variable to update.
1603 * @param i64 The 64-bit value to assign to *pi64.
1604 * @param i64Old The value to compare with.
1605 * @param pi64Old Pointer to where to store the old value.
1606 *
1607 * @remarks x86: Requires a Pentium or later.
1608 */
1609DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1610{
1611 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1612}
1613
1614/** @def ASMAtomicCmpXchgExHandle
1615 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1616 *
1617 * @param ph Pointer to the value to update.
1618 * @param hNew The new value to assign to *ph.
1619 * @param hOld The old value to compare *ph with.
1620 * @param fRc Where to store the result.
1621 * @param phOldVal Pointer to where to store the old value.
1622 *
1623 * @remarks This doesn't currently work for all handles (like RTFILE).
1624 */
1625#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1626# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1627 do { \
1628 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1629 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1630 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1631 } while (0)
1632#elif HC_ARCH_BITS == 64
1633# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1634 do { \
1635 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1636 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1637 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1638 } while (0)
1639#else
1640# error HC_ARCH_BITS
1641#endif
1642
1643
1644/** @def ASMAtomicCmpXchgExSize
1645 * Atomically Compare and Exchange a value whose size might differ
1646 * between platforms or compilers. Additionally passes back old value.
1647 *
1648 * @param pu Pointer to the value to update.
1649 * @param uNew The new value to assign to *pu.
1650 * @param uOld The old value to compare *pu with.
1651 * @param fRc Where to store the result.
1652 * @param puOldVal Pointer to where to store the old value.
1653 *
1654 * @remarks x86: Requires a 486 or later.
1655 */
1656#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1657 do { \
1658 switch (sizeof(*(pu))) { \
1659 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1660 break; \
1661 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1662 break; \
1663 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1664 (fRc) = false; \
1665 *(puOldVal) = 0; \
1666 break; \
1667 } \
1668 } while (0)
1669
1670
1671/**
1672 * Atomically Compare and Exchange a pointer value, additionally
1673 * passing back old value, ordered.
1674 *
1675 * @returns true if xchg was done.
1676 * @returns false if xchg wasn't done.
1677 *
1678 * @param ppv Pointer to the value to update.
1679 * @param pvNew The new value to assign to *ppv.
1680 * @param pvOld The old value to compare *ppv with.
1681 * @param ppvOld Pointer to where to store the old value.
1682 *
1683 * @remarks x86: Requires a 486 or later.
1684 */
1685DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1686 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
1687{
1688#if ARCH_BITS == 32 || ARCH_BITS == 16
1689 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1690#elif ARCH_BITS == 64
1691 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1692#else
1693# error "ARCH_BITS is bogus"
1694#endif
1695}
1696
1697
1698/**
1699 * Atomically Compare and Exchange a pointer value, additionally
1700 * passing back old value, ordered.
1701 *
1702 * @returns true if xchg was done.
1703 * @returns false if xchg wasn't done.
1704 *
1705 * @param ppv Pointer to the value to update.
1706 * @param pvNew The new value to assign to *ppv.
1707 * @param pvOld The old value to compare *ppv with.
1708 * @param ppvOld Pointer to where to store the old value.
1709 *
1710 * @remarks This is relatively type safe on GCC platforms.
1711 * @remarks x86: Requires a 486 or later.
1712 */
1713#ifdef __GNUC__
1714# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1715 __extension__ \
1716 ({\
1717 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1718 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1719 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1720 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1721 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1722 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1723 (void **)ppvOldTypeChecked); \
1724 fMacroRet; \
1725 })
1726#else
1727# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1728 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1729#endif
1730
1731
1732/**
1733 * Virtualization unfriendly serializing instruction, always exits.
1734 */
1735#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1736RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
1737#else
1738DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
1739{
1740# if RT_INLINE_ASM_GNU_STYLE
1741 RTCCUINTREG xAX = 0;
1742# ifdef RT_ARCH_AMD64
1743 __asm__ __volatile__ ("cpuid"
1744 : "=a" (xAX)
1745 : "0" (xAX)
1746 : "rbx", "rcx", "rdx", "memory");
1747# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1748 __asm__ __volatile__ ("push %%ebx\n\t"
1749 "cpuid\n\t"
1750 "pop %%ebx\n\t"
1751 : "=a" (xAX)
1752 : "0" (xAX)
1753 : "ecx", "edx", "memory");
1754# else
1755 __asm__ __volatile__ ("cpuid"
1756 : "=a" (xAX)
1757 : "0" (xAX)
1758 : "ebx", "ecx", "edx", "memory");
1759# endif
1760
1761# elif RT_INLINE_ASM_USES_INTRIN
1762 int aInfo[4];
1763 _ReadWriteBarrier();
1764 __cpuid(aInfo, 0);
1765
1766# else
1767 __asm
1768 {
1769 push ebx
1770 xor eax, eax
1771 cpuid
1772 pop ebx
1773 }
1774# endif
1775}
1776#endif
1777
1778/**
1779 * Virtualization friendly serializing instruction, though more expensive.
1780 */
1781#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1782RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
1783#else
1784DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
1785{
1786# if RT_INLINE_ASM_GNU_STYLE
1787# ifdef RT_ARCH_AMD64
1788 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1789 "subq $128, %%rsp\n\t" /*redzone*/
1790 "mov %%ss, %%eax\n\t"
1791 "pushq %%rax\n\t"
1792 "pushq %%r10\n\t"
1793 "pushfq\n\t"
1794 "movl %%cs, %%eax\n\t"
1795 "pushq %%rax\n\t"
1796 "leaq 1f(%%rip), %%rax\n\t"
1797 "pushq %%rax\n\t"
1798 "iretq\n\t"
1799 "1:\n\t"
1800 ::: "rax", "r10", "memory", "cc");
1801# else
1802 __asm__ __volatile__ ("pushfl\n\t"
1803 "pushl %%cs\n\t"
1804 "pushl $1f\n\t"
1805 "iretl\n\t"
1806 "1:\n\t"
1807 ::: "memory");
1808# endif
1809
1810# else
1811 __asm
1812 {
1813 pushfd
1814 push cs
1815 push la_ret
1816 iretd
1817 la_ret:
1818 }
1819# endif
1820}
1821#endif
1822
1823/**
1824 * Virtualization friendlier serializing instruction, may still cause exits.
1825 */
1826#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
1827RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
1828#else
1829DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
1830{
1831# if RT_INLINE_ASM_GNU_STYLE
1832 /* rdtscp is not supported by the ancient Linux build VMs, of course :-( */
1833# ifdef RT_ARCH_AMD64
1834 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1835 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1836# else
1837 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1838 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1839# endif
1840# else
1841# if RT_INLINE_ASM_USES_INTRIN >= 15
1842 uint32_t uIgnore;
1843 _ReadWriteBarrier();
1844 (void)__rdtscp(&uIgnore);
1845 (void)uIgnore;
1846# else
1847 __asm
1848 {
1849 rdtscp
1850 }
1851# endif
1852# endif
1853}
1854#endif
1855
1856
1857/**
1858 * Serialize Instruction (both data store and instruction flush).
1859 */
1860#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1861# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1862#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1863# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1864#elif defined(RT_ARCH_SPARC64)
1865RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
1866#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1867DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
1868{
1869 /* Note! Only armv7 and later. */
1870 __asm__ __volatile__ ("dsb sy\n\t" ::: "memory");
1871}
1872#else
1873# error "Port me"
1874#endif
1875
1876
1877/**
1878 * Memory fence, waits for any pending writes and reads to complete.
1879 */
1880DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
1881{
1882#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1883# if RT_INLINE_ASM_GNU_STYLE
1884 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1885# elif RT_INLINE_ASM_USES_INTRIN
1886 _mm_mfence();
1887# else
1888 __asm
1889 {
1890 _emit 0x0f
1891 _emit 0xae
1892 _emit 0xf0
1893 }
1894# endif
1895#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1896 /* Note! Only armv7 and later. */
1897 __asm__ __volatile__ ("dmb sy\n\t" ::: "memory");
1898#elif ARCH_BITS == 16
1899 uint16_t volatile u16;
1900 ASMAtomicXchgU16(&u16, 0);
1901#else
1902 uint32_t volatile u32;
1903 ASMAtomicXchgU32(&u32, 0);
1904#endif
1905}
1906
1907
1908/**
1909 * Write fence, waits for any pending writes to complete.
1910 */
1911DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
1912{
1913#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1914# if RT_INLINE_ASM_GNU_STYLE
1915 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1916# elif RT_INLINE_ASM_USES_INTRIN
1917 _mm_sfence();
1918# else
1919 __asm
1920 {
1921 _emit 0x0f
1922 _emit 0xae
1923 _emit 0xf8
1924 }
1925# endif
1926#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1927 /* Note! Only armv7 and later. */
1928 __asm__ __volatile__ ("dmb st\n\t" ::: "memory");
1929#else
1930 ASMMemoryFence();
1931#endif
1932}
1933
1934
1935/**
1936 * Read fence, waits for any pending reads to complete.
1937 */
1938DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
1939{
1940#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1941# if RT_INLINE_ASM_GNU_STYLE
1942 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1943# elif RT_INLINE_ASM_USES_INTRIN
1944 _mm_lfence();
1945# else
1946 __asm
1947 {
1948 _emit 0x0f
1949 _emit 0xae
1950 _emit 0xe8
1951 }
1952# endif
1953#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1954 /* Note! Only armv7 and later. */
1955 __asm__ __volatile__ ("dmb ld\n\t" ::: "memory");
1956#else
1957 ASMMemoryFence();
1958#endif
1959}
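
/* Example (illustrative sketch; g_uDemoPayload, g_fDemoReady and demoConsume are
 * made up): a minimal producer/consumer hand-off.  ASMWriteFence() keeps the
 * payload store from being reordered past the ready-flag store on the producer
 * side, and ASMReadFence() keeps the payload load from being reordered ahead of
 * the flag check on the consumer side.
 *
 *      static uint32_t volatile g_uDemoPayload;
 *      static bool     volatile g_fDemoReady;
 *
 *      // producer
 *      ASMAtomicUoWriteU32(&g_uDemoPayload, 42);
 *      ASMWriteFence();
 *      ASMAtomicUoWriteBool(&g_fDemoReady, true);
 *
 *      // consumer
 *      if (ASMAtomicUoReadBool(&g_fDemoReady))
 *      {
 *          ASMReadFence();
 *          demoConsume(ASMAtomicUoReadU32(&g_uDemoPayload));
 *      }
 */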
1960
1961
1962/**
1963 * Atomically reads an unsigned 8-bit value, ordered.
1964 *
1965 * @returns Current *pu8 value
1966 * @param pu8 Pointer to the 8-bit variable to read.
1967 */
1968DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
1969{
1970 ASMMemoryFence();
1971 return *pu8; /* byte reads are atomic on x86 */
1972}
1973
1974
1975/**
1976 * Atomically reads an unsigned 8-bit value, unordered.
1977 *
1978 * @returns Current *pu8 value
1979 * @param pu8 Pointer to the 8-bit variable to read.
1980 */
1981DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
1982{
1983 return *pu8; /* byte reads are atomic on x86 */
1984}
1985
1986
1987/**
1988 * Atomically reads a signed 8-bit value, ordered.
1989 *
1990 * @returns Current *pi8 value
1991 * @param pi8 Pointer to the 8-bit variable to read.
1992 */
1993DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
1994{
1995 ASMMemoryFence();
1996 return *pi8; /* byte reads are atomic on x86 */
1997}
1998
1999
2000/**
2001 * Atomically reads a signed 8-bit value, unordered.
2002 *
2003 * @returns Current *pi8 value
2004 * @param pi8 Pointer to the 8-bit variable to read.
2005 */
2006DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2007{
2008 return *pi8; /* byte reads are atomic on x86 */
2009}
2010
2011
2012/**
2013 * Atomically reads an unsigned 16-bit value, ordered.
2014 *
2015 * @returns Current *pu16 value
2016 * @param pu16 Pointer to the 16-bit variable to read.
2017 */
2018DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2019{
2020 Assert(!((uintptr_t)pu16 & 1));
2021 ASMMemoryFence();
2022 return *pu16;
2023}
2024
2025
2026/**
2027 * Atomically reads an unsigned 16-bit value, unordered.
2028 *
2029 * @returns Current *pu16 value
2030 * @param pu16 Pointer to the 16-bit variable to read.
2031 */
2032DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2033{
2034 Assert(!((uintptr_t)pu16 & 1));
2035 return *pu16;
2036}
2037
2038
2039/**
2040 * Atomically reads a signed 16-bit value, ordered.
2041 *
2042 * @returns Current *pi16 value
2043 * @param pi16 Pointer to the 16-bit variable to read.
2044 */
2045DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2046{
2047 Assert(!((uintptr_t)pi16 & 1));
2048 ASMMemoryFence();
2049 return *pi16;
2050}
2051
2052
2053/**
2054 * Atomically reads a signed 16-bit value, unordered.
2055 *
2056 * @returns Current *pi16 value
2057 * @param pi16 Pointer to the 16-bit variable to read.
2058 */
2059DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2060{
2061 Assert(!((uintptr_t)pi16 & 1));
2062 return *pi16;
2063}
2064
2065
2066/**
2067 * Atomically reads an unsigned 32-bit value, ordered.
2068 *
2069 * @returns Current *pu32 value
2070 * @param pu32 Pointer to the 32-bit variable to read.
2071 */
2072DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2073{
2074 Assert(!((uintptr_t)pu32 & 3));
2075 ASMMemoryFence();
2076#if ARCH_BITS == 16
2077 AssertFailed(); /** @todo 16-bit */
2078#endif
2079 return *pu32;
2080}
2081
2082
2083/**
2084 * Atomically reads an unsigned 32-bit value, unordered.
2085 *
2086 * @returns Current *pu32 value
2087 * @param pu32 Pointer to the 32-bit variable to read.
2088 */
2089DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2090{
2091 Assert(!((uintptr_t)pu32 & 3));
2092#if ARCH_BITS == 16
2093 AssertFailed(); /** @todo 16-bit */
2094#endif
2095 return *pu32;
2096}
2097
2098
2099/**
2100 * Atomically reads a signed 32-bit value, ordered.
2101 *
2102 * @returns Current *pi32 value
2103 * @param pi32 Pointer to the 32-bit variable to read.
2104 */
2105DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2106{
2107 Assert(!((uintptr_t)pi32 & 3));
2108 ASMMemoryFence();
2109#if ARCH_BITS == 16
2110 AssertFailed(); /** @todo 16-bit */
2111#endif
2112 return *pi32;
2113}
2114
2115
2116/**
2117 * Atomically reads a signed 32-bit value, unordered.
2118 *
2119 * @returns Current *pi32 value
2120 * @param pi32 Pointer to the 32-bit variable to read.
2121 */
2122DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2123{
2124 Assert(!((uintptr_t)pi32 & 3));
2125#if ARCH_BITS == 16
2126 AssertFailed(); /** @todo 16-bit */
2127#endif
2128 return *pi32;
2129}
2130
2131
2132/**
2133 * Atomically reads an unsigned 64-bit value, ordered.
2134 *
2135 * @returns Current *pu64 value
2136 * @param pu64 Pointer to the 64-bit variable to read.
2137 * The memory pointed to must be writable.
2138 *
2139 * @remarks This may fault if the memory is read-only!
2140 * @remarks x86: Requires a Pentium or later.
2141 */
2142#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2143 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2144RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2145#else
2146DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2147{
2148 uint64_t u64;
2149# ifdef RT_ARCH_AMD64
2150 Assert(!((uintptr_t)pu64 & 7));
2151/*# if RT_INLINE_ASM_GNU_STYLE
2152 __asm__ __volatile__( "mfence\n\t"
2153 "movq %1, %0\n\t"
2154 : "=r" (u64)
2155 : "m" (*pu64));
2156# else
2157 __asm
2158 {
2159 mfence
2160 mov rdx, [pu64]
2161 mov rax, [rdx]
2162 mov [u64], rax
2163 }
2164# endif*/
2165 ASMMemoryFence();
2166 u64 = *pu64;
2167
2168# elif defined(RT_ARCH_X86)
2169# if RT_INLINE_ASM_GNU_STYLE
2170# if defined(PIC) || defined(__PIC__)
2171 uint32_t u32EBX = 0;
2172 Assert(!((uintptr_t)pu64 & 7));
2173 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2174 "lock; cmpxchg8b (%5)\n\t"
2175 "movl %3, %%ebx\n\t"
2176 : "=A" (u64)
2177# if RT_GNUC_PREREQ(4, 3)
2178 , "+m" (*pu64)
2179# else
2180 , "=m" (*pu64)
2181# endif
2182 : "0" (0ULL)
2183 , "m" (u32EBX)
2184 , "c" (0)
2185 , "S" (pu64)
2186 : "cc");
2187# else /* !PIC */
2188 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2189 : "=A" (u64)
2190 , "+m" (*pu64)
2191 : "0" (0ULL)
2192 , "b" (0)
2193 , "c" (0)
2194 : "cc");
2195# endif
2196# else
2197 Assert(!((uintptr_t)pu64 & 7));
2198 __asm
2199 {
2200 xor eax, eax
2201 xor edx, edx
2202 mov edi, pu64
2203 xor ecx, ecx
2204 xor ebx, ebx
2205 lock cmpxchg8b [edi]
2206 mov dword ptr [u64], eax
2207 mov dword ptr [u64 + 4], edx
2208 }
2209# endif
2210
2211# elif defined(RT_ARCH_ARM64)
2212 Assert(!((uintptr_t)pu64 & 7));
2213 ASMMemoryFence();
2214 u64 = *pu64;
2215
2216# elif defined(RT_ARCH_ARM32)
2217 Assert(!((uintptr_t)pu64 & 7));
2218 __asm__ __volatile__("dmb sy\n\t"
2219 "ldrexd %0, %H0, [%1]\n\t"
2220 : "=&r" (u64)
2221 : "r" (pu64)
2222 : "memory");
2223
2224# else
2225# error "Port me"
2226# endif
2227 return u64;
2228}
2229#endif
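
/* Example (illustrative sketch; g_cbDemoTotal is made up): taking a consistent
 * snapshot of a 64-bit counter updated by other threads.  Note that on 32-bit
 * x86 hosts the read is implemented with LOCK CMPXCHG8B, so the variable must
 * live in writable memory even though the caller only reads it.
 *
 *      static uint64_t volatile g_cbDemoTotal;
 *
 *      uint64_t cbSnapshot = ASMAtomicReadU64(&g_cbDemoTotal);
 */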
2230
2231
2232/**
2233 * Atomically reads an unsigned 64-bit value, unordered.
2234 *
2235 * @returns Current *pu64 value
2236 * @param pu64 Pointer to the 64-bit variable to read.
2237 * The memory pointed to must be writable.
2238 *
2239 * @remarks This may fault if the memory is read-only!
2240 * @remarks x86: Requires a Pentium or later.
2241 */
2242#if !defined(RT_ARCH_AMD64) \
2243 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2244 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2245RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2246#else
2247DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2248{
2249 uint64_t u64;
2250# ifdef RT_ARCH_AMD64
2251 Assert(!((uintptr_t)pu64 & 7));
2252/*# if RT_INLINE_ASM_GNU_STYLE
2253 Assert(!((uintptr_t)pu64 & 7));
2254 __asm__ __volatile__("movq %1, %0\n\t"
2255 : "=r" (u64)
2256 : "m" (*pu64));
2257# else
2258 __asm
2259 {
2260 mov rdx, [pu64]
2261 mov rax, [rdx]
2262 mov [u64], rax
2263 }
2264# endif */
2265 u64 = *pu64;
2266
2267# elif defined(RT_ARCH_X86)
2268# if RT_INLINE_ASM_GNU_STYLE
2269# if defined(PIC) || defined(__PIC__)
2270 uint32_t u32EBX = 0;
2271 uint32_t u32Spill;
2272 Assert(!((uintptr_t)pu64 & 7));
2273 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2274 "xor %%ecx,%%ecx\n\t"
2275 "xor %%edx,%%edx\n\t"
2276 "xchgl %%ebx, %3\n\t"
2277 "lock; cmpxchg8b (%4)\n\t"
2278 "movl %3, %%ebx\n\t"
2279 : "=A" (u64)
2280# if RT_GNUC_PREREQ(4, 3)
2281 , "+m" (*pu64)
2282# else
2283 , "=m" (*pu64)
2284# endif
2285 , "=c" (u32Spill)
2286 : "m" (u32EBX)
2287 , "S" (pu64)
2288 : "cc");
2289# else /* !PIC */
2290 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2291 : "=A" (u64)
2292 , "+m" (*pu64)
2293 : "0" (0ULL)
2294 , "b" (0)
2295 , "c" (0)
2296 : "cc");
2297# endif
2298# else
2299 Assert(!((uintptr_t)pu64 & 7));
2300 __asm
2301 {
2302 xor eax, eax
2303 xor edx, edx
2304 mov edi, pu64
2305 xor ecx, ecx
2306 xor ebx, ebx
2307 lock cmpxchg8b [edi]
2308 mov dword ptr [u64], eax
2309 mov dword ptr [u64 + 4], edx
2310 }
2311# endif
2312
2313# elif defined(RT_ARCH_ARM64)
2314 Assert(!((uintptr_t)pu64 & 7));
2315 u64 = *pu64;
2316
2317# elif defined(RT_ARCH_ARM32)
2318 Assert(!((uintptr_t)pu64 & 7));
2319 __asm__ __volatile__("ldrexd %0, %H0, [%1]\n\t"
2320 : "=&r" (u64)
2321 : "r" (pu64)
2322 : );
2323
2324# else
2325# error "Port me"
2326# endif
2327 return u64;
2328}
2329#endif
2330
2331
2332/**
2333 * Atomically reads a signed 64-bit value, ordered.
2334 *
2335 * @returns Current *pi64 value
2336 * @param pi64 Pointer to the 64-bit variable to read.
2337 * The memory pointed to must be writable.
2338 *
2339 * @remarks This may fault if the memory is read-only!
2340 * @remarks x86: Requires a Pentium or later.
2341 */
2342DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2343{
2344 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2345}
2346
2347
2348/**
2349 * Atomically reads a signed 64-bit value, unordered.
2350 *
2351 * @returns Current *pi64 value
2352 * @param pi64 Pointer to the 64-bit variable to read.
2353 * The memory pointed to must be writable.
2354 *
2355 * @remarks This will fault if the memory is read-only!
2356 * @remarks x86: Requires a Pentium or later.
2357 */
2358DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2359{
2360 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2361}
2362
2363
2364/**
2365 * Atomically reads a size_t value, ordered.
2366 *
2367 * @returns Current *pcb value
2368 * @param pcb Pointer to the size_t variable to read.
2369 */
2370DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2371{
2372#if ARCH_BITS == 64
2373 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2374#elif ARCH_BITS == 32
2375 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2376#elif ARCH_BITS == 16
2377 AssertCompileSize(size_t, 2);
2378 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2379#else
2380# error "Unsupported ARCH_BITS value"
2381#endif
2382}
2383
2384
2385/**
2386 * Atomically reads a size_t value, unordered.
2387 *
2388 * @returns Current *pcb value
2389 * @param pcb Pointer to the size_t variable to read.
2390 */
2391DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2392{
2393#if ARCH_BITS == 64
2394 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2395#elif ARCH_BITS == 32
2396 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2397#elif ARCH_BITS == 16
2398 AssertCompileSize(size_t, 2);
2399 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2400#else
2401# error "Unsupported ARCH_BITS value"
2402#endif
2403}
2404
2405
2406/**
2407 * Atomically reads a pointer value, ordered.
2408 *
2409 * @returns Current *pv value
2410 * @param ppv Pointer to the pointer variable to read.
2411 *
2412 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2413 * requires less typing (no casts).
2414 */
2415DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2416{
2417#if ARCH_BITS == 32 || ARCH_BITS == 16
2418 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2419#elif ARCH_BITS == 64
2420 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2421#else
2422# error "ARCH_BITS is bogus"
2423#endif
2424}
2425
2426/**
2427 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2428 *
2429 * @returns Current *pv value
2430 * @param ppv Pointer to the pointer variable to read.
2431 * @param Type The type of *ppv, sans volatile.
2432 */
2433#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2434# define ASMAtomicReadPtrT(ppv, Type) \
2435 __extension__ \
2436 ({\
2437 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2438 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2439 pvTypeChecked; \
2440 })
2441#else
2442# define ASMAtomicReadPtrT(ppv, Type) \
2443 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2444#endif
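
/* Example (illustrative sketch; DEMOSTATE, PDEMOSTATE and g_pDemoState are made
 * up): ASMAtomicReadPtrT hands back the value already cast to the requested
 * type, whereas plain ASMAtomicReadPtr requires casting both ways.
 *
 *      typedef struct DEMOSTATE *PDEMOSTATE;
 *      static PDEMOSTATE volatile g_pDemoState;
 *
 *      PDEMOSTATE pState1 = ASMAtomicReadPtrT(&g_pDemoState, PDEMOSTATE);
 *      PDEMOSTATE pState2 = (PDEMOSTATE)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)&g_pDemoState);
 */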
2445
2446
2447/**
2448 * Atomically reads a pointer value, unordered.
2449 *
2450 * @returns Current *pv value
2451 * @param ppv Pointer to the pointer variable to read.
2452 *
2453 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2454 * requires less typing (no casts).
2455 */
2456DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2457{
2458#if ARCH_BITS == 32 || ARCH_BITS == 16
2459 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2460#elif ARCH_BITS == 64
2461 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2462#else
2463# error "ARCH_BITS is bogus"
2464#endif
2465}
2466
2467
2468/**
2469 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2470 *
2471 * @returns Current *pv value
2472 * @param ppv Pointer to the pointer variable to read.
2473 * @param Type The type of *ppv, sans volatile.
2474 */
2475#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2476# define ASMAtomicUoReadPtrT(ppv, Type) \
2477 __extension__ \
2478 ({\
2479 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2480 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2481 pvTypeChecked; \
2482 })
2483#else
2484# define ASMAtomicUoReadPtrT(ppv, Type) \
2485 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2486#endif
2487
2488
2489/**
2490 * Atomically reads a boolean value, ordered.
2491 *
2492 * @returns Current *pf value
2493 * @param pf Pointer to the boolean variable to read.
2494 */
2495DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2496{
2497 ASMMemoryFence();
2498 return *pf; /* byte reads are atomic on x86 */
2499}
2500
2501
2502/**
2503 * Atomically reads a boolean value, unordered.
2504 *
2505 * @returns Current *pf value
2506 * @param pf Pointer to the boolean variable to read.
2507 */
2508DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2509{
2510 return *pf; /* byte reads are atomic on x86 */
2511}
2512
2513
2514/**
2515 * Atomically read a typical IPRT handle value, ordered.
2516 *
2517 * @param ph Pointer to the handle variable to read.
2518 * @param phRes Where to store the result.
2519 *
2520 * @remarks This doesn't currently work for all handles (like RTFILE).
2521 */
2522#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2523# define ASMAtomicReadHandle(ph, phRes) \
2524 do { \
2525 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2526 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2527 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2528 } while (0)
2529#elif HC_ARCH_BITS == 64
2530# define ASMAtomicReadHandle(ph, phRes) \
2531 do { \
2532 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2533 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2534 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2535 } while (0)
2536#else
2537# error HC_ARCH_BITS
2538#endif
2539
2540
2541/**
2542 * Atomically read a typical IPRT handle value, unordered.
2543 *
2544 * @param ph Pointer to the handle variable to read.
2545 * @param phRes Where to store the result.
2546 *
2547 * @remarks This doesn't currently work for all handles (like RTFILE).
2548 */
2549#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2550# define ASMAtomicUoReadHandle(ph, phRes) \
2551 do { \
2552 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2553 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2554 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2555 } while (0)
2556#elif HC_ARCH_BITS == 64
2557# define ASMAtomicUoReadHandle(ph, phRes) \
2558 do { \
2559 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2560 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2561 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2562 } while (0)
2563#else
2564# error HC_ARCH_BITS
2565#endif
2566
2567
2568/**
2569 * Atomically read a value whose size might differ
2570 * between platforms or compilers, ordered.
2571 *
2572 * @param pu Pointer to the variable to read.
2573 * @param puRes Where to store the result.
2574 */
2575#define ASMAtomicReadSize(pu, puRes) \
2576 do { \
2577 switch (sizeof(*(pu))) { \
2578 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2579 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2580 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2581 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2582 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2583 } \
2584 } while (0)
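
/* Example (illustrative sketch; g_uDemoReg is made up): reading a variable whose
 * width depends on the target architecture, letting the macro select the right
 * fixed-size helper at compile time.
 *
 *      static RTCCUINTREG volatile g_uDemoReg;
 *
 *      RTCCUINTREG uValue;
 *      ASMAtomicReadSize(&g_uDemoReg, &uValue);
 */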
2585
2586
2587/**
2588 * Atomically read a value whose size might differ
2589 * between platforms or compilers, unordered.
2590 *
2591 * @param pu Pointer to the variable to read.
2592 * @param puRes Where to store the result.
2593 */
2594#define ASMAtomicUoReadSize(pu, puRes) \
2595 do { \
2596 switch (sizeof(*(pu))) { \
2597 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2598 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2599 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2600 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2601 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2602 } \
2603 } while (0)
2604
2605
2606/**
2607 * Atomically writes an unsigned 8-bit value, ordered.
2608 *
2609 * @param pu8 Pointer to the 8-bit variable.
2610 * @param u8 The 8-bit value to assign to *pu8.
2611 */
2612DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2613{
2614 /** @todo Any possible ARM32/ARM64 optimizations here? */
2615 ASMAtomicXchgU8(pu8, u8);
2616}
2617
2618
2619/**
2620 * Atomically writes an unsigned 8-bit value, unordered.
2621 *
2622 * @param pu8 Pointer to the 8-bit variable.
2623 * @param u8 The 8-bit value to assign to *pu8.
2624 */
2625DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2626{
2627 *pu8 = u8; /* byte writes are atomic on x86 */
2628}
2629
2630
2631/**
2632 * Atomically writes a signed 8-bit value, ordered.
2633 *
2634 * @param pi8 Pointer to the 8-bit variable to read.
2635 * @param i8 The 8-bit value to assign to *pi8.
2636 */
2637DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
2638{
2639 /** @todo Any possible ARM32/ARM64 optimizations here? */
2640 ASMAtomicXchgS8(pi8, i8);
2641}
2642
2643
2644/**
2645 * Atomically writes a signed 8-bit value, unordered.
2646 *
2647 * @param pi8 Pointer to the 8-bit variable to write.
2648 * @param i8 The 8-bit value to assign to *pi8.
2649 */
2650DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
2651{
2652 *pi8 = i8; /* byte writes are atomic on x86 */
2653}
2654
2655
2656/**
2657 * Atomically writes an unsigned 16-bit value, ordered.
2658 *
2659 * @param pu16 Pointer to the 16-bit variable to write.
2660 * @param u16 The 16-bit value to assign to *pu16.
2661 */
2662DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
2663{
2664 /** @todo Any possible ARM32/ARM64 optimizations here? */
2665 ASMAtomicXchgU16(pu16, u16);
2666}
2667
2668
2669/**
2670 * Atomically writes an unsigned 16-bit value, unordered.
2671 *
2672 * @param pu16 Pointer to the 16-bit variable to write.
2673 * @param u16 The 16-bit value to assign to *pu16.
2674 */
2675DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
2676{
2677 Assert(!((uintptr_t)pu16 & 1));
2678 *pu16 = u16;
2679}
2680
2681
2682/**
2683 * Atomically writes a signed 16-bit value, ordered.
2684 *
2685 * @param pi16 Pointer to the 16-bit variable to write.
2686 * @param i16 The 16-bit value to assign to *pi16.
2687 */
2688DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
2689{
2690 /** @todo Any possible ARM32/ARM64 optimizations here? */
2691 ASMAtomicXchgS16(pi16, i16);
2692}
2693
2694
2695/**
2696 * Atomically writes a signed 16-bit value, unordered.
2697 *
2698 * @param pi16 Pointer to the 16-bit variable to write.
2699 * @param i16 The 16-bit value to assign to *pi16.
2700 */
2701DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
2702{
2703 Assert(!((uintptr_t)pi16 & 1));
2704 *pi16 = i16;
2705}
2706
2707
2708/**
2709 * Atomically writes an unsigned 32-bit value, ordered.
2710 *
2711 * @param pu32 Pointer to the 32-bit variable to write.
2712 * @param u32 The 32-bit value to assign to *pu32.
2713 */
2714DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2715{
2716 /** @todo Any possible ARM32/ARM64 optimizations here? */
2717 ASMAtomicXchgU32(pu32, u32);
2718}
2719
2720
2721/**
2722 * Atomically writes an unsigned 32-bit value, unordered.
2723 *
2724 * @param pu32 Pointer to the 32-bit variable to write.
2725 * @param u32 The 32-bit value to assign to *pu32.
2726 */
2727DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2728{
2729 Assert(!((uintptr_t)pu32 & 3));
2730#if ARCH_BITS >= 32
2731 *pu32 = u32;
2732#else
2733 ASMAtomicXchgU32(pu32, u32);
2734#endif
2735}
2736
2737
2738/**
2739 * Atomically writes a signed 32-bit value, ordered.
2740 *
2741 * @param pi32 Pointer to the 32-bit variable to write.
2742 * @param i32 The 32-bit value to assign to *pi32.
2743 */
2744DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2745{
2746 ASMAtomicXchgS32(pi32, i32);
2747}
2748
2749
2750/**
2751 * Atomically writes a signed 32-bit value, unordered.
2752 *
2753 * @param pi32 Pointer to the 32-bit variable to write.
2754 * @param i32 The 32-bit value to assign to *pi32.
2755 */
2756DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2757{
2758 Assert(!((uintptr_t)pi32 & 3));
2759#if ARCH_BITS >= 32
2760 *pi32 = i32;
2761#else
2762 ASMAtomicXchgS32(pi32, i32);
2763#endif
2764}
2765
2766
2767/**
2768 * Atomically writes an unsigned 64-bit value, ordered.
2769 *
2770 * @param pu64 Pointer to the 64-bit variable to write.
2771 * @param u64 The 64-bit value to assign to *pu64.
2772 */
2773DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2774{
2775 /** @todo Any possible ARM32/ARM64 optimizations here? */
2776 ASMAtomicXchgU64(pu64, u64);
2777}
2778
2779
2780/**
2781 * Atomically writes an unsigned 64-bit value, unordered.
2782 *
2783 * @param pu64 Pointer to the 64-bit variable to write.
2784 * @param u64 The 64-bit value to assign to *pu64.
2785 */
2786DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2787{
2788 Assert(!((uintptr_t)pu64 & 7));
2789#if ARCH_BITS == 64
2790 *pu64 = u64;
2791#else
2792 ASMAtomicXchgU64(pu64, u64);
2793#endif
2794}
2795
2796
2797/**
2798 * Atomically writes a signed 64-bit value, ordered.
2799 *
2800 * @param pi64 Pointer to the 64-bit variable to write.
2801 * @param i64 The 64-bit value to assign to *pi64.
2802 */
2803DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2804{
2805 /** @todo Any possible ARM32/ARM64 optimizations here? */
2806 ASMAtomicXchgS64(pi64, i64);
2807}
2808
2809
2810/**
2811 * Atomically writes a signed 64-bit value, unordered.
2812 *
2813 * @param pi64 Pointer to the 64-bit variable to write.
2814 * @param i64 The 64-bit value to assign to *pi64.
2815 */
2816DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2817{
2818 Assert(!((uintptr_t)pi64 & 7));
2819#if ARCH_BITS == 64
2820 *pi64 = i64;
2821#else
2822 ASMAtomicXchgS64(pi64, i64);
2823#endif
2824}
2825
2826
2827/**
2828 * Atomically writes a size_t value, ordered.
2829 *
2830 * @returns nothing.
2831 * @param pcb Pointer to the size_t variable to write.
2832 * @param cb The value to assign to *pcb.
2833 */
2834DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
2835{
2836#if ARCH_BITS == 64
2837 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2838#elif ARCH_BITS == 32
2839 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2840#elif ARCH_BITS == 16
2841 AssertCompileSize(size_t, 2);
2842 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2843#else
2844# error "Unsupported ARCH_BITS value"
2845#endif
2846}
2847
2848
2849/**
2850 * Atomically writes a boolean value, ordered.
2851 *
2852 * @param pf Pointer to the boolean variable to write.
2853 * @param f The boolean value to assign to *pf.
2854 */
2855DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
2856{
2857 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2858}
2859
2860
2861/**
2862 * Atomically writes a boolean value, unordered.
2863 *
2864 * @param pf Pointer to the boolean variable to write.
2865 * @param f The boolean value to assign to *pf.
2866 */
2867DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
2868{
2869 *pf = f; /* byte writes are atomic on x86 */
2870}
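
/* Example (illustrative sketch; g_fDemoTerminate and demoDoWork are made up): a
 * termination flag set with ordered semantics by a controlling thread and
 * polled by a worker loop.
 *
 *      static bool volatile g_fDemoTerminate;
 *
 *      // controller
 *      ASMAtomicWriteBool(&g_fDemoTerminate, true);
 *
 *      // worker
 *      while (!ASMAtomicReadBool(&g_fDemoTerminate))
 *          demoDoWork();
 */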
2871
2872
2873/**
2874 * Atomically writes a pointer value, ordered.
2875 *
2876 * @param ppv Pointer to the pointer variable to write.
2877 * @param pv The pointer value to assign to *ppv.
2878 */
2879DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
2880{
2881#if ARCH_BITS == 32 || ARCH_BITS == 16
2882 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2883#elif ARCH_BITS == 64
2884 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2885#else
2886# error "ARCH_BITS is bogus"
2887#endif
2888}
2889
2890
2891/**
2892 * Atomically writes a pointer value, ordered.
2893 *
2894 * @param ppv Pointer to the pointer variable to write.
2895 * @param pv The pointer value to assign to *ppv. If NULL use
2896 * ASMAtomicWriteNullPtr or you'll land in trouble.
2897 *
2898 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2899 * NULL.
2900 */
2901#ifdef __GNUC__
2902# define ASMAtomicWritePtr(ppv, pv) \
2903 do \
2904 { \
2905 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2906 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2907 \
2908 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2909 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2910 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2911 \
2912 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2913 } while (0)
2914#else
2915# define ASMAtomicWritePtr(ppv, pv) \
2916 do \
2917 { \
2918 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2919 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2920 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2921 \
2922 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2923 } while (0)
2924#endif
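
/* Example (illustrative sketch; DEMOCFG, g_pDemoCfg and demoCfgCreate are made
 * up): publishing a fully initialized structure with an ordered pointer write,
 * so the initializing stores cannot be reordered past the pointer store seen by
 * readers using ASMAtomicReadPtrT.
 *
 *      typedef struct DEMOCFG { uint32_t cItems; } DEMOCFG;
 *      typedef DEMOCFG *PDEMOCFG;
 *      static PDEMOCFG volatile g_pDemoCfg;
 *
 *      PDEMOCFG pCfg = demoCfgCreate();        // initialize everything first
 *      ASMAtomicWritePtr(&g_pDemoCfg, pCfg);   // then publish
 */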
2925
2926
2927/**
2928 * Atomically sets a pointer to NULL, ordered.
2929 *
2930 * @param ppv Pointer to the pointer variable that should be set to NULL.
2931 *
2932 * @remarks This is relatively type safe on GCC platforms.
2933 */
2934#if RT_GNUC_PREREQ(4, 2)
2935# define ASMAtomicWriteNullPtr(ppv) \
2936 do \
2937 { \
2938 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2939 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2940 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2941 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2942 } while (0)
2943#else
2944# define ASMAtomicWriteNullPtr(ppv) \
2945 do \
2946 { \
2947 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2948 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2949 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2950 } while (0)
2951#endif
2952
2953
2954/**
2955 * Atomically writes a pointer value, unordered.
2956 *
2958 * @param ppv Pointer to the pointer variable.
2959 * @param pv The pointer value to assign to *ppv. If NULL use
2960 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2961 *
2962 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2963 * NULL.
2964 */
2965#if RT_GNUC_PREREQ(4, 2)
2966# define ASMAtomicUoWritePtr(ppv, pv) \
2967 do \
2968 { \
2969 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2970 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2971 \
2972 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2973 AssertCompile(sizeof(pv) == sizeof(void *)); \
2974 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2975 \
2976 *(ppvTypeChecked) = pvTypeChecked; \
2977 } while (0)
2978#else
2979# define ASMAtomicUoWritePtr(ppv, pv) \
2980 do \
2981 { \
2982 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2983 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2984 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2985 *(ppv) = pv; \
2986 } while (0)
2987#endif
2988
2989
2990/**
2991 * Atomically sets a pointer to NULL, unordered.
2992 *
2993 * @param ppv Pointer to the pointer variable that should be set to NULL.
2994 *
2995 * @remarks This is relatively type safe on GCC platforms.
2996 */
2997#ifdef __GNUC__
2998# define ASMAtomicUoWriteNullPtr(ppv) \
2999 do \
3000 { \
3001 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3002 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3003 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3004 *(ppvTypeChecked) = NULL; \
3005 } while (0)
3006#else
3007# define ASMAtomicUoWriteNullPtr(ppv) \
3008 do \
3009 { \
3010 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3011 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3012 *(ppv) = NULL; \
3013 } while (0)
3014#endif
3015
3016
3017/**
3018 * Atomically write a typical IPRT handle value, ordered.
3019 *
3020 * @param ph Pointer to the variable to update.
3021 * @param hNew The value to assign to *ph.
3022 *
3023 * @remarks This doesn't currently work for all handles (like RTFILE).
3024 */
3025#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3026# define ASMAtomicWriteHandle(ph, hNew) \
3027 do { \
3028 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3029 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3030 } while (0)
3031#elif HC_ARCH_BITS == 64
3032# define ASMAtomicWriteHandle(ph, hNew) \
3033 do { \
3034 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3035 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3036 } while (0)
3037#else
3038# error HC_ARCH_BITS
3039#endif
3040
3041
3042/**
3043 * Atomically write a typical IPRT handle value, unordered.
3044 *
3045 * @param ph Pointer to the variable to update.
3046 * @param hNew The value to assign to *ph.
3047 *
3048 * @remarks This doesn't currently work for all handles (like RTFILE).
3049 */
3050#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3051# define ASMAtomicUoWriteHandle(ph, hNew) \
3052 do { \
3053 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3054 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3055 } while (0)
3056#elif HC_ARCH_BITS == 64
3057# define ASMAtomicUoWriteHandle(ph, hNew) \
3058 do { \
3059 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3060 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3061 } while (0)
3062#else
3063# error HC_ARCH_BITS
3064#endif
3065
3066
3067/**
3068 * Atomically write a value whose size might differ
3069 * between platforms or compilers, ordered.
3070 *
3071 * @param pu Pointer to the variable to update.
3072 * @param uNew The value to assign to *pu.
3073 */
3074#define ASMAtomicWriteSize(pu, uNew) \
3075 do { \
3076 switch (sizeof(*(pu))) { \
3077 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3078 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3079 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3080 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3081 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3082 } \
3083 } while (0)
3084
3085/**
3086 * Atomically write a value whose size might differ
3087 * between platforms or compilers, unordered.
3088 *
3089 * @param pu Pointer to the variable to update.
3090 * @param uNew The value to assign to *pu.
3091 */
3092#define ASMAtomicUoWriteSize(pu, uNew) \
3093 do { \
3094 switch (sizeof(*(pu))) { \
3095 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3096 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3097 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3098 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3099 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3100 } \
3101 } while (0)
3102
3103
3104
3105/**
3106 * Atomically exchanges and adds to a 16-bit value, ordered.
3107 *
3108 * @returns The old value.
3109 * @param pu16 Pointer to the value.
3110 * @param u16 Number to add.
3111 *
3112 * @remarks Currently not implemented, just to make 16-bit code happy.
3113 * @remarks x86: Requires a 486 or later.
3114 */
3115RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3116
3117
3118/**
3119 * Atomically exchanges and adds to a 32-bit value, ordered.
3120 *
3121 * @returns The old value.
3122 * @param pu32 Pointer to the value.
3123 * @param u32 Number to add.
3124 *
3125 * @remarks x86: Requires a 486 or later.
3126 */
3127#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3128RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3129#else
3130DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3131{
3132# if RT_INLINE_ASM_USES_INTRIN
3133 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3134 return u32;
3135
3136# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3137# if RT_INLINE_ASM_GNU_STYLE
3138 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3139 : "=r" (u32)
3140 , "=m" (*pu32)
3141 : "0" (u32)
3142 , "m" (*pu32)
3143 : "memory"
3144 , "cc");
3145 return u32;
3146# else
3147 __asm
3148 {
3149 mov eax, [u32]
3150# ifdef RT_ARCH_AMD64
3151 mov rdx, [pu32]
3152 lock xadd [rdx], eax
3153# else
3154 mov edx, [pu32]
3155 lock xadd [edx], eax
3156# endif
3157 mov [u32], eax
3158 }
3159 return u32;
3160# endif
3161
3162# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3163 uint32_t u32Ret;
3164 uint32_t rcSpill;
3165 uint32_t u32Spill;
3166 __asm__ __volatile__(".Ltry_again_add_u32_%=:\n\t"
3167 "dmb sy\n\t"
3168# if defined(RT_ARCH_ARM64)
3169 "ldaxr %w0, [%4]\n\t"
3170 "add %w1, %w0, %w3\n\t"
3171 "stlxr %w2, %w1, [%4]\n\t"
3172 "cbnz %w2, .Ltry_again_add_u32_%=\n\t"
3173# else
3174 "ldrex %0, [%4]\n\t"
3175 "add %1, %0, %3\n\t"
3176 "strex %2, %1, [%4]\n\t"
3177 "cmp %2, #0\n\t"
3178 "bne .Ltry_again_add_u32_%=\n\t"
3179# endif
3180 : "=&r" (u32Ret),
3181 "=&r" (u32Spill),
3182 "=&r" (rcSpill)
3183 : "r" (u32),
3184 "r" (pu32)
3185 : "memory",
3186 "cc");
3187 return u32Ret;
3188
3189# else
3190# error "Port me"
3191# endif
3192}
3193#endif
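
/* Example (illustrative sketch; g_idxDemoNext and g_aDemoSlots are made up):
 * using the returned old value as a fetch-and-add to hand out slots in a shared
 * ring of entries.
 *
 *      static uint32_t volatile g_idxDemoNext;
 *      static uint32_t          g_aDemoSlots[256];
 *
 *      uint32_t idx = ASMAtomicAddU32(&g_idxDemoNext, 1) % RT_ELEMENTS(g_aDemoSlots);
 *      g_aDemoSlots[idx] = 42;
 */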
3194
3195
3196/**
3197 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3198 *
3199 * @returns The old value.
3200 * @param pi32 Pointer to the value.
3201 * @param i32 Number to add.
3202 *
3203 * @remarks x86: Requires a 486 or later.
3204 */
3205DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3206{
3207 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3208}
3209
3210
3211/**
3212 * Atomically exchanges and adds to a 64-bit value, ordered.
3213 *
3214 * @returns The old value.
3215 * @param pu64 Pointer to the value.
3216 * @param u64 Number to add.
3217 *
3218 * @remarks x86: Requires a Pentium or later.
3219 */
3220#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3221DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3222#else
3223DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3224{
3225# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3226 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3227 return u64;
3228
3229# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3230 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3231 : "=r" (u64)
3232 , "=m" (*pu64)
3233 : "0" (u64)
3234 , "m" (*pu64)
3235 : "memory"
3236 , "cc");
3237 return u64;
3238
3239# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3240 uint64_t u64Ret;
3241 uint32_t rcSpill;
3242 uint64_t u64Spill;
3243 __asm__ __volatile__(".Ltry_again_add_u64_%=:\n\t"
3244 "dmb sy\n\t"
3245# if defined(RT_ARCH_ARM64)
3246 "ldaxr %0, [%4]\n\t"
3247 "add %1, %0, %3\n\t"
3248 "stlxr %w2, %1, [%4]\n\t"
3249 "cbnz %w2, .Ltry_again_add_u64_%=\n\t"
3250# else
3251 "ldrexd %0, %H0, [%4]\n\t"
3252 "add %1, %0, %3\n\t"
3253 "adc %H1, %H0, %H3\n\t"
3254 "strexd %2, %1, %H1, [%4]\n\t"
3255 "cmp %2, #0\n\t"
3256 "bne .Ltry_again_add_u64_%=\n\t"
3257# endif
3258 : "=&r" (u64Ret),
3259 "=&r" (u64Spill),
3260 "=&r" (rcSpill)
3261 : "r" (u64),
3262 "r" (pu64)
3263 : "memory",
3264 "cc");
3265 return u64Ret;
3266
3267# else
3268 uint64_t u64Old;
3269 for (;;)
3270 {
3271 uint64_t u64New;
3272 u64Old = ASMAtomicUoReadU64(pu64);
3273 u64New = u64Old + u64;
3274 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3275 break;
3276 ASMNopPause();
3277 }
3278 return u64Old;
3279# endif
3280}
3281#endif
3282
3283
3284/**
3285 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3286 *
3287 * @returns The old value.
3288 * @param pi64 Pointer to the value.
3289 * @param i64 Number to add.
3290 *
3291 * @remarks x86: Requires a Pentium or later.
3292 */
3293DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3294{
3295 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3296}
3297
3298
3299/**
3300 * Atomically exchanges and adds to a size_t value, ordered.
3301 *
3302 * @returns The old value.
3303 * @param pcb Pointer to the size_t value.
3304 * @param cb Number to add.
3305 */
3306DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3307{
3308#if ARCH_BITS == 64
3309 AssertCompileSize(size_t, 8);
3310 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3311#elif ARCH_BITS == 32
3312 AssertCompileSize(size_t, 4);
3313 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3314#elif ARCH_BITS == 16
3315 AssertCompileSize(size_t, 2);
3316 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3317#else
3318# error "Unsupported ARCH_BITS value"
3319#endif
3320}
3321
3322
3323/**
3324 * Atomically exchanges and adds a value whose size might differ between
3325 * platforms or compilers, ordered.
3326 *
3327 * @param pu Pointer to the variable to update.
3328 * @param uNew The value to add to *pu.
3329 * @param puOld Where to store the old value.
3330 */
3331#define ASMAtomicAddSize(pu, uNew, puOld) \
3332 do { \
3333 switch (sizeof(*(pu))) { \
3334 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3335 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3336 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3337 } \
3338 } while (0)
3339
3340
3341
3342/**
3343 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3344 *
3345 * @returns The old value.
3346 * @param pu16 Pointer to the value.
3347 * @param u16 Number to subtract.
3348 *
3349 * @remarks x86: Requires a 486 or later.
3350 */
3351DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3352{
3353 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3354}
3355
3356
3357/**
3358 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3359 *
3360 * @returns The old value.
3361 * @param pi16 Pointer to the value.
3362 * @param i16 Number to subtract.
3363 *
3364 * @remarks x86: Requires a 486 or later.
3365 */
3366DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3367{
3368 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3369}
3370
3371
3372/**
3373 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3374 *
3375 * @returns The old value.
3376 * @param pu32 Pointer to the value.
3377 * @param u32 Number to subtract.
3378 *
3379 * @remarks x86: Requires a 486 or later.
3380 */
3381DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3382{
3383 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3384}
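
/* Example (illustrative sketch; g_cbDemoInFlight, cbDone and demoAllDone are
 * made up): returning a byte reservation and using the old value returned by
 * ASMAtomicSubU32 to detect when the counter just reached zero.
 *
 *      static uint32_t volatile g_cbDemoInFlight;
 *
 *      uint32_t cbOld = ASMAtomicSubU32(&g_cbDemoInFlight, cbDone);
 *      if (cbOld == cbDone)
 *          demoAllDone();
 */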
3385
3386
3387/**
3388 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3389 *
3390 * @returns The old value.
3391 * @param pi32 Pointer to the value.
3392 * @param i32 Number to subtract.
3393 *
3394 * @remarks x86: Requires a 486 or later.
3395 */
3396DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3397{
3398 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3399}
3400
3401
3402/**
3403 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3404 *
3405 * @returns The old value.
3406 * @param pu64 Pointer to the value.
3407 * @param u64 Number to subtract.
3408 *
3409 * @remarks x86: Requires a Pentium or later.
3410 */
3411DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3412{
3413 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3414}
3415
3416
3417/**
3418 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3419 *
3420 * @returns The old value.
3421 * @param pi64 Pointer to the value.
3422 * @param i64 Number to subtract.
3423 *
3424 * @remarks x86: Requires a Pentium or later.
3425 */
3426DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3427{
3428 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3429}
3430
3431
3432/**
3433 * Atomically exchanges and subtracts from a size_t value, ordered.
3434 *
3435 * @returns The old value.
3436 * @param pcb Pointer to the size_t value.
3437 * @param cb Number to subtract.
3438 *
3439 * @remarks x86: Requires a 486 or later.
3440 */
3441DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3442{
3443#if ARCH_BITS == 64
3444 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3445#elif ARCH_BITS == 32
3446 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3447#elif ARCH_BITS == 16
3448 AssertCompileSize(size_t, 2);
3449 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3450#else
3451# error "Unsupported ARCH_BITS value"
3452#endif
3453}
3454
3455
3456/**
3457 * Atomically exchanges and subtracts a value whose size might differ between
3458 * platforms or compilers, ordered.
3459 *
3460 * @param pu Pointer to the variable to update.
3461 * @param uNew The value to subtract from *pu.
3462 * @param puOld Where to store the old value.
3463 *
3464 * @remarks x86: Requires a 486 or later.
3465 */
3466#define ASMAtomicSubSize(pu, uNew, puOld) \
3467 do { \
3468 switch (sizeof(*(pu))) { \
3469 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3470 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3471 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3472 } \
3473 } while (0)
3474
3475
3476
3477/**
3478 * Atomically increment a 16-bit value, ordered.
3479 *
3480 * @returns The new value.
3481 * @param pu16 Pointer to the value to increment.
3482 * @remarks Not implemented. Just to make 16-bit code happy.
3483 *
3484 * @remarks x86: Requires a 486 or later.
3485 */
3486RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3487
3488
3489/**
3490 * Atomically increment a 32-bit value, ordered.
3491 *
3492 * @returns The new value.
3493 * @param pu32 Pointer to the value to increment.
3494 *
3495 * @remarks x86: Requires a 486 or later.
3496 */
3497#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3498RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3499#else
3500DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3501{
3502# if RT_INLINE_ASM_USES_INTRIN
3503 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3504
3505# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3506# if RT_INLINE_ASM_GNU_STYLE
3507 uint32_t u32;
3508 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3509 : "=r" (u32)
3510 , "=m" (*pu32)
3511 : "0" (1)
3512 , "m" (*pu32)
3513 : "memory"
3514 , "cc");
3515 return u32+1;
3516# else
3517 uint32_t u32;
     __asm
3518 {
3519 mov eax, 1
3520# ifdef RT_ARCH_AMD64
3521 mov rdx, [pu32]
3522 lock xadd [rdx], eax
3523# else
3524 mov edx, [pu32]
3525 lock xadd [edx], eax
3526# endif
3527 mov u32, eax
3528 }
3529 return u32+1;
3530# endif
3531
3532# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3533 uint32_t u32Ret;
3534 uint32_t rcSpill;
3535 __asm__ __volatile__(".Ltry_again_inc_u32_%=:\n\t"
3536 "dmb sy\n\t"
3537# if defined(RT_ARCH_ARM64)
3538 "ldaxr %w0, [%2]\n\t"
3539 "add %w0, %w0, #1\n\t"
3540 "stlxr %w1, %w0, [%2]\n\t"
3541 "cbnz %w1, .Ltry_again_inc_u32_%=\n\t"
3542# else
3543 "ldrex %0, [%2]\n\t"
3544 "add %0, %0, #1\n\t" /* arm6 / thumb2+ */
3545 "strex %1, %0, [%2]\n\t"
3546 "cmp %1, #0\n\t"
3547 "bne .Ltry_again_inc_u32_%=\n\t"
3548# endif
3549 : "=&r" (u32Ret),
3550 "=&r" (rcSpill)
3551 : "r" (pu32)
3552 : "memory",
3553 "cc");
3554 return u32Ret;
3555
3556# else
3557 return ASMAtomicAddU32(pu32, 1) + 1;
3558# endif
3559}
3560#endif
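
/* Example (illustrative sketch; the DEMOOBJ type is made up): a retain helper
 * for a reference counted object, returning the new count as reported by
 * ASMAtomicIncU32.
 *
 *      typedef struct DEMOOBJ { uint32_t volatile cRefs; } DEMOOBJ;
 *
 *      static uint32_t demoObjRetain(DEMOOBJ *pObj)
 *      {
 *          return ASMAtomicIncU32(&pObj->cRefs);
 *      }
 */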
3561
3562
3563/**
3564 * Atomically increment a signed 32-bit value, ordered.
3565 *
3566 * @returns The new value.
3567 * @param pi32 Pointer to the value to increment.
3568 *
3569 * @remarks x86: Requires a 486 or later.
3570 */
3571DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3572{
3573 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3574}
3575
3576
3577/**
3578 * Atomically increment a 64-bit value, ordered.
3579 *
3580 * @returns The new value.
3581 * @param pu64 Pointer to the value to increment.
3582 *
3583 * @remarks x86: Requires a Pentium or later.
3584 */
3585#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3586DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3587#else
3588DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3589{
3590# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3591 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
3592
3593# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3594 uint64_t u64;
3595 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3596 : "=r" (u64)
3597 , "=m" (*pu64)
3598 : "0" (1)
3599 , "m" (*pu64)
3600 : "memory"
3601 , "cc");
3602 return u64 + 1;
3603
3604# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3605 uint64_t u64Ret;
3606 uint32_t rcSpill;
3607 __asm__ __volatile__(".Ltry_again_inc_u64_%=:\n\t"
3608 "dmb sy\n\t"
3609# if defined(RT_ARCH_ARM64)
3610 "ldaxr %0, [%2]\n\t"
3611 "add %0, %0, #1\n\t"
3612 "stlxr %w1, %0, [%2]\n\t"
3613 "cbnz %w1, .Ltry_again_inc_u64_%=\n\t"
3614# else
3615 "ldrexd %0, %H0, [%2]\n\t"
3616 "add %0, %0, #1\n\t" /* arm6 / thumb2+ */
3617 "adc %H0, %H0, %3\n\t"
3618 "strexd %1, %0, %H0, [%2]\n\t"
3619 "cmp %1, #0\n\t"
3620 "bne .Ltry_again_inc_u64_%=\n\t"
3621# endif
3622 : "=&r" (u64Ret),
3623 "=&r" (rcSpill)
3624 : "r" (pu64)
3625# if !defined(RT_ARCH_ARM64)
3626 , "r" (0)
3627# endif
3628 : "memory",
3629 "cc");
3630 return u64Ret;
3631
3632# else
3633 return ASMAtomicAddU64(pu64, 1) + 1;
3634# endif
3635}
3636#endif
3637
3638
3639/**
3640 * Atomically increment a signed 64-bit value, ordered.
3641 *
3642 * @returns The new value.
3643 * @param pi64 Pointer to the value to increment.
3644 *
3645 * @remarks x86: Requires a Pentium or later.
3646 */
3647DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3648{
3649 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3650}
3651
3652
3653/**
3654 * Atomically increment a size_t value, ordered.
3655 *
3656 * @returns The new value.
3657 * @param pcb Pointer to the value to increment.
3658 *
3659 * @remarks x86: Requires a 486 or later.
3660 */
3661DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3662{
3663#if ARCH_BITS == 64
3664 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3665#elif ARCH_BITS == 32
3666 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3667#elif ARCH_BITS == 16
3668 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3669#else
3670# error "Unsupported ARCH_BITS value"
3671#endif
3672}
3673
3674
3675
3676/**
3677 * Atomically decrement an unsigned 16-bit value, ordered.
3678 *
3679 * @returns The new value.
3680 * @param pu16 Pointer to the value to decrement.
3681 * @remarks Not implemented. Just to make 16-bit code happy.
3682 *
3683 * @remarks x86: Requires a 486 or later.
3684 */
3685RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3686
3687
3688/**
3689 * Atomically decrement an unsigned 32-bit value, ordered.
3690 *
3691 * @returns The new value.
3692 * @param pu32 Pointer to the value to decrement.
3693 *
3694 * @remarks x86: Requires a 486 or later.
3695 */
3696#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3697RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3698#else
3699DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3700{
3701# if RT_INLINE_ASM_USES_INTRIN
3702 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
3703
# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3705# if RT_INLINE_ASM_GNU_STYLE
3706 uint32_t u32;
3707 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3708 : "=r" (u32)
3709 , "=m" (*pu32)
3710 : "0" (-1)
3711 , "m" (*pu32)
3712 : "memory"
3713 , "cc");
 return u32 - 1;
3715# else
3716 uint32_t u32;
3717 __asm
3718 {
3719 mov eax, -1
3720# ifdef RT_ARCH_AMD64
3721 mov rdx, [pu32]
3722 lock xadd [rdx], eax
3723# else
3724 mov edx, [pu32]
3725 lock xadd [edx], eax
3726# endif
3727 mov u32, eax
3728 }
 return u32 - 1;
3730# endif
3731
3732# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3733 uint32_t u32Ret;
3734 uint32_t rcSpill;
3735 __asm__ __volatile__(".Ltry_again_dec_u32_%=:\n\t"
3736 "dmb sy\n\t"
3737# if defined(RT_ARCH_ARM64)
3738 "ldaxr %w0, [%2]\n\t"
3739 "sub %w0, %w0, #1\n\t"
3740 "stlxr %w1, %w0, [%2]\n\t"
3741 "cbnz %w1, .Ltry_again_dec_u32_%=\n\t"
3742# else
3743 "ldrex %0, [%2]\n\t"
3744 "sub %0, %0, #1\n\t" /* arm6 / thumb2+ */
3745 "strex %1, %0, [%2]\n\t"
3746 "cmp %1, #0\n\t"
3747 "bne .Ltry_again_dec_u32_%=\n\t"
3748# endif
3749 : "=&r" (u32Ret),
3750 "=&r" (rcSpill)
3751 : "r" (pu32)
3752 : "memory",
3753 "cc");
3754 return u32Ret;
3755
3756# else
3757 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
3758# endif
3759}
3760#endif
3761
3762
3763/**
3764 * Atomically decrement a signed 32-bit value, ordered.
3765 *
3766 * @returns The new value.
3767 * @param pi32 Pointer to the value to decrement.
3768 *
3769 * @remarks x86: Requires a 486 or later.
3770 */
3771DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3772{
3773 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3774}
3775
3776
3777/**
3778 * Atomically decrement an unsigned 64-bit value, ordered.
3779 *
3780 * @returns The new value.
3781 * @param pu64 Pointer to the value to decrement.
3782 *
3783 * @remarks x86: Requires a Pentium or later.
3784 */
3785#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3786RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3787#else
3788DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3789{
3790# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3791 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3792
3793# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3794 uint64_t u64;
3795 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3796 : "=r" (u64)
3797 , "=m" (*pu64)
3798 : "0" (~(uint64_t)0)
3799 , "m" (*pu64)
3800 : "memory"
3801 , "cc");
 return u64 - 1;
3803
3804# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3805 uint64_t u64Ret;
3806 uint32_t rcSpill;
3807 __asm__ __volatile__(".Ltry_again_dec_u64_%=:\n\t"
3808 "dmb sy\n\t"
3809# if defined(RT_ARCH_ARM64)
3810 "ldaxr %0, [%2]\n\t"
3811 "sub %0, %0, #1\n\t"
3812 "stlxr %w1, %0, [%2]\n\t"
3813 "cbnz %w1, .Ltry_again_dec_u64_%=\n\t"
# else
 "ldrexd %0, %H0, [%2]\n\t"
 "subs %0, %0, #1\n\t" /* arm6 / thumb2+; subs sets the borrow for the sbc below */
 "sbc %H0, %H0, %3\n\t"
3818 "strexd %1, %0, %H0, [%2]\n\t"
3819 "cmp %1, #0\n\t"
3820 "bne .Ltry_again_dec_u64_%=\n\t"
3821# endif
3822 : "=&r" (u64Ret),
3823 "=&r" (rcSpill)
3824 : "r" (pu64)
3825# if !defined(RT_ARCH_ARM64)
3826 , "r" (0)
3827# endif
3828 : "memory",
3829 "cc");
3830 return u64Ret;
3831
3832# else
3833 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3834# endif
3835}
3836#endif
3837
3838
3839/**
3840 * Atomically decrement a signed 64-bit value, ordered.
3841 *
3842 * @returns The new value.
3843 * @param pi64 Pointer to the value to decrement.
3844 *
3845 * @remarks x86: Requires a Pentium or later.
3846 */
3847DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3848{
3849 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3850}
3851
3852
3853/**
3854 * Atomically decrement a size_t value, ordered.
3855 *
3856 * @returns The new value.
3857 * @param pcb Pointer to the value to decrement.
3858 *
3859 * @remarks x86: Requires a 486 or later.
3860 */
3861DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3862{
3863#if ARCH_BITS == 64
3864 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3865#elif ARCH_BITS == 32
3866 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3867#elif ARCH_BITS == 16
3868 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3869#else
3870# error "Unsupported ARCH_BITS value"
3871#endif
3872}
3873
3874
3875/**
3876 * Atomically Or an unsigned 32-bit value, ordered.
3877 *
 * @param pu32 Pointer to the variable to OR u32 with.
3879 * @param u32 The value to OR *pu32 with.
3880 *
3881 * @remarks x86: Requires a 386 or later.
3882 */
3883#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3884RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3885#else
3886DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3887{
3888# if RT_INLINE_ASM_USES_INTRIN
3889 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3890
3891# elif RT_INLINE_ASM_GNU_STYLE
3892 __asm__ __volatile__("lock; orl %1, %0\n\t"
3893 : "=m" (*pu32)
3894 : "ir" (u32)
3895 , "m" (*pu32)
3896 : "cc");
3897# else
3898 __asm
3899 {
3900 mov eax, [u32]
3901# ifdef RT_ARCH_AMD64
3902 mov rdx, [pu32]
3903 lock or [rdx], eax
3904# else
3905 mov edx, [pu32]
3906 lock or [edx], eax
3907# endif
3908 }
3909# endif
3910}
3911#endif
3912
3913
3914/**
3915 * Atomically Or a signed 32-bit value, ordered.
3916 *
 * @param pi32 Pointer to the variable to OR i32 with.
 * @param i32 The value to OR *pi32 with.
3919 *
3920 * @remarks x86: Requires a 386 or later.
3921 */
3922DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3923{
3924 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3925}
3926
3927
3928/**
3929 * Atomically Or an unsigned 64-bit value, ordered.
3930 *
 * @param pu64 Pointer to the variable to OR u64 with.
3932 * @param u64 The value to OR *pu64 with.
3933 *
3934 * @remarks x86: Requires a Pentium or later.
3935 */
3936#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3937DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3938#else
3939DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3940{
3941# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3942 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3943
3944# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3945 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3946 : "=m" (*pu64)
3947 : "r" (u64)
3948 , "m" (*pu64)
3949 : "cc");
3950# else
3951 for (;;)
3952 {
3953 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3954 uint64_t u64New = u64Old | u64;
3955 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3956 break;
3957 ASMNopPause();
3958 }
3959# endif
3960}
3961#endif
3962
3963
3964/**
3965 * Atomically Or a signed 64-bit value, ordered.
3966 *
 * @param pi64 Pointer to the variable to OR i64 with.
 * @param i64 The value to OR *pi64 with.
3969 *
3970 * @remarks x86: Requires a Pentium or later.
3971 */
3972DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3973{
3974 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3975}
3976
3977
3978/**
3979 * Atomically And an unsigned 32-bit value, ordered.
3980 *
 * @param pu32 Pointer to the variable to AND u32 with.
3982 * @param u32 The value to AND *pu32 with.
3983 *
3984 * @remarks x86: Requires a 386 or later.
3985 */
3986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3987RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3988#else
3989DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3990{
3991# if RT_INLINE_ASM_USES_INTRIN
3992 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3993
3994# elif RT_INLINE_ASM_GNU_STYLE
3995 __asm__ __volatile__("lock; andl %1, %0\n\t"
3996 : "=m" (*pu32)
3997 : "ir" (u32)
3998 , "m" (*pu32)
3999 : "cc");
4000# else
4001 __asm
4002 {
4003 mov eax, [u32]
4004# ifdef RT_ARCH_AMD64
4005 mov rdx, [pu32]
4006 lock and [rdx], eax
4007# else
4008 mov edx, [pu32]
4009 lock and [edx], eax
4010# endif
4011 }
4012# endif
4013}
4014#endif
4015
4016
4017/**
4018 * Atomically And a signed 32-bit value, ordered.
4019 *
 * @param pi32 Pointer to the variable to AND i32 with.
4021 * @param i32 The value to AND *pi32 with.
4022 *
4023 * @remarks x86: Requires a 386 or later.
4024 */
4025DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4026{
4027 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4028}
4029
4030
4031/**
4032 * Atomically And an unsigned 64-bit value, ordered.
4033 *
 * @param pu64 Pointer to the variable to AND u64 with.
4035 * @param u64 The value to AND *pu64 with.
4036 *
4037 * @remarks x86: Requires a Pentium or later.
4038 */
4039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4040DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4041#else
4042DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4043{
4044# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4045 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4046
4047# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4048 __asm__ __volatile__("lock; andq %1, %0\n\t"
4049 : "=m" (*pu64)
4050 : "r" (u64)
4051 , "m" (*pu64)
4052 : "cc");
4053# else
4054 for (;;)
4055 {
4056 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4057 uint64_t u64New = u64Old & u64;
4058 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4059 break;
4060 ASMNopPause();
4061 }
4062# endif
4063}
4064#endif
4065
4066
4067/**
4068 * Atomically And a signed 64-bit value, ordered.
4069 *
 * @param pi64 Pointer to the variable to AND i64 with.
4071 * @param i64 The value to AND *pi64 with.
4072 *
4073 * @remarks x86: Requires a Pentium or later.
4074 */
4075DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4076{
4077 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4078}
4079
4080
4081/**
4082 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4083 *
 * @param pu32 Pointer to the variable to OR u32 with.
4085 * @param u32 The value to OR *pu32 with.
4086 *
4087 * @remarks x86: Requires a 386 or later.
4088 */
4089#if RT_INLINE_ASM_EXTERNAL
4090RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4091#else
4092DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4093{
4094# if RT_INLINE_ASM_GNU_STYLE
4095 __asm__ __volatile__("orl %1, %0\n\t"
4096 : "=m" (*pu32)
4097 : "ir" (u32)
4098 , "m" (*pu32)
4099 : "cc");
4100# else
4101 __asm
4102 {
4103 mov eax, [u32]
4104# ifdef RT_ARCH_AMD64
4105 mov rdx, [pu32]
4106 or [rdx], eax
4107# else
4108 mov edx, [pu32]
4109 or [edx], eax
4110# endif
4111 }
4112# endif
4113}
4114#endif
4115
4116
4117/**
4118 * Atomically OR a signed 32-bit value, unordered.
4119 *
 * @param pi32 Pointer to the variable to OR i32 with.
 * @param i32 The value to OR *pi32 with.
4122 *
4123 * @remarks x86: Requires a 386 or later.
4124 */
4125DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4126{
4127 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4128}
4129
4130
4131/**
4132 * Atomically OR an unsigned 64-bit value, unordered.
4133 *
 * @param pu64 Pointer to the variable to OR u64 with.
4135 * @param u64 The value to OR *pu64 with.
4136 *
4137 * @remarks x86: Requires a Pentium or later.
4138 */
4139#if RT_INLINE_ASM_EXTERNAL
4140DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4141#else
4142DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4143{
4144# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4145 __asm__ __volatile__("orq %1, %q0\n\t"
4146 : "=m" (*pu64)
4147 : "r" (u64)
4148 , "m" (*pu64)
4149 : "cc");
4150# else
4151 for (;;)
4152 {
4153 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4154 uint64_t u64New = u64Old | u64;
4155 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4156 break;
4157 ASMNopPause();
4158 }
4159# endif
4160}
4161#endif
4162
4163
4164/**
 * Atomically OR a signed 64-bit value, unordered.
 *
 * @param pi64 Pointer to the variable to OR i64 with.
 * @param i64 The value to OR *pi64 with.
4169 *
4170 * @remarks x86: Requires a Pentium or later.
4171 */
4172DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4173{
4174 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4175}
4176
4177
4178/**
4179 * Atomically And an unsigned 32-bit value, unordered.
4180 *
 * @param pu32 Pointer to the variable to AND u32 with.
4182 * @param u32 The value to AND *pu32 with.
4183 *
4184 * @remarks x86: Requires a 386 or later.
4185 */
4186#if RT_INLINE_ASM_EXTERNAL
4187RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4188#else
4189DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4190{
4191# if RT_INLINE_ASM_GNU_STYLE
4192 __asm__ __volatile__("andl %1, %0\n\t"
4193 : "=m" (*pu32)
4194 : "ir" (u32)
4195 , "m" (*pu32)
4196 : "cc");
4197# else
4198 __asm
4199 {
4200 mov eax, [u32]
4201# ifdef RT_ARCH_AMD64
4202 mov rdx, [pu32]
4203 and [rdx], eax
4204# else
4205 mov edx, [pu32]
4206 and [edx], eax
4207# endif
4208 }
4209# endif
4210}
4211#endif
4212
4213
4214/**
4215 * Atomically And a signed 32-bit value, unordered.
4216 *
 * @param pi32 Pointer to the variable to AND i32 with.
4218 * @param i32 The value to AND *pi32 with.
4219 *
4220 * @remarks x86: Requires a 386 or later.
4221 */
4222DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4223{
4224 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4225}
4226
4227
4228/**
4229 * Atomically And an unsigned 64-bit value, unordered.
4230 *
 * @param pu64 Pointer to the variable to AND u64 with.
4232 * @param u64 The value to AND *pu64 with.
4233 *
4234 * @remarks x86: Requires a Pentium or later.
4235 */
4236#if RT_INLINE_ASM_EXTERNAL
4237DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4238#else
4239DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4240{
4241# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4242 __asm__ __volatile__("andq %1, %0\n\t"
4243 : "=m" (*pu64)
4244 : "r" (u64)
4245 , "m" (*pu64)
4246 : "cc");
4247# else
4248 for (;;)
4249 {
4250 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4251 uint64_t u64New = u64Old & u64;
4252 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4253 break;
4254 ASMNopPause();
4255 }
4256# endif
4257}
4258#endif
4259
4260
4261/**
4262 * Atomically And a signed 64-bit value, unordered.
4263 *
 * @param pi64 Pointer to the variable to AND i64 with.
4265 * @param i64 The value to AND *pi64 with.
4266 *
4267 * @remarks x86: Requires a Pentium or later.
4268 */
4269DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4270{
4271 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4272}
4273
4274
4275/**
4276 * Atomically increment an unsigned 32-bit value, unordered.
4277 *
 * @returns The new value.
4279 * @param pu32 Pointer to the variable to increment.
4280 *
4281 * @remarks x86: Requires a 486 or later.
4282 */
4283#if RT_INLINE_ASM_EXTERNAL
4284RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4285#else
4286DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4287{
4288 uint32_t u32;
4289# if RT_INLINE_ASM_GNU_STYLE
4290 __asm__ __volatile__("xaddl %0, %1\n\t"
4291 : "=r" (u32)
4292 , "=m" (*pu32)
4293 : "0" (1)
4294 , "m" (*pu32)
4295 : "memory" /** @todo why 'memory'? */
4296 , "cc");
4297 return u32 + 1;
4298# else
4299 __asm
4300 {
4301 mov eax, 1
4302# ifdef RT_ARCH_AMD64
4303 mov rdx, [pu32]
4304 xadd [rdx], eax
4305# else
4306 mov edx, [pu32]
4307 xadd [edx], eax
4308# endif
4309 mov u32, eax
4310 }
4311 return u32 + 1;
4312# endif
4313}
4314#endif
4315
4316
4317/**
4318 * Atomically decrement an unsigned 32-bit value, unordered.
4319 *
 * @returns The new value.
4321 * @param pu32 Pointer to the variable to decrement.
4322 *
4323 * @remarks x86: Requires a 486 or later.
4324 */
4325#if RT_INLINE_ASM_EXTERNAL
4326RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4327#else
4328DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4329{
4330 uint32_t u32;
4331# if RT_INLINE_ASM_GNU_STYLE
4332 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4333 : "=r" (u32)
4334 , "=m" (*pu32)
4335 : "0" (-1)
4336 , "m" (*pu32)
4337 : "memory"
4338 , "cc");
4339 return u32 - 1;
4340# else
4341 __asm
4342 {
4343 mov eax, -1
4344# ifdef RT_ARCH_AMD64
4345 mov rdx, [pu32]
4346 xadd [rdx], eax
4347# else
4348 mov edx, [pu32]
4349 xadd [edx], eax
4350# endif
4351 mov u32, eax
4352 }
4353 return u32 - 1;
4354# endif
4355}
4356#endif
4357
4358
4359/** @def RT_ASM_PAGE_SIZE
 * We try to avoid dragging in iprt/param.h here.
4361 * @internal
4362 */
4363#if defined(RT_ARCH_SPARC64)
4364# define RT_ASM_PAGE_SIZE 0x2000
4365# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4366# if PAGE_SIZE != 0x2000
4367# error "PAGE_SIZE is not 0x2000!"
4368# endif
4369# endif
4370#elif defined(RT_ARCH_ARM64)
4371# define RT_ASM_PAGE_SIZE 0x4000
4372# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
4373# if PAGE_SIZE != 0x4000
4374# error "PAGE_SIZE is not 0x4000!"
4375# endif
4376# endif
4377#else
4378# define RT_ASM_PAGE_SIZE 0x1000
4379# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4380# if PAGE_SIZE != 0x1000
4381# error "PAGE_SIZE is not 0x1000!"
4382# endif
4383# endif
4384#endif
4385
4386/**
 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
4388 *
4389 * @param pv Pointer to the memory block. This must be page aligned.
4390 */
4391#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4392RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
#else
4394DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
4395{
4396# if RT_INLINE_ASM_USES_INTRIN
4397# ifdef RT_ARCH_AMD64
4398 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
4399# else
4400 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
4401# endif
4402
4403# elif RT_INLINE_ASM_GNU_STYLE
4404 RTCCUINTREG uDummy;
4405# ifdef RT_ARCH_AMD64
4406 __asm__ __volatile__("rep stosq"
4407 : "=D" (pv),
4408 "=c" (uDummy)
4409 : "0" (pv),
4410 "c" (RT_ASM_PAGE_SIZE >> 3),
4411 "a" (0)
4412 : "memory");
4413# else
4414 __asm__ __volatile__("rep stosl"
4415 : "=D" (pv),
4416 "=c" (uDummy)
4417 : "0" (pv),
4418 "c" (RT_ASM_PAGE_SIZE >> 2),
4419 "a" (0)
4420 : "memory");
4421# endif
4422# else
4423 __asm
4424 {
4425# ifdef RT_ARCH_AMD64
4426 xor rax, rax
4427 mov ecx, 0200h
4428 mov rdi, [pv]
4429 rep stosq
4430# else
4431 xor eax, eax
4432 mov ecx, 0400h
4433 mov edi, [pv]
4434 rep stosd
4435# endif
4436 }
4437# endif
4438}
#endif
4440
4441
4442/**
4443 * Zeros a memory block with a 32-bit aligned size.
4444 *
4445 * @param pv Pointer to the memory block.
4446 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4447 */
4448#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4449RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
4450#else
4451DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4452{
4453# if RT_INLINE_ASM_USES_INTRIN
4454# ifdef RT_ARCH_AMD64
4455 if (!(cb & 7))
4456 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
4457 else
4458# endif
4459 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
4460
4461# elif RT_INLINE_ASM_GNU_STYLE
4462 __asm__ __volatile__("rep stosl"
4463 : "=D" (pv),
4464 "=c" (cb)
4465 : "0" (pv),
4466 "1" (cb >> 2),
4467 "a" (0)
4468 : "memory");
4469# else
4470 __asm
4471 {
4472 xor eax, eax
4473# ifdef RT_ARCH_AMD64
4474 mov rcx, [cb]
4475 shr rcx, 2
4476 mov rdi, [pv]
4477# else
4478 mov ecx, [cb]
4479 shr ecx, 2
4480 mov edi, [pv]
4481# endif
4482 rep stosd
4483 }
4484# endif
4485}
4486#endif
4487
4488
4489/**
4490 * Fills a memory block with a 32-bit aligned size.
4491 *
4492 * @param pv Pointer to the memory block.
4493 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4494 * @param u32 The value to fill with.
4495 */
4496#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4497RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
4498#else
4499DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
4500{
4501# if RT_INLINE_ASM_USES_INTRIN
4502# ifdef RT_ARCH_AMD64
4503 if (!(cb & 7))
4504 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4505 else
4506# endif
4507 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
4508
4509# elif RT_INLINE_ASM_GNU_STYLE
4510 __asm__ __volatile__("rep stosl"
4511 : "=D" (pv),
4512 "=c" (cb)
4513 : "0" (pv),
4514 "1" (cb >> 2),
4515 "a" (u32)
4516 : "memory");
4517# else
4518 __asm
4519 {
4520# ifdef RT_ARCH_AMD64
4521 mov rcx, [cb]
4522 shr rcx, 2
4523 mov rdi, [pv]
4524# else
4525 mov ecx, [cb]
4526 shr ecx, 2
4527 mov edi, [pv]
4528# endif
4529 mov eax, [u32]
4530 rep stosd
4531 }
4532# endif
4533}
4534#endif
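
/* Usage sketch (illustrative only): ASMMemZero32 and ASMMemFill32 require the
 * byte count to be a multiple of four, so they are typically used on whole
 * structures or tables with 32-bit friendly sizes.  The type name below is
 * made up for this example.
 *
 *      typedef struct EXAMPLETABLE { uint32_t au32Entries[256]; } EXAMPLETABLE;
 *      EXAMPLETABLE Table;
 *
 *      ASMMemZero32(&Table, sizeof(Table));                        // all zero bits
 *      ASMMemFill32(&Table, sizeof(Table), UINT32_C(0xffffffff));  // all one bits
 */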
4535
4536
4537/**
4538 * Checks if a memory block is all zeros.
4539 *
4540 * @returns Pointer to the first non-zero byte.
4541 * @returns NULL if all zero.
4542 *
4543 * @param pv Pointer to the memory block.
4544 * @param cb Number of bytes in the block.
4545 *
4546 * @todo Fix name, it is a predicate function but it's not returning boolean!
4547 */
4548#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4549 && !defined(RT_ARCH_SPARC64) \
4550 && !defined(RT_ARCH_SPARC)
4551DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
4552#else
4553DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4554{
4555 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4556 for (; cb; cb--, pb++)
4557 if (RT_LIKELY(*pb == 0))
4558 { /* likely */ }
4559 else
4560 return (void RT_FAR *)pb;
4561 return NULL;
4562}
4563#endif
4564
4565
4566/**
4567 * Checks if a memory block is all zeros.
4568 *
4569 * @returns true if zero, false if not.
4570 *
4571 * @param pv Pointer to the memory block.
4572 * @param cb Number of bytes in the block.
4573 *
4574 * @sa ASMMemFirstNonZero
4575 */
4576DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4577{
4578 return ASMMemFirstNonZero(pv, cb) == NULL;
4579}
4580
4581
4582/**
4583 * Checks if a memory page is all zeros.
4584 *
4585 * @returns true / false.
4586 *
 * @param pvPage Pointer to the page. Must be aligned on a 16-byte
 * boundary.
4589 */
4590DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
4591{
4592# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4593 union { RTCCUINTREG r; bool f; } uAX;
4594 RTCCUINTREG xCX, xDI;
4595 Assert(!((uintptr_t)pvPage & 15));
4596 __asm__ __volatile__("repe; "
4597# ifdef RT_ARCH_AMD64
4598 "scasq\n\t"
4599# else
4600 "scasl\n\t"
4601# endif
4602 "setnc %%al\n\t"
4603 : "=&c" (xCX)
4604 , "=&D" (xDI)
4605 , "=&a" (uAX.r)
4606 : "mr" (pvPage)
4607# ifdef RT_ARCH_AMD64
4608 , "0" (RT_ASM_PAGE_SIZE/8)
4609# else
4610 , "0" (RT_ASM_PAGE_SIZE/4)
4611# endif
4612 , "1" (pvPage)
4613 , "2" (0)
4614 : "cc");
4615 return uAX.f;
4616# else
4617 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4618 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4619 Assert(!((uintptr_t)pvPage & 15));
4620 for (;;)
4621 {
4622 if (puPtr[0]) return false;
4623 if (puPtr[4]) return false;
4624
4625 if (puPtr[2]) return false;
4626 if (puPtr[6]) return false;
4627
4628 if (puPtr[1]) return false;
4629 if (puPtr[5]) return false;
4630
4631 if (puPtr[3]) return false;
4632 if (puPtr[7]) return false;
4633
4634 if (!--cLeft)
4635 return true;
4636 puPtr += 8;
4637 }
4638# endif
4639}
4640
4641
4642/**
4643 * Checks if a memory block is filled with the specified byte, returning the
4644 * first mismatch.
4645 *
4646 * This is sort of an inverted memchr.
4647 *
4648 * @returns Pointer to the byte which doesn't equal u8.
4649 * @returns NULL if all equal to u8.
4650 *
4651 * @param pv Pointer to the memory block.
4652 * @param cb Number of bytes in the block.
4653 * @param u8 The value it's supposed to be filled with.
4654 *
4655 * @remarks No alignment requirements.
4656 */
4657#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4658 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4659 && !defined(RT_ARCH_SPARC64) \
4660 && !defined(RT_ARCH_SPARC)
4661DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
4662#else
4663DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
4664{
4665 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4666 for (; cb; cb--, pb++)
4667 if (RT_LIKELY(*pb == u8))
4668 { /* likely */ }
4669 else
4670 return (void *)pb;
4671 return NULL;
4672}
4673#endif
4674
4675
4676/**
4677 * Checks if a memory block is filled with the specified byte.
4678 *
4679 * @returns true if all matching, false if not.
4680 *
4681 * @param pv Pointer to the memory block.
4682 * @param cb Number of bytes in the block.
4683 * @param u8 The value it's supposed to be filled with.
4684 *
4685 * @remarks No alignment requirements.
4686 */
4687DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
4688{
4689 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4690}
4691
4692
4693/**
4694 * Checks if a memory block is filled with the specified 32-bit value.
4695 *
4696 * This is a sort of inverted memchr.
4697 *
4698 * @returns Pointer to the first value which doesn't equal u32.
4699 * @returns NULL if all equal to u32.
4700 *
4701 * @param pv Pointer to the memory block.
4702 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4703 * @param u32 The value it's supposed to be filled with.
4704 */
4705DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
4706{
4707/** @todo rewrite this in inline assembly? */
4708 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4709 for (; cb; cb -= 4, pu32++)
4710 if (RT_LIKELY(*pu32 == u32))
4711 { /* likely */ }
4712 else
4713 return (uint32_t RT_FAR *)pu32;
4714 return NULL;
4715}
4716
4717
4718/**
4719 * Probes a byte pointer for read access.
4720 *
 * While the function will fault if the byte is not read accessible,
4722 * the idea is to do this in a safe place like before acquiring locks
4723 * and such like.
4724 *
 * Also, this function guarantees that an eager compiler is not going
4726 * to optimize the probing away.
4727 *
4728 * @param pvByte Pointer to the byte.
4729 */
4730#if RT_INLINE_ASM_EXTERNAL
4731RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
4732#else
4733DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
4734{
4735 uint8_t u8;
4736# if RT_INLINE_ASM_GNU_STYLE
4737 __asm__ __volatile__("movb (%1), %0\n\t"
4738 : "=r" (u8)
4739 : "r" (pvByte));
4740# else
4741 __asm
4742 {
4743# ifdef RT_ARCH_AMD64
4744 mov rax, [pvByte]
4745 mov al, [rax]
4746# else
4747 mov eax, [pvByte]
4748 mov al, [eax]
4749# endif
4750 mov [u8], al
4751 }
4752# endif
4753 return u8;
4754}
4755#endif
4756
4757/**
4758 * Probes a buffer for read access page by page.
4759 *
4760 * While the function will fault if the buffer is not fully read
4761 * accessible, the idea is to do this in a safe place like before
4762 * acquiring locks and such like.
4763 *
 * Also, this function guarantees that an eager compiler is not going
4765 * to optimize the probing away.
4766 *
4767 * @param pvBuf Pointer to the buffer.
4768 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4769 */
4770DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
4771{
4772 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4773 /* the first byte */
4774 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4775 ASMProbeReadByte(pu8);
4776
 /* the pages in between. */
4778 while (cbBuf > RT_ASM_PAGE_SIZE)
4779 {
4780 ASMProbeReadByte(pu8);
4781 cbBuf -= RT_ASM_PAGE_SIZE;
4782 pu8 += RT_ASM_PAGE_SIZE;
4783 }
4784
4785 /* the last byte */
4786 ASMProbeReadByte(pu8 + cbBuf - 1);
4787}
4788
4789
4790
4791/** @defgroup grp_inline_bits Bit Operations
4792 * @{
4793 */
4794
4795
4796/**
4797 * Sets a bit in a bitmap.
4798 *
4799 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4800 * @param iBit The bit to set.
4801 *
4802 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4803 * However, doing so will yield better performance as well as avoiding
4804 * traps accessing the last bits in the bitmap.
4805 */
4806#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4807RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4808#else
4809DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4810{
4811# if RT_INLINE_ASM_USES_INTRIN
4812 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4813
4814# elif RT_INLINE_ASM_GNU_STYLE
4815 __asm__ __volatile__("btsl %1, %0"
4816 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4817 : "Ir" (iBit)
4818 , "m" (*(volatile long RT_FAR *)pvBitmap)
4819 : "memory"
4820 , "cc");
4821# else
4822 __asm
4823 {
4824# ifdef RT_ARCH_AMD64
4825 mov rax, [pvBitmap]
4826 mov edx, [iBit]
4827 bts [rax], edx
4828# else
4829 mov eax, [pvBitmap]
4830 mov edx, [iBit]
4831 bts [eax], edx
4832# endif
4833 }
4834# endif
4835}
4836#endif
4837
4838
4839/**
4840 * Atomically sets a bit in a bitmap, ordered.
4841 *
4842 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4843 * the memory access isn't atomic!
4844 * @param iBit The bit to set.
4845 *
4846 * @remarks x86: Requires a 386 or later.
4847 */
4848#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4849RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4850#else
4851DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4852{
4853 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4854# if RT_INLINE_ASM_USES_INTRIN
4855 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4856# elif RT_INLINE_ASM_GNU_STYLE
4857 __asm__ __volatile__("lock; btsl %1, %0"
4858 : "=m" (*(volatile long *)pvBitmap)
4859 : "Ir" (iBit)
4860 , "m" (*(volatile long *)pvBitmap)
4861 : "memory"
4862 , "cc");
4863# else
4864 __asm
4865 {
4866# ifdef RT_ARCH_AMD64
4867 mov rax, [pvBitmap]
4868 mov edx, [iBit]
4869 lock bts [rax], edx
4870# else
4871 mov eax, [pvBitmap]
4872 mov edx, [iBit]
4873 lock bts [eax], edx
4874# endif
4875 }
4876# endif
4877}
4878#endif
4879
4880
4881/**
4882 * Clears a bit in a bitmap.
4883 *
4884 * @param pvBitmap Pointer to the bitmap.
4885 * @param iBit The bit to clear.
4886 *
4887 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4888 * However, doing so will yield better performance as well as avoiding
4889 * traps accessing the last bits in the bitmap.
4890 */
4891#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4892RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4893#else
4894DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4895{
4896# if RT_INLINE_ASM_USES_INTRIN
4897 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4898
4899# elif RT_INLINE_ASM_GNU_STYLE
4900 __asm__ __volatile__("btrl %1, %0"
4901 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4902 : "Ir" (iBit)
4903 , "m" (*(volatile long RT_FAR *)pvBitmap)
4904 : "memory"
4905 , "cc");
4906# else
4907 __asm
4908 {
4909# ifdef RT_ARCH_AMD64
4910 mov rax, [pvBitmap]
4911 mov edx, [iBit]
4912 btr [rax], edx
4913# else
4914 mov eax, [pvBitmap]
4915 mov edx, [iBit]
4916 btr [eax], edx
4917# endif
4918 }
4919# endif
4920}
4921#endif
4922
4923
4924/**
4925 * Atomically clears a bit in a bitmap, ordered.
4926 *
4927 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4928 * the memory access isn't atomic!
 * @param iBit The bit to clear.
4930 *
4931 * @remarks No memory barrier, take care on smp.
4932 * @remarks x86: Requires a 386 or later.
4933 */
4934#if RT_INLINE_ASM_EXTERNAL
4935RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4936#else
4937DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4938{
4939 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4940# if RT_INLINE_ASM_GNU_STYLE
4941 __asm__ __volatile__("lock; btrl %1, %0"
4942 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4943 : "Ir" (iBit)
4944 , "m" (*(volatile long RT_FAR *)pvBitmap)
4945 : "memory"
4946 , "cc");
4947# else
4948 __asm
4949 {
4950# ifdef RT_ARCH_AMD64
4951 mov rax, [pvBitmap]
4952 mov edx, [iBit]
4953 lock btr [rax], edx
4954# else
4955 mov eax, [pvBitmap]
4956 mov edx, [iBit]
4957 lock btr [eax], edx
4958# endif
4959 }
4960# endif
4961}
4962#endif
4963
4964
4965/**
4966 * Toggles a bit in a bitmap.
4967 *
4968 * @param pvBitmap Pointer to the bitmap.
4969 * @param iBit The bit to toggle.
4970 *
4971 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4972 * However, doing so will yield better performance as well as avoiding
4973 * traps accessing the last bits in the bitmap.
4974 */
4975#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4976RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4977#else
4978DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4979{
4980# if RT_INLINE_ASM_USES_INTRIN
4981 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4982# elif RT_INLINE_ASM_GNU_STYLE
4983 __asm__ __volatile__("btcl %1, %0"
4984 : "=m" (*(volatile long *)pvBitmap)
4985 : "Ir" (iBit)
4986 , "m" (*(volatile long *)pvBitmap)
4987 : "memory"
4988 , "cc");
4989# else
4990 __asm
4991 {
4992# ifdef RT_ARCH_AMD64
4993 mov rax, [pvBitmap]
4994 mov edx, [iBit]
4995 btc [rax], edx
4996# else
4997 mov eax, [pvBitmap]
4998 mov edx, [iBit]
4999 btc [eax], edx
5000# endif
5001 }
5002# endif
5003}
5004#endif
5005
5006
5007/**
5008 * Atomically toggles a bit in a bitmap, ordered.
5009 *
5010 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5011 * the memory access isn't atomic!
 * @param iBit The bit to toggle.
5013 *
5014 * @remarks x86: Requires a 386 or later.
5015 */
5016#if RT_INLINE_ASM_EXTERNAL
5017RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5018#else
5019DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5020{
5021 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5022# if RT_INLINE_ASM_GNU_STYLE
5023 __asm__ __volatile__("lock; btcl %1, %0"
5024 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5025 : "Ir" (iBit)
5026 , "m" (*(volatile long RT_FAR *)pvBitmap)
5027 : "memory"
5028 , "cc");
5029# else
5030 __asm
5031 {
5032# ifdef RT_ARCH_AMD64
5033 mov rax, [pvBitmap]
5034 mov edx, [iBit]
5035 lock btc [rax], edx
5036# else
5037 mov eax, [pvBitmap]
5038 mov edx, [iBit]
5039 lock btc [eax], edx
5040# endif
5041 }
5042# endif
5043}
5044#endif
5045
5046
5047/**
5048 * Tests and sets a bit in a bitmap.
5049 *
5050 * @returns true if the bit was set.
5051 * @returns false if the bit was clear.
5052 *
5053 * @param pvBitmap Pointer to the bitmap.
5054 * @param iBit The bit to test and set.
5055 *
5056 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5057 * However, doing so will yield better performance as well as avoiding
5058 * traps accessing the last bits in the bitmap.
5059 */
5060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5061RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5062#else
5063DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5064{
5065 union { bool f; uint32_t u32; uint8_t u8; } rc;
5066# if RT_INLINE_ASM_USES_INTRIN
5067 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
5068
5069# elif RT_INLINE_ASM_GNU_STYLE
5070 __asm__ __volatile__("btsl %2, %1\n\t"
5071 "setc %b0\n\t"
5072 "andl $1, %0\n\t"
5073 : "=q" (rc.u32)
5074 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5075 : "Ir" (iBit)
5076 , "m" (*(volatile long RT_FAR *)pvBitmap)
5077 : "memory"
5078 , "cc");
5079# else
5080 __asm
5081 {
5082 mov edx, [iBit]
5083# ifdef RT_ARCH_AMD64
5084 mov rax, [pvBitmap]
5085 bts [rax], edx
5086# else
5087 mov eax, [pvBitmap]
5088 bts [eax], edx
5089# endif
5090 setc al
5091 and eax, 1
5092 mov [rc.u32], eax
5093 }
5094# endif
5095 return rc.f;
5096}
5097#endif
5098
5099
5100/**
5101 * Atomically tests and sets a bit in a bitmap, ordered.
5102 *
5103 * @returns true if the bit was set.
5104 * @returns false if the bit was clear.
5105 *
5106 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5107 * the memory access isn't atomic!
 * @param iBit The bit to test and set.
5109 *
5110 * @remarks x86: Requires a 386 or later.
5111 */
5112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5113RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5114#else
5115DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5116{
5117 union { bool f; uint32_t u32; uint8_t u8; } rc;
5118 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5119# if RT_INLINE_ASM_USES_INTRIN
5120 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5121# elif RT_INLINE_ASM_GNU_STYLE
5122 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5123 "setc %b0\n\t"
5124 "andl $1, %0\n\t"
5125 : "=q" (rc.u32)
5126 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5127 : "Ir" (iBit)
5128 , "m" (*(volatile long RT_FAR *)pvBitmap)
5129 : "memory"
5130 , "cc");
5131# else
5132 __asm
5133 {
5134 mov edx, [iBit]
5135# ifdef RT_ARCH_AMD64
5136 mov rax, [pvBitmap]
5137 lock bts [rax], edx
5138# else
5139 mov eax, [pvBitmap]
5140 lock bts [eax], edx
5141# endif
5142 setc al
5143 and eax, 1
5144 mov [rc.u32], eax
5145 }
5146# endif
5147 return rc.f;
5148}
5149#endif
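
/* Usage sketch (illustrative only): ASMAtomicBitTestAndSet returns the old
 * bit value, so claiming a free slot from a shared allocation bitmap is a
 * one-liner.  The bitmap and function names below are made up.
 *
 *      static uint32_t volatile s_bmSlots[4];  // 128 slots, 32-bit aligned
 *
 *      static int32_t ExampleAllocSlot(void)
 *      {
 *          for (int32_t iBit = 0; iBit < 128; iBit++)
 *              if (!ASMAtomicBitTestAndSet(&s_bmSlots[0], iBit))
 *                  return iBit;    // the bit was clear and is now ours
 *          return -1;              // all slots taken
 *      }
 */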
5150
5151
5152/**
5153 * Tests and clears a bit in a bitmap.
5154 *
5155 * @returns true if the bit was set.
5156 * @returns false if the bit was clear.
5157 *
5158 * @param pvBitmap Pointer to the bitmap.
5159 * @param iBit The bit to test and clear.
5160 *
5161 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5162 * However, doing so will yield better performance as well as avoiding
5163 * traps accessing the last bits in the bitmap.
5164 */
5165#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5166RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5167#else
5168DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5169{
5170 union { bool f; uint32_t u32; uint8_t u8; } rc;
5171# if RT_INLINE_ASM_USES_INTRIN
5172 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5173
5174# elif RT_INLINE_ASM_GNU_STYLE
5175 __asm__ __volatile__("btrl %2, %1\n\t"
5176 "setc %b0\n\t"
5177 "andl $1, %0\n\t"
5178 : "=q" (rc.u32)
5179 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5180 : "Ir" (iBit)
5181 , "m" (*(volatile long RT_FAR *)pvBitmap)
5182 : "memory"
5183 , "cc");
5184# else
5185 __asm
5186 {
5187 mov edx, [iBit]
5188# ifdef RT_ARCH_AMD64
5189 mov rax, [pvBitmap]
5190 btr [rax], edx
5191# else
5192 mov eax, [pvBitmap]
5193 btr [eax], edx
5194# endif
5195 setc al
5196 and eax, 1
5197 mov [rc.u32], eax
5198 }
5199# endif
5200 return rc.f;
5201}
5202#endif
5203
5204
5205/**
5206 * Atomically tests and clears a bit in a bitmap, ordered.
5207 *
5208 * @returns true if the bit was set.
5209 * @returns false if the bit was clear.
5210 *
5211 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5212 * the memory access isn't atomic!
5213 * @param iBit The bit to test and clear.
5214 *
5215 * @remarks No memory barrier, take care on smp.
5216 * @remarks x86: Requires a 386 or later.
5217 */
5218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5219RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5220#else
5221DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5222{
5223 union { bool f; uint32_t u32; uint8_t u8; } rc;
5224 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5225# if RT_INLINE_ASM_USES_INTRIN
5226 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
5227
5228# elif RT_INLINE_ASM_GNU_STYLE
5229 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5230 "setc %b0\n\t"
5231 "andl $1, %0\n\t"
5232 : "=q" (rc.u32)
5233 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5234 : "Ir" (iBit)
5235 , "m" (*(volatile long RT_FAR *)pvBitmap)
5236 : "memory"
5237 , "cc");
5238# else
5239 __asm
5240 {
5241 mov edx, [iBit]
5242# ifdef RT_ARCH_AMD64
5243 mov rax, [pvBitmap]
5244 lock btr [rax], edx
5245# else
5246 mov eax, [pvBitmap]
5247 lock btr [eax], edx
5248# endif
5249 setc al
5250 and eax, 1
5251 mov [rc.u32], eax
5252 }
5253# endif
5254 return rc.f;
5255}
5256#endif
5257
5258
5259/**
5260 * Tests and toggles a bit in a bitmap.
5261 *
5262 * @returns true if the bit was set.
5263 * @returns false if the bit was clear.
5264 *
5265 * @param pvBitmap Pointer to the bitmap.
5266 * @param iBit The bit to test and toggle.
5267 *
5268 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5269 * However, doing so will yield better performance as well as avoiding
5270 * traps accessing the last bits in the bitmap.
5271 */
5272#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5273RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5274#else
5275DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5276{
5277 union { bool f; uint32_t u32; uint8_t u8; } rc;
5278# if RT_INLINE_ASM_USES_INTRIN
5279 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5280
5281# elif RT_INLINE_ASM_GNU_STYLE
5282 __asm__ __volatile__("btcl %2, %1\n\t"
5283 "setc %b0\n\t"
5284 "andl $1, %0\n\t"
5285 : "=q" (rc.u32)
5286 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5287 : "Ir" (iBit)
5288 , "m" (*(volatile long RT_FAR *)pvBitmap)
5289 : "memory"
5290 , "cc");
5291# else
5292 __asm
5293 {
5294 mov edx, [iBit]
5295# ifdef RT_ARCH_AMD64
5296 mov rax, [pvBitmap]
5297 btc [rax], edx
5298# else
5299 mov eax, [pvBitmap]
5300 btc [eax], edx
5301# endif
5302 setc al
5303 and eax, 1
5304 mov [rc.u32], eax
5305 }
5306# endif
5307 return rc.f;
5308}
5309#endif
5310
5311
5312/**
5313 * Atomically tests and toggles a bit in a bitmap, ordered.
5314 *
5315 * @returns true if the bit was set.
5316 * @returns false if the bit was clear.
5317 *
5318 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5319 * the memory access isn't atomic!
5320 * @param iBit The bit to test and toggle.
5321 *
5322 * @remarks x86: Requires a 386 or later.
5323 */
5324#if RT_INLINE_ASM_EXTERNAL
5325RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5326#else
5327DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5328{
5329 union { bool f; uint32_t u32; uint8_t u8; } rc;
5330 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5331# if RT_INLINE_ASM_GNU_STYLE
5332 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5333 "setc %b0\n\t"
5334 "andl $1, %0\n\t"
5335 : "=q" (rc.u32)
5336 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5337 : "Ir" (iBit)
5338 , "m" (*(volatile long RT_FAR *)pvBitmap)
5339 : "memory"
5340 , "cc");
5341# else
5342 __asm
5343 {
5344 mov edx, [iBit]
5345# ifdef RT_ARCH_AMD64
5346 mov rax, [pvBitmap]
5347 lock btc [rax], edx
5348# else
5349 mov eax, [pvBitmap]
5350 lock btc [eax], edx
5351# endif
5352 setc al
5353 and eax, 1
5354 mov [rc.u32], eax
5355 }
5356# endif
5357 return rc.f;
5358}
5359#endif
5360
5361
5362/**
5363 * Tests if a bit in a bitmap is set.
5364 *
5365 * @returns true if the bit is set.
5366 * @returns false if the bit is clear.
5367 *
5368 * @param pvBitmap Pointer to the bitmap.
5369 * @param iBit The bit to test.
5370 *
5371 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5372 * However, doing so will yield better performance as well as avoiding
5373 * traps accessing the last bits in the bitmap.
5374 */
5375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5376RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5377#else
5378DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5379{
5380 union { bool f; uint32_t u32; uint8_t u8; } rc;
5381# if RT_INLINE_ASM_USES_INTRIN
5382 rc.u32 = _bittest((long *)pvBitmap, iBit);
5383# elif RT_INLINE_ASM_GNU_STYLE
5384
5385 __asm__ __volatile__("btl %2, %1\n\t"
5386 "setc %b0\n\t"
5387 "andl $1, %0\n\t"
5388 : "=q" (rc.u32)
5389 : "m" (*(const volatile long RT_FAR *)pvBitmap)
5390 , "Ir" (iBit)
5391 : "memory"
5392 , "cc");
5393# else
5394 __asm
5395 {
5396 mov edx, [iBit]
5397# ifdef RT_ARCH_AMD64
5398 mov rax, [pvBitmap]
5399 bt [rax], edx
5400# else
5401 mov eax, [pvBitmap]
5402 bt [eax], edx
5403# endif
5404 setc al
5405 and eax, 1
5406 mov [rc.u32], eax
5407 }
5408# endif
5409 return rc.f;
5410}
5411#endif
5412
5413
5414/**
5415 * Clears a bit range within a bitmap.
5416 *
5417 * @param pvBitmap Pointer to the bitmap.
 * @param iBitStart The first bit to clear.
5419 * @param iBitEnd The first bit not to clear.
5420 */
5421DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
5422{
5423 if (iBitStart < iBitEnd)
5424 {
5425 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
5426 int32_t iStart = iBitStart & ~31;
5427 int32_t iEnd = iBitEnd & ~31;
5428 if (iStart == iEnd)
5429 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
5430 else
5431 {
5432 /* bits in first dword. */
5433 if (iBitStart & 31)
5434 {
5435 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
5436 pu32++;
5437 iBitStart = iStart + 32;
5438 }
5439
5440 /* whole dwords. */
5441 if (iBitStart != iEnd)
5442 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
5443
5444 /* bits in last dword. */
5445 if (iBitEnd & 31)
5446 {
5447 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5448 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
5449 }
5450 }
5451 }
5452}
5453
5454
5455/**
5456 * Sets a bit range within a bitmap.
5457 *
5458 * @param pvBitmap Pointer to the bitmap.
 * @param iBitStart The first bit to set.
5460 * @param iBitEnd The first bit not to set.
5461 */
5462DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
5463{
5464 if (iBitStart < iBitEnd)
5465 {
5466 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
5467 int32_t iStart = iBitStart & ~31;
5468 int32_t iEnd = iBitEnd & ~31;
5469 if (iStart == iEnd)
5470 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5471 else
5472 {
5473 /* bits in first dword. */
5474 if (iBitStart & 31)
5475 {
5476 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
5477 pu32++;
5478 iBitStart = iStart + 32;
5479 }
5480
5481 /* whole dword. */
5482 if (iBitStart != iEnd)
5483 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
5484
5485 /* bits in last dword. */
5486 if (iBitEnd & 31)
5487 {
5488 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
5489 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
5490 }
5491 }
5492 }
5493}
5494
5495
5496/**
5497 * Finds the first clear bit in a bitmap.
5498 *
5499 * @returns Index of the first zero bit.
5500 * @returns -1 if no clear bit was found.
5501 * @param pvBitmap Pointer to the bitmap.
5502 * @param cBits The number of bits in the bitmap. Multiple of 32.
5503 */
5504#if RT_INLINE_ASM_EXTERNAL
5505DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
5506#else
5507DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
5508{
5509 if (cBits)
5510 {
5511 int32_t iBit;
5512# if RT_INLINE_ASM_GNU_STYLE
5513 RTCCUINTREG uEAX, uECX, uEDI;
5514 cBits = RT_ALIGN_32(cBits, 32);
5515 __asm__ __volatile__("repe; scasl\n\t"
5516 "je 1f\n\t"
5517# ifdef RT_ARCH_AMD64
5518 "lea -4(%%rdi), %%rdi\n\t"
5519 "xorl (%%rdi), %%eax\n\t"
5520 "subq %5, %%rdi\n\t"
5521# else
5522 "lea -4(%%edi), %%edi\n\t"
5523 "xorl (%%edi), %%eax\n\t"
5524 "subl %5, %%edi\n\t"
5525# endif
5526 "shll $3, %%edi\n\t"
5527 "bsfl %%eax, %%edx\n\t"
5528 "addl %%edi, %%edx\n\t"
5529 "1:\t\n"
5530 : "=d" (iBit)
5531 , "=&c" (uECX)
5532 , "=&D" (uEDI)
5533 , "=&a" (uEAX)
5534 : "0" (0xffffffff)
5535 , "mr" (pvBitmap)
5536 , "1" (cBits >> 5)
5537 , "2" (pvBitmap)
5538 , "3" (0xffffffff)
5539 : "cc");
5540# else
5541 cBits = RT_ALIGN_32(cBits, 32);
5542 __asm
5543 {
5544# ifdef RT_ARCH_AMD64
5545 mov rdi, [pvBitmap]
5546 mov rbx, rdi
5547# else
5548 mov edi, [pvBitmap]
5549 mov ebx, edi
5550# endif
5551 mov edx, 0ffffffffh
5552 mov eax, edx
5553 mov ecx, [cBits]
5554 shr ecx, 5
5555 repe scasd
5556 je done
5557
5558# ifdef RT_ARCH_AMD64
5559 lea rdi, [rdi - 4]
5560 xor eax, [rdi]
5561 sub rdi, rbx
5562# else
5563 lea edi, [edi - 4]
5564 xor eax, [edi]
5565 sub edi, ebx
5566# endif
5567 shl edi, 3
5568 bsf edx, eax
5569 add edx, edi
5570 done:
5571 mov [iBit], edx
5572 }
5573# endif
5574 return iBit;
5575 }
5576 return -1;
5577}
5578#endif
5579
5580
5581/**
5582 * Finds the next clear bit in a bitmap.
5583 *
 * @returns Index of the next clear bit.
5585 * @returns -1 if no clear bit was found.
5586 * @param pvBitmap Pointer to the bitmap.
5587 * @param cBits The number of bits in the bitmap. Multiple of 32.
5588 * @param iBitPrev The bit returned from the last search.
5589 * The search will start at iBitPrev + 1.
5590 */
5591#if RT_INLINE_ASM_EXTERNAL
5592DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
5593#else
5594DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
5595{
5596 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5597 int iBit = ++iBitPrev & 31;
5598 if (iBit)
5599 {
5600 /*
5601 * Inspect the 32-bit word containing the unaligned bit.
5602 */
5603 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5604
5605# if RT_INLINE_ASM_USES_INTRIN
5606 unsigned long ulBit = 0;
5607 if (_BitScanForward(&ulBit, u32))
5608 return ulBit + iBitPrev;
5609# else
5610# if RT_INLINE_ASM_GNU_STYLE
5611 __asm__ __volatile__("bsf %1, %0\n\t"
5612 "jnz 1f\n\t"
5613 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
5614 "1:\n\t"
5615 : "=r" (iBit)
5616 : "r" (u32)
5617 : "cc");
5618# else
5619 __asm
5620 {
5621 mov edx, [u32]
5622 bsf eax, edx
5623 jnz done
5624 mov eax, 0ffffffffh
5625 done:
5626 mov [iBit], eax
5627 }
5628# endif
5629 if (iBit >= 0)
5630 return iBit + (int)iBitPrev;
5631# endif
5632
5633 /*
5634 * Skip ahead and see if there is anything left to search.
5635 */
5636 iBitPrev |= 31;
5637 iBitPrev++;
5638 if (cBits <= (uint32_t)iBitPrev)
5639 return -1;
5640 }
5641
5642 /*
5643 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5644 */
5645 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5646 if (iBit >= 0)
5647 iBit += iBitPrev;
5648 return iBit;
5649}
5650#endif
5651
5652
5653/**
5654 * Finds the first set bit in a bitmap.
5655 *
5656 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found.
5658 * @param pvBitmap Pointer to the bitmap.
5659 * @param cBits The number of bits in the bitmap. Multiple of 32.
5660 */
5661#if RT_INLINE_ASM_EXTERNAL
5662DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
5663#else
5664DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
5665{
5666 if (cBits)
5667 {
5668 int32_t iBit;
5669# if RT_INLINE_ASM_GNU_STYLE
5670 RTCCUINTREG uEAX, uECX, uEDI;
5671 cBits = RT_ALIGN_32(cBits, 32);
5672 __asm__ __volatile__("repe; scasl\n\t"
5673 "je 1f\n\t"
5674# ifdef RT_ARCH_AMD64
5675 "lea -4(%%rdi), %%rdi\n\t"
5676 "movl (%%rdi), %%eax\n\t"
5677 "subq %5, %%rdi\n\t"
5678# else
5679 "lea -4(%%edi), %%edi\n\t"
5680 "movl (%%edi), %%eax\n\t"
5681 "subl %5, %%edi\n\t"
5682# endif
5683 "shll $3, %%edi\n\t"
5684 "bsfl %%eax, %%edx\n\t"
5685 "addl %%edi, %%edx\n\t"
5686 "1:\t\n"
5687 : "=d" (iBit)
5688 , "=&c" (uECX)
5689 , "=&D" (uEDI)
5690 , "=&a" (uEAX)
5691 : "0" (0xffffffff)
5692 , "mr" (pvBitmap)
5693 , "1" (cBits >> 5)
5694 , "2" (pvBitmap)
5695 , "3" (0)
5696 : "cc");
5697# else
5698 cBits = RT_ALIGN_32(cBits, 32);
5699 __asm
5700 {
5701# ifdef RT_ARCH_AMD64
5702 mov rdi, [pvBitmap]
5703 mov rbx, rdi
5704# else
5705 mov edi, [pvBitmap]
5706 mov ebx, edi
5707# endif
5708 mov edx, 0ffffffffh
5709 xor eax, eax
5710 mov ecx, [cBits]
5711 shr ecx, 5
5712 repe scasd
5713 je done
5714# ifdef RT_ARCH_AMD64
5715 lea rdi, [rdi - 4]
5716 mov eax, [rdi]
5717 sub rdi, rbx
5718# else
5719 lea edi, [edi - 4]
5720 mov eax, [edi]
5721 sub edi, ebx
5722# endif
5723 shl edi, 3
5724 bsf edx, eax
5725 add edx, edi
5726 done:
5727 mov [iBit], edx
5728 }
5729# endif
5730 return iBit;
5731 }
5732 return -1;
5733}
5734#endif
5735
5736
5737/**
5738 * Finds the next set bit in a bitmap.
5739 *
5740 * @returns Index of the next set bit.
5741 * @returns -1 if no set bit was found.
5742 * @param pvBitmap Pointer to the bitmap.
5743 * @param cBits The number of bits in the bitmap. Multiple of 32.
5744 * @param iBitPrev The bit returned from the last search.
5745 * The search will start at iBitPrev + 1.
5746 */
5747#if RT_INLINE_ASM_EXTERNAL
5748DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
5749#else
5750DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
5751{
5752 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5753 int iBit = ++iBitPrev & 31;
5754 if (iBit)
5755 {
5756 /*
5757 * Inspect the 32-bit word containing the unaligned bit.
5758 */
5759 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5760
5761# if RT_INLINE_ASM_USES_INTRIN
5762 unsigned long ulBit = 0;
5763 if (_BitScanForward(&ulBit, u32))
5764 return ulBit + iBitPrev;
5765# else
5766# if RT_INLINE_ASM_GNU_STYLE
5767 __asm__ __volatile__("bsf %1, %0\n\t"
5768 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
5769 "movl $-1, %0\n\t"
5770 "1:\n\t"
5771 : "=r" (iBit)
5772 : "r" (u32)
5773 : "cc");
5774# else
5775 __asm
5776 {
5777 mov edx, [u32]
5778 bsf eax, edx
5779 jnz done
5780 mov eax, 0ffffffffh
5781 done:
5782 mov [iBit], eax
5783 }
5784# endif
5785 if (iBit >= 0)
5786 return iBit + (int)iBitPrev;
5787# endif
5788
5789 /*
5790 * Skip ahead and see if there is anything left to search.
5791 */
5792 iBitPrev |= 31;
5793 iBitPrev++;
5794 if (cBits <= (uint32_t)iBitPrev)
5795 return -1;
5796 }
5797
5798 /*
5799      * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5800 */
5801 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5802 if (iBit >= 0)
5803 iBit += iBitPrev;
5804 return iBit;
5805}
5806#endif
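
/* Usage sketch (illustrative only; array contents are arbitrary example
 * values): enumerating every set bit by chaining ASMBitFirstSet and
 * ASMBitNextSet.
 *
 *      uint32_t au32Bitmap[2] = { 0x00000005, 0x80000000 };   // bits 0, 2 and 63
 *      int32_t  iBit = ASMBitFirstSet(&au32Bitmap[0], 64);
 *      while (iBit >= 0)
 *      {
 *          // ... process bit iBit (0, 2, then 63 for this bitmap) ...
 *          iBit = ASMBitNextSet(&au32Bitmap[0], 64, (uint32_t)iBit);
 *      }
 */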
5807
5808
5809/**
5810 * Finds the first bit which is set in the given 32-bit integer.
5811 * Bits are numbered from 1 (least significant) to 32.
5812 *
5813 * @returns index [1..32] of the first set bit.
5814 * @returns 0 if all bits are cleared.
5815 * @param u32 Integer to search for set bits.
5816 * @remarks Similar to ffs() in BSD.
5817 */
5818#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5819RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
5820#else
5821DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
5822{
5823# if RT_INLINE_ASM_USES_INTRIN
5824 unsigned long iBit;
5825 if (_BitScanForward(&iBit, u32))
5826 iBit++;
5827 else
5828 iBit = 0;
5829# elif RT_INLINE_ASM_GNU_STYLE
5830 uint32_t iBit;
5831 __asm__ __volatile__("bsf %1, %0\n\t"
5832 "jnz 1f\n\t"
5833 "xorl %0, %0\n\t"
5834 "jmp 2f\n"
5835 "1:\n\t"
5836 "incl %0\n"
5837 "2:\n\t"
5838 : "=r" (iBit)
5839 : "rm" (u32)
5840 : "cc");
5841# else
5842 uint32_t iBit;
5843 _asm
5844 {
5845 bsf eax, [u32]
5846 jnz found
5847 xor eax, eax
5848 jmp done
5849 found:
5850 inc eax
5851 done:
5852 mov [iBit], eax
5853 }
5854# endif
5855 return iBit;
5856}
5857#endif
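
/* Usage sketch (illustrative only; input values are arbitrary examples).  The
 * 1-based return value makes zero input distinguishable from bit 0 being set:
 *
 *      unsigned i1 = ASMBitFirstSetU32(UINT32_C(0x00000001));  // 1  (bit 0)
 *      unsigned i2 = ASMBitFirstSetU32(UINT32_C(0x00008000));  // 16 (bit 15)
 *      unsigned i3 = ASMBitFirstSetU32(0);                     // 0  (no bit set)
 */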
5858
5859
5860/**
5861 * Finds the first bit which is set in the given 32-bit integer.
5862 * Bits are numbered from 1 (least significant) to 32.
5863 *
5864 * @returns index [1..32] of the first set bit.
5865 * @returns 0 if all bits are cleared.
5866 * @param i32 Integer to search for set bits.
5867 * @remark Similar to ffs() in BSD.
5868 */
5869DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
5870{
5871 return ASMBitFirstSetU32((uint32_t)i32);
5872}
5873
5874
5875/**
5876 * Finds the first bit which is set in the given 64-bit integer.
5877 *
5878 * Bits are numbered from 1 (least significant) to 64.
5879 *
5880 * @returns index [1..64] of the first set bit.
5881 * @returns 0 if all bits are cleared.
5882 * @param u64 Integer to search for set bits.
5883 * @remarks Similar to ffs() in BSD.
5884 */
5885#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5886RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
5887#else
5888DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
5889{
5890# if RT_INLINE_ASM_USES_INTRIN
5891 unsigned long iBit;
5892# if ARCH_BITS == 64
5893 if (_BitScanForward64(&iBit, u64))
5894 iBit++;
5895 else
5896 iBit = 0;
5897# else
5898 if (_BitScanForward(&iBit, (uint32_t)u64))
5899 iBit++;
5900 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5901 iBit += 33;
5902 else
5903 iBit = 0;
5904# endif
5905# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5906 uint64_t iBit;
5907 __asm__ __volatile__("bsfq %1, %0\n\t"
5908 "jnz 1f\n\t"
5909 "xorl %k0, %k0\n\t"
5910 "jmp 2f\n"
5911 "1:\n\t"
5912 "incl %k0\n"
5913 "2:\n\t"
5914 : "=r" (iBit)
5915 : "rm" (u64)
5916 : "cc");
5917# else
5918 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5919 if (!iBit)
5920 {
5921 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5922 if (iBit)
5923 iBit += 32;
5924 }
5925# endif
5926 return (unsigned)iBit;
5927}
5928#endif
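
/* Usage sketch (illustrative only; input values are arbitrary examples).  The
 * same 1-based convention as ASMBitFirstSetU32, extended to 64 bits:
 *
 *      unsigned i1 = ASMBitFirstSetU64(UINT64_C(0x0000000100000000));  // 33 (bit 32)
 *      unsigned i2 = ASMBitFirstSetU64(0);                             // 0
 */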
5929
5930
5931/**
5932 * Finds the first bit which is set in the given 16-bit integer.
5933 *
5934 * Bits are numbered from 1 (least significant) to 16.
5935 *
5936 * @returns index [1..16] of the first set bit.
5937 * @returns 0 if all bits are cleared.
5938 * @param u16 Integer to search for set bits.
5939 * @remarks For 16-bit bs3kit code.
5940 */
5941#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5942RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
5943#else
5944DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
5945{
5946 return ASMBitFirstSetU32((uint32_t)u16);
5947}
5948#endif
5949
5950
5951/**
5952 * Finds the last bit which is set in the given 32-bit integer.
5953 * Bits are numbered from 1 (least significant) to 32.
5954 *
5955 * @returns index [1..32] of the last set bit.
5956 * @returns 0 if all bits are cleared.
5957 * @param u32 Integer to search for set bits.
5958 * @remark Similar to fls() in BSD.
5959 */
5960#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5961RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
5962#else
5963DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
5964{
5965# if RT_INLINE_ASM_USES_INTRIN
5966 unsigned long iBit;
5967 if (_BitScanReverse(&iBit, u32))
5968 iBit++;
5969 else
5970 iBit = 0;
5971# elif RT_INLINE_ASM_GNU_STYLE
5972 uint32_t iBit;
5973 __asm__ __volatile__("bsrl %1, %0\n\t"
5974 "jnz 1f\n\t"
5975 "xorl %0, %0\n\t"
5976 "jmp 2f\n"
5977 "1:\n\t"
5978 "incl %0\n"
5979 "2:\n\t"
5980 : "=r" (iBit)
5981 : "rm" (u32)
5982 : "cc");
5983# else
5984 uint32_t iBit;
5985 _asm
5986 {
5987 bsr eax, [u32]
5988 jnz found
5989 xor eax, eax
5990 jmp done
5991 found:
5992 inc eax
5993 done:
5994 mov [iBit], eax
5995 }
5996# endif
5997 return iBit;
5998}
5999#endif
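
/* Usage sketch (illustrative only; input values are arbitrary examples).  For
 * a non-zero input the return value equals the number of significant bits,
 * i.e. floor(log2(u32)) + 1:
 *
 *      unsigned cSignificantBits = ASMBitLastSetU32(UINT32_C(0x00012345));  // 17 (bit 16)
 *      unsigned iNone            = ASMBitLastSetU32(0);                     // 0
 */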
6000
6001
6002/**
6003 * Finds the last bit which is set in the given 32-bit integer.
6004 * Bits are numbered from 1 (least significant) to 32.
6005 *
6006 * @returns index [1..32] of the last set bit.
6007 * @returns 0 if all bits are cleared.
6008 * @param i32 Integer to search for set bits.
6009 * @remark Similar to fls() in BSD.
6010 */
6011DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
6012{
6013 return ASMBitLastSetU32((uint32_t)i32);
6014}
6015
6016
6017/**
6018 * Finds the last bit which is set in the given 64-bit integer.
6019 *
6020 * Bits are numbered from 1 (least significant) to 64.
6021 *
6022 * @returns index [1..64] of the last set bit.
6023 * @returns 0 if all bits are cleared.
6024 * @param u64 Integer to search for set bits.
6025 * @remark Similar to fls() in BSD.
6026 */
6027#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6028RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6029#else
6030DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
6031{
6032# if RT_INLINE_ASM_USES_INTRIN
6033 unsigned long iBit;
6034# if ARCH_BITS == 64
6035 if (_BitScanReverse64(&iBit, u64))
6036 iBit++;
6037 else
6038 iBit = 0;
6039# else
6040 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
6041 iBit += 33;
6042 else if (_BitScanReverse(&iBit, (uint32_t)u64))
6043 iBit++;
6044 else
6045 iBit = 0;
6046# endif
6047# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
6048 uint64_t iBit;
6049 __asm__ __volatile__("bsrq %1, %0\n\t"
6050 "jnz 1f\n\t"
6051 "xorl %k0, %k0\n\t"
6052 "jmp 2f\n"
6053 "1:\n\t"
6054 "incl %k0\n"
6055 "2:\n\t"
6056 : "=r" (iBit)
6057 : "rm" (u64)
6058 : "cc");
6059# else
6060 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
6061 if (iBit)
6062 iBit += 32;
6063 else
6064 iBit = ASMBitLastSetU32((uint32_t)u64);
6065# endif
6066 return (unsigned)iBit;
6067}
6068#endif
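
/* Usage sketch (illustrative only; input value is an arbitrary example):
 * computing the bit width of a 64-bit value, where zero yields zero.
 *
 *      unsigned cBitsNeeded = ASMBitLastSetU64(UINT64_C(0x0000010000000000));  // 41 (bit 40)
 */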
6069
6070
6071/**
6072 * Finds the last bit which is set in the given 16-bit integer.
6073 *
6074 * Bits are numbered from 1 (least significant) to 16.
6075 *
6076 * @returns index [1..16] of the last set bit.
6077 * @returns 0 if all bits are cleared.
6078 * @param u16 Integer to search for set bits.
6079 * @remarks For 16-bit bs3kit code.
6080 */
6081#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6082RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6083#else
6084DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
6085{
6086 return ASMBitLastSetU32((uint32_t)u16);
6087}
6088#endif
6089
6090
6091/**
6092 * Reverse the byte order of the given 16-bit integer.
6093 *
6094 * @returns The byte swapped value.
6095 * @param u16 16-bit integer value.
6096 */
6097#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6098RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
6099#else
6100DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
6101{
6102# if RT_INLINE_ASM_USES_INTRIN
6103 u16 = _byteswap_ushort(u16);
6104# elif RT_INLINE_ASM_GNU_STYLE
6105 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
6106# else
6107 _asm
6108 {
6109 mov ax, [u16]
6110 ror ax, 8
6111 mov [u16], ax
6112 }
6113# endif
6114 return u16;
6115}
6116#endif
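
/* Usage sketch (illustrative only; input value is an arbitrary example), e.g.
 * when converting between little and big endian representations:
 *
 *      uint16_t u16Swapped = ASMByteSwapU16(UINT16_C(0x1234));  // 0x3412
 */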
6117
6118
6119/**
6120 * Reverse the byte order of the given 32-bit integer.
6121 *
6122 * @returns The byte swapped value.
6123 * @param u32 32-bit integer value.
6124 */
6125#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6126RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
6127#else
6128DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
6129{
6130# if RT_INLINE_ASM_USES_INTRIN
6131 u32 = _byteswap_ulong(u32);
6132# elif RT_INLINE_ASM_GNU_STYLE
6133 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6134# else
6135 _asm
6136 {
6137 mov eax, [u32]
6138 bswap eax
6139 mov [u32], eax
6140 }
6141# endif
6142 return u32;
6143}
6144#endif
6145
6146
6147/**
6148 * Reverse the byte order of the given 64-bit integer.
6149 *
6150 * @returns The byte swapped value.
6151 * @param u64 64-bit integer value.
6152 */
6153DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
6154{
6155#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6156 u64 = _byteswap_uint64(u64);
6157#else
6158 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6159 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6160#endif
6161 return u64;
6162}
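
/* Usage sketch (illustrative only; input value is an arbitrary example).  The
 * portable fallback above is simply two 32-bit swaps with the halves
 * exchanged, so:
 *
 *      uint64_t u64Swapped = ASMByteSwapU64(UINT64_C(0x0123456789abcdef));  // 0xefcdab8967452301
 *      // equivalent to:
 *      //   ((uint64_t)ASMByteSwapU32(UINT32_C(0x89abcdef)) << 32) | ASMByteSwapU32(UINT32_C(0x01234567))
 */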
6163
6164
6165/**
6166 * Rotate 32-bit unsigned value to the left by @a cShift.
6167 *
6168 * @returns Rotated value.
6169 * @param u32 The value to rotate.
6170 * @param cShift How many bits to rotate by.
6171 */
6172#ifdef __WATCOMC__
6173RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
6174#else
6175DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
6176{
6177# if RT_INLINE_ASM_USES_INTRIN
6178 return _rotl(u32, cShift);
6179# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
6180 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
6181 return u32;
6182# else
6183 cShift &= 31;
6184 return (u32 << cShift) | (u32 >> (32 - cShift));
6185# endif
6186}
6187#endif
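
/* Usage sketch (illustrative only; input values are arbitrary examples): bits
 * shifted out at the top re-enter at the bottom.
 *
 *      uint32_t u32Rotated = ASMRotateLeftU32(UINT32_C(0x80000001), 4);  // 0x00000018
 */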
6188
6189
6190/**
6191 * Rotate 32-bit unsigned value to the right by @a cShift.
6192 *
6193 * @returns Rotated value.
6194 * @param u32 The value to rotate.
6195 * @param cShift How many bits to rotate by.
6196 */
6197#ifdef __WATCOMC__
6198RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
6199#else
6200DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
6201{
6202# if RT_INLINE_ASM_USES_INTRIN
6203 return _rotr(u32, cShift);
6204# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
6205 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
6206 return u32;
6207# else
6208 cShift &= 31;
6209 return (u32 >> cShift) | (u32 << (32 - cShift));
6210# endif
6211}
6212#endif
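
/* Usage sketch (illustrative only; input values are arbitrary examples):
 * rotating right undoes a left rotation by the same count (modulo 32).
 *
 *      uint32_t u32 = ASMRotateRightU32(ASMRotateLeftU32(UINT32_C(0xdeadbeef), 7), 7);  // 0xdeadbeef
 */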
6213
6214
6215/**
6216 * Rotate 64-bit unsigned value to the left by @a cShift.
6217 *
6218 * @returns Rotated value.
6219 * @param u64 The value to rotate.
6220 * @param cShift How many bits to rotate by.
6221 */
6222DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
6223{
6224#if RT_INLINE_ASM_USES_INTRIN
6225 return _rotl64(u64, cShift);
6226#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6227 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
6228 return u64;
6229#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
6230 uint32_t uSpill;
6231 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
6232 "jz 1f\n\t"
6233 "xchgl %%eax, %%edx\n\t"
6234 "1:\n\t"
6235 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
6236 "jz 2f\n\t"
6237 "movl %%edx, %2\n\t" /* save the hi value in %3. */
6238 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
6239 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
6240 "2:\n\t" /* } */
6241 : "=A" (u64)
6242 , "=c" (cShift)
6243 , "=r" (uSpill)
6244 : "0" (u64)
6245 , "1" (cShift)
6246 : "cc");
6247 return u64;
6248#else
6249 cShift &= 63;
6250 return (u64 << cShift) | (u64 >> (64 - cShift));
6251#endif
6252}
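
/* Illustrative restatement of what the 32-bit inline assembly path above does,
 * expressed in portable C.  This is not an alternative implementation offered
 * by this header and the helper name is made up; it only spells out the
 * half-swap plus double-precision-shift (shldl) technique:
 *
 *      static uint64_t rotateLeftU64ViaHalves(uint64_t u64, uint32_t cShift)
 *      {
 *          uint32_t uLo = (uint32_t)u64;
 *          uint32_t uHi = (uint32_t)(u64 >> 32);
 *          if (cShift & 0x20)                  // rotating by 32 or more swaps the halves
 *          {
 *              uint32_t uTmp = uLo;
 *              uLo = uHi;
 *              uHi = uTmp;
 *          }
 *          cShift &= 0x1f;
 *          if (cShift)                         // remaining rotate, like the two shldl insns
 *          {
 *              uint32_t uOldHi = uHi;
 *              uHi = (uHi << cShift) | (uLo    >> (32 - cShift));
 *              uLo = (uLo << cShift) | (uOldHi >> (32 - cShift));
 *          }
 *          return ((uint64_t)uHi << 32) | uLo;
 *      }
 */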
6253
6254
6255/**
6256 * Rotate 64-bit unsigned value to the right by @a cShift.
6257 *
6258 * @returns Rotated value.
6259 * @param u64 The value to rotate.
6260 * @param cShift How many bits to rotate by.
6261 */
6262DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
6263{
6264#if RT_INLINE_ASM_USES_INTRIN
6265 return _rotr64(u64, cShift);
6266#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6267 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
6268 return u64;
6269#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
6270 uint32_t uSpill;
6271 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
6272 "jz 1f\n\t"
6273 "xchgl %%eax, %%edx\n\t"
6274 "1:\n\t"
6275 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
6276 "jz 2f\n\t"
6277 "movl %%edx, %2\n\t" /* save the hi value in %3. */
6278 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
6279 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
6280 "2:\n\t" /* } */
6281 : "=A" (u64)
6282 , "=c" (cShift)
6283 , "=r" (uSpill)
6284 : "0" (u64)
6285 , "1" (cShift)
6286 : "cc");
6287 return u64;
6288#else
6289 cShift &= 63;
6290 return (u64 >> cShift) | (u64 << (64 - cShift));
6291#endif
6292}
6293
6294/** @} */
6295
6296
6297/** @} */
6298
6299/*
6300 * Include #pragma aux definitions for Watcom C/C++.
6301 */
6302#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
6303# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
6304# undef IPRT_INCLUDED_asm_watcom_x86_16_h
6305# include "asm-watcom-x86-16.h"
6306#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
6307# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
6308# undef IPRT_INCLUDED_asm_watcom_x86_32_h
6309# include "asm-watcom-x86-32.h"
6310#endif
6311
6312#endif /* !IPRT_INCLUDED_asm_h */
6313