VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 79043

Last change on this file since 79043 was 76723, checked in by vboxsync, 6 years ago

asm.h: do not use the GCC version of ASMAtomicUoWritePtr for old GCC versions to prevent warnings building kernel modules.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 165.6 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2019 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46# pragma warning(push)
47# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
48# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
49# include <intrin.h>
50# pragma warning(pop)
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(__stosd)
55# pragma intrinsic(__stosw)
56# pragma intrinsic(__stosb)
57# pragma intrinsic(_BitScanForward)
58# pragma intrinsic(_BitScanReverse)
59# pragma intrinsic(_bittest)
60# pragma intrinsic(_bittestandset)
61# pragma intrinsic(_bittestandreset)
62# pragma intrinsic(_bittestandcomplement)
63# pragma intrinsic(_byteswap_ushort)
64# pragma intrinsic(_byteswap_ulong)
65# pragma intrinsic(_interlockedbittestandset)
66# pragma intrinsic(_interlockedbittestandreset)
67# pragma intrinsic(_InterlockedAnd)
68# pragma intrinsic(_InterlockedOr)
69# pragma intrinsic(_InterlockedIncrement)
70# pragma intrinsic(_InterlockedDecrement)
71# pragma intrinsic(_InterlockedExchange)
72# pragma intrinsic(_InterlockedExchangeAdd)
73# pragma intrinsic(_InterlockedCompareExchange)
74# pragma intrinsic(_InterlockedCompareExchange64)
75# pragma intrinsic(_rotl)
76# pragma intrinsic(_rotr)
77# pragma intrinsic(_rotl64)
78# pragma intrinsic(_rotr64)
79# ifdef RT_ARCH_AMD64
80# pragma intrinsic(__stosq)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# pragma intrinsic(_InterlockedExchangeAdd64)
84# pragma intrinsic(_InterlockedAnd64)
85# pragma intrinsic(_InterlockedOr64)
86# pragma intrinsic(_InterlockedIncrement64)
87# pragma intrinsic(_InterlockedDecrement64)
88# endif
89#endif
90
91/*
92 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
93 */
94#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-16.h"
96#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
97# include "asm-watcom-x86-32.h"
98#endif
99
100
101/** @defgroup grp_rt_asm ASM - Assembly Routines
102 * @ingroup grp_rt
103 *
104 * @remarks The difference between ordered and unordered atomic operations is that
105 * the former will complete outstanding reads and writes before continuing
106 * while the latter doesn't make any promises about the order. Ordered
107 * operations don't, it seems, make any 100% promise with regard to whether
108 * the operation will complete before any subsequent memory access.
109 * (please, correct if wrong.)
110 *
111 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
112 * are unordered (note the Uo).
113 *
114 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
115 * or even optimize assembler instructions away. For instance, in the following code
116 * the second rdmsr instruction is optimized away because gcc treats that instruction
117 * as deterministic:
118 *
119 * @code
120 * static inline uint64_t rdmsr_low(int idx)
121 * {
122 * uint32_t low;
123 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
124 * }
125 * ...
126 * uint32_t msr1 = rdmsr_low(1);
127 * foo(msr1);
128 * msr1 = rdmsr_low(1);
129 * bar(msr1);
130 * @endcode
131 *
132 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
133 * use the result of the first call as input parameter for bar() as well. For rdmsr this
134 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
135 * machine status information in general.
136 *
137 * @{
138 */
139
140
141/** @def RT_INLINE_ASM_GCC_4_3_X_X86
142 * Used to work around some 4.3.x register allocation issues in this version of
143 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
144 * definitely not for 5.x */
145#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
146# define RT_INLINE_ASM_GCC_4_3_X_X86 1
147#else
148# define RT_INLINE_ASM_GCC_4_3_X_X86 0
149#endif
150
151/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
152 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
153 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
154 * mode, x86.
155 *
156 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
157 * when in PIC mode on x86.
 *
 * When this evaluates to 1, the 64-bit exchange and compare-exchange helpers
 * in this file are only declared here instead of being defined inline.  The
 * value can be predefined by the includer, in which case the detection below
 * is skipped.
158 */
159#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
160# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
161# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
162# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4668 is enabled. */
163# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
164# elif ( (defined(PIC) || defined(__PIC__)) \
165 && defined(RT_ARCH_X86) \
166 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
167 || defined(RT_OS_DARWIN)) )
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
169# else
170# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
171# endif
172#endif
173
174
175/** @def ASMReturnAddress
176 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Expands to the _ReturnAddress() intrinsic with Visual C++ and to
 * __builtin_return_address(0) with GCC (and when running doxygen).  With
 * Watcom it expands to a call to a function whose name spells out that no
 * such intrinsic is known; no declaration of that function is visible in
 * this part of the file.  Any other compiler is rejected with an error.
177 */
178#ifdef _MSC_VER
179# ifdef __cplusplus
180extern "C"
181# endif
182void * _ReturnAddress(void);
183# pragma intrinsic(_ReturnAddress)
184# define ASMReturnAddress() _ReturnAddress()
185#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
186# define ASMReturnAddress() __builtin_return_address(0)
187#elif defined(__WATCOMC__)
188# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
189#else
190# error "Unsupported compiler."
191#endif
192
193
194/**
195 * Compiler memory barrier.
196 *
197 * Ensure that the compiler does not use any cached (register/tmp stack) memory
198 * values or any outstanding writes when returning from this function.
199 *
200 * This function must be used if non-volatile data is modified by a
201 * device or the VMM. Typical cases are port access, MMIO access,
202 * trapping instruction, etc.
 *
 * Depending on the compiler this is a macro (GNU-style inline assembly or the
 * _ReadWriteBarrier() intrinsic), an external declaration (Watcom), or an
 * inline function containing an empty __asm block.
203 */
204#if RT_INLINE_ASM_GNU_STYLE
205# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0) /* empty asm statement with a "memory" clobber */
206#elif RT_INLINE_ASM_USES_INTRIN
207# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
208#elif defined(__WATCOMC__)
209void ASMCompilerBarrier(void);
210#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
211DECLINLINE(void) ASMCompilerBarrier(void)
212{
213 __asm
214 {
215 }
216}
217#endif
218
219
220/** @def ASMBreakpoint
221 * Debugger Breakpoint.  Plain alias that expands to RT_BREAKPOINT().
222 * @deprecated Use RT_BREAKPOINT instead.
223 * @internal
224 */
225#define ASMBreakpoint() RT_BREAKPOINT()
226
227
228/**
229 * Spinloop hint for platforms that have these, empty function on the other
230 * platforms.
231 *
232 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
233 * spin locks.
 *
 * On x86/AMD64 the function is only declared here when RT_INLINE_ASM_EXTERNAL
 * is set; otherwise it is defined inline and emits the two instruction bytes
 * directly instead of using a mnemonic.
234 */
235#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
236RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void);
237#else
238DECLINLINE(void) ASMNopPause(void)
239{
240# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
241# if RT_INLINE_ASM_GNU_STYLE
242 __asm__ __volatile__(".byte 0xf3,0x90\n\t"); /* F3 90 is the PAUSE encoding */
243# else
244 __asm {
245 _emit 0f3h
246 _emit 090h
247 }
248# endif
249# else
250 /* dummy - no spin hint is emitted on other architectures */
251# endif
252}
253#endif
254
255
256/**
257 * Atomically Exchange an unsigned 8-bit value, ordered.
258 *
259 * @returns The value *pu8 held before the exchange.
260 * @param pu8 Pointer to the 8-bit variable to update.
261 * @param u8 The 8-bit value to assign to *pu8.
 *
 * @remarks Only declared here when RT_INLINE_ASM_EXTERNAL is set.  The inline
 *          versions use a single XCHG instruction with a memory operand and
 *          reuse the u8 parameter to carry the previous value back out.
262 */
263#if RT_INLINE_ASM_EXTERNAL
264RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
265#else
266DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
267{
268# if RT_INLINE_ASM_GNU_STYLE
 /* %0 is *pu8, %1 is the register that holds u8 on entry (tied via "1") and
    the old *pu8 on exit. */
269 __asm__ __volatile__("xchgb %0, %1\n\t"
270 : "=m" (*pu8),
271 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
272 : "1" (u8),
273 "m" (*pu8));
274# else
 /* Load pointer and new value, swap with memory, store the old value back
    into u8. */
275 __asm
276 {
277# ifdef RT_ARCH_AMD64
278 mov rdx, [pu8]
279 mov al, [u8]
280 xchg [rdx], al
281 mov [u8], al
282# else
283 mov edx, [pu8]
284 mov al, [u8]
285 xchg [edx], al
286 mov [u8], al
287# endif
288 }
289# endif
290 return u8;
291}
292#endif
293
294
295/**
296 * Atomically Exchange a signed 8-bit value, ordered.
297 *
298 * @returns Current *pu8 value
299 * @param pi8 Pointer to the 8-bit variable to update.
300 * @param i8 The 8-bit value to assign to *pi8.
301 */
302DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
303{
304 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
305}
306
307
308/**
309 * Atomically Exchange a bool value, ordered.
310 *
311 * @returns Current *pf value
312 * @param pf Pointer to the 8-bit variable to update.
313 * @param f The 8-bit value to assign to *pi8.
314 */
315DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
316{
317#ifdef _MSC_VER
318 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#else
320 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
321#endif
322}
323
324
325/**
326 * Atomically Exchange an unsigned 16-bit value, ordered.
327 *
328 * @returns The value *pu16 held before the exchange.
329 * @param pu16 Pointer to the 16-bit variable to update.
330 * @param u16 The 16-bit value to assign to *pu16.
 *
 * @remarks Only declared here when RT_INLINE_ASM_EXTERNAL is set.  The inline
 *          versions use a single XCHG instruction with a memory operand and
 *          reuse the u16 parameter to carry the previous value back out.
331 */
332#if RT_INLINE_ASM_EXTERNAL
333RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
334#else
335DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
336{
337# if RT_INLINE_ASM_GNU_STYLE
 /* %0 is *pu16, %1 is the register holding u16 on entry (tied via "1") and
    the old *pu16 on exit. */
338 __asm__ __volatile__("xchgw %0, %1\n\t"
339 : "=m" (*pu16),
340 "=r" (u16)
341 : "1" (u16),
342 "m" (*pu16));
343# else
 /* Load pointer and new value, swap with memory, store the old value back
    into u16. */
344 __asm
345 {
346# ifdef RT_ARCH_AMD64
347 mov rdx, [pu16]
348 mov ax, [u16]
349 xchg [rdx], ax
350 mov [u16], ax
351# else
352 mov edx, [pu16]
353 mov ax, [u16]
354 xchg [edx], ax
355 mov [u16], ax
356# endif
357 }
358# endif
359 return u16;
360}
361#endif
362
363
364/**
365 * Atomically Exchange a signed 16-bit value, ordered.
366 *
367 * @returns Current *pu16 value
368 * @param pi16 Pointer to the 16-bit variable to update.
369 * @param i16 The 16-bit value to assign to *pi16.
370 */
371DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
372{
373 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
374}
375
376
377/**
378 * Atomically Exchange an unsigned 32-bit value, ordered.
379 *
380 * @returns The value *pu32 held before the exchange.
381 * @param pu32 Pointer to the 32-bit variable to update.
382 * @param u32 The 32-bit value to assign to *pu32.
383 *
384 * @remarks Does not work on 286 and earlier.
 * @remarks Only declared here when RT_INLINE_ASM_EXTERNAL is set and
 *          intrinsics are not used.  Otherwise implemented with GNU-style
 *          inline assembly, the _InterlockedExchange() intrinsic, or an
 *          __asm block, in that order of preference.
385 */
386#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
387RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
388#else
389DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
390{
391# if RT_INLINE_ASM_GNU_STYLE
 /* %0 is *pu32, %1 is the register holding u32 on entry (tied via "1") and
    the old *pu32 on exit. */
392 __asm__ __volatile__("xchgl %0, %1\n\t"
393 : "=m" (*pu32),
394 "=r" (u32)
395 : "1" (u32),
396 "m" (*pu32));
397
398# elif RT_INLINE_ASM_USES_INTRIN
399 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32); /* the intrinsic's result is stored back into u32 */
400
401# else
 /* Load pointer and new value, swap with memory, store the old value back
    into u32. */
402 __asm
403 {
404# ifdef RT_ARCH_AMD64
405 mov rdx, [pu32]
406 mov eax, u32
407 xchg [rdx], eax
408 mov [u32], eax
409# else
410 mov edx, [pu32]
411 mov eax, u32
412 xchg [edx], eax
413 mov [u32], eax
414# endif
415 }
416# endif
417 return u32;
418}
419#endif
420
421
422/**
423 * Atomically Exchange a signed 32-bit value, ordered.
424 *
425 * @returns Current *pu32 value
426 * @param pi32 Pointer to the 32-bit variable to update.
427 * @param i32 The 32-bit value to assign to *pi32.
428 */
429DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
430{
431 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
432}
433
434
435/**
436 * Atomically Exchange an unsigned 64-bit value, ordered.
437 *
438 * @returns The value *pu64 held before the exchange.
439 * @param pu64 Pointer to the 64-bit variable to update.
440 * @param u64 The 64-bit value to assign to *pu64.
441 *
442 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
 * @remarks Only declared here when RT_INLINE_ASM_EXTERNAL is set without
 *          intrinsics, or when RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC is set.
 *          AMD64 builds use a 64-bit exchange; the other (32-bit) paths
 *          retry a LOCK CMPXCHG8B until it succeeds.
443 */
444#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
445 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
446RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
447#else
448DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
449{
450# if defined(RT_ARCH_AMD64)
451# if RT_INLINE_ASM_USES_INTRIN
452 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
453
454# elif RT_INLINE_ASM_GNU_STYLE
 /* %0 is *pu64, %1 is the register holding u64 on entry (tied via "1") and
    the old *pu64 on exit. */
455 __asm__ __volatile__("xchgq %0, %1\n\t"
456 : "=m" (*pu64),
457 "=r" (u64)
458 : "1" (u64),
459 "m" (*pu64));
460# else
461 __asm
462 {
463 mov rdx, [pu64]
464 mov rax, [u64]
465 xchg [rdx], rax
466 mov [u64], rax
467 }
468# endif
469# else /* !RT_ARCH_AMD64 */
470# if RT_INLINE_ASM_GNU_STYLE
471# if defined(PIC) || defined(__PIC__)
 /* The low half of the new value is passed in memory and swapped into EBX
    only around the loop; EBX gets its previous content back afterwards
    (presumably because EBX is reserved in PIC code - confirm). */
472 uint32_t u32EBX = (uint32_t)u64;
473 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
474 "xchgl %%ebx, %3\n\t"
475 "1:\n\t"
476 "lock; cmpxchg8b (%5)\n\t"
477 "jnz 1b\n\t"
478 "movl %3, %%ebx\n\t"
479 /*"xchgl %%esi, %5\n\t"*/
480 : "=A" (u64),
481 "=m" (*pu64)
482 : "0" (*pu64),
483 "m" ( u32EBX ),
484 "c" ( (uint32_t)(u64 >> 32) ),
485 "S" (pu64));
486# else /* !PIC */
 /* EDX:EAX starts out as the current *pu64 and ECX:EBX holds the new value;
    loop until the compare-exchange succeeds. */
487 __asm__ __volatile__("1:\n\t"
488 "lock; cmpxchg8b %1\n\t"
489 "jnz 1b\n\t"
490 : "=A" (u64),
491 "=m" (*pu64)
492 : "0" (*pu64),
493 "b" ( (uint32_t)u64 ),
494 "c" ( (uint32_t)(u64 >> 32) ));
495# endif
496# else
 /* Same retry loop as the GNU-style variant: ECX:EBX = new value,
    EDX:EAX = expected/old value, result copied back into u64. */
497 __asm
498 {
499 mov ebx, dword ptr [u64]
500 mov ecx, dword ptr [u64 + 4]
501 mov edi, pu64
502 mov eax, dword ptr [edi]
503 mov edx, dword ptr [edi + 4]
504 retry:
505 lock cmpxchg8b [edi]
506 jnz retry
507 mov dword ptr [u64], eax
508 mov dword ptr [u64 + 4], edx
509 }
510# endif
511# endif /* !RT_ARCH_AMD64 */
512 return u64;
513}
514#endif
515
516
517/**
518 * Atomically Exchange an signed 64-bit value, ordered.
519 *
520 * @returns Current *pi64 value
521 * @param pi64 Pointer to the 64-bit variable to update.
522 * @param i64 The 64-bit value to assign to *pi64.
523 */
524DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
525{
526 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
527}
528
529
530/**
531 * Atomically Exchange a size_t value, ordered.
532 *
533 * @returns Current *ppv value
534 * @param puDst Pointer to the size_t variable to update.
535 * @param uNew The new value to assign to *puDst.
536 */
537DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew)
538{
539#if ARCH_BITS == 16
540 AssertCompile(sizeof(size_t) == 2);
541 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
542#elif ARCH_BITS == 32
543 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
544#elif ARCH_BITS == 64
545 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
546#else
547# error "ARCH_BITS is bogus"
548#endif
549}
550
551
552/**
553 * Atomically Exchange a pointer value, ordered.
554 *
555 * @returns Current *ppv value
556 * @param ppv Pointer to the pointer variable to update.
557 * @param pv The pointer value to assign to *ppv.
558 */
559DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
560{
561#if ARCH_BITS == 32 || ARCH_BITS == 16
562 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
563#elif ARCH_BITS == 64
564 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
565#else
566# error "ARCH_BITS is bogus"
567#endif
568}
569
570
571/**
572 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
573 *
574 * @returns Current *ppv value
575 * @param ppv Pointer to the pointer variable to update.
576 * @param pv The pointer value to assign to *ppv.
577 * @param Type The type of *ppv, sans volatile.
 *
 * @remarks The GCC variant first assigns ppv and pv to temporaries declared
 *          from __typeof__(*(ppv)) and Type, so a mismatching argument is
 *          diagnosed by the compiler before the casts are applied.  The
 *          generic variant only casts.
578 */
579#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
580# define ASMAtomicXchgPtrT(ppv, pv, Type) \
581 __extension__ \
582 ({\
583 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
584 Type const pvTypeChecked = (pv); \
585 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
586 pvTypeCheckedRet; \
587 })
588#else
589# define ASMAtomicXchgPtrT(ppv, pv, Type) \
590 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
591#endif
592
593
594/**
595 * Atomically Exchange a raw-mode context pointer value, ordered.
596 *
597 * @returns Current *ppv value
598 * @param ppvRC Pointer to the pointer variable to update.
599 * @param pvRC The pointer value to assign to *ppv.
600 */
601DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
602{
603 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
604}
605
606
607/**
608 * Atomically Exchange a ring-0 pointer value, ordered.
609 *
610 * @returns Current *ppv value
611 * @param ppvR0 Pointer to the pointer variable to update.
612 * @param pvR0 The pointer value to assign to *ppv.
613 */
614DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
615{
616#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
617 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
618#elif R0_ARCH_BITS == 64
619 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
620#else
621# error "R0_ARCH_BITS is bogus"
622#endif
623}
624
625
626/**
627 * Atomically Exchange a ring-3 pointer value, ordered.
628 *
629 * @returns Current *ppv value
630 * @param ppvR3 Pointer to the pointer variable to update.
631 * @param pvR3 The pointer value to assign to *ppv.
632 */
633DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
634{
635#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
636 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
637#elif R3_ARCH_BITS == 64
638 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
639#else
640# error "R3_ARCH_BITS is bogus"
641#endif
642}
643
644
645/** @def ASMAtomicXchgHandle
646 * Atomically Exchange a typical IPRT handle value, ordered.
647 *
648 * @param ph Pointer to the value to update.
649 * @param hNew The new value to assign to *ph.
650 * @param phRes Where to store the current *ph value.
651 *
652 * @remarks This doesn't currently work for all handles (like RTFILE).
 * @remarks Both *ph and *phRes are checked at compile time to have the size
 *          of the integer type used for the exchange (32-bit when
 *          HC_ARCH_BITS is 32 or ARCH_BITS is 16, 64-bit when HC_ARCH_BITS
 *          is 64).
653 */
654#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
655# define ASMAtomicXchgHandle(ph, hNew, phRes) \
656 do { \
657 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
658 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
659 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
660 } while (0)
661#elif HC_ARCH_BITS == 64
662# define ASMAtomicXchgHandle(ph, hNew, phRes) \
663 do { \
664 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
665 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
666 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
667 } while (0)
668#else
669# error HC_ARCH_BITS
670#endif
671
672
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange function
 * and asserts for any other size.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo This is busted as its missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew));  break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
691
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange function
 * and asserts for any other size.  The previous value is written through
 * puRes using the same width as the exchange.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew));  break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro in the message and pass an int for the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
710
711
712
713/**
714 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
715 *
716 * @returns true if xchg was done.
717 * @returns false if xchg wasn't done.
718 *
719 * @param pu8 Pointer to the value to update.
720 * @param u8New The new value to assign to *pu8.
721 * @param u8Old The old value to compare *pu8 with.
722 *
723 * @remarks x86: Requires a 486 or later.
 * @remarks Only defined inline for GNU-style inline assembly; every other
 *          configuration just gets the declaration.
724 */
725#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
726RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
727#else
728DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
729{
730 uint8_t u8Ret;
 /* u8Old is placed in AL ("=a" tied via "2"), u8New in a byte register (%3);
    SETZ records in u8Ret whether the compare-exchange succeeded. */
731 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
732 "setz %1\n\t"
733 : "=m" (*pu8),
734 "=qm" (u8Ret),
735 "=a" (u8Old)
736 : "q" (u8New),
737 "2" (u8Old),
738 "m" (*pu8));
739 return (bool)u8Ret;
740}
741#endif
742
743
744/**
745 * Atomically Compare and Exchange a signed 8-bit value, ordered.
746 *
747 * @returns true if xchg was done.
748 * @returns false if xchg wasn't done.
749 *
750 * @param pi8 Pointer to the value to update.
751 * @param i8New The new value to assigned to *pi8.
752 * @param i8Old The old value to *pi8 compare with.
753 *
754 * @remarks x86: Requires a 486 or later.
755 */
756DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
757{
758 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
759}
760
761
762/**
763 * Atomically Compare and Exchange a bool value, ordered.
764 *
765 * @returns true if xchg was done.
766 * @returns false if xchg wasn't done.
767 *
768 * @param pf Pointer to the value to update.
769 * @param fNew The new value to assigned to *pf.
770 * @param fOld The old value to *pf compare with.
771 *
772 * @remarks x86: Requires a 486 or later.
773 */
774DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
775{
776 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
777}
778
779
780/**
781 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
782 *
783 * @returns true if xchg was done.
784 * @returns false if xchg wasn't done.
785 *
786 * @param pu32 Pointer to the value to update.
787 * @param u32New The new value to assign to *pu32.
788 * @param u32Old The old value to compare *pu32 with.
789 *
790 * @remarks x86: Requires a 486 or later.
 * @remarks Only declared here when RT_INLINE_ASM_EXTERNAL is set and
 *          intrinsics are not used.
791 */
792#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
793RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
794#else
795DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
796{
797# if RT_INLINE_ASM_GNU_STYLE
798 uint8_t u8Ret;
 /* u32Old is placed in EAX ("=a" tied via "2"), u32New in a register (%3);
    SETZ records in u8Ret whether the compare-exchange succeeded. */
799 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
800 "setz %1\n\t"
801 : "=m" (*pu32),
802 "=qm" (u8Ret),
803 "=a" (u32Old)
804 : "r" (u32New),
805 "2" (u32Old),
806 "m" (*pu32));
807 return (bool)u8Ret;
808
809# elif RT_INLINE_ASM_USES_INTRIN
 /* Success is detected by comparing the intrinsic's result with u32Old. */
810 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
811
812# else
813 uint32_t u32Ret;
 /* EAX = expected value, ECX = new value; the zero flag after CMPXCHG is
    widened to 32 bits and stored in u32Ret. */
814 __asm
815 {
816# ifdef RT_ARCH_AMD64
817 mov rdx, [pu32]
818# else
819 mov edx, [pu32]
820# endif
821 mov eax, [u32Old]
822 mov ecx, [u32New]
823# ifdef RT_ARCH_AMD64
824 lock cmpxchg [rdx], ecx
825# else
826 lock cmpxchg [edx], ecx
827# endif
828 setz al
829 movzx eax, al
830 mov [u32Ret], eax
831 }
832 return !!u32Ret;
833# endif
834}
835#endif
836
837
838/**
839 * Atomically Compare and Exchange a signed 32-bit value, ordered.
840 *
841 * @returns true if xchg was done.
842 * @returns false if xchg wasn't done.
843 *
844 * @param pi32 Pointer to the value to update.
845 * @param i32New The new value to assigned to *pi32.
846 * @param i32Old The old value to *pi32 compare with.
847 *
848 * @remarks x86: Requires a 486 or later.
849 */
850DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
851{
852 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
853}
854
855
856/**
857 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
858 *
859 * @returns true if xchg was done.
860 * @returns false if xchg wasn't done.
861 *
862 * @param pu64 Pointer to the 64-bit variable to update.
863 * @param u64New The 64-bit value to assign to *pu64.
864 * @param u64Old The value to compare with.
865 *
866 * @remarks x86: Requires a Pentium or later.
867 */
868#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
869 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
870RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
871#else
872DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
873{
874# if RT_INLINE_ASM_USES_INTRIN
875 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
876
877# elif defined(RT_ARCH_AMD64)
878# if RT_INLINE_ASM_GNU_STYLE
879 uint8_t u8Ret;
880 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
881 "setz %1\n\t"
882 : "=m" (*pu64),
883 "=qm" (u8Ret),
884 "=a" (u64Old)
885 : "r" (u64New),
886 "2" (u64Old),
887 "m" (*pu64));
888 return (bool)u8Ret;
889# else
890 bool fRet;
891 __asm
892 {
893 mov rdx, [pu32]
894 mov rax, [u64Old]
895 mov rcx, [u64New]
896 lock cmpxchg [rdx], rcx
897 setz al
898 mov [fRet], al
899 }
900 return fRet;
901# endif
902# else /* !RT_ARCH_AMD64 */
903 uint32_t u32Ret;
904# if RT_INLINE_ASM_GNU_STYLE
905# if defined(PIC) || defined(__PIC__)
906 uint32_t u32EBX = (uint32_t)u64New;
907 uint32_t u32Spill;
908 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
909 "lock; cmpxchg8b (%6)\n\t"
910 "setz %%al\n\t"
911 "movl %4, %%ebx\n\t"
912 "movzbl %%al, %%eax\n\t"
913 : "=a" (u32Ret),
914 "=d" (u32Spill),
915# if RT_GNUC_PREREQ(4, 3)
916 "+m" (*pu64)
917# else
918 "=m" (*pu64)
919# endif
920 : "A" (u64Old),
921 "m" ( u32EBX ),
922 "c" ( (uint32_t)(u64New >> 32) ),
923 "S" (pu64));
924# else /* !PIC */
925 uint32_t u32Spill;
926 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
927 "setz %%al\n\t"
928 "movzbl %%al, %%eax\n\t"
929 : "=a" (u32Ret),
930 "=d" (u32Spill),
931 "+m" (*pu64)
932 : "A" (u64Old),
933 "b" ( (uint32_t)u64New ),
934 "c" ( (uint32_t)(u64New >> 32) ));
935# endif
936 return (bool)u32Ret;
937# else
938 __asm
939 {
940 mov ebx, dword ptr [u64New]
941 mov ecx, dword ptr [u64New + 4]
942 mov edi, [pu64]
943 mov eax, dword ptr [u64Old]
944 mov edx, dword ptr [u64Old + 4]
945 lock cmpxchg8b [edi]
946 setz al
947 movzx eax, al
948 mov dword ptr [u32Ret], eax
949 }
950 return !!u32Ret;
951# endif
952# endif /* !RT_ARCH_AMD64 */
953}
954#endif
955
956
957/**
958 * Atomically Compare and exchange a signed 64-bit value, ordered.
959 *
960 * @returns true if xchg was done.
961 * @returns false if xchg wasn't done.
962 *
963 * @param pi64 Pointer to the 64-bit variable to update.
964 * @param i64 The 64-bit value to assign to *pu64.
965 * @param i64Old The value to compare with.
966 *
967 * @remarks x86: Requires a Pentium or later.
968 */
969DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
970{
971 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
972}
973
974
975/**
976 * Atomically Compare and Exchange a pointer value, ordered.
977 *
978 * @returns true if xchg was done.
979 * @returns false if xchg wasn't done.
980 *
981 * @param ppv Pointer to the value to update.
982 * @param pvNew The new value to assigned to *ppv.
983 * @param pvOld The old value to *ppv compare with.
984 *
985 * @remarks x86: Requires a 486 or later.
986 */
987DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
988{
989#if ARCH_BITS == 32 || ARCH_BITS == 16
990 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
991#elif ARCH_BITS == 64
992 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
993#else
994# error "ARCH_BITS is bogus"
995#endif
996}
997
998
999/**
1000 * Atomically Compare and Exchange a pointer value, ordered.
1001 *
1002 * @returns true if xchg was done.
1003 * @returns false if xchg wasn't done.
1004 *
1005 * @param ppv Pointer to the value to update.
1006 * @param pvNew The new value to assign to *ppv.
1007 * @param pvOld The old value to compare *ppv with.
1008 *
1009 * @remarks This is relatively type safe on GCC platforms.
 *          There, all three arguments are first assigned to temporaries
 *          declared with __typeof__(*(ppv)) before being cast and passed to
 *          ASMAtomicCmpXchgPtrVoid; the generic variant only casts.
1010 * @remarks x86: Requires a 486 or later.
1011 */
1012#ifdef __GNUC__
1013# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1014 __extension__ \
1015 ({\
1016 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1017 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1018 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1019 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1020 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1021 fMacroRet; \
1022 })
1023#else
1024# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1025 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1026#endif
1027
1028
1029/** @def ASMAtomicCmpXchgHandle
1030 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1031 *
1032 * @param ph Pointer to the value to update.
1033 * @param hNew The new value to assigned to *pu.
1034 * @param hOld The old value to *pu compare with.
1035 * @param fRc Where to store the result.
1036 *
1037 * @remarks This doesn't currently work for all handles (like RTFILE).
1038 * @remarks x86: Requires a 486 or later.
1039 */
1040#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1041# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1042 do { \
1043 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1044 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1045 } while (0)
1046#elif HC_ARCH_BITS == 64
1047# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1048 do { \
1049 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1050 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1051 } while (0)
1052#else
1053# error HC_ARCH_BITS
1054#endif
1055
1056
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu): 4 bytes go to ASMAtomicCmpXchgU32(), 8 bytes to
 * ASMAtomicCmpXchgU64().  Any other size triggers an assertion and yields
 * false in @a fRc.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if exchanged).
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1080
1081
1082/**
1083 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1084 * passes back old value, ordered.
1085 *
1086 * @returns true if xchg was done.
1087 * @returns false if xchg wasn't done.
1088 *
1089 * @param pu32 Pointer to the value to update.
1090 * @param u32New The new value to assigned to *pu32.
1091 * @param u32Old The old value to *pu32 compare with.
1092 * @param pu32Old Pointer store the old value at.
1093 *
1094 * @remarks x86: Requires a 486 or later.
1095 */
1096#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1097RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1098#else
1099DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1100{
1101# if RT_INLINE_ASM_GNU_STYLE
1102 uint8_t u8Ret;
1103 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1104 "setz %1\n\t"
1105 : "=m" (*pu32),
1106 "=qm" (u8Ret),
1107 "=a" (*pu32Old)
1108 : "r" (u32New),
1109 "a" (u32Old),
1110 "m" (*pu32));
1111 return (bool)u8Ret;
1112
1113# elif RT_INLINE_ASM_USES_INTRIN
1114 return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1115
1116# else
1117 uint32_t u32Ret;
1118 __asm
1119 {
1120# ifdef RT_ARCH_AMD64
1121 mov rdx, [pu32]
1122# else
1123 mov edx, [pu32]
1124# endif
1125 mov eax, [u32Old]
1126 mov ecx, [u32New]
1127# ifdef RT_ARCH_AMD64
1128 lock cmpxchg [rdx], ecx
1129 mov rdx, [pu32Old]
1130 mov [rdx], eax
1131# else
1132 lock cmpxchg [edx], ecx
1133 mov edx, [pu32Old]
1134 mov [edx], eax
1135# endif
1136 setz al
1137 movzx eax, al
1138 mov [u32Ret], eax
1139 }
1140 return !!u32Ret;
1141# endif
1142}
1143#endif
1144
1145
1146/**
1147 * Atomically Compare and Exchange a signed 32-bit value, additionally
1148 * passes back old value, ordered.
1149 *
1150 * @returns true if xchg was done.
1151 * @returns false if xchg wasn't done.
1152 *
1153 * @param pi32 Pointer to the value to update.
1154 * @param i32New The new value to assigned to *pi32.
1155 * @param i32Old The old value to *pi32 compare with.
1156 * @param pi32Old Pointer store the old value at.
1157 *
1158 * @remarks x86: Requires a 486 or later.
1159 */
1160DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1161{
1162 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1163}
1164
1165
1166/**
1167 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1168 * passing back old value, ordered.
1169 *
1170 * @returns true if xchg was done.
1171 * @returns false if xchg wasn't done.
1172 *
1173 * @param pu64 Pointer to the 64-bit variable to update.
1174 * @param u64New The 64-bit value to assign to *pu64.
1175 * @param u64Old The value to compare with.
1176 * @param pu64Old Pointer store the old value at.
1177 *
1178 * @remarks x86: Requires a Pentium or later.
1179 */
1180#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1181 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1182RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1183#else
1184DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1185{
1186# if RT_INLINE_ASM_USES_INTRIN
1187 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1188
1189# elif defined(RT_ARCH_AMD64)
1190# if RT_INLINE_ASM_GNU_STYLE
1191 uint8_t u8Ret;
1192 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1193 "setz %1\n\t"
1194 : "=m" (*pu64),
1195 "=qm" (u8Ret),
1196 "=a" (*pu64Old)
1197 : "r" (u64New),
1198 "a" (u64Old),
1199 "m" (*pu64));
1200 return (bool)u8Ret;
1201# else
1202 bool fRet;
1203 __asm
1204 {
1205 mov rdx, [pu32]
1206 mov rax, [u64Old]
1207 mov rcx, [u64New]
1208 lock cmpxchg [rdx], rcx
1209 mov rdx, [pu64Old]
1210 mov [rdx], rax
1211 setz al
1212 mov [fRet], al
1213 }
1214 return fRet;
1215# endif
1216# else /* !RT_ARCH_AMD64 */
1217# if RT_INLINE_ASM_GNU_STYLE
1218 uint64_t u64Ret;
1219# if defined(PIC) || defined(__PIC__)
1220 /* NB: this code uses a memory clobber description, because the clean
1221 * solution with an output value for *pu64 makes gcc run out of registers.
1222 * This will cause suboptimal code, and anyone with a better solution is
1223 * welcome to improve this. */
1224 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1225 "lock; cmpxchg8b %3\n\t"
1226 "xchgl %%ebx, %1\n\t"
1227 : "=A" (u64Ret)
1228 : "DS" ((uint32_t)u64New),
1229 "c" ((uint32_t)(u64New >> 32)),
1230 "m" (*pu64),
1231 "0" (u64Old)
1232 : "memory" );
1233# else /* !PIC */
1234 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1235 : "=A" (u64Ret),
1236 "=m" (*pu64)
1237 : "b" ((uint32_t)u64New),
1238 "c" ((uint32_t)(u64New >> 32)),
1239 "m" (*pu64),
1240 "0" (u64Old));
1241# endif
1242 *pu64Old = u64Ret;
1243 return u64Ret == u64Old;
1244# else
1245 uint32_t u32Ret;
1246 __asm
1247 {
1248 mov ebx, dword ptr [u64New]
1249 mov ecx, dword ptr [u64New + 4]
1250 mov edi, [pu64]
1251 mov eax, dword ptr [u64Old]
1252 mov edx, dword ptr [u64Old + 4]
1253 lock cmpxchg8b [edi]
1254 mov ebx, [pu64Old]
1255 mov [ebx], eax
1256 setz al
1257 movzx eax, al
1258 add ebx, 4
1259 mov [ebx], edx
1260 mov dword ptr [u32Ret], eax
1261 }
1262 return !!u32Ret;
1263# endif
1264# endif /* !RT_ARCH_AMD64 */
1265}
1266#endif
1267
1268
1269/**
1270 * Atomically Compare and exchange a signed 64-bit value, additionally
1271 * passing back old value, ordered.
1272 *
1273 * @returns true if xchg was done.
1274 * @returns false if xchg wasn't done.
1275 *
1276 * @param pi64 Pointer to the 64-bit variable to update.
1277 * @param i64 The 64-bit value to assign to *pu64.
1278 * @param i64Old The value to compare with.
1279 * @param pi64Old Pointer store the old value at.
1280 *
1281 * @remarks x86: Requires a Pentium or later.
1282 */
1283DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1284{
1285 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1286}
1287
1288/** @def ASMAtomicCmpXchgExHandle
1289 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1290 *
1291 * @param ph Pointer to the value to update.
1292 * @param hNew The new value to assigned to *pu.
1293 * @param hOld The old value to *pu compare with.
1294 * @param fRc Where to store the result.
1295 * @param phOldVal Pointer to where to store the old value.
1296 *
1297 * @remarks This doesn't currently work for all handles (like RTFILE).
1298 */
1299#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1300# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1301 do { \
1302 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1303 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1304 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1305 } while (0)
1306#elif HC_ARCH_BITS == 64
1307# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1308 do { \
1309 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1310 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1311 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1312 } while (0)
1313#else
1314# error HC_ARCH_BITS
1315#endif
1316
1317
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers.  Additionally passes back the old value.
 *
 * Dispatches on sizeof(*pu): 4 bytes go to ASMAtomicCmpXchgExU32(), 8 bytes
 * to ASMAtomicCmpXchgExU64().  Any other size triggers an assertion, sets
 * @a fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Where to store the result (true if exchanged).
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1343
1344
1345/**
1346 * Atomically Compare and Exchange a pointer value, additionally
1347 * passing back old value, ordered.
1348 *
1349 * @returns true if xchg was done.
1350 * @returns false if xchg wasn't done.
1351 *
1352 * @param ppv Pointer to the value to update.
1353 * @param pvNew The new value to assigned to *ppv.
1354 * @param pvOld The old value to *ppv compare with.
1355 * @param ppvOld Pointer store the old value at.
1356 *
1357 * @remarks x86: Requires a 486 or later.
1358 */
1359DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1360 void RT_FAR * RT_FAR *ppvOld)
1361{
1362#if ARCH_BITS == 32 || ARCH_BITS == 16
1363 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1364#elif ARCH_BITS == 64
1365 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1366#else
1367# error "ARCH_BITS is bogus"
1368#endif
1369}
1370
1371
/**
 * Atomically compares and exchanges a pointer value and additionally passes
 * back the value found in memory, ordered.
 *
 * Casting front end for ASMAtomicCmpXchgExPtrVoid().  The GCC flavour first
 * assigns every argument to a temporary typed after *ppv so that mismatching
 * pointer types are diagnosed before the casts to void are applied.
 *
 * @returns true if the exchange was done.
 * @returns false if the exchange wasn't done.
 *
 * @param   ppv         Pointer to the pointer variable to update.
 * @param   pvNew       The new value to assign to *ppv.
 * @param   pvOld       The value *ppv is compared with.
 * @param   ppvOld      Where to store the value that was found in *ppv.
 *
 * @remarks This is relatively type safe on GCC platforms.
 * @remarks x86: Requires a 486 or later.
 */
#ifndef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv))            const pvNewTypeChecked  = (pvNew); \
        __typeof__(*(ppv))            const pvOldTypeChecked  = (pvOld); \
        __typeof__(*(ppv)) *          const ppvOldTypeChecked = (ppvOld); \
        ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                  (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                  (void **)ppvOldTypeChecked); \
    })
#endif
1404
1405
1406/**
1407 * Virtualization unfriendly serializing instruction, always exits.
1408 */
1409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1410RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void);
1411#else
1412DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1413{
1414# if RT_INLINE_ASM_GNU_STYLE
1415 RTCCUINTREG xAX = 0;
1416# ifdef RT_ARCH_AMD64
1417 __asm__ __volatile__ ("cpuid"
1418 : "=a" (xAX)
1419 : "0" (xAX)
1420 : "rbx", "rcx", "rdx", "memory");
1421# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1422 __asm__ __volatile__ ("push %%ebx\n\t"
1423 "cpuid\n\t"
1424 "pop %%ebx\n\t"
1425 : "=a" (xAX)
1426 : "0" (xAX)
1427 : "ecx", "edx", "memory");
1428# else
1429 __asm__ __volatile__ ("cpuid"
1430 : "=a" (xAX)
1431 : "0" (xAX)
1432 : "ebx", "ecx", "edx", "memory");
1433# endif
1434
1435# elif RT_INLINE_ASM_USES_INTRIN
1436 int aInfo[4];
1437 _ReadWriteBarrier();
1438 __cpuid(aInfo, 0);
1439
1440# else
1441 __asm
1442 {
1443 push ebx
1444 xor eax, eax
1445 cpuid
1446 pop ebx
1447 }
1448# endif
1449}
1450#endif
1451
1452/**
1453 * Virtualization friendly serializing instruction, though more expensive.
1454 */
1455#if RT_INLINE_ASM_EXTERNAL
1456RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void);
1457#else
1458DECLINLINE(void) ASMSerializeInstructionIRet(void)
1459{
1460# if RT_INLINE_ASM_GNU_STYLE
1461# ifdef RT_ARCH_AMD64
1462 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1463 "subq $128, %%rsp\n\t" /*redzone*/
1464 "mov %%ss, %%eax\n\t"
1465 "pushq %%rax\n\t"
1466 "pushq %%r10\n\t"
1467 "pushfq\n\t"
1468 "movl %%cs, %%eax\n\t"
1469 "pushq %%rax\n\t"
1470 "leaq 1f(%%rip), %%rax\n\t"
1471 "pushq %%rax\n\t"
1472 "iretq\n\t"
1473 "1:\n\t"
1474 ::: "rax", "r10", "memory");
1475# else
1476 __asm__ __volatile__ ("pushfl\n\t"
1477 "pushl %%cs\n\t"
1478 "pushl $1f\n\t"
1479 "iretl\n\t"
1480 "1:\n\t"
1481 ::: "memory");
1482# endif
1483
1484# else
1485 __asm
1486 {
1487 pushfd
1488 push cs
1489 push la_ret
1490 iretd
1491 la_ret:
1492 }
1493# endif
1494}
1495#endif
1496
1497/**
1498 * Virtualization friendlier serializing instruction, may still cause exits.
1499 */
1500#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1501RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void);
1502#else
1503DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1504{
1505# if RT_INLINE_ASM_GNU_STYLE
1506 /* rdtscp is not supported by ancient linux build VM of course :-( */
1507# ifdef RT_ARCH_AMD64
1508 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1509 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1510# else
1511 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1512 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1513# endif
1514# else
1515# if RT_INLINE_ASM_USES_INTRIN >= 15
1516 uint32_t uIgnore;
1517 _ReadWriteBarrier();
1518 (void)__rdtscp(&uIgnore);
1519 (void)uIgnore;
1520# else
1521 __asm
1522 {
1523 rdtscp
1524 }
1525# endif
1526# endif
1527}
1528#endif
1529
1530
1531/**
1532 * Serialize Instruction.
1533 */
1534#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1535# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1536#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1537# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1538#elif defined(RT_ARCH_SPARC64)
1539RTDECL(void) ASMSerializeInstruction(void);
1540#else
1541# error "Port me"
1542#endif
1543
1544
1545/**
1546 * Memory fence, waits for any pending writes and reads to complete.
1547 */
1548DECLINLINE(void) ASMMemoryFence(void)
1549{
1550#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1551# if RT_INLINE_ASM_GNU_STYLE
1552 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1553# elif RT_INLINE_ASM_USES_INTRIN
1554 _mm_mfence();
1555# else
1556 __asm
1557 {
1558 _emit 0x0f
1559 _emit 0xae
1560 _emit 0xf0
1561 }
1562# endif
1563#elif ARCH_BITS == 16
1564 uint16_t volatile u16;
1565 ASMAtomicXchgU16(&u16, 0);
1566#else
1567 uint32_t volatile u32;
1568 ASMAtomicXchgU32(&u32, 0);
1569#endif
1570}
1571
1572
1573/**
1574 * Write fence, waits for any pending writes to complete.
1575 */
1576DECLINLINE(void) ASMWriteFence(void)
1577{
1578#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1579# if RT_INLINE_ASM_GNU_STYLE
1580 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1581# elif RT_INLINE_ASM_USES_INTRIN
1582 _mm_sfence();
1583# else
1584 __asm
1585 {
1586 _emit 0x0f
1587 _emit 0xae
1588 _emit 0xf8
1589 }
1590# endif
1591#else
1592 ASMMemoryFence();
1593#endif
1594}
1595
1596
1597/**
1598 * Read fence, waits for any pending reads to complete.
1599 */
1600DECLINLINE(void) ASMReadFence(void)
1601{
1602#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1605# elif RT_INLINE_ASM_USES_INTRIN
1606 _mm_lfence();
1607# else
1608 __asm
1609 {
1610 _emit 0x0f
1611 _emit 0xae
1612 _emit 0xe8
1613 }
1614# endif
1615#else
1616 ASMMemoryFence();
1617#endif
1618}
1619
1620
1621/**
1622 * Atomically reads an unsigned 8-bit value, ordered.
1623 *
1624 * @returns Current *pu8 value
1625 * @param pu8 Pointer to the 8-bit variable to read.
1626 */
1627DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1628{
1629 ASMMemoryFence();
1630 return *pu8; /* byte reads are atomic on x86 */
1631}
1632
1633
1634/**
1635 * Atomically reads an unsigned 8-bit value, unordered.
1636 *
1637 * @returns Current *pu8 value
1638 * @param pu8 Pointer to the 8-bit variable to read.
1639 */
1640DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1641{
1642 return *pu8; /* byte reads are atomic on x86 */
1643}
1644
1645
1646/**
1647 * Atomically reads a signed 8-bit value, ordered.
1648 *
1649 * @returns Current *pi8 value
1650 * @param pi8 Pointer to the 8-bit variable to read.
1651 */
1652DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1653{
1654 ASMMemoryFence();
1655 return *pi8; /* byte reads are atomic on x86 */
1656}
1657
1658
1659/**
1660 * Atomically reads a signed 8-bit value, unordered.
1661 *
1662 * @returns Current *pi8 value
1663 * @param pi8 Pointer to the 8-bit variable to read.
1664 */
1665DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1666{
1667 return *pi8; /* byte reads are atomic on x86 */
1668}
1669
1670
1671/**
1672 * Atomically reads an unsigned 16-bit value, ordered.
1673 *
1674 * @returns Current *pu16 value
1675 * @param pu16 Pointer to the 16-bit variable to read.
1676 */
1677DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1678{
1679 ASMMemoryFence();
1680 Assert(!((uintptr_t)pu16 & 1));
1681 return *pu16;
1682}
1683
1684
1685/**
1686 * Atomically reads an unsigned 16-bit value, unordered.
1687 *
1688 * @returns Current *pu16 value
1689 * @param pu16 Pointer to the 16-bit variable to read.
1690 */
1691DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1692{
1693 Assert(!((uintptr_t)pu16 & 1));
1694 return *pu16;
1695}
1696
1697
1698/**
1699 * Atomically reads a signed 16-bit value, ordered.
1700 *
1701 * @returns Current *pi16 value
1702 * @param pi16 Pointer to the 16-bit variable to read.
1703 */
1704DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1705{
1706 ASMMemoryFence();
1707 Assert(!((uintptr_t)pi16 & 1));
1708 return *pi16;
1709}
1710
1711
1712/**
1713 * Atomically reads a signed 16-bit value, unordered.
1714 *
1715 * @returns Current *pi16 value
1716 * @param pi16 Pointer to the 16-bit variable to read.
1717 */
1718DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1719{
1720 Assert(!((uintptr_t)pi16 & 1));
1721 return *pi16;
1722}
1723
1724
1725/**
1726 * Atomically reads an unsigned 32-bit value, ordered.
1727 *
1728 * @returns Current *pu32 value
1729 * @param pu32 Pointer to the 32-bit variable to read.
1730 */
1731DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1732{
1733 ASMMemoryFence();
1734 Assert(!((uintptr_t)pu32 & 3));
1735#if ARCH_BITS == 16
1736 AssertFailed(); /** @todo 16-bit */
1737#endif
1738 return *pu32;
1739}
1740
1741
1742/**
1743 * Atomically reads an unsigned 32-bit value, unordered.
1744 *
1745 * @returns Current *pu32 value
1746 * @param pu32 Pointer to the 32-bit variable to read.
1747 */
1748DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1749{
1750 Assert(!((uintptr_t)pu32 & 3));
1751#if ARCH_BITS == 16
1752 AssertFailed(); /** @todo 16-bit */
1753#endif
1754 return *pu32;
1755}
1756
1757
1758/**
1759 * Atomically reads a signed 32-bit value, ordered.
1760 *
1761 * @returns Current *pi32 value
1762 * @param pi32 Pointer to the 32-bit variable to read.
1763 */
1764DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1765{
1766 ASMMemoryFence();
1767 Assert(!((uintptr_t)pi32 & 3));
1768#if ARCH_BITS == 16
1769 AssertFailed(); /** @todo 16-bit */
1770#endif
1771 return *pi32;
1772}
1773
1774
1775/**
1776 * Atomically reads a signed 32-bit value, unordered.
1777 *
1778 * @returns Current *pi32 value
1779 * @param pi32 Pointer to the 32-bit variable to read.
1780 */
1781DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1782{
1783 Assert(!((uintptr_t)pi32 & 3));
1784#if ARCH_BITS == 16
1785 AssertFailed(); /** @todo 16-bit */
1786#endif
1787 return *pi32;
1788}
1789
1790
1791/**
1792 * Atomically reads an unsigned 64-bit value, ordered.
1793 *
1794 * @returns Current *pu64 value
1795 * @param pu64 Pointer to the 64-bit variable to read.
1796 * The memory pointed to must be writable.
1797 *
1798 * @remarks This may fault if the memory is read-only!
1799 * @remarks x86: Requires a Pentium or later.
1800 */
1801#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1802 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1803RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
1804#else
1805DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
1806{
1807 uint64_t u64;
1808# ifdef RT_ARCH_AMD64
1809 Assert(!((uintptr_t)pu64 & 7));
1810/*# if RT_INLINE_ASM_GNU_STYLE
1811 __asm__ __volatile__( "mfence\n\t"
1812 "movq %1, %0\n\t"
1813 : "=r" (u64)
1814 : "m" (*pu64));
1815# else
1816 __asm
1817 {
1818 mfence
1819 mov rdx, [pu64]
1820 mov rax, [rdx]
1821 mov [u64], rax
1822 }
1823# endif*/
1824 ASMMemoryFence();
1825 u64 = *pu64;
1826# else /* !RT_ARCH_AMD64 */
1827# if RT_INLINE_ASM_GNU_STYLE
1828# if defined(PIC) || defined(__PIC__)
1829 uint32_t u32EBX = 0;
1830 Assert(!((uintptr_t)pu64 & 7));
1831 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1832 "lock; cmpxchg8b (%5)\n\t"
1833 "movl %3, %%ebx\n\t"
1834 : "=A" (u64),
1835# if RT_GNUC_PREREQ(4, 3)
1836 "+m" (*pu64)
1837# else
1838 "=m" (*pu64)
1839# endif
1840 : "0" (0ULL),
1841 "m" (u32EBX),
1842 "c" (0),
1843 "S" (pu64));
1844# else /* !PIC */
1845 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1846 : "=A" (u64),
1847 "+m" (*pu64)
1848 : "0" (0ULL),
1849 "b" (0),
1850 "c" (0));
1851# endif
1852# else
1853 Assert(!((uintptr_t)pu64 & 7));
1854 __asm
1855 {
1856 xor eax, eax
1857 xor edx, edx
1858 mov edi, pu64
1859 xor ecx, ecx
1860 xor ebx, ebx
1861 lock cmpxchg8b [edi]
1862 mov dword ptr [u64], eax
1863 mov dword ptr [u64 + 4], edx
1864 }
1865# endif
1866# endif /* !RT_ARCH_AMD64 */
1867 return u64;
1868}
1869#endif
1870
1871
1872/**
1873 * Atomically reads an unsigned 64-bit value, unordered.
1874 *
1875 * @returns Current *pu64 value
1876 * @param pu64 Pointer to the 64-bit variable to read.
1877 * The memory pointed to must be writable.
1878 *
1879 * @remarks This may fault if the memory is read-only!
1880 * @remarks x86: Requires a Pentium or later.
1881 */
1882#if !defined(RT_ARCH_AMD64) \
1883 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1884 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1885RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
1886#else
1887DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
1888{
1889 uint64_t u64;
1890# ifdef RT_ARCH_AMD64
1891 Assert(!((uintptr_t)pu64 & 7));
1892/*# if RT_INLINE_ASM_GNU_STYLE
1893 Assert(!((uintptr_t)pu64 & 7));
1894 __asm__ __volatile__("movq %1, %0\n\t"
1895 : "=r" (u64)
1896 : "m" (*pu64));
1897# else
1898 __asm
1899 {
1900 mov rdx, [pu64]
1901 mov rax, [rdx]
1902 mov [u64], rax
1903 }
1904# endif */
1905 u64 = *pu64;
1906# else /* !RT_ARCH_AMD64 */
1907# if RT_INLINE_ASM_GNU_STYLE
1908# if defined(PIC) || defined(__PIC__)
1909 uint32_t u32EBX = 0;
1910 uint32_t u32Spill;
1911 Assert(!((uintptr_t)pu64 & 7));
1912 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1913 "xor %%ecx,%%ecx\n\t"
1914 "xor %%edx,%%edx\n\t"
1915 "xchgl %%ebx, %3\n\t"
1916 "lock; cmpxchg8b (%4)\n\t"
1917 "movl %3, %%ebx\n\t"
1918 : "=A" (u64),
1919# if RT_GNUC_PREREQ(4, 3)
1920 "+m" (*pu64),
1921# else
1922 "=m" (*pu64),
1923# endif
1924 "=c" (u32Spill)
1925 : "m" (u32EBX),
1926 "S" (pu64));
1927# else /* !PIC */
1928 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1929 : "=A" (u64),
1930 "+m" (*pu64)
1931 : "0" (0ULL),
1932 "b" (0),
1933 "c" (0));
1934# endif
1935# else
1936 Assert(!((uintptr_t)pu64 & 7));
1937 __asm
1938 {
1939 xor eax, eax
1940 xor edx, edx
1941 mov edi, pu64
1942 xor ecx, ecx
1943 xor ebx, ebx
1944 lock cmpxchg8b [edi]
1945 mov dword ptr [u64], eax
1946 mov dword ptr [u64 + 4], edx
1947 }
1948# endif
1949# endif /* !RT_ARCH_AMD64 */
1950 return u64;
1951}
1952#endif
1953
1954
1955/**
1956 * Atomically reads a signed 64-bit value, ordered.
1957 *
1958 * @returns Current *pi64 value
1959 * @param pi64 Pointer to the 64-bit variable to read.
1960 * The memory pointed to must be writable.
1961 *
1962 * @remarks This may fault if the memory is read-only!
1963 * @remarks x86: Requires a Pentium or later.
1964 */
1965DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1966{
1967 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1968}
1969
1970
1971/**
1972 * Atomically reads a signed 64-bit value, unordered.
1973 *
1974 * @returns Current *pi64 value
1975 * @param pi64 Pointer to the 64-bit variable to read.
1976 * The memory pointed to must be writable.
1977 *
1978 * @remarks This will fault if the memory is read-only!
1979 * @remarks x86: Requires a Pentium or later.
1980 */
1981DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1982{
1983 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1984}
1985
1986
1987/**
1988 * Atomically reads a size_t value, ordered.
1989 *
1990 * @returns Current *pcb value
1991 * @param pcb Pointer to the size_t variable to read.
1992 */
1993DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1994{
1995#if ARCH_BITS == 64
1996 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1997#elif ARCH_BITS == 32
1998 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1999#elif ARCH_BITS == 16
2000 AssertCompileSize(size_t, 2);
2001 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2002#else
2003# error "Unsupported ARCH_BITS value"
2004#endif
2005}
2006
2007
2008/**
2009 * Atomically reads a size_t value, unordered.
2010 *
2011 * @returns Current *pcb value
2012 * @param pcb Pointer to the size_t variable to read.
2013 */
2014DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
2015{
2016#if ARCH_BITS == 64 || ARCH_BITS == 16
2017 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2018#elif ARCH_BITS == 32
2019 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2020#elif ARCH_BITS == 16
2021 AssertCompileSize(size_t, 2);
2022 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2023#else
2024# error "Unsupported ARCH_BITS value"
2025#endif
2026}
2027
2028
2029/**
2030 * Atomically reads a pointer value, ordered.
2031 *
2032 * @returns Current *pv value
2033 * @param ppv Pointer to the pointer variable to read.
2034 *
2035 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2036 * requires less typing (no casts).
2037 */
2038DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2039{
2040#if ARCH_BITS == 32 || ARCH_BITS == 16
2041 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2042#elif ARCH_BITS == 64
2043 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2044#else
2045# error "ARCH_BITS is bogus"
2046#endif
2047}
2048
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * The GCC flavour assigns @a ppv to a temporary typed after *ppv and the
 * result to a variable of type @a Type, so mismatches are diagnosed.
 *
 * @returns Current *ppv value.
 * @param   ppv         Pointer to the pointer variable to read.
 * @param   Type        The type of *ppv, sans volatile.
 */
#ifndef __GNUC__
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#else /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#endif
2068
2069
2070/**
2071 * Atomically reads a pointer value, unordered.
2072 *
2073 * @returns Current *pv value
2074 * @param ppv Pointer to the pointer variable to read.
2075 *
2076 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2077 * requires less typing (no casts).
2078 */
2079DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2080{
2081#if ARCH_BITS == 32 || ARCH_BITS == 16
2082 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2083#elif ARCH_BITS == 64
2084 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2085#else
2086# error "ARCH_BITS is bogus"
2087#endif
2088}
2089
2090
/**
 * Typed wrapper around ASMAtomicUoReadPtr which spares the caller the casts.
 *
 * The GCC flavour first assigns @a ppv to a local of the matching pointer
 * type and the result to a local of type @a Type, so the compiler gets a
 * chance to complain about mismatches.  Other compilers get a plain cast.
 *
 * @returns Current *ppv value, as @a Type.
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({ \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked \
            = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
    })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2110
2111
2112/**
2113 * Atomically reads a boolean value, ordered.
2114 *
2115 * @returns Current *pf value
2116 * @param pf Pointer to the boolean variable to read.
2117 */
2118DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2119{
2120 ASMMemoryFence();
2121 return *pf; /* byte reads are atomic on x86 */
2122}
2123
2124
2125/**
2126 * Atomically reads a boolean value, unordered.
2127 *
2128 * @returns Current *pf value
2129 * @param pf Pointer to the boolean variable to read.
2130 */
2131DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2132{
2133 return *pf; /* byte reads are atomic on x86 */
2134}
2135
2136
2137/**
2138 * Atomically read a typical IPRT handle value, ordered.
2139 *
2140 * @param ph Pointer to the handle variable to read.
2141 * @param phRes Where to store the result.
2142 *
2143 * @remarks This doesn't currently work for all handles (like RTFILE).
2144 */
2145#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2146# define ASMAtomicReadHandle(ph, phRes) \
2147 do { \
2148 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2149 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2150 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2151 } while (0)
2152#elif HC_ARCH_BITS == 64
2153# define ASMAtomicReadHandle(ph, phRes) \
2154 do { \
2155 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2156 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2157 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2158 } while (0)
2159#else
2160# error HC_ARCH_BITS
2161#endif
2162
2163
2164/**
2165 * Atomically read a typical IPRT handle value, unordered.
2166 *
2167 * @param ph Pointer to the handle variable to read.
2168 * @param phRes Where to store the result.
2169 *
2170 * @remarks This doesn't currently work for all handles (like RTFILE).
2171 */
2172#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2173# define ASMAtomicUoReadHandle(ph, phRes) \
2174 do { \
2175 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2176 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2177 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2178 } while (0)
2179#elif HC_ARCH_BITS == 64
2180# define ASMAtomicUoReadHandle(ph, phRes) \
2181 do { \
2182 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2183 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2184 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2185 } while (0)
2186#else
2187# error HC_ARCH_BITS
2188#endif
2189
2190
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered reader; any
 * other size trips an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do \
    { \
        switch (sizeof(*(pu))) \
        { \
            case 1: \
                *(uint8_t  RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); \
                break; \
            case 2: \
                *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); \
                break; \
            case 4: \
                *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); \
                break; \
            case 8: \
                *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2208
2209
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered reader; any
 * other size trips an assertion and leaves *puRes untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
            /* Name this macro (not the ordered sibling) so the failure can be traced. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2227
2228
2229/**
2230 * Atomically writes an unsigned 8-bit value, ordered.
2231 *
2232 * @param pu8 Pointer to the 8-bit variable.
2233 * @param u8 The 8-bit value to assign to *pu8.
2234 */
2235DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2236{
2237 ASMAtomicXchgU8(pu8, u8);
2238}
2239
2240
2241/**
2242 * Atomically writes an unsigned 8-bit value, unordered.
2243 *
2244 * @param pu8 Pointer to the 8-bit variable.
2245 * @param u8 The 8-bit value to assign to *pu8.
2246 */
2247DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2248{
2249 *pu8 = u8; /* byte writes are atomic on x86 */
2250}
2251
2252
2253/**
2254 * Atomically writes a signed 8-bit value, ordered.
2255 *
2256 * @param pi8 Pointer to the 8-bit variable to read.
2257 * @param i8 The 8-bit value to assign to *pi8.
2258 */
2259DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2260{
2261 ASMAtomicXchgS8(pi8, i8);
2262}
2263
2264
2265/**
2266 * Atomically writes a signed 8-bit value, unordered.
2267 *
2268 * @param pi8 Pointer to the 8-bit variable to write.
2269 * @param i8 The 8-bit value to assign to *pi8.
2270 */
2271DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2272{
2273 *pi8 = i8; /* byte writes are atomic on x86 */
2274}
2275
2276
2277/**
2278 * Atomically writes an unsigned 16-bit value, ordered.
2279 *
2280 * @param pu16 Pointer to the 16-bit variable to write.
2281 * @param u16 The 16-bit value to assign to *pu16.
2282 */
2283DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2284{
2285 ASMAtomicXchgU16(pu16, u16);
2286}
2287
2288
2289/**
2290 * Atomically writes an unsigned 16-bit value, unordered.
2291 *
2292 * @param pu16 Pointer to the 16-bit variable to write.
2293 * @param u16 The 16-bit value to assign to *pu16.
2294 */
2295DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2296{
2297 Assert(!((uintptr_t)pu16 & 1));
2298 *pu16 = u16;
2299}
2300
2301
2302/**
2303 * Atomically writes a signed 16-bit value, ordered.
2304 *
2305 * @param pi16 Pointer to the 16-bit variable to write.
2306 * @param i16 The 16-bit value to assign to *pi16.
2307 */
2308DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2309{
2310 ASMAtomicXchgS16(pi16, i16);
2311}
2312
2313
2314/**
2315 * Atomically writes a signed 16-bit value, unordered.
2316 *
2317 * @param pi16 Pointer to the 16-bit variable to write.
2318 * @param i16 The 16-bit value to assign to *pi16.
2319 */
2320DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2321{
2322 Assert(!((uintptr_t)pi16 & 1));
2323 *pi16 = i16;
2324}
2325
2326
2327/**
2328 * Atomically writes an unsigned 32-bit value, ordered.
2329 *
2330 * @param pu32 Pointer to the 32-bit variable to write.
2331 * @param u32 The 32-bit value to assign to *pu32.
2332 */
2333DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2334{
2335 ASMAtomicXchgU32(pu32, u32);
2336}
2337
2338
2339/**
2340 * Atomically writes an unsigned 32-bit value, unordered.
2341 *
2342 * @param pu32 Pointer to the 32-bit variable to write.
2343 * @param u32 The 32-bit value to assign to *pu32.
2344 */
2345DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2346{
2347 Assert(!((uintptr_t)pu32 & 3));
2348#if ARCH_BITS >= 32
2349 *pu32 = u32;
2350#else
2351 ASMAtomicXchgU32(pu32, u32);
2352#endif
2353}
2354
2355
2356/**
2357 * Atomically writes a signed 32-bit value, ordered.
2358 *
2359 * @param pi32 Pointer to the 32-bit variable to write.
2360 * @param i32 The 32-bit value to assign to *pi32.
2361 */
2362DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2363{
2364 ASMAtomicXchgS32(pi32, i32);
2365}
2366
2367
2368/**
2369 * Atomically writes a signed 32-bit value, unordered.
2370 *
2371 * @param pi32 Pointer to the 32-bit variable to write.
2372 * @param i32 The 32-bit value to assign to *pi32.
2373 */
2374DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2375{
2376 Assert(!((uintptr_t)pi32 & 3));
2377#if ARCH_BITS >= 32
2378 *pi32 = i32;
2379#else
2380 ASMAtomicXchgS32(pi32, i32);
2381#endif
2382}
2383
2384
2385/**
2386 * Atomically writes an unsigned 64-bit value, ordered.
2387 *
2388 * @param pu64 Pointer to the 64-bit variable to write.
2389 * @param u64 The 64-bit value to assign to *pu64.
2390 */
2391DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2392{
2393 ASMAtomicXchgU64(pu64, u64);
2394}
2395
2396
2397/**
2398 * Atomically writes an unsigned 64-bit value, unordered.
2399 *
2400 * @param pu64 Pointer to the 64-bit variable to write.
2401 * @param u64 The 64-bit value to assign to *pu64.
2402 */
2403DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2404{
2405 Assert(!((uintptr_t)pu64 & 7));
2406#if ARCH_BITS == 64
2407 *pu64 = u64;
2408#else
2409 ASMAtomicXchgU64(pu64, u64);
2410#endif
2411}
2412
2413
2414/**
2415 * Atomically writes a signed 64-bit value, ordered.
2416 *
2417 * @param pi64 Pointer to the 64-bit variable to write.
2418 * @param i64 The 64-bit value to assign to *pi64.
2419 */
2420DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2421{
2422 ASMAtomicXchgS64(pi64, i64);
2423}
2424
2425
2426/**
2427 * Atomically writes a signed 64-bit value, unordered.
2428 *
2429 * @param pi64 Pointer to the 64-bit variable to write.
2430 * @param i64 The 64-bit value to assign to *pi64.
2431 */
2432DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2433{
2434 Assert(!((uintptr_t)pi64 & 7));
2435#if ARCH_BITS == 64
2436 *pi64 = i64;
2437#else
2438 ASMAtomicXchgS64(pi64, i64);
2439#endif
2440}
2441
2442
2443/**
2444 * Atomically writes a size_t value, ordered.
2445 *
2446 * @returns nothing.
2447 * @param pcb Pointer to the size_t variable to write.
2448 * @param cb The value to assign to *pcb.
2449 */
2450DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2451{
2452#if ARCH_BITS == 64
2453 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2454#elif ARCH_BITS == 32
2455 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2456#elif ARCH_BITS == 16
2457 AssertCompileSize(size_t, 2);
2458 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2459#else
2460# error "Unsupported ARCH_BITS value"
2461#endif
2462}
2463
2464
2465/**
2466 * Atomically writes a boolean value, unordered.
2467 *
2468 * @param pf Pointer to the boolean variable to write.
2469 * @param f The boolean value to assign to *pf.
2470 */
2471DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2472{
2473 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2474}
2475
2476
2477/**
2478 * Atomically writes a boolean value, unordered.
2479 *
2480 * @param pf Pointer to the boolean variable to write.
2481 * @param f The boolean value to assign to *pf.
2482 */
2483DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2484{
2485 *pf = f; /* byte writes are atomic on x86 */
2486}
2487
2488
2489/**
2490 * Atomically writes a pointer value, ordered.
2491 *
2492 * @param ppv Pointer to the pointer variable to write.
2493 * @param pv The pointer value to assign to *ppv.
2494 */
2495DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2496{
2497#if ARCH_BITS == 32 || ARCH_BITS == 16
2498 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2499#elif ARCH_BITS == 64
2500 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2501#else
2502# error "ARCH_BITS is bogus"
2503#endif
2504}
2505
2506
/**
 * Atomically writes a pointer value, ordered.
 *
 * Statement macro (do/while) wrapping ASMAtomicWritePtrVoid.  It checks at
 * compile time that both *ppv and pv have pointer size, and asserts in strict
 * builds that the target is aligned on ARCH_BITS / 8 bytes.
 *
 * All macro arguments are parenthesized where they are used, so expression
 * arguments (e.g. base + 1) are handled correctly.  The GCC variant runs the
 * alignment check on the type checked local so @a ppv is evaluated only once.
 *
 * @param   ppv     Pointer to the pointer variable to write.
 * @param   pv      The pointer value to assign to *ppv. If NULL use
 *                  ASMAtomicWriteNullPtr or you'll land in trouble.
 *
 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
 *          NULL.
 */
#ifdef __GNUC__
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
        __typeof__(*(ppv)) const pvTypeChecked = (pv); \
        \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
    } while (0)
#else
# define ASMAtomicWritePtr(ppv, pv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        \
        ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
    } while (0)
#endif
2541
2542
2543/**
2544 * Atomically sets a pointer to NULL, ordered.
2545 *
2546 * @param ppv Pointer to the pointer variable that should be set to NULL.
2547 *
2548 * @remarks This is relatively type safe on GCC platforms.
2549 */
2550#if RT_GNUC_PREREQ(4, 2)
2551# define ASMAtomicWriteNullPtr(ppv) \
2552 do \
2553 { \
2554 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2555 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2556 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2557 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2558 } while (0)
2559#else
2560# define ASMAtomicWriteNullPtr(ppv) \
2561 do \
2562 { \
2563 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2564 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2565 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2566 } while (0)
2567#endif
2568
2569
2570/**
2571 * Atomically writes a pointer value, unordered.
2572 *
2573 * @returns Current *pv value
2574 * @param ppv Pointer to the pointer variable.
2575 * @param pv The pointer value to assign to *ppv. If NULL use
2576 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2577 *
2578 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2579 * NULL.
2580 */
2581#if RT_GNUC_PREREQ(4, 2)
2582# define ASMAtomicUoWritePtr(ppv, pv) \
2583 do \
2584 { \
2585 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2586 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2587 \
2588 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2589 AssertCompile(sizeof(pv) == sizeof(void *)); \
2590 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2591 \
2592 *(ppvTypeChecked) = pvTypeChecked; \
2593 } while (0)
2594#else
2595# define ASMAtomicUoWritePtr(ppv, pv) \
2596 do \
2597 { \
2598 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2599 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2600 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2601 *(ppv) = pv; \
2602 } while (0)
2603#endif
2604
2605
/**
 * Atomically sets a pointer to NULL, unordered.
 *
 * Statement macro (do/while) doing a plain store of NULL to *ppv.  It checks
 * at compile time that *ppv has pointer size and asserts in strict builds
 * that the target is aligned on ARCH_BITS / 8 bytes.
 *
 * The macro argument is parenthesized where it is used; the GCC variant runs
 * the alignment check on the type checked local so @a ppv is evaluated only
 * once.
 *
 * @param   ppv     Pointer to the pointer variable that should be set to NULL.
 *
 * @remarks This is relatively type safe on GCC platforms.
 */
#ifdef __GNUC__
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        AssertCompile(sizeof(*(ppv)) == sizeof(void *)); \
        Assert(!( (uintptr_t)ppvTypeChecked & ((ARCH_BITS / 8) - 1) )); \
        *(ppvTypeChecked) = NULL; \
    } while (0)
#else
# define ASMAtomicUoWriteNullPtr(ppv) \
    do \
    { \
        AssertCompile(sizeof(*(ppv)) == sizeof(void RT_FAR *)); \
        Assert(!( (uintptr_t)(ppv) & ((ARCH_BITS / 8) - 1) )); \
        *(ppv) = NULL; \
    } while (0)
#endif
2631
2632
2633/**
2634 * Atomically write a typical IPRT handle value, ordered.
2635 *
2636 * @param ph Pointer to the variable to update.
2637 * @param hNew The value to assign to *ph.
2638 *
2639 * @remarks This doesn't currently work for all handles (like RTFILE).
2640 */
2641#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2642# define ASMAtomicWriteHandle(ph, hNew) \
2643 do { \
2644 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2645 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2646 } while (0)
2647#elif HC_ARCH_BITS == 64
2648# define ASMAtomicWriteHandle(ph, hNew) \
2649 do { \
2650 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2651 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2652 } while (0)
2653#else
2654# error HC_ARCH_BITS
2655#endif
2656
2657
2658/**
2659 * Atomically write a typical IPRT handle value, unordered.
2660 *
2661 * @param ph Pointer to the variable to update.
2662 * @param hNew The value to assign to *ph.
2663 *
2664 * @remarks This doesn't currently work for all handles (like RTFILE).
2665 */
2666#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2667# define ASMAtomicUoWriteHandle(ph, hNew) \
2668 do { \
2669 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2670 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2671 } while (0)
2672#elif HC_ARCH_BITS == 64
2673# define ASMAtomicUoWriteHandle(ph, hNew) \
2674 do { \
2675 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2676 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2677 } while (0)
2678#else
2679# error HC_ARCH_BITS
2680#endif
2681
2682
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered writer; any
 * other size trips an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do \
    { \
        switch (sizeof(*(pu))) \
        { \
            case 1: \
                ASMAtomicWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); \
                break; \
            case 2: \
                ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); \
                break; \
            case 4: \
                ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); \
                break; \
            case 8: \
                ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2700
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered writer; any
 * other size trips an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            /* Name this macro (not the ordered sibling) so the failure can be traced. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2718
2719
2720
2721/**
2722 * Atomically exchanges and adds to a 16-bit value, ordered.
2723 *
2724 * @returns The old value.
2725 * @param pu16 Pointer to the value.
2726 * @param u16 Number to add.
2727 *
2728 * @remarks Currently not implemented, just to make 16-bit code happy.
2729 * @remarks x86: Requires a 486 or later.
2730 */
2731RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2732
2733
2734/**
2735 * Atomically exchanges and adds to a 32-bit value, ordered.
2736 *
2737 * @returns The old value.
2738 * @param pu32 Pointer to the value.
2739 * @param u32 Number to add.
2740 *
2741 * @remarks x86: Requires a 486 or later.
2742 */
2743#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2744RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2745#else
2746DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2747{
2748# if RT_INLINE_ASM_USES_INTRIN
2749 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2750 return u32;
2751
2752# elif RT_INLINE_ASM_GNU_STYLE
2753 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2754 : "=r" (u32),
2755 "=m" (*pu32)
2756 : "0" (u32),
2757 "m" (*pu32)
2758 : "memory");
2759 return u32;
2760# else
2761 __asm
2762 {
2763 mov eax, [u32]
2764# ifdef RT_ARCH_AMD64
2765 mov rdx, [pu32]
2766 lock xadd [rdx], eax
2767# else
2768 mov edx, [pu32]
2769 lock xadd [edx], eax
2770# endif
2771 mov [u32], eax
2772 }
2773 return u32;
2774# endif
2775}
2776#endif
2777
2778
2779/**
2780 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2781 *
2782 * @returns The old value.
2783 * @param pi32 Pointer to the value.
2784 * @param i32 Number to add.
2785 *
2786 * @remarks x86: Requires a 486 or later.
2787 */
2788DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2789{
2790 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2791}
2792
2793
2794/**
2795 * Atomically exchanges and adds to a 64-bit value, ordered.
2796 *
2797 * @returns The old value.
2798 * @param pu64 Pointer to the value.
2799 * @param u64 Number to add.
2800 *
2801 * @remarks x86: Requires a Pentium or later.
2802 */
2803#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2804DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2805#else
2806DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2807{
2808# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2809 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2810 return u64;
2811
2812# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2813 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2814 : "=r" (u64),
2815 "=m" (*pu64)
2816 : "0" (u64),
2817 "m" (*pu64)
2818 : "memory");
2819 return u64;
2820# else
2821 uint64_t u64Old;
2822 for (;;)
2823 {
2824 uint64_t u64New;
2825 u64Old = ASMAtomicUoReadU64(pu64);
2826 u64New = u64Old + u64;
2827 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2828 break;
2829 ASMNopPause();
2830 }
2831 return u64Old;
2832# endif
2833}
2834#endif
2835
2836
2837/**
2838 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2839 *
2840 * @returns The old value.
2841 * @param pi64 Pointer to the value.
2842 * @param i64 Number to add.
2843 *
2844 * @remarks x86: Requires a Pentium or later.
2845 */
2846DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2847{
2848 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2849}
2850
2851
2852/**
2853 * Atomically exchanges and adds to a size_t value, ordered.
2854 *
2855 * @returns The old value.
2856 * @param pcb Pointer to the size_t value.
2857 * @param cb Number to add.
2858 */
2859DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2860{
2861#if ARCH_BITS == 64
2862 AssertCompileSize(size_t, 8);
2863 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2864#elif ARCH_BITS == 32
2865 AssertCompileSize(size_t, 4);
2866 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2867#elif ARCH_BITS == 16
2868 AssertCompileSize(size_t, 2);
2869 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2870#else
2871# error "Unsupported ARCH_BITS value"
2872#endif
2873}
2874
2875
/**
 * Atomically exchanges and adds a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicAddU32 or ASMAtomicAddU64; any other
 * size trips an assertion and leaves *puOld untouched.  The @a puOld casts
 * carry RT_FAR, matching ASMAtomicSubSize.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to add to *pu.
 * @param   puOld   Where to store the old value.
 */
#define ASMAtomicAddSize(pu, uNew, puOld) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2892
2893
2894
2895/**
2896 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2897 *
2898 * @returns The old value.
2899 * @param pu16 Pointer to the value.
2900 * @param u16 Number to subtract.
2901 *
2902 * @remarks x86: Requires a 486 or later.
2903 */
2904DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2905{
2906 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2907}
2908
2909
2910/**
2911 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2912 *
2913 * @returns The old value.
2914 * @param pi16 Pointer to the value.
2915 * @param i16 Number to subtract.
2916 *
2917 * @remarks x86: Requires a 486 or later.
2918 */
2919DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2920{
2921 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2922}
2923
2924
2925/**
2926 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2927 *
2928 * @returns The old value.
2929 * @param pu32 Pointer to the value.
2930 * @param u32 Number to subtract.
2931 *
2932 * @remarks x86: Requires a 486 or later.
2933 */
2934DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2935{
2936 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2937}
2938
2939
2940/**
2941 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2942 *
2943 * @returns The old value.
2944 * @param pi32 Pointer to the value.
2945 * @param i32 Number to subtract.
2946 *
2947 * @remarks x86: Requires a 486 or later.
2948 */
2949DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2950{
2951 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2952}
2953
2954
2955/**
2956 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2957 *
2958 * @returns The old value.
2959 * @param pu64 Pointer to the value.
2960 * @param u64 Number to subtract.
2961 *
2962 * @remarks x86: Requires a Pentium or later.
2963 */
2964DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2965{
2966 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2967}
2968
2969
2970/**
2971 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2972 *
2973 * @returns The old value.
2974 * @param pi64 Pointer to the value.
2975 * @param i64 Number to subtract.
2976 *
2977 * @remarks x86: Requires a Pentium or later.
2978 */
2979DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2980{
2981 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2982}
2983
2984
2985/**
2986 * Atomically exchanges and subtracts to a size_t value, ordered.
2987 *
2988 * @returns The old value.
2989 * @param pcb Pointer to the size_t value.
2990 * @param cb Number to subtract.
2991 *
2992 * @remarks x86: Requires a 486 or later.
2993 */
2994DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2995{
2996#if ARCH_BITS == 64
2997 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2998#elif ARCH_BITS == 32
2999 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3000#elif ARCH_BITS == 16
3001 AssertCompileSize(size_t, 2);
3002 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3003#else
3004# error "Unsupported ARCH_BITS value"
3005#endif
3006}
3007
3008
/**
 * Atomically exchanges and subtracts a value which size might differ between
 * platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to ASMAtomicSubU32 or ASMAtomicSubU64; any other
 * size trips an assertion and leaves *puOld untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to subtract from *pu.
 * @param   puOld   Where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicSubSize(pu, uNew, puOld) \
    do \
    { \
        switch (sizeof(*(pu))) \
        { \
            case 4: \
                *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); \
                break; \
            case 8: \
                *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
3027
3028
3029
3030/**
3031 * Atomically increment a 16-bit value, ordered.
3032 *
3033 * @returns The new value.
3034 * @param pu16 Pointer to the value to increment.
3035 * @remarks Not implemented. Just to make 16-bit code happy.
3036 *
3037 * @remarks x86: Requires a 486 or later.
3038 */
3039RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3040
3041
3042/**
3043 * Atomically increment a 32-bit value, ordered.
3044 *
3045 * @returns The new value.
3046 * @param pu32 Pointer to the value to increment.
3047 *
3048 * @remarks x86: Requires a 486 or later.
3049 */
3050#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3051RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3052#else
3053DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3054{
3055 uint32_t u32;
3056# if RT_INLINE_ASM_USES_INTRIN
3057 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3058 return u32;
3059
3060# elif RT_INLINE_ASM_GNU_STYLE
3061 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3062 : "=r" (u32),
3063 "=m" (*pu32)
3064 : "0" (1),
3065 "m" (*pu32)
3066 : "memory");
3067 return u32+1;
3068# else
3069 __asm
3070 {
3071 mov eax, 1
3072# ifdef RT_ARCH_AMD64
3073 mov rdx, [pu32]
3074 lock xadd [rdx], eax
3075# else
3076 mov edx, [pu32]
3077 lock xadd [edx], eax
3078# endif
3079 mov u32, eax
3080 }
3081 return u32+1;
3082# endif
3083}
3084#endif
3085
3086
3087/**
3088 * Atomically increment a signed 32-bit value, ordered.
3089 *
3090 * @returns The new value.
3091 * @param pi32 Pointer to the value to increment.
3092 *
3093 * @remarks x86: Requires a 486 or later.
3094 */
3095DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3096{
3097 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3098}
3099
3100
/**
 * Atomically increment a 64-bit value, ordered.
 *
 * On AMD64 this uses an intrinsic or a locked XADD; all other configurations
 * that get the inline version fall back on ASMAtomicAddU64().
 *
 * @returns The new value.
 * @param   pu64        Pointer to the value to increment.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic result is returned unmodified, i.e. it is already the new value. */
    uint64_t u64;
    u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* Operand 0 goes in holding 1 and comes out holding the previous memory
       value, hence the '+ 1' on return. */
    uint64_t u64;
    __asm__ __volatile__("lock; xaddq %0, %1\n\t"
                         : "=r" (u64),
                           "=m" (*pu64)
                         : "0" (1),
                           "m" (*pu64)
                         : "memory");
    return u64 + 1;
# else
    /* The '+ 1' turns the value returned by the add helper into the new value. */
    return ASMAtomicAddU64(pu64, 1) + 1;
# endif
}
#endif
3133
3134
3135/**
3136 * Atomically increment a signed 64-bit value, ordered.
3137 *
3138 * @returns The new value.
3139 * @param pi64 Pointer to the value to increment.
3140 *
3141 * @remarks x86: Requires a Pentium or later.
3142 */
3143DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3144{
3145 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3146}
3147
3148
3149/**
3150 * Atomically increment a size_t value, ordered.
3151 *
3152 * @returns The new value.
3153 * @param pcb Pointer to the value to increment.
3154 *
3155 * @remarks x86: Requires a 486 or later.
3156 */
3157DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3158{
3159#if ARCH_BITS == 64
3160 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3161#elif ARCH_BITS == 32
3162 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3163#elif ARCH_BITS == 16
3164 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3165#else
3166# error "Unsupported ARCH_BITS value"
3167#endif
3168}
3169
3170
3171
/**
 * Atomically decrement an unsigned 16-bit value, ordered.
 *
 * Only the prototype is provided at this point in the header; there is no
 * inline implementation here.
 *
 * @returns The new value.
 *          NOTE(review): declared as uint32_t whereas ASMAtomicIncU16 returns
 *          uint16_t - confirm which is intended.
 * @param   pu16        Pointer to the value to decrement.
 * @remarks Not implemented.  Just to make 16-bit code happy.
 *
 * @remarks x86: Requires a 486 or later.
 */
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3182
3183
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * Depending on the compiler configuration this is either an external
 * function, a compiler intrinsic, or inline assembly built around a locked
 * XADD of -1.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to decrement.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic result is returned unmodified, i.e. it is already the new value. */
    u32 = _InterlockedDecrement((long RT_FAR *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 goes in holding -1 (the addend) and comes out holding the
       previous memory value, hence the '- 1' on return. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1),
                           "m" (*pu32)
                         : "memory");
    return u32-1;
# else
    /* Same locked XADD sequence in MSC-style inline assembly; eax ends up
       with the previous memory value. */
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
3227
3228
3229/**
3230 * Atomically decrement a signed 32-bit value, ordered.
3231 *
3232 * @returns The new value.
3233 * @param pi32 Pointer to the value to decrement.
3234 *
3235 * @remarks x86: Requires a 486 or later.
3236 */
3237DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3238{
3239 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3240}
3241
3242
/**
 * Atomically decrement an unsigned 64-bit value, ordered.
 *
 * On AMD64 this uses an intrinsic or a locked XADD; all other configurations
 * that get the inline version fall back on ASMAtomicAddU64().
 *
 * @returns The new value.
 * @param   pu64        Pointer to the value to decrement.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic result is returned unmodified, i.e. it is already the new value. */
    uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
    return u64;

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    /* Adding all-ones is the same as subtracting one (modulo 2^64).  Operand
       0 comes out holding the previous memory value, hence the '- 1'. */
    uint64_t u64;
    __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
                         : "=r" (u64),
                           "=m" (*pu64)
                         : "0" (~(uint64_t)0),
                           "m" (*pu64)
                         : "memory");
    return u64-1;
# else
    /* Adding UINT64_MAX subtracts one; the '- 1' turns the value returned by
       the add helper into the new value. */
    return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
# endif
}
#endif
3274
3275
3276/**
3277 * Atomically decrement a signed 64-bit value, ordered.
3278 *
3279 * @returns The new value.
3280 * @param pi64 Pointer to the value to decrement.
3281 *
3282 * @remarks x86: Requires a Pentium or later.
3283 */
3284DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3285{
3286 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3287}
3288
3289
3290/**
3291 * Atomically decrement a size_t value, ordered.
3292 *
3293 * @returns The new value.
3294 * @param pcb Pointer to the value to decrement.
3295 *
3296 * @remarks x86: Requires a 486 or later.
3297 */
3298DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3299{
3300#if ARCH_BITS == 64
3301 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3302#elif ARCH_BITS == 32
3303 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3304#elif ARCH_BITS == 16
3305 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3306#else
3307# error "Unsupported ARCH_BITS value"
3308#endif
3309}
3310
3311
/**
 * Atomically Or an unsigned 32-bit value, ordered.
 *
 * Performs *pu32 |= u32 using an intrinsic or a locked OR instruction.
 * Nothing is returned.
 *
 * @param   pu32        Pointer to the variable to OR u32 into.
 * @param   u32         The value to OR *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is deliberately ignored. */
    _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed as both output and input since the instruction is a
       read-modify-write of that location. */
    __asm__ __volatile__("lock; orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock or [rdx], eax
#  else
        mov     edx, [pu32]
        lock or [edx], eax
#  endif
    }
# endif
}
#endif
3348
3349
3350/**
3351 * Atomically Or a signed 32-bit value, ordered.
3352 *
3353 * @param pi32 Pointer to the pointer variable to OR u32 with.
3354 * @param i32 The value to OR *pu32 with.
3355 *
3356 * @remarks x86: Requires a 386 or later.
3357 */
3358DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3359{
3360 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3361}
3362
3363
/**
 * Atomically Or an unsigned 64-bit value, ordered.
 *
 * Performs *pu64 |= u64.  On AMD64 this is an intrinsic or a locked OR; in
 * other inline configurations it is a compare-and-exchange retry loop.
 *
 * @param   pu64        Pointer to the variable to OR u64 into.
 * @param   u64         The value to OR *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic's return value is deliberately ignored. */
    _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    __asm__ __volatile__("lock; orq %1, %q0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Read the current value, compute the new one and try to swap it in;
       retry (after a pause hint) until the compare-and-exchange succeeds. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old | u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
3397
3398
3399/**
3400 * Atomically Or a signed 64-bit value, ordered.
3401 *
3402 * @param pi64 Pointer to the pointer variable to OR u64 with.
3403 * @param i64 The value to OR *pu64 with.
3404 *
3405 * @remarks x86: Requires a Pentium or later.
3406 */
3407DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3408{
3409 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3410}
3411
3412
/**
 * Atomically And an unsigned 32-bit value, ordered.
 *
 * Performs *pu32 &= u32 using an intrinsic or a locked AND instruction.
 * Nothing is returned.
 *
 * @param   pu32        Pointer to the variable to AND u32 into.
 * @param   u32         The value to AND *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is deliberately ignored. */
    _InterlockedAnd((long volatile RT_FAR *)pu32, u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed as both output and input since the instruction is a
       read-modify-write of that location. */
    __asm__ __volatile__("lock; andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock and [rdx], eax
#  else
        mov     edx, [pu32]
        lock and [edx], eax
#  endif
    }
# endif
}
#endif
3449
3450
3451/**
3452 * Atomically And a signed 32-bit value, ordered.
3453 *
3454 * @param pi32 Pointer to the pointer variable to AND i32 with.
3455 * @param i32 The value to AND *pi32 with.
3456 *
3457 * @remarks x86: Requires a 386 or later.
3458 */
3459DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3460{
3461 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3462}
3463
3464
/**
 * Atomically And an unsigned 64-bit value, ordered.
 *
 * Performs *pu64 &= u64.  On AMD64 this is an intrinsic or a locked AND; in
 * other inline configurations it is a compare-and-exchange retry loop.
 *
 * @param   pu64        Pointer to the variable to AND u64 into.
 * @param   u64         The value to AND *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
    /* The intrinsic's return value is deliberately ignored. */
    _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);

# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    __asm__ __volatile__("lock; andq %1, %0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Read the current value, compute the new one and try to swap it in;
       retry (after a pause hint) until the compare-and-exchange succeeds. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old & u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
3498
3499
3500/**
3501 * Atomically And a signed 64-bit value, ordered.
3502 *
3503 * @param pi64 Pointer to the pointer variable to AND i64 with.
3504 * @param i64 The value to AND *pi64 with.
3505 *
3506 * @remarks x86: Requires a Pentium or later.
3507 */
3508DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3509{
3510 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3511}
3512
3513
/**
 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
 *
 * Performs *pu32 |= u32 with a single OR instruction on memory; unlike
 * ASMAtomicOrU32 no lock prefix is used.
 *
 * @param   pu32        Pointer to the variable to OR u32 into.
 * @param   u32         The value to OR *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed as both output and input since the instruction is a
       read-modify-write of that location. */
    __asm__ __volatile__("orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        or      [rdx], eax
#  else
        mov     edx, [pu32]
        or      [edx], eax
#  endif
    }
# endif
}
#endif
3547
3548
3549/**
3550 * Atomically OR a signed 32-bit value, unordered.
3551 *
3552 * @param pi32 Pointer to the pointer variable to OR u32 with.
3553 * @param i32 The value to OR *pu32 with.
3554 *
3555 * @remarks x86: Requires a 386 or later.
3556 */
3557DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3558{
3559 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3560}
3561
3562
/**
 * Atomically OR an unsigned 64-bit value, unordered.
 *
 * Performs *pu64 |= u64.  With GNU-style assembly on AMD64 this is a single
 * OR instruction without lock prefix; otherwise the inline version uses a
 * compare-and-exchange retry loop.
 *
 * @param   pu64        Pointer to the variable to OR u64 into.
 * @param   u64         The value to OR *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    __asm__ __volatile__("orq %1, %q0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Read the current value, compute the new one and try to swap it in;
       retry (after a pause hint) until the compare-and-exchange succeeds. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old | u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
3593
3594
3595/**
3596 * Atomically Or a signed 64-bit value, unordered.
3597 *
3598 * @param pi64 Pointer to the pointer variable to OR u64 with.
3599 * @param i64 The value to OR *pu64 with.
3600 *
3601 * @remarks x86: Requires a Pentium or later.
3602 */
3603DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3604{
3605 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3606}
3607
3608
/**
 * Atomically And an unsigned 32-bit value, unordered.
 *
 * Performs *pu32 &= u32 with a single AND instruction on memory; unlike
 * ASMAtomicAndU32 no lock prefix is used.
 *
 * @param   pu32        Pointer to the variable to AND u32 into.
 * @param   u32         The value to AND *pu32 with.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* *pu32 is listed as both output and input since the instruction is a
       read-modify-write of that location. */
    __asm__ __volatile__("andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        and     [rdx], eax
#  else
        mov     edx, [pu32]
        and     [edx], eax
#  endif
    }
# endif
}
#endif
3642
3643
3644/**
3645 * Atomically And a signed 32-bit value, unordered.
3646 *
3647 * @param pi32 Pointer to the pointer variable to AND i32 with.
3648 * @param i32 The value to AND *pi32 with.
3649 *
3650 * @remarks x86: Requires a 386 or later.
3651 */
3652DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3653{
3654 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3655}
3656
3657
/**
 * Atomically And an unsigned 64-bit value, unordered.
 *
 * Performs *pu64 &= u64.  With GNU-style assembly on AMD64 this is a single
 * AND instruction without lock prefix; otherwise the inline version uses a
 * compare-and-exchange retry loop.
 *
 * @param   pu64        Pointer to the variable to AND u64 into.
 * @param   u64         The value to AND *pu64 with.
 *
 * @remarks x86: Requires a Pentium or later.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
#else
DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
{
# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
    __asm__ __volatile__("andq %1, %0\n\t"
                         : "=m" (*pu64)
                         : "r" (u64),
                           "m" (*pu64));
# else
    /* Read the current value, compute the new one and try to swap it in;
       retry (after a pause hint) until the compare-and-exchange succeeds. */
    for (;;)
    {
        uint64_t u64Old = ASMAtomicUoReadU64(pu64);
        uint64_t u64New = u64Old & u64;
        if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
            break;
        ASMNopPause();
    }
# endif
}
#endif
3688
3689
3690/**
3691 * Atomically And a signed 64-bit value, unordered.
3692 *
3693 * @param pi64 Pointer to the pointer variable to AND i64 with.
3694 * @param i64 The value to AND *pi64 with.
3695 *
3696 * @remarks x86: Requires a Pentium or later.
3697 */
3698DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3699{
3700 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3701}
3702
3703
/**
 * Atomically increment an unsigned 32-bit value, unordered.
 *
 * Uses an XADD instruction without lock prefix.
 *
 * @returns the new value.
 * @param   pu32        Pointer to the variable to increment.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 goes in holding 1 (the addend) and comes out holding the
       previous memory value, hence the '+ 1' on return. */
    __asm__ __volatile__("xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1),
                           "m" (*pu32)
                         : "memory");
    return u32 + 1;
# else
    /* Same XADD sequence in MSC-style inline assembly; eax ends up with the
       previous memory value. */
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        xadd    [rdx], eax
#  else
        mov     edx, [pu32]
        xadd    [edx], eax
#  endif
        mov     u32, eax
    }
    return u32 + 1;
# endif
}
#endif
3743
3744
/**
 * Atomically decrement an unsigned 32-bit value, unordered.
 *
 * Uses an XADD of -1.
 *
 * NOTE(review): the GNU-style variant below carries a 'lock' prefix while the
 * MSC-style variant and ASMAtomicUoIncU32 do not.  The locked form is the
 * stronger one; confirm which is intended for this unordered function before
 * changing either.
 *
 * @returns the new value.
 * @param   pu32        Pointer to the variable to decrement.
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 goes in holding -1 (the addend) and comes out holding the
       previous memory value, hence the '- 1' on return. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1),
                           "m" (*pu32)
                         : "memory");
    return u32 - 1;
# else
    /* XADD without lock prefix; eax ends up with the previous memory value. */
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        xadd    [rdx], eax
#  else
        mov     edx, [pu32]
        xadd    [edx], eax
#  endif
        mov     u32, eax
    }
    return u32 - 1;
# endif
}
#endif
3784
3785
/** @def RT_ASM_PAGE_SIZE
 * The page size assumed by the page oriented helpers in this header: 0x2000
 * for RT_ARCH_SPARC64, 0x1000 otherwise.
 *
 * We try to avoid dragging in iprt/param.h here.  If PAGE_SIZE happens to be
 * defined already (and NT_INCLUDED is not), it is checked against our value
 * and a mismatch is a compile time error.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#else
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3805
/**
 * Zeros a memory page.
 *
 * The intrinsic and GNU-style variants clear RT_ASM_PAGE_SIZE bytes.  The
 * MSC-style inline assembly variant uses hard-coded counts (0200h qwords /
 * 0400h dwords), i.e. 4K.
 *
 * @param   pv          Pointer to the memory block.  This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
#  else
    __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
#  endif

# elif RT_INLINE_ASM_GNU_STYLE
    /* uDummy only exists to tell the compiler that the count register is
       modified by the string instruction. */
    RTCCUINTREG uDummy;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("rep stosq"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (RT_ASM_PAGE_SIZE >> 3),
                           "a" (0)
                         : "memory");
#  else
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (RT_ASM_PAGE_SIZE >> 2),
                           "a" (0)
                         : "memory");
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#  else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep     stosd
#  endif
    }
# endif
}
# endif
3860
3861
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * The block is cleared in 32-bit units (cb / 4 stores).  The AMD64 intrinsic
 * variant switches to 64-bit stores when cb is a multiple of 8.
 *
 * @param   pv          Pointer to the memory block.
 * @param   cb          Number of bytes in the block.  This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
    else
#  endif
        __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* pv and cb double as outputs so the compiler knows the destination and
       count registers are modified by the string instruction. */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (0)
                         : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
3907
3908
/**
 * Fills a memory block with a 32-bit aligned size.
 *
 * The block is filled in 32-bit units (cb / 4 stores of u32).  The AMD64
 * intrinsic variant switches to 64-bit stores of the doubled-up pattern when
 * cb is a multiple of 8.
 *
 * @param   pv          Pointer to the memory block.
 * @param   cb          Number of bytes in the block.  This MUST be aligned on 32-bit!
 * @param   u32         The value to fill with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
#else
DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
    else
#  endif
        __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* pv and cb double as outputs so the compiler knows the destination and
       count registers are modified by the string instruction. */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (u32)
                         : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        mov     eax, [u32]
        rep stosd
    }
# endif
}
#endif
3955
3956
3957/**
3958 * Checks if a memory block is all zeros.
3959 *
3960 * @returns Pointer to the first non-zero byte.
3961 * @returns NULL if all zero.
3962 *
3963 * @param pv Pointer to the memory block.
3964 * @param cb Number of bytes in the block.
3965 *
3966 * @todo Fix name, it is a predicate function but it's not returning boolean!
3967 */
3968#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3969 && !defined(RT_ARCH_SPARC64) \
3970 && !defined(RT_ARCH_SPARC)
3971DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3972#else
3973DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3974{
3975 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3976 for (; cb; cb--, pb++)
3977 if (RT_LIKELY(*pb == 0))
3978 { /* likely */ }
3979 else
3980 return (void RT_FAR *)pb;
3981 return NULL;
3982}
3983#endif
3984
3985
3986/**
3987 * Checks if a memory block is all zeros.
3988 *
3989 * @returns true if zero, false if not.
3990 *
3991 * @param pv Pointer to the memory block.
3992 * @param cb Number of bytes in the block.
3993 *
3994 * @sa ASMMemFirstNonZero
3995 */
3996DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3997{
3998 return ASMMemFirstNonZero(pv, cb) == NULL;
3999}
4000
4001
/**
 * Checks if a memory page is all zeros.
 *
 * The active implementation walks RT_ASM_PAGE_SIZE bytes in chunks of eight
 * uintptr_t sized words and bails out on the first non-zero word.  The
 * inline assembly alternative is compiled out (\#if 0).
 *
 * @returns true / false.
 *
 * @param   pvPage      Pointer to the page.  Must be aligned on 16 byte
 *                      boundary (asserted).
 */
DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
{
# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
    union { RTCCUINTREG r; bool f; } uAX;
    RTCCUINTREG xCX, xDI;
   Assert(!((uintptr_t)pvPage & 15));
    __asm__ __volatile__("repe; "
#  ifdef RT_ARCH_AMD64
                         "scasq\n\t"
#  else
                         "scasl\n\t"
#  endif
                         "setnc %%al\n\t"
                         : "=&c" (xCX),
                           "=&D" (xDI),
                           "=&a" (uAX.r)
                         : "mr" (pvPage),
#  ifdef RT_ARCH_AMD64
                         "0" (RT_ASM_PAGE_SIZE/8),
#  else
                         "0" (RT_ASM_PAGE_SIZE/4),
#  endif
                         "1" (pvPage),
                         "2" (0));
    return uAX.f;
# else
   uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
   /* Number of 8-word chunks making up one page. */
   size_t                  cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
   Assert(!((uintptr_t)pvPage & 15));
   for (;;)
   {
       /* The eight words of a chunk are checked in the interleaved order
          0,4,2,6,1,5,3,7; the order does not affect the result. */
       if (puPtr[0])        return false;
       if (puPtr[4])        return false;

       if (puPtr[2])        return false;
       if (puPtr[6])        return false;

       if (puPtr[1])        return false;
       if (puPtr[5])        return false;

       if (puPtr[3])        return false;
       if (puPtr[7])        return false;

       /* Done once the last chunk has been checked. */
       if (!--cLeft)
           return true;
       puPtr += 8;
   }
# endif
}
4059
4060
4061/**
4062 * Checks if a memory block is filled with the specified byte, returning the
4063 * first mismatch.
4064 *
4065 * This is sort of an inverted memchr.
4066 *
4067 * @returns Pointer to the byte which doesn't equal u8.
4068 * @returns NULL if all equal to u8.
4069 *
4070 * @param pv Pointer to the memory block.
4071 * @param cb Number of bytes in the block.
4072 * @param u8 The value it's supposed to be filled with.
4073 *
4074 * @remarks No alignment requirements.
4075 */
4076#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4077 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4078 && !defined(RT_ARCH_SPARC64) \
4079 && !defined(RT_ARCH_SPARC)
4080DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4081#else
4082DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4083{
4084 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4085 for (; cb; cb--, pb++)
4086 if (RT_LIKELY(*pb == u8))
4087 { /* likely */ }
4088 else
4089 return (void *)pb;
4090 return NULL;
4091}
4092#endif
4093
4094
4095/**
4096 * Checks if a memory block is filled with the specified byte.
4097 *
4098 * @returns true if all matching, false if not.
4099 *
4100 * @param pv Pointer to the memory block.
4101 * @param cb Number of bytes in the block.
4102 * @param u8 The value it's supposed to be filled with.
4103 *
4104 * @remarks No alignment requirements.
4105 */
4106DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4107{
4108 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4109}
4110
4111
4112/**
4113 * Checks if a memory block is filled with the specified 32-bit value.
4114 *
4115 * This is a sort of inverted memchr.
4116 *
4117 * @returns Pointer to the first value which doesn't equal u32.
4118 * @returns NULL if all equal to u32.
4119 *
4120 * @param pv Pointer to the memory block.
4121 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4122 * @param u32 The value it's supposed to be filled with.
4123 */
4124DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4125{
4126/** @todo rewrite this in inline assembly? */
4127 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4128 for (; cb; cb -= 4, pu32++)
4129 if (RT_LIKELY(*pu32 == u32))
4130 { /* likely */ }
4131 else
4132 return (uint32_t RT_FAR *)pu32;
4133 return NULL;
4134}
4135
4136
/**
 * Probes a byte pointer for read access.
 *
 * The probe is a plain one byte load, so it will presumably fault if the
 * byte is not read accessible (the previous wording said "will not fault",
 * which contradicts the ASMProbeReadBuffer description - TODO confirm).  The
 * idea is to do this in a safe place like before acquiring locks and such
 * like.
 *
 * Also, this functions guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte that was read.
 * @param   pvByte      Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* Volatile asm so the load is emitted even when the caller ignores the result. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvByte]
        mov     al, [rax]
#  else
        mov     eax, [pvByte]
        mov     al, [eax]
#  endif
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
4176
4177/**
4178 * Probes a buffer for read access page by page.
4179 *
4180 * While the function will fault if the buffer is not fully read
4181 * accessible, the idea is to do this in a safe place like before
4182 * acquiring locks and such like.
4183 *
4184 * Also, this functions guarantees that an eager compiler is not going
4185 * to optimize the probing away.
4186 *
4187 * @param pvBuf Pointer to the buffer.
4188 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4189 */
4190DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4191{
4192 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4193 /* the first byte */
4194 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4195 ASMProbeReadByte(pu8);
4196
4197 /* the pages in between pages. */
4198 while (cbBuf > RT_ASM_PAGE_SIZE)
4199 {
4200 ASMProbeReadByte(pu8);
4201 cbBuf -= RT_ASM_PAGE_SIZE;
4202 pu8 += RT_ASM_PAGE_SIZE;
4203 }
4204
4205 /* the last byte */
4206 ASMProbeReadByte(pu8 + cbBuf - 1);
4207}
4208
4209
4210
4211/** @defgroup grp_inline_bits Bit Operations
4212 * @{
4213 */
4214
4215
/**
 * Sets a bit in a bitmap.
 *
 * This is the non-atomic variant (no lock prefix / interlocked intrinsic);
 * see ASMAtomicBitSet for the atomic one.
 *
 * @param   pvBitmap    Pointer to the bitmap.  This should be 32-bit aligned.
 * @param   iBit        The bit to set.
 *
 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
 *          However, doing so will yield better performance as well as avoiding
 *          traps accessing the last bits in the bitmap.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is deliberately ignored. */
    _bittestandset((long RT_FAR *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The "memory" clobber is there because iBit may address memory beyond
       the single long named by the "m" operands. */
    __asm__ __volatile__("btsl %1, %0"
                         : "=m" (*(volatile long RT_FAR *)pvBitmap)
                         : "Ir" (iBit),
                           "m" (*(volatile long RT_FAR *)pvBitmap)
                         : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        bts     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        bts     [eax], edx
#  endif
    }
# endif
}
#endif
4256
4257
/**
 * Atomically sets a bit in a bitmap, ordered.
 *
 * Uses the interlocked intrinsic or a locked BTS instruction.  The alignment
 * requirement on pvBitmap is asserted in the inline version.
 *
 * @param   pvBitmap    Pointer to the bitmap.  Must be 32-bit aligned, otherwise
 *                      the memory access isn't atomic!
 * @param   iBit        The bit to set.
 *
 * @remarks x86: Requires a 386 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
{
    AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's return value is deliberately ignored. */
    _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* The "memory" clobber is there because iBit may address memory beyond
       the single long named by the "m" operands. */
    __asm__ __volatile__("lock; btsl %1, %0"
                         : "=m" (*(volatile long *)pvBitmap)
                         : "Ir" (iBit),
                           "m" (*(volatile long *)pvBitmap)
                         : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [eax], edx
#  endif
    }
# endif
}
#endif
4297
4298
4299/**
4300 * Clears a bit in a bitmap.
4301 *
4302 * @param pvBitmap Pointer to the bitmap.
4303 * @param iBit The bit to clear.
4304 *
4305 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4306 * However, doing so will yield better performance as well as avoiding
4307 * traps accessing the last bits in the bitmap.
4308 */
4309#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4310RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4311#else
4312DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4313{
4314# if RT_INLINE_ASM_USES_INTRIN
4315 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4316
4317# elif RT_INLINE_ASM_GNU_STYLE
4318 __asm__ __volatile__("btrl %1, %0"
4319 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4320 : "Ir" (iBit),
4321 "m" (*(volatile long RT_FAR *)pvBitmap)
4322 : "memory");
4323# else
4324 __asm
4325 {
4326# ifdef RT_ARCH_AMD64
4327 mov rax, [pvBitmap]
4328 mov edx, [iBit]
4329 btr [rax], edx
4330# else
4331 mov eax, [pvBitmap]
4332 mov edx, [iBit]
4333 btr [eax], edx
4334# endif
4335 }
4336# endif
4337}
4338#endif
4339
4340
4341/**
4342 * Atomically clears a bit in a bitmap, ordered.
4343 *
4344 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4345 * the memory access isn't atomic!
4346 * @param iBit The bit to toggle set.
4347 *
4348 * @remarks No memory barrier, take care on smp.
4349 * @remarks x86: Requires a 386 or later.
4350 */
4351#if RT_INLINE_ASM_EXTERNAL
4352RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4353#else
4354DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4355{
4356 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4357# if RT_INLINE_ASM_GNU_STYLE
4358 __asm__ __volatile__("lock; btrl %1, %0"
4359 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4360 : "Ir" (iBit),
4361 "m" (*(volatile long RT_FAR *)pvBitmap)
4362 : "memory");
4363# else
4364 __asm
4365 {
4366# ifdef RT_ARCH_AMD64
4367 mov rax, [pvBitmap]
4368 mov edx, [iBit]
4369 lock btr [rax], edx
4370# else
4371 mov eax, [pvBitmap]
4372 mov edx, [iBit]
4373 lock btr [eax], edx
4374# endif
4375 }
4376# endif
4377}
4378#endif
4379
4380
4381/**
4382 * Toggles a bit in a bitmap.
4383 *
4384 * @param pvBitmap Pointer to the bitmap.
4385 * @param iBit The bit to toggle.
4386 *
4387 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4388 * However, doing so will yield better performance as well as avoiding
4389 * traps accessing the last bits in the bitmap.
4390 */
4391#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4392RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4393#else
4394DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4395{
4396# if RT_INLINE_ASM_USES_INTRIN
4397 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4398# elif RT_INLINE_ASM_GNU_STYLE
4399 __asm__ __volatile__("btcl %1, %0"
4400 : "=m" (*(volatile long *)pvBitmap)
4401 : "Ir" (iBit),
4402 "m" (*(volatile long *)pvBitmap)
4403 : "memory");
4404# else
4405 __asm
4406 {
4407# ifdef RT_ARCH_AMD64
4408 mov rax, [pvBitmap]
4409 mov edx, [iBit]
4410 btc [rax], edx
4411# else
4412 mov eax, [pvBitmap]
4413 mov edx, [iBit]
4414 btc [eax], edx
4415# endif
4416 }
4417# endif
4418}
4419#endif
4420
4421
4422/**
4423 * Atomically toggles a bit in a bitmap, ordered.
4424 *
4425 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4426 * the memory access isn't atomic!
4427 * @param iBit The bit to test and set.
4428 *
4429 * @remarks x86: Requires a 386 or later.
4430 */
4431#if RT_INLINE_ASM_EXTERNAL
4432RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4433#else
4434DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4435{
4436 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4437# if RT_INLINE_ASM_GNU_STYLE
4438 __asm__ __volatile__("lock; btcl %1, %0"
4439 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4440 : "Ir" (iBit),
4441 "m" (*(volatile long RT_FAR *)pvBitmap)
4442 : "memory");
4443# else
4444 __asm
4445 {
4446# ifdef RT_ARCH_AMD64
4447 mov rax, [pvBitmap]
4448 mov edx, [iBit]
4449 lock btc [rax], edx
4450# else
4451 mov eax, [pvBitmap]
4452 mov edx, [iBit]
4453 lock btc [eax], edx
4454# endif
4455 }
4456# endif
4457}
4458#endif
4459
4460
4461/**
4462 * Tests and sets a bit in a bitmap.
4463 *
4464 * @returns true if the bit was set.
4465 * @returns false if the bit was clear.
4466 *
4467 * @param pvBitmap Pointer to the bitmap.
4468 * @param iBit The bit to test and set.
4469 *
4470 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4471 * However, doing so will yield better performance as well as avoiding
4472 * traps accessing the last bits in the bitmap.
4473 */
4474#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4475RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4476#else
4477DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4478{
4479 union { bool f; uint32_t u32; uint8_t u8; } rc;
4480# if RT_INLINE_ASM_USES_INTRIN
4481 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4482
4483# elif RT_INLINE_ASM_GNU_STYLE
4484 __asm__ __volatile__("btsl %2, %1\n\t"
4485 "setc %b0\n\t"
4486 "andl $1, %0\n\t"
4487 : "=q" (rc.u32),
4488 "=m" (*(volatile long RT_FAR *)pvBitmap)
4489 : "Ir" (iBit),
4490 "m" (*(volatile long RT_FAR *)pvBitmap)
4491 : "memory");
4492# else
4493 __asm
4494 {
4495 mov edx, [iBit]
4496# ifdef RT_ARCH_AMD64
4497 mov rax, [pvBitmap]
4498 bts [rax], edx
4499# else
4500 mov eax, [pvBitmap]
4501 bts [eax], edx
4502# endif
4503 setc al
4504 and eax, 1
4505 mov [rc.u32], eax
4506 }
4507# endif
4508 return rc.f;
4509}
4510#endif
4511
4512
4513/**
4514 * Atomically tests and sets a bit in a bitmap, ordered.
4515 *
4516 * @returns true if the bit was set.
4517 * @returns false if the bit was clear.
4518 *
4519 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4520 * the memory access isn't atomic!
4521 * @param iBit The bit to set.
4522 *
4523 * @remarks x86: Requires a 386 or later.
4524 */
4525#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4526RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4527#else
4528DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4529{
4530 union { bool f; uint32_t u32; uint8_t u8; } rc;
4531 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4532# if RT_INLINE_ASM_USES_INTRIN
4533 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4534# elif RT_INLINE_ASM_GNU_STYLE
4535 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4536 "setc %b0\n\t"
4537 "andl $1, %0\n\t"
4538 : "=q" (rc.u32),
4539 "=m" (*(volatile long RT_FAR *)pvBitmap)
4540 : "Ir" (iBit),
4541 "m" (*(volatile long RT_FAR *)pvBitmap)
4542 : "memory");
4543# else
4544 __asm
4545 {
4546 mov edx, [iBit]
4547# ifdef RT_ARCH_AMD64
4548 mov rax, [pvBitmap]
4549 lock bts [rax], edx
4550# else
4551 mov eax, [pvBitmap]
4552 lock bts [eax], edx
4553# endif
4554 setc al
4555 and eax, 1
4556 mov [rc.u32], eax
4557 }
4558# endif
4559 return rc.f;
4560}
4561#endif
4562
4563
4564/**
4565 * Tests and clears a bit in a bitmap.
4566 *
4567 * @returns true if the bit was set.
4568 * @returns false if the bit was clear.
4569 *
4570 * @param pvBitmap Pointer to the bitmap.
4571 * @param iBit The bit to test and clear.
4572 *
4573 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4574 * However, doing so will yield better performance as well as avoiding
4575 * traps accessing the last bits in the bitmap.
4576 */
4577#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4578RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4579#else
4580DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4581{
4582 union { bool f; uint32_t u32; uint8_t u8; } rc;
4583# if RT_INLINE_ASM_USES_INTRIN
4584 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4585
4586# elif RT_INLINE_ASM_GNU_STYLE
4587 __asm__ __volatile__("btrl %2, %1\n\t"
4588 "setc %b0\n\t"
4589 "andl $1, %0\n\t"
4590 : "=q" (rc.u32),
4591 "=m" (*(volatile long RT_FAR *)pvBitmap)
4592 : "Ir" (iBit),
4593 "m" (*(volatile long RT_FAR *)pvBitmap)
4594 : "memory");
4595# else
4596 __asm
4597 {
4598 mov edx, [iBit]
4599# ifdef RT_ARCH_AMD64
4600 mov rax, [pvBitmap]
4601 btr [rax], edx
4602# else
4603 mov eax, [pvBitmap]
4604 btr [eax], edx
4605# endif
4606 setc al
4607 and eax, 1
4608 mov [rc.u32], eax
4609 }
4610# endif
4611 return rc.f;
4612}
4613#endif
4614
4615
4616/**
4617 * Atomically tests and clears a bit in a bitmap, ordered.
4618 *
4619 * @returns true if the bit was set.
4620 * @returns false if the bit was clear.
4621 *
4622 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4623 * the memory access isn't atomic!
4624 * @param iBit The bit to test and clear.
4625 *
4626 * @remarks No memory barrier, take care on smp.
4627 * @remarks x86: Requires a 386 or later.
4628 */
4629#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4630RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4631#else
4632DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4633{
4634 union { bool f; uint32_t u32; uint8_t u8; } rc;
4635 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4636# if RT_INLINE_ASM_USES_INTRIN
4637 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4638
4639# elif RT_INLINE_ASM_GNU_STYLE
4640 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4641 "setc %b0\n\t"
4642 "andl $1, %0\n\t"
4643 : "=q" (rc.u32),
4644 "=m" (*(volatile long RT_FAR *)pvBitmap)
4645 : "Ir" (iBit),
4646 "m" (*(volatile long RT_FAR *)pvBitmap)
4647 : "memory");
4648# else
4649 __asm
4650 {
4651 mov edx, [iBit]
4652# ifdef RT_ARCH_AMD64
4653 mov rax, [pvBitmap]
4654 lock btr [rax], edx
4655# else
4656 mov eax, [pvBitmap]
4657 lock btr [eax], edx
4658# endif
4659 setc al
4660 and eax, 1
4661 mov [rc.u32], eax
4662 }
4663# endif
4664 return rc.f;
4665}
4666#endif
4667
4668
4669/**
4670 * Tests and toggles a bit in a bitmap.
4671 *
4672 * @returns true if the bit was set.
4673 * @returns false if the bit was clear.
4674 *
4675 * @param pvBitmap Pointer to the bitmap.
4676 * @param iBit The bit to test and toggle.
4677 *
4678 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4679 * However, doing so will yield better performance as well as avoiding
4680 * traps accessing the last bits in the bitmap.
4681 */
4682#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4683RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4684#else
4685DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4686{
4687 union { bool f; uint32_t u32; uint8_t u8; } rc;
4688# if RT_INLINE_ASM_USES_INTRIN
4689 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4690
4691# elif RT_INLINE_ASM_GNU_STYLE
4692 __asm__ __volatile__("btcl %2, %1\n\t"
4693 "setc %b0\n\t"
4694 "andl $1, %0\n\t"
4695 : "=q" (rc.u32),
4696 "=m" (*(volatile long RT_FAR *)pvBitmap)
4697 : "Ir" (iBit),
4698 "m" (*(volatile long RT_FAR *)pvBitmap)
4699 : "memory");
4700# else
4701 __asm
4702 {
4703 mov edx, [iBit]
4704# ifdef RT_ARCH_AMD64
4705 mov rax, [pvBitmap]
4706 btc [rax], edx
4707# else
4708 mov eax, [pvBitmap]
4709 btc [eax], edx
4710# endif
4711 setc al
4712 and eax, 1
4713 mov [rc.u32], eax
4714 }
4715# endif
4716 return rc.f;
4717}
4718#endif
4719
4720
4721/**
4722 * Atomically tests and toggles a bit in a bitmap, ordered.
4723 *
4724 * @returns true if the bit was set.
4725 * @returns false if the bit was clear.
4726 *
4727 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4728 * the memory access isn't atomic!
4729 * @param iBit The bit to test and toggle.
4730 *
4731 * @remarks x86: Requires a 386 or later.
4732 */
4733#if RT_INLINE_ASM_EXTERNAL
4734RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4735#else
4736DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4737{
4738 union { bool f; uint32_t u32; uint8_t u8; } rc;
4739 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4740# if RT_INLINE_ASM_GNU_STYLE
4741 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4742 "setc %b0\n\t"
4743 "andl $1, %0\n\t"
4744 : "=q" (rc.u32),
4745 "=m" (*(volatile long RT_FAR *)pvBitmap)
4746 : "Ir" (iBit),
4747 "m" (*(volatile long RT_FAR *)pvBitmap)
4748 : "memory");
4749# else
4750 __asm
4751 {
4752 mov edx, [iBit]
4753# ifdef RT_ARCH_AMD64
4754 mov rax, [pvBitmap]
4755 lock btc [rax], edx
4756# else
4757 mov eax, [pvBitmap]
4758 lock btc [eax], edx
4759# endif
4760 setc al
4761 and eax, 1
4762 mov [rc.u32], eax
4763 }
4764# endif
4765 return rc.f;
4766}
4767#endif
4768
4769
4770/**
4771 * Tests if a bit in a bitmap is set.
4772 *
4773 * @returns true if the bit is set.
4774 * @returns false if the bit is clear.
4775 *
4776 * @param pvBitmap Pointer to the bitmap.
4777 * @param iBit The bit to test.
4778 *
4779 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4780 * However, doing so will yield better performance as well as avoiding
4781 * traps accessing the last bits in the bitmap.
4782 */
4783#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4784RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4785#else
4786DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4787{
4788 union { bool f; uint32_t u32; uint8_t u8; } rc;
4789# if RT_INLINE_ASM_USES_INTRIN
4790 rc.u32 = _bittest((long *)pvBitmap, iBit);
4791# elif RT_INLINE_ASM_GNU_STYLE
4792
4793 __asm__ __volatile__("btl %2, %1\n\t"
4794 "setc %b0\n\t"
4795 "andl $1, %0\n\t"
4796 : "=q" (rc.u32)
4797 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4798 "Ir" (iBit)
4799 : "memory");
4800# else
4801 __asm
4802 {
4803 mov edx, [iBit]
4804# ifdef RT_ARCH_AMD64
4805 mov rax, [pvBitmap]
4806 bt [rax], edx
4807# else
4808 mov eax, [pvBitmap]
4809 bt [eax], edx
4810# endif
4811 setc al
4812 and eax, 1
4813 mov [rc.u32], eax
4814 }
4815# endif
4816 return rc.f;
4817}
4818#endif
4819
4820
4821/**
4822 * Clears a bit range within a bitmap.
4823 *
4824 * @param pvBitmap Pointer to the bitmap.
4825 * @param iBitStart The First bit to clear.
4826 * @param iBitEnd The first bit not to clear.
4827 */
4828DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4829{
4830 if (iBitStart < iBitEnd)
4831 {
4832 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4833 int32_t iStart = iBitStart & ~31;
4834 int32_t iEnd = iBitEnd & ~31;
4835 if (iStart == iEnd)
4836 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4837 else
4838 {
4839 /* bits in first dword. */
4840 if (iBitStart & 31)
4841 {
4842 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4843 pu32++;
4844 iBitStart = iStart + 32;
4845 }
4846
4847 /* whole dword. */
4848 if (iBitStart != iEnd)
4849 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4850
4851 /* bits in last dword. */
4852 if (iBitEnd & 31)
4853 {
4854 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4855 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4856 }
4857 }
4858 }
4859}
4860
4861
4862/**
4863 * Sets a bit range within a bitmap.
4864 *
4865 * @param pvBitmap Pointer to the bitmap.
4866 * @param iBitStart The First bit to set.
4867 * @param iBitEnd The first bit not to set.
4868 */
4869DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4870{
4871 if (iBitStart < iBitEnd)
4872 {
4873 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4874 int32_t iStart = iBitStart & ~31;
4875 int32_t iEnd = iBitEnd & ~31;
4876 if (iStart == iEnd)
4877 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4878 else
4879 {
4880 /* bits in first dword. */
4881 if (iBitStart & 31)
4882 {
4883 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4884 pu32++;
4885 iBitStart = iStart + 32;
4886 }
4887
4888 /* whole dword. */
4889 if (iBitStart != iEnd)
4890 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4891
4892 /* bits in last dword. */
4893 if (iBitEnd & 31)
4894 {
4895 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4896 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4897 }
4898 }
4899 }
4900}
4901
4902
4903/**
4904 * Finds the first clear bit in a bitmap.
4905 *
4906 * @returns Index of the first zero bit.
4907 * @returns -1 if no clear bit was found.
4908 * @param pvBitmap Pointer to the bitmap.
4909 * @param cBits The number of bits in the bitmap. Multiple of 32.
4910 */
4911#if RT_INLINE_ASM_EXTERNAL
4912DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4913#else
4914DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4915{
4916 if (cBits)
4917 {
4918 int32_t iBit;
4919# if RT_INLINE_ASM_GNU_STYLE
4920 RTCCUINTREG uEAX, uECX, uEDI;
4921 cBits = RT_ALIGN_32(cBits, 32);
4922 __asm__ __volatile__("repe; scasl\n\t"
4923 "je 1f\n\t"
4924# ifdef RT_ARCH_AMD64
4925 "lea -4(%%rdi), %%rdi\n\t"
4926 "xorl (%%rdi), %%eax\n\t"
4927 "subq %5, %%rdi\n\t"
4928# else
4929 "lea -4(%%edi), %%edi\n\t"
4930 "xorl (%%edi), %%eax\n\t"
4931 "subl %5, %%edi\n\t"
4932# endif
4933 "shll $3, %%edi\n\t"
4934 "bsfl %%eax, %%edx\n\t"
4935 "addl %%edi, %%edx\n\t"
4936 "1:\t\n"
4937 : "=d" (iBit),
4938 "=&c" (uECX),
4939 "=&D" (uEDI),
4940 "=&a" (uEAX)
4941 : "0" (0xffffffff),
4942 "mr" (pvBitmap),
4943 "1" (cBits >> 5),
4944 "2" (pvBitmap),
4945 "3" (0xffffffff));
4946# else
4947 cBits = RT_ALIGN_32(cBits, 32);
4948 __asm
4949 {
4950# ifdef RT_ARCH_AMD64
4951 mov rdi, [pvBitmap]
4952 mov rbx, rdi
4953# else
4954 mov edi, [pvBitmap]
4955 mov ebx, edi
4956# endif
4957 mov edx, 0ffffffffh
4958 mov eax, edx
4959 mov ecx, [cBits]
4960 shr ecx, 5
4961 repe scasd
4962 je done
4963
4964# ifdef RT_ARCH_AMD64
4965 lea rdi, [rdi - 4]
4966 xor eax, [rdi]
4967 sub rdi, rbx
4968# else
4969 lea edi, [edi - 4]
4970 xor eax, [edi]
4971 sub edi, ebx
4972# endif
4973 shl edi, 3
4974 bsf edx, eax
4975 add edx, edi
4976 done:
4977 mov [iBit], edx
4978 }
4979# endif
4980 return iBit;
4981 }
4982 return -1;
4983}
4984#endif
4985
4986
4987/**
4988 * Finds the next clear bit in a bitmap.
4989 *
4990 * @returns Index of the first zero bit.
4991 * @returns -1 if no clear bit was found.
4992 * @param pvBitmap Pointer to the bitmap.
4993 * @param cBits The number of bits in the bitmap. Multiple of 32.
4994 * @param iBitPrev The bit returned from the last search.
4995 * The search will start at iBitPrev + 1.
4996 */
4997#if RT_INLINE_ASM_EXTERNAL
4998DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4999#else
5000DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5001{
5002 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5003 int iBit = ++iBitPrev & 31;
5004 if (iBit)
5005 {
5006 /*
5007 * Inspect the 32-bit word containing the unaligned bit.
5008 */
5009 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5010
5011# if RT_INLINE_ASM_USES_INTRIN
5012 unsigned long ulBit = 0;
5013 if (_BitScanForward(&ulBit, u32))
5014 return ulBit + iBitPrev;
5015# else
5016# if RT_INLINE_ASM_GNU_STYLE
5017 __asm__ __volatile__("bsf %1, %0\n\t"
5018 "jnz 1f\n\t"
5019 "movl $-1, %0\n\t"
5020 "1:\n\t"
5021 : "=r" (iBit)
5022 : "r" (u32));
5023# else
5024 __asm
5025 {
5026 mov edx, [u32]
5027 bsf eax, edx
5028 jnz done
5029 mov eax, 0ffffffffh
5030 done:
5031 mov [iBit], eax
5032 }
5033# endif
5034 if (iBit >= 0)
5035 return iBit + iBitPrev;
5036# endif
5037
5038 /*
5039 * Skip ahead and see if there is anything left to search.
5040 */
5041 iBitPrev |= 31;
5042 iBitPrev++;
5043 if (cBits <= (uint32_t)iBitPrev)
5044 return -1;
5045 }
5046
5047 /*
5048 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5049 */
5050 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5051 if (iBit >= 0)
5052 iBit += iBitPrev;
5053 return iBit;
5054}
5055#endif
5056
5057
5058/**
5059 * Finds the first set bit in a bitmap.
5060 *
5061 * @returns Index of the first set bit.
5062 * @returns -1 if no clear bit was found.
5063 * @param pvBitmap Pointer to the bitmap.
5064 * @param cBits The number of bits in the bitmap. Multiple of 32.
5065 */
5066#if RT_INLINE_ASM_EXTERNAL
5067DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5068#else
5069DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5070{
5071 if (cBits)
5072 {
5073 int32_t iBit;
5074# if RT_INLINE_ASM_GNU_STYLE
5075 RTCCUINTREG uEAX, uECX, uEDI;
5076 cBits = RT_ALIGN_32(cBits, 32);
5077 __asm__ __volatile__("repe; scasl\n\t"
5078 "je 1f\n\t"
5079# ifdef RT_ARCH_AMD64
5080 "lea -4(%%rdi), %%rdi\n\t"
5081 "movl (%%rdi), %%eax\n\t"
5082 "subq %5, %%rdi\n\t"
5083# else
5084 "lea -4(%%edi), %%edi\n\t"
5085 "movl (%%edi), %%eax\n\t"
5086 "subl %5, %%edi\n\t"
5087# endif
5088 "shll $3, %%edi\n\t"
5089 "bsfl %%eax, %%edx\n\t"
5090 "addl %%edi, %%edx\n\t"
5091 "1:\t\n"
5092 : "=d" (iBit),
5093 "=&c" (uECX),
5094 "=&D" (uEDI),
5095 "=&a" (uEAX)
5096 : "0" (0xffffffff),
5097 "mr" (pvBitmap),
5098 "1" (cBits >> 5),
5099 "2" (pvBitmap),
5100 "3" (0));
5101# else
5102 cBits = RT_ALIGN_32(cBits, 32);
5103 __asm
5104 {
5105# ifdef RT_ARCH_AMD64
5106 mov rdi, [pvBitmap]
5107 mov rbx, rdi
5108# else
5109 mov edi, [pvBitmap]
5110 mov ebx, edi
5111# endif
5112 mov edx, 0ffffffffh
5113 xor eax, eax
5114 mov ecx, [cBits]
5115 shr ecx, 5
5116 repe scasd
5117 je done
5118# ifdef RT_ARCH_AMD64
5119 lea rdi, [rdi - 4]
5120 mov eax, [rdi]
5121 sub rdi, rbx
5122# else
5123 lea edi, [edi - 4]
5124 mov eax, [edi]
5125 sub edi, ebx
5126# endif
5127 shl edi, 3
5128 bsf edx, eax
5129 add edx, edi
5130 done:
5131 mov [iBit], edx
5132 }
5133# endif
5134 return iBit;
5135 }
5136 return -1;
5137}
5138#endif
5139
5140
5141/**
5142 * Finds the next set bit in a bitmap.
5143 *
5144 * @returns Index of the next set bit.
5145 * @returns -1 if no set bit was found.
5146 * @param pvBitmap Pointer to the bitmap.
5147 * @param cBits The number of bits in the bitmap. Multiple of 32.
5148 * @param iBitPrev The bit returned from the last search.
5149 * The search will start at iBitPrev + 1.
5150 */
5151#if RT_INLINE_ASM_EXTERNAL
5152DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5153#else
5154DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5155{
5156 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5157 int iBit = ++iBitPrev & 31;
5158 if (iBit)
5159 {
5160 /*
5161 * Inspect the 32-bit word containing the unaligned bit.
5162 */
5163 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5164
5165# if RT_INLINE_ASM_USES_INTRIN
5166 unsigned long ulBit = 0;
5167 if (_BitScanForward(&ulBit, u32))
5168 return ulBit + iBitPrev;
5169# else
5170# if RT_INLINE_ASM_GNU_STYLE
5171 __asm__ __volatile__("bsf %1, %0\n\t"
5172 "jnz 1f\n\t"
5173 "movl $-1, %0\n\t"
5174 "1:\n\t"
5175 : "=r" (iBit)
5176 : "r" (u32));
5177# else
5178 __asm
5179 {
5180 mov edx, [u32]
5181 bsf eax, edx
5182 jnz done
5183 mov eax, 0ffffffffh
5184 done:
5185 mov [iBit], eax
5186 }
5187# endif
5188 if (iBit >= 0)
5189 return iBit + iBitPrev;
5190# endif
5191
5192 /*
5193 * Skip ahead and see if there is anything left to search.
5194 */
5195 iBitPrev |= 31;
5196 iBitPrev++;
5197 if (cBits <= (uint32_t)iBitPrev)
5198 return -1;
5199 }
5200
5201 /*
5202 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5203 */
5204 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5205 if (iBit >= 0)
5206 iBit += iBitPrev;
5207 return iBit;
5208}
5209#endif
5210
5211
5212/**
5213 * Finds the first bit which is set in the given 32-bit integer.
5214 * Bits are numbered from 1 (least significant) to 32.
5215 *
5216 * @returns index [1..32] of the first set bit.
5217 * @returns 0 if all bits are cleared.
5218 * @param u32 Integer to search for set bits.
5219 * @remarks Similar to ffs() in BSD.
5220 */
5221#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5222RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32);
5223#else
5224DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5225{
5226# if RT_INLINE_ASM_USES_INTRIN
5227 unsigned long iBit;
5228 if (_BitScanForward(&iBit, u32))
5229 iBit++;
5230 else
5231 iBit = 0;
5232# elif RT_INLINE_ASM_GNU_STYLE
5233 uint32_t iBit;
5234 __asm__ __volatile__("bsf %1, %0\n\t"
5235 "jnz 1f\n\t"
5236 "xorl %0, %0\n\t"
5237 "jmp 2f\n"
5238 "1:\n\t"
5239 "incl %0\n"
5240 "2:\n\t"
5241 : "=r" (iBit)
5242 : "rm" (u32));
5243# else
5244 uint32_t iBit;
5245 _asm
5246 {
5247 bsf eax, [u32]
5248 jnz found
5249 xor eax, eax
5250 jmp done
5251 found:
5252 inc eax
5253 done:
5254 mov [iBit], eax
5255 }
5256# endif
5257 return iBit;
5258}
5259#endif
5260
5261
5262/**
5263 * Finds the first bit which is set in the given 32-bit integer.
5264 * Bits are numbered from 1 (least significant) to 32.
5265 *
5266 * @returns index [1..32] of the first set bit.
5267 * @returns 0 if all bits are cleared.
5268 * @param i32 Integer to search for set bits.
5269 * @remark Similar to ffs() in BSD.
5270 */
5271DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5272{
5273 return ASMBitFirstSetU32((uint32_t)i32);
5274}
5275
5276
5277/**
5278 * Finds the first bit which is set in the given 64-bit integer.
5279 *
5280 * Bits are numbered from 1 (least significant) to 64.
5281 *
5282 * @returns index [1..64] of the first set bit.
5283 * @returns 0 if all bits are cleared.
5284 * @param u64 Integer to search for set bits.
5285 * @remarks Similar to ffs() in BSD.
5286 */
5287#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5288RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64);
5289#else
5290DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5291{
5292# if RT_INLINE_ASM_USES_INTRIN
5293 unsigned long iBit;
5294# if ARCH_BITS == 64
5295 if (_BitScanForward64(&iBit, u64))
5296 iBit++;
5297 else
5298 iBit = 0;
5299# else
5300 if (_BitScanForward(&iBit, (uint32_t)u64))
5301 iBit++;
5302 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5303 iBit += 33;
5304 else
5305 iBit = 0;
5306# endif
5307# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5308 uint64_t iBit;
5309 __asm__ __volatile__("bsfq %1, %0\n\t"
5310 "jnz 1f\n\t"
5311 "xorl %k0, %k0\n\t"
5312 "jmp 2f\n"
5313 "1:\n\t"
5314 "incl %k0\n"
5315 "2:\n\t"
5316 : "=r" (iBit)
5317 : "rm" (u64));
5318# else
5319 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5320 if (!iBit)
5321 {
5322 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5323 if (iBit)
5324 iBit += 32;
5325 }
5326# endif
5327 return (unsigned)iBit;
5328}
5329#endif
5330
5331
5332/**
5333 * Finds the first bit which is set in the given 16-bit integer.
5334 *
5335 * Bits are numbered from 1 (least significant) to 16.
5336 *
5337 * @returns index [1..16] of the first set bit.
5338 * @returns 0 if all bits are cleared.
5339 * @param u16 Integer to search for set bits.
5340 * @remarks For 16-bit bs3kit code.
5341 */
5342#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5343RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16);
5344#else
5345DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5346{
5347 return ASMBitFirstSetU32((uint32_t)u16);
5348}
5349#endif
5350
5351
5352/**
5353 * Finds the last bit which is set in the given 32-bit integer.
5354 * Bits are numbered from 1 (least significant) to 32.
5355 *
5356 * @returns index [1..32] of the last set bit.
5357 * @returns 0 if all bits are cleared.
5358 * @param u32 Integer to search for set bits.
5359 * @remark Similar to fls() in BSD.
5360 */
5361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5362RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32);
5363#else
5364DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5365{
5366# if RT_INLINE_ASM_USES_INTRIN
5367 unsigned long iBit;
5368 if (_BitScanReverse(&iBit, u32))
5369 iBit++;
5370 else
5371 iBit = 0;
5372# elif RT_INLINE_ASM_GNU_STYLE
5373 uint32_t iBit;
5374 __asm__ __volatile__("bsrl %1, %0\n\t"
5375 "jnz 1f\n\t"
5376 "xorl %0, %0\n\t"
5377 "jmp 2f\n"
5378 "1:\n\t"
5379 "incl %0\n"
5380 "2:\n\t"
5381 : "=r" (iBit)
5382 : "rm" (u32));
5383# else
5384 uint32_t iBit;
5385 _asm
5386 {
5387 bsr eax, [u32]
5388 jnz found
5389 xor eax, eax
5390 jmp done
5391 found:
5392 inc eax
5393 done:
5394 mov [iBit], eax
5395 }
5396# endif
5397 return iBit;
5398}
5399#endif
5400
5401
5402/**
5403 * Finds the last bit which is set in the given 32-bit integer.
5404 * Bits are numbered from 1 (least significant) to 32.
5405 *
5406 * @returns index [1..32] of the last set bit.
5407 * @returns 0 if all bits are cleared.
5408 * @param i32 Integer to search for set bits.
5409 * @remark Similar to fls() in BSD.
5410 */
5411DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5412{
5413 return ASMBitLastSetU32((uint32_t)i32);
5414}
5415
5416
5417/**
5418 * Finds the last bit which is set in the given 64-bit integer.
5419 *
5420 * Bits are numbered from 1 (least significant) to 64.
5421 *
5422 * @returns index [1..64] of the last set bit.
5423 * @returns 0 if all bits are cleared.
5424 * @param u64 Integer to search for set bits.
5425 * @remark Similar to fls() in BSD.
5426 */
5427#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5428RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64);
5429#else
5430DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5431{
5432# if RT_INLINE_ASM_USES_INTRIN
5433 unsigned long iBit;
5434# if ARCH_BITS == 64
5435 if (_BitScanReverse64(&iBit, u64))
5436 iBit++;
5437 else
5438 iBit = 0;
5439# else
5440 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5441 iBit += 33;
5442 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5443 iBit++;
5444 else
5445 iBit = 0;
5446# endif
5447# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5448 uint64_t iBit;
5449 __asm__ __volatile__("bsrq %1, %0\n\t"
5450 "jnz 1f\n\t"
5451 "xorl %k0, %k0\n\t"
5452 "jmp 2f\n"
5453 "1:\n\t"
5454 "incl %k0\n"
5455 "2:\n\t"
5456 : "=r" (iBit)
5457 : "rm" (u64));
5458# else
5459 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5460 if (iBit)
5461 iBit += 32;
5462 else
5463 iBit = ASMBitLastSetU32((uint32_t)u64);
5464#endif
5465 return (unsigned)iBit;
5466}
5467#endif
5468
5469
5470/**
5471 * Finds the last bit which is set in the given 16-bit integer.
5472 *
5473 * Bits are numbered from 1 (least significant) to 16.
5474 *
5475 * @returns index [1..16] of the last set bit.
5476 * @returns 0 if all bits are cleared.
5477 * @param u16 Integer to search for set bits.
5478 * @remarks For 16-bit bs3kit code.
5479 */
5480#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5481RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16);
5482#else
5483DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5484{
5485 return ASMBitLastSetU32((uint32_t)u16);
5486}
5487#endif
5488
5489
5490/**
5491 * Reverse the byte order of the given 16-bit integer.
5492 *
5493 * @returns Revert
5494 * @param u16 16-bit integer value.
5495 */
5496#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5497RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16);
5498#else
5499DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5500{
5501# if RT_INLINE_ASM_USES_INTRIN
5502 u16 = _byteswap_ushort(u16);
5503# elif RT_INLINE_ASM_GNU_STYLE
5504 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5505# else
5506 _asm
5507 {
5508 mov ax, [u16]
5509 ror ax, 8
5510 mov [u16], ax
5511 }
5512# endif
5513 return u16;
5514}
5515#endif
5516
5517
5518/**
5519 * Reverse the byte order of the given 32-bit integer.
5520 *
5521 * @returns Revert
5522 * @param u32 32-bit integer value.
5523 */
5524#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5525RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32);
5526#else
5527DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5528{
5529# if RT_INLINE_ASM_USES_INTRIN
5530 u32 = _byteswap_ulong(u32);
5531# elif RT_INLINE_ASM_GNU_STYLE
5532 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5533# else
5534 _asm
5535 {
5536 mov eax, [u32]
5537 bswap eax
5538 mov [u32], eax
5539 }
5540# endif
5541 return u32;
5542}
5543#endif
5544
5545
5546/**
5547 * Reverse the byte order of the given 64-bit integer.
5548 *
5549 * @returns Revert
5550 * @param u64 64-bit integer value.
5551 */
5552DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5553{
5554#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5555 u64 = _byteswap_uint64(u64);
5556#else
5557 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5558 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5559#endif
5560 return u64;
5561}
5562
5563
5564/**
5565 * Rotate 32-bit unsigned value to the left by @a cShift.
5566 *
5567 * @returns Rotated value.
5568 * @param u32 The value to rotate.
5569 * @param cShift How many bits to rotate by.
5570 */
5571#ifdef __WATCOMC__
5572RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5573#else
5574DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5575{
5576# if RT_INLINE_ASM_USES_INTRIN
5577 return _rotl(u32, cShift);
5578# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5579 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5580 return u32;
5581# else
5582 cShift &= 31;
5583 return (u32 << cShift) | (u32 >> (32 - cShift));
5584# endif
5585}
5586#endif
5587
5588
5589/**
5590 * Rotate 32-bit unsigned value to the right by @a cShift.
5591 *
5592 * @returns Rotated value.
5593 * @param u32 The value to rotate.
5594 * @param cShift How many bits to rotate by.
5595 */
5596#ifdef __WATCOMC__
5597RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5598#else
5599DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5600{
5601# if RT_INLINE_ASM_USES_INTRIN
5602 return _rotr(u32, cShift);
5603# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5604 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5605 return u32;
5606# else
5607 cShift &= 31;
5608 return (u32 >> cShift) | (u32 << (32 - cShift));
5609# endif
5610}
5611#endif
5612
5613
5614/**
5615 * Rotate 64-bit unsigned value to the left by @a cShift.
5616 *
5617 * @returns Rotated value.
5618 * @param u64 The value to rotate.
5619 * @param cShift How many bits to rotate by.
5620 */
5621DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5622{
5623#if RT_INLINE_ASM_USES_INTRIN
5624 return _rotl64(u64, cShift);
5625#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5626 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5627 return u64;
5628#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5629 uint32_t uSpill;
5630 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5631 "jz 1f\n\t"
5632 "xchgl %%eax, %%edx\n\t"
5633 "1:\n\t"
5634 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5635 "jz 2f\n\t"
5636 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5637 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5638 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5639 "2:\n\t" /* } */
5640 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5641 : "0" (u64),
5642 "1" (cShift));
5643 return u64;
5644#else
5645 cShift &= 63;
5646 return (u64 << cShift) | (u64 >> (64 - cShift));
5647#endif
5648}
5649
5650
5651/**
5652 * Rotate 64-bit unsigned value to the right by @a cShift.
5653 *
5654 * @returns Rotated value.
5655 * @param u64 The value to rotate.
5656 * @param cShift How many bits to rotate by.
5657 */
5658DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5659{
5660#if RT_INLINE_ASM_USES_INTRIN
5661 return _rotr64(u64, cShift);
5662#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5663 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5664 return u64;
5665#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5666 uint32_t uSpill;
5667 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5668 "jz 1f\n\t"
5669 "xchgl %%eax, %%edx\n\t"
5670 "1:\n\t"
5671 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5672 "jz 2f\n\t"
5673 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5674 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5675 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5676 "2:\n\t" /* } */
5677 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5678 : "0" (u64),
5679 "1" (cShift));
5680 return u64;
5681#else
5682 cShift &= 63;
5683 return (u64 >> cShift) | (u64 << (64 - cShift));
5684#endif
5685}
5686
5687/** @} */
5688
5689
5690/** @} */
5691
5692/*
5693 * Include #pragma aux definitions for Watcom C/C++.
5694 */
5695#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
5696# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
5697# undef IPRT_INCLUDED_asm_watcom_x86_16_h
5698# include "asm-watcom-x86-16.h"
5699#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
5700# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
5701# undef IPRT_INCLUDED_asm_watcom_x86_32_h
5702# include "asm-watcom-x86-32.h"
5703#endif
5704
5705#endif /* !IPRT_INCLUDED_asm_h */
5706
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette