VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 83941

Last change on this file since 83941 was 83782, checked in by vboxsync, 5 years ago:
iprt/win/msvc_intrin.h -> iprt/sanitized/intrin.h (bugref:8489)

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as non-zero if we're using the Microsoft C/C++ compiler intrinsics (_MSC_VER >= 1400).
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedIncrement)
66# pragma intrinsic(_InterlockedDecrement)
67# pragma intrinsic(_InterlockedExchange)
68# pragma intrinsic(_InterlockedExchangeAdd)
69# pragma intrinsic(_InterlockedCompareExchange)
70# pragma intrinsic(_InterlockedCompareExchange64)
71# pragma intrinsic(_rotl)
72# pragma intrinsic(_rotr)
73# pragma intrinsic(_rotl64)
74# pragma intrinsic(_rotr64)
75# ifdef RT_ARCH_AMD64
76# pragma intrinsic(__stosq)
77# pragma intrinsic(_byteswap_uint64)
78# pragma intrinsic(_InterlockedExchange64)
79# pragma intrinsic(_InterlockedExchangeAdd64)
80# pragma intrinsic(_InterlockedAnd64)
81# pragma intrinsic(_InterlockedOr64)
82# pragma intrinsic(_InterlockedIncrement64)
83# pragma intrinsic(_InterlockedDecrement64)
84# endif
85#endif
86
87/*
88 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
89 */
90#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
91# include "asm-watcom-x86-16.h"
92#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-32.h"
94#endif
95
96
97/** @defgroup grp_rt_asm ASM - Assembly Routines
98 * @ingroup grp_rt
99 *
100 * @remarks The difference between ordered and unordered atomic operations is that
101 * the former complete all outstanding reads and writes before continuing,
102 * while the latter make no promises about the order. Ordered
103 * operations do not, it seems, make any 100% promise as to whether
104 * the operation will complete before any subsequent memory access.
105 * (Please correct if wrong.)
106 *
107 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
108 * are unordered (note the Uo).
109 *
110 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
111 * or even optimize assembler instructions away. For instance, in the following code
112 * the second rdmsr instruction is optimized away because gcc treats that instruction
113 * as deterministic:
114 *
115 * @code
116 * static inline uint32_t rdmsr_low(int idx)
117 * {
118 * uint32_t low;
119 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
120 * }
121 * ...
122 * uint32_t msr1 = rdmsr_low(1);
123 * foo(msr1);
124 * msr1 = rdmsr_low(1);
125 * bar(msr1);
126 * @endcode
127 *
128 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
129 * use the result of the first call as input parameter for bar() as well. For rdmsr this
130 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
131 * machine status information in general.
132 *
133 * @{
134 */
135
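/* A minimal sketch of the ordered/unordered naming scheme described above, assuming a
 * producer thread that publishes a flag which a consumer thread polls.  Only functions
 * declared further down in this header are used; the variables are illustrative only.
 * @code
 *      static uint32_t volatile g_fReady = 0;
 *
 *      // Producer: an ordered operation, so everything written before this point
 *      // is completed before the flag change becomes visible.
 *      ASMAtomicXchgU32(&g_fReady, 1);
 *
 *      // Consumer: the unordered (Uo) read is sufficient for polling; use the
 *      // ordered ASMAtomicReadU32() when the read must not be reordered with
 *      // the accesses that depend on it.
 *      while (!ASMAtomicUoReadU32(&g_fReady))
 *          ASMNopPause();
 * @endcode
 */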
136
137/** @def RT_INLINE_ASM_GCC_4_3_X_X86
138 * Used to work around some 4.3.x register allocation issues in this version of
139 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
140 * definitely not for 5.x */
141#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
142# define RT_INLINE_ASM_GCC_4_3_X_X86 1
143#else
144# define RT_INLINE_ASM_GCC_4_3_X_X86 0
145#endif
146
147/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
148 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
149 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
150 * (PIC mode, x86).
151 *
152 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
153 * when in PIC mode on x86.
154 */
155#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
156# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
158# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
160# elif ( (defined(PIC) || defined(__PIC__)) \
161 && defined(RT_ARCH_X86) \
162 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
163 || defined(RT_OS_DARWIN)) )
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# else
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# endif
168#endif
169
170
171/** @def ASMReturnAddress
172 * Gets the return address of the current (or calling if you like) function or method.
173 */
174#ifdef _MSC_VER
175# ifdef __cplusplus
176extern "C"
177# endif
178void * _ReturnAddress(void);
179# pragma intrinsic(_ReturnAddress)
180# define ASMReturnAddress() _ReturnAddress()
181#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
182# define ASMReturnAddress() __builtin_return_address(0)
183#elif defined(__WATCOMC__)
184# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
185#else
186# error "Unsupported compiler."
187#endif
188
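/* A small usage sketch; the helper and the way the address is consumed are
 * illustrative only, not IPRT API:
 * @code
 *      void ExampleTraceAlloc(size_t cb)
 *      {
 *          void RT_FAR *pvCaller = ASMReturnAddress();
 *          // record (pvCaller, cb) in whatever allocation log you keep...
 *      }
 * @endcode
 */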
189
190/**
191 * Compiler memory barrier.
192 *
193 * Ensures that the compiler does not reuse any cached (register/temporary stack)
194 * memory values and does not defer any outstanding writes across this point.
195 *
196 * This function must be used if non-volatile data is modified by a
197 * device or the VMM. Typical cases are port access, MMIO access,
198 * trapping instructions, etc.
199 */
200#if RT_INLINE_ASM_GNU_STYLE
201# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
202#elif RT_INLINE_ASM_USES_INTRIN
203# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
204#elif defined(__WATCOMC__)
205void ASMCompilerBarrier(void);
206#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
207DECLINLINE(void) ASMCompilerBarrier(void)
208{
209 __asm
210 {
211 }
212}
213#endif
214
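/* A minimal sketch of where the compiler barrier matters, assuming a non-volatile
 * status word that a device or the VMM updates behind the compiler's back; the
 * variable, DevExampleKick() and ExampleProcessStatus() are illustrative placeholders:
 * @code
 *      static uint32_t g_uSharedStatus;
 *
 *      DevExampleKick();            // something outside this compilation unit
 *      ASMCompilerBarrier();        // forces the compiler to re-read memory
 *      if (g_uSharedStatus != 0)    // without the barrier this load could come
 *          ExampleProcessStatus(g_uSharedStatus);  // from a stale register copy
 * @endcode
 */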
215
216/** @def ASMBreakpoint
217 * Debugger Breakpoint.
218 * @deprecated Use RT_BREAKPOINT instead.
219 * @internal
220 */
221#define ASMBreakpoint() RT_BREAKPOINT()
222
223
224/**
225 * Spinloop hint for platforms that have one; an empty function on the other
226 * platforms.
227 *
228 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
229 * spin locks.
230 */
231#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
232RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void);
233#else
234DECLINLINE(void) ASMNopPause(void)
235{
236# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
237# if RT_INLINE_ASM_GNU_STYLE
238 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
239# else
240 __asm {
241 _emit 0f3h
242 _emit 090h
243 }
244# endif
245# else
246 /* dummy */
247# endif
248}
249#endif
250
251
252/**
253 * Atomically Exchange an unsigned 8-bit value, ordered.
254 *
255 * @returns Current *pu8 value
256 * @param pu8 Pointer to the 8-bit variable to update.
257 * @param u8 The 8-bit value to assign to *pu8.
258 */
259#if RT_INLINE_ASM_EXTERNAL
260RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
261#else
262DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
263{
264# if RT_INLINE_ASM_GNU_STYLE
265 __asm__ __volatile__("xchgb %0, %1\n\t"
266 : "=m" (*pu8),
267 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
268 : "1" (u8),
269 "m" (*pu8));
270# else
271 __asm
272 {
273# ifdef RT_ARCH_AMD64
274 mov rdx, [pu8]
275 mov al, [u8]
276 xchg [rdx], al
277 mov [u8], al
278# else
279 mov edx, [pu8]
280 mov al, [u8]
281 xchg [edx], al
282 mov [u8], al
283# endif
284 }
285# endif
286 return u8;
287}
288#endif
289
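/* A minimal test-and-set spinlock sketch built only on ASMAtomicXchgU8() above and
 * ASMNopPause(); the lock variable and function names are illustrative, not IPRT API:
 * @code
 *      static uint8_t volatile g_fExampleLock = 0;
 *
 *      static void ExampleLockAcquire(void)
 *      {
 *          while (ASMAtomicXchgU8(&g_fExampleLock, 1) != 0)    // returns the old value
 *              ASMNopPause();                                  // be nice while spinning
 *      }
 *
 *      static void ExampleLockRelease(void)
 *      {
 *          ASMAtomicXchgU8(&g_fExampleLock, 0);                // ordered store of 0
 *      }
 * @endcode
 */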
290
291/**
292 * Atomically Exchange a signed 8-bit value, ordered.
293 *
294 * @returns Current *pi8 value
295 * @param pi8 Pointer to the 8-bit variable to update.
296 * @param i8 The 8-bit value to assign to *pi8.
297 */
298DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
299{
300 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
301}
302
303
304/**
305 * Atomically Exchange a bool value, ordered.
306 *
307 * @returns Current *pf value
308 * @param pf Pointer to the 8-bit variable to update.
309 * @param f The 8-bit value to assign to *pf.
310 */
311DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
312{
313#ifdef _MSC_VER
314 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
315#else
316 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#endif
318}
319
320
321/**
322 * Atomically Exchange an unsigned 16-bit value, ordered.
323 *
324 * @returns Current *pu16 value
325 * @param pu16 Pointer to the 16-bit variable to update.
326 * @param u16 The 16-bit value to assign to *pu16.
327 */
328#if RT_INLINE_ASM_EXTERNAL
329RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
330#else
331DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
332{
333# if RT_INLINE_ASM_GNU_STYLE
334 __asm__ __volatile__("xchgw %0, %1\n\t"
335 : "=m" (*pu16),
336 "=r" (u16)
337 : "1" (u16),
338 "m" (*pu16));
339# else
340 __asm
341 {
342# ifdef RT_ARCH_AMD64
343 mov rdx, [pu16]
344 mov ax, [u16]
345 xchg [rdx], ax
346 mov [u16], ax
347# else
348 mov edx, [pu16]
349 mov ax, [u16]
350 xchg [edx], ax
351 mov [u16], ax
352# endif
353 }
354# endif
355 return u16;
356}
357#endif
358
359
360/**
361 * Atomically Exchange a signed 16-bit value, ordered.
362 *
363 * @returns Current *pi16 value
364 * @param pi16 Pointer to the 16-bit variable to update.
365 * @param i16 The 16-bit value to assign to *pi16.
366 */
367DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
368{
369 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
370}
371
372
373/**
374 * Atomically Exchange an unsigned 32-bit value, ordered.
375 *
376 * @returns Current *pu32 value
377 * @param pu32 Pointer to the 32-bit variable to update.
378 * @param u32 The 32-bit value to assign to *pu32.
379 *
380 * @remarks Does not work on 286 and earlier.
381 */
382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
383RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
384#else
385DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
386{
387# if RT_INLINE_ASM_GNU_STYLE
388 __asm__ __volatile__("xchgl %0, %1\n\t"
389 : "=m" (*pu32),
390 "=r" (u32)
391 : "1" (u32),
392 "m" (*pu32));
393
394# elif RT_INLINE_ASM_USES_INTRIN
395 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
396
397# else
398 __asm
399 {
400# ifdef RT_ARCH_AMD64
401 mov rdx, [pu32]
402 mov eax, u32
403 xchg [rdx], eax
404 mov [u32], eax
405# else
406 mov edx, [pu32]
407 mov eax, u32
408 xchg [edx], eax
409 mov [u32], eax
410# endif
411 }
412# endif
413 return u32;
414}
415#endif
416
417
418/**
419 * Atomically Exchange a signed 32-bit value, ordered.
420 *
421 * @returns Current *pi32 value
422 * @param pi32 Pointer to the 32-bit variable to update.
423 * @param i32 The 32-bit value to assign to *pi32.
424 */
425DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
426{
427 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
428}
429
430
431/**
432 * Atomically Exchange an unsigned 64-bit value, ordered.
433 *
434 * @returns Current *pu64 value
435 * @param pu64 Pointer to the 64-bit variable to update.
436 * @param u64 The 64-bit value to assign to *pu64.
437 *
438 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
439 */
440#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
441 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
442RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
443#else
444DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
445{
446# if defined(RT_ARCH_AMD64)
447# if RT_INLINE_ASM_USES_INTRIN
448 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
449
450# elif RT_INLINE_ASM_GNU_STYLE
451 __asm__ __volatile__("xchgq %0, %1\n\t"
452 : "=m" (*pu64),
453 "=r" (u64)
454 : "1" (u64),
455 "m" (*pu64));
456# else
457 __asm
458 {
459 mov rdx, [pu64]
460 mov rax, [u64]
461 xchg [rdx], rax
462 mov [u64], rax
463 }
464# endif
465# else /* !RT_ARCH_AMD64 */
466# if RT_INLINE_ASM_GNU_STYLE
467# if defined(PIC) || defined(__PIC__)
468 uint32_t u32EBX = (uint32_t)u64;
469 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
470 "xchgl %%ebx, %3\n\t"
471 "1:\n\t"
472 "lock; cmpxchg8b (%5)\n\t"
473 "jnz 1b\n\t"
474 "movl %3, %%ebx\n\t"
475 /*"xchgl %%esi, %5\n\t"*/
476 : "=A" (u64),
477 "=m" (*pu64)
478 : "0" (*pu64),
479 "m" ( u32EBX ),
480 "c" ( (uint32_t)(u64 >> 32) ),
481 "S" (pu64));
482# else /* !PIC */
483 __asm__ __volatile__("1:\n\t"
484 "lock; cmpxchg8b %1\n\t"
485 "jnz 1b\n\t"
486 : "=A" (u64),
487 "=m" (*pu64)
488 : "0" (*pu64),
489 "b" ( (uint32_t)u64 ),
490 "c" ( (uint32_t)(u64 >> 32) ));
491# endif
492# else
493 __asm
494 {
495 mov ebx, dword ptr [u64]
496 mov ecx, dword ptr [u64 + 4]
497 mov edi, pu64
498 mov eax, dword ptr [edi]
499 mov edx, dword ptr [edi + 4]
500 retry:
501 lock cmpxchg8b [edi]
502 jnz retry
503 mov dword ptr [u64], eax
504 mov dword ptr [u64 + 4], edx
505 }
506# endif
507# endif /* !RT_ARCH_AMD64 */
508 return u64;
509}
510#endif
511
512
513/**
514 * Atomically Exchange a signed 64-bit value, ordered.
515 *
516 * @returns Current *pi64 value
517 * @param pi64 Pointer to the 64-bit variable to update.
518 * @param i64 The 64-bit value to assign to *pi64.
519 */
520DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
521{
522 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
523}
524
525
526/**
527 * Atomically Exchange a size_t value, ordered.
528 *
529 * @returns Current *puDst value
530 * @param puDst Pointer to the size_t variable to update.
531 * @param uNew The new value to assign to *puDst.
532 */
533DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew)
534{
535#if ARCH_BITS == 16
536 AssertCompile(sizeof(size_t) == 2);
537 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
538#elif ARCH_BITS == 32
539 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
540#elif ARCH_BITS == 64
541 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
542#else
543# error "ARCH_BITS is bogus"
544#endif
545}
546
547
548/**
549 * Atomically Exchange a pointer value, ordered.
550 *
551 * @returns Current *ppv value
552 * @param ppv Pointer to the pointer variable to update.
553 * @param pv The pointer value to assign to *ppv.
554 */
555DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
556{
557#if ARCH_BITS == 32 || ARCH_BITS == 16
558 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
559#elif ARCH_BITS == 64
560 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
561#else
562# error "ARCH_BITS is bogus"
563#endif
564}
565
566
567/**
568 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
569 *
570 * @returns Current *pv value
571 * @param ppv Pointer to the pointer variable to update.
572 * @param pv The pointer value to assign to *ppv.
573 * @param Type The type of *ppv, sans volatile.
574 */
575#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
576# define ASMAtomicXchgPtrT(ppv, pv, Type) \
577 __extension__ \
578 ({\
579 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
580 Type const pvTypeChecked = (pv); \
581 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
582 pvTypeCheckedRet; \
583 })
584#else
585# define ASMAtomicXchgPtrT(ppv, pv, Type) \
586 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
587#endif
588
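/* Usage sketch for the typed variant, assuming a caller-owned singly-linked structure
 * (MYNODE and g_pMyHead are illustrative only):
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; int iValue; } MYNODE;
 *      static MYNODE * volatile g_pMyHead;
 *
 *      // Detach the whole list in one ordered operation and get a correctly
 *      // typed pointer back without manual casting:
 *      MYNODE *pDetached = ASMAtomicXchgPtrT(&g_pMyHead, NULL, MYNODE *);
 * @endcode
 */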
589
590/**
591 * Atomically Exchange a raw-mode context pointer value, ordered.
592 *
593 * @returns Current *ppvRC value
594 * @param ppvRC Pointer to the pointer variable to update.
595 * @param pvRC The pointer value to assign to *ppvRC.
596 */
597DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
598{
599 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
600}
601
602
603/**
604 * Atomically Exchange a ring-0 pointer value, ordered.
605 *
606 * @returns Current *ppvR0 value
607 * @param ppvR0 Pointer to the pointer variable to update.
608 * @param pvR0 The pointer value to assign to *ppvR0.
609 */
610DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
611{
612#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
613 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
614#elif R0_ARCH_BITS == 64
615 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
616#else
617# error "R0_ARCH_BITS is bogus"
618#endif
619}
620
621
622/**
623 * Atomically Exchange a ring-3 pointer value, ordered.
624 *
625 * @returns Current *ppvR3 value
626 * @param ppvR3 Pointer to the pointer variable to update.
627 * @param pvR3 The pointer value to assign to *ppvR3.
628 */
629DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
630{
631#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
632 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
633#elif R3_ARCH_BITS == 64
634 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
635#else
636# error "R3_ARCH_BITS is bogus"
637#endif
638}
639
640
641/** @def ASMAtomicXchgHandle
642 * Atomically Exchange a typical IPRT handle value, ordered.
643 *
644 * @param ph Pointer to the value to update.
645 * @param hNew The new value to assign to *ph.
646 * @param phRes Where to store the current *ph value.
647 *
648 * @remarks This doesn't currently work for all handles (like RTFILE).
649 */
650#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
651# define ASMAtomicXchgHandle(ph, hNew, phRes) \
652 do { \
653 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
654 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
655 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
656 } while (0)
657#elif HC_ARCH_BITS == 64
658# define ASMAtomicXchgHandle(ph, hNew, phRes) \
659 do { \
660 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
661 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
662 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
663 } while (0)
664#else
665# error HC_ARCH_BITS
666#endif
667
668
669/**
670 * Atomically Exchange a value whose size might differ
671 * between platforms or compilers, ordered.
672 *
673 * @param pu Pointer to the variable to update.
674 * @param uNew The value to assign to *pu.
675 * @todo This is busted as it's missing the result argument.
676 */
677#define ASMAtomicXchgSize(pu, uNew) \
678 do { \
679 switch (sizeof(*(pu))) { \
680 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
681 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
682 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
683 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
684 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
685 } \
686 } while (0)
687
688/**
689 * Atomically Exchange a value whose size might differ
690 * between platforms or compilers, ordered.
691 *
692 * @param pu Pointer to the variable to update.
693 * @param uNew The value to assign to *pu.
694 * @param puRes Where to store the current *pu value.
695 */
696#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
697 do { \
698 switch (sizeof(*(pu))) { \
699 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
700 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
701 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
702 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
703 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
704 } \
705 } while (0)
706
707
708
709/**
710 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
711 *
712 * @returns true if xchg was done.
713 * @returns false if xchg wasn't done.
714 *
715 * @param pu8 Pointer to the value to update.
716 * @param u8New The new value to assign to *pu8.
717 * @param u8Old The old value to compare *pu8 with.
718 *
719 * @remarks x86: Requires a 486 or later.
720 */
721#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
722RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
723#else
724DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
725{
726 uint8_t u8Ret;
727 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
728 "setz %1\n\t"
729 : "=m" (*pu8),
730 "=qm" (u8Ret),
731 "=a" (u8Old)
732 : "q" (u8New),
733 "2" (u8Old),
734 "m" (*pu8));
735 return (bool)u8Ret;
736}
737#endif
738
739
740/**
741 * Atomically Compare and Exchange a signed 8-bit value, ordered.
742 *
743 * @returns true if xchg was done.
744 * @returns false if xchg wasn't done.
745 *
746 * @param pi8 Pointer to the value to update.
747 * @param i8New The new value to assign to *pi8.
748 * @param i8Old The old value to compare *pi8 with.
749 *
750 * @remarks x86: Requires a 486 or later.
751 */
752DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
753{
754 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
755}
756
757
758/**
759 * Atomically Compare and Exchange a bool value, ordered.
760 *
761 * @returns true if xchg was done.
762 * @returns false if xchg wasn't done.
763 *
764 * @param pf Pointer to the value to update.
765 * @param fNew The new value to assign to *pf.
766 * @param fOld The old value to compare *pf with.
767 *
768 * @remarks x86: Requires a 486 or later.
769 */
770DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
771{
772 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
773}
774
775
776/**
777 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
778 *
779 * @returns true if xchg was done.
780 * @returns false if xchg wasn't done.
781 *
782 * @param pu32 Pointer to the value to update.
783 * @param u32New The new value to assign to *pu32.
784 * @param u32Old The old value to compare *pu32 with.
785 *
786 * @remarks x86: Requires a 486 or later.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
790#else
791DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
792{
793# if RT_INLINE_ASM_GNU_STYLE
794 uint8_t u8Ret;
795 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
796 "setz %1\n\t"
797 : "=m" (*pu32),
798 "=qm" (u8Ret),
799 "=a" (u32Old)
800 : "r" (u32New),
801 "2" (u32Old),
802 "m" (*pu32));
803 return (bool)u8Ret;
804
805# elif RT_INLINE_ASM_USES_INTRIN
806 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
807
808# else
809 uint32_t u32Ret;
810 __asm
811 {
812# ifdef RT_ARCH_AMD64
813 mov rdx, [pu32]
814# else
815 mov edx, [pu32]
816# endif
817 mov eax, [u32Old]
818 mov ecx, [u32New]
819# ifdef RT_ARCH_AMD64
820 lock cmpxchg [rdx], ecx
821# else
822 lock cmpxchg [edx], ecx
823# endif
824 setz al
825 movzx eax, al
826 mov [u32Ret], eax
827 }
828 return !!u32Ret;
829# endif
830}
831#endif
832
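/* A typical compare-and-exchange retry loop, here keeping a 32-bit high-water mark;
 * a minimal sketch using only ASMAtomicCmpXchgU32() above and ASMAtomicReadU32()
 * declared further down (the function and variable names are illustrative):
 * @code
 *      static void ExampleUpdateHighWaterU32(uint32_t volatile *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur = ASMAtomicReadU32(pu32Max);
 *          while (   u32New > u32Cur
 *                 && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur))
 *              u32Cur = ASMAtomicReadU32(pu32Max);   // lost the race, reload and retry
 *      }
 * @endcode
 */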
833
834/**
835 * Atomically Compare and Exchange a signed 32-bit value, ordered.
836 *
837 * @returns true if xchg was done.
838 * @returns false if xchg wasn't done.
839 *
840 * @param pi32 Pointer to the value to update.
841 * @param i32New The new value to assign to *pi32.
842 * @param i32Old The old value to compare *pi32 with.
843 *
844 * @remarks x86: Requires a 486 or later.
845 */
846DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
847{
848 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
849}
850
851
852/**
853 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
854 *
855 * @returns true if xchg was done.
856 * @returns false if xchg wasn't done.
857 *
858 * @param pu64 Pointer to the 64-bit variable to update.
859 * @param u64New The 64-bit value to assign to *pu64.
860 * @param u64Old The value to compare with.
861 *
862 * @remarks x86: Requires a Pentium or later.
863 */
864#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
865 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
866RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
867#else
868DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
869{
870# if RT_INLINE_ASM_USES_INTRIN
871 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
872
873# elif defined(RT_ARCH_AMD64)
874# if RT_INLINE_ASM_GNU_STYLE
875 uint8_t u8Ret;
876 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
877 "setz %1\n\t"
878 : "=m" (*pu64),
879 "=qm" (u8Ret),
880 "=a" (u64Old)
881 : "r" (u64New),
882 "2" (u64Old),
883 "m" (*pu64));
884 return (bool)u8Ret;
885# else
886 bool fRet;
887 __asm
888 {
889 mov rdx, [pu64]
890 mov rax, [u64Old]
891 mov rcx, [u64New]
892 lock cmpxchg [rdx], rcx
893 setz al
894 mov [fRet], al
895 }
896 return fRet;
897# endif
898# else /* !RT_ARCH_AMD64 */
899 uint32_t u32Ret;
900# if RT_INLINE_ASM_GNU_STYLE
901# if defined(PIC) || defined(__PIC__)
902 uint32_t u32EBX = (uint32_t)u64New;
903 uint32_t u32Spill;
904 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
905 "lock; cmpxchg8b (%6)\n\t"
906 "setz %%al\n\t"
907 "movl %4, %%ebx\n\t"
908 "movzbl %%al, %%eax\n\t"
909 : "=a" (u32Ret),
910 "=d" (u32Spill),
911# if RT_GNUC_PREREQ(4, 3)
912 "+m" (*pu64)
913# else
914 "=m" (*pu64)
915# endif
916 : "A" (u64Old),
917 "m" ( u32EBX ),
918 "c" ( (uint32_t)(u64New >> 32) ),
919 "S" (pu64));
920# else /* !PIC */
921 uint32_t u32Spill;
922 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
923 "setz %%al\n\t"
924 "movzbl %%al, %%eax\n\t"
925 : "=a" (u32Ret),
926 "=d" (u32Spill),
927 "+m" (*pu64)
928 : "A" (u64Old),
929 "b" ( (uint32_t)u64New ),
930 "c" ( (uint32_t)(u64New >> 32) ));
931# endif
932 return (bool)u32Ret;
933# else
934 __asm
935 {
936 mov ebx, dword ptr [u64New]
937 mov ecx, dword ptr [u64New + 4]
938 mov edi, [pu64]
939 mov eax, dword ptr [u64Old]
940 mov edx, dword ptr [u64Old + 4]
941 lock cmpxchg8b [edi]
942 setz al
943 movzx eax, al
944 mov dword ptr [u32Ret], eax
945 }
946 return !!u32Ret;
947# endif
948# endif /* !RT_ARCH_AMD64 */
949}
950#endif
951
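/* The same retry pattern works for 64-bit values, which matters on 32-bit x86 where a
 * plain 64-bit store is not atomic; a sketch using only ASMAtomicCmpXchgU64() above and
 * ASMAtomicReadU64() declared further down (IPRT also has dedicated increment helpers):
 * @code
 *      static uint64_t ExampleIncU64(uint64_t volatile *pu64)
 *      {
 *          uint64_t u64Old;
 *          do
 *              u64Old = ASMAtomicReadU64(pu64);
 *          while (!ASMAtomicCmpXchgU64(pu64, u64Old + 1, u64Old));
 *          return u64Old + 1;
 *      }
 * @endcode
 */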
952
953/**
954 * Atomically Compare and exchange a signed 64-bit value, ordered.
955 *
956 * @returns true if xchg was done.
957 * @returns false if xchg wasn't done.
958 *
959 * @param pi64 Pointer to the 64-bit variable to update.
960 * @param i64 The 64-bit value to assign to *pi64.
961 * @param i64Old The value to compare with.
962 *
963 * @remarks x86: Requires a Pentium or later.
964 */
965DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
966{
967 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
968}
969
970
971/**
972 * Atomically Compare and Exchange a pointer value, ordered.
973 *
974 * @returns true if xchg was done.
975 * @returns false if xchg wasn't done.
976 *
977 * @param ppv Pointer to the value to update.
978 * @param pvNew The new value to assign to *ppv.
979 * @param pvOld The old value to compare *ppv with.
980 *
981 * @remarks x86: Requires a 486 or later.
982 */
983DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
984{
985#if ARCH_BITS == 32 || ARCH_BITS == 16
986 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
987#elif ARCH_BITS == 64
988 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
989#else
990# error "ARCH_BITS is bogus"
991#endif
992}
993
994
995/**
996 * Atomically Compare and Exchange a pointer value, ordered.
997 *
998 * @returns true if xchg was done.
999 * @returns false if xchg wasn't done.
1000 *
1001 * @param ppv Pointer to the value to update.
1002 * @param pvNew The new value to assign to *ppv.
1003 * @param pvOld The old value to compare *ppv with.
1004 *
1005 * @remarks This is relatively type safe on GCC platforms.
1006 * @remarks x86: Requires a 486 or later.
1007 */
1008#ifdef __GNUC__
1009# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1010 __extension__ \
1011 ({\
1012 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1013 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1014 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1015 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1016 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1017 fMacroRet; \
1018 })
1019#else
1020# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1021 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1022#endif
1023
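/* A classic lock-free LIFO push as a sketch of ASMAtomicCmpXchgPtr() in action; MYNODE
 * and the function name are illustrative, and a matching pop would additionally need
 * ABA protection, which is beyond this sketch:
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *
 *      static void ExampleLifoPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 *      {
 *          MYNODE *pOldHead;
 *          do
 *          {
 *              pOldHead = *ppHead;
 *              pNode->pNext = pOldHead;
 *          } while (!ASMAtomicCmpXchgPtr(ppHead, pNode, pOldHead));
 *      }
 * @endcode
 */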
1024
1025/** @def ASMAtomicCmpXchgHandle
1026 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1027 *
1028 * @param ph Pointer to the value to update.
1029 * @param hNew The new value to assign to *ph.
1030 * @param hOld The old value to compare *ph with.
1031 * @param fRc Where to store the result.
1032 *
1033 * @remarks This doesn't currently work for all handles (like RTFILE).
1034 * @remarks x86: Requires a 486 or later.
1035 */
1036#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1037# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1038 do { \
1039 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1040 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1041 } while (0)
1042#elif HC_ARCH_BITS == 64
1043# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1044 do { \
1045 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1046 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1047 } while (0)
1048#else
1049# error HC_ARCH_BITS
1050#endif
1051
1052
1053/** @def ASMAtomicCmpXchgSize
1054 * Atomically Compare and Exchange a value whose size might differ
1055 * between platforms or compilers, ordered.
1056 *
1057 * @param pu Pointer to the value to update.
1058 * @param uNew The new value to assign to *pu.
1059 * @param uOld The old value to compare *pu with.
1060 * @param fRc Where to store the result.
1061 *
1062 * @remarks x86: Requires a 486 or later.
1063 */
1064#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1065 do { \
1066 switch (sizeof(*(pu))) { \
1067 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1068 break; \
1069 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1070 break; \
1071 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1072 (fRc) = false; \
1073 break; \
1074 } \
1075 } while (0)
1076
1077
1078/**
1079 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1080 * passes back old value, ordered.
1081 *
1082 * @returns true if xchg was done.
1083 * @returns false if xchg wasn't done.
1084 *
1085 * @param pu32 Pointer to the value to update.
1086 * @param u32New The new value to assign to *pu32.
1087 * @param u32Old The old value to compare *pu32 with.
1088 * @param pu32Old Pointer to where to store the old value.
1089 *
1090 * @remarks x86: Requires a 486 or later.
1091 */
1092#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1093RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1094#else
1095DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1096{
1097# if RT_INLINE_ASM_GNU_STYLE
1098 uint8_t u8Ret;
1099 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1100 "setz %1\n\t"
1101 : "=m" (*pu32),
1102 "=qm" (u8Ret),
1103 "=a" (*pu32Old)
1104 : "r" (u32New),
1105 "a" (u32Old),
1106 "m" (*pu32));
1107 return (bool)u8Ret;
1108
1109# elif RT_INLINE_ASM_USES_INTRIN
1110 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1111
1112# else
1113 uint32_t u32Ret;
1114 __asm
1115 {
1116# ifdef RT_ARCH_AMD64
1117 mov rdx, [pu32]
1118# else
1119 mov edx, [pu32]
1120# endif
1121 mov eax, [u32Old]
1122 mov ecx, [u32New]
1123# ifdef RT_ARCH_AMD64
1124 lock cmpxchg [rdx], ecx
1125 mov rdx, [pu32Old]
1126 mov [rdx], eax
1127# else
1128 lock cmpxchg [edx], ecx
1129 mov edx, [pu32Old]
1130 mov [edx], eax
1131# endif
1132 setz al
1133 movzx eax, al
1134 mov [u32Ret], eax
1135 }
1136 return !!u32Ret;
1137# endif
1138}
1139#endif
1140
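/* The Ex variants hand back the value that was actually found, so a retry loop does not
 * need a separate re-read; a sketch of an atomic OR built this way (illustrative only,
 * IPRT has dedicated ASMAtomicOrU32-style helpers elsewhere in this header):
 * @code
 *      static uint32_t ExampleOrU32(uint32_t volatile *pu32, uint32_t fOrMask)
 *      {
 *          uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *          while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fOrMask, u32Old, &u32Old))
 *              ;   // u32Old has been refreshed with the value that beat us
 *          return u32Old | fOrMask;
 *      }
 * @endcode
 */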
1141
1142/**
1143 * Atomically Compare and Exchange a signed 32-bit value, additionally
1144 * passes back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pi32 Pointer to the value to update.
1150 * @param i32New The new value to assign to *pi32.
1151 * @param i32Old The old value to compare *pi32 with.
1152 * @param pi32Old Pointer to where to store the old value.
1153 *
1154 * @remarks x86: Requires a 486 or later.
1155 */
1156DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1157{
1158 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1159}
1160
1161
1162/**
1163 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1164 * passing back old value, ordered.
1165 *
1166 * @returns true if xchg was done.
1167 * @returns false if xchg wasn't done.
1168 *
1169 * @param pu64 Pointer to the 64-bit variable to update.
1170 * @param u64New The 64-bit value to assign to *pu64.
1171 * @param u64Old The value to compare with.
1172 * @param pu64Old Pointer to where to store the old value.
1173 *
1174 * @remarks x86: Requires a Pentium or later.
1175 */
1176#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1177 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1178RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1179#else
1180DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1181{
1182# if RT_INLINE_ASM_USES_INTRIN
1183 return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1184
1185# elif defined(RT_ARCH_AMD64)
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint8_t u8Ret;
1188 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1189 "setz %1\n\t"
1190 : "=m" (*pu64),
1191 "=qm" (u8Ret),
1192 "=a" (*pu64Old)
1193 : "r" (u64New),
1194 "a" (u64Old),
1195 "m" (*pu64));
1196 return (bool)u8Ret;
1197# else
1198 bool fRet;
1199 __asm
1200 {
1201 mov rdx, [pu64]
1202 mov rax, [u64Old]
1203 mov rcx, [u64New]
1204 lock cmpxchg [rdx], rcx
1205 mov rdx, [pu64Old]
1206 mov [rdx], rax
1207 setz al
1208 mov [fRet], al
1209 }
1210 return fRet;
1211# endif
1212# else /* !RT_ARCH_AMD64 */
1213# if RT_INLINE_ASM_GNU_STYLE
1214 uint64_t u64Ret;
1215# if defined(PIC) || defined(__PIC__)
1216 /* NB: this code uses a memory clobber description, because the clean
1217 * solution with an output value for *pu64 makes gcc run out of registers.
1218 * This will cause suboptimal code, and anyone with a better solution is
1219 * welcome to improve this. */
1220 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1221 "lock; cmpxchg8b %3\n\t"
1222 "xchgl %%ebx, %1\n\t"
1223 : "=A" (u64Ret)
1224 : "DS" ((uint32_t)u64New),
1225 "c" ((uint32_t)(u64New >> 32)),
1226 "m" (*pu64),
1227 "0" (u64Old)
1228 : "memory" );
1229# else /* !PIC */
1230 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1231 : "=A" (u64Ret),
1232 "=m" (*pu64)
1233 : "b" ((uint32_t)u64New),
1234 "c" ((uint32_t)(u64New >> 32)),
1235 "m" (*pu64),
1236 "0" (u64Old));
1237# endif
1238 *pu64Old = u64Ret;
1239 return u64Ret == u64Old;
1240# else
1241 uint32_t u32Ret;
1242 __asm
1243 {
1244 mov ebx, dword ptr [u64New]
1245 mov ecx, dword ptr [u64New + 4]
1246 mov edi, [pu64]
1247 mov eax, dword ptr [u64Old]
1248 mov edx, dword ptr [u64Old + 4]
1249 lock cmpxchg8b [edi]
1250 mov ebx, [pu64Old]
1251 mov [ebx], eax
1252 setz al
1253 movzx eax, al
1254 add ebx, 4
1255 mov [ebx], edx
1256 mov dword ptr [u32Ret], eax
1257 }
1258 return !!u32Ret;
1259# endif
1260# endif /* !RT_ARCH_AMD64 */
1261}
1262#endif
1263
1264
1265/**
1266 * Atomically Compare and exchange a signed 64-bit value, additionally
1267 * passing back old value, ordered.
1268 *
1269 * @returns true if xchg was done.
1270 * @returns false if xchg wasn't done.
1271 *
1272 * @param pi64 Pointer to the 64-bit variable to update.
1273 * @param i64 The 64-bit value to assign to *pi64.
1274 * @param i64Old The value to compare with.
1275 * @param pi64Old Pointer to where to store the old value.
1276 *
1277 * @remarks x86: Requires a Pentium or later.
1278 */
1279DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1280{
1281 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1282}
1283
1284/** @def ASMAtomicCmpXchgExHandle
1285 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1286 *
1287 * @param ph Pointer to the value to update.
1288 * @param hNew The new value to assign to *ph.
1289 * @param hOld The old value to compare *ph with.
1290 * @param fRc Where to store the result.
1291 * @param phOldVal Pointer to where to store the old value.
1292 *
1293 * @remarks This doesn't currently work for all handles (like RTFILE).
1294 */
1295#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1296# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1297 do { \
1298 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1299 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1300 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1301 } while (0)
1302#elif HC_ARCH_BITS == 64
1303# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1304 do { \
1305 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1306 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1307 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1308 } while (0)
1309#else
1310# error HC_ARCH_BITS
1311#endif
1312
1313
1314/** @def ASMAtomicCmpXchgExSize
1315 * Atomically Compare and Exchange a value whose size might differ
1316 * between platforms or compilers. Additionally passes back old value.
1317 *
1318 * @param pu Pointer to the value to update.
1319 * @param uNew The new value to assign to *pu.
1320 * @param uOld The old value to compare *pu with.
1321 * @param fRc Where to store the result.
1322 * @param puOldVal Pointer to where to store the old value.
1323 *
1324 * @remarks x86: Requires a 486 or later.
1325 */
1326#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1327 do { \
1328 switch (sizeof(*(pu))) { \
1329 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1330 break; \
1331 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1332 break; \
1333 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1334 (fRc) = false; \
1335 *(puOldVal) = 0; \
1336 break; \
1337 } \
1338 } while (0)
1339
1340
1341/**
1342 * Atomically Compare and Exchange a pointer value, additionally
1343 * passing back old value, ordered.
1344 *
1345 * @returns true if xchg was done.
1346 * @returns false if xchg wasn't done.
1347 *
1348 * @param ppv Pointer to the value to update.
1349 * @param pvNew The new value to assign to *ppv.
1350 * @param pvOld The old value to compare *ppv with.
1351 * @param ppvOld Pointer to where to store the old value.
1352 *
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1356 void RT_FAR * RT_FAR *ppvOld)
1357{
1358#if ARCH_BITS == 32 || ARCH_BITS == 16
1359 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1360#elif ARCH_BITS == 64
1361 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1362#else
1363# error "ARCH_BITS is bogus"
1364#endif
1365}
1366
1367
1368/**
1369 * Atomically Compare and Exchange a pointer value, additionally
1370 * passing back old value, ordered.
1371 *
1372 * @returns true if xchg was done.
1373 * @returns false if xchg wasn't done.
1374 *
1375 * @param ppv Pointer to the value to update.
1376 * @param pvNew The new value to assign to *ppv.
1377 * @param pvOld The old value to compare *ppv with.
1378 * @param ppvOld Pointer to where to store the old value.
1379 *
1380 * @remarks This is relatively type safe on GCC platforms.
1381 * @remarks x86: Requires a 486 or later.
1382 */
1383#ifdef __GNUC__
1384# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1385 __extension__ \
1386 ({\
1387 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1388 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1389 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1390 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1391 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1392 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1393 (void **)ppvOldTypeChecked); \
1394 fMacroRet; \
1395 })
1396#else
1397# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1398 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1399#endif
1400
1401
1402/**
1403 * Virtualization unfriendly serializing instruction, always exits.
1404 */
1405#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1406RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void);
1407#else
1408DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1409{
1410# if RT_INLINE_ASM_GNU_STYLE
1411 RTCCUINTREG xAX = 0;
1412# ifdef RT_ARCH_AMD64
1413 __asm__ __volatile__ ("cpuid"
1414 : "=a" (xAX)
1415 : "0" (xAX)
1416 : "rbx", "rcx", "rdx", "memory");
1417# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1418 __asm__ __volatile__ ("push %%ebx\n\t"
1419 "cpuid\n\t"
1420 "pop %%ebx\n\t"
1421 : "=a" (xAX)
1422 : "0" (xAX)
1423 : "ecx", "edx", "memory");
1424# else
1425 __asm__ __volatile__ ("cpuid"
1426 : "=a" (xAX)
1427 : "0" (xAX)
1428 : "ebx", "ecx", "edx", "memory");
1429# endif
1430
1431# elif RT_INLINE_ASM_USES_INTRIN
1432 int aInfo[4];
1433 _ReadWriteBarrier();
1434 __cpuid(aInfo, 0);
1435
1436# else
1437 __asm
1438 {
1439 push ebx
1440 xor eax, eax
1441 cpuid
1442 pop ebx
1443 }
1444# endif
1445}
1446#endif
1447
1448/**
1449 * Virtualization friendly serializing instruction, though more expensive.
1450 */
1451#if RT_INLINE_ASM_EXTERNAL
1452RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void);
1453#else
1454DECLINLINE(void) ASMSerializeInstructionIRet(void)
1455{
1456# if RT_INLINE_ASM_GNU_STYLE
1457# ifdef RT_ARCH_AMD64
1458 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1459 "subq $128, %%rsp\n\t" /*redzone*/
1460 "mov %%ss, %%eax\n\t"
1461 "pushq %%rax\n\t"
1462 "pushq %%r10\n\t"
1463 "pushfq\n\t"
1464 "movl %%cs, %%eax\n\t"
1465 "pushq %%rax\n\t"
1466 "leaq 1f(%%rip), %%rax\n\t"
1467 "pushq %%rax\n\t"
1468 "iretq\n\t"
1469 "1:\n\t"
1470 ::: "rax", "r10", "memory");
1471# else
1472 __asm__ __volatile__ ("pushfl\n\t"
1473 "pushl %%cs\n\t"
1474 "pushl $1f\n\t"
1475 "iretl\n\t"
1476 "1:\n\t"
1477 ::: "memory");
1478# endif
1479
1480# else
1481 __asm
1482 {
1483 pushfd
1484 push cs
1485 push la_ret
1486 iretd
1487 la_ret:
1488 }
1489# endif
1490}
1491#endif
1492
1493/**
1494 * Virtualization friendlier serializing instruction, may still cause exits.
1495 */
1496#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1497RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void);
1498#else
1499DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1500{
1501# if RT_INLINE_ASM_GNU_STYLE
1502 /* rdtscp is not supported by ancient linux build VM of course :-( */
1503# ifdef RT_ARCH_AMD64
1504 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1505 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1506# else
1507 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1508 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1509# endif
1510# else
1511# if RT_INLINE_ASM_USES_INTRIN >= 15
1512 uint32_t uIgnore;
1513 _ReadWriteBarrier();
1514 (void)__rdtscp(&uIgnore);
1515 (void)uIgnore;
1516# else
1517 __asm
1518 {
1519 rdtscp
1520 }
1521# endif
1522# endif
1523}
1524#endif
1525
1526
1527/**
1528 * Serialize Instruction.
1529 */
1530#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1531# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1532#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1533# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1534#elif defined(RT_ARCH_SPARC64)
1535RTDECL(void) ASMSerializeInstruction(void);
1536#else
1537# error "Port me"
1538#endif
1539
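/* A sketch of the usual reason for serializing: fencing a timing measurement so no
 * earlier instruction is still in flight.  ASMReadTSC() is assumed to come from
 * iprt/asm-amd64-x86.h and is used here for illustration only:
 * @code
 *      uint64_t uTscStart, uTscElapsed;
 *      ASMSerializeInstruction();           // everything before this has retired
 *      uTscStart = ASMReadTSC();
 *      // ... the code being measured ...
 *      ASMSerializeInstruction();
 *      uTscElapsed = ASMReadTSC() - uTscStart;
 * @endcode
 */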
1540
1541/**
1542 * Memory fence, waits for any pending writes and reads to complete.
1543 */
1544DECLINLINE(void) ASMMemoryFence(void)
1545{
1546#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1547# if RT_INLINE_ASM_GNU_STYLE
1548 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1549# elif RT_INLINE_ASM_USES_INTRIN
1550 _mm_mfence();
1551# else
1552 __asm
1553 {
1554 _emit 0x0f
1555 _emit 0xae
1556 _emit 0xf0
1557 }
1558# endif
1559#elif ARCH_BITS == 16
1560 uint16_t volatile u16;
1561 ASMAtomicXchgU16(&u16, 0);
1562#else
1563 uint32_t volatile u32;
1564 ASMAtomicXchgU32(&u32, 0);
1565#endif
1566}
1567
1568
1569/**
1570 * Write fence, waits for any pending writes to complete.
1571 */
1572DECLINLINE(void) ASMWriteFence(void)
1573{
1574#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1575# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1577# elif RT_INLINE_ASM_USES_INTRIN
1578 _mm_sfence();
1579# else
1580 __asm
1581 {
1582 _emit 0x0f
1583 _emit 0xae
1584 _emit 0xf8
1585 }
1586# endif
1587#else
1588 ASMMemoryFence();
1589#endif
1590}
1591
1592
1593/**
1594 * Read fence, waits for any pending reads to complete.
1595 */
1596DECLINLINE(void) ASMReadFence(void)
1597{
1598#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1599# if RT_INLINE_ASM_GNU_STYLE
1600 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 _mm_lfence();
1603# else
1604 __asm
1605 {
1606 _emit 0x0f
1607 _emit 0xae
1608 _emit 0xe8
1609 }
1610# endif
1611#else
1612 ASMMemoryFence();
1613#endif
1614}
1615
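/* A sketch of how the write and read fences pair up around a non-atomic payload; the
 * variables and ExampleConsume() are illustrative placeholders only:
 * @code
 *      static uint32_t          g_uPayload;          // plain data
 *      static uint32_t volatile g_fPayloadReady;     // publication flag
 *
 *      // Producer:
 *      g_uPayload = 42;
 *      ASMWriteFence();                              // payload becomes visible before the flag
 *      g_fPayloadReady = 1;
 *
 *      // Consumer:
 *      if (g_fPayloadReady)
 *      {
 *          ASMReadFence();                           // the payload read cannot be hoisted above this point
 *          ExampleConsume(g_uPayload);
 *      }
 * @endcode
 */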
1616
1617/**
1618 * Atomically reads an unsigned 8-bit value, ordered.
1619 *
1620 * @returns Current *pu8 value
1621 * @param pu8 Pointer to the 8-bit variable to read.
1622 */
1623DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1624{
1625 ASMMemoryFence();
1626 return *pu8; /* byte reads are atomic on x86 */
1627}
1628
1629
1630/**
1631 * Atomically reads an unsigned 8-bit value, unordered.
1632 *
1633 * @returns Current *pu8 value
1634 * @param pu8 Pointer to the 8-bit variable to read.
1635 */
1636DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1637{
1638 return *pu8; /* byte reads are atomic on x86 */
1639}
1640
1641
1642/**
1643 * Atomically reads a signed 8-bit value, ordered.
1644 *
1645 * @returns Current *pi8 value
1646 * @param pi8 Pointer to the 8-bit variable to read.
1647 */
1648DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1649{
1650 ASMMemoryFence();
1651 return *pi8; /* byte reads are atomic on x86 */
1652}
1653
1654
1655/**
1656 * Atomically reads a signed 8-bit value, unordered.
1657 *
1658 * @returns Current *pi8 value
1659 * @param pi8 Pointer to the 8-bit variable to read.
1660 */
1661DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1662{
1663 return *pi8; /* byte reads are atomic on x86 */
1664}
1665
1666
1667/**
1668 * Atomically reads an unsigned 16-bit value, ordered.
1669 *
1670 * @returns Current *pu16 value
1671 * @param pu16 Pointer to the 16-bit variable to read.
1672 */
1673DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1674{
1675 ASMMemoryFence();
1676 Assert(!((uintptr_t)pu16 & 1));
1677 return *pu16;
1678}
1679
1680
1681/**
1682 * Atomically reads an unsigned 16-bit value, unordered.
1683 *
1684 * @returns Current *pu16 value
1685 * @param pu16 Pointer to the 16-bit variable to read.
1686 */
1687DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1688{
1689 Assert(!((uintptr_t)pu16 & 1));
1690 return *pu16;
1691}
1692
1693
1694/**
1695 * Atomically reads a signed 16-bit value, ordered.
1696 *
1697 * @returns Current *pi16 value
1698 * @param pi16 Pointer to the 16-bit variable to read.
1699 */
1700DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1701{
1702 ASMMemoryFence();
1703 Assert(!((uintptr_t)pi16 & 1));
1704 return *pi16;
1705}
1706
1707
1708/**
1709 * Atomically reads a signed 16-bit value, unordered.
1710 *
1711 * @returns Current *pi16 value
1712 * @param pi16 Pointer to the 16-bit variable to read.
1713 */
1714DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1715{
1716 Assert(!((uintptr_t)pi16 & 1));
1717 return *pi16;
1718}
1719
1720
1721/**
1722 * Atomically reads an unsigned 32-bit value, ordered.
1723 *
1724 * @returns Current *pu32 value
1725 * @param pu32 Pointer to the 32-bit variable to read.
1726 */
1727DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1728{
1729 ASMMemoryFence();
1730 Assert(!((uintptr_t)pu32 & 3));
1731#if ARCH_BITS == 16
1732 AssertFailed(); /** @todo 16-bit */
1733#endif
1734 return *pu32;
1735}
1736
1737
1738/**
1739 * Atomically reads an unsigned 32-bit value, unordered.
1740 *
1741 * @returns Current *pu32 value
1742 * @param pu32 Pointer to the 32-bit variable to read.
1743 */
1744DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1745{
1746 Assert(!((uintptr_t)pu32 & 3));
1747#if ARCH_BITS == 16
1748 AssertFailed(); /** @todo 16-bit */
1749#endif
1750 return *pu32;
1751}
1752
1753
1754/**
1755 * Atomically reads a signed 32-bit value, ordered.
1756 *
1757 * @returns Current *pi32 value
1758 * @param pi32 Pointer to the 32-bit variable to read.
1759 */
1760DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1761{
1762 ASMMemoryFence();
1763 Assert(!((uintptr_t)pi32 & 3));
1764#if ARCH_BITS == 16
1765 AssertFailed(); /** @todo 16-bit */
1766#endif
1767 return *pi32;
1768}
1769
1770
1771/**
1772 * Atomically reads a signed 32-bit value, unordered.
1773 *
1774 * @returns Current *pi32 value
1775 * @param pi32 Pointer to the 32-bit variable to read.
1776 */
1777DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1778{
1779 Assert(!((uintptr_t)pi32 & 3));
1780#if ARCH_BITS == 16
1781 AssertFailed(); /** @todo 16-bit */
1782#endif
1783 return *pi32;
1784}
1785
1786
1787/**
1788 * Atomically reads an unsigned 64-bit value, ordered.
1789 *
1790 * @returns Current *pu64 value
1791 * @param pu64 Pointer to the 64-bit variable to read.
1792 * The memory pointed to must be writable.
1793 *
1794 * @remarks This may fault if the memory is read-only!
1795 * @remarks x86: Requires a Pentium or later.
1796 */
1797#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1798 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1799RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
1800#else
1801DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
1802{
1803 uint64_t u64;
1804# ifdef RT_ARCH_AMD64
1805 Assert(!((uintptr_t)pu64 & 7));
1806/*# if RT_INLINE_ASM_GNU_STYLE
1807 __asm__ __volatile__( "mfence\n\t"
1808 "movq %1, %0\n\t"
1809 : "=r" (u64)
1810 : "m" (*pu64));
1811# else
1812 __asm
1813 {
1814 mfence
1815 mov rdx, [pu64]
1816 mov rax, [rdx]
1817 mov [u64], rax
1818 }
1819# endif*/
1820 ASMMemoryFence();
1821 u64 = *pu64;
1822# else /* !RT_ARCH_AMD64 */
1823# if RT_INLINE_ASM_GNU_STYLE
1824# if defined(PIC) || defined(__PIC__)
1825 uint32_t u32EBX = 0;
1826 Assert(!((uintptr_t)pu64 & 7));
1827 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1828 "lock; cmpxchg8b (%5)\n\t"
1829 "movl %3, %%ebx\n\t"
1830 : "=A" (u64),
1831# if RT_GNUC_PREREQ(4, 3)
1832 "+m" (*pu64)
1833# else
1834 "=m" (*pu64)
1835# endif
1836 : "0" (0ULL),
1837 "m" (u32EBX),
1838 "c" (0),
1839 "S" (pu64));
1840# else /* !PIC */
1841 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1842 : "=A" (u64),
1843 "+m" (*pu64)
1844 : "0" (0ULL),
1845 "b" (0),
1846 "c" (0));
1847# endif
1848# else
1849 Assert(!((uintptr_t)pu64 & 7));
1850 __asm
1851 {
1852 xor eax, eax
1853 xor edx, edx
1854 mov edi, pu64
1855 xor ecx, ecx
1856 xor ebx, ebx
1857 lock cmpxchg8b [edi]
1858 mov dword ptr [u64], eax
1859 mov dword ptr [u64 + 4], edx
1860 }
1861# endif
1862# endif /* !RT_ARCH_AMD64 */
1863 return u64;
1864}
1865#endif
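
/*
 * Usage sketch (illustrative only): on 32-bit x86 a plain 64-bit load can tear,
 * which is why the function above falls back to LOCK CMPXCHG8B.  A typical use
 * is sampling a counter that another thread updates with ASMAtomicWriteU64;
 * the variable name g_cTotalBytes below is hypothetical.
 *
 *      static uint64_t volatile g_cTotalBytes;
 *      ...
 *      uint64_t const cbSnapshot = ASMAtomicReadU64(&g_cTotalBytes);
 */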
1866
1867
1868/**
1869 * Atomically reads an unsigned 64-bit value, unordered.
1870 *
1871 * @returns Current *pu64 value
1872 * @param pu64 Pointer to the 64-bit variable to read.
1873 * The memory pointed to must be writable.
1874 *
1875 * @remarks This may fault if the memory is read-only!
1876 * @remarks x86: Requires a Pentium or later.
1877 */
1878#if !defined(RT_ARCH_AMD64) \
1879 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1880 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1881RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
1882#else
1883DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
1884{
1885 uint64_t u64;
1886# ifdef RT_ARCH_AMD64
1887 Assert(!((uintptr_t)pu64 & 7));
1888/*# if RT_INLINE_ASM_GNU_STYLE
1889 Assert(!((uintptr_t)pu64 & 7));
1890 __asm__ __volatile__("movq %1, %0\n\t"
1891 : "=r" (u64)
1892 : "m" (*pu64));
1893# else
1894 __asm
1895 {
1896 mov rdx, [pu64]
1897 mov rax, [rdx]
1898 mov [u64], rax
1899 }
1900# endif */
1901 u64 = *pu64;
1902# else /* !RT_ARCH_AMD64 */
1903# if RT_INLINE_ASM_GNU_STYLE
1904# if defined(PIC) || defined(__PIC__)
1905 uint32_t u32EBX = 0;
1906 uint32_t u32Spill;
1907 Assert(!((uintptr_t)pu64 & 7));
1908 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1909 "xor %%ecx,%%ecx\n\t"
1910 "xor %%edx,%%edx\n\t"
1911 "xchgl %%ebx, %3\n\t"
1912 "lock; cmpxchg8b (%4)\n\t"
1913 "movl %3, %%ebx\n\t"
1914 : "=A" (u64),
1915# if RT_GNUC_PREREQ(4, 3)
1916 "+m" (*pu64),
1917# else
1918 "=m" (*pu64),
1919# endif
1920 "=c" (u32Spill)
1921 : "m" (u32EBX),
1922 "S" (pu64));
1923# else /* !PIC */
1924 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1925 : "=A" (u64),
1926 "+m" (*pu64)
1927 : "0" (0ULL),
1928 "b" (0),
1929 "c" (0));
1930# endif
1931# else
1932 Assert(!((uintptr_t)pu64 & 7));
1933 __asm
1934 {
1935 xor eax, eax
1936 xor edx, edx
1937 mov edi, pu64
1938 xor ecx, ecx
1939 xor ebx, ebx
1940 lock cmpxchg8b [edi]
1941 mov dword ptr [u64], eax
1942 mov dword ptr [u64 + 4], edx
1943 }
1944# endif
1945# endif /* !RT_ARCH_AMD64 */
1946 return u64;
1947}
1948#endif
1949
1950
1951/**
1952 * Atomically reads a signed 64-bit value, ordered.
1953 *
1954 * @returns Current *pi64 value
1955 * @param pi64 Pointer to the 64-bit variable to read.
1956 * The memory pointed to must be writable.
1957 *
1958 * @remarks This may fault if the memory is read-only!
1959 * @remarks x86: Requires a Pentium or later.
1960 */
1961DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1962{
1963 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1964}
1965
1966
1967/**
1968 * Atomically reads a signed 64-bit value, unordered.
1969 *
1970 * @returns Current *pi64 value
1971 * @param pi64 Pointer to the 64-bit variable to read.
1972 * The memory pointed to must be writable.
1973 *
1974 * @remarks This will fault if the memory is read-only!
1975 * @remarks x86: Requires a Pentium or later.
1976 */
1977DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1978{
1979 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1980}
1981
1982
1983/**
1984 * Atomically reads a size_t value, ordered.
1985 *
1986 * @returns Current *pcb value
1987 * @param pcb Pointer to the size_t variable to read.
1988 */
1989DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1990{
1991#if ARCH_BITS == 64
1992 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
1993#elif ARCH_BITS == 32
1994 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
1995#elif ARCH_BITS == 16
1996 AssertCompileSize(size_t, 2);
1997 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
1998#else
1999# error "Unsupported ARCH_BITS value"
2000#endif
2001}
2002
2003
2004/**
2005 * Atomically reads a size_t value, unordered.
2006 *
2007 * @returns Current *pcb value
2008 * @param pcb Pointer to the size_t variable to read.
2009 */
2010DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
2011{
2012#if ARCH_BITS == 64
2013 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2014#elif ARCH_BITS == 32
2015 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2016#elif ARCH_BITS == 16
2017 AssertCompileSize(size_t, 2);
2018 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2019#else
2020# error "Unsupported ARCH_BITS value"
2021#endif
2022}
2023
2024
2025/**
2026 * Atomically reads a pointer value, ordered.
2027 *
2028 * @returns Current *pv value
2029 * @param ppv Pointer to the pointer variable to read.
2030 *
2031 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
2032 * requires less typing (no casts).
2033 */
2034DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2035{
2036#if ARCH_BITS == 32 || ARCH_BITS == 16
2037 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2038#elif ARCH_BITS == 64
2039 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2040#else
2041# error "ARCH_BITS is bogus"
2042#endif
2043}
2044
2045/**
2046 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2047 *
2048 * @returns Current *pv value
2049 * @param ppv Pointer to the pointer variable to read.
2050 * @param Type The type of *ppv, sans volatile.
2051 */
2052#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2053# define ASMAtomicReadPtrT(ppv, Type) \
2054 __extension__ \
2055 ({\
2056 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2057 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2058 pvTypeChecked; \
2059 })
2060#else
2061# define ASMAtomicReadPtrT(ppv, Type) \
2062 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2063#endif
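
/*
 * Usage sketch (illustrative only): ASMAtomicReadPtrT keeps the casts and the
 * volatile qualifier out of caller code.  PMYNODE and g_pMyListHead are
 * hypothetical names used just for this sketch.
 *
 *      PMYNODE volatile g_pMyListHead;
 *      ...
 *      PMYNODE pHead = ASMAtomicReadPtrT(&g_pMyListHead, PMYNODE);
 *      if (pHead)
 *          ...
 */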
2064
2065
2066/**
2067 * Atomically reads a pointer value, unordered.
2068 *
2069 * @returns Current *pv value
2070 * @param ppv Pointer to the pointer variable to read.
2071 *
2072 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
2073 * requires less typing (no casts).
2074 */
2075DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2076{
2077#if ARCH_BITS == 32 || ARCH_BITS == 16
2078 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2079#elif ARCH_BITS == 64
2080 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2081#else
2082# error "ARCH_BITS is bogus"
2083#endif
2084}
2085
2086
2087/**
2088 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2089 *
2090 * @returns Current *pv value
2091 * @param ppv Pointer to the pointer variable to read.
2092 * @param Type The type of *ppv, sans volatile.
2093 */
2094#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2095# define ASMAtomicUoReadPtrT(ppv, Type) \
2096 __extension__ \
2097 ({\
2098 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2099 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2100 pvTypeChecked; \
2101 })
2102#else
2103# define ASMAtomicUoReadPtrT(ppv, Type) \
2104 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2105#endif
2106
2107
2108/**
2109 * Atomically reads a boolean value, ordered.
2110 *
2111 * @returns Current *pf value
2112 * @param pf Pointer to the boolean variable to read.
2113 */
2114DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2115{
2116 ASMMemoryFence();
2117 return *pf; /* byte reads are atomic on x86 */
2118}
2119
2120
2121/**
2122 * Atomically reads a boolean value, unordered.
2123 *
2124 * @returns Current *pf value
2125 * @param pf Pointer to the boolean variable to read.
2126 */
2127DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2128{
2129 return *pf; /* byte reads are atomic on x86 */
2130}
2131
2132
2133/**
2134 * Atomically read a typical IPRT handle value, ordered.
2135 *
2136 * @param ph Pointer to the handle variable to read.
2137 * @param phRes Where to store the result.
2138 *
2139 * @remarks This doesn't currently work for all handles (like RTFILE).
2140 */
2141#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2142# define ASMAtomicReadHandle(ph, phRes) \
2143 do { \
2144 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2145 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2146 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2147 } while (0)
2148#elif HC_ARCH_BITS == 64
2149# define ASMAtomicReadHandle(ph, phRes) \
2150 do { \
2151 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2152 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2153 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2154 } while (0)
2155#else
2156# error HC_ARCH_BITS
2157#endif
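
/*
 * Usage sketch (illustrative only): snapshotting a handle that another thread
 * may replace.  g_hMyEvent is a hypothetical RTSEMEVENT variable.
 *
 *      RTSEMEVENT hEvt;
 *      ASMAtomicReadHandle(&g_hMyEvent, &hEvt);
 *      if (hEvt != NIL_RTSEMEVENT)
 *          RTSemEventSignal(hEvt);
 */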
2158
2159
2160/**
2161 * Atomically read a typical IPRT handle value, unordered.
2162 *
2163 * @param ph Pointer to the handle variable to read.
2164 * @param phRes Where to store the result.
2165 *
2166 * @remarks This doesn't currently work for all handles (like RTFILE).
2167 */
2168#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2169# define ASMAtomicUoReadHandle(ph, phRes) \
2170 do { \
2171 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2172 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2173 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2174 } while (0)
2175#elif HC_ARCH_BITS == 64
2176# define ASMAtomicUoReadHandle(ph, phRes) \
2177 do { \
2178 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2179 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2180 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2181 } while (0)
2182#else
2183# error HC_ARCH_BITS
2184#endif
2185
2186
2187/**
2188 * Atomically read a value whose size might differ
2189 * between platforms or compilers, ordered.
2190 *
2191 * @param pu Pointer to the variable to read.
2192 * @param puRes Where to store the result.
2193 */
2194#define ASMAtomicReadSize(pu, puRes) \
2195 do { \
2196 switch (sizeof(*(pu))) { \
2197 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2198 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2199 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2200 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2201 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2202 } \
2203 } while (0)
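
/*
 * Usage sketch (illustrative only): ASMAtomicReadSize picks the right
 * fixed-width helper from sizeof(*(pu)), which is convenient for types whose
 * width differs between targets.  The pShared->GCPhysBase member is
 * hypothetical.
 *
 *      RTGCPHYS GCPhys;
 *      ASMAtomicReadSize(&pShared->GCPhysBase, &GCPhys);
 */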
2204
2205
2206/**
2207 * Atomically read a value whose size might differ
2208 * between platforms or compilers, unordered.
2209 *
2210 * @param pu Pointer to the variable to read.
2211 * @param puRes Where to store the result.
2212 */
2213#define ASMAtomicUoReadSize(pu, puRes) \
2214 do { \
2215 switch (sizeof(*(pu))) { \
2216 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2217 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2218 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2219 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2220 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2221 } \
2222 } while (0)
2223
2224
2225/**
2226 * Atomically writes an unsigned 8-bit value, ordered.
2227 *
2228 * @param pu8 Pointer to the 8-bit variable.
2229 * @param u8 The 8-bit value to assign to *pu8.
2230 */
2231DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2232{
2233 ASMAtomicXchgU8(pu8, u8);
2234}
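
/*
 * Note on the ordered writes in this group: they are implemented via an atomic
 * exchange because XCHG with a memory operand carries an implicit LOCK on x86
 * and therefore doubles as a full memory barrier, which a plain store does not
 * provide.
 */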
2235
2236
2237/**
2238 * Atomically writes an unsigned 8-bit value, unordered.
2239 *
2240 * @param pu8 Pointer to the 8-bit variable.
2241 * @param u8 The 8-bit value to assign to *pu8.
2242 */
2243DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2244{
2245 *pu8 = u8; /* byte writes are atomic on x86 */
2246}
2247
2248
2249/**
2250 * Atomically writes a signed 8-bit value, ordered.
2251 *
2252 * @param pi8 Pointer to the 8-bit variable to read.
2253 * @param i8 The 8-bit value to assign to *pi8.
2254 */
2255DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2256{
2257 ASMAtomicXchgS8(pi8, i8);
2258}
2259
2260
2261/**
2262 * Atomically writes a signed 8-bit value, unordered.
2263 *
2264 * @param pi8 Pointer to the 8-bit variable to write.
2265 * @param i8 The 8-bit value to assign to *pi8.
2266 */
2267DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2268{
2269 *pi8 = i8; /* byte writes are atomic on x86 */
2270}
2271
2272
2273/**
2274 * Atomically writes an unsigned 16-bit value, ordered.
2275 *
2276 * @param pu16 Pointer to the 16-bit variable to write.
2277 * @param u16 The 16-bit value to assign to *pu16.
2278 */
2279DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2280{
2281 ASMAtomicXchgU16(pu16, u16);
2282}
2283
2284
2285/**
2286 * Atomically writes an unsigned 16-bit value, unordered.
2287 *
2288 * @param pu16 Pointer to the 16-bit variable to write.
2289 * @param u16 The 16-bit value to assign to *pu16.
2290 */
2291DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2292{
2293 Assert(!((uintptr_t)pu16 & 1));
2294 *pu16 = u16;
2295}
2296
2297
2298/**
2299 * Atomically writes a signed 16-bit value, ordered.
2300 *
2301 * @param pi16 Pointer to the 16-bit variable to write.
2302 * @param i16 The 16-bit value to assign to *pi16.
2303 */
2304DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2305{
2306 ASMAtomicXchgS16(pi16, i16);
2307}
2308
2309
2310/**
2311 * Atomically writes a signed 16-bit value, unordered.
2312 *
2313 * @param pi16 Pointer to the 16-bit variable to write.
2314 * @param i16 The 16-bit value to assign to *pi16.
2315 */
2316DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2317{
2318 Assert(!((uintptr_t)pi16 & 1));
2319 *pi16 = i16;
2320}
2321
2322
2323/**
2324 * Atomically writes an unsigned 32-bit value, ordered.
2325 *
2326 * @param pu32 Pointer to the 32-bit variable to write.
2327 * @param u32 The 32-bit value to assign to *pu32.
2328 */
2329DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2330{
2331 ASMAtomicXchgU32(pu32, u32);
2332}
2333
2334
2335/**
2336 * Atomically writes an unsigned 32-bit value, unordered.
2337 *
2338 * @param pu32 Pointer to the 32-bit variable to write.
2339 * @param u32 The 32-bit value to assign to *pu32.
2340 */
2341DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2342{
2343 Assert(!((uintptr_t)pu32 & 3));
2344#if ARCH_BITS >= 32
2345 *pu32 = u32;
2346#else
2347 ASMAtomicXchgU32(pu32, u32);
2348#endif
2349}
2350
2351
2352/**
2353 * Atomically writes a signed 32-bit value, ordered.
2354 *
2355 * @param pi32 Pointer to the 32-bit variable to write.
2356 * @param i32 The 32-bit value to assign to *pi32.
2357 */
2358DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2359{
2360 ASMAtomicXchgS32(pi32, i32);
2361}
2362
2363
2364/**
2365 * Atomically writes a signed 32-bit value, unordered.
2366 *
2367 * @param pi32 Pointer to the 32-bit variable to write.
2368 * @param i32 The 32-bit value to assign to *pi32.
2369 */
2370DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2371{
2372 Assert(!((uintptr_t)pi32 & 3));
2373#if ARCH_BITS >= 32
2374 *pi32 = i32;
2375#else
2376 ASMAtomicXchgS32(pi32, i32);
2377#endif
2378}
2379
2380
2381/**
2382 * Atomically writes an unsigned 64-bit value, ordered.
2383 *
2384 * @param pu64 Pointer to the 64-bit variable to write.
2385 * @param u64 The 64-bit value to assign to *pu64.
2386 */
2387DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2388{
2389 ASMAtomicXchgU64(pu64, u64);
2390}
2391
2392
2393/**
2394 * Atomically writes an unsigned 64-bit value, unordered.
2395 *
2396 * @param pu64 Pointer to the 64-bit variable to write.
2397 * @param u64 The 64-bit value to assign to *pu64.
2398 */
2399DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2400{
2401 Assert(!((uintptr_t)pu64 & 7));
2402#if ARCH_BITS == 64
2403 *pu64 = u64;
2404#else
2405 ASMAtomicXchgU64(pu64, u64);
2406#endif
2407}
2408
2409
2410/**
2411 * Atomically writes a signed 64-bit value, ordered.
2412 *
2413 * @param pi64 Pointer to the 64-bit variable to write.
2414 * @param i64 The 64-bit value to assign to *pi64.
2415 */
2416DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2417{
2418 ASMAtomicXchgS64(pi64, i64);
2419}
2420
2421
2422/**
2423 * Atomically writes a signed 64-bit value, unordered.
2424 *
2425 * @param pi64 Pointer to the 64-bit variable to write.
2426 * @param i64 The 64-bit value to assign to *pi64.
2427 */
2428DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2429{
2430 Assert(!((uintptr_t)pi64 & 7));
2431#if ARCH_BITS == 64
2432 *pi64 = i64;
2433#else
2434 ASMAtomicXchgS64(pi64, i64);
2435#endif
2436}
2437
2438
2439/**
2440 * Atomically writes a size_t value, ordered.
2441 *
2442 * @returns nothing.
2443 * @param pcb Pointer to the size_t variable to write.
2444 * @param cb The value to assign to *pcb.
2445 */
2446DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2447{
2448#if ARCH_BITS == 64
2449 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2450#elif ARCH_BITS == 32
2451 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2452#elif ARCH_BITS == 16
2453 AssertCompileSize(size_t, 2);
2454 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2455#else
2456# error "Unsupported ARCH_BITS value"
2457#endif
2458}
2459
2460
2461/**
2462 * Atomically writes a boolean value, ordered.
2463 *
2464 * @param pf Pointer to the boolean variable to write.
2465 * @param f The boolean value to assign to *pf.
2466 */
2467DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2468{
2469 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2470}
2471
2472
2473/**
2474 * Atomically writes a boolean value, unordered.
2475 *
2476 * @param pf Pointer to the boolean variable to write.
2477 * @param f The boolean value to assign to *pf.
2478 */
2479DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2480{
2481 *pf = f; /* byte writes are atomic on x86 */
2482}
2483
2484
2485/**
2486 * Atomically writes a pointer value, ordered.
2487 *
2488 * @param ppv Pointer to the pointer variable to write.
2489 * @param pv The pointer value to assign to *ppv.
2490 */
2491DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2492{
2493#if ARCH_BITS == 32 || ARCH_BITS == 16
2494 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2495#elif ARCH_BITS == 64
2496 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2497#else
2498# error "ARCH_BITS is bogus"
2499#endif
2500}
2501
2502
2503/**
2504 * Atomically writes a pointer value, ordered.
2505 *
2506 * @param ppv Pointer to the pointer variable to write.
2507 * @param pv The pointer value to assign to *ppv. If NULL use
2508 * ASMAtomicWriteNullPtr or you'll land in trouble.
2509 *
2510 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2511 * NULL.
2512 */
2513#ifdef __GNUC__
2514# define ASMAtomicWritePtr(ppv, pv) \
2515 do \
2516 { \
2517 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2518 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2519 \
2520 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2521 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2522 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2523 \
2524 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2525 } while (0)
2526#else
2527# define ASMAtomicWritePtr(ppv, pv) \
2528 do \
2529 { \
2530 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2531 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2532 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2533 \
2534 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2535 } while (0)
2536#endif
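
/*
 * Usage sketch (illustrative only): the ordered pointer write is the usual way
 * to publish a fully initialized structure to readers that use
 * ASMAtomicReadPtrT.  PMYNODE, myNodeAlloc and g_pMyListHead are hypothetical.
 *
 *      PMYNODE pNew = myNodeAlloc();
 *      pNew->uValue = 42;                          // initialize everything first,
 *      ASMAtomicWritePtr(&g_pMyListHead, pNew);    // then publish the pointer
 */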
2537
2538
2539/**
2540 * Atomically sets a pointer to NULL, ordered.
2541 *
2542 * @param ppv Pointer to the pointer variable that should be set to NULL.
2543 *
2544 * @remarks This is relatively type safe on GCC platforms.
2545 */
2546#if RT_GNUC_PREREQ(4, 2)
2547# define ASMAtomicWriteNullPtr(ppv) \
2548 do \
2549 { \
2550 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2551 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2552 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2553 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2554 } while (0)
2555#else
2556# define ASMAtomicWriteNullPtr(ppv) \
2557 do \
2558 { \
2559 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2560 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2561 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2562 } while (0)
2563#endif
2564
2565
2566/**
2567 * Atomically writes a pointer value, unordered.
2568 *
2570 * @param ppv Pointer to the pointer variable.
2571 * @param pv The pointer value to assign to *ppv. If NULL use
2572 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2573 *
2574 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2575 * NULL.
2576 */
2577#if RT_GNUC_PREREQ(4, 2)
2578# define ASMAtomicUoWritePtr(ppv, pv) \
2579 do \
2580 { \
2581 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2582 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2583 \
2584 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2585 AssertCompile(sizeof(pv) == sizeof(void *)); \
2586 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2587 \
2588 *(ppvTypeChecked) = pvTypeChecked; \
2589 } while (0)
2590#else
2591# define ASMAtomicUoWritePtr(ppv, pv) \
2592 do \
2593 { \
2594 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2595 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2596 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2597 *(ppv) = pv; \
2598 } while (0)
2599#endif
2600
2601
2602/**
2603 * Atomically sets a pointer to NULL, unordered.
2604 *
2605 * @param ppv Pointer to the pointer variable that should be set to NULL.
2606 *
2607 * @remarks This is relatively type safe on GCC platforms.
2608 */
2609#ifdef __GNUC__
2610# define ASMAtomicUoWriteNullPtr(ppv) \
2611 do \
2612 { \
2613 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2614 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2615 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2616 *(ppvTypeChecked) = NULL; \
2617 } while (0)
2618#else
2619# define ASMAtomicUoWriteNullPtr(ppv) \
2620 do \
2621 { \
2622 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2623 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2624 *(ppv) = NULL; \
2625 } while (0)
2626#endif
2627
2628
2629/**
2630 * Atomically write a typical IPRT handle value, ordered.
2631 *
2632 * @param ph Pointer to the variable to update.
2633 * @param hNew The value to assign to *ph.
2634 *
2635 * @remarks This doesn't currently work for all handles (like RTFILE).
2636 */
2637#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2638# define ASMAtomicWriteHandle(ph, hNew) \
2639 do { \
2640 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2641 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2642 } while (0)
2643#elif HC_ARCH_BITS == 64
2644# define ASMAtomicWriteHandle(ph, hNew) \
2645 do { \
2646 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2647 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2648 } while (0)
2649#else
2650# error HC_ARCH_BITS
2651#endif
2652
2653
2654/**
2655 * Atomically write a typical IPRT handle value, unordered.
2656 *
2657 * @param ph Pointer to the variable to update.
2658 * @param hNew The value to assign to *ph.
2659 *
2660 * @remarks This doesn't currently work for all handles (like RTFILE).
2661 */
2662#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2663# define ASMAtomicUoWriteHandle(ph, hNew) \
2664 do { \
2665 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2666 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2667 } while (0)
2668#elif HC_ARCH_BITS == 64
2669# define ASMAtomicUoWriteHandle(ph, hNew) \
2670 do { \
2671 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2672 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2673 } while (0)
2674#else
2675# error HC_ARCH_BITS
2676#endif
2677
2678
2679/**
2680 * Atomically write a value whose size might differ
2681 * between platforms or compilers, ordered.
2682 *
2683 * @param pu Pointer to the variable to update.
2684 * @param uNew The value to assign to *pu.
2685 */
2686#define ASMAtomicWriteSize(pu, uNew) \
2687 do { \
2688 switch (sizeof(*(pu))) { \
2689 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2690 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2691 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2692 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2693 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2694 } \
2695 } while (0)
2696
2697/**
2698 * Atomically write a value whose size might differ
2699 * between platforms or compilers, unordered.
2700 *
2701 * @param pu Pointer to the variable to update.
2702 * @param uNew The value to assign to *pu.
2703 */
2704#define ASMAtomicUoWriteSize(pu, uNew) \
2705 do { \
2706 switch (sizeof(*(pu))) { \
2707 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2708 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2709 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2710 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2711 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2712 } \
2713 } while (0)
2714
2715
2716
2717/**
2718 * Atomically exchanges and adds to a 16-bit value, ordered.
2719 *
2720 * @returns The old value.
2721 * @param pu16 Pointer to the value.
2722 * @param u16 Number to add.
2723 *
2724 * @remarks Currently not implemented, just to make 16-bit code happy.
2725 * @remarks x86: Requires a 486 or later.
2726 */
2727RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2728
2729
2730/**
2731 * Atomically exchanges and adds to a 32-bit value, ordered.
2732 *
2733 * @returns The old value.
2734 * @param pu32 Pointer to the value.
2735 * @param u32 Number to add.
2736 *
2737 * @remarks x86: Requires a 486 or later.
2738 */
2739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2740RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2741#else
2742DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2743{
2744# if RT_INLINE_ASM_USES_INTRIN
2745 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2746 return u32;
2747
2748# elif RT_INLINE_ASM_GNU_STYLE
2749 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2750 : "=r" (u32),
2751 "=m" (*pu32)
2752 : "0" (u32),
2753 "m" (*pu32)
2754 : "memory");
2755 return u32;
2756# else
2757 __asm
2758 {
2759 mov eax, [u32]
2760# ifdef RT_ARCH_AMD64
2761 mov rdx, [pu32]
2762 lock xadd [rdx], eax
2763# else
2764 mov edx, [pu32]
2765 lock xadd [edx], eax
2766# endif
2767 mov [u32], eax
2768 }
2769 return u32;
2770# endif
2771}
2772#endif
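
/*
 * Usage sketch (illustrative only): ASMAtomicAddU32 returns the value *before*
 * the addition, so it can hand out unique offsets or tickets.  g_offNextWrite
 * and cbChunk are hypothetical.
 *
 *      uint32_t const offMine = ASMAtomicAddU32(&g_offNextWrite, cbChunk);
 *      // this thread now owns the byte range [offMine, offMine + cbChunk).
 */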
2773
2774
2775/**
2776 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2777 *
2778 * @returns The old value.
2779 * @param pi32 Pointer to the value.
2780 * @param i32 Number to add.
2781 *
2782 * @remarks x86: Requires a 486 or later.
2783 */
2784DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2785{
2786 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2787}
2788
2789
2790/**
2791 * Atomically exchanges and adds to a 64-bit value, ordered.
2792 *
2793 * @returns The old value.
2794 * @param pu64 Pointer to the value.
2795 * @param u64 Number to add.
2796 *
2797 * @remarks x86: Requires a Pentium or later.
2798 */
2799#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2800DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2801#else
2802DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2803{
2804# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2805 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2806 return u64;
2807
2808# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2809 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2810 : "=r" (u64),
2811 "=m" (*pu64)
2812 : "0" (u64),
2813 "m" (*pu64)
2814 : "memory");
2815 return u64;
2816# else
2817 uint64_t u64Old;
2818 for (;;)
2819 {
2820 uint64_t u64New;
2821 u64Old = ASMAtomicUoReadU64(pu64);
2822 u64New = u64Old + u64;
2823 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2824 break;
2825 ASMNopPause();
2826 }
2827 return u64Old;
2828# endif
2829}
2830#endif
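
/*
 * Note on the generic fallback above: it is a standard compare-and-exchange
 * retry loop.  It re-reads the current value, computes old + u64, and only
 * commits if *pu64 was not changed in the meantime; ASMNopPause() takes the
 * edge off the spinning on SMT siblings.
 */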
2831
2832
2833/**
2834 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2835 *
2836 * @returns The old value.
2837 * @param pi64 Pointer to the value.
2838 * @param i64 Number to add.
2839 *
2840 * @remarks x86: Requires a Pentium or later.
2841 */
2842DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2843{
2844 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2845}
2846
2847
2848/**
2849 * Atomically exchanges and adds to a size_t value, ordered.
2850 *
2851 * @returns The old value.
2852 * @param pcb Pointer to the size_t value.
2853 * @param cb Number to add.
2854 */
2855DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2856{
2857#if ARCH_BITS == 64
2858 AssertCompileSize(size_t, 8);
2859 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2860#elif ARCH_BITS == 32
2861 AssertCompileSize(size_t, 4);
2862 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2863#elif ARCH_BITS == 16
2864 AssertCompileSize(size_t, 2);
2865 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2866#else
2867# error "Unsupported ARCH_BITS value"
2868#endif
2869}
2870
2871
2872/**
2873 * Atomically exchanges and adds a value whose size might differ between
2874 * platforms or compilers, ordered.
2875 *
2876 * @param pu Pointer to the variable to update.
2877 * @param uNew The value to add to *pu.
2878 * @param puOld Where to store the old value.
2879 */
2880#define ASMAtomicAddSize(pu, uNew, puOld) \
2881 do { \
2882 switch (sizeof(*(pu))) { \
2883 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2884 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2885 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2886 } \
2887 } while (0)
2888
2889
2890
2891/**
2892 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2893 *
2894 * @returns The old value.
2895 * @param pu16 Pointer to the value.
2896 * @param u16 Number to subtract.
2897 *
2898 * @remarks x86: Requires a 486 or later.
2899 */
2900DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2901{
2902 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2903}
2904
2905
2906/**
2907 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2908 *
2909 * @returns The old value.
2910 * @param pi16 Pointer to the value.
2911 * @param i16 Number to subtract.
2912 *
2913 * @remarks x86: Requires a 486 or later.
2914 */
2915DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2916{
2917 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2918}
2919
2920
2921/**
2922 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2923 *
2924 * @returns The old value.
2925 * @param pu32 Pointer to the value.
2926 * @param u32 Number to subtract.
2927 *
2928 * @remarks x86: Requires a 486 or later.
2929 */
2930DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2931{
2932 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2933}
2934
2935
2936/**
2937 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2938 *
2939 * @returns The old value.
2940 * @param pi32 Pointer to the value.
2941 * @param i32 Number to subtract.
2942 *
2943 * @remarks x86: Requires a 486 or later.
2944 */
2945DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2946{
2947 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2948}
2949
2950
2951/**
2952 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2953 *
2954 * @returns The old value.
2955 * @param pu64 Pointer to the value.
2956 * @param u64 Number to subtract.
2957 *
2958 * @remarks x86: Requires a Pentium or later.
2959 */
2960DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2961{
2962 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2963}
2964
2965
2966/**
2967 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2968 *
2969 * @returns The old value.
2970 * @param pi64 Pointer to the value.
2971 * @param i64 Number to subtract.
2972 *
2973 * @remarks x86: Requires a Pentium or later.
2974 */
2975DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2976{
2977 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2978}
2979
2980
2981/**
2982 * Atomically exchanges and subtracts from a size_t value, ordered.
2983 *
2984 * @returns The old value.
2985 * @param pcb Pointer to the size_t value.
2986 * @param cb Number to subtract.
2987 *
2988 * @remarks x86: Requires a 486 or later.
2989 */
2990DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2991{
2992#if ARCH_BITS == 64
2993 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2994#elif ARCH_BITS == 32
2995 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2996#elif ARCH_BITS == 16
2997 AssertCompileSize(size_t, 2);
2998 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
2999#else
3000# error "Unsupported ARCH_BITS value"
3001#endif
3002}
3003
3004
3005/**
3006 * Atomically exchanges and subtracts a value whose size might differ between
3007 * platforms or compilers, ordered.
3008 *
3009 * @param pu Pointer to the variable to update.
3010 * @param uNew The value to subtract from *pu.
3011 * @param puOld Where to store the old value.
3012 *
3013 * @remarks x86: Requires a 486 or later.
3014 */
3015#define ASMAtomicSubSize(pu, uNew, puOld) \
3016 do { \
3017 switch (sizeof(*(pu))) { \
3018 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3019 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3020 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3021 } \
3022 } while (0)
3023
3024
3025
3026/**
3027 * Atomically increment a 16-bit value, ordered.
3028 *
3029 * @returns The new value.
3030 * @param pu16 Pointer to the value to increment.
3031 * @remarks Not implemented. Just to make 16-bit code happy.
3032 *
3033 * @remarks x86: Requires a 486 or later.
3034 */
3035RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3036
3037
3038/**
3039 * Atomically increment a 32-bit value, ordered.
3040 *
3041 * @returns The new value.
3042 * @param pu32 Pointer to the value to increment.
3043 *
3044 * @remarks x86: Requires a 486 or later.
3045 */
3046#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3047RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3048#else
3049DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3050{
3051 uint32_t u32;
3052# if RT_INLINE_ASM_USES_INTRIN
3053 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3054 return u32;
3055
3056# elif RT_INLINE_ASM_GNU_STYLE
3057 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3058 : "=r" (u32),
3059 "=m" (*pu32)
3060 : "0" (1),
3061 "m" (*pu32)
3062 : "memory");
3063 return u32+1;
3064# else
3065 __asm
3066 {
3067 mov eax, 1
3068# ifdef RT_ARCH_AMD64
3069 mov rdx, [pu32]
3070 lock xadd [rdx], eax
3071# else
3072 mov edx, [pu32]
3073 lock xadd [edx], eax
3074# endif
3075 mov u32, eax
3076 }
3077 return u32+1;
3078# endif
3079}
3080#endif
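
/*
 * Usage sketch (illustrative only): a typical use is the retain side of a
 * reference count, where the returned *new* value is handy for sanity checks.
 * The cRefs member is hypothetical.
 *
 *      uint32_t const cRefs = ASMAtomicIncU32(&pThis->cRefs);
 *      Assert(cRefs > 1);
 */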
3081
3082
3083/**
3084 * Atomically increment a signed 32-bit value, ordered.
3085 *
3086 * @returns The new value.
3087 * @param pi32 Pointer to the value to increment.
3088 *
3089 * @remarks x86: Requires a 486 or later.
3090 */
3091DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3092{
3093 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3094}
3095
3096
3097/**
3098 * Atomically increment a 64-bit value, ordered.
3099 *
3100 * @returns The new value.
3101 * @param pu64 Pointer to the value to increment.
3102 *
3103 * @remarks x86: Requires a Pentium or later.
3104 */
3105#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3106DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3107#else
3108DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3109{
3110# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3111 uint64_t u64;
3112 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3113 return u64;
3114
3115# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3116 uint64_t u64;
3117 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3118 : "=r" (u64),
3119 "=m" (*pu64)
3120 : "0" (1),
3121 "m" (*pu64)
3122 : "memory");
3123 return u64 + 1;
3124# else
3125 return ASMAtomicAddU64(pu64, 1) + 1;
3126# endif
3127}
3128#endif
3129
3130
3131/**
3132 * Atomically increment a signed 64-bit value, ordered.
3133 *
3134 * @returns The new value.
3135 * @param pi64 Pointer to the value to increment.
3136 *
3137 * @remarks x86: Requires a Pentium or later.
3138 */
3139DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3140{
3141 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3142}
3143
3144
3145/**
3146 * Atomically increment a size_t value, ordered.
3147 *
3148 * @returns The new value.
3149 * @param pcb Pointer to the value to increment.
3150 *
3151 * @remarks x86: Requires a 486 or later.
3152 */
3153DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3154{
3155#if ARCH_BITS == 64
3156 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3157#elif ARCH_BITS == 32
3158 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3159#elif ARCH_BITS == 16
3160 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3161#else
3162# error "Unsupported ARCH_BITS value"
3163#endif
3164}
3165
3166
3167
3168/**
3169 * Atomically decrement an unsigned 16-bit value, ordered.
3170 *
3171 * @returns The new value.
3172 * @param pu16 Pointer to the value to decrement.
3173 * @remarks Not implemented. Just to make 16-bit code happy.
3174 *
3175 * @remarks x86: Requires a 486 or later.
3176 */
3177RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3178
3179
3180/**
3181 * Atomically decrement an unsigned 32-bit value, ordered.
3182 *
3183 * @returns The new value.
3184 * @param pu32 Pointer to the value to decrement.
3185 *
3186 * @remarks x86: Requires a 486 or later.
3187 */
3188#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3189RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3190#else
3191DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3192{
3193 uint32_t u32;
3194# if RT_INLINE_ASM_USES_INTRIN
3195 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3196 return u32;
3197
3198# elif RT_INLINE_ASM_GNU_STYLE
3199 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3200 : "=r" (u32),
3201 "=m" (*pu32)
3202 : "0" (-1),
3203 "m" (*pu32)
3204 : "memory");
3205 return u32-1;
3206# else
3207 __asm
3208 {
3209 mov eax, -1
3210# ifdef RT_ARCH_AMD64
3211 mov rdx, [pu32]
3212 lock xadd [rdx], eax
3213# else
3214 mov edx, [pu32]
3215 lock xadd [edx], eax
3216# endif
3217 mov u32, eax
3218 }
3219 return u32-1;
3220# endif
3221}
3222#endif
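
/*
 * Usage sketch (illustrative only): the matching release side of a reference
 * count; the object is destroyed only when the *new* value reaches zero.
 * myObjDestroy and cRefs are hypothetical.
 *
 *      uint32_t const cRefs = ASMAtomicDecU32(&pThis->cRefs);
 *      if (cRefs == 0)
 *          myObjDestroy(pThis);
 */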
3223
3224
3225/**
3226 * Atomically decrement a signed 32-bit value, ordered.
3227 *
3228 * @returns The new value.
3229 * @param pi32 Pointer to the value to decrement.
3230 *
3231 * @remarks x86: Requires a 486 or later.
3232 */
3233DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3234{
3235 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3236}
3237
3238
3239/**
3240 * Atomically decrement an unsigned 64-bit value, ordered.
3241 *
3242 * @returns The new value.
3243 * @param pu64 Pointer to the value to decrement.
3244 *
3245 * @remarks x86: Requires a Pentium or later.
3246 */
3247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3248RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3249#else
3250DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3251{
3252# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3253 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3254 return u64;
3255
3256# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3257 uint64_t u64;
3258 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3259 : "=r" (u64),
3260 "=m" (*pu64)
3261 : "0" (~(uint64_t)0),
3262 "m" (*pu64)
3263 : "memory");
3264 return u64-1;
3265# else
3266 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3267# endif
3268}
3269#endif
3270
3271
3272/**
3273 * Atomically decrement a signed 64-bit value, ordered.
3274 *
3275 * @returns The new value.
3276 * @param pi64 Pointer to the value to decrement.
3277 *
3278 * @remarks x86: Requires a Pentium or later.
3279 */
3280DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3281{
3282 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3283}
3284
3285
3286/**
3287 * Atomically decrement a size_t value, ordered.
3288 *
3289 * @returns The new value.
3290 * @param pcb Pointer to the value to decrement.
3291 *
3292 * @remarks x86: Requires a 486 or later.
3293 */
3294DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3295{
3296#if ARCH_BITS == 64
3297 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3298#elif ARCH_BITS == 32
3299 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3300#elif ARCH_BITS == 16
3301 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3302#else
3303# error "Unsupported ARCH_BITS value"
3304#endif
3305}
3306
3307
3308/**
3309 * Atomically Or an unsigned 32-bit value, ordered.
3310 *
3311 * @param pu32 Pointer to the variable to OR u32 with.
3312 * @param u32 The value to OR *pu32 with.
3313 *
3314 * @remarks x86: Requires a 386 or later.
3315 */
3316#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3317RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3318#else
3319DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3320{
3321# if RT_INLINE_ASM_USES_INTRIN
3322 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3323
3324# elif RT_INLINE_ASM_GNU_STYLE
3325 __asm__ __volatile__("lock; orl %1, %0\n\t"
3326 : "=m" (*pu32)
3327 : "ir" (u32),
3328 "m" (*pu32));
3329# else
3330 __asm
3331 {
3332 mov eax, [u32]
3333# ifdef RT_ARCH_AMD64
3334 mov rdx, [pu32]
3335 lock or [rdx], eax
3336# else
3337 mov edx, [pu32]
3338 lock or [edx], eax
3339# endif
3340 }
3341# endif
3342}
3343#endif
3344
3345
3346/**
3347 * Atomically Or a signed 32-bit value, ordered.
3348 *
3349 * @param pi32 Pointer to the variable to OR i32 with.
3350 * @param i32 The value to OR *pi32 with.
3351 *
3352 * @remarks x86: Requires a 386 or later.
3353 */
3354DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3355{
3356 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3357}
3358
3359
3360/**
3361 * Atomically Or an unsigned 64-bit value, ordered.
3362 *
3363 * @param pu64 Pointer to the variable to OR u64 with.
3364 * @param u64 The value to OR *pu64 with.
3365 *
3366 * @remarks x86: Requires a Pentium or later.
3367 */
3368#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3369DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3370#else
3371DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3372{
3373# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3374 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3375
3376# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3377 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3378 : "=m" (*pu64)
3379 : "r" (u64),
3380 "m" (*pu64));
3381# else
3382 for (;;)
3383 {
3384 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3385 uint64_t u64New = u64Old | u64;
3386 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3387 break;
3388 ASMNopPause();
3389 }
3390# endif
3391}
3392#endif
3393
3394
3395/**
3396 * Atomically Or a signed 64-bit value, ordered.
3397 *
3398 * @param pi64 Pointer to the variable to OR i64 with.
3399 * @param i64 The value to OR *pi64 with.
3400 *
3401 * @remarks x86: Requires a Pentium or later.
3402 */
3403DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3404{
3405 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3406}
3407
3408
3409/**
3410 * Atomically And an unsigned 32-bit value, ordered.
3411 *
3412 * @param pu32 Pointer to the variable to AND u32 with.
3413 * @param u32 The value to AND *pu32 with.
3414 *
3415 * @remarks x86: Requires a 386 or later.
3416 */
3417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3418RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3419#else
3420DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3421{
3422# if RT_INLINE_ASM_USES_INTRIN
3423 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3424
3425# elif RT_INLINE_ASM_GNU_STYLE
3426 __asm__ __volatile__("lock; andl %1, %0\n\t"
3427 : "=m" (*pu32)
3428 : "ir" (u32),
3429 "m" (*pu32));
3430# else
3431 __asm
3432 {
3433 mov eax, [u32]
3434# ifdef RT_ARCH_AMD64
3435 mov rdx, [pu32]
3436 lock and [rdx], eax
3437# else
3438 mov edx, [pu32]
3439 lock and [edx], eax
3440# endif
3441 }
3442# endif
3443}
3444#endif
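
/*
 * Usage sketch (illustrative only): ASMAtomicOrU32 / ASMAtomicAndU32 are the
 * usual lock-free way to set and clear flag bits in a shared status word.
 * MYSTATUS_F_BUSY and fStatus are hypothetical.
 *
 *      ASMAtomicOrU32(&pThis->fStatus, MYSTATUS_F_BUSY);       // set the flag
 *      ASMAtomicAndU32(&pThis->fStatus, ~MYSTATUS_F_BUSY);     // clear it again
 */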
3445
3446
3447/**
3448 * Atomically And a signed 32-bit value, ordered.
3449 *
3450 * @param pi32 Pointer to the variable to AND i32 with.
3451 * @param i32 The value to AND *pi32 with.
3452 *
3453 * @remarks x86: Requires a 386 or later.
3454 */
3455DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3456{
3457 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3458}
3459
3460
3461/**
3462 * Atomically And an unsigned 64-bit value, ordered.
3463 *
3464 * @param pu64 Pointer to the variable to AND u64 with.
3465 * @param u64 The value to AND *pu64 with.
3466 *
3467 * @remarks x86: Requires a Pentium or later.
3468 */
3469#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3470DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3471#else
3472DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3473{
3474# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3475 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3476
3477# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3478 __asm__ __volatile__("lock; andq %1, %0\n\t"
3479 : "=m" (*pu64)
3480 : "r" (u64),
3481 "m" (*pu64));
3482# else
3483 for (;;)
3484 {
3485 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3486 uint64_t u64New = u64Old & u64;
3487 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3488 break;
3489 ASMNopPause();
3490 }
3491# endif
3492}
3493#endif
3494
3495
3496/**
3497 * Atomically And a signed 64-bit value, ordered.
3498 *
3499 * @param pi64 Pointer to the variable to AND i64 with.
3500 * @param i64 The value to AND *pi64 with.
3501 *
3502 * @remarks x86: Requires a Pentium or later.
3503 */
3504DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3505{
3506 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3507}
3508
3509
3510/**
3511 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3512 *
3513 * @param pu32 Pointer to the variable to OR u32 with.
3514 * @param u32 The value to OR *pu32 with.
3515 *
3516 * @remarks x86: Requires a 386 or later.
3517 */
3518#if RT_INLINE_ASM_EXTERNAL
3519RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3520#else
3521DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3522{
3523# if RT_INLINE_ASM_GNU_STYLE
3524 __asm__ __volatile__("orl %1, %0\n\t"
3525 : "=m" (*pu32)
3526 : "ir" (u32),
3527 "m" (*pu32));
3528# else
3529 __asm
3530 {
3531 mov eax, [u32]
3532# ifdef RT_ARCH_AMD64
3533 mov rdx, [pu32]
3534 or [rdx], eax
3535# else
3536 mov edx, [pu32]
3537 or [edx], eax
3538# endif
3539 }
3540# endif
3541}
3542#endif
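
/*
 * Note on the unordered variant above: without the LOCK prefix the OR is still
 * a single read-modify-write instruction, so it cannot be torn by an interrupt
 * on the same CPU (that is the "interrupt safe" part), but it is not atomic
 * with respect to other CPUs.  Use the ordered variants for data shared across
 * CPUs.
 */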
3543
3544
3545/**
3546 * Atomically OR a signed 32-bit value, unordered.
3547 *
3548 * @param pi32 Pointer to the variable to OR i32 with.
3549 * @param i32 The value to OR *pi32 with.
3550 *
3551 * @remarks x86: Requires a 386 or later.
3552 */
3553DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3554{
3555 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3556}
3557
3558
3559/**
3560 * Atomically OR an unsigned 64-bit value, unordered.
3561 *
3562 * @param pu64 Pointer to the variable to OR u64 with.
3563 * @param u64 The value to OR *pu64 with.
3564 *
3565 * @remarks x86: Requires a Pentium or later.
3566 */
3567#if RT_INLINE_ASM_EXTERNAL
3568DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3569#else
3570DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3571{
3572# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3573 __asm__ __volatile__("orq %1, %q0\n\t"
3574 : "=m" (*pu64)
3575 : "r" (u64),
3576 "m" (*pu64));
3577# else
3578 for (;;)
3579 {
3580 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3581 uint64_t u64New = u64Old | u64;
3582 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3583 break;
3584 ASMNopPause();
3585 }
3586# endif
3587}
3588#endif
3589
3590
3591/**
3592 * Atomically Or a signed 64-bit value, unordered.
3593 *
3594 * @param pi64 Pointer to the variable to OR i64 with.
3595 * @param i64 The value to OR *pi64 with.
3596 *
3597 * @remarks x86: Requires a Pentium or later.
3598 */
3599DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3600{
3601 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3602}
3603
3604
3605/**
3606 * Atomically And an unsigned 32-bit value, unordered.
3607 *
3608 * @param pu32 Pointer to the variable to AND u32 with.
3609 * @param u32 The value to AND *pu32 with.
3610 *
3611 * @remarks x86: Requires a 386 or later.
3612 */
3613#if RT_INLINE_ASM_EXTERNAL
3614RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3615#else
3616DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3617{
3618# if RT_INLINE_ASM_GNU_STYLE
3619 __asm__ __volatile__("andl %1, %0\n\t"
3620 : "=m" (*pu32)
3621 : "ir" (u32),
3622 "m" (*pu32));
3623# else
3624 __asm
3625 {
3626 mov eax, [u32]
3627# ifdef RT_ARCH_AMD64
3628 mov rdx, [pu32]
3629 and [rdx], eax
3630# else
3631 mov edx, [pu32]
3632 and [edx], eax
3633# endif
3634 }
3635# endif
3636}
3637#endif
3638
3639
3640/**
3641 * Atomically And a signed 32-bit value, unordered.
3642 *
3643 * @param pi32 Pointer to the variable to AND i32 with.
3644 * @param i32 The value to AND *pi32 with.
3645 *
3646 * @remarks x86: Requires a 386 or later.
3647 */
3648DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3649{
3650 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3651}
3652
3653
3654/**
3655 * Atomically And an unsigned 64-bit value, unordered.
3656 *
3657 * @param pu64 Pointer to the variable to AND u64 with.
3658 * @param u64 The value to AND *pu64 with.
3659 *
3660 * @remarks x86: Requires a Pentium or later.
3661 */
3662#if RT_INLINE_ASM_EXTERNAL
3663DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3664#else
3665DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3666{
3667# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3668 __asm__ __volatile__("andq %1, %0\n\t"
3669 : "=m" (*pu64)
3670 : "r" (u64),
3671 "m" (*pu64));
3672# else
3673 for (;;)
3674 {
3675 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3676 uint64_t u64New = u64Old & u64;
3677 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3678 break;
3679 ASMNopPause();
3680 }
3681# endif
3682}
3683#endif
3684
3685
3686/**
3687 * Atomically And a signed 64-bit value, unordered.
3688 *
3689 * @param pi64 Pointer to the variable to AND i64 with.
3690 * @param i64 The value to AND *pi64 with.
3691 *
3692 * @remarks x86: Requires a Pentium or later.
3693 */
3694DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3695{
3696 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3697}
3698
3699
3700/**
3701 * Atomically increment an unsigned 32-bit value, unordered.
3702 *
3703 * @returns the new value.
3704 * @param pu32 Pointer to the variable to increment.
3705 *
3706 * @remarks x86: Requires a 486 or later.
3707 */
3708#if RT_INLINE_ASM_EXTERNAL
3709RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3710#else
3711DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3712{
3713 uint32_t u32;
3714# if RT_INLINE_ASM_GNU_STYLE
3715 __asm__ __volatile__("xaddl %0, %1\n\t"
3716 : "=r" (u32),
3717 "=m" (*pu32)
3718 : "0" (1),
3719 "m" (*pu32)
3720 : "memory");
3721 return u32 + 1;
3722# else
3723 __asm
3724 {
3725 mov eax, 1
3726# ifdef RT_ARCH_AMD64
3727 mov rdx, [pu32]
3728 xadd [rdx], eax
3729# else
3730 mov edx, [pu32]
3731 xadd [edx], eax
3732# endif
3733 mov u32, eax
3734 }
3735 return u32 + 1;
3736# endif
3737}
3738#endif
3739
3740
3741/**
3742 * Atomically decrement an unsigned 32-bit value, unordered.
3743 *
3744 * @returns the new value.
3745 * @param pu32 Pointer to the variable to decrement.
3746 *
3747 * @remarks x86: Requires a 486 or later.
3748 */
3749#if RT_INLINE_ASM_EXTERNAL
3750RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3751#else
3752DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3753{
3754 uint32_t u32;
3755# if RT_INLINE_ASM_GNU_STYLE
3756 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3757 : "=r" (u32),
3758 "=m" (*pu32)
3759 : "0" (-1),
3760 "m" (*pu32)
3761 : "memory");
3762 return u32 - 1;
3763# else
3764 __asm
3765 {
3766 mov eax, -1
3767# ifdef RT_ARCH_AMD64
3768 mov rdx, [pu32]
3769 xadd [rdx], eax
3770# else
3771 mov edx, [pu32]
3772 xadd [edx], eax
3773# endif
3774 mov u32, eax
3775 }
3776 return u32 - 1;
3777# endif
3778}
3779#endif
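/*
 * Usage sketch (illustrative): maintaining a statistics counter where exact
 * ordering relative to other memory accesses does not matter.  The counter
 * variable and helper are hypothetical.
 *
 *     static uint32_t volatile g_cMyRequests = 0;
 *
 *     static void myCountRequest(void)
 *     {
 *         ASMAtomicUoIncU32(&g_cMyRequests);   // returns the new value, ignored here
 *     }
 */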
3780
3781
3782/** @def RT_ASM_PAGE_SIZE
3783 * We try to avoid dragging in iprt/param.h here.
3784 * @internal
3785 */
3786#if defined(RT_ARCH_SPARC64)
3787# define RT_ASM_PAGE_SIZE 0x2000
3788# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3789# if PAGE_SIZE != 0x2000
3790# error "PAGE_SIZE is not 0x2000!"
3791# endif
3792# endif
3793#else
3794# define RT_ASM_PAGE_SIZE 0x1000
3795# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3796# if PAGE_SIZE != 0x1000
3797# error "PAGE_SIZE is not 0x1000!"
3798# endif
3799# endif
3800#endif
3801
3802/**
3803 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes, 4K on most architectures).
3804 *
3805 * @param pv Pointer to the memory block. This must be page aligned.
3806 */
3807#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3808RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3809# else
3810DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3811{
3812# if RT_INLINE_ASM_USES_INTRIN
3813# ifdef RT_ARCH_AMD64
3814 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3815# else
3816 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3817# endif
3818
3819# elif RT_INLINE_ASM_GNU_STYLE
3820 RTCCUINTREG uDummy;
3821# ifdef RT_ARCH_AMD64
3822 __asm__ __volatile__("rep stosq"
3823 : "=D" (pv),
3824 "=c" (uDummy)
3825 : "0" (pv),
3826 "c" (RT_ASM_PAGE_SIZE >> 3),
3827 "a" (0)
3828 : "memory");
3829# else
3830 __asm__ __volatile__("rep stosl"
3831 : "=D" (pv),
3832 "=c" (uDummy)
3833 : "0" (pv),
3834 "c" (RT_ASM_PAGE_SIZE >> 2),
3835 "a" (0)
3836 : "memory");
3837# endif
3838# else
3839 __asm
3840 {
3841# ifdef RT_ARCH_AMD64
3842 xor rax, rax
3843 mov ecx, 0200h
3844 mov rdi, [pv]
3845 rep stosq
3846# else
3847 xor eax, eax
3848 mov ecx, 0400h
3849 mov edi, [pv]
3850 rep stosd
3851# endif
3852 }
3853# endif
3854}
3855# endif
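/*
 * Usage sketch (illustrative): zeroing a page-aligned scratch buffer.  The
 * RTMemPageAlloc/RTMemPageFree pairing is an assumption about the caller's
 * environment; any page-aligned allocation of RT_ASM_PAGE_SIZE bytes will do.
 *
 *     void *pvPage = RTMemPageAlloc(RT_ASM_PAGE_SIZE);
 *     if (pvPage)
 *     {
 *         ASMMemZeroPage(pvPage);                // pvPage must be page aligned
 *         // ... use the page ...
 *         RTMemPageFree(pvPage, RT_ASM_PAGE_SIZE);
 *     }
 */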
3856
3857
3858/**
3859 * Zeros a memory block with a 32-bit aligned size.
3860 *
3861 * @param pv Pointer to the memory block.
3862 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3863 */
3864#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3865RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3866#else
3867DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3868{
3869# if RT_INLINE_ASM_USES_INTRIN
3870# ifdef RT_ARCH_AMD64
3871 if (!(cb & 7))
3872 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3873 else
3874# endif
3875 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3876
3877# elif RT_INLINE_ASM_GNU_STYLE
3878 __asm__ __volatile__("rep stosl"
3879 : "=D" (pv),
3880 "=c" (cb)
3881 : "0" (pv),
3882 "1" (cb >> 2),
3883 "a" (0)
3884 : "memory");
3885# else
3886 __asm
3887 {
3888 xor eax, eax
3889# ifdef RT_ARCH_AMD64
3890 mov rcx, [cb]
3891 shr rcx, 2
3892 mov rdi, [pv]
3893# else
3894 mov ecx, [cb]
3895 shr ecx, 2
3896 mov edi, [pv]
3897# endif
3898 rep stosd
3899 }
3900# endif
3901}
3902#endif
3903
3904
3905/**
3906 * Fills a memory block with a 32-bit aligned size.
3907 *
3908 * @param pv Pointer to the memory block.
3909 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3910 * @param u32 The value to fill with.
3911 */
3912#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3913RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3914#else
3915DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3916{
3917# if RT_INLINE_ASM_USES_INTRIN
3918# ifdef RT_ARCH_AMD64
3919 if (!(cb & 7))
3920 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3921 else
3922# endif
3923 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3924
3925# elif RT_INLINE_ASM_GNU_STYLE
3926 __asm__ __volatile__("rep stosl"
3927 : "=D" (pv),
3928 "=c" (cb)
3929 : "0" (pv),
3930 "1" (cb >> 2),
3931 "a" (u32)
3932 : "memory");
3933# else
3934 __asm
3935 {
3936# ifdef RT_ARCH_AMD64
3937 mov rcx, [cb]
3938 shr rcx, 2
3939 mov rdi, [pv]
3940# else
3941 mov ecx, [cb]
3942 shr ecx, 2
3943 mov edi, [pv]
3944# endif
3945 mov eax, [u32]
3946 rep stosd
3947 }
3948# endif
3949}
3950#endif
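/*
 * Usage sketch (illustrative): the byte count passed to ASMMemZero32 and
 * ASMMemFill32 must be a multiple of four, so arrays of 32-bit values are the
 * typical customers.  The array below is made up.
 *
 *     uint32_t au32Table[64];
 *     ASMMemZero32(au32Table, sizeof(au32Table));                        // all zeros
 *     ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xdeadbeef));  // poison pattern
 */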
3951
3952
3953/**
3954 * Checks if a memory block is all zeros.
3955 *
3956 * @returns Pointer to the first non-zero byte.
3957 * @returns NULL if all zero.
3958 *
3959 * @param pv Pointer to the memory block.
3960 * @param cb Number of bytes in the block.
3961 *
3962 * @todo Fix the name, it looks like a predicate function but does not return a boolean!
3963 */
3964#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3965 && !defined(RT_ARCH_SPARC64) \
3966 && !defined(RT_ARCH_SPARC)
3967DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3968#else
3969DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3970{
3971 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3972 for (; cb; cb--, pb++)
3973 if (RT_LIKELY(*pb == 0))
3974 { /* likely */ }
3975 else
3976 return (void RT_FAR *)pb;
3977 return NULL;
3978}
3979#endif
3980
3981
3982/**
3983 * Checks if a memory block is all zeros.
3984 *
3985 * @returns true if zero, false if not.
3986 *
3987 * @param pv Pointer to the memory block.
3988 * @param cb Number of bytes in the block.
3989 *
3990 * @sa ASMMemFirstNonZero
3991 */
3992DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3993{
3994 return ASMMemFirstNonZero(pv, cb) == NULL;
3995}
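/*
 * Usage sketch (illustrative): checking that a reserved region of some
 * hypothetical header structure is still all zeros, and locating the first
 * offending byte when it is not.
 *
 *     if (!ASMMemIsZero(pvReserved, cbReserved))
 *     {
 *         uint8_t *pbBad  = (uint8_t *)ASMMemFirstNonZero(pvReserved, cbReserved);
 *         size_t   offBad = (size_t)(pbBad - (uint8_t *)pvReserved);  // offset of the culprit
 *     }
 */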
3996
3997
3998/**
3999 * Checks if a memory page is all zeros.
4000 *
4001 * @returns true / false.
4002 *
4003 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
4004 * boundary.
4005 */
4006DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
4007{
4008# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4009 union { RTCCUINTREG r; bool f; } uAX;
4010 RTCCUINTREG xCX, xDI;
4011 Assert(!((uintptr_t)pvPage & 15));
4012 __asm__ __volatile__("repe; "
4013# ifdef RT_ARCH_AMD64
4014 "scasq\n\t"
4015# else
4016 "scasl\n\t"
4017# endif
4018 "setnc %%al\n\t"
4019 : "=&c" (xCX),
4020 "=&D" (xDI),
4021 "=&a" (uAX.r)
4022 : "mr" (pvPage),
4023# ifdef RT_ARCH_AMD64
4024 "0" (RT_ASM_PAGE_SIZE/8),
4025# else
4026 "0" (RT_ASM_PAGE_SIZE/4),
4027# endif
4028 "1" (pvPage),
4029 "2" (0));
4030 return uAX.f;
4031# else
4032 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4033 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4034 Assert(!((uintptr_t)pvPage & 15));
4035 for (;;)
4036 {
4037 if (puPtr[0]) return false;
4038 if (puPtr[4]) return false;
4039
4040 if (puPtr[2]) return false;
4041 if (puPtr[6]) return false;
4042
4043 if (puPtr[1]) return false;
4044 if (puPtr[5]) return false;
4045
4046 if (puPtr[3]) return false;
4047 if (puPtr[7]) return false;
4048
4049 if (!--cLeft)
4050 return true;
4051 puPtr += 8;
4052 }
4053# endif
4054}
4055
4056
4057/**
4058 * Checks if a memory block is filled with the specified byte, returning the
4059 * first mismatch.
4060 *
4061 * This is sort of an inverted memchr.
4062 *
4063 * @returns Pointer to the byte which doesn't equal u8.
4064 * @returns NULL if all equal to u8.
4065 *
4066 * @param pv Pointer to the memory block.
4067 * @param cb Number of bytes in the block.
4068 * @param u8 The value it's supposed to be filled with.
4069 *
4070 * @remarks No alignment requirements.
4071 */
4072#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4073 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4074 && !defined(RT_ARCH_SPARC64) \
4075 && !defined(RT_ARCH_SPARC)
4076DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4077#else
4078DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4079{
4080 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4081 for (; cb; cb--, pb++)
4082 if (RT_LIKELY(*pb == u8))
4083 { /* likely */ }
4084 else
4085 return (void *)pb;
4086 return NULL;
4087}
4088#endif
4089
4090
4091/**
4092 * Checks if a memory block is filled with the specified byte.
4093 *
4094 * @returns true if all matching, false if not.
4095 *
4096 * @param pv Pointer to the memory block.
4097 * @param cb Number of bytes in the block.
4098 * @param u8 The value it's supposed to be filled with.
4099 *
4100 * @remarks No alignment requirements.
4101 */
4102DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4103{
4104 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4105}
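/*
 * Usage sketch (illustrative): verifying that an erased flash sector image
 * (hypothetical buffer) contains nothing but 0xff bytes.
 *
 *     if (!ASMMemIsAllU8(pvSector, cbSector, 0xff))
 *     {
 *         uint8_t *pbBad = (uint8_t *)ASMMemFirstMismatchingU8(pvSector, cbSector, 0xff);
 *         // pbBad points at the first byte that is not 0xff
 *     }
 */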
4106
4107
4108/**
4109 * Checks if a memory block is filled with the specified 32-bit value.
4110 *
4111 * This is a sort of inverted memchr.
4112 *
4113 * @returns Pointer to the first value which doesn't equal u32.
4114 * @returns NULL if all equal to u32.
4115 *
4116 * @param pv Pointer to the memory block.
4117 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4118 * @param u32 The value it's supposed to be filled with.
4119 */
4120DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4121{
4122/** @todo rewrite this in inline assembly? */
4123 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4124 for (; cb; cb -= 4, pu32++)
4125 if (RT_LIKELY(*pu32 == u32))
4126 { /* likely */ }
4127 else
4128 return (uint32_t RT_FAR *)pu32;
4129 return NULL;
4130}
4131
4132
4133/**
4134 * Probes a byte pointer for read access.
4135 *
4136 * While the function will fault if the byte is not read accessible,
4137 * the idea is to do this in a safe place like before acquiring locks
4138 * and such like.
4139 *
4140 * Also, this function guarantees that an eager compiler is not going
4141 * to optimize the probing away.
4142 *
4143 * @param pvByte Pointer to the byte.
4144 */
4145#if RT_INLINE_ASM_EXTERNAL
4146RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4147#else
4148DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4149{
4150 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4151 uint8_t u8;
4152# if RT_INLINE_ASM_GNU_STYLE
4153 __asm__ __volatile__("movb (%1), %0\n\t"
4154 : "=r" (u8)
4155 : "r" (pvByte));
4156# else
4157 __asm
4158 {
4159# ifdef RT_ARCH_AMD64
4160 mov rax, [pvByte]
4161 mov al, [rax]
4162# else
4163 mov eax, [pvByte]
4164 mov al, [eax]
4165# endif
4166 mov [u8], al
4167 }
4168# endif
4169 return u8;
4170}
4171#endif
4172
4173/**
4174 * Probes a buffer for read access page by page.
4175 *
4176 * While the function will fault if the buffer is not fully read
4177 * accessible, the idea is to do this in a safe place like before
4178 * acquiring locks and such like.
4179 *
4180 * Also, this function guarantees that an eager compiler is not going
4181 * to optimize the probing away.
4182 *
4183 * @param pvBuf Pointer to the buffer.
4184 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4185 */
4186DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4187{
4188 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4189 /* the first byte */
4190 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4191 ASMProbeReadByte(pu8);
4192
4193 /* the whole pages in between. */
4194 while (cbBuf > RT_ASM_PAGE_SIZE)
4195 {
4196 ASMProbeReadByte(pu8);
4197 cbBuf -= RT_ASM_PAGE_SIZE;
4198 pu8 += RT_ASM_PAGE_SIZE;
4199 }
4200
4201 /* the last byte */
4202 ASMProbeReadByte(pu8 + cbBuf - 1);
4203}
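/*
 * Usage sketch (illustrative): touching a caller supplied buffer before taking
 * a spinlock, so that any page fault happens in a context where it is safe to
 * handle.  The lock functions are placeholders for whatever locking the caller
 * actually uses.
 *
 *     ASMProbeReadBuffer(pvUserBuf, cbUserBuf);  // may fault here, which is fine
 *     myAcquireSpinlock(&MyLock);                // no faults tolerated past this point
 *     // ... copy from pvUserBuf ...
 *     myReleaseSpinlock(&MyLock);
 */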
4204
4205
4206
4207/** @defgroup grp_inline_bits Bit Operations
4208 * @{
4209 */
4210
4211
4212/**
4213 * Sets a bit in a bitmap.
4214 *
4215 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4216 * @param iBit The bit to set.
4217 *
4218 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4219 * However, doing so will yield better performance as well as avoiding
4220 * traps accessing the last bits in the bitmap.
4221 */
4222#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4223RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4224#else
4225DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4226{
4227# if RT_INLINE_ASM_USES_INTRIN
4228 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4229
4230# elif RT_INLINE_ASM_GNU_STYLE
4231 __asm__ __volatile__("btsl %1, %0"
4232 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4233 : "Ir" (iBit),
4234 "m" (*(volatile long RT_FAR *)pvBitmap)
4235 : "memory");
4236# else
4237 __asm
4238 {
4239# ifdef RT_ARCH_AMD64
4240 mov rax, [pvBitmap]
4241 mov edx, [iBit]
4242 bts [rax], edx
4243# else
4244 mov eax, [pvBitmap]
4245 mov edx, [iBit]
4246 bts [eax], edx
4247# endif
4248 }
4249# endif
4250}
4251#endif
4252
4253
4254/**
4255 * Atomically sets a bit in a bitmap, ordered.
4256 *
4257 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4258 * the memory access isn't atomic!
4259 * @param iBit The bit to set.
4260 *
4261 * @remarks x86: Requires a 386 or later.
4262 */
4263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4264RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4265#else
4266DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4267{
4268 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4269# if RT_INLINE_ASM_USES_INTRIN
4270 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4271# elif RT_INLINE_ASM_GNU_STYLE
4272 __asm__ __volatile__("lock; btsl %1, %0"
4273 : "=m" (*(volatile long *)pvBitmap)
4274 : "Ir" (iBit),
4275 "m" (*(volatile long *)pvBitmap)
4276 : "memory");
4277# else
4278 __asm
4279 {
4280# ifdef RT_ARCH_AMD64
4281 mov rax, [pvBitmap]
4282 mov edx, [iBit]
4283 lock bts [rax], edx
4284# else
4285 mov eax, [pvBitmap]
4286 mov edx, [iBit]
4287 lock bts [eax], edx
4288# endif
4289 }
4290# endif
4291}
4292#endif
4293
4294
4295/**
4296 * Clears a bit in a bitmap.
4297 *
4298 * @param pvBitmap Pointer to the bitmap.
4299 * @param iBit The bit to clear.
4300 *
4301 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4302 * However, doing so will yield better performance as well as avoiding
4303 * traps accessing the last bits in the bitmap.
4304 */
4305#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4306RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4307#else
4308DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4309{
4310# if RT_INLINE_ASM_USES_INTRIN
4311 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4312
4313# elif RT_INLINE_ASM_GNU_STYLE
4314 __asm__ __volatile__("btrl %1, %0"
4315 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4316 : "Ir" (iBit),
4317 "m" (*(volatile long RT_FAR *)pvBitmap)
4318 : "memory");
4319# else
4320 __asm
4321 {
4322# ifdef RT_ARCH_AMD64
4323 mov rax, [pvBitmap]
4324 mov edx, [iBit]
4325 btr [rax], edx
4326# else
4327 mov eax, [pvBitmap]
4328 mov edx, [iBit]
4329 btr [eax], edx
4330# endif
4331 }
4332# endif
4333}
4334#endif
4335
4336
4337/**
4338 * Atomically clears a bit in a bitmap, ordered.
4339 *
4340 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4341 * the memory access isn't atomic!
4342 * @param iBit The bit to clear.
4343 *
4344 * @remarks No memory barrier, take care on smp.
4345 * @remarks x86: Requires a 386 or later.
4346 */
4347#if RT_INLINE_ASM_EXTERNAL
4348RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4349#else
4350DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4351{
4352 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4353# if RT_INLINE_ASM_GNU_STYLE
4354 __asm__ __volatile__("lock; btrl %1, %0"
4355 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4356 : "Ir" (iBit),
4357 "m" (*(volatile long RT_FAR *)pvBitmap)
4358 : "memory");
4359# else
4360 __asm
4361 {
4362# ifdef RT_ARCH_AMD64
4363 mov rax, [pvBitmap]
4364 mov edx, [iBit]
4365 lock btr [rax], edx
4366# else
4367 mov eax, [pvBitmap]
4368 mov edx, [iBit]
4369 lock btr [eax], edx
4370# endif
4371 }
4372# endif
4373}
4374#endif
4375
4376
4377/**
4378 * Toggles a bit in a bitmap.
4379 *
4380 * @param pvBitmap Pointer to the bitmap.
4381 * @param iBit The bit to toggle.
4382 *
4383 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4384 * However, doing so will yield better performance as well as avoiding
4385 * traps accessing the last bits in the bitmap.
4386 */
4387#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4388RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4389#else
4390DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4391{
4392# if RT_INLINE_ASM_USES_INTRIN
4393 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4394# elif RT_INLINE_ASM_GNU_STYLE
4395 __asm__ __volatile__("btcl %1, %0"
4396 : "=m" (*(volatile long *)pvBitmap)
4397 : "Ir" (iBit),
4398 "m" (*(volatile long *)pvBitmap)
4399 : "memory");
4400# else
4401 __asm
4402 {
4403# ifdef RT_ARCH_AMD64
4404 mov rax, [pvBitmap]
4405 mov edx, [iBit]
4406 btc [rax], edx
4407# else
4408 mov eax, [pvBitmap]
4409 mov edx, [iBit]
4410 btc [eax], edx
4411# endif
4412 }
4413# endif
4414}
4415#endif
4416
4417
4418/**
4419 * Atomically toggles a bit in a bitmap, ordered.
4420 *
4421 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4422 * the memory access isn't atomic!
4423 * @param iBit The bit to toggle.
4424 *
4425 * @remarks x86: Requires a 386 or later.
4426 */
4427#if RT_INLINE_ASM_EXTERNAL
4428RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4429#else
4430DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4431{
4432 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4433# if RT_INLINE_ASM_GNU_STYLE
4434 __asm__ __volatile__("lock; btcl %1, %0"
4435 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4436 : "Ir" (iBit),
4437 "m" (*(volatile long RT_FAR *)pvBitmap)
4438 : "memory");
4439# else
4440 __asm
4441 {
4442# ifdef RT_ARCH_AMD64
4443 mov rax, [pvBitmap]
4444 mov edx, [iBit]
4445 lock btc [rax], edx
4446# else
4447 mov eax, [pvBitmap]
4448 mov edx, [iBit]
4449 lock btc [eax], edx
4450# endif
4451 }
4452# endif
4453}
4454#endif
4455
4456
4457/**
4458 * Tests and sets a bit in a bitmap.
4459 *
4460 * @returns true if the bit was set.
4461 * @returns false if the bit was clear.
4462 *
4463 * @param pvBitmap Pointer to the bitmap.
4464 * @param iBit The bit to test and set.
4465 *
4466 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4467 * However, doing so will yield better performance as well as avoiding
4468 * traps accessing the last bits in the bitmap.
4469 */
4470#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4471RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4472#else
4473DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4474{
4475 union { bool f; uint32_t u32; uint8_t u8; } rc;
4476# if RT_INLINE_ASM_USES_INTRIN
4477 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4478
4479# elif RT_INLINE_ASM_GNU_STYLE
4480 __asm__ __volatile__("btsl %2, %1\n\t"
4481 "setc %b0\n\t"
4482 "andl $1, %0\n\t"
4483 : "=q" (rc.u32),
4484 "=m" (*(volatile long RT_FAR *)pvBitmap)
4485 : "Ir" (iBit),
4486 "m" (*(volatile long RT_FAR *)pvBitmap)
4487 : "memory");
4488# else
4489 __asm
4490 {
4491 mov edx, [iBit]
4492# ifdef RT_ARCH_AMD64
4493 mov rax, [pvBitmap]
4494 bts [rax], edx
4495# else
4496 mov eax, [pvBitmap]
4497 bts [eax], edx
4498# endif
4499 setc al
4500 and eax, 1
4501 mov [rc.u32], eax
4502 }
4503# endif
4504 return rc.f;
4505}
4506#endif
4507
4508
4509/**
4510 * Atomically tests and sets a bit in a bitmap, ordered.
4511 *
4512 * @returns true if the bit was set.
4513 * @returns false if the bit was clear.
4514 *
4515 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4516 * the memory access isn't atomic!
4517 * @param iBit The bit to test and set.
4518 *
4519 * @remarks x86: Requires a 386 or later.
4520 */
4521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4522RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4523#else
4524DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4525{
4526 union { bool f; uint32_t u32; uint8_t u8; } rc;
4527 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4528# if RT_INLINE_ASM_USES_INTRIN
4529 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4530# elif RT_INLINE_ASM_GNU_STYLE
4531 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4532 "setc %b0\n\t"
4533 "andl $1, %0\n\t"
4534 : "=q" (rc.u32),
4535 "=m" (*(volatile long RT_FAR *)pvBitmap)
4536 : "Ir" (iBit),
4537 "m" (*(volatile long RT_FAR *)pvBitmap)
4538 : "memory");
4539# else
4540 __asm
4541 {
4542 mov edx, [iBit]
4543# ifdef RT_ARCH_AMD64
4544 mov rax, [pvBitmap]
4545 lock bts [rax], edx
4546# else
4547 mov eax, [pvBitmap]
4548 lock bts [eax], edx
4549# endif
4550 setc al
4551 and eax, 1
4552 mov [rc.u32], eax
4553 }
4554# endif
4555 return rc.f;
4556}
4557#endif
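/*
 * Usage sketch (illustrative): claiming a slot in a shared allocation bitmap.
 * The bitmap and slot count are hypothetical; what matters is that the bitmap
 * is 32-bit aligned so the lock prefixed access really is atomic.
 *
 *     static uint32_t volatile g_bmSlots[4];    // 128 slots, zero initialized
 *
 *     static int32_t myClaimSlot(void)
 *     {
 *         for (int32_t iSlot = 0; iSlot < 128; iSlot++)
 *             if (!ASMAtomicBitTestAndSet(&g_bmSlots[0], iSlot))
 *                 return iSlot;                 // bit was clear, now it is ours
 *         return -1;                            // all taken
 *     }
 *
 *     // Release with: ASMAtomicBitClear(&g_bmSlots[0], iSlot);
 */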
4558
4559
4560/**
4561 * Tests and clears a bit in a bitmap.
4562 *
4563 * @returns true if the bit was set.
4564 * @returns false if the bit was clear.
4565 *
4566 * @param pvBitmap Pointer to the bitmap.
4567 * @param iBit The bit to test and clear.
4568 *
4569 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4570 * However, doing so will yield better performance as well as avoiding
4571 * traps accessing the last bits in the bitmap.
4572 */
4573#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4574RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4575#else
4576DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4577{
4578 union { bool f; uint32_t u32; uint8_t u8; } rc;
4579# if RT_INLINE_ASM_USES_INTRIN
4580 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4581
4582# elif RT_INLINE_ASM_GNU_STYLE
4583 __asm__ __volatile__("btrl %2, %1\n\t"
4584 "setc %b0\n\t"
4585 "andl $1, %0\n\t"
4586 : "=q" (rc.u32),
4587 "=m" (*(volatile long RT_FAR *)pvBitmap)
4588 : "Ir" (iBit),
4589 "m" (*(volatile long RT_FAR *)pvBitmap)
4590 : "memory");
4591# else
4592 __asm
4593 {
4594 mov edx, [iBit]
4595# ifdef RT_ARCH_AMD64
4596 mov rax, [pvBitmap]
4597 btr [rax], edx
4598# else
4599 mov eax, [pvBitmap]
4600 btr [eax], edx
4601# endif
4602 setc al
4603 and eax, 1
4604 mov [rc.u32], eax
4605 }
4606# endif
4607 return rc.f;
4608}
4609#endif
4610
4611
4612/**
4613 * Atomically tests and clears a bit in a bitmap, ordered.
4614 *
4615 * @returns true if the bit was set.
4616 * @returns false if the bit was clear.
4617 *
4618 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4619 * the memory access isn't atomic!
4620 * @param iBit The bit to test and clear.
4621 *
4622 * @remarks No memory barrier, take care on smp.
4623 * @remarks x86: Requires a 386 or later.
4624 */
4625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4626RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4627#else
4628DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4629{
4630 union { bool f; uint32_t u32; uint8_t u8; } rc;
4631 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4632# if RT_INLINE_ASM_USES_INTRIN
4633 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4634
4635# elif RT_INLINE_ASM_GNU_STYLE
4636 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4637 "setc %b0\n\t"
4638 "andl $1, %0\n\t"
4639 : "=q" (rc.u32),
4640 "=m" (*(volatile long RT_FAR *)pvBitmap)
4641 : "Ir" (iBit),
4642 "m" (*(volatile long RT_FAR *)pvBitmap)
4643 : "memory");
4644# else
4645 __asm
4646 {
4647 mov edx, [iBit]
4648# ifdef RT_ARCH_AMD64
4649 mov rax, [pvBitmap]
4650 lock btr [rax], edx
4651# else
4652 mov eax, [pvBitmap]
4653 lock btr [eax], edx
4654# endif
4655 setc al
4656 and eax, 1
4657 mov [rc.u32], eax
4658 }
4659# endif
4660 return rc.f;
4661}
4662#endif
4663
4664
4665/**
4666 * Tests and toggles a bit in a bitmap.
4667 *
4668 * @returns true if the bit was set.
4669 * @returns false if the bit was clear.
4670 *
4671 * @param pvBitmap Pointer to the bitmap.
4672 * @param iBit The bit to test and toggle.
4673 *
4674 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4675 * However, doing so will yield better performance as well as avoiding
4676 * traps accessing the last bits in the bitmap.
4677 */
4678#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4679RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4680#else
4681DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4682{
4683 union { bool f; uint32_t u32; uint8_t u8; } rc;
4684# if RT_INLINE_ASM_USES_INTRIN
4685 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4686
4687# elif RT_INLINE_ASM_GNU_STYLE
4688 __asm__ __volatile__("btcl %2, %1\n\t"
4689 "setc %b0\n\t"
4690 "andl $1, %0\n\t"
4691 : "=q" (rc.u32),
4692 "=m" (*(volatile long RT_FAR *)pvBitmap)
4693 : "Ir" (iBit),
4694 "m" (*(volatile long RT_FAR *)pvBitmap)
4695 : "memory");
4696# else
4697 __asm
4698 {
4699 mov edx, [iBit]
4700# ifdef RT_ARCH_AMD64
4701 mov rax, [pvBitmap]
4702 btc [rax], edx
4703# else
4704 mov eax, [pvBitmap]
4705 btc [eax], edx
4706# endif
4707 setc al
4708 and eax, 1
4709 mov [rc.u32], eax
4710 }
4711# endif
4712 return rc.f;
4713}
4714#endif
4715
4716
4717/**
4718 * Atomically tests and toggles a bit in a bitmap, ordered.
4719 *
4720 * @returns true if the bit was set.
4721 * @returns false if the bit was clear.
4722 *
4723 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4724 * the memory access isn't atomic!
4725 * @param iBit The bit to test and toggle.
4726 *
4727 * @remarks x86: Requires a 386 or later.
4728 */
4729#if RT_INLINE_ASM_EXTERNAL
4730RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4731#else
4732DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4733{
4734 union { bool f; uint32_t u32; uint8_t u8; } rc;
4735 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4736# if RT_INLINE_ASM_GNU_STYLE
4737 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4738 "setc %b0\n\t"
4739 "andl $1, %0\n\t"
4740 : "=q" (rc.u32),
4741 "=m" (*(volatile long RT_FAR *)pvBitmap)
4742 : "Ir" (iBit),
4743 "m" (*(volatile long RT_FAR *)pvBitmap)
4744 : "memory");
4745# else
4746 __asm
4747 {
4748 mov edx, [iBit]
4749# ifdef RT_ARCH_AMD64
4750 mov rax, [pvBitmap]
4751 lock btc [rax], edx
4752# else
4753 mov eax, [pvBitmap]
4754 lock btc [eax], edx
4755# endif
4756 setc al
4757 and eax, 1
4758 mov [rc.u32], eax
4759 }
4760# endif
4761 return rc.f;
4762}
4763#endif
4764
4765
4766/**
4767 * Tests if a bit in a bitmap is set.
4768 *
4769 * @returns true if the bit is set.
4770 * @returns false if the bit is clear.
4771 *
4772 * @param pvBitmap Pointer to the bitmap.
4773 * @param iBit The bit to test.
4774 *
4775 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4776 * However, doing so will yield better performance as well as avoiding
4777 * traps accessing the last bits in the bitmap.
4778 */
4779#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4780RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4781#else
4782DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4783{
4784 union { bool f; uint32_t u32; uint8_t u8; } rc;
4785# if RT_INLINE_ASM_USES_INTRIN
4786 rc.u32 = _bittest((long *)pvBitmap, iBit);
4787# elif RT_INLINE_ASM_GNU_STYLE
4788
4789 __asm__ __volatile__("btl %2, %1\n\t"
4790 "setc %b0\n\t"
4791 "andl $1, %0\n\t"
4792 : "=q" (rc.u32)
4793 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4794 "Ir" (iBit)
4795 : "memory");
4796# else
4797 __asm
4798 {
4799 mov edx, [iBit]
4800# ifdef RT_ARCH_AMD64
4801 mov rax, [pvBitmap]
4802 bt [rax], edx
4803# else
4804 mov eax, [pvBitmap]
4805 bt [eax], edx
4806# endif
4807 setc al
4808 and eax, 1
4809 mov [rc.u32], eax
4810 }
4811# endif
4812 return rc.f;
4813}
4814#endif
4815
4816
4817/**
4818 * Clears a bit range within a bitmap.
4819 *
4820 * @param pvBitmap Pointer to the bitmap.
4821 * @param iBitStart The first bit to clear.
4822 * @param iBitEnd The first bit not to clear.
4823 */
4824DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4825{
4826 if (iBitStart < iBitEnd)
4827 {
4828 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4829 int32_t iStart = iBitStart & ~31;
4830 int32_t iEnd = iBitEnd & ~31;
4831 if (iStart == iEnd)
4832 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4833 else
4834 {
4835 /* bits in first dword. */
4836 if (iBitStart & 31)
4837 {
4838 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4839 pu32++;
4840 iBitStart = iStart + 32;
4841 }
4842
4843 /* whole dword. */
4844 if (iBitStart != iEnd)
4845 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4846
4847 /* bits in last dword. */
4848 if (iBitEnd & 31)
4849 {
4850 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4851 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4852 }
4853 }
4854 }
4855}
4856
4857
4858/**
4859 * Sets a bit range within a bitmap.
4860 *
4861 * @param pvBitmap Pointer to the bitmap.
4862 * @param iBitStart The first bit to set.
4863 * @param iBitEnd The first bit not to set.
4864 */
4865DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4866{
4867 if (iBitStart < iBitEnd)
4868 {
4869 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4870 int32_t iStart = iBitStart & ~31;
4871 int32_t iEnd = iBitEnd & ~31;
4872 if (iStart == iEnd)
4873 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4874 else
4875 {
4876 /* bits in first dword. */
4877 if (iBitStart & 31)
4878 {
4879 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4880 pu32++;
4881 iBitStart = iStart + 32;
4882 }
4883
4884 /* whole dword. */
4885 if (iBitStart != iEnd)
4886 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4887
4888 /* bits in last dword. */
4889 if (iBitEnd & 31)
4890 {
4891 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4892 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4893 }
4894 }
4895 }
4896}
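/*
 * Usage sketch (illustrative): marking a run of pages as used in a simple,
 * non-atomic page allocation bitmap and releasing them again.  The bitmap size
 * is made up; note the half-open [iBitStart, iBitEnd) range convention.
 *
 *     uint32_t bmPages[256 / 32];               // one bit per page, 256 pages
 *     ASMMemZero32(bmPages, sizeof(bmPages));
 *     ASMBitSetRange(bmPages, 10, 14);          // pages 10, 11, 12 and 13 are now used
 *     ASMBitClearRange(bmPages, 10, 14);        // ... and free again
 */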
4897
4898
4899/**
4900 * Finds the first clear bit in a bitmap.
4901 *
4902 * @returns Index of the first zero bit.
4903 * @returns -1 if no clear bit was found.
4904 * @param pvBitmap Pointer to the bitmap.
4905 * @param cBits The number of bits in the bitmap. Multiple of 32.
4906 */
4907#if RT_INLINE_ASM_EXTERNAL
4908DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4909#else
4910DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4911{
4912 if (cBits)
4913 {
4914 int32_t iBit;
4915# if RT_INLINE_ASM_GNU_STYLE
4916 RTCCUINTREG uEAX, uECX, uEDI;
4917 cBits = RT_ALIGN_32(cBits, 32);
4918 __asm__ __volatile__("repe; scasl\n\t"
4919 "je 1f\n\t"
4920# ifdef RT_ARCH_AMD64
4921 "lea -4(%%rdi), %%rdi\n\t"
4922 "xorl (%%rdi), %%eax\n\t"
4923 "subq %5, %%rdi\n\t"
4924# else
4925 "lea -4(%%edi), %%edi\n\t"
4926 "xorl (%%edi), %%eax\n\t"
4927 "subl %5, %%edi\n\t"
4928# endif
4929 "shll $3, %%edi\n\t"
4930 "bsfl %%eax, %%edx\n\t"
4931 "addl %%edi, %%edx\n\t"
4932 "1:\t\n"
4933 : "=d" (iBit),
4934 "=&c" (uECX),
4935 "=&D" (uEDI),
4936 "=&a" (uEAX)
4937 : "0" (0xffffffff),
4938 "mr" (pvBitmap),
4939 "1" (cBits >> 5),
4940 "2" (pvBitmap),
4941 "3" (0xffffffff));
4942# else
4943 cBits = RT_ALIGN_32(cBits, 32);
4944 __asm
4945 {
4946# ifdef RT_ARCH_AMD64
4947 mov rdi, [pvBitmap]
4948 mov rbx, rdi
4949# else
4950 mov edi, [pvBitmap]
4951 mov ebx, edi
4952# endif
4953 mov edx, 0ffffffffh
4954 mov eax, edx
4955 mov ecx, [cBits]
4956 shr ecx, 5
4957 repe scasd
4958 je done
4959
4960# ifdef RT_ARCH_AMD64
4961 lea rdi, [rdi - 4]
4962 xor eax, [rdi]
4963 sub rdi, rbx
4964# else
4965 lea edi, [edi - 4]
4966 xor eax, [edi]
4967 sub edi, ebx
4968# endif
4969 shl edi, 3
4970 bsf edx, eax
4971 add edx, edi
4972 done:
4973 mov [iBit], edx
4974 }
4975# endif
4976 return iBit;
4977 }
4978 return -1;
4979}
4980#endif
4981
4982
4983/**
4984 * Finds the next clear bit in a bitmap.
4985 *
4986 * @returns Index of the next clear bit.
4987 * @returns -1 if no clear bit was found.
4988 * @param pvBitmap Pointer to the bitmap.
4989 * @param cBits The number of bits in the bitmap. Multiple of 32.
4990 * @param iBitPrev The bit returned from the last search.
4991 * The search will start at iBitPrev + 1.
4992 */
4993#if RT_INLINE_ASM_EXTERNAL
4994DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4995#else
4996DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4997{
4998 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
4999 int iBit = ++iBitPrev & 31;
5000 if (iBit)
5001 {
5002 /*
5003 * Inspect the 32-bit word containing the unaligned bit.
5004 */
5005 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5006
5007# if RT_INLINE_ASM_USES_INTRIN
5008 unsigned long ulBit = 0;
5009 if (_BitScanForward(&ulBit, u32))
5010 return ulBit + iBitPrev;
5011# else
5012# if RT_INLINE_ASM_GNU_STYLE
5013 __asm__ __volatile__("bsf %1, %0\n\t"
5014 "jnz 1f\n\t"
5015 "movl $-1, %0\n\t"
5016 "1:\n\t"
5017 : "=r" (iBit)
5018 : "r" (u32));
5019# else
5020 __asm
5021 {
5022 mov edx, [u32]
5023 bsf eax, edx
5024 jnz done
5025 mov eax, 0ffffffffh
5026 done:
5027 mov [iBit], eax
5028 }
5029# endif
5030 if (iBit >= 0)
5031 return iBit + iBitPrev;
5032# endif
5033
5034 /*
5035 * Skip ahead and see if there is anything left to search.
5036 */
5037 iBitPrev |= 31;
5038 iBitPrev++;
5039 if (cBits <= (uint32_t)iBitPrev)
5040 return -1;
5041 }
5042
5043 /*
5044 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5045 */
5046 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5047 if (iBit >= 0)
5048 iBit += iBitPrev;
5049 return iBit;
5050}
5051#endif
5052
5053
5054/**
5055 * Finds the first set bit in a bitmap.
5056 *
5057 * @returns Index of the first set bit.
5058 * @returns -1 if no set bit was found.
5059 * @param pvBitmap Pointer to the bitmap.
5060 * @param cBits The number of bits in the bitmap. Multiple of 32.
5061 */
5062#if RT_INLINE_ASM_EXTERNAL
5063DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5064#else
5065DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5066{
5067 if (cBits)
5068 {
5069 int32_t iBit;
5070# if RT_INLINE_ASM_GNU_STYLE
5071 RTCCUINTREG uEAX, uECX, uEDI;
5072 cBits = RT_ALIGN_32(cBits, 32);
5073 __asm__ __volatile__("repe; scasl\n\t"
5074 "je 1f\n\t"
5075# ifdef RT_ARCH_AMD64
5076 "lea -4(%%rdi), %%rdi\n\t"
5077 "movl (%%rdi), %%eax\n\t"
5078 "subq %5, %%rdi\n\t"
5079# else
5080 "lea -4(%%edi), %%edi\n\t"
5081 "movl (%%edi), %%eax\n\t"
5082 "subl %5, %%edi\n\t"
5083# endif
5084 "shll $3, %%edi\n\t"
5085 "bsfl %%eax, %%edx\n\t"
5086 "addl %%edi, %%edx\n\t"
5087 "1:\t\n"
5088 : "=d" (iBit),
5089 "=&c" (uECX),
5090 "=&D" (uEDI),
5091 "=&a" (uEAX)
5092 : "0" (0xffffffff),
5093 "mr" (pvBitmap),
5094 "1" (cBits >> 5),
5095 "2" (pvBitmap),
5096 "3" (0));
5097# else
5098 cBits = RT_ALIGN_32(cBits, 32);
5099 __asm
5100 {
5101# ifdef RT_ARCH_AMD64
5102 mov rdi, [pvBitmap]
5103 mov rbx, rdi
5104# else
5105 mov edi, [pvBitmap]
5106 mov ebx, edi
5107# endif
5108 mov edx, 0ffffffffh
5109 xor eax, eax
5110 mov ecx, [cBits]
5111 shr ecx, 5
5112 repe scasd
5113 je done
5114# ifdef RT_ARCH_AMD64
5115 lea rdi, [rdi - 4]
5116 mov eax, [rdi]
5117 sub rdi, rbx
5118# else
5119 lea edi, [edi - 4]
5120 mov eax, [edi]
5121 sub edi, ebx
5122# endif
5123 shl edi, 3
5124 bsf edx, eax
5125 add edx, edi
5126 done:
5127 mov [iBit], edx
5128 }
5129# endif
5130 return iBit;
5131 }
5132 return -1;
5133}
5134#endif
5135
5136
5137/**
5138 * Finds the next set bit in a bitmap.
5139 *
5140 * @returns Index of the next set bit.
5141 * @returns -1 if no set bit was found.
5142 * @param pvBitmap Pointer to the bitmap.
5143 * @param cBits The number of bits in the bitmap. Multiple of 32.
5144 * @param iBitPrev The bit returned from the last search.
5145 * The search will start at iBitPrev + 1.
5146 */
5147#if RT_INLINE_ASM_EXTERNAL
5148DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5149#else
5150DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5151{
5152 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5153 int iBit = ++iBitPrev & 31;
5154 if (iBit)
5155 {
5156 /*
5157 * Inspect the 32-bit word containing the unaligned bit.
5158 */
5159 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5160
5161# if RT_INLINE_ASM_USES_INTRIN
5162 unsigned long ulBit = 0;
5163 if (_BitScanForward(&ulBit, u32))
5164 return ulBit + iBitPrev;
5165# else
5166# if RT_INLINE_ASM_GNU_STYLE
5167 __asm__ __volatile__("bsf %1, %0\n\t"
5168 "jnz 1f\n\t"
5169 "movl $-1, %0\n\t"
5170 "1:\n\t"
5171 : "=r" (iBit)
5172 : "r" (u32));
5173# else
5174 __asm
5175 {
5176 mov edx, [u32]
5177 bsf eax, edx
5178 jnz done
5179 mov eax, 0ffffffffh
5180 done:
5181 mov [iBit], eax
5182 }
5183# endif
5184 if (iBit >= 0)
5185 return iBit + iBitPrev;
5186# endif
5187
5188 /*
5189 * Skip ahead and see if there is anything left to search.
5190 */
5191 iBitPrev |= 31;
5192 iBitPrev++;
5193 if (cBits <= (uint32_t)iBitPrev)
5194 return -1;
5195 }
5196
5197 /*
5198 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5199 */
5200 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5201 if (iBit >= 0)
5202 iBit += iBitPrev;
5203 return iBit;
5204}
5205#endif
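/*
 * Usage sketch (illustrative): walking all set bits in a bitmap, e.g. to visit
 * every pending entry in a 256 entry table.  The bitmap (uint32_t bmPending[8])
 * and the handler are hypothetical; the search functions return -1 when nothing
 * (more) is found and expect cBits to be a multiple of 32.
 *
 *     int32_t iBit = ASMBitFirstSet(bmPending, 256);
 *     while (iBit >= 0)
 *     {
 *         myProcessEntry(iBit);                        // hypothetical handler
 *         iBit = ASMBitNextSet(bmPending, 256, iBit);
 *     }
 */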
5206
5207
5208/**
5209 * Finds the first bit which is set in the given 32-bit integer.
5210 * Bits are numbered from 1 (least significant) to 32.
5211 *
5212 * @returns index [1..32] of the first set bit.
5213 * @returns 0 if all bits are cleared.
5214 * @param u32 Integer to search for set bits.
5215 * @remarks Similar to ffs() in BSD.
5216 */
5217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5218RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32);
5219#else
5220DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5221{
5222# if RT_INLINE_ASM_USES_INTRIN
5223 unsigned long iBit;
5224 if (_BitScanForward(&iBit, u32))
5225 iBit++;
5226 else
5227 iBit = 0;
5228# elif RT_INLINE_ASM_GNU_STYLE
5229 uint32_t iBit;
5230 __asm__ __volatile__("bsf %1, %0\n\t"
5231 "jnz 1f\n\t"
5232 "xorl %0, %0\n\t"
5233 "jmp 2f\n"
5234 "1:\n\t"
5235 "incl %0\n"
5236 "2:\n\t"
5237 : "=r" (iBit)
5238 : "rm" (u32));
5239# else
5240 uint32_t iBit;
5241 _asm
5242 {
5243 bsf eax, [u32]
5244 jnz found
5245 xor eax, eax
5246 jmp done
5247 found:
5248 inc eax
5249 done:
5250 mov [iBit], eax
5251 }
5252# endif
5253 return iBit;
5254}
5255#endif
5256
5257
5258/**
5259 * Finds the first bit which is set in the given 32-bit integer.
5260 * Bits are numbered from 1 (least significant) to 32.
5261 *
5262 * @returns index [1..32] of the first set bit.
5263 * @returns 0 if all bits are cleared.
5264 * @param i32 Integer to search for set bits.
5265 * @remark Similar to ffs() in BSD.
5266 */
5267DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5268{
5269 return ASMBitFirstSetU32((uint32_t)i32);
5270}
5271
5272
5273/**
5274 * Finds the first bit which is set in the given 64-bit integer.
5275 *
5276 * Bits are numbered from 1 (least significant) to 64.
5277 *
5278 * @returns index [1..64] of the first set bit.
5279 * @returns 0 if all bits are cleared.
5280 * @param u64 Integer to search for set bits.
5281 * @remarks Similar to ffs() in BSD.
5282 */
5283#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5284RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64);
5285#else
5286DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5287{
5288# if RT_INLINE_ASM_USES_INTRIN
5289 unsigned long iBit;
5290# if ARCH_BITS == 64
5291 if (_BitScanForward64(&iBit, u64))
5292 iBit++;
5293 else
5294 iBit = 0;
5295# else
5296 if (_BitScanForward(&iBit, (uint32_t)u64))
5297 iBit++;
5298 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5299 iBit += 33;
5300 else
5301 iBit = 0;
5302# endif
5303# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5304 uint64_t iBit;
5305 __asm__ __volatile__("bsfq %1, %0\n\t"
5306 "jnz 1f\n\t"
5307 "xorl %k0, %k0\n\t"
5308 "jmp 2f\n"
5309 "1:\n\t"
5310 "incl %k0\n"
5311 "2:\n\t"
5312 : "=r" (iBit)
5313 : "rm" (u64));
5314# else
5315 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5316 if (!iBit)
5317 {
5318 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5319 if (iBit)
5320 iBit += 32;
5321 }
5322# endif
5323 return (unsigned)iBit;
5324}
5325#endif
5326
5327
5328/**
5329 * Finds the first bit which is set in the given 16-bit integer.
5330 *
5331 * Bits are numbered from 1 (least significant) to 16.
5332 *
5333 * @returns index [1..16] of the first set bit.
5334 * @returns 0 if all bits are cleared.
5335 * @param u16 Integer to search for set bits.
5336 * @remarks For 16-bit bs3kit code.
5337 */
5338#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5339RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16);
5340#else
5341DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5342{
5343 return ASMBitFirstSetU32((uint32_t)u16);
5344}
5345#endif
5346
5347
5348/**
5349 * Finds the last bit which is set in the given 32-bit integer.
5350 * Bits are numbered from 1 (least significant) to 32.
5351 *
5352 * @returns index [1..32] of the last set bit.
5353 * @returns 0 if all bits are cleared.
5354 * @param u32 Integer to search for set bits.
5355 * @remark Similar to fls() in BSD.
5356 */
5357#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5358RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32);
5359#else
5360DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5361{
5362# if RT_INLINE_ASM_USES_INTRIN
5363 unsigned long iBit;
5364 if (_BitScanReverse(&iBit, u32))
5365 iBit++;
5366 else
5367 iBit = 0;
5368# elif RT_INLINE_ASM_GNU_STYLE
5369 uint32_t iBit;
5370 __asm__ __volatile__("bsrl %1, %0\n\t"
5371 "jnz 1f\n\t"
5372 "xorl %0, %0\n\t"
5373 "jmp 2f\n"
5374 "1:\n\t"
5375 "incl %0\n"
5376 "2:\n\t"
5377 : "=r" (iBit)
5378 : "rm" (u32));
5379# else
5380 uint32_t iBit;
5381 _asm
5382 {
5383 bsr eax, [u32]
5384 jnz found
5385 xor eax, eax
5386 jmp done
5387 found:
5388 inc eax
5389 done:
5390 mov [iBit], eax
5391 }
5392# endif
5393 return iBit;
5394}
5395#endif
5396
5397
5398/**
5399 * Finds the last bit which is set in the given 32-bit integer.
5400 * Bits are numbered from 1 (least significant) to 32.
5401 *
5402 * @returns index [1..32] of the last set bit.
5403 * @returns 0 if all bits are cleared.
5404 * @param i32 Integer to search for set bits.
5405 * @remark Similar to fls() in BSD.
5406 */
5407DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5408{
5409 return ASMBitLastSetU32((uint32_t)i32);
5410}
5411
5412
5413/**
5414 * Finds the last bit which is set in the given 64-bit integer.
5415 *
5416 * Bits are numbered from 1 (least significant) to 64.
5417 *
5418 * @returns index [1..64] of the last set bit.
5419 * @returns 0 if all bits are cleared.
5420 * @param u64 Integer to search for set bits.
5421 * @remark Similar to fls() in BSD.
5422 */
5423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5424RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64);
5425#else
5426DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5427{
5428# if RT_INLINE_ASM_USES_INTRIN
5429 unsigned long iBit;
5430# if ARCH_BITS == 64
5431 if (_BitScanReverse64(&iBit, u64))
5432 iBit++;
5433 else
5434 iBit = 0;
5435# else
5436 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5437 iBit += 33;
5438 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5439 iBit++;
5440 else
5441 iBit = 0;
5442# endif
5443# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5444 uint64_t iBit;
5445 __asm__ __volatile__("bsrq %1, %0\n\t"
5446 "jnz 1f\n\t"
5447 "xorl %k0, %k0\n\t"
5448 "jmp 2f\n"
5449 "1:\n\t"
5450 "incl %k0\n"
5451 "2:\n\t"
5452 : "=r" (iBit)
5453 : "rm" (u64));
5454# else
5455 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5456 if (iBit)
5457 iBit += 32;
5458 else
5459 iBit = ASMBitLastSetU32((uint32_t)u64);
5460#endif
5461 return (unsigned)iBit;
5462}
5463#endif
5464
5465
5466/**
5467 * Finds the last bit which is set in the given 16-bit integer.
5468 *
5469 * Bits are numbered from 1 (least significant) to 16.
5470 *
5471 * @returns index [1..16] of the last set bit.
5472 * @returns 0 if all bits are cleared.
5473 * @param u16 Integer to search for set bits.
5474 * @remarks For 16-bit bs3kit code.
5475 */
5476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5477RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16);
5478#else
5479DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5480{
5481 return ASMBitLastSetU32((uint32_t)u16);
5482}
5483#endif
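/*
 * Usage sketch (illustrative): the 1-based "last set bit" index doubles as the
 * number of bits needed to represent a value.  uValue is a made up input.
 *
 *     unsigned cBitsNeeded = ASMBitLastSetU32(uValue);  // 1..32, or 0 when uValue is 0
 *     // e.g. uValue == 0x28 (binary 101000) gives cBitsNeeded == 6
 */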
5484
5485
5486/**
5487 * Reverse the byte order of the given 16-bit integer.
5488 *
5489 * @returns The byte-swapped value.
5490 * @param u16 16-bit integer value.
5491 */
5492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5493RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16);
5494#else
5495DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5496{
5497# if RT_INLINE_ASM_USES_INTRIN
5498 u16 = _byteswap_ushort(u16);
5499# elif RT_INLINE_ASM_GNU_STYLE
5500 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5501# else
5502 _asm
5503 {
5504 mov ax, [u16]
5505 ror ax, 8
5506 mov [u16], ax
5507 }
5508# endif
5509 return u16;
5510}
5511#endif
5512
5513
5514/**
5515 * Reverse the byte order of the given 32-bit integer.
5516 *
5517 * @returns The byte-swapped value.
5518 * @param u32 32-bit integer value.
5519 */
5520#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5521RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32);
5522#else
5523DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5524{
5525# if RT_INLINE_ASM_USES_INTRIN
5526 u32 = _byteswap_ulong(u32);
5527# elif RT_INLINE_ASM_GNU_STYLE
5528 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5529# else
5530 _asm
5531 {
5532 mov eax, [u32]
5533 bswap eax
5534 mov [u32], eax
5535 }
5536# endif
5537 return u32;
5538}
5539#endif
5540
5541
5542/**
5543 * Reverse the byte order of the given 64-bit integer.
5544 *
5545 * @returns The byte-swapped value.
5546 * @param u64 64-bit integer value.
5547 */
5548DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5549{
5550#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5551 u64 = _byteswap_uint64(u64);
5552#else
5553 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5554 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5555#endif
5556 return u64;
5557}
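/*
 * Usage sketch (illustrative): converting a 32-bit value read from a
 * big-endian wire format on a little-endian host.
 *
 *     uint32_t u32Wire = UINT32_C(0x12345678);      // as stored in the packet
 *     uint32_t u32Host = ASMByteSwapU32(u32Wire);   // yields 0x78563412
 */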
5558
5559
5560/**
5561 * Rotate 32-bit unsigned value to the left by @a cShift.
5562 *
5563 * @returns Rotated value.
5564 * @param u32 The value to rotate.
5565 * @param cShift How many bits to rotate by.
5566 */
5567#ifdef __WATCOMC__
5568RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5569#else
5570DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5571{
5572# if RT_INLINE_ASM_USES_INTRIN
5573 return _rotl(u32, cShift);
5574# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5575 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5576 return u32;
5577# else
5578 cShift &= 31;
5579 return (u32 << cShift) | (u32 >> (32 - cShift));
5580# endif
5581}
5582#endif
5583
5584
5585/**
5586 * Rotate 32-bit unsigned value to the right by @a cShift.
5587 *
5588 * @returns Rotated value.
5589 * @param u32 The value to rotate.
5590 * @param cShift How many bits to rotate by.
5591 */
5592#ifdef __WATCOMC__
5593RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5594#else
5595DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5596{
5597# if RT_INLINE_ASM_USES_INTRIN
5598 return _rotr(u32, cShift);
5599# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5600 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5601 return u32;
5602# else
5603 cShift &= 31;
5604 return (u32 >> cShift) | (u32 << (32 - cShift));
5605# endif
5606}
5607#endif
5608
5609
5610/**
5611 * Rotate 64-bit unsigned value to the left by @a cShift.
5612 *
5613 * @returns Rotated value.
5614 * @param u64 The value to rotate.
5615 * @param cShift How many bits to rotate by.
5616 */
5617DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5618{
5619#if RT_INLINE_ASM_USES_INTRIN
5620 return _rotl64(u64, cShift);
5621#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5622 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5623 return u64;
5624#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5625 uint32_t uSpill;
5626 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5627 "jz 1f\n\t"
5628 "xchgl %%eax, %%edx\n\t"
5629 "1:\n\t"
5630 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5631 "jz 2f\n\t"
5632 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5633 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5634 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5635 "2:\n\t" /* } */
5636 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5637 : "0" (u64),
5638 "1" (cShift));
5639 return u64;
5640#else
5641 cShift &= 63;
5642 return (u64 << cShift) | (u64 >> (64 - cShift));
5643#endif
5644}
5645
5646
5647/**
5648 * Rotate 64-bit unsigned value to the right by @a cShift.
5649 *
5650 * @returns Rotated value.
5651 * @param u64 The value to rotate.
5652 * @param cShift How many bits to rotate by.
5653 */
5654DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5655{
5656#if RT_INLINE_ASM_USES_INTRIN
5657 return _rotr64(u64, cShift);
5658#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5659 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5660 return u64;
5661#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5662 uint32_t uSpill;
5663 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5664 "jz 1f\n\t"
5665 "xchgl %%eax, %%edx\n\t"
5666 "1:\n\t"
5667 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5668 "jz 2f\n\t"
5669 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5670 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5671 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5672 "2:\n\t" /* } */
5673 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5674 : "0" (u64),
5675 "1" (cShift));
5676 return u64;
5677#else
5678 cShift &= 63;
5679 return (u64 >> cShift) | (u64 << (64 - cShift));
5680#endif
5681}
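/*
 * Usage sketch (illustrative): rotates are handy for simple hash mixing.  The
 * rotate count and multiplier below are arbitrary example values, not a
 * recommended hash function.
 *
 *     static uint64_t myMix(uint64_t uHash, uint64_t uData)
 *     {
 *         uHash ^= uData;
 *         uHash  = ASMRotateLeftU64(uHash, 27);
 *         return uHash * UINT64_C(0x9e3779b97f4a7c15);
 *     }
 */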
5682
5683/** @} */
5684
5685
5686/** @} */
5687
5688/*
5689 * Include #pragma aux definitions for Watcom C/C++.
5690 */
5691#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
5692# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
5693# undef IPRT_INCLUDED_asm_watcom_x86_16_h
5694# include "asm-watcom-x86-16.h"
5695#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
5696# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
5697# undef IPRT_INCLUDED_asm_watcom_x86_32_h
5698# include "asm-watcom-x86-32.h"
5699#endif
5700
5701#endif /* !IPRT_INCLUDED_asm_h */
5702