VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 75131

Last change on this file was r75131, checked in by vboxsync, 6 years ago

iprt/asm*.h,VMMDev.h: Watcom adjustments.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using the Visual C++ intrinsics (_MSC_VER >= 1400).
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
90 */
91#undef RT_ASM_DECL_PRAGMA_WATCOM
92#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-16.h"
94# define RT_ASM_DECL_PRAGMA_WATCOM(type) type
95#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
96# include "asm-watcom-x86-32.h"
97# define RT_ASM_DECL_PRAGMA_WATCOM(type) type
98#else
99# define RT_ASM_DECL_PRAGMA_WATCOM(type) DECLASM(type)
100#endif
101
102
103
104/** @defgroup grp_rt_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing,
109 * while the latter doesn't make any promises about the order. Ordered
110 * operations don't, it seems, make any 100% promise with respect to whether
111 * the operation will complete before any subsequent memory access.
112 * (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint32_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
127 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
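/* Example: a minimal sketch of what "ordered" buys you (hypothetical names).
 * Because ASMAtomicXchgU32 is ordered, the payload store below is guaranteed
 * to be visible to other CPUs no later than the flag update:
 * @code
 *      static uint32_t volatile g_uPayload;
 *      static uint32_t volatile g_fReady;
 *
 *      g_uPayload = 42;                    // plain store
 *      ASMAtomicXchgU32(&g_fReady, 1);     // ordered - completes the store above first
 * @endcode
 * An unordered (Uo) operation would make no such promise. */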
142
143
144/** @def RT_INLINE_ASM_GCC_4_3_X_X86
145 * Used to work around some 4.3.x register allocation issues in this version of
146 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
147 * definitely not for 5.x. */
148#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
149# define RT_INLINE_ASM_GCC_4_3_X_X86 1
150#else
151# define RT_INLINE_ASM_GCC_4_3_X_X86 0
152#endif
153
154/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
155 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
156 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
157 * mode, x86.
158 *
159 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
160 * when in PIC mode on x86.
161 */
162#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
163# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# elif ( (defined(PIC) || defined(__PIC__)) \
168 && defined(RT_ARCH_X86) \
169 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
170 || defined(RT_OS_DARWIN)) )
171# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
172# else
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
174# endif
175#endif
176
177
178/** @def ASMReturnAddress
179 * Gets the return address of the current (or calling if you like) function or method.
180 */
181#ifdef _MSC_VER
182# ifdef __cplusplus
183extern "C"
184# endif
185void * _ReturnAddress(void);
186# pragma intrinsic(_ReturnAddress)
187# define ASMReturnAddress() _ReturnAddress()
188#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
189# define ASMReturnAddress() __builtin_return_address(0)
190#elif defined(__WATCOMC__)
191# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
192#else
193# error "Unsupported compiler."
194#endif
195
196
197/**
198 * Compiler memory barrier.
199 *
200 * Ensure that the compiler does not use any cached (register/tmp stack) memory
201 * values or any outstanding writes when returning from this function.
202 *
203 * This function must be used if non-volatile data is modified by a
204 * device or the VMM. Typical cases are port access, MMIO access,
205 * trapping instructions, etc.
206 */
207#if RT_INLINE_ASM_GNU_STYLE
208# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
209#elif RT_INLINE_ASM_USES_INTRIN
210# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
211#elif defined(__WATCOMC__)
212void ASMCompilerBarrier(void);
213#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
214DECLINLINE(void) ASMCompilerBarrier(void)
215{
216 __asm
217 {
218 }
219}
220#endif
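/* Example: a sketch of where ASMCompilerBarrier is needed (hypothetical names,
 * devKickReset() standing in for whatever actually pokes the device). Without
 * the barrier the compiler may keep the first status value cached in a
 * register and never re-read the memory the device just changed:
 * @code
 *      uint32_t uStatus = pDevRegs->uStatus;   // pDevRegs is not volatile
 *      devKickReset(pDevRegs);                 // device rewrites uStatus
 *      ASMCompilerBarrier();
 *      uStatus = pDevRegs->uStatus;            // now forced to re-read
 * @endcode */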
221
222
223/** @def ASMBreakpoint
224 * Debugger Breakpoint.
225 * @deprecated Use RT_BREAKPOINT instead.
226 * @internal
227 */
228#define ASMBreakpoint() RT_BREAKPOINT()
229
230
231/**
232 * Spinloop hint for platforms that have these, empty function on the other
233 * platforms.
234 *
235 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
236 * spin locks.
237 */
238#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
239RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void);
240#else
241DECLINLINE(void) ASMNopPause(void)
242{
243# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
244# if RT_INLINE_ASM_GNU_STYLE
245 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
246# else
247 __asm {
248 _emit 0f3h
249 _emit 090h
250 }
251# endif
252# else
253 /* dummy */
254# endif
255}
256#endif
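/* Example: typical spin-wait loop using the pause hint (sketch, hypothetical
 * g_fLock; a production spin lock would also back off and spin on a plain
 * read before retrying the atomic operation):
 * @code
 *      while (!ASMAtomicCmpXchgBool(&g_fLock, true, false))
 *          ASMNopPause();
 * @endcode */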
257
258
259/**
260 * Atomically Exchange an unsigned 8-bit value, ordered.
261 *
262 * @returns Current *pu8 value
263 * @param pu8 Pointer to the 8-bit variable to update.
264 * @param u8 The 8-bit value to assign to *pu8.
265 */
266#if RT_INLINE_ASM_EXTERNAL
267RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
268#else
269DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
270{
271# if RT_INLINE_ASM_GNU_STYLE
272 __asm__ __volatile__("xchgb %0, %1\n\t"
273 : "=m" (*pu8),
274 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
275 : "1" (u8),
276 "m" (*pu8));
277# else
278 __asm
279 {
280# ifdef RT_ARCH_AMD64
281 mov rdx, [pu8]
282 mov al, [u8]
283 xchg [rdx], al
284 mov [u8], al
285# else
286 mov edx, [pu8]
287 mov al, [u8]
288 xchg [edx], al
289 mov [u8], al
290# endif
291 }
292# endif
293 return u8;
294}
295#endif
296
297
298/**
299 * Atomically Exchange a signed 8-bit value, ordered.
300 *
301 * @returns Current *pi8 value
302 * @param pi8 Pointer to the 8-bit variable to update.
303 * @param i8 The 8-bit value to assign to *pi8.
304 */
305DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
306{
307 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
308}
309
310
311/**
312 * Atomically Exchange a bool value, ordered.
313 *
314 * @returns Current *pf value
315 * @param pf Pointer to the boolean variable to update.
316 * @param f The boolean value to assign to *pf.
317 */
318DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
319{
320#ifdef _MSC_VER
321 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
322#else
323 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
324#endif
325}
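/* Example: ASMAtomicXchgBool as a run-once flag (sketch, hypothetical names).
 * Only the first caller gets the old value 'false' back:
 * @code
 *      if (!ASMAtomicXchgBool(&g_fInitialized, true))
 *          doOneTimeInit();
 * @endcode */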
326
327
328/**
329 * Atomically Exchange an unsigned 16-bit value, ordered.
330 *
331 * @returns Current *pu16 value
332 * @param pu16 Pointer to the 16-bit variable to update.
333 * @param u16 The 16-bit value to assign to *pu16.
334 */
335#if RT_INLINE_ASM_EXTERNAL
336RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
337#else
338DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
339{
340# if RT_INLINE_ASM_GNU_STYLE
341 __asm__ __volatile__("xchgw %0, %1\n\t"
342 : "=m" (*pu16),
343 "=r" (u16)
344 : "1" (u16),
345 "m" (*pu16));
346# else
347 __asm
348 {
349# ifdef RT_ARCH_AMD64
350 mov rdx, [pu16]
351 mov ax, [u16]
352 xchg [rdx], ax
353 mov [u16], ax
354# else
355 mov edx, [pu16]
356 mov ax, [u16]
357 xchg [edx], ax
358 mov [u16], ax
359# endif
360 }
361# endif
362 return u16;
363}
364#endif
365
366
367/**
368 * Atomically Exchange a signed 16-bit value, ordered.
369 *
370 * @returns Current *pi16 value
371 * @param pi16 Pointer to the 16-bit variable to update.
372 * @param i16 The 16-bit value to assign to *pi16.
373 */
374DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
375{
376 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
377}
378
379
380/**
381 * Atomically Exchange an unsigned 32-bit value, ordered.
382 *
383 * @returns Current *pu32 value
384 * @param pu32 Pointer to the 32-bit variable to update.
385 * @param u32 The 32-bit value to assign to *pu32.
386 *
387 * @remarks Does not work on 286 and earlier.
388 */
389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
390RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
391#else
392DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
393{
394# if RT_INLINE_ASM_GNU_STYLE
395 __asm__ __volatile__("xchgl %0, %1\n\t"
396 : "=m" (*pu32),
397 "=r" (u32)
398 : "1" (u32),
399 "m" (*pu32));
400
401# elif RT_INLINE_ASM_USES_INTRIN
402 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
403
404# else
405 __asm
406 {
407# ifdef RT_ARCH_AMD64
408 mov rdx, [pu32]
409 mov eax, u32
410 xchg [rdx], eax
411 mov [u32], eax
412# else
413 mov edx, [pu32]
414 mov eax, u32
415 xchg [edx], eax
416 mov [u32], eax
417# endif
418 }
419# endif
420 return u32;
421}
422#endif
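/* Example: atomically draining an accumulated event mask (sketch, hypothetical
 * names). The exchange hands back every bit that was set and leaves zero
 * behind, so no event is lost or handled twice:
 * @code
 *      uint32_t fEvents = ASMAtomicXchgU32(&g_fPendingEvents, 0);
 *      if (fEvents)
 *          processEvents(fEvents);
 * @endcode */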
423
424
425/**
426 * Atomically Exchange a signed 32-bit value, ordered.
427 *
428 * @returns Current *pi32 value
429 * @param pi32 Pointer to the 32-bit variable to update.
430 * @param i32 The 32-bit value to assign to *pi32.
431 */
432DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
433{
434 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
435}
436
437
438/**
439 * Atomically Exchange an unsigned 64-bit value, ordered.
440 *
441 * @returns Current *pu64 value
442 * @param pu64 Pointer to the 64-bit variable to update.
443 * @param u64 The 64-bit value to assign to *pu64.
444 *
445 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
446 */
447#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
448 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
449RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
450#else
451DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
452{
453# if defined(RT_ARCH_AMD64)
454# if RT_INLINE_ASM_USES_INTRIN
455 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
456
457# elif RT_INLINE_ASM_GNU_STYLE
458 __asm__ __volatile__("xchgq %0, %1\n\t"
459 : "=m" (*pu64),
460 "=r" (u64)
461 : "1" (u64),
462 "m" (*pu64));
463# else
464 __asm
465 {
466 mov rdx, [pu64]
467 mov rax, [u64]
468 xchg [rdx], rax
469 mov [u64], rax
470 }
471# endif
472# else /* !RT_ARCH_AMD64 */
473# if RT_INLINE_ASM_GNU_STYLE
474# if defined(PIC) || defined(__PIC__)
475 uint32_t u32EBX = (uint32_t)u64;
476 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
477 "xchgl %%ebx, %3\n\t"
478 "1:\n\t"
479 "lock; cmpxchg8b (%5)\n\t"
480 "jnz 1b\n\t"
481 "movl %3, %%ebx\n\t"
482 /*"xchgl %%esi, %5\n\t"*/
483 : "=A" (u64),
484 "=m" (*pu64)
485 : "0" (*pu64),
486 "m" ( u32EBX ),
487 "c" ( (uint32_t)(u64 >> 32) ),
488 "S" (pu64));
489# else /* !PIC */
490 __asm__ __volatile__("1:\n\t"
491 "lock; cmpxchg8b %1\n\t"
492 "jnz 1b\n\t"
493 : "=A" (u64),
494 "=m" (*pu64)
495 : "0" (*pu64),
496 "b" ( (uint32_t)u64 ),
497 "c" ( (uint32_t)(u64 >> 32) ));
498# endif
499# else
500 __asm
501 {
502 mov ebx, dword ptr [u64]
503 mov ecx, dword ptr [u64 + 4]
504 mov edi, pu64
505 mov eax, dword ptr [edi]
506 mov edx, dword ptr [edi + 4]
507 retry:
508 lock cmpxchg8b [edi]
509 jnz retry
510 mov dword ptr [u64], eax
511 mov dword ptr [u64 + 4], edx
512 }
513# endif
514# endif /* !RT_ARCH_AMD64 */
515 return u64;
516}
517#endif
518
519
520/**
521 * Atomically Exchange a signed 64-bit value, ordered.
522 *
523 * @returns Current *pi64 value
524 * @param pi64 Pointer to the 64-bit variable to update.
525 * @param i64 The 64-bit value to assign to *pi64.
526 */
527DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
528{
529 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
530}
531
532
533/**
534 * Atomically Exchange a size_t value, ordered.
535 *
536 * @returns Current *puDst value
537 * @param puDst Pointer to the size_t variable to update.
538 * @param uNew The new value to assign to *puDst.
539 */
540DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew)
541{
542#if ARCH_BITS == 16
543 AssertCompile(sizeof(size_t) == 2);
544 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
545#elif ARCH_BITS == 32
546 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
547#elif ARCH_BITS == 64
548 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
549#else
550# error "ARCH_BITS is bogus"
551#endif
552}
553
554
555/**
556 * Atomically Exchange a pointer value, ordered.
557 *
558 * @returns Current *ppv value
559 * @param ppv Pointer to the pointer variable to update.
560 * @param pv The pointer value to assign to *ppv.
561 */
562DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
563{
564#if ARCH_BITS == 32 || ARCH_BITS == 16
565 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
566#elif ARCH_BITS == 64
567 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
568#else
569# error "ARCH_BITS is bogus"
570#endif
571}
572
573
574/**
575 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
576 *
577 * @returns Current *ppv value
578 * @param ppv Pointer to the pointer variable to update.
579 * @param pv The pointer value to assign to *ppv.
580 * @param Type The type of *ppv, sans volatile.
581 */
582#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
583# define ASMAtomicXchgPtrT(ppv, pv, Type) \
584 __extension__ \
585 ({\
586 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
587 Type const pvTypeChecked = (pv); \
588 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
589 pvTypeCheckedRet; \
590 })
591#else
592# define ASMAtomicXchgPtrT(ppv, pv, Type) \
593 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
594#endif
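/* Example: swapping in a new buffer pointer without the casting noise (sketch,
 * hypothetical PMYBUF type and names):
 * @code
 *      PMYBUF pOld = ASMAtomicXchgPtrT(&g_pCurBuf, pNewBuf, PMYBUF);
 *      if (pOld)
 *          myBufFree(pOld);
 * @endcode */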
595
596
597/**
598 * Atomically Exchange a raw-mode context pointer value, ordered.
599 *
600 * @returns Current *ppvRC value
601 * @param ppvRC Pointer to the pointer variable to update.
602 * @param pvRC The pointer value to assign to *ppvRC.
603 */
604DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
605{
606 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
607}
608
609
610/**
611 * Atomically Exchange a ring-0 pointer value, ordered.
612 *
613 * @returns Current *ppvR0 value
614 * @param ppvR0 Pointer to the pointer variable to update.
615 * @param pvR0 The pointer value to assign to *ppvR0.
616 */
617DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
618{
619#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
620 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
621#elif R0_ARCH_BITS == 64
622 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
623#else
624# error "R0_ARCH_BITS is bogus"
625#endif
626}
627
628
629/**
630 * Atomically Exchange a ring-3 pointer value, ordered.
631 *
632 * @returns Current *ppvR3 value
633 * @param ppvR3 Pointer to the pointer variable to update.
634 * @param pvR3 The pointer value to assign to *ppvR3.
635 */
636DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
637{
638#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
639 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
640#elif R3_ARCH_BITS == 64
641 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
642#else
643# error "R3_ARCH_BITS is bogus"
644#endif
645}
646
647
648/** @def ASMAtomicXchgHandle
649 * Atomically Exchange a typical IPRT handle value, ordered.
650 *
651 * @param ph Pointer to the value to update.
652 * @param hNew The new value to assign to *ph.
653 * @param phRes Where to store the current *ph value.
654 *
655 * @remarks This doesn't currently work for all handles (like RTFILE).
656 */
657#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
658# define ASMAtomicXchgHandle(ph, hNew, phRes) \
659 do { \
660 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
661 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
662 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
663 } while (0)
664#elif HC_ARCH_BITS == 64
665# define ASMAtomicXchgHandle(ph, hNew, phRes) \
666 do { \
667 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
668 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
669 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
670 } while (0)
671#else
672# error HC_ARCH_BITS
673#endif
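/* Example: detaching a handle so that exactly one caller destroys it (sketch;
 * assumes iprt/semaphore.h for the RTSEMEVENT bits, g_hEvent is hypothetical):
 * @code
 *      RTSEMEVENT hEvent;
 *      ASMAtomicXchgHandle(&g_hEvent, NIL_RTSEMEVENT, &hEvent);
 *      if (hEvent != NIL_RTSEMEVENT)
 *          RTSemEventDestroy(hEvent);
 * @endcode */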
674
675
676/**
677 * Atomically Exchange a value which size might differ
678 * between platforms or compilers, ordered.
679 *
680 * @param pu Pointer to the variable to update.
681 * @param uNew The value to assign to *pu.
682 * @todo This is busted as it's missing the result argument.
683 */
684#define ASMAtomicXchgSize(pu, uNew) \
685 do { \
686 switch (sizeof(*(pu))) { \
687 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
688 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
689 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
690 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
691 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
692 } \
693 } while (0)
694
695/**
696 * Atomically Exchange a value which size might differ
697 * between platforms or compilers, ordered.
698 *
699 * @param pu Pointer to the variable to update.
700 * @param uNew The value to assign to *pu.
701 * @param puRes Where to store the current *pu value.
702 */
703#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
704 do { \
705 switch (sizeof(*(pu))) { \
706 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
707 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
708 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
709 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
710 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
711 } \
712 } while (0)
713
714
715
716/**
717 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
718 *
719 * @returns true if xchg was done.
720 * @returns false if xchg wasn't done.
721 *
722 * @param pu8 Pointer to the value to update.
723 * @param u8New The new value to assign to *pu8.
724 * @param u8Old The old value to compare *pu8 with.
725 *
726 * @remarks x86: Requires a 486 or later.
727 */
728#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
729RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
730#else
731DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
732{
733 uint8_t u8Ret;
734 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
735 "setz %1\n\t"
736 : "=m" (*pu8),
737 "=qm" (u8Ret),
738 "=a" (u8Old)
739 : "q" (u8New),
740 "2" (u8Old),
741 "m" (*pu8));
742 return (bool)u8Ret;
743}
744#endif
745
746
747/**
748 * Atomically Compare and Exchange a signed 8-bit value, ordered.
749 *
750 * @returns true if xchg was done.
751 * @returns false if xchg wasn't done.
752 *
753 * @param pi8 Pointer to the value to update.
754 * @param i8New The new value to assign to *pi8.
755 * @param i8Old The old value to compare *pi8 with.
756 *
757 * @remarks x86: Requires a 486 or later.
758 */
759DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
760{
761 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
762}
763
764
765/**
766 * Atomically Compare and Exchange a bool value, ordered.
767 *
768 * @returns true if xchg was done.
769 * @returns false if xchg wasn't done.
770 *
771 * @param pf Pointer to the value to update.
772 * @param fNew The new value to assign to *pf.
773 * @param fOld The old value to compare *pf with.
774 *
775 * @remarks x86: Requires a 486 or later.
776 */
777DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
778{
779 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
780}
781
782
783/**
784 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
785 *
786 * @returns true if xchg was done.
787 * @returns false if xchg wasn't done.
788 *
789 * @param pu32 Pointer to the value to update.
790 * @param u32New The new value to assign to *pu32.
791 * @param u32Old The old value to compare *pu32 with.
792 *
793 * @remarks x86: Requires a 486 or later.
794 */
795#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
796RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
797#else
798DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
799{
800# if RT_INLINE_ASM_GNU_STYLE
801 uint8_t u8Ret;
802 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
803 "setz %1\n\t"
804 : "=m" (*pu32),
805 "=qm" (u8Ret),
806 "=a" (u32Old)
807 : "r" (u32New),
808 "2" (u32Old),
809 "m" (*pu32));
810 return (bool)u8Ret;
811
812# elif RT_INLINE_ASM_USES_INTRIN
813 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
814
815# else
816 uint32_t u32Ret;
817 __asm
818 {
819# ifdef RT_ARCH_AMD64
820 mov rdx, [pu32]
821# else
822 mov edx, [pu32]
823# endif
824 mov eax, [u32Old]
825 mov ecx, [u32New]
826# ifdef RT_ARCH_AMD64
827 lock cmpxchg [rdx], ecx
828# else
829 lock cmpxchg [edx], ecx
830# endif
831 setz al
832 movzx eax, al
833 mov [u32Ret], eax
834 }
835 return !!u32Ret;
836# endif
837}
838#endif
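/* Example: the classic compare-and-exchange retry loop, here incrementing a
 * counter only while it is below a limit (sketch, hypothetical names):
 * @code
 *      uint32_t cOld;
 *      do
 *      {
 *          cOld = ASMAtomicReadU32(&g_cUsers);
 *          if (cOld >= MY_MAX_USERS)
 *              return false;               // full, give up
 *      } while (!ASMAtomicCmpXchgU32(&g_cUsers, cOld + 1, cOld));
 *      return true;
 * @endcode */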
839
840
841/**
842 * Atomically Compare and Exchange a signed 32-bit value, ordered.
843 *
844 * @returns true if xchg was done.
845 * @returns false if xchg wasn't done.
846 *
847 * @param pi32 Pointer to the value to update.
848 * @param i32New The new value to assign to *pi32.
849 * @param i32Old The old value to compare *pi32 with.
850 *
851 * @remarks x86: Requires a 486 or later.
852 */
853DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
854{
855 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
856}
857
858
859/**
860 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
861 *
862 * @returns true if xchg was done.
863 * @returns false if xchg wasn't done.
864 *
865 * @param pu64 Pointer to the 64-bit variable to update.
866 * @param u64New The 64-bit value to assign to *pu64.
867 * @param u64Old The value to compare with.
868 *
869 * @remarks x86: Requires a Pentium or later.
870 */
871#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
872 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
873RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
874#else
875DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
876{
877# if RT_INLINE_ASM_USES_INTRIN
878 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
879
880# elif defined(RT_ARCH_AMD64)
881# if RT_INLINE_ASM_GNU_STYLE
882 uint8_t u8Ret;
883 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
884 "setz %1\n\t"
885 : "=m" (*pu64),
886 "=qm" (u8Ret),
887 "=a" (u64Old)
888 : "r" (u64New),
889 "2" (u64Old),
890 "m" (*pu64));
891 return (bool)u8Ret;
892# else
893 bool fRet;
894 __asm
895 {
896 mov rdx, [pu64]
897 mov rax, [u64Old]
898 mov rcx, [u64New]
899 lock cmpxchg [rdx], rcx
900 setz al
901 mov [fRet], al
902 }
903 return fRet;
904# endif
905# else /* !RT_ARCH_AMD64 */
906 uint32_t u32Ret;
907# if RT_INLINE_ASM_GNU_STYLE
908# if defined(PIC) || defined(__PIC__)
909 uint32_t u32EBX = (uint32_t)u64New;
910 uint32_t u32Spill;
911 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
912 "lock; cmpxchg8b (%6)\n\t"
913 "setz %%al\n\t"
914 "movl %4, %%ebx\n\t"
915 "movzbl %%al, %%eax\n\t"
916 : "=a" (u32Ret),
917 "=d" (u32Spill),
918# if RT_GNUC_PREREQ(4, 3)
919 "+m" (*pu64)
920# else
921 "=m" (*pu64)
922# endif
923 : "A" (u64Old),
924 "m" ( u32EBX ),
925 "c" ( (uint32_t)(u64New >> 32) ),
926 "S" (pu64));
927# else /* !PIC */
928 uint32_t u32Spill;
929 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
930 "setz %%al\n\t"
931 "movzbl %%al, %%eax\n\t"
932 : "=a" (u32Ret),
933 "=d" (u32Spill),
934 "+m" (*pu64)
935 : "A" (u64Old),
936 "b" ( (uint32_t)u64New ),
937 "c" ( (uint32_t)(u64New >> 32) ));
938# endif
939 return (bool)u32Ret;
940# else
941 __asm
942 {
943 mov ebx, dword ptr [u64New]
944 mov ecx, dword ptr [u64New + 4]
945 mov edi, [pu64]
946 mov eax, dword ptr [u64Old]
947 mov edx, dword ptr [u64Old + 4]
948 lock cmpxchg8b [edi]
949 setz al
950 movzx eax, al
951 mov dword ptr [u32Ret], eax
952 }
953 return !!u32Ret;
954# endif
955# endif /* !RT_ARCH_AMD64 */
956}
957#endif
958
959
960/**
961 * Atomically Compare and exchange a signed 64-bit value, ordered.
962 *
963 * @returns true if xchg was done.
964 * @returns false if xchg wasn't done.
965 *
966 * @param pi64 Pointer to the 64-bit variable to update.
967 * @param i64 The 64-bit value to assign to *pi64.
968 * @param i64Old The value to compare with.
969 *
970 * @remarks x86: Requires a Pentium or later.
971 */
972DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
973{
974 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
975}
976
977
978/**
979 * Atomically Compare and Exchange a pointer value, ordered.
980 *
981 * @returns true if xchg was done.
982 * @returns false if xchg wasn't done.
983 *
984 * @param ppv Pointer to the value to update.
985 * @param pvNew The new value to assign to *ppv.
986 * @param pvOld The old value to compare *ppv with.
987 *
988 * @remarks x86: Requires a 486 or later.
989 */
990DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
991{
992#if ARCH_BITS == 32 || ARCH_BITS == 16
993 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
994#elif ARCH_BITS == 64
995 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
996#else
997# error "ARCH_BITS is bogus"
998#endif
999}
1000
1001
1002/**
1003 * Atomically Compare and Exchange a pointer value, ordered.
1004 *
1005 * @returns true if xchg was done.
1006 * @returns false if xchg wasn't done.
1007 *
1008 * @param ppv Pointer to the value to update.
1009 * @param pvNew The new value to assign to *ppv.
1010 * @param pvOld The old value to compare *ppv with.
1011 *
1012 * @remarks This is relatively type safe on GCC platforms.
1013 * @remarks x86: Requires a 486 or later.
1014 */
1015#ifdef __GNUC__
1016# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1017 __extension__ \
1018 ({\
1019 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1020 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1021 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1022 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1023 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1024 fMacroRet; \
1025 })
1026#else
1027# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1028 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1029#endif
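/* Example: lock-free push onto a singly linked list head (sketch, hypothetical
 * PMYNODE type with a pNext member; ASMAtomicReadPtrT is declared further down
 * in this header):
 * @code
 *      PMYNODE pHead;
 *      do
 *      {
 *          pHead = ASMAtomicReadPtrT(&g_pHead, PMYNODE);
 *          pNode->pNext = pHead;
 *      } while (!ASMAtomicCmpXchgPtr(&g_pHead, pNode, pHead));
 * @endcode */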
1030
1031
1032/** @def ASMAtomicCmpXchgHandle
1033 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1034 *
1035 * @param ph Pointer to the value to update.
1036 * @param hNew The new value to assign to *ph.
1037 * @param hOld The old value to compare *ph with.
1038 * @param fRc Where to store the result.
1039 *
1040 * @remarks This doesn't currently work for all handles (like RTFILE).
1041 * @remarks x86: Requires a 486 or later.
1042 */
1043#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1044# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1045 do { \
1046 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1047 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1048 } while (0)
1049#elif HC_ARCH_BITS == 64
1050# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1051 do { \
1052 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1053 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1054 } while (0)
1055#else
1056# error HC_ARCH_BITS
1057#endif
1058
1059
1060/** @def ASMAtomicCmpXchgSize
1061 * Atomically Compare and Exchange a value which size might differ
1062 * between platforms or compilers, ordered.
1063 *
1064 * @param pu Pointer to the value to update.
1065 * @param uNew The new value to assign to *pu.
1066 * @param uOld The old value to compare *pu with.
1067 * @param fRc Where to store the result.
1068 *
1069 * @remarks x86: Requires a 486 or later.
1070 */
1071#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1072 do { \
1073 switch (sizeof(*(pu))) { \
1074 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1075 break; \
1076 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1077 break; \
1078 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1079 (fRc) = false; \
1080 break; \
1081 } \
1082 } while (0)
1083
1084
1085/**
1086 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1087 * passes back old value, ordered.
1088 *
1089 * @returns true if xchg was done.
1090 * @returns false if xchg wasn't done.
1091 *
1092 * @param pu32 Pointer to the value to update.
1093 * @param u32New The new value to assign to *pu32.
1094 * @param u32Old The old value to compare *pu32 with.
1095 * @param pu32Old Pointer to store the old value at.
1096 *
1097 * @remarks x86: Requires a 486 or later.
1098 */
1099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1100RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
1101#else
1102DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
1103{
1104# if RT_INLINE_ASM_GNU_STYLE
1105 uint8_t u8Ret;
1106 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1107 "setz %1\n\t"
1108 : "=m" (*pu32),
1109 "=qm" (u8Ret),
1110 "=a" (*pu32Old)
1111 : "r" (u32New),
1112 "a" (u32Old),
1113 "m" (*pu32));
1114 return (bool)u8Ret;
1115
1116# elif RT_INLINE_ASM_USES_INTRIN
1117 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1118
1119# else
1120 uint32_t u32Ret;
1121 __asm
1122 {
1123# ifdef RT_ARCH_AMD64
1124 mov rdx, [pu32]
1125# else
1126 mov edx, [pu32]
1127# endif
1128 mov eax, [u32Old]
1129 mov ecx, [u32New]
1130# ifdef RT_ARCH_AMD64
1131 lock cmpxchg [rdx], ecx
1132 mov rdx, [pu32Old]
1133 mov [rdx], eax
1134# else
1135 lock cmpxchg [edx], ecx
1136 mov edx, [pu32Old]
1137 mov [edx], eax
1138# endif
1139 setz al
1140 movzx eax, al
1141 mov [u32Ret], eax
1142 }
1143 return !!u32Ret;
1144# endif
1145}
1146#endif
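/* Example: the Ex variant hands back the value it actually found, so a retry
 * loop never needs a separate re-read (sketch, hypothetical names):
 * @code
 *      uint32_t uOld = ASMAtomicReadU32(&g_uState);
 *      uint32_t uNew;
 *      do
 *          uNew = calcNextState(uOld);
 *      while (!ASMAtomicCmpXchgExU32(&g_uState, uNew, uOld, &uOld));
 * @endcode */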
1147
1148
1149/**
1150 * Atomically Compare and Exchange a signed 32-bit value, additionally
1151 * passes back old value, ordered.
1152 *
1153 * @returns true if xchg was done.
1154 * @returns false if xchg wasn't done.
1155 *
1156 * @param pi32 Pointer to the value to update.
1157 * @param i32New The new value to assign to *pi32.
1158 * @param i32Old The old value to compare *pi32 with.
1159 * @param pi32Old Pointer to store the old value at.
1160 *
1161 * @remarks x86: Requires a 486 or later.
1162 */
1163DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1164{
1165 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1166}
1167
1168
1169/**
1170 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1171 * passing back old value, ordered.
1172 *
1173 * @returns true if xchg was done.
1174 * @returns false if xchg wasn't done.
1175 *
1176 * @param pu64 Pointer to the 64-bit variable to update.
1177 * @param u64New The 64-bit value to assign to *pu64.
1178 * @param u64Old The value to compare with.
1179 * @param pu64Old Pointer to store the old value at.
1180 *
1181 * @remarks x86: Requires a Pentium or later.
1182 */
1183#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1184 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1185RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1186#else
1187DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1188{
1189# if RT_INLINE_ASM_USES_INTRIN
1190 return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1191
1192# elif defined(RT_ARCH_AMD64)
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint8_t u8Ret;
1195 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1196 "setz %1\n\t"
1197 : "=m" (*pu64),
1198 "=qm" (u8Ret),
1199 "=a" (*pu64Old)
1200 : "r" (u64New),
1201 "a" (u64Old),
1202 "m" (*pu64));
1203 return (bool)u8Ret;
1204# else
1205 bool fRet;
1206 __asm
1207 {
1208 mov rdx, [pu64]
1209 mov rax, [u64Old]
1210 mov rcx, [u64New]
1211 lock cmpxchg [rdx], rcx
1212 mov rdx, [pu64Old]
1213 mov [rdx], rax
1214 setz al
1215 mov [fRet], al
1216 }
1217 return fRet;
1218# endif
1219# else /* !RT_ARCH_AMD64 */
1220# if RT_INLINE_ASM_GNU_STYLE
1221 uint64_t u64Ret;
1222# if defined(PIC) || defined(__PIC__)
1223 /* NB: this code uses a memory clobber description, because the clean
1224 * solution with an output value for *pu64 makes gcc run out of registers.
1225 * This will cause suboptimal code, and anyone with a better solution is
1226 * welcome to improve this. */
1227 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1228 "lock; cmpxchg8b %3\n\t"
1229 "xchgl %%ebx, %1\n\t"
1230 : "=A" (u64Ret)
1231 : "DS" ((uint32_t)u64New),
1232 "c" ((uint32_t)(u64New >> 32)),
1233 "m" (*pu64),
1234 "0" (u64Old)
1235 : "memory" );
1236# else /* !PIC */
1237 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1238 : "=A" (u64Ret),
1239 "=m" (*pu64)
1240 : "b" ((uint32_t)u64New),
1241 "c" ((uint32_t)(u64New >> 32)),
1242 "m" (*pu64),
1243 "0" (u64Old));
1244# endif
1245 *pu64Old = u64Ret;
1246 return u64Ret == u64Old;
1247# else
1248 uint32_t u32Ret;
1249 __asm
1250 {
1251 mov ebx, dword ptr [u64New]
1252 mov ecx, dword ptr [u64New + 4]
1253 mov edi, [pu64]
1254 mov eax, dword ptr [u64Old]
1255 mov edx, dword ptr [u64Old + 4]
1256 lock cmpxchg8b [edi]
1257 mov ebx, [pu64Old]
1258 mov [ebx], eax
1259 setz al
1260 movzx eax, al
1261 add ebx, 4
1262 mov [ebx], edx
1263 mov dword ptr [u32Ret], eax
1264 }
1265 return !!u32Ret;
1266# endif
1267# endif /* !RT_ARCH_AMD64 */
1268}
1269#endif
1270
1271
1272/**
1273 * Atomically Compare and exchange a signed 64-bit value, additionally
1274 * passing back old value, ordered.
1275 *
1276 * @returns true if xchg was done.
1277 * @returns false if xchg wasn't done.
1278 *
1279 * @param pi64 Pointer to the 64-bit variable to update.
1280 * @param i64 The 64-bit value to assign to *pi64.
1281 * @param i64Old The value to compare with.
1282 * @param pi64Old Pointer to store the old value at.
1283 *
1284 * @remarks x86: Requires a Pentium or later.
1285 */
1286DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1287{
1288 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1289}
1290
1291/** @def ASMAtomicCmpXchgExHandle
1292 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1293 *
1294 * @param ph Pointer to the value to update.
1295 * @param hNew The new value to assign to *ph.
1296 * @param hOld The old value to compare *ph with.
1297 * @param fRc Where to store the result.
1298 * @param phOldVal Pointer to where to store the old value.
1299 *
1300 * @remarks This doesn't currently work for all handles (like RTFILE).
1301 */
1302#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1303# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1304 do { \
1305 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1306 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1307 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1308 } while (0)
1309#elif HC_ARCH_BITS == 64
1310# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1311 do { \
1312 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1313 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1314 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1315 } while (0)
1316#else
1317# error HC_ARCH_BITS
1318#endif
1319
1320
1321/** @def ASMAtomicCmpXchgExSize
1322 * Atomically Compare and Exchange a value which size might differ
1323 * between platforms or compilers. Additionally passes back old value.
1324 *
1325 * @param pu Pointer to the value to update.
1326 * @param uNew The new value to assign to *pu.
1327 * @param uOld The old value to compare *pu with.
1328 * @param fRc Where to store the result.
1329 * @param puOldVal Pointer to where to store the old value.
1330 *
1331 * @remarks x86: Requires a 486 or later.
1332 */
1333#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1334 do { \
1335 switch (sizeof(*(pu))) { \
1336 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1337 break; \
1338 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1339 break; \
1340 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1341 (fRc) = false; \
1342 *(puOldVal) = 0; \
1343 break; \
1344 } \
1345 } while (0)
1346
1347
1348/**
1349 * Atomically Compare and Exchange a pointer value, additionally
1350 * passing back old value, ordered.
1351 *
1352 * @returns true if xchg was done.
1353 * @returns false if xchg wasn't done.
1354 *
1355 * @param ppv Pointer to the value to update.
1356 * @param pvNew The new value to assign to *ppv.
1357 * @param pvOld The old value to compare *ppv with.
1358 * @param ppvOld Pointer to store the old value at.
1359 *
1360 * @remarks x86: Requires a 486 or later.
1361 */
1362DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1363 void RT_FAR * RT_FAR *ppvOld)
1364{
1365#if ARCH_BITS == 32 || ARCH_BITS == 16
1366 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1367#elif ARCH_BITS == 64
1368 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1369#else
1370# error "ARCH_BITS is bogus"
1371#endif
1372}
1373
1374
1375/**
1376 * Atomically Compare and Exchange a pointer value, additionally
1377 * passing back old value, ordered.
1378 *
1379 * @returns true if xchg was done.
1380 * @returns false if xchg wasn't done.
1381 *
1382 * @param ppv Pointer to the value to update.
1383 * @param pvNew The new value to assign to *ppv.
1384 * @param pvOld The old value to compare *ppv with.
1385 * @param ppvOld Pointer to store the old value at.
1386 *
1387 * @remarks This is relatively type safe on GCC platforms.
1388 * @remarks x86: Requires a 486 or later.
1389 */
1390#ifdef __GNUC__
1391# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1392 __extension__ \
1393 ({\
1394 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1395 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1396 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1397 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1398 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1399 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1400 (void **)ppvOldTypeChecked); \
1401 fMacroRet; \
1402 })
1403#else
1404# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1405 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1406#endif
1407
1408
1409/**
1410 * Virtualization unfriendly serializing instruction, always exits.
1411 */
1412#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1413RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void);
1414#else
1415DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1416{
1417# if RT_INLINE_ASM_GNU_STYLE
1418 RTCCUINTREG xAX = 0;
1419# ifdef RT_ARCH_AMD64
1420 __asm__ __volatile__ ("cpuid"
1421 : "=a" (xAX)
1422 : "0" (xAX)
1423 : "rbx", "rcx", "rdx", "memory");
1424# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1425 __asm__ __volatile__ ("push %%ebx\n\t"
1426 "cpuid\n\t"
1427 "pop %%ebx\n\t"
1428 : "=a" (xAX)
1429 : "0" (xAX)
1430 : "ecx", "edx", "memory");
1431# else
1432 __asm__ __volatile__ ("cpuid"
1433 : "=a" (xAX)
1434 : "0" (xAX)
1435 : "ebx", "ecx", "edx", "memory");
1436# endif
1437
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 int aInfo[4];
1440 _ReadWriteBarrier();
1441 __cpuid(aInfo, 0);
1442
1443# else
1444 __asm
1445 {
1446 push ebx
1447 xor eax, eax
1448 cpuid
1449 pop ebx
1450 }
1451# endif
1452}
1453#endif
1454
1455/**
1456 * Virtualization friendly serializing instruction, though more expensive.
1457 */
1458#if RT_INLINE_ASM_EXTERNAL
1459RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void);
1460#else
1461DECLINLINE(void) ASMSerializeInstructionIRet(void)
1462{
1463# if RT_INLINE_ASM_GNU_STYLE
1464# ifdef RT_ARCH_AMD64
1465 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1466 "subq $128, %%rsp\n\t" /*redzone*/
1467 "mov %%ss, %%eax\n\t"
1468 "pushq %%rax\n\t"
1469 "pushq %%r10\n\t"
1470 "pushfq\n\t"
1471 "movl %%cs, %%eax\n\t"
1472 "pushq %%rax\n\t"
1473 "leaq 1f(%%rip), %%rax\n\t"
1474 "pushq %%rax\n\t"
1475 "iretq\n\t"
1476 "1:\n\t"
1477 ::: "rax", "r10", "memory");
1478# else
1479 __asm__ __volatile__ ("pushfl\n\t"
1480 "pushl %%cs\n\t"
1481 "pushl $1f\n\t"
1482 "iretl\n\t"
1483 "1:\n\t"
1484 ::: "memory");
1485# endif
1486
1487# else
1488 __asm
1489 {
1490 pushfd
1491 push cs
1492 push la_ret
1493 iretd
1494 la_ret:
1495 }
1496# endif
1497}
1498#endif
1499
1500/**
1501 * Virtualization friendlier serializing instruction, may still cause exits.
1502 */
1503#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1504RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void);
1505#else
1506DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1507{
1508# if RT_INLINE_ASM_GNU_STYLE
1509 /* rdtscp is not supported by ancient linux build VM of course :-( */
1510# ifdef RT_ARCH_AMD64
1511 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
1512 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1513# else
1514 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
1515 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1516# endif
1517# else
1518# if RT_INLINE_ASM_USES_INTRIN >= 15
1519 uint32_t uIgnore;
1520 _ReadWriteBarrier();
1521 (void)__rdtscp(&uIgnore);
1522 (void)uIgnore;
1523# else
1524 __asm
1525 {
1526 rdtscp
1527 }
1528# endif
1529# endif
1530}
1531#endif
1532
1533
1534/**
1535 * Serialize Instruction.
1536 */
1537#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1538# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1539#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1540# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1541#elif defined(RT_ARCH_SPARC64)
1542RTDECL(void) ASMSerializeInstruction(void);
1543#else
1544# error "Port me"
1545#endif
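/* Example: serializing before reading the TSC so earlier instructions cannot
 * leak into the measured interval (sketch; ASMReadTSC comes from
 * iprt/asm-amd64-x86.h):
 * @code
 *      ASMSerializeInstruction();
 *      uint64_t const uTscStart = ASMReadTSC();
 *      workToMeasure();
 *      ASMSerializeInstruction();
 *      uint64_t const cTicks = ASMReadTSC() - uTscStart;
 * @endcode */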
1546
1547
1548/**
1549 * Memory fence, waits for any pending writes and reads to complete.
1550 */
1551DECLINLINE(void) ASMMemoryFence(void)
1552{
1553#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1554# if RT_INLINE_ASM_GNU_STYLE
1555 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1556# elif RT_INLINE_ASM_USES_INTRIN
1557 _mm_mfence();
1558# else
1559 __asm
1560 {
1561 _emit 0x0f
1562 _emit 0xae
1563 _emit 0xf0
1564 }
1565# endif
1566#elif ARCH_BITS == 16
1567 uint16_t volatile u16;
1568 ASMAtomicXchgU16(&u16, 0);
1569#else
1570 uint32_t volatile u32;
1571 ASMAtomicXchgU32(&u32, 0);
1572#endif
1573}
1574
1575
1576/**
1577 * Write fence, waits for any pending writes to complete.
1578 */
1579DECLINLINE(void) ASMWriteFence(void)
1580{
1581#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1582# if RT_INLINE_ASM_GNU_STYLE
1583 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1584# elif RT_INLINE_ASM_USES_INTRIN
1585 _mm_sfence();
1586# else
1587 __asm
1588 {
1589 _emit 0x0f
1590 _emit 0xae
1591 _emit 0xf8
1592 }
1593# endif
1594#else
1595 ASMMemoryFence();
1596#endif
1597}
1598
1599
1600/**
1601 * Read fence, waits for any pending reads to complete.
1602 */
1603DECLINLINE(void) ASMReadFence(void)
1604{
1605#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1606# if RT_INLINE_ASM_GNU_STYLE
1607 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1608# elif RT_INLINE_ASM_USES_INTRIN
1609 _mm_lfence();
1610# else
1611 __asm
1612 {
1613 _emit 0x0f
1614 _emit 0xae
1615 _emit 0xe8
1616 }
1617# endif
1618#else
1619 ASMMemoryFence();
1620#endif
1621}
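/* Example: pairing the write and read fences in a producer/consumer hand-off
 * (sketch, hypothetical names):
 * @code
 *      // producer
 *      g_uPayload = uValue;
 *      ASMWriteFence();        // payload becomes visible before the flag
 *      g_fReady = 1;
 *
 *      // consumer
 *      if (g_fReady)
 *      {
 *          ASMReadFence();     // the flag is read before the payload
 *          consume(g_uPayload);
 *      }
 * @endcode */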
1622
1623
1624/**
1625 * Atomically reads an unsigned 8-bit value, ordered.
1626 *
1627 * @returns Current *pu8 value
1628 * @param pu8 Pointer to the 8-bit variable to read.
1629 */
1630DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1631{
1632 ASMMemoryFence();
1633 return *pu8; /* byte reads are atomic on x86 */
1634}
1635
1636
1637/**
1638 * Atomically reads an unsigned 8-bit value, unordered.
1639 *
1640 * @returns Current *pu8 value
1641 * @param pu8 Pointer to the 8-bit variable to read.
1642 */
1643DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1644{
1645 return *pu8; /* byte reads are atomic on x86 */
1646}
1647
1648
1649/**
1650 * Atomically reads a signed 8-bit value, ordered.
1651 *
1652 * @returns Current *pi8 value
1653 * @param pi8 Pointer to the 8-bit variable to read.
1654 */
1655DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1656{
1657 ASMMemoryFence();
1658 return *pi8; /* byte reads are atomic on x86 */
1659}
1660
1661
1662/**
1663 * Atomically reads a signed 8-bit value, unordered.
1664 *
1665 * @returns Current *pi8 value
1666 * @param pi8 Pointer to the 8-bit variable to read.
1667 */
1668DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1669{
1670 return *pi8; /* byte reads are atomic on x86 */
1671}
1672
1673
1674/**
1675 * Atomically reads an unsigned 16-bit value, ordered.
1676 *
1677 * @returns Current *pu16 value
1678 * @param pu16 Pointer to the 16-bit variable to read.
1679 */
1680DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1681{
1682 ASMMemoryFence();
1683 Assert(!((uintptr_t)pu16 & 1));
1684 return *pu16;
1685}
1686
1687
1688/**
1689 * Atomically reads an unsigned 16-bit value, unordered.
1690 *
1691 * @returns Current *pu16 value
1692 * @param pu16 Pointer to the 16-bit variable to read.
1693 */
1694DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1695{
1696 Assert(!((uintptr_t)pu16 & 1));
1697 return *pu16;
1698}
1699
1700
1701/**
1702 * Atomically reads a signed 16-bit value, ordered.
1703 *
1704 * @returns Current *pi16 value
1705 * @param pi16 Pointer to the 16-bit variable to read.
1706 */
1707DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1708{
1709 ASMMemoryFence();
1710 Assert(!((uintptr_t)pi16 & 1));
1711 return *pi16;
1712}
1713
1714
1715/**
1716 * Atomically reads a signed 16-bit value, unordered.
1717 *
1718 * @returns Current *pi16 value
1719 * @param pi16 Pointer to the 16-bit variable to read.
1720 */
1721DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1722{
1723 Assert(!((uintptr_t)pi16 & 1));
1724 return *pi16;
1725}
1726
1727
1728/**
1729 * Atomically reads an unsigned 32-bit value, ordered.
1730 *
1731 * @returns Current *pu32 value
1732 * @param pu32 Pointer to the 32-bit variable to read.
1733 */
1734DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1735{
1736 ASMMemoryFence();
1737 Assert(!((uintptr_t)pu32 & 3));
1738#if ARCH_BITS == 16
1739 AssertFailed(); /** @todo 16-bit */
1740#endif
1741 return *pu32;
1742}
1743
1744
1745/**
1746 * Atomically reads an unsigned 32-bit value, unordered.
1747 *
1748 * @returns Current *pu32 value
1749 * @param pu32 Pointer to the 32-bit variable to read.
1750 */
1751DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1752{
1753 Assert(!((uintptr_t)pu32 & 3));
1754#if ARCH_BITS == 16
1755 AssertFailed(); /** @todo 16-bit */
1756#endif
1757 return *pu32;
1758}
1759
1760
1761/**
1762 * Atomically reads a signed 32-bit value, ordered.
1763 *
1764 * @returns Current *pi32 value
1765 * @param pi32 Pointer to the 32-bit variable to read.
1766 */
1767DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1768{
1769 ASMMemoryFence();
1770 Assert(!((uintptr_t)pi32 & 3));
1771#if ARCH_BITS == 16
1772 AssertFailed(); /** @todo 16-bit */
1773#endif
1774 return *pi32;
1775}
1776
1777
1778/**
1779 * Atomically reads a signed 32-bit value, unordered.
1780 *
1781 * @returns Current *pi32 value
1782 * @param pi32 Pointer to the 32-bit variable to read.
1783 */
1784DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1785{
1786 Assert(!((uintptr_t)pi32 & 3));
1787#if ARCH_BITS == 16
1788 AssertFailed(); /** @todo 16-bit */
1789#endif
1790 return *pi32;
1791}
1792
1793
1794/**
1795 * Atomically reads an unsigned 64-bit value, ordered.
1796 *
1797 * @returns Current *pu64 value
1798 * @param pu64 Pointer to the 64-bit variable to read.
1799 * The memory pointed to must be writable.
1800 *
1801 * @remarks This may fault if the memory is read-only!
1802 * @remarks x86: Requires a Pentium or later.
1803 */
1804#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1805 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1806RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
1807#else
1808DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
1809{
1810 uint64_t u64;
1811# ifdef RT_ARCH_AMD64
1812 Assert(!((uintptr_t)pu64 & 7));
1813/*# if RT_INLINE_ASM_GNU_STYLE
1814 __asm__ __volatile__( "mfence\n\t"
1815 "movq %1, %0\n\t"
1816 : "=r" (u64)
1817 : "m" (*pu64));
1818# else
1819 __asm
1820 {
1821 mfence
1822 mov rdx, [pu64]
1823 mov rax, [rdx]
1824 mov [u64], rax
1825 }
1826# endif*/
1827 ASMMemoryFence();
1828 u64 = *pu64;
1829# else /* !RT_ARCH_AMD64 */
1830# if RT_INLINE_ASM_GNU_STYLE
1831# if defined(PIC) || defined(__PIC__)
1832 uint32_t u32EBX = 0;
1833 Assert(!((uintptr_t)pu64 & 7));
1834 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1835 "lock; cmpxchg8b (%5)\n\t"
1836 "movl %3, %%ebx\n\t"
1837 : "=A" (u64),
1838# if RT_GNUC_PREREQ(4, 3)
1839 "+m" (*pu64)
1840# else
1841 "=m" (*pu64)
1842# endif
1843 : "0" (0ULL),
1844 "m" (u32EBX),
1845 "c" (0),
1846 "S" (pu64));
1847# else /* !PIC */
1848 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1849 : "=A" (u64),
1850 "+m" (*pu64)
1851 : "0" (0ULL),
1852 "b" (0),
1853 "c" (0));
1854# endif
1855# else
1856 Assert(!((uintptr_t)pu64 & 7));
1857 __asm
1858 {
1859 xor eax, eax
1860 xor edx, edx
1861 mov edi, pu64
1862 xor ecx, ecx
1863 xor ebx, ebx
1864 lock cmpxchg8b [edi]
1865 mov dword ptr [u64], eax
1866 mov dword ptr [u64 + 4], edx
1867 }
1868# endif
1869# endif /* !RT_ARCH_AMD64 */
1870 return u64;
1871}
1872#endif
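
/* Illustrative usage sketch (not part of the original API docs; the statistics
 * counter name is hypothetical).  It also shows why the variable must live in
 * writable memory: on 32-bit x86 the read is done with a locked cmpxchg8b,
 * which performs a (value preserving) store.
 * @code
 *      static uint64_t volatile s_cbTotalTransferred;
 *      uint64_t cbSnapshot = ASMAtomicReadU64(&s_cbTotalTransferred);
 * @endcode
 */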
1873
1874
1875/**
1876 * Atomically reads an unsigned 64-bit value, unordered.
1877 *
1878 * @returns Current *pu64 value
1879 * @param pu64 Pointer to the 64-bit variable to read.
1880 * The memory pointed to must be writable.
1881 *
1882 * @remarks This may fault if the memory is read-only!
1883 * @remarks x86: Requires a Pentium or later.
1884 */
1885#if !defined(RT_ARCH_AMD64) \
1886 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1887 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1888RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
1889#else
1890DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
1891{
1892 uint64_t u64;
1893# ifdef RT_ARCH_AMD64
1894 Assert(!((uintptr_t)pu64 & 7));
1895/*# if RT_INLINE_ASM_GNU_STYLE
1896 Assert(!((uintptr_t)pu64 & 7));
1897 __asm__ __volatile__("movq %1, %0\n\t"
1898 : "=r" (u64)
1899 : "m" (*pu64));
1900# else
1901 __asm
1902 {
1903 mov rdx, [pu64]
1904 mov rax, [rdx]
1905 mov [u64], rax
1906 }
1907# endif */
1908 u64 = *pu64;
1909# else /* !RT_ARCH_AMD64 */
1910# if RT_INLINE_ASM_GNU_STYLE
1911# if defined(PIC) || defined(__PIC__)
1912 uint32_t u32EBX = 0;
1913 uint32_t u32Spill;
1914 Assert(!((uintptr_t)pu64 & 7));
1915 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1916 "xor %%ecx,%%ecx\n\t"
1917 "xor %%edx,%%edx\n\t"
1918 "xchgl %%ebx, %3\n\t"
1919 "lock; cmpxchg8b (%4)\n\t"
1920 "movl %3, %%ebx\n\t"
1921 : "=A" (u64),
1922# if RT_GNUC_PREREQ(4, 3)
1923 "+m" (*pu64),
1924# else
1925 "=m" (*pu64),
1926# endif
1927 "=c" (u32Spill)
1928 : "m" (u32EBX),
1929 "S" (pu64));
1930# else /* !PIC */
1931 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1932 : "=A" (u64),
1933 "+m" (*pu64)
1934 : "0" (0ULL),
1935 "b" (0),
1936 "c" (0));
1937# endif
1938# else
1939 Assert(!((uintptr_t)pu64 & 7));
1940 __asm
1941 {
1942 xor eax, eax
1943 xor edx, edx
1944 mov edi, pu64
1945 xor ecx, ecx
1946 xor ebx, ebx
1947 lock cmpxchg8b [edi]
1948 mov dword ptr [u64], eax
1949 mov dword ptr [u64 + 4], edx
1950 }
1951# endif
1952# endif /* !RT_ARCH_AMD64 */
1953 return u64;
1954}
1955#endif
1956
1957
1958/**
1959 * Atomically reads a signed 64-bit value, ordered.
1960 *
1961 * @returns Current *pi64 value
1962 * @param pi64 Pointer to the 64-bit variable to read.
1963 * The memory pointed to must be writable.
1964 *
1965 * @remarks This may fault if the memory is read-only!
1966 * @remarks x86: Requires a Pentium or later.
1967 */
1968DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1969{
1970 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1971}
1972
1973
1974/**
1975 * Atomically reads a signed 64-bit value, unordered.
1976 *
1977 * @returns Current *pi64 value
1978 * @param pi64 Pointer to the 64-bit variable to read.
1979 * The memory pointed to must be writable.
1980 *
1981 * @remarks This may fault if the memory is read-only!
1982 * @remarks x86: Requires a Pentium or later.
1983 */
1984DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1985{
1986 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1987}
1988
1989
1990/**
1991 * Atomically reads a size_t value, ordered.
1992 *
1993 * @returns Current *pcb value
1994 * @param pcb Pointer to the size_t variable to read.
1995 */
1996DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
1997{
1998#if ARCH_BITS == 64
1999 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2000#elif ARCH_BITS == 32
2001 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2002#elif ARCH_BITS == 16
2003 AssertCompileSize(size_t, 2);
2004 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2005#else
2006# error "Unsupported ARCH_BITS value"
2007#endif
2008}
2009
2010
2011/**
2012 * Atomically reads a size_t value, unordered.
2013 *
2014 * @returns Current *pcb value
2015 * @param pcb Pointer to the size_t variable to read.
2016 */
2017DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
2018{
2019#if ARCH_BITS == 64
2020 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2021#elif ARCH_BITS == 32
2022 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2023#elif ARCH_BITS == 16
2024 AssertCompileSize(size_t, 2);
2025 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2026#else
2027# error "Unsupported ARCH_BITS value"
2028#endif
2029}
2030
2031
2032/**
2033 * Atomically reads a pointer value, ordered.
2034 *
2035 * @returns Current *pv value
2036 * @param ppv Pointer to the pointer variable to read.
2037 *
2038 * @remarks Please use ASMAtomicReadPtrT as it provides better type safety and
2039 * requires less typing (no casts).
2040 */
2041DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2042{
2043#if ARCH_BITS == 32 || ARCH_BITS == 16
2044 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2045#elif ARCH_BITS == 64
2046 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2047#else
2048# error "ARCH_BITS is bogus"
2049#endif
2050}
2051
2052/**
2053 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2054 *
2055 * @returns Current *pv value
2056 * @param ppv Pointer to the pointer variable to read.
2057 * @param Type The type of *ppv, sans volatile.
2058 */
2059#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2060# define ASMAtomicReadPtrT(ppv, Type) \
2061 __extension__ \
2062 ({\
2063 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2064 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2065 pvTypeChecked; \
2066 })
2067#else
2068# define ASMAtomicReadPtrT(ppv, Type) \
2069 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2070#endif
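
/* Illustrative usage sketch (the MYSTATE type and g_pState variable are
 * hypothetical): ASMAtomicReadPtrT keeps the pointer type, so the call site
 * needs no casting.
 * @code
 *      typedef struct MYSTATE *PMYSTATE;
 *      static PMYSTATE volatile g_pState;
 *      PMYSTATE pState = ASMAtomicReadPtrT(&g_pState, PMYSTATE);
 * @endcode
 */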
2071
2072
2073/**
2074 * Atomically reads a pointer value, unordered.
2075 *
2076 * @returns Current *pv value
2077 * @param ppv Pointer to the pointer variable to read.
2078 *
2079 * @remarks Please use ASMAtomicUoReadPtrT as it provides better type safety and
2080 * requires less typing (no casts).
2081 */
2082DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
2083{
2084#if ARCH_BITS == 32 || ARCH_BITS == 16
2085 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2086#elif ARCH_BITS == 64
2087 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2088#else
2089# error "ARCH_BITS is bogus"
2090#endif
2091}
2092
2093
2094/**
2095 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2096 *
2097 * @returns Current *pv value
2098 * @param ppv Pointer to the pointer variable to read.
2099 * @param Type The type of *ppv, sans volatile.
2100 */
2101#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2102# define ASMAtomicUoReadPtrT(ppv, Type) \
2103 __extension__ \
2104 ({\
2105 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2106 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2107 pvTypeChecked; \
2108 })
2109#else
2110# define ASMAtomicUoReadPtrT(ppv, Type) \
2111 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2112#endif
2113
2114
2115/**
2116 * Atomically reads a boolean value, ordered.
2117 *
2118 * @returns Current *pf value
2119 * @param pf Pointer to the boolean variable to read.
2120 */
2121DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2122{
2123 ASMMemoryFence();
2124 return *pf; /* byte reads are atomic on x86 */
2125}
2126
2127
2128/**
2129 * Atomically reads a boolean value, unordered.
2130 *
2131 * @returns Current *pf value
2132 * @param pf Pointer to the boolean variable to read.
2133 */
2134DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2135{
2136 return *pf; /* byte reads are atomic on x86 */
2137}
2138
2139
2140/**
2141 * Atomically read a typical IPRT handle value, ordered.
2142 *
2143 * @param ph Pointer to the handle variable to read.
2144 * @param phRes Where to store the result.
2145 *
2146 * @remarks This doesn't currently work for all handles (like RTFILE).
2147 */
2148#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2149# define ASMAtomicReadHandle(ph, phRes) \
2150 do { \
2151 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2152 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2153 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2154 } while (0)
2155#elif HC_ARCH_BITS == 64
2156# define ASMAtomicReadHandle(ph, phRes) \
2157 do { \
2158 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2159 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2160 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2161 } while (0)
2162#else
2163# error HC_ARCH_BITS
2164#endif
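
/* Illustrative usage sketch (the pThis->hEvt member is hypothetical): the
 * macro copies the handle out through a second pointer instead of returning
 * it, so the caller passes both addresses.
 * @code
 *      RTSEMEVENT hEvtSnapshot;
 *      ASMAtomicReadHandle(&pThis->hEvt, &hEvtSnapshot);
 * @endcode
 */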
2165
2166
2167/**
2168 * Atomically read a typical IPRT handle value, unordered.
2169 *
2170 * @param ph Pointer to the handle variable to read.
2171 * @param phRes Where to store the result.
2172 *
2173 * @remarks This doesn't currently work for all handles (like RTFILE).
2174 */
2175#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2176# define ASMAtomicUoReadHandle(ph, phRes) \
2177 do { \
2178 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2179 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2180 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2181 } while (0)
2182#elif HC_ARCH_BITS == 64
2183# define ASMAtomicUoReadHandle(ph, phRes) \
2184 do { \
2185 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2186 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2187 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2188 } while (0)
2189#else
2190# error HC_ARCH_BITS
2191#endif
2192
2193
2194/**
2195 * Atomically read a value whose size might differ
2196 * between platforms or compilers, ordered.
2197 *
2198 * @param pu Pointer to the variable to read.
2199 * @param puRes Where to store the result.
2200 */
2201#define ASMAtomicReadSize(pu, puRes) \
2202 do { \
2203 switch (sizeof(*(pu))) { \
2204 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2205 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2206 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2207 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2208 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2209 } \
2210 } while (0)
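
/* Illustrative usage sketch (the s_uCookie variable is hypothetical): handy
 * when the width of the variable differs between targets, so none of the
 * fixed width readers fits all builds.
 * @code
 *      static uintptr_t volatile s_uCookie;
 *      uintptr_t uCookie;
 *      ASMAtomicReadSize(&s_uCookie, &uCookie);
 * @endcode
 */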
2211
2212
2213/**
2214 * Atomically read a value whose size might differ
2215 * between platforms or compilers, unordered.
2216 *
2217 * @param pu Pointer to the variable to read.
2218 * @param puRes Where to store the result.
2219 */
2220#define ASMAtomicUoReadSize(pu, puRes) \
2221 do { \
2222 switch (sizeof(*(pu))) { \
2223 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2224 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2225 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2226 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2227 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2228 } \
2229 } while (0)
2230
2231
2232/**
2233 * Atomically writes an unsigned 8-bit value, ordered.
2234 *
2235 * @param pu8 Pointer to the 8-bit variable.
2236 * @param u8 The 8-bit value to assign to *pu8.
2237 */
2238DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2239{
2240 ASMAtomicXchgU8(pu8, u8);
2241}
2242
2243
2244/**
2245 * Atomically writes an unsigned 8-bit value, unordered.
2246 *
2247 * @param pu8 Pointer to the 8-bit variable.
2248 * @param u8 The 8-bit value to assign to *pu8.
2249 */
2250DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2251{
2252 *pu8 = u8; /* byte writes are atomic on x86 */
2253}
2254
2255
2256/**
2257 * Atomically writes a signed 8-bit value, ordered.
2258 *
2259 * @param pi8 Pointer to the 8-bit variable to write.
2260 * @param i8 The 8-bit value to assign to *pi8.
2261 */
2262DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2263{
2264 ASMAtomicXchgS8(pi8, i8);
2265}
2266
2267
2268/**
2269 * Atomically writes a signed 8-bit value, unordered.
2270 *
2271 * @param pi8 Pointer to the 8-bit variable to write.
2272 * @param i8 The 8-bit value to assign to *pi8.
2273 */
2274DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2275{
2276 *pi8 = i8; /* byte writes are atomic on x86 */
2277}
2278
2279
2280/**
2281 * Atomically writes an unsigned 16-bit value, ordered.
2282 *
2283 * @param pu16 Pointer to the 16-bit variable to write.
2284 * @param u16 The 16-bit value to assign to *pu16.
2285 */
2286DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2287{
2288 ASMAtomicXchgU16(pu16, u16);
2289}
2290
2291
2292/**
2293 * Atomically writes an unsigned 16-bit value, unordered.
2294 *
2295 * @param pu16 Pointer to the 16-bit variable to write.
2296 * @param u16 The 16-bit value to assign to *pu16.
2297 */
2298DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2299{
2300 Assert(!((uintptr_t)pu16 & 1));
2301 *pu16 = u16;
2302}
2303
2304
2305/**
2306 * Atomically writes a signed 16-bit value, ordered.
2307 *
2308 * @param pi16 Pointer to the 16-bit variable to write.
2309 * @param i16 The 16-bit value to assign to *pi16.
2310 */
2311DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2312{
2313 ASMAtomicXchgS16(pi16, i16);
2314}
2315
2316
2317/**
2318 * Atomically writes a signed 16-bit value, unordered.
2319 *
2320 * @param pi16 Pointer to the 16-bit variable to write.
2321 * @param i16 The 16-bit value to assign to *pi16.
2322 */
2323DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2324{
2325 Assert(!((uintptr_t)pi16 & 1));
2326 *pi16 = i16;
2327}
2328
2329
2330/**
2331 * Atomically writes an unsigned 32-bit value, ordered.
2332 *
2333 * @param pu32 Pointer to the 32-bit variable to write.
2334 * @param u32 The 32-bit value to assign to *pu32.
2335 */
2336DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2337{
2338 ASMAtomicXchgU32(pu32, u32);
2339}
2340
2341
2342/**
2343 * Atomically writes an unsigned 32-bit value, unordered.
2344 *
2345 * @param pu32 Pointer to the 32-bit variable to write.
2346 * @param u32 The 32-bit value to assign to *pu32.
2347 */
2348DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2349{
2350 Assert(!((uintptr_t)pu32 & 3));
2351#if ARCH_BITS >= 32
2352 *pu32 = u32;
2353#else
2354 ASMAtomicXchgU32(pu32, u32);
2355#endif
2356}
2357
2358
2359/**
2360 * Atomically writes a signed 32-bit value, ordered.
2361 *
2362 * @param pi32 Pointer to the 32-bit variable to write.
2363 * @param i32 The 32-bit value to assign to *pi32.
2364 */
2365DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2366{
2367 ASMAtomicXchgS32(pi32, i32);
2368}
2369
2370
2371/**
2372 * Atomically writes a signed 32-bit value, unordered.
2373 *
2374 * @param pi32 Pointer to the 32-bit variable to write.
2375 * @param i32 The 32-bit value to assign to *pi32.
2376 */
2377DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2378{
2379 Assert(!((uintptr_t)pi32 & 3));
2380#if ARCH_BITS >= 32
2381 *pi32 = i32;
2382#else
2383 ASMAtomicXchgS32(pi32, i32);
2384#endif
2385}
2386
2387
2388/**
2389 * Atomically writes an unsigned 64-bit value, ordered.
2390 *
2391 * @param pu64 Pointer to the 64-bit variable to write.
2392 * @param u64 The 64-bit value to assign to *pu64.
2393 */
2394DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2395{
2396 ASMAtomicXchgU64(pu64, u64);
2397}
2398
2399
2400/**
2401 * Atomically writes an unsigned 64-bit value, unordered.
2402 *
2403 * @param pu64 Pointer to the 64-bit variable to write.
2404 * @param u64 The 64-bit value to assign to *pu64.
2405 */
2406DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2407{
2408 Assert(!((uintptr_t)pu64 & 7));
2409#if ARCH_BITS == 64
2410 *pu64 = u64;
2411#else
2412 ASMAtomicXchgU64(pu64, u64);
2413#endif
2414}
2415
2416
2417/**
2418 * Atomically writes a signed 64-bit value, ordered.
2419 *
2420 * @param pi64 Pointer to the 64-bit variable to write.
2421 * @param i64 The 64-bit value to assign to *pi64.
2422 */
2423DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2424{
2425 ASMAtomicXchgS64(pi64, i64);
2426}
2427
2428
2429/**
2430 * Atomically writes a signed 64-bit value, unordered.
2431 *
2432 * @param pi64 Pointer to the 64-bit variable to write.
2433 * @param i64 The 64-bit value to assign to *pi64.
2434 */
2435DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2436{
2437 Assert(!((uintptr_t)pi64 & 7));
2438#if ARCH_BITS == 64
2439 *pi64 = i64;
2440#else
2441 ASMAtomicXchgS64(pi64, i64);
2442#endif
2443}
2444
2445
2446/**
2447 * Atomically writes a size_t value, ordered.
2448 *
2449 * @returns nothing.
2450 * @param pcb Pointer to the size_t variable to write.
2451 * @param cb The value to assign to *pcb.
2452 */
2453DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2454{
2455#if ARCH_BITS == 64
2456 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2457#elif ARCH_BITS == 32
2458 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2459#elif ARCH_BITS == 16
2460 AssertCompileSize(size_t, 2);
2461 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2462#else
2463# error "Unsupported ARCH_BITS value"
2464#endif
2465}
2466
2467
2468/**
2469 * Atomically writes a boolean value, ordered.
2470 *
2471 * @param pf Pointer to the boolean variable to write.
2472 * @param f The boolean value to assign to *pf.
2473 */
2474DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2475{
2476 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2477}
2478
2479
2480/**
2481 * Atomically writes a boolean value, unordered.
2482 *
2483 * @param pf Pointer to the boolean variable to write.
2484 * @param f The boolean value to assign to *pf.
2485 */
2486DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2487{
2488 *pf = f; /* byte writes are atomic on x86 */
2489}
2490
2491
2492/**
2493 * Atomically writes a pointer value, ordered.
2494 *
2495 * @param ppv Pointer to the pointer variable to write.
2496 * @param pv The pointer value to assign to *ppv.
2497 */
2498DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2499{
2500#if ARCH_BITS == 32 || ARCH_BITS == 16
2501 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2502#elif ARCH_BITS == 64
2503 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2504#else
2505# error "ARCH_BITS is bogus"
2506#endif
2507}
2508
2509
2510/**
2511 * Atomically writes a pointer value, ordered.
2512 *
2513 * @param ppv Pointer to the pointer variable to write.
2514 * @param pv The pointer value to assign to *ppv. If NULL use
2515 * ASMAtomicWriteNullPtr or you'll land in trouble.
2516 *
2517 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2518 * NULL.
2519 */
2520#ifdef __GNUC__
2521# define ASMAtomicWritePtr(ppv, pv) \
2522 do \
2523 { \
2524 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2525 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2526 \
2527 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2528 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2529 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2530 \
2531 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2532 } while (0)
2533#else
2534# define ASMAtomicWritePtr(ppv, pv) \
2535 do \
2536 { \
2537 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2538 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2539 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2540 \
2541 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2542 } while (0)
2543#endif
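
/* Illustrative usage sketch (g_pSharedCfg and pNew are hypothetical): the
 * ordered write publishes a fully initialized object so that a reader using
 * ASMAtomicReadPtrT sees the initialization as well.  NULL must go through
 * ASMAtomicWriteNullPtr instead.
 * @code
 *      pNew->cRefs = 1;                          // initialize first ...
 *      ASMAtomicWritePtr(&g_pSharedCfg, pNew);   // ... then publish
 * @endcode
 */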
2544
2545
2546/**
2547 * Atomically sets a pointer to NULL, ordered.
2548 *
2549 * @param ppv Pointer to the pointer variable that should be set to NULL.
2550 *
2551 * @remarks This is relatively type safe on GCC platforms.
2552 */
2553#ifdef __GNUC__
2554# define ASMAtomicWriteNullPtr(ppv) \
2555 do \
2556 { \
2557 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2558 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2559 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2560 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2561 } while (0)
2562#else
2563# define ASMAtomicWriteNullPtr(ppv) \
2564 do \
2565 { \
2566 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2567 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2568 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2569 } while (0)
2570#endif
2571
2572
2573/**
2574 * Atomically writes a pointer value, unordered.
2575 *
2577 * @param ppv Pointer to the pointer variable.
2578 * @param pv The pointer value to assign to *ppv. If NULL use
2579 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2580 *
2581 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2582 * NULL.
2583 */
2584#ifdef __GNUC__
2585# define ASMAtomicUoWritePtr(ppv, pv) \
2586 do \
2587 { \
2588 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2589 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2590 \
2591 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2592 AssertCompile(sizeof(pv) == sizeof(void *)); \
2593 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2594 \
2595 *(ppvTypeChecked) = pvTypeChecked; \
2596 } while (0)
2597#else
2598# define ASMAtomicUoWritePtr(ppv, pv) \
2599 do \
2600 { \
2601 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2602 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2603 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2604 *(ppv) = pv; \
2605 } while (0)
2606#endif
2607
2608
2609/**
2610 * Atomically sets a pointer to NULL, unordered.
2611 *
2612 * @param ppv Pointer to the pointer variable that should be set to NULL.
2613 *
2614 * @remarks This is relatively type safe on GCC platforms.
2615 */
2616#ifdef __GNUC__
2617# define ASMAtomicUoWriteNullPtr(ppv) \
2618 do \
2619 { \
2620 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2621 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2622 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2623 *(ppvTypeChecked) = NULL; \
2624 } while (0)
2625#else
2626# define ASMAtomicUoWriteNullPtr(ppv) \
2627 do \
2628 { \
2629 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2630 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2631 *(ppv) = NULL; \
2632 } while (0)
2633#endif
2634
2635
2636/**
2637 * Atomically write a typical IPRT handle value, ordered.
2638 *
2639 * @param ph Pointer to the variable to update.
2640 * @param hNew The value to assign to *ph.
2641 *
2642 * @remarks This doesn't currently work for all handles (like RTFILE).
2643 */
2644#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2645# define ASMAtomicWriteHandle(ph, hNew) \
2646 do { \
2647 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2648 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2649 } while (0)
2650#elif HC_ARCH_BITS == 64
2651# define ASMAtomicWriteHandle(ph, hNew) \
2652 do { \
2653 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2654 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2655 } while (0)
2656#else
2657# error HC_ARCH_BITS
2658#endif
2659
2660
2661/**
2662 * Atomically write a typical IPRT handle value, unordered.
2663 *
2664 * @param ph Pointer to the variable to update.
2665 * @param hNew The value to assign to *ph.
2666 *
2667 * @remarks This doesn't currently work for all handles (like RTFILE).
2668 */
2669#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2670# define ASMAtomicUoWriteHandle(ph, hNew) \
2671 do { \
2672 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2673 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2674 } while (0)
2675#elif HC_ARCH_BITS == 64
2676# define ASMAtomicUoWriteHandle(ph, hNew) \
2677 do { \
2678 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2679 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2680 } while (0)
2681#else
2682# error HC_ARCH_BITS
2683#endif
2684
2685
2686/**
2687 * Atomically write a value whose size might differ
2688 * between platforms or compilers, ordered.
2689 *
2690 * @param pu Pointer to the variable to update.
2691 * @param uNew The value to assign to *pu.
2692 */
2693#define ASMAtomicWriteSize(pu, uNew) \
2694 do { \
2695 switch (sizeof(*(pu))) { \
2696 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2697 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2698 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2699 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2700 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2701 } \
2702 } while (0)
2703
2704/**
2705 * Atomically write a value whose size might differ
2706 * between platforms or compilers, unordered.
2707 *
2708 * @param pu Pointer to the variable to update.
2709 * @param uNew The value to assign to *pu.
2710 */
2711#define ASMAtomicUoWriteSize(pu, uNew) \
2712 do { \
2713 switch (sizeof(*(pu))) { \
2714 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2715 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2716 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2717 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2718 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2719 } \
2720 } while (0)
2721
2722
2723
2724/**
2725 * Atomically exchanges and adds to a 16-bit value, ordered.
2726 *
2727 * @returns The old value.
2728 * @param pu16 Pointer to the value.
2729 * @param u16 Number to add.
2730 *
2731 * @remarks Currently not implemented, just to make 16-bit code happy.
2732 * @remarks x86: Requires a 486 or later.
2733 */
2734RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2735
2736
2737/**
2738 * Atomically exchanges and adds to a 32-bit value, ordered.
2739 *
2740 * @returns The old value.
2741 * @param pu32 Pointer to the value.
2742 * @param u32 Number to add.
2743 *
2744 * @remarks x86: Requires a 486 or later.
2745 */
2746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2747RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2748#else
2749DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2750{
2751# if RT_INLINE_ASM_USES_INTRIN
2752 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2753 return u32;
2754
2755# elif RT_INLINE_ASM_GNU_STYLE
2756 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2757 : "=r" (u32),
2758 "=m" (*pu32)
2759 : "0" (u32),
2760 "m" (*pu32)
2761 : "memory");
2762 return u32;
2763# else
2764 __asm
2765 {
2766 mov eax, [u32]
2767# ifdef RT_ARCH_AMD64
2768 mov rdx, [pu32]
2769 lock xadd [rdx], eax
2770# else
2771 mov edx, [pu32]
2772 lock xadd [edx], eax
2773# endif
2774 mov [u32], eax
2775 }
2776 return u32;
2777# endif
2778}
2779#endif
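
/* Illustrative usage sketch (pRing, idxProducer and cReq are hypothetical
 * names): the return value is the value *before* the addition, so it can be
 * used directly as the index of the first of the cReq reserved slots.
 * @code
 *      uint32_t idxFirst = ASMAtomicAddU32(&pRing->idxProducer, cReq);
 * @endcode
 */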
2780
2781
2782/**
2783 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2784 *
2785 * @returns The old value.
2786 * @param pi32 Pointer to the value.
2787 * @param i32 Number to add.
2788 *
2789 * @remarks x86: Requires a 486 or later.
2790 */
2791DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2792{
2793 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2794}
2795
2796
2797/**
2798 * Atomically exchanges and adds to a 64-bit value, ordered.
2799 *
2800 * @returns The old value.
2801 * @param pu64 Pointer to the value.
2802 * @param u64 Number to add.
2803 *
2804 * @remarks x86: Requires a Pentium or later.
2805 */
2806#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2807DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2808#else
2809DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2810{
2811# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2812 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2813 return u64;
2814
2815# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2816 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2817 : "=r" (u64),
2818 "=m" (*pu64)
2819 : "0" (u64),
2820 "m" (*pu64)
2821 : "memory");
2822 return u64;
2823# else
2824 uint64_t u64Old;
2825 for (;;)
2826 {
2827 uint64_t u64New;
2828 u64Old = ASMAtomicUoReadU64(pu64);
2829 u64New = u64Old + u64;
2830 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2831 break;
2832 ASMNopPause();
2833 }
2834 return u64Old;
2835# endif
2836}
2837#endif
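
/* Illustrative usage sketch (the statistics member is hypothetical): on
 * 32-bit x86 the operation falls back to a cmpxchg8b based retry, so it stays
 * atomic even without a 64-bit xadd instruction.
 * @code
 *      ASMAtomicAddU64(&pStats->cbTransferred, cbThisXfer);
 * @endcode
 */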
2838
2839
2840/**
2841 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2842 *
2843 * @returns The old value.
2844 * @param pi64 Pointer to the value.
2845 * @param i64 Number to add.
2846 *
2847 * @remarks x86: Requires a Pentium or later.
2848 */
2849DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2850{
2851 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2852}
2853
2854
2855/**
2856 * Atomically exchanges and adds to a size_t value, ordered.
2857 *
2858 * @returns The old value.
2859 * @param pcb Pointer to the size_t value.
2860 * @param cb Number to add.
2861 */
2862DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2863{
2864#if ARCH_BITS == 64
2865 AssertCompileSize(size_t, 8);
2866 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2867#elif ARCH_BITS == 32
2868 AssertCompileSize(size_t, 4);
2869 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2870#elif ARCH_BITS == 16
2871 AssertCompileSize(size_t, 2);
2872 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2873#else
2874# error "Unsupported ARCH_BITS value"
2875#endif
2876}
2877
2878
2879/**
2880 * Atomically exchanges and adds a value whose size might differ between
2881 * platforms or compilers, ordered.
2882 *
2883 * @param pu Pointer to the variable to update.
2884 * @param uNew The value to add to *pu.
2885 * @param puOld Where to store the old value.
2886 */
2887#define ASMAtomicAddSize(pu, uNew, puOld) \
2888 do { \
2889 switch (sizeof(*(pu))) { \
2890 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2891 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2892 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2893 } \
2894 } while (0)
2895
2896
2897
2898/**
2899 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2900 *
2901 * @returns The old value.
2902 * @param pu16 Pointer to the value.
2903 * @param u16 Number to subtract.
2904 *
2905 * @remarks x86: Requires a 486 or later.
2906 */
2907DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2908{
2909 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2910}
2911
2912
2913/**
2914 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2915 *
2916 * @returns The old value.
2917 * @param pi16 Pointer to the value.
2918 * @param i16 Number to subtract.
2919 *
2920 * @remarks x86: Requires a 486 or later.
2921 */
2922DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2923{
2924 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2925}
2926
2927
2928/**
2929 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2930 *
2931 * @returns The old value.
2932 * @param pu32 Pointer to the value.
2933 * @param u32 Number to subtract.
2934 *
2935 * @remarks x86: Requires a 486 or later.
2936 */
2937DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2938{
2939 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2940}
2941
2942
2943/**
2944 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2945 *
2946 * @returns The old value.
2947 * @param pi32 Pointer to the value.
2948 * @param i32 Number to subtract.
2949 *
2950 * @remarks x86: Requires a 486 or later.
2951 */
2952DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2953{
2954 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2955}
2956
2957
2958/**
2959 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2960 *
2961 * @returns The old value.
2962 * @param pu64 Pointer to the value.
2963 * @param u64 Number to subtract.
2964 *
2965 * @remarks x86: Requires a Pentium or later.
2966 */
2967DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2968{
2969 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2970}
2971
2972
2973/**
2974 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2975 *
2976 * @returns The old value.
2977 * @param pi64 Pointer to the value.
2978 * @param i64 Number to subtract.
2979 *
2980 * @remarks x86: Requires a Pentium or later.
2981 */
2982DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2983{
2984 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2985}
2986
2987
2988/**
2989 * Atomically exchanges and subtracts from a size_t value, ordered.
2990 *
2991 * @returns The old value.
2992 * @param pcb Pointer to the size_t value.
2993 * @param cb Number to subtract.
2994 *
2995 * @remarks x86: Requires a 486 or later.
2996 */
2997DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2998{
2999#if ARCH_BITS == 64
3000 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3001#elif ARCH_BITS == 32
3002 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3003#elif ARCH_BITS == 16
3004 AssertCompileSize(size_t, 2);
3005 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3006#else
3007# error "Unsupported ARCH_BITS value"
3008#endif
3009}
3010
3011
3012/**
3013 * Atomically exchanges and subtracts a value whose size might differ between
3014 * platforms or compilers, ordered.
3015 *
3016 * @param pu Pointer to the variable to update.
3017 * @param uNew The value to subtract from *pu.
3018 * @param puOld Where to store the old value.
3019 *
3020 * @remarks x86: Requires a 486 or later.
3021 */
3022#define ASMAtomicSubSize(pu, uNew, puOld) \
3023 do { \
3024 switch (sizeof(*(pu))) { \
3025 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3026 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3027 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3028 } \
3029 } while (0)
3030
3031
3032
3033/**
3034 * Atomically increment a 16-bit value, ordered.
3035 *
3036 * @returns The new value.
3037 * @param pu16 Pointer to the value to increment.
3038 * @remarks Not implemented. Just to make 16-bit code happy.
3039 *
3040 * @remarks x86: Requires a 486 or later.
3041 */
3042RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3043
3044
3045/**
3046 * Atomically increment a 32-bit value, ordered.
3047 *
3048 * @returns The new value.
3049 * @param pu32 Pointer to the value to increment.
3050 *
3051 * @remarks x86: Requires a 486 or later.
3052 */
3053#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3054RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3055#else
3056DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3057{
3058 uint32_t u32;
3059# if RT_INLINE_ASM_USES_INTRIN
3060 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3061 return u32;
3062
3063# elif RT_INLINE_ASM_GNU_STYLE
3064 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3065 : "=r" (u32),
3066 "=m" (*pu32)
3067 : "0" (1),
3068 "m" (*pu32)
3069 : "memory");
3070 return u32+1;
3071# else
3072 __asm
3073 {
3074 mov eax, 1
3075# ifdef RT_ARCH_AMD64
3076 mov rdx, [pu32]
3077 lock xadd [rdx], eax
3078# else
3079 mov edx, [pu32]
3080 lock xadd [edx], eax
3081# endif
3082 mov u32, eax
3083 }
3084 return u32+1;
3085# endif
3086}
3087#endif
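
/* Illustrative usage sketch (the cRefs member is hypothetical): the increment
 * returns the new count, which is handy for assertions and logging.
 * @code
 *      uint32_t cRefs = ASMAtomicIncU32(&pThis->cRefs);
 *      Assert(cRefs > 1);
 * @endcode
 */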
3088
3089
3090/**
3091 * Atomically increment a signed 32-bit value, ordered.
3092 *
3093 * @returns The new value.
3094 * @param pi32 Pointer to the value to increment.
3095 *
3096 * @remarks x86: Requires a 486 or later.
3097 */
3098DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3099{
3100 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3101}
3102
3103
3104/**
3105 * Atomically increment a 64-bit value, ordered.
3106 *
3107 * @returns The new value.
3108 * @param pu64 Pointer to the value to increment.
3109 *
3110 * @remarks x86: Requires a Pentium or later.
3111 */
3112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3113DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3114#else
3115DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3116{
3117# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3118 uint64_t u64;
3119 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3120 return u64;
3121
3122# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3123 uint64_t u64;
3124 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3125 : "=r" (u64),
3126 "=m" (*pu64)
3127 : "0" (1),
3128 "m" (*pu64)
3129 : "memory");
3130 return u64 + 1;
3131# else
3132 return ASMAtomicAddU64(pu64, 1) + 1;
3133# endif
3134}
3135#endif
3136
3137
3138/**
3139 * Atomically increment a signed 64-bit value, ordered.
3140 *
3141 * @returns The new value.
3142 * @param pi64 Pointer to the value to increment.
3143 *
3144 * @remarks x86: Requires a Pentium or later.
3145 */
3146DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3147{
3148 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3149}
3150
3151
3152/**
3153 * Atomically increment a size_t value, ordered.
3154 *
3155 * @returns The new value.
3156 * @param pcb Pointer to the value to increment.
3157 *
3158 * @remarks x86: Requires a 486 or later.
3159 */
3160DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3161{
3162#if ARCH_BITS == 64
3163 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3164#elif ARCH_BITS == 32
3165 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3166#elif ARCH_BITS == 16
3167 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3168#else
3169# error "Unsupported ARCH_BITS value"
3170#endif
3171}
3172
3173
3174
3175/**
3176 * Atomically decrement an unsigned 16-bit value, ordered.
3177 *
3178 * @returns The new value.
3179 * @param pu16 Pointer to the value to decrement.
3180 * @remarks Not implemented. Just to make 16-bit code happy.
3181 *
3182 * @remarks x86: Requires a 486 or later.
3183 */
3184RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3185
3186
3187/**
3188 * Atomically decrement an unsigned 32-bit value, ordered.
3189 *
3190 * @returns The new value.
3191 * @param pu32 Pointer to the value to decrement.
3192 *
3193 * @remarks x86: Requires a 486 or later.
3194 */
3195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3196RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3197#else
3198DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3199{
3200 uint32_t u32;
3201# if RT_INLINE_ASM_USES_INTRIN
3202 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3203 return u32;
3204
3205# elif RT_INLINE_ASM_GNU_STYLE
3206 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3207 : "=r" (u32),
3208 "=m" (*pu32)
3209 : "0" (-1),
3210 "m" (*pu32)
3211 : "memory");
3212 return u32-1;
3213# else
3214 __asm
3215 {
3216 mov eax, -1
3217# ifdef RT_ARCH_AMD64
3218 mov rdx, [pu32]
3219 lock xadd [rdx], eax
3220# else
3221 mov edx, [pu32]
3222 lock xadd [edx], eax
3223# endif
3224 mov u32, eax
3225 }
3226 return u32-1;
3227# endif
3228}
3229#endif
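
/* Illustrative usage sketch (release path matching the hypothetical cRefs
 * example further up): the new value tells the caller whether it dropped the
 * last reference and therefore owns the destruction.
 * @code
 *      if (ASMAtomicDecU32(&pThis->cRefs) == 0)
 *          mystateDestroy(pThis);   // hypothetical destructor
 * @endcode
 */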
3230
3231
3232/**
3233 * Atomically decrement a signed 32-bit value, ordered.
3234 *
3235 * @returns The new value.
3236 * @param pi32 Pointer to the value to decrement.
3237 *
3238 * @remarks x86: Requires a 486 or later.
3239 */
3240DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3241{
3242 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3243}
3244
3245
3246/**
3247 * Atomically decrement an unsigned 64-bit value, ordered.
3248 *
3249 * @returns The new value.
3250 * @param pu64 Pointer to the value to decrement.
3251 *
3252 * @remarks x86: Requires a Pentium or later.
3253 */
3254#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3255RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3256#else
3257DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3258{
3259# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3260 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3261 return u64;
3262
3263# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3264 uint64_t u64;
3265 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3266 : "=r" (u64),
3267 "=m" (*pu64)
3268 : "0" (~(uint64_t)0),
3269 "m" (*pu64)
3270 : "memory");
3271 return u64-1;
3272# else
3273 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3274# endif
3275}
3276#endif
3277
3278
3279/**
3280 * Atomically decrement a signed 64-bit value, ordered.
3281 *
3282 * @returns The new value.
3283 * @param pi64 Pointer to the value to decrement.
3284 *
3285 * @remarks x86: Requires a Pentium or later.
3286 */
3287DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3288{
3289 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3290}
3291
3292
3293/**
3294 * Atomically decrement a size_t value, ordered.
3295 *
3296 * @returns The new value.
3297 * @param pcb Pointer to the value to decrement.
3298 *
3299 * @remarks x86: Requires a 486 or later.
3300 */
3301DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3302{
3303#if ARCH_BITS == 64
3304 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3305#elif ARCH_BITS == 32
3306 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3307#elif ARCH_BITS == 16
3308 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3309#else
3310# error "Unsupported ARCH_BITS value"
3311#endif
3312}
3313
3314
3315/**
3316 * Atomically Or an unsigned 32-bit value, ordered.
3317 *
3318 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3319 * @param u32 The value to OR *pu32 with.
3320 *
3321 * @remarks x86: Requires a 386 or later.
3322 */
3323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3324RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3325#else
3326DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3327{
3328# if RT_INLINE_ASM_USES_INTRIN
3329 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3330
3331# elif RT_INLINE_ASM_GNU_STYLE
3332 __asm__ __volatile__("lock; orl %1, %0\n\t"
3333 : "=m" (*pu32)
3334 : "ir" (u32),
3335 "m" (*pu32));
3336# else
3337 __asm
3338 {
3339 mov eax, [u32]
3340# ifdef RT_ARCH_AMD64
3341 mov rdx, [pu32]
3342 lock or [rdx], eax
3343# else
3344 mov edx, [pu32]
3345 lock or [edx], eax
3346# endif
3347 }
3348# endif
3349}
3350#endif
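
/* Illustrative usage sketch (the fFlags member and MYDEV_F_WAKE_PENDING flag
 * are hypothetical): an ordered OR sets the bit so that other CPUs observe it
 * together with everything written before it.
 * @code
 *      ASMAtomicOrU32(&pThis->fFlags, MYDEV_F_WAKE_PENDING);
 * @endcode
 */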
3351
3352
3353/**
3354 * Atomically Or a signed 32-bit value, ordered.
3355 *
3356 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3357 * @param i32 The value to OR *pi32 with.
3358 *
3359 * @remarks x86: Requires a 386 or later.
3360 */
3361DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3362{
3363 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3364}
3365
3366
3367/**
3368 * Atomically Or an unsigned 64-bit value, ordered.
3369 *
3370 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
3371 * @param u64 The value to OR *pu64 with.
3372 *
3373 * @remarks x86: Requires a Pentium or later.
3374 */
3375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3376DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3377#else
3378DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3379{
3380# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3381 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3382
3383# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3384 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3385 : "=m" (*pu64)
3386 : "r" (u64),
3387 "m" (*pu64));
3388# else
3389 for (;;)
3390 {
3391 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3392 uint64_t u64New = u64Old | u64;
3393 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3394 break;
3395 ASMNopPause();
3396 }
3397# endif
3398}
3399#endif
3400
3401
3402/**
3403 * Atomically Or a signed 64-bit value, ordered.
3404 *
3405 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
3406 * @param i64 The value to OR *pi64 with.
3407 *
3408 * @remarks x86: Requires a Pentium or later.
3409 */
3410DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3411{
3412 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3413}
3414
3415
3416/**
3417 * Atomically And an unsigned 32-bit value, ordered.
3418 *
3419 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3420 * @param u32 The value to AND *pu32 with.
3421 *
3422 * @remarks x86: Requires a 386 or later.
3423 */
3424#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3425RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3426#else
3427DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3428{
3429# if RT_INLINE_ASM_USES_INTRIN
3430 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3431
3432# elif RT_INLINE_ASM_GNU_STYLE
3433 __asm__ __volatile__("lock; andl %1, %0\n\t"
3434 : "=m" (*pu32)
3435 : "ir" (u32),
3436 "m" (*pu32));
3437# else
3438 __asm
3439 {
3440 mov eax, [u32]
3441# ifdef RT_ARCH_AMD64
3442 mov rdx, [pu32]
3443 lock and [rdx], eax
3444# else
3445 mov edx, [pu32]
3446 lock and [edx], eax
3447# endif
3448 }
3449# endif
3450}
3451#endif
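
/* Illustrative usage sketch (clears the hypothetical flag set in the OR
 * example above): ANDing with the inverted mask clears the bit atomically.
 * @code
 *      ASMAtomicAndU32(&pThis->fFlags, ~MYDEV_F_WAKE_PENDING);
 * @endcode
 */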
3452
3453
3454/**
3455 * Atomically And a signed 32-bit value, ordered.
3456 *
3457 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3458 * @param i32 The value to AND *pi32 with.
3459 *
3460 * @remarks x86: Requires a 386 or later.
3461 */
3462DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3463{
3464 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3465}
3466
3467
3468/**
3469 * Atomically And an unsigned 64-bit value, ordered.
3470 *
3471 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3472 * @param u64 The value to AND *pu64 with.
3473 *
3474 * @remarks x86: Requires a Pentium or later.
3475 */
3476#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3477DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3478#else
3479DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3480{
3481# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3482 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3483
3484# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3485 __asm__ __volatile__("lock; andq %1, %0\n\t"
3486 : "=m" (*pu64)
3487 : "r" (u64),
3488 "m" (*pu64));
3489# else
3490 for (;;)
3491 {
3492 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3493 uint64_t u64New = u64Old & u64;
3494 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3495 break;
3496 ASMNopPause();
3497 }
3498# endif
3499}
3500#endif
3501
3502
3503/**
3504 * Atomically And a signed 64-bit value, ordered.
3505 *
3506 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3507 * @param i64 The value to AND *pi64 with.
3508 *
3509 * @remarks x86: Requires a Pentium or later.
3510 */
3511DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3512{
3513 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3514}
3515
3516
3517/**
3518 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3519 *
3520 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3521 * @param u32 The value to OR *pu32 with.
3522 *
3523 * @remarks x86: Requires a 386 or later.
3524 */
3525#if RT_INLINE_ASM_EXTERNAL
3526RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3527#else
3528DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3529{
3530# if RT_INLINE_ASM_GNU_STYLE
3531 __asm__ __volatile__("orl %1, %0\n\t"
3532 : "=m" (*pu32)
3533 : "ir" (u32),
3534 "m" (*pu32));
3535# else
3536 __asm
3537 {
3538 mov eax, [u32]
3539# ifdef RT_ARCH_AMD64
3540 mov rdx, [pu32]
3541 or [rdx], eax
3542# else
3543 mov edx, [pu32]
3544 or [edx], eax
3545# endif
3546 }
3547# endif
3548}
3549#endif
3550
3551
3552/**
3553 * Atomically OR a signed 32-bit value, unordered.
3554 *
3555 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3556 * @param i32 The value to OR *pi32 with.
3557 *
3558 * @remarks x86: Requires a 386 or later.
3559 */
3560DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3561{
3562 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3563}
3564
3565
3566/**
3567 * Atomically OR an unsigned 64-bit value, unordered.
3568 *
3569 * @param pu64 Pointer to the 64-bit variable to OR u64 with.
3570 * @param u64 The value to OR *pu64 with.
3571 *
3572 * @remarks x86: Requires a Pentium or later.
3573 */
3574#if RT_INLINE_ASM_EXTERNAL
3575DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3576#else
3577DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3578{
3579# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3580 __asm__ __volatile__("orq %1, %q0\n\t"
3581 : "=m" (*pu64)
3582 : "r" (u64),
3583 "m" (*pu64));
3584# else
3585 for (;;)
3586 {
3587 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3588 uint64_t u64New = u64Old | u64;
3589 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3590 break;
3591 ASMNopPause();
3592 }
3593# endif
3594}
3595#endif
3596
3597
3598/**
3599 * Atomically Or a signed 64-bit value, unordered.
3600 *
3601 * @param pi64 Pointer to the 64-bit variable to OR i64 with.
3602 * @param i64 The value to OR *pi64 with.
3603 *
3604 * @remarks x86: Requires a Pentium or later.
3605 */
3606DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3607{
3608 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3609}
3610
3611
3612/**
3613 * Atomically And an unsigned 32-bit value, unordered.
3614 *
3615 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3616 * @param u32 The value to AND *pu32 with.
3617 *
3618 * @remarks x86: Requires a 386 or later.
3619 */
3620#if RT_INLINE_ASM_EXTERNAL
3621RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3622#else
3623DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3624{
3625# if RT_INLINE_ASM_GNU_STYLE
3626 __asm__ __volatile__("andl %1, %0\n\t"
3627 : "=m" (*pu32)
3628 : "ir" (u32),
3629 "m" (*pu32));
3630# else
3631 __asm
3632 {
3633 mov eax, [u32]
3634# ifdef RT_ARCH_AMD64
3635 mov rdx, [pu32]
3636 and [rdx], eax
3637# else
3638 mov edx, [pu32]
3639 and [edx], eax
3640# endif
3641 }
3642# endif
3643}
3644#endif
3645
3646
3647/**
3648 * Atomically And a signed 32-bit value, unordered.
3649 *
3650 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3651 * @param i32 The value to AND *pi32 with.
3652 *
3653 * @remarks x86: Requires a 386 or later.
3654 */
3655DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3656{
3657 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3658}
3659
3660
3661/**
3662 * Atomically And an unsigned 64-bit value, unordered.
3663 *
3664 * @param pu64 Pointer to the 64-bit variable to AND u64 with.
3665 * @param u64 The value to AND *pu64 with.
3666 *
3667 * @remarks x86: Requires a Pentium or later.
3668 */
3669#if RT_INLINE_ASM_EXTERNAL
3670DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3671#else
3672DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3673{
3674# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3675 __asm__ __volatile__("andq %1, %0\n\t"
3676 : "=m" (*pu64)
3677 : "r" (u64),
3678 "m" (*pu64));
3679# else
3680 for (;;)
3681 {
3682 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3683 uint64_t u64New = u64Old & u64;
3684 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3685 break;
3686 ASMNopPause();
3687 }
3688# endif
3689}
3690#endif
3691
3692
3693/**
3694 * Atomically And a signed 64-bit value, unordered.
3695 *
3696 * @param pi64 Pointer to the 64-bit variable to AND i64 with.
3697 * @param i64 The value to AND *pi64 with.
3698 *
3699 * @remarks x86: Requires a Pentium or later.
3700 */
3701DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3702{
3703 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3704}
3705
3706
3707/**
3708 * Atomically increment an unsigned 32-bit value, unordered.
3709 *
3710 * @returns the new value.
3711 * @param pu32 Pointer to the variable to increment.
3712 *
3713 * @remarks x86: Requires a 486 or later.
3714 */
3715#if RT_INLINE_ASM_EXTERNAL
3716RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3717#else
3718DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3719{
3720 uint32_t u32;
3721# if RT_INLINE_ASM_GNU_STYLE
3722 __asm__ __volatile__("xaddl %0, %1\n\t"
3723 : "=r" (u32),
3724 "=m" (*pu32)
3725 : "0" (1),
3726 "m" (*pu32)
3727 : "memory");
3728 return u32 + 1;
3729# else
3730 __asm
3731 {
3732 mov eax, 1
3733# ifdef RT_ARCH_AMD64
3734 mov rdx, [pu32]
3735 xadd [rdx], eax
3736# else
3737 mov edx, [pu32]
3738 xadd [edx], eax
3739# endif
3740 mov u32, eax
3741 }
3742 return u32 + 1;
3743# endif
3744}
3745#endif
3746
3747
3748/**
3749 * Atomically decrement an unsigned 32-bit value, unordered.
3750 *
3751 * @returns the new value.
3752 * @param pu32 Pointer to the variable to decrement.
3753 *
3754 * @remarks x86: Requires a 486 or later.
3755 */
3756#if RT_INLINE_ASM_EXTERNAL
3757RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3758#else
3759DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3760{
3761 uint32_t u32;
3762# if RT_INLINE_ASM_GNU_STYLE
3763 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3764 : "=r" (u32),
3765 "=m" (*pu32)
3766 : "0" (-1),
3767 "m" (*pu32)
3768 : "memory");
3769 return u32 - 1;
3770# else
3771 __asm
3772 {
3773 mov eax, -1
3774# ifdef RT_ARCH_AMD64
3775 mov rdx, [pu32]
3776 xadd [rdx], eax
3777# else
3778 mov edx, [pu32]
3779 xadd [edx], eax
3780# endif
3781 mov u32, eax
3782 }
3783 return u32 - 1;
3784# endif
3785}
3786#endif
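/*
 * Editorial usage sketch (not part of the original header): unordered
 * increment/decrement fits approximate counters where exact ordering does not
 * matter.  g_cMyActiveUsers is a hypothetical illustration.
 *
 *     static uint32_t volatile g_cMyActiveUsers = 0;
 *
 *     static void exampleUserEnter(void) { ASMAtomicUoIncU32(&g_cMyActiveUsers); }
 *     static void exampleUserLeave(void) { ASMAtomicUoDecU32(&g_cMyActiveUsers); }
 */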
3787
3788
3789/** @def RT_ASM_PAGE_SIZE
3790 * We try to avoid dragging in iprt/param.h here.
3791 * @internal
3792 */
3793#if defined(RT_ARCH_SPARC64)
3794# define RT_ASM_PAGE_SIZE 0x2000
3795# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3796# if PAGE_SIZE != 0x2000
3797# error "PAGE_SIZE is not 0x2000!"
3798# endif
3799# endif
3800#else
3801# define RT_ASM_PAGE_SIZE 0x1000
3802# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3803# if PAGE_SIZE != 0x1000
3804# error "PAGE_SIZE is not 0x1000!"
3805# endif
3806# endif
3807#endif
3808
3809/**
3810 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3811 *
3812 * @param pv Pointer to the memory block. This must be page aligned.
3813 */
3814#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3815RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3816#else
3817DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3818{
3819# if RT_INLINE_ASM_USES_INTRIN
3820# ifdef RT_ARCH_AMD64
3821 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3822# else
3823 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3824# endif
3825
3826# elif RT_INLINE_ASM_GNU_STYLE
3827 RTCCUINTREG uDummy;
3828# ifdef RT_ARCH_AMD64
3829 __asm__ __volatile__("rep stosq"
3830 : "=D" (pv),
3831 "=c" (uDummy)
3832 : "0" (pv),
3833 "c" (RT_ASM_PAGE_SIZE >> 3),
3834 "a" (0)
3835 : "memory");
3836# else
3837 __asm__ __volatile__("rep stosl"
3838 : "=D" (pv),
3839 "=c" (uDummy)
3840 : "0" (pv),
3841 "c" (RT_ASM_PAGE_SIZE >> 2),
3842 "a" (0)
3843 : "memory");
3844# endif
3845# else
3846 __asm
3847 {
3848# ifdef RT_ARCH_AMD64
3849 xor rax, rax
3850 mov ecx, 0200h
3851 mov rdi, [pv]
3852 rep stosq
3853# else
3854 xor eax, eax
3855 mov ecx, 0400h
3856 mov edi, [pv]
3857 rep stosd
3858# endif
3859 }
3860# endif
3861}
3862#endif
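/*
 * Editorial usage sketch (not part of the original header): resetting a page
 * obtained from a page-granular allocator (e.g. RTMemPageAlloc); the helper
 * name is hypothetical and the pointer is assumed to be page aligned.
 *
 *     static void exampleResetScratchPage(void *pvPage)
 *     {
 *         ASMMemZeroPage(pvPage);
 *     }
 */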
3863
3864
3865/**
3866 * Zeros a memory block with a 32-bit aligned size.
3867 *
3868 * @param pv Pointer to the memory block.
3869 * @param cb Number of bytes in the block. This MUST be 32-bit aligned (a multiple of 4)!
3870 */
3871#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3872RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3873#else
3874DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3875{
3876# if RT_INLINE_ASM_USES_INTRIN
3877# ifdef RT_ARCH_AMD64
3878 if (!(cb & 7))
3879 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3880 else
3881# endif
3882 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3883
3884# elif RT_INLINE_ASM_GNU_STYLE
3885 __asm__ __volatile__("rep stosl"
3886 : "=D" (pv),
3887 "=c" (cb)
3888 : "0" (pv),
3889 "1" (cb >> 2),
3890 "a" (0)
3891 : "memory");
3892# else
3893 __asm
3894 {
3895 xor eax, eax
3896# ifdef RT_ARCH_AMD64
3897 mov rcx, [cb]
3898 shr rcx, 2
3899 mov rdi, [pv]
3900# else
3901 mov ecx, [cb]
3902 shr ecx, 2
3903 mov edi, [pv]
3904# endif
3905 rep stosd
3906 }
3907# endif
3908}
3909#endif
3910
3911
3912/**
3913 * Fills a memory block with a 32-bit aligned size.
3914 *
3915 * @param pv Pointer to the memory block.
3916 * @param cb Number of bytes in the block. This MUST be 32-bit aligned (a multiple of 4)!
3917 * @param u32 The value to fill with.
3918 */
3919#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3920RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3921#else
3922DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3923{
3924# if RT_INLINE_ASM_USES_INTRIN
3925# ifdef RT_ARCH_AMD64
3926 if (!(cb & 7))
3927 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3928 else
3929# endif
3930 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3931
3932# elif RT_INLINE_ASM_GNU_STYLE
3933 __asm__ __volatile__("rep stosl"
3934 : "=D" (pv),
3935 "=c" (cb)
3936 : "0" (pv),
3937 "1" (cb >> 2),
3938 "a" (u32)
3939 : "memory");
3940# else
3941 __asm
3942 {
3943# ifdef RT_ARCH_AMD64
3944 mov rcx, [cb]
3945 shr rcx, 2
3946 mov rdi, [pv]
3947# else
3948 mov ecx, [cb]
3949 shr ecx, 2
3950 mov edi, [pv]
3951# endif
3952 mov eax, [u32]
3953 rep stosd
3954 }
3955# endif
3956}
3957#endif
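/*
 * Editorial usage sketch (not part of the original header): ASMMemZero32 and
 * ASMMemFill32 operate on sizes that are multiples of 4; MYEXAMPLETABLE is a
 * hypothetical structure used only for illustration.
 *
 *     typedef struct MYEXAMPLETABLE { uint32_t au32Entries[64]; } MYEXAMPLETABLE;
 *
 *     static void exampleMarkAllFree(MYEXAMPLETABLE *pTable)
 *     {
 *         ASMMemFill32(pTable, sizeof(*pTable), UINT32_C(0xffffffff));
 *     }
 *
 *     static void exampleResetTable(MYEXAMPLETABLE *pTable)
 *     {
 *         ASMMemZero32(pTable, sizeof(*pTable));
 *     }
 */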
3958
3959
3960/**
3961 * Checks if a memory block is all zeros.
3962 *
3963 * @returns Pointer to the first non-zero byte.
3964 * @returns NULL if all zero.
3965 *
3966 * @param pv Pointer to the memory block.
3967 * @param cb Number of bytes in the block.
3968 *
3969 * @todo Fix name, it is a predicate function but it's not returning boolean!
3970 */
3971#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3972 && !defined(RT_ARCH_SPARC64) \
3973 && !defined(RT_ARCH_SPARC)
3974DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3975#else
3976DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3977{
3978 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3979 for (; cb; cb--, pb++)
3980 if (RT_LIKELY(*pb == 0))
3981 { /* likely */ }
3982 else
3983 return (void RT_FAR *)pb;
3984 return NULL;
3985}
3986#endif
3987
3988
3989/**
3990 * Checks if a memory block is all zeros.
3991 *
3992 * @returns true if zero, false if not.
3993 *
3994 * @param pv Pointer to the memory block.
3995 * @param cb Number of bytes in the block.
3996 *
3997 * @sa ASMMemFirstNonZero
3998 */
3999DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
4000{
4001 return ASMMemFirstNonZero(pv, cb) == NULL;
4002}
4003
4004
4005/**
4006 * Checks if a memory page is all zeros.
4007 *
4008 * @returns true / false.
4009 *
4010 * @param pvPage Pointer to the page. Must be aligned on a 16-byte
4011 * boundary.
4012 */
4013DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
4014{
4015# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4016 union { RTCCUINTREG r; bool f; } uAX;
4017 RTCCUINTREG xCX, xDI;
4018 Assert(!((uintptr_t)pvPage & 15));
4019 __asm__ __volatile__("repe; "
4020# ifdef RT_ARCH_AMD64
4021 "scasq\n\t"
4022# else
4023 "scasl\n\t"
4024# endif
4025 "setnc %%al\n\t"
4026 : "=&c" (xCX),
4027 "=&D" (xDI),
4028 "=&a" (uAX.r)
4029 : "mr" (pvPage),
4030# ifdef RT_ARCH_AMD64
4031 "0" (RT_ASM_PAGE_SIZE/8),
4032# else
4033 "0" (RT_ASM_PAGE_SIZE/4),
4034# endif
4035 "1" (pvPage),
4036 "2" (0));
4037 return uAX.f;
4038# else
4039 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4040 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4041 Assert(!((uintptr_t)pvPage & 15));
4042 for (;;)
4043 {
4044 if (puPtr[0]) return false;
4045 if (puPtr[4]) return false;
4046
4047 if (puPtr[2]) return false;
4048 if (puPtr[6]) return false;
4049
4050 if (puPtr[1]) return false;
4051 if (puPtr[5]) return false;
4052
4053 if (puPtr[3]) return false;
4054 if (puPtr[7]) return false;
4055
4056 if (!--cLeft)
4057 return true;
4058 puPtr += 8;
4059 }
4060# endif
4061}
4062
4063
4064/**
4065 * Checks if a memory block is filled with the specified byte, returning the
4066 * first mismatch.
4067 *
4068 * This is sort of an inverted memchr.
4069 *
4070 * @returns Pointer to the byte which doesn't equal u8.
4071 * @returns NULL if all equal to u8.
4072 *
4073 * @param pv Pointer to the memory block.
4074 * @param cb Number of bytes in the block.
4075 * @param u8 The value it's supposed to be filled with.
4076 *
4077 * @remarks No alignment requirements.
4078 */
4079#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4080 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4081 && !defined(RT_ARCH_SPARC64) \
4082 && !defined(RT_ARCH_SPARC)
4083DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4084#else
4085DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4086{
4087 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4088 for (; cb; cb--, pb++)
4089 if (RT_LIKELY(*pb == u8))
4090 { /* likely */ }
4091 else
4092 return (void *)pb;
4093 return NULL;
4094}
4095#endif
4096
4097
4098/**
4099 * Checks if a memory block is filled with the specified byte.
4100 *
4101 * @returns true if all matching, false if not.
4102 *
4103 * @param pv Pointer to the memory block.
4104 * @param cb Number of bytes in the block.
4105 * @param u8 The value it's supposed to be filled with.
4106 *
4107 * @remarks No alignment requirements.
4108 */
4109DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4110{
4111 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4112}
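/*
 * Editorial usage sketch (not part of the original header): checking whether
 * a 512-byte sector is still in its erased (all 0xff) state; the sector size
 * and the helper name are illustrative assumptions.
 *
 *     static bool exampleIsSectorBlank(uint8_t const *pbSector)
 *     {
 *         return ASMMemIsAllU8(pbSector, 512, 0xff);
 *     }
 */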
4113
4114
4115/**
4116 * Checks if a memory block is filled with the specified 32-bit value.
4117 *
4118 * This is a sort of inverted memchr.
4119 *
4120 * @returns Pointer to the first value which doesn't equal u32.
4121 * @returns NULL if all equal to u32.
4122 *
4123 * @param pv Pointer to the memory block.
4124 * @param cb Number of bytes in the block. This MUST be 32-bit aligned (a multiple of 4)!
4125 * @param u32 The value it's supposed to be filled with.
4126 */
4127DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4128{
4129/** @todo rewrite this in inline assembly? */
4130 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4131 for (; cb; cb -= 4, pu32++)
4132 if (RT_LIKELY(*pu32 == u32))
4133 { /* likely */ }
4134 else
4135 return (uint32_t RT_FAR *)pu32;
4136 return NULL;
4137}
4138
4139
4140/**
4141 * Probes a byte pointer for read access.
4142 *
4143 * While the function will fault if the byte is not read accessible, the
4144 * idea is to do the probing in a safe place, such as before acquiring
4145 * locks and similar resources.
4146 *
4147 * Also, this function guarantees that an eager compiler is not going
4148 * to optimize the probing away.
4149 *
4150 * @param pvByte Pointer to the byte.
4151 */
4152#if RT_INLINE_ASM_EXTERNAL
4153RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4154#else
4155DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4156{
4157 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4158 uint8_t u8;
4159# if RT_INLINE_ASM_GNU_STYLE
4160 __asm__ __volatile__("movb (%1), %0\n\t"
4161 : "=r" (u8)
4162 : "r" (pvByte));
4163# else
4164 __asm
4165 {
4166# ifdef RT_ARCH_AMD64
4167 mov rax, [pvByte]
4168 mov al, [rax]
4169# else
4170 mov eax, [pvByte]
4171 mov al, [eax]
4172# endif
4173 mov [u8], al
4174 }
4175# endif
4176 return u8;
4177}
4178#endif
4179
4180/**
4181 * Probes a buffer for read access page by page.
4182 *
4183 * While the function will fault if the buffer is not fully read
4184 * accessible, the idea is to do this in a safe place, such as before
4185 * acquiring locks and similar resources.
4186 *
4187 * Also, this function guarantees that an eager compiler is not going
4188 * to optimize the probing away.
4189 *
4190 * @param pvBuf Pointer to the buffer.
4191 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4192 */
4193DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4194{
4195 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4196 /* the first byte */
4197 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4198 ASMProbeReadByte(pu8);
4199
4200 /* the pages in between. */
4201 while (cbBuf > RT_ASM_PAGE_SIZE)
4202 {
4203 ASMProbeReadByte(pu8);
4204 cbBuf -= RT_ASM_PAGE_SIZE;
4205 pu8 += RT_ASM_PAGE_SIZE;
4206 }
4207
4208 /* the last byte */
4209 ASMProbeReadByte(pu8 + cbBuf - 1);
4210}
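/*
 * Editorial usage sketch (not part of the original header): probing a caller
 * supplied buffer before taking a lock, so that any access fault happens in a
 * safe context.  MyLockAcquire/MyLockRelease are hypothetical helpers.
 *
 *     static void exampleCopyUnderLock(void *pvDst, const void *pvSrc, size_t cb)
 *     {
 *         ASMProbeReadBuffer(pvSrc, cb);   // fault here, before the lock, if unreadable
 *         MyLockAcquire();
 *         memcpy(pvDst, pvSrc, cb);        // needs <string.h> when used outside this sketch
 *         MyLockRelease();
 *     }
 */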
4211
4212
4213
4214/** @defgroup grp_inline_bits Bit Operations
4215 * @{
4216 */
4217
4218
4219/**
4220 * Sets a bit in a bitmap.
4221 *
4222 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4223 * @param iBit The bit to set.
4224 *
4225 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4226 * However, doing so will yield better performance as well as avoiding
4227 * traps accessing the last bits in the bitmap.
4228 */
4229#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4230RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4231#else
4232DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4233{
4234# if RT_INLINE_ASM_USES_INTRIN
4235 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4236
4237# elif RT_INLINE_ASM_GNU_STYLE
4238 __asm__ __volatile__("btsl %1, %0"
4239 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4240 : "Ir" (iBit),
4241 "m" (*(volatile long RT_FAR *)pvBitmap)
4242 : "memory");
4243# else
4244 __asm
4245 {
4246# ifdef RT_ARCH_AMD64
4247 mov rax, [pvBitmap]
4248 mov edx, [iBit]
4249 bts [rax], edx
4250# else
4251 mov eax, [pvBitmap]
4252 mov edx, [iBit]
4253 bts [eax], edx
4254# endif
4255 }
4256# endif
4257}
4258#endif
4259
4260
4261/**
4262 * Atomically sets a bit in a bitmap, ordered.
4263 *
4264 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4265 * the memory access isn't atomic!
4266 * @param iBit The bit to set.
4267 *
4268 * @remarks x86: Requires a 386 or later.
4269 */
4270#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4271RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4272#else
4273DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4274{
4275 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4276# if RT_INLINE_ASM_USES_INTRIN
4277 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4278# elif RT_INLINE_ASM_GNU_STYLE
4279 __asm__ __volatile__("lock; btsl %1, %0"
4280 : "=m" (*(volatile long *)pvBitmap)
4281 : "Ir" (iBit),
4282 "m" (*(volatile long *)pvBitmap)
4283 : "memory");
4284# else
4285 __asm
4286 {
4287# ifdef RT_ARCH_AMD64
4288 mov rax, [pvBitmap]
4289 mov edx, [iBit]
4290 lock bts [rax], edx
4291# else
4292 mov eax, [pvBitmap]
4293 mov edx, [iBit]
4294 lock bts [eax], edx
4295# endif
4296 }
4297# endif
4298}
4299#endif
4300
4301
4302/**
4303 * Clears a bit in a bitmap.
4304 *
4305 * @param pvBitmap Pointer to the bitmap.
4306 * @param iBit The bit to clear.
4307 *
4308 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4309 * However, doing so will yield better performance as well as avoiding
4310 * traps accessing the last bits in the bitmap.
4311 */
4312#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4313RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4314#else
4315DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4316{
4317# if RT_INLINE_ASM_USES_INTRIN
4318 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4319
4320# elif RT_INLINE_ASM_GNU_STYLE
4321 __asm__ __volatile__("btrl %1, %0"
4322 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4323 : "Ir" (iBit),
4324 "m" (*(volatile long RT_FAR *)pvBitmap)
4325 : "memory");
4326# else
4327 __asm
4328 {
4329# ifdef RT_ARCH_AMD64
4330 mov rax, [pvBitmap]
4331 mov edx, [iBit]
4332 btr [rax], edx
4333# else
4334 mov eax, [pvBitmap]
4335 mov edx, [iBit]
4336 btr [eax], edx
4337# endif
4338 }
4339# endif
4340}
4341#endif
4342
4343
4344/**
4345 * Atomically clears a bit in a bitmap, ordered.
4346 *
4347 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4348 * the memory access isn't atomic!
4349 * @param iBit The bit to clear.
4350 *
4351 * @remarks No memory barrier, take care on SMP.
4352 * @remarks x86: Requires a 386 or later.
4353 */
4354#if RT_INLINE_ASM_EXTERNAL
4355RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4356#else
4357DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4358{
4359 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4360# if RT_INLINE_ASM_GNU_STYLE
4361 __asm__ __volatile__("lock; btrl %1, %0"
4362 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4363 : "Ir" (iBit),
4364 "m" (*(volatile long RT_FAR *)pvBitmap)
4365 : "memory");
4366# else
4367 __asm
4368 {
4369# ifdef RT_ARCH_AMD64
4370 mov rax, [pvBitmap]
4371 mov edx, [iBit]
4372 lock btr [rax], edx
4373# else
4374 mov eax, [pvBitmap]
4375 mov edx, [iBit]
4376 lock btr [eax], edx
4377# endif
4378 }
4379# endif
4380}
4381#endif
4382
4383
4384/**
4385 * Toggles a bit in a bitmap.
4386 *
4387 * @param pvBitmap Pointer to the bitmap.
4388 * @param iBit The bit to toggle.
4389 *
4390 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4391 * However, doing so will yield better performance as well as avoiding
4392 * traps accessing the last bits in the bitmap.
4393 */
4394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4395RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4396#else
4397DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4398{
4399# if RT_INLINE_ASM_USES_INTRIN
4400 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4401# elif RT_INLINE_ASM_GNU_STYLE
4402 __asm__ __volatile__("btcl %1, %0"
4403 : "=m" (*(volatile long *)pvBitmap)
4404 : "Ir" (iBit),
4405 "m" (*(volatile long *)pvBitmap)
4406 : "memory");
4407# else
4408 __asm
4409 {
4410# ifdef RT_ARCH_AMD64
4411 mov rax, [pvBitmap]
4412 mov edx, [iBit]
4413 btc [rax], edx
4414# else
4415 mov eax, [pvBitmap]
4416 mov edx, [iBit]
4417 btc [eax], edx
4418# endif
4419 }
4420# endif
4421}
4422#endif
4423
4424
4425/**
4426 * Atomically toggles a bit in a bitmap, ordered.
4427 *
4428 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4429 * the memory access isn't atomic!
4430 * @param iBit The bit to toggle.
4431 *
4432 * @remarks x86: Requires a 386 or later.
4433 */
4434#if RT_INLINE_ASM_EXTERNAL
4435RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4436#else
4437DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4438{
4439 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4440# if RT_INLINE_ASM_GNU_STYLE
4441 __asm__ __volatile__("lock; btcl %1, %0"
4442 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4443 : "Ir" (iBit),
4444 "m" (*(volatile long RT_FAR *)pvBitmap)
4445 : "memory");
4446# else
4447 __asm
4448 {
4449# ifdef RT_ARCH_AMD64
4450 mov rax, [pvBitmap]
4451 mov edx, [iBit]
4452 lock btc [rax], edx
4453# else
4454 mov eax, [pvBitmap]
4455 mov edx, [iBit]
4456 lock btc [eax], edx
4457# endif
4458 }
4459# endif
4460}
4461#endif
4462
4463
4464/**
4465 * Tests and sets a bit in a bitmap.
4466 *
4467 * @returns true if the bit was set.
4468 * @returns false if the bit was clear.
4469 *
4470 * @param pvBitmap Pointer to the bitmap.
4471 * @param iBit The bit to test and set.
4472 *
4473 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4474 * However, doing so will yield better performance as well as avoiding
4475 * traps accessing the last bits in the bitmap.
4476 */
4477#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4478RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4479#else
4480DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4481{
4482 union { bool f; uint32_t u32; uint8_t u8; } rc;
4483# if RT_INLINE_ASM_USES_INTRIN
4484 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4485
4486# elif RT_INLINE_ASM_GNU_STYLE
4487 __asm__ __volatile__("btsl %2, %1\n\t"
4488 "setc %b0\n\t"
4489 "andl $1, %0\n\t"
4490 : "=q" (rc.u32),
4491 "=m" (*(volatile long RT_FAR *)pvBitmap)
4492 : "Ir" (iBit),
4493 "m" (*(volatile long RT_FAR *)pvBitmap)
4494 : "memory");
4495# else
4496 __asm
4497 {
4498 mov edx, [iBit]
4499# ifdef RT_ARCH_AMD64
4500 mov rax, [pvBitmap]
4501 bts [rax], edx
4502# else
4503 mov eax, [pvBitmap]
4504 bts [eax], edx
4505# endif
4506 setc al
4507 and eax, 1
4508 mov [rc.u32], eax
4509 }
4510# endif
4511 return rc.f;
4512}
4513#endif
4514
4515
4516/**
4517 * Atomically tests and sets a bit in a bitmap, ordered.
4518 *
4519 * @returns true if the bit was set.
4520 * @returns false if the bit was clear.
4521 *
4522 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4523 * the memory access isn't atomic!
4524 * @param iBit The bit to test and set.
4525 *
4526 * @remarks x86: Requires a 386 or later.
4527 */
4528#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4529RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4530#else
4531DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4532{
4533 union { bool f; uint32_t u32; uint8_t u8; } rc;
4534 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4535# if RT_INLINE_ASM_USES_INTRIN
4536 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4537# elif RT_INLINE_ASM_GNU_STYLE
4538 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4539 "setc %b0\n\t"
4540 "andl $1, %0\n\t"
4541 : "=q" (rc.u32),
4542 "=m" (*(volatile long RT_FAR *)pvBitmap)
4543 : "Ir" (iBit),
4544 "m" (*(volatile long RT_FAR *)pvBitmap)
4545 : "memory");
4546# else
4547 __asm
4548 {
4549 mov edx, [iBit]
4550# ifdef RT_ARCH_AMD64
4551 mov rax, [pvBitmap]
4552 lock bts [rax], edx
4553# else
4554 mov eax, [pvBitmap]
4555 lock bts [eax], edx
4556# endif
4557 setc al
4558 and eax, 1
4559 mov [rc.u32], eax
4560 }
4561# endif
4562 return rc.f;
4563}
4564#endif
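/*
 * Editorial usage sketch (not part of the original header): claiming a free
 * slot in a shared allocation bitmap.  The bitmap g_bmMySlots and the slot
 * count are hypothetical.
 *
 *     static uint32_t volatile g_bmMySlots[256 / 32];
 *
 *     static int32_t exampleClaimSlot(void)
 *     {
 *         int32_t iSlot;
 *         for (iSlot = 0; iSlot < 256; iSlot++)
 *             if (!ASMAtomicBitTestAndSet(&g_bmMySlots[0], iSlot))
 *                 return iSlot;       // the bit was clear and is now set by us
 *         return -1;                  // all slots taken
 *     }
 */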
4565
4566
4567/**
4568 * Tests and clears a bit in a bitmap.
4569 *
4570 * @returns true if the bit was set.
4571 * @returns false if the bit was clear.
4572 *
4573 * @param pvBitmap Pointer to the bitmap.
4574 * @param iBit The bit to test and clear.
4575 *
4576 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4577 * However, doing so will yield better performance as well as avoiding
4578 * traps accessing the last bits in the bitmap.
4579 */
4580#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4581RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4582#else
4583DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4584{
4585 union { bool f; uint32_t u32; uint8_t u8; } rc;
4586# if RT_INLINE_ASM_USES_INTRIN
4587 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4588
4589# elif RT_INLINE_ASM_GNU_STYLE
4590 __asm__ __volatile__("btrl %2, %1\n\t"
4591 "setc %b0\n\t"
4592 "andl $1, %0\n\t"
4593 : "=q" (rc.u32),
4594 "=m" (*(volatile long RT_FAR *)pvBitmap)
4595 : "Ir" (iBit),
4596 "m" (*(volatile long RT_FAR *)pvBitmap)
4597 : "memory");
4598# else
4599 __asm
4600 {
4601 mov edx, [iBit]
4602# ifdef RT_ARCH_AMD64
4603 mov rax, [pvBitmap]
4604 btr [rax], edx
4605# else
4606 mov eax, [pvBitmap]
4607 btr [eax], edx
4608# endif
4609 setc al
4610 and eax, 1
4611 mov [rc.u32], eax
4612 }
4613# endif
4614 return rc.f;
4615}
4616#endif
4617
4618
4619/**
4620 * Atomically tests and clears a bit in a bitmap, ordered.
4621 *
4622 * @returns true if the bit was set.
4623 * @returns false if the bit was clear.
4624 *
4625 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4626 * the memory access isn't atomic!
4627 * @param iBit The bit to test and clear.
4628 *
4629 * @remarks No memory barrier, take care on SMP.
4630 * @remarks x86: Requires a 386 or later.
4631 */
4632#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4633RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4634#else
4635DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4636{
4637 union { bool f; uint32_t u32; uint8_t u8; } rc;
4638 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4639# if RT_INLINE_ASM_USES_INTRIN
4640 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4641
4642# elif RT_INLINE_ASM_GNU_STYLE
4643 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4644 "setc %b0\n\t"
4645 "andl $1, %0\n\t"
4646 : "=q" (rc.u32),
4647 "=m" (*(volatile long RT_FAR *)pvBitmap)
4648 : "Ir" (iBit),
4649 "m" (*(volatile long RT_FAR *)pvBitmap)
4650 : "memory");
4651# else
4652 __asm
4653 {
4654 mov edx, [iBit]
4655# ifdef RT_ARCH_AMD64
4656 mov rax, [pvBitmap]
4657 lock btr [rax], edx
4658# else
4659 mov eax, [pvBitmap]
4660 lock btr [eax], edx
4661# endif
4662 setc al
4663 and eax, 1
4664 mov [rc.u32], eax
4665 }
4666# endif
4667 return rc.f;
4668}
4669#endif
4670
4671
4672/**
4673 * Tests and toggles a bit in a bitmap.
4674 *
4675 * @returns true if the bit was set.
4676 * @returns false if the bit was clear.
4677 *
4678 * @param pvBitmap Pointer to the bitmap.
4679 * @param iBit The bit to test and toggle.
4680 *
4681 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4682 * However, doing so will yield better performance as well as avoiding
4683 * traps accessing the last bits in the bitmap.
4684 */
4685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4686RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4687#else
4688DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4689{
4690 union { bool f; uint32_t u32; uint8_t u8; } rc;
4691# if RT_INLINE_ASM_USES_INTRIN
4692 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4693
4694# elif RT_INLINE_ASM_GNU_STYLE
4695 __asm__ __volatile__("btcl %2, %1\n\t"
4696 "setc %b0\n\t"
4697 "andl $1, %0\n\t"
4698 : "=q" (rc.u32),
4699 "=m" (*(volatile long RT_FAR *)pvBitmap)
4700 : "Ir" (iBit),
4701 "m" (*(volatile long RT_FAR *)pvBitmap)
4702 : "memory");
4703# else
4704 __asm
4705 {
4706 mov edx, [iBit]
4707# ifdef RT_ARCH_AMD64
4708 mov rax, [pvBitmap]
4709 btc [rax], edx
4710# else
4711 mov eax, [pvBitmap]
4712 btc [eax], edx
4713# endif
4714 setc al
4715 and eax, 1
4716 mov [rc.u32], eax
4717 }
4718# endif
4719 return rc.f;
4720}
4721#endif
4722
4723
4724/**
4725 * Atomically tests and toggles a bit in a bitmap, ordered.
4726 *
4727 * @returns true if the bit was set.
4728 * @returns false if the bit was clear.
4729 *
4730 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4731 * the memory access isn't atomic!
4732 * @param iBit The bit to test and toggle.
4733 *
4734 * @remarks x86: Requires a 386 or later.
4735 */
4736#if RT_INLINE_ASM_EXTERNAL
4737RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4738#else
4739DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4740{
4741 union { bool f; uint32_t u32; uint8_t u8; } rc;
4742 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4743# if RT_INLINE_ASM_GNU_STYLE
4744 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4745 "setc %b0\n\t"
4746 "andl $1, %0\n\t"
4747 : "=q" (rc.u32),
4748 "=m" (*(volatile long RT_FAR *)pvBitmap)
4749 : "Ir" (iBit),
4750 "m" (*(volatile long RT_FAR *)pvBitmap)
4751 : "memory");
4752# else
4753 __asm
4754 {
4755 mov edx, [iBit]
4756# ifdef RT_ARCH_AMD64
4757 mov rax, [pvBitmap]
4758 lock btc [rax], edx
4759# else
4760 mov eax, [pvBitmap]
4761 lock btc [eax], edx
4762# endif
4763 setc al
4764 and eax, 1
4765 mov [rc.u32], eax
4766 }
4767# endif
4768 return rc.f;
4769}
4770#endif
4771
4772
4773/**
4774 * Tests if a bit in a bitmap is set.
4775 *
4776 * @returns true if the bit is set.
4777 * @returns false if the bit is clear.
4778 *
4779 * @param pvBitmap Pointer to the bitmap.
4780 * @param iBit The bit to test.
4781 *
4782 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4783 * However, doing so will yield better performance as well as avoiding
4784 * traps accessing the last bits in the bitmap.
4785 */
4786#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4787RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4788#else
4789DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4790{
4791 union { bool f; uint32_t u32; uint8_t u8; } rc;
4792# if RT_INLINE_ASM_USES_INTRIN
4793 rc.u32 = _bittest((long *)pvBitmap, iBit);
4794# elif RT_INLINE_ASM_GNU_STYLE
4795
4796 __asm__ __volatile__("btl %2, %1\n\t"
4797 "setc %b0\n\t"
4798 "andl $1, %0\n\t"
4799 : "=q" (rc.u32)
4800 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4801 "Ir" (iBit)
4802 : "memory");
4803# else
4804 __asm
4805 {
4806 mov edx, [iBit]
4807# ifdef RT_ARCH_AMD64
4808 mov rax, [pvBitmap]
4809 bt [rax], edx
4810# else
4811 mov eax, [pvBitmap]
4812 bt [eax], edx
4813# endif
4814 setc al
4815 and eax, 1
4816 mov [rc.u32], eax
4817 }
4818# endif
4819 return rc.f;
4820}
4821#endif
4822
4823
4824/**
4825 * Clears a bit range within a bitmap.
4826 *
4827 * @param pvBitmap Pointer to the bitmap.
4828 * @param iBitStart The first bit to clear.
4829 * @param iBitEnd The first bit not to clear.
4830 */
4831DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4832{
4833 if (iBitStart < iBitEnd)
4834 {
4835 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4836 int32_t iStart = iBitStart & ~31;
4837 int32_t iEnd = iBitEnd & ~31;
4838 if (iStart == iEnd)
4839 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4840 else
4841 {
4842 /* bits in first dword. */
4843 if (iBitStart & 31)
4844 {
4845 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4846 pu32++;
4847 iBitStart = iStart + 32;
4848 }
4849
4850 /* whole dword. */
4851 if (iBitStart != iEnd)
4852 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4853
4854 /* bits in last dword. */
4855 if (iBitEnd & 31)
4856 {
4857 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4858 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4859 }
4860 }
4861 }
4862}
4863
4864
4865/**
4866 * Sets a bit range within a bitmap.
4867 *
4868 * @param pvBitmap Pointer to the bitmap.
4869 * @param iBitStart The first bit to set.
4870 * @param iBitEnd The first bit not to set.
4871 */
4872DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4873{
4874 if (iBitStart < iBitEnd)
4875 {
4876 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4877 int32_t iStart = iBitStart & ~31;
4878 int32_t iEnd = iBitEnd & ~31;
4879 if (iStart == iEnd)
4880 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4881 else
4882 {
4883 /* bits in first dword. */
4884 if (iBitStart & 31)
4885 {
4886 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4887 pu32++;
4888 iBitStart = iStart + 32;
4889 }
4890
4891 /* whole dword. */
4892 if (iBitStart != iEnd)
4893 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4894
4895 /* bits in last dword. */
4896 if (iBitEnd & 31)
4897 {
4898 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4899 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4900 }
4901 }
4902 }
4903}
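/*
 * Editorial usage sketch (not part of the original header): marking a run of
 * pages as allocated and releasing them again.  g_bmMyPages is a hypothetical
 * page bitmap; note that iBitEnd is exclusive.
 *
 *     static uint32_t volatile g_bmMyPages[1024 / 32];
 *
 *     static void exampleAllocAndFreePages(void)
 *     {
 *         ASMBitSetRange(g_bmMyPages, 10, 42);    // pages 10..41 now marked allocated
 *         ASMBitClearRange(g_bmMyPages, 10, 42);  // ... and free again
 *     }
 */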
4904
4905
4906/**
4907 * Finds the first clear bit in a bitmap.
4908 *
4909 * @returns Index of the first zero bit.
4910 * @returns -1 if no clear bit was found.
4911 * @param pvBitmap Pointer to the bitmap.
4912 * @param cBits The number of bits in the bitmap. Multiple of 32.
4913 */
4914#if RT_INLINE_ASM_EXTERNAL
4915DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4916#else
4917DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4918{
4919 if (cBits)
4920 {
4921 int32_t iBit;
4922# if RT_INLINE_ASM_GNU_STYLE
4923 RTCCUINTREG uEAX, uECX, uEDI;
4924 cBits = RT_ALIGN_32(cBits, 32);
4925 __asm__ __volatile__("repe; scasl\n\t"
4926 "je 1f\n\t"
4927# ifdef RT_ARCH_AMD64
4928 "lea -4(%%rdi), %%rdi\n\t"
4929 "xorl (%%rdi), %%eax\n\t"
4930 "subq %5, %%rdi\n\t"
4931# else
4932 "lea -4(%%edi), %%edi\n\t"
4933 "xorl (%%edi), %%eax\n\t"
4934 "subl %5, %%edi\n\t"
4935# endif
4936 "shll $3, %%edi\n\t"
4937 "bsfl %%eax, %%edx\n\t"
4938 "addl %%edi, %%edx\n\t"
4939 "1:\t\n"
4940 : "=d" (iBit),
4941 "=&c" (uECX),
4942 "=&D" (uEDI),
4943 "=&a" (uEAX)
4944 : "0" (0xffffffff),
4945 "mr" (pvBitmap),
4946 "1" (cBits >> 5),
4947 "2" (pvBitmap),
4948 "3" (0xffffffff));
4949# else
4950 cBits = RT_ALIGN_32(cBits, 32);
4951 __asm
4952 {
4953# ifdef RT_ARCH_AMD64
4954 mov rdi, [pvBitmap]
4955 mov rbx, rdi
4956# else
4957 mov edi, [pvBitmap]
4958 mov ebx, edi
4959# endif
4960 mov edx, 0ffffffffh
4961 mov eax, edx
4962 mov ecx, [cBits]
4963 shr ecx, 5
4964 repe scasd
4965 je done
4966
4967# ifdef RT_ARCH_AMD64
4968 lea rdi, [rdi - 4]
4969 xor eax, [rdi]
4970 sub rdi, rbx
4971# else
4972 lea edi, [edi - 4]
4973 xor eax, [edi]
4974 sub edi, ebx
4975# endif
4976 shl edi, 3
4977 bsf edx, eax
4978 add edx, edi
4979 done:
4980 mov [iBit], edx
4981 }
4982# endif
4983 return iBit;
4984 }
4985 return -1;
4986}
4987#endif
4988
4989
4990/**
4991 * Finds the next clear bit in a bitmap.
4992 *
4993 * @returns Index of the next clear bit.
4994 * @returns -1 if no clear bit was found.
4995 * @param pvBitmap Pointer to the bitmap.
4996 * @param cBits The number of bits in the bitmap. Multiple of 32.
4997 * @param iBitPrev The bit returned from the last search.
4998 * The search will start at iBitPrev + 1.
4999 */
5000#if RT_INLINE_ASM_EXTERNAL
5001DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5002#else
5003DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5004{
5005 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5006 int iBit = ++iBitPrev & 31;
5007 if (iBit)
5008 {
5009 /*
5010 * Inspect the 32-bit word containing the unaligned bit.
5011 */
5012 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5013
5014# if RT_INLINE_ASM_USES_INTRIN
5015 unsigned long ulBit = 0;
5016 if (_BitScanForward(&ulBit, u32))
5017 return ulBit + iBitPrev;
5018# else
5019# if RT_INLINE_ASM_GNU_STYLE
5020 __asm__ __volatile__("bsf %1, %0\n\t"
5021 "jnz 1f\n\t"
5022 "movl $-1, %0\n\t"
5023 "1:\n\t"
5024 : "=r" (iBit)
5025 : "r" (u32));
5026# else
5027 __asm
5028 {
5029 mov edx, [u32]
5030 bsf eax, edx
5031 jnz done
5032 mov eax, 0ffffffffh
5033 done:
5034 mov [iBit], eax
5035 }
5036# endif
5037 if (iBit >= 0)
5038 return iBit + iBitPrev;
5039# endif
5040
5041 /*
5042 * Skip ahead and see if there is anything left to search.
5043 */
5044 iBitPrev |= 31;
5045 iBitPrev++;
5046 if (cBits <= (uint32_t)iBitPrev)
5047 return -1;
5048 }
5049
5050 /*
5051 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5052 */
5053 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5054 if (iBit >= 0)
5055 iBit += iBitPrev;
5056 return iBit;
5057}
5058#endif
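/*
 * Editorial usage sketch (not part of the original header): walking all clear
 * (free) bits in a 1024-bit bitmap.  g_bmMyUnits and exampleReclaimUnit are
 * hypothetical.
 *
 *     static uint32_t volatile g_bmMyUnits[1024 / 32];
 *
 *     static void exampleReclaimFreeUnits(void)
 *     {
 *         int32_t iBit = ASMBitFirstClear(g_bmMyUnits, 1024);
 *         while (iBit >= 0)
 *         {
 *             exampleReclaimUnit((uint32_t)iBit);                     // hypothetical worker
 *             iBit = ASMBitNextClear(g_bmMyUnits, 1024, (uint32_t)iBit);
 *         }
 *     }
 */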
5059
5060
5061/**
5062 * Finds the first set bit in a bitmap.
5063 *
5064 * @returns Index of the first set bit.
5065 * @returns -1 if no set bit was found.
5066 * @param pvBitmap Pointer to the bitmap.
5067 * @param cBits The number of bits in the bitmap. Multiple of 32.
5068 */
5069#if RT_INLINE_ASM_EXTERNAL
5070DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5071#else
5072DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5073{
5074 if (cBits)
5075 {
5076 int32_t iBit;
5077# if RT_INLINE_ASM_GNU_STYLE
5078 RTCCUINTREG uEAX, uECX, uEDI;
5079 cBits = RT_ALIGN_32(cBits, 32);
5080 __asm__ __volatile__("repe; scasl\n\t"
5081 "je 1f\n\t"
5082# ifdef RT_ARCH_AMD64
5083 "lea -4(%%rdi), %%rdi\n\t"
5084 "movl (%%rdi), %%eax\n\t"
5085 "subq %5, %%rdi\n\t"
5086# else
5087 "lea -4(%%edi), %%edi\n\t"
5088 "movl (%%edi), %%eax\n\t"
5089 "subl %5, %%edi\n\t"
5090# endif
5091 "shll $3, %%edi\n\t"
5092 "bsfl %%eax, %%edx\n\t"
5093 "addl %%edi, %%edx\n\t"
5094 "1:\t\n"
5095 : "=d" (iBit),
5096 "=&c" (uECX),
5097 "=&D" (uEDI),
5098 "=&a" (uEAX)
5099 : "0" (0xffffffff),
5100 "mr" (pvBitmap),
5101 "1" (cBits >> 5),
5102 "2" (pvBitmap),
5103 "3" (0));
5104# else
5105 cBits = RT_ALIGN_32(cBits, 32);
5106 __asm
5107 {
5108# ifdef RT_ARCH_AMD64
5109 mov rdi, [pvBitmap]
5110 mov rbx, rdi
5111# else
5112 mov edi, [pvBitmap]
5113 mov ebx, edi
5114# endif
5115 mov edx, 0ffffffffh
5116 xor eax, eax
5117 mov ecx, [cBits]
5118 shr ecx, 5
5119 repe scasd
5120 je done
5121# ifdef RT_ARCH_AMD64
5122 lea rdi, [rdi - 4]
5123 mov eax, [rdi]
5124 sub rdi, rbx
5125# else
5126 lea edi, [edi - 4]
5127 mov eax, [edi]
5128 sub edi, ebx
5129# endif
5130 shl edi, 3
5131 bsf edx, eax
5132 add edx, edi
5133 done:
5134 mov [iBit], edx
5135 }
5136# endif
5137 return iBit;
5138 }
5139 return -1;
5140}
5141#endif
5142
5143
5144/**
5145 * Finds the next set bit in a bitmap.
5146 *
5147 * @returns Index of the next set bit.
5148 * @returns -1 if no set bit was found.
5149 * @param pvBitmap Pointer to the bitmap.
5150 * @param cBits The number of bits in the bitmap. Multiple of 32.
5151 * @param iBitPrev The bit returned from the last search.
5152 * The search will start at iBitPrev + 1.
5153 */
5154#if RT_INLINE_ASM_EXTERNAL
5155DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5156#else
5157DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5158{
5159 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5160 int iBit = ++iBitPrev & 31;
5161 if (iBit)
5162 {
5163 /*
5164 * Inspect the 32-bit word containing the unaligned bit.
5165 */
5166 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5167
5168# if RT_INLINE_ASM_USES_INTRIN
5169 unsigned long ulBit = 0;
5170 if (_BitScanForward(&ulBit, u32))
5171 return ulBit + iBitPrev;
5172# else
5173# if RT_INLINE_ASM_GNU_STYLE
5174 __asm__ __volatile__("bsf %1, %0\n\t"
5175 "jnz 1f\n\t"
5176 "movl $-1, %0\n\t"
5177 "1:\n\t"
5178 : "=r" (iBit)
5179 : "r" (u32));
5180# else
5181 __asm
5182 {
5183 mov edx, [u32]
5184 bsf eax, edx
5185 jnz done
5186 mov eax, 0ffffffffh
5187 done:
5188 mov [iBit], eax
5189 }
5190# endif
5191 if (iBit >= 0)
5192 return iBit + iBitPrev;
5193# endif
5194
5195 /*
5196 * Skip ahead and see if there is anything left to search.
5197 */
5198 iBitPrev |= 31;
5199 iBitPrev++;
5200 if (cBits <= (uint32_t)iBitPrev)
5201 return -1;
5202 }
5203
5204 /*
5205 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5206 */
5207 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5208 if (iBit >= 0)
5209 iBit += iBitPrev;
5210 return iBit;
5211}
5212#endif
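/*
 * Editorial usage sketch (not part of the original header): processing every
 * set (pending) bit in a 1024-bit bitmap.  g_bmMyPending and
 * exampleProcessPending are hypothetical.
 *
 *     static uint32_t volatile g_bmMyPending[1024 / 32];
 *
 *     static void exampleHandlePending(void)
 *     {
 *         int32_t iBit = ASMBitFirstSet(g_bmMyPending, 1024);
 *         while (iBit >= 0)
 *         {
 *             exampleProcessPending((uint32_t)iBit);                  // hypothetical worker
 *             iBit = ASMBitNextSet(g_bmMyPending, 1024, (uint32_t)iBit);
 *         }
 *     }
 */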
5213
5214
5215/**
5216 * Finds the first bit which is set in the given 32-bit integer.
5217 * Bits are numbered from 1 (least significant) to 32.
5218 *
5219 * @returns index [1..32] of the first set bit.
5220 * @returns 0 if all bits are cleared.
5221 * @param u32 Integer to search for set bits.
5222 * @remarks Similar to ffs() in BSD.
5223 */
5224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5225RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5226#else
5227DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5228{
5229# if RT_INLINE_ASM_USES_INTRIN
5230 unsigned long iBit;
5231 if (_BitScanForward(&iBit, u32))
5232 iBit++;
5233 else
5234 iBit = 0;
5235# elif RT_INLINE_ASM_GNU_STYLE
5236 uint32_t iBit;
5237 __asm__ __volatile__("bsf %1, %0\n\t"
5238 "jnz 1f\n\t"
5239 "xorl %0, %0\n\t"
5240 "jmp 2f\n"
5241 "1:\n\t"
5242 "incl %0\n"
5243 "2:\n\t"
5244 : "=r" (iBit)
5245 : "rm" (u32));
5246# else
5247 uint32_t iBit;
5248 _asm
5249 {
5250 bsf eax, [u32]
5251 jnz found
5252 xor eax, eax
5253 jmp done
5254 found:
5255 inc eax
5256 done:
5257 mov [iBit], eax
5258 }
5259# endif
5260 return iBit;
5261}
5262#endif
5263
5264
5265/**
5266 * Finds the first bit which is set in the given 32-bit integer.
5267 * Bits are numbered from 1 (least significant) to 32.
5268 *
5269 * @returns index [1..32] of the first set bit.
5270 * @returns 0 if all bits are cleared.
5271 * @param i32 Integer to search for set bits.
5272 * @remark Similar to ffs() in BSD.
5273 */
5274DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5275{
5276 return ASMBitFirstSetU32((uint32_t)i32);
5277}
5278
5279
5280/**
5281 * Finds the first bit which is set in the given 64-bit integer.
5282 *
5283 * Bits are numbered from 1 (least significant) to 64.
5284 *
5285 * @returns index [1..64] of the first set bit.
5286 * @returns 0 if all bits are cleared.
5287 * @param u64 Integer to search for set bits.
5288 * @remarks Similar to ffs() in BSD.
5289 */
5290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5291RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5292#else
5293DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5294{
5295# if RT_INLINE_ASM_USES_INTRIN
5296 unsigned long iBit;
5297# if ARCH_BITS == 64
5298 if (_BitScanForward64(&iBit, u64))
5299 iBit++;
5300 else
5301 iBit = 0;
5302# else
5303 if (_BitScanForward(&iBit, (uint32_t)u64))
5304 iBit++;
5305 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5306 iBit += 33;
5307 else
5308 iBit = 0;
5309# endif
5310# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5311 uint64_t iBit;
5312 __asm__ __volatile__("bsfq %1, %0\n\t"
5313 "jnz 1f\n\t"
5314 "xorl %k0, %k0\n\t"
5315 "jmp 2f\n"
5316 "1:\n\t"
5317 "incl %k0\n"
5318 "2:\n\t"
5319 : "=r" (iBit)
5320 : "rm" (u64));
5321# else
5322 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5323 if (!iBit)
5324 {
5325 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5326 if (iBit)
5327 iBit += 32;
5328 }
5329# endif
5330 return (unsigned)iBit;
5331}
5332#endif
5333
5334
5335/**
5336 * Finds the first bit which is set in the given 16-bit integer.
5337 *
5338 * Bits are numbered from 1 (least significant) to 16.
5339 *
5340 * @returns index [1..16] of the first set bit.
5341 * @returns 0 if all bits are cleared.
5342 * @param u16 Integer to search for set bits.
5343 * @remarks For 16-bit bs3kit code.
5344 */
5345#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5346RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5347#else
5348DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5349{
5350 return ASMBitFirstSetU32((uint32_t)u16);
5351}
5352#endif
5353
5354
5355/**
5356 * Finds the last bit which is set in the given 32-bit integer.
5357 * Bits are numbered from 1 (least significant) to 32.
5358 *
5359 * @returns index [1..32] of the last set bit.
5360 * @returns 0 if all bits are cleared.
5361 * @param u32 Integer to search for set bits.
5362 * @remark Similar to fls() in BSD.
5363 */
5364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5365RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitLastSetU32(uint32_t u32);
5366#else
5367DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5368{
5369# if RT_INLINE_ASM_USES_INTRIN
5370 unsigned long iBit;
5371 if (_BitScanReverse(&iBit, u32))
5372 iBit++;
5373 else
5374 iBit = 0;
5375# elif RT_INLINE_ASM_GNU_STYLE
5376 uint32_t iBit;
5377 __asm__ __volatile__("bsrl %1, %0\n\t"
5378 "jnz 1f\n\t"
5379 "xorl %0, %0\n\t"
5380 "jmp 2f\n"
5381 "1:\n\t"
5382 "incl %0\n"
5383 "2:\n\t"
5384 : "=r" (iBit)
5385 : "rm" (u32));
5386# else
5387 uint32_t iBit;
5388 _asm
5389 {
5390 bsr eax, [u32]
5391 jnz found
5392 xor eax, eax
5393 jmp done
5394 found:
5395 inc eax
5396 done:
5397 mov [iBit], eax
5398 }
5399# endif
5400 return iBit;
5401}
5402#endif
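/*
 * Editorial usage sketch (not part of the original header): since the result
 * is 1-based, the index of the most significant set bit - and thus
 * floor(log2(u32)) for non-zero input - is the return value minus one.
 *
 *     static unsigned exampleLog2Floor(uint32_t u32)
 *     {
 *         return ASMBitLastSetU32(u32) - 1;   // caller must ensure u32 != 0
 *     }
 */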
5403
5404
5405/**
5406 * Finds the last bit which is set in the given 32-bit integer.
5407 * Bits are numbered from 1 (least significant) to 32.
5408 *
5409 * @returns index [1..32] of the last set bit.
5410 * @returns 0 if all bits are cleared.
5411 * @param i32 Integer to search for set bits.
5412 * @remark Similar to fls() in BSD.
5413 */
5414DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5415{
5416 return ASMBitLastSetU32((uint32_t)i32);
5417}
5418
5419
5420/**
5421 * Finds the last bit which is set in the given 64-bit integer.
5422 *
5423 * Bits are numbered from 1 (least significant) to 64.
5424 *
5425 * @returns index [1..64] of the last set bit.
5426 * @returns 0 if all bits are cleared.
5427 * @param u64 Integer to search for set bits.
5428 * @remark Similar to fls() in BSD.
5429 */
5430#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5431RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitLastSetU64(uint64_t u64);
5432#else
5433DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5434{
5435# if RT_INLINE_ASM_USES_INTRIN
5436 unsigned long iBit;
5437# if ARCH_BITS == 64
5438 if (_BitScanReverse64(&iBit, u64))
5439 iBit++;
5440 else
5441 iBit = 0;
5442# else
5443 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5444 iBit += 33;
5445 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5446 iBit++;
5447 else
5448 iBit = 0;
5449# endif
5450# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5451 uint64_t iBit;
5452 __asm__ __volatile__("bsrq %1, %0\n\t"
5453 "jnz 1f\n\t"
5454 "xorl %k0, %k0\n\t"
5455 "jmp 2f\n"
5456 "1:\n\t"
5457 "incl %k0\n"
5458 "2:\n\t"
5459 : "=r" (iBit)
5460 : "rm" (u64));
5461# else
5462 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5463 if (iBit)
5464 iBit += 32;
5465 else
5466 iBit = ASMBitLastSetU32((uint32_t)u64);
5467# endif
5468 return (unsigned)iBit;
5469}
5470#endif
5471
5472
5473/**
5474 * Finds the last bit which is set in the given 16-bit integer.
5475 *
5476 * Bits are numbered from 1 (least significant) to 16.
5477 *
5478 * @returns index [1..16] of the last set bit.
5479 * @returns 0 if all bits are cleared.
5480 * @param u16 Integer to search for set bits.
5481 * @remarks For 16-bit bs3kit code.
5482 */
5483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5484RT_ASM_DECL_PRAGMA_WATCOM(unsigned) ASMBitLastSetU16(uint16_t u16);
5485#else
5486DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5487{
5488 return ASMBitLastSetU32((uint32_t)u16);
5489}
5490#endif
5491
5492
5493/**
5494 * Reverse the byte order of the given 16-bit integer.
5495 *
5496 * @returns The value with the byte order reversed.
5497 * @param u16 16-bit integer value.
5498 */
5499#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5500RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16);
5501#else
5502DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5503{
5504# if RT_INLINE_ASM_USES_INTRIN
5505 u16 = _byteswap_ushort(u16);
5506# elif RT_INLINE_ASM_GNU_STYLE
5507 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5508# else
5509 _asm
5510 {
5511 mov ax, [u16]
5512 ror ax, 8
5513 mov [u16], ax
5514 }
5515# endif
5516 return u16;
5517}
5518#endif
5519
5520
5521/**
5522 * Reverse the byte order of the given 32-bit integer.
5523 *
5524 * @returns The value with the byte order reversed.
5525 * @param u32 32-bit integer value.
5526 */
5527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5528RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32);
5529#else
5530DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5531{
5532# if RT_INLINE_ASM_USES_INTRIN
5533 u32 = _byteswap_ulong(u32);
5534# elif RT_INLINE_ASM_GNU_STYLE
5535 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5536# else
5537 _asm
5538 {
5539 mov eax, [u32]
5540 bswap eax
5541 mov [u32], eax
5542 }
5543# endif
5544 return u32;
5545}
5546#endif
5547
5548
5549/**
5550 * Reverse the byte order of the given 64-bit integer.
5551 *
5552 * @returns The value with the byte order reversed.
5553 * @param u64 64-bit integer value.
5554 */
5555DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5556{
5557#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5558 u64 = _byteswap_uint64(u64);
5559#else
5560 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5561 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5562#endif
5563 return u64;
5564}
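/*
 * Editorial usage sketch (not part of the original header): converting a
 * little-endian on-disk field to host byte order, assuming the usual
 * RT_BIG_ENDIAN define from iprt/cdefs.h; the helper name is hypothetical.
 *
 *     static uint32_t exampleLoadDiskU32(uint32_t u32LittleEndian)
 *     {
 *     #ifdef RT_BIG_ENDIAN
 *         return ASMByteSwapU32(u32LittleEndian);
 *     #else
 *         return u32LittleEndian;
 *     #endif
 *     }
 */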
5565
5566
5567/**
5568 * Rotate 32-bit unsigned value to the left by @a cShift.
5569 *
5570 * @returns Rotated value.
5571 * @param u32 The value to rotate.
5572 * @param cShift How many bits to rotate by.
5573 */
5574#ifdef __WATCOMC__
5575RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5576#else
5577DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5578{
5579# if RT_INLINE_ASM_USES_INTRIN
5580 return _rotl(u32, cShift);
5581# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5582 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5583 return u32;
5584# else
5585 cShift &= 31;
5586 return (u32 << cShift) | (u32 >> (32 - cShift));
5587# endif
5588}
5589#endif
5590
5591
5592/**
5593 * Rotate 32-bit unsigned value to the right by @a cShift.
5594 *
5595 * @returns Rotated value.
5596 * @param u32 The value to rotate.
5597 * @param cShift How many bits to rotate by.
5598 */
5599#ifdef __WATCOMC__
5600RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5601#else
5602DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5603{
5604# if RT_INLINE_ASM_USES_INTRIN
5605 return _rotr(u32, cShift);
5606# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5607 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5608 return u32;
5609# else
5610 cShift &= 31;
5611 return (u32 >> cShift) | (u32 << (32 - cShift));
5612# endif
5613}
5614#endif
5615
5616
5617/**
5618 * Rotate 64-bit unsigned value to the left by @a cShift.
5619 *
5620 * @returns Rotated value.
5621 * @param u64 The value to rotate.
5622 * @param cShift How many bits to rotate by.
5623 */
5624DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5625{
5626#if RT_INLINE_ASM_USES_INTRIN
5627 return _rotl64(u64, cShift);
5628#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5629 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5630 return u64;
5631#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5632 uint32_t uSpill;
5633 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5634 "jz 1f\n\t"
5635 "xchgl %%eax, %%edx\n\t"
5636 "1:\n\t"
5637 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5638 "jz 2f\n\t"
5639 "movl %%edx, %2\n\t" /* save the hi value in %2. */
5640 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5641 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5642 "2:\n\t" /* } */
5643 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5644 : "0" (u64),
5645 "1" (cShift));
5646 return u64;
5647#else
5648 cShift &= 63;
5649 return (u64 << cShift) | (u64 >> (64 - cShift));
5650#endif
5651}
5652
5653
5654/**
5655 * Rotate 64-bit unsigned value to the right by @a cShift.
5656 *
5657 * @returns Rotated value.
5658 * @param u64 The value to rotate.
5659 * @param cShift How many bits to rotate by.
5660 */
5661DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5662{
5663#if RT_INLINE_ASM_USES_INTRIN
5664 return _rotr64(u64, cShift);
5665#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5666 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5667 return u64;
5668#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5669 uint32_t uSpill;
5670 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
5671 "jz 1f\n\t"
5672 "xchgl %%eax, %%edx\n\t"
5673 "1:\n\t"
5674 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5675 "jz 2f\n\t"
5676 "movl %%edx, %2\n\t" /* save the hi value in %2. */
5677 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5678 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5679 "2:\n\t" /* } */
5680 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5681 : "0" (u64),
5682 "1" (cShift));
5683 return u64;
5684#else
5685 cShift &= 63;
5686 return (u64 >> cShift) | (u64 << (64 - cShift));
5687#endif
5688}
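/*
 * Editorial usage sketch (not part of the original header): a rotate is a
 * common building block for simple hash mixing; the constant and helper name
 * below are purely illustrative.
 *
 *     static uint32_t exampleHashMix(uint32_t uHash, uint32_t uValue)
 *     {
 *         return ASMRotateLeftU32(uHash ^ uValue, 13) * UINT32_C(2654435761);
 *     }
 */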
5689
5690/** @} */
5691
5692
5693/** @} */
5694
5695/*
5696 * Include #pragma aux definitions for Watcom C/C++.
5697 */
5698#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
5699# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
5700# undef ___iprt_asm_watcom_x86_16_h
5701# include "asm-watcom-x86-16.h"
5702#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
5703# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
5704# undef ___iprt_asm_watcom_x86_32_h
5705# include "asm-watcom-x86-32.h"
5706#endif
5707
5708#endif
5709