VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 87147

Last change on this file since 87147 was 87147, checked in by vboxsync, 4 years ago

iprt/asm.h,iprt/param.h: More bugref:9898 adjustments.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 169.8 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using the intrinsics of _MSC_VER 1400 or later.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedIncrement)
66# pragma intrinsic(_InterlockedDecrement)
67# pragma intrinsic(_InterlockedExchange)
68# pragma intrinsic(_InterlockedExchangeAdd)
69# pragma intrinsic(_InterlockedCompareExchange)
70# pragma intrinsic(_InterlockedCompareExchange64)
71# pragma intrinsic(_rotl)
72# pragma intrinsic(_rotr)
73# pragma intrinsic(_rotl64)
74# pragma intrinsic(_rotr64)
75# ifdef RT_ARCH_AMD64
76# pragma intrinsic(__stosq)
77# pragma intrinsic(_byteswap_uint64)
78# pragma intrinsic(_InterlockedExchange64)
79# pragma intrinsic(_InterlockedExchangeAdd64)
80# pragma intrinsic(_InterlockedAnd64)
81# pragma intrinsic(_InterlockedOr64)
82# pragma intrinsic(_InterlockedIncrement64)
83# pragma intrinsic(_InterlockedDecrement64)
84# endif
85#endif
86
87/*
88 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
89 */
90#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
91# include "asm-watcom-x86-16.h"
92#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-32.h"
94#endif
95
96
97/** @defgroup grp_rt_asm ASM - Assembly Routines
98 * @ingroup grp_rt
99 *
100 * @remarks The difference between ordered and unordered atomic operations is that
101 * the former will complete outstanding reads and writes before continuing
102 * while the latter makes no promises about the ordering. Ordered
103 * operations don't, it seems, make any 100% promise wrt whether
104 * the operation will complete before any subsequent memory access.
105 * (Please correct if wrong.)
106 *
107 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
108 * are unordered (note the Uo).
109 *
110 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
111 * or even optimize assembler instructions away. For instance, in the following code
112 * the second rdmsr instruction is optimized away because gcc treats that instruction
113 * as deterministic:
114 *
115 * @code
116 * static inline uint32_t rdmsr_low(int idx)
117 * {
118 * uint32_t low;
119 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
120 * }
121 * ...
122 * uint32_t msr1 = rdmsr_low(1);
123 * foo(msr1);
124 * msr1 = rdmsr_low(1);
125 * bar(msr1);
126 * @endcode
127 *
128 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
129 * use the result of the first call as input parameter for bar() as well. For rdmsr this
130 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
131 * machine status information in general.
132 *
133 * @{
134 */
135
136
137/** @def RT_INLINE_ASM_GCC_4_3_X_X86
138 * Used to work around some 4.3.x register allocation issues in this version of
139 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
140 * definitely not for 5.x. */
141#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
142# define RT_INLINE_ASM_GCC_4_3_X_X86 1
143#else
144# define RT_INLINE_ASM_GCC_4_3_X_X86 0
145#endif
146
147/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
148 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
149 * RTSemRWRequestWrite in semsemrw-lockless-generic.cpp in release builds
150 * (PIC mode, x86).
151 *
152 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
153 * when in PIC mode on x86.
154 */
155#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
156# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
158# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
160# elif ( (defined(PIC) || defined(__PIC__)) \
161 && defined(RT_ARCH_X86) \
162 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
163 || defined(RT_OS_DARWIN)) )
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# else
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# endif
168#endif
169
170
171/** @def ASMReturnAddress
172 * Gets the return address of the current (or calling if you like) function or method.
173 */
174#ifdef _MSC_VER
175# ifdef __cplusplus
176extern "C"
177# endif
178void * _ReturnAddress(void);
179# pragma intrinsic(_ReturnAddress)
180# define ASMReturnAddress() _ReturnAddress()
181#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
182# define ASMReturnAddress() __builtin_return_address(0)
183#elif defined(__WATCOMC__)
184# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
185#else
186# error "Unsupported compiler."
187#endif
188
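/* Example (illustrative sketch, not part of the original header): tagging a
 * trace entry with the caller's address via ASMReturnAddress().  The helper
 * name is hypothetical and iprt/log.h is assumed to be included.
 *
 *     static void rtExampleTraceCaller(void)
 *     {
 *         RTLogPrintf("rtExampleTraceCaller: called from %p\n", ASMReturnAddress());
 *     }
 */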
189
190/**
191 * Compiler memory barrier.
192 *
193 * Ensure that the compiler does not use any cached (register/tmp stack) memory
194 * values or any outstanding writes when returning from this function.
195 *
196 * This function must be used if non-volatile data is modified by a
197 * device or the VMM. Typical cases are port access, MMIO access,
198 * trapping instruction, etc.
199 */
200#if RT_INLINE_ASM_GNU_STYLE
201# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
202#elif RT_INLINE_ASM_USES_INTRIN
203# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
204#elif defined(__WATCOMC__)
205void ASMCompilerBarrier(void);
206#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
207DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
208{
209 __asm
210 {
211 }
212}
213#endif
214
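/* Example (illustrative sketch, not part of the original header): using
 * ASMCompilerBarrier() so a status word that a device updates behind the
 * compiler's back is re-read on every poll.  The parameter name and bit mask
 * are hypothetical.
 *
 *     static bool rtExampleIsDeviceReady(uint32_t *pu32MmioStatus)
 *     {
 *         ASMCompilerBarrier();   // forget cached copies, re-read from memory
 *         return (*pu32MmioStatus & UINT32_C(0x00000001)) != 0;
 *     }
 */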
215
216/** @def ASMBreakpoint
217 * Debugger Breakpoint.
218 * @deprecated Use RT_BREAKPOINT instead.
219 * @internal
220 */
221#define ASMBreakpoint() RT_BREAKPOINT()
222
223
224/**
225 * Spinloop hint for platforms that have it; an empty function on the other
226 * platforms.
227 *
228 * x86 & AMD64: The PAUSE variant of NOP, which helps hyperthreaded CPUs detect
229 * spin locks.
230 */
231#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
232RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
233#else
234DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
235{
236# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
237# if RT_INLINE_ASM_GNU_STYLE
238 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
239# else
240 __asm {
241 _emit 0f3h
242 _emit 090h
243 }
244# endif
245# else
246 /* dummy */
247# endif
248}
249#endif
250
251
252/**
253 * Atomically Exchange an unsigned 8-bit value, ordered.
254 *
255 * @returns Current *pu8 value
256 * @param pu8 Pointer to the 8-bit variable to update.
257 * @param u8 The 8-bit value to assign to *pu8.
258 */
259#if RT_INLINE_ASM_EXTERNAL
260RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
261#else
262DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
263{
264# if RT_INLINE_ASM_GNU_STYLE
265 __asm__ __volatile__("xchgb %0, %1\n\t"
266 : "=m" (*pu8),
267 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
268 : "1" (u8),
269 "m" (*pu8));
270# else
271 __asm
272 {
273# ifdef RT_ARCH_AMD64
274 mov rdx, [pu8]
275 mov al, [u8]
276 xchg [rdx], al
277 mov [u8], al
278# else
279 mov edx, [pu8]
280 mov al, [u8]
281 xchg [edx], al
282 mov [u8], al
283# endif
284 }
285# endif
286 return u8;
287}
288#endif
289
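/* Example (illustrative sketch, not part of the original header): a tiny
 * test-and-set lock built from ASMAtomicXchgU8 and ASMNopPause.  The variable
 * and function names are hypothetical; real IPRT code would normally use
 * RTSpinlock or RTCritSect instead.
 *
 *     static volatile uint8_t g_fExampleLock = 0;
 *
 *     static void rtExampleLock(void)
 *     {
 *         while (ASMAtomicXchgU8(&g_fExampleLock, 1) != 0)  // returns the old value
 *             ASMNopPause();
 *     }
 *
 *     static void rtExampleUnlock(void)
 *     {
 *         ASMAtomicXchgU8(&g_fExampleLock, 0);              // ordered, releases the lock
 *     }
 */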
290
291/**
292 * Atomically Exchange a signed 8-bit value, ordered.
293 *
294 * @returns Current *pi8 value
295 * @param pi8 Pointer to the 8-bit variable to update.
296 * @param i8 The 8-bit value to assign to *pi8.
297 */
298DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
299{
300 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
301}
302
303
304/**
305 * Atomically Exchange a bool value, ordered.
306 *
307 * @returns Current *pf value
308 * @param pf Pointer to the 8-bit variable to update.
309 * @param f The 8-bit value to assign to *pf.
310 */
311DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
312{
313#ifdef _MSC_VER
314 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
315#else
316 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#endif
318}
319
320
321/**
322 * Atomically Exchange an unsigned 16-bit value, ordered.
323 *
324 * @returns Current *pu16 value
325 * @param pu16 Pointer to the 16-bit variable to update.
326 * @param u16 The 16-bit value to assign to *pu16.
327 */
328#if RT_INLINE_ASM_EXTERNAL
329RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
330#else
331DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
332{
333# if RT_INLINE_ASM_GNU_STYLE
334 __asm__ __volatile__("xchgw %0, %1\n\t"
335 : "=m" (*pu16),
336 "=r" (u16)
337 : "1" (u16),
338 "m" (*pu16));
339# else
340 __asm
341 {
342# ifdef RT_ARCH_AMD64
343 mov rdx, [pu16]
344 mov ax, [u16]
345 xchg [rdx], ax
346 mov [u16], ax
347# else
348 mov edx, [pu16]
349 mov ax, [u16]
350 xchg [edx], ax
351 mov [u16], ax
352# endif
353 }
354# endif
355 return u16;
356}
357#endif
358
359
360/**
361 * Atomically Exchange a signed 16-bit value, ordered.
362 *
363 * @returns Current *pi16 value
364 * @param pi16 Pointer to the 16-bit variable to update.
365 * @param i16 The 16-bit value to assign to *pi16.
366 */
367DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
368{
369 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
370}
371
372
373/**
374 * Atomically Exchange an unsigned 32-bit value, ordered.
375 *
376 * @returns Current *pu32 value
377 * @param pu32 Pointer to the 32-bit variable to update.
378 * @param u32 The 32-bit value to assign to *pu32.
379 *
380 * @remarks Does not work on 286 and earlier.
381 */
382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
383RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
384#else
385DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
386{
387# if RT_INLINE_ASM_GNU_STYLE
388 __asm__ __volatile__("xchgl %0, %1\n\t"
389 : "=m" (*pu32),
390 "=r" (u32)
391 : "1" (u32),
392 "m" (*pu32));
393
394# elif RT_INLINE_ASM_USES_INTRIN
395 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
396
397# else
398 __asm
399 {
400# ifdef RT_ARCH_AMD64
401 mov rdx, [pu32]
402 mov eax, u32
403 xchg [rdx], eax
404 mov [u32], eax
405# else
406 mov edx, [pu32]
407 mov eax, u32
408 xchg [edx], eax
409 mov [u32], eax
410# endif
411 }
412# endif
413 return u32;
414}
415#endif
416
417
418/**
419 * Atomically Exchange a signed 32-bit value, ordered.
420 *
421 * @returns Current *pi32 value
422 * @param pi32 Pointer to the 32-bit variable to update.
423 * @param i32 The 32-bit value to assign to *pi32.
424 */
425DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
426{
427 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
428}
429
430
431/**
432 * Atomically Exchange an unsigned 64-bit value, ordered.
433 *
434 * @returns Current *pu64 value
435 * @param pu64 Pointer to the 64-bit variable to update.
436 * @param u64 The 64-bit value to assign to *pu64.
437 *
438 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
439 */
440#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
441 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
442RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
443#else
444DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
445{
446# if defined(RT_ARCH_AMD64)
447# if RT_INLINE_ASM_USES_INTRIN
448 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
449
450# elif RT_INLINE_ASM_GNU_STYLE
451 __asm__ __volatile__("xchgq %0, %1\n\t"
452 : "=m" (*pu64),
453 "=r" (u64)
454 : "1" (u64),
455 "m" (*pu64));
456# else
457 __asm
458 {
459 mov rdx, [pu64]
460 mov rax, [u64]
461 xchg [rdx], rax
462 mov [u64], rax
463 }
464# endif
465# else /* !RT_ARCH_AMD64 */
466# if RT_INLINE_ASM_GNU_STYLE
467# if defined(PIC) || defined(__PIC__)
468 uint32_t u32EBX = (uint32_t)u64;
469 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
470 "xchgl %%ebx, %3\n\t"
471 "1:\n\t"
472 "lock; cmpxchg8b (%5)\n\t"
473 "jnz 1b\n\t"
474 "movl %3, %%ebx\n\t"
475 /*"xchgl %%esi, %5\n\t"*/
476 : "=A" (u64),
477 "=m" (*pu64)
478 : "0" (*pu64),
479 "m" ( u32EBX ),
480 "c" ( (uint32_t)(u64 >> 32) ),
481 "S" (pu64));
482# else /* !PIC */
483 __asm__ __volatile__("1:\n\t"
484 "lock; cmpxchg8b %1\n\t"
485 "jnz 1b\n\t"
486 : "=A" (u64),
487 "=m" (*pu64)
488 : "0" (*pu64),
489 "b" ( (uint32_t)u64 ),
490 "c" ( (uint32_t)(u64 >> 32) ));
491# endif
492# else
493 __asm
494 {
495 mov ebx, dword ptr [u64]
496 mov ecx, dword ptr [u64 + 4]
497 mov edi, pu64
498 mov eax, dword ptr [edi]
499 mov edx, dword ptr [edi + 4]
500 retry:
501 lock cmpxchg8b [edi]
502 jnz retry
503 mov dword ptr [u64], eax
504 mov dword ptr [u64 + 4], edx
505 }
506# endif
507# endif /* !RT_ARCH_AMD64 */
508 return u64;
509}
510#endif
511
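/* Example (illustrative sketch, not part of the original header): fetching and
 * resetting a 64-bit statistics counter in a single ordered operation, which
 * also works in 32-bit code where a plain 64-bit load + store would not be
 * atomic.  The counter variable is hypothetical.
 *
 *     static volatile uint64_t g_cbExampleTransferred;
 *
 *     static uint64_t rtExampleFetchAndResetByteCount(void)
 *     {
 *         return ASMAtomicXchgU64(&g_cbExampleTransferred, 0);
 *     }
 */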
512
513/**
514 * Atomically Exchange a signed 64-bit value, ordered.
515 *
516 * @returns Current *pi64 value
517 * @param pi64 Pointer to the 64-bit variable to update.
518 * @param i64 The 64-bit value to assign to *pi64.
519 */
520DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
521{
522 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
523}
524
525
526/**
527 * Atomically Exchange a size_t value, ordered.
528 *
529 * @returns Current *puDst value
530 * @param puDst Pointer to the size_t variable to update.
531 * @param uNew The new value to assign to *puDst.
532 */
533DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
534{
535#if ARCH_BITS == 16
536 AssertCompile(sizeof(size_t) == 2);
537 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
538#elif ARCH_BITS == 32
539 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
540#elif ARCH_BITS == 64
541 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
542#else
543# error "ARCH_BITS is bogus"
544#endif
545}
546
547
548/**
549 * Atomically Exchange a pointer value, ordered.
550 *
551 * @returns Current *ppv value
552 * @param ppv Pointer to the pointer variable to update.
553 * @param pv The pointer value to assign to *ppv.
554 */
555DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
556{
557#if ARCH_BITS == 32 || ARCH_BITS == 16
558 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
559#elif ARCH_BITS == 64
560 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
561#else
562# error "ARCH_BITS is bogus"
563#endif
564}
565
566
567/**
568 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
569 *
570 * @returns Current *ppv value
571 * @param ppv Pointer to the pointer variable to update.
572 * @param pv The pointer value to assign to *ppv.
573 * @param Type The type of *ppv, sans volatile.
574 */
575#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
576# define ASMAtomicXchgPtrT(ppv, pv, Type) \
577 __extension__ \
578 ({\
579 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
580 Type const pvTypeChecked = (pv); \
581 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
582 pvTypeCheckedRet; \
583 })
584#else
585# define ASMAtomicXchgPtrT(ppv, pv, Type) \
586 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
587#endif
588
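/* Example (illustrative sketch, not part of the original header): detaching an
 * entire producer list by exchanging the head pointer with NULL, then walking
 * the detached chain without further synchronization.  The node type and list
 * head are hypothetical.
 *
 *     typedef struct RTEXAMPLENODE { struct RTEXAMPLENODE *pNext; } RTEXAMPLENODE;
 *     static RTEXAMPLENODE * volatile g_pExampleHead;
 *
 *     static void rtExampleConsumeAll(void)
 *     {
 *         RTEXAMPLENODE *pCur = ASMAtomicXchgPtrT(&g_pExampleHead, NULL, RTEXAMPLENODE *);
 *         while (pCur)
 *         {
 *             RTEXAMPLENODE * const pNext = pCur->pNext;
 *             // ... process pCur ...
 *             pCur = pNext;
 *         }
 *     }
 */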
589
590/**
591 * Atomically Exchange a raw-mode context pointer value, ordered.
592 *
593 * @returns Current *ppv value
594 * @param ppvRC Pointer to the pointer variable to update.
595 * @param pvRC The pointer value to assign to *ppv.
596 */
597DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
598{
599 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
600}
601
602
603/**
604 * Atomically Exchange a ring-0 pointer value, ordered.
605 *
606 * @returns Current *ppv value
607 * @param ppvR0 Pointer to the pointer variable to update.
608 * @param pvR0 The pointer value to assign to *ppv.
609 */
610DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
611{
612#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
613 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
614#elif R0_ARCH_BITS == 64
615 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
616#else
617# error "R0_ARCH_BITS is bogus"
618#endif
619}
620
621
622/**
623 * Atomically Exchange a ring-3 pointer value, ordered.
624 *
625 * @returns Current *ppv value
626 * @param ppvR3 Pointer to the pointer variable to update.
627 * @param pvR3 The pointer value to assign to *ppv.
628 */
629DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
630{
631#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
632 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
633#elif R3_ARCH_BITS == 64
634 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
635#else
636# error "R3_ARCH_BITS is bogus"
637#endif
638}
639
640
641/** @def ASMAtomicXchgHandle
642 * Atomically Exchange a typical IPRT handle value, ordered.
643 *
644 * @param ph Pointer to the value to update.
645 * @param hNew The new value to assign to *ph.
646 * @param phRes Where to store the current *ph value.
647 *
648 * @remarks This doesn't currently work for all handles (like RTFILE).
649 */
650#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
651# define ASMAtomicXchgHandle(ph, hNew, phRes) \
652 do { \
653 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
654 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
655 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
656 } while (0)
657#elif HC_ARCH_BITS == 64
658# define ASMAtomicXchgHandle(ph, hNew, phRes) \
659 do { \
660 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
661 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
662 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
663 } while (0)
664#else
665# error HC_ARCH_BITS
666#endif
667
668
669/**
670 * Atomically Exchange a value which size might differ
671 * between platforms or compilers, ordered.
672 *
673 * @param pu Pointer to the variable to update.
674 * @param uNew The value to assign to *pu.
675 * @todo This is busted as it's missing the result argument.
676 */
677#define ASMAtomicXchgSize(pu, uNew) \
678 do { \
679 switch (sizeof(*(pu))) { \
680 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
681 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
682 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
683 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
684 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
685 } \
686 } while (0)
687
688/**
689 * Atomically Exchange a value which size might differ
690 * between platforms or compilers, ordered.
691 *
692 * @param pu Pointer to the variable to update.
693 * @param uNew The value to assign to *pu.
694 * @param puRes Where to store the current *pu value.
695 */
696#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
697 do { \
698 switch (sizeof(*(pu))) { \
699 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
700 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
701 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
702 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
703 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
704 } \
705 } while (0)
706
707
708
709/**
710 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
711 *
712 * @returns true if xchg was done.
713 * @returns false if xchg wasn't done.
714 *
715 * @param pu8 Pointer to the value to update.
716 * @param u8New The new value to assign to *pu8.
717 * @param u8Old The old value to compare *pu8 with.
718 *
719 * @remarks x86: Requires a 486 or later.
720 */
721#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
722RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
723#else
724DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
725{
726 uint8_t u8Ret;
727 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
728 "setz %1\n\t"
729 : "=m" (*pu8),
730 "=qm" (u8Ret),
731 "=a" (u8Old)
732 : "q" (u8New),
733 "2" (u8Old),
734 "m" (*pu8));
735 return (bool)u8Ret;
736}
737#endif
738
739
740/**
741 * Atomically Compare and Exchange a signed 8-bit value, ordered.
742 *
743 * @returns true if xchg was done.
744 * @returns false if xchg wasn't done.
745 *
746 * @param pi8 Pointer to the value to update.
747 * @param i8New The new value to assign to *pi8.
748 * @param i8Old The old value to compare *pi8 with.
749 *
750 * @remarks x86: Requires a 486 or later.
751 */
752DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
753{
754 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
755}
756
757
758/**
759 * Atomically Compare and Exchange a bool value, ordered.
760 *
761 * @returns true if xchg was done.
762 * @returns false if xchg wasn't done.
763 *
764 * @param pf Pointer to the value to update.
765 * @param fNew The new value to assign to *pf.
766 * @param fOld The old value to compare *pf with.
767 *
768 * @remarks x86: Requires a 486 or later.
769 */
770DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
771{
772 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
773}
774
775
776/**
777 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
778 *
779 * @returns true if xchg was done.
780 * @returns false if xchg wasn't done.
781 *
782 * @param pu32 Pointer to the value to update.
783 * @param u32New The new value to assign to *pu32.
784 * @param u32Old The old value to compare *pu32 with.
785 *
786 * @remarks x86: Requires a 486 or later.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
790#else
791DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
792{
793# if RT_INLINE_ASM_GNU_STYLE
794 uint8_t u8Ret;
795 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
796 "setz %1\n\t"
797 : "=m" (*pu32),
798 "=qm" (u8Ret),
799 "=a" (u32Old)
800 : "r" (u32New),
801 "2" (u32Old),
802 "m" (*pu32));
803 return (bool)u8Ret;
804
805# elif RT_INLINE_ASM_USES_INTRIN
806 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
807
808# else
809 uint32_t u32Ret;
810 __asm
811 {
812# ifdef RT_ARCH_AMD64
813 mov rdx, [pu32]
814# else
815 mov edx, [pu32]
816# endif
817 mov eax, [u32Old]
818 mov ecx, [u32New]
819# ifdef RT_ARCH_AMD64
820 lock cmpxchg [rdx], ecx
821# else
822 lock cmpxchg [edx], ecx
823# endif
824 setz al
825 movzx eax, al
826 mov [u32Ret], eax
827 }
828 return !!u32Ret;
829# endif
830}
831#endif
832
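/* Example (illustrative sketch, not part of the original header): the typical
 * compare-and-exchange retry loop, here raising a shared maximum.  The
 * statistics variable is hypothetical; ASMAtomicCmpXchgExU32 further down
 * avoids the explicit re-read on failure.
 *
 *     static volatile uint32_t g_cExampleMaxDepth;
 *
 *     static void rtExampleUpdateMaxDepth(uint32_t cDepth)
 *     {
 *         uint32_t cOld;
 *         do
 *             cOld = ASMAtomicReadU32(&g_cExampleMaxDepth);
 *         while (   cDepth > cOld
 *                && !ASMAtomicCmpXchgU32(&g_cExampleMaxDepth, cDepth, cOld));
 *     }
 */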
833
834/**
835 * Atomically Compare and Exchange a signed 32-bit value, ordered.
836 *
837 * @returns true if xchg was done.
838 * @returns false if xchg wasn't done.
839 *
840 * @param pi32 Pointer to the value to update.
841 * @param i32New The new value to assign to *pi32.
842 * @param i32Old The old value to compare *pi32 with.
843 *
844 * @remarks x86: Requires a 486 or later.
845 */
846DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
847{
848 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
849}
850
851
852/**
853 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
854 *
855 * @returns true if xchg was done.
856 * @returns false if xchg wasn't done.
857 *
858 * @param pu64 Pointer to the 64-bit variable to update.
859 * @param u64New The 64-bit value to assign to *pu64.
860 * @param u64Old The value to compare with.
861 *
862 * @remarks x86: Requires a Pentium or later.
863 */
864#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
865 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
866RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
867#else
868DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
869{
870# if RT_INLINE_ASM_USES_INTRIN
871 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
872
873# elif defined(RT_ARCH_AMD64)
874# if RT_INLINE_ASM_GNU_STYLE
875 uint8_t u8Ret;
876 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
877 "setz %1\n\t"
878 : "=m" (*pu64),
879 "=qm" (u8Ret),
880 "=a" (u64Old)
881 : "r" (u64New),
882 "2" (u64Old),
883 "m" (*pu64));
884 return (bool)u8Ret;
885# else
886 bool fRet;
887 __asm
888 {
889 mov rdx, [pu64]
890 mov rax, [u64Old]
891 mov rcx, [u64New]
892 lock cmpxchg [rdx], rcx
893 setz al
894 mov [fRet], al
895 }
896 return fRet;
897# endif
898# else /* !RT_ARCH_AMD64 */
899 uint32_t u32Ret;
900# if RT_INLINE_ASM_GNU_STYLE
901# if defined(PIC) || defined(__PIC__)
902 uint32_t u32EBX = (uint32_t)u64New;
903 uint32_t u32Spill;
904 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
905 "lock; cmpxchg8b (%6)\n\t"
906 "setz %%al\n\t"
907 "movl %4, %%ebx\n\t"
908 "movzbl %%al, %%eax\n\t"
909 : "=a" (u32Ret),
910 "=d" (u32Spill),
911# if RT_GNUC_PREREQ(4, 3)
912 "+m" (*pu64)
913# else
914 "=m" (*pu64)
915# endif
916 : "A" (u64Old),
917 "m" ( u32EBX ),
918 "c" ( (uint32_t)(u64New >> 32) ),
919 "S" (pu64));
920# else /* !PIC */
921 uint32_t u32Spill;
922 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
923 "setz %%al\n\t"
924 "movzbl %%al, %%eax\n\t"
925 : "=a" (u32Ret),
926 "=d" (u32Spill),
927 "+m" (*pu64)
928 : "A" (u64Old),
929 "b" ( (uint32_t)u64New ),
930 "c" ( (uint32_t)(u64New >> 32) ));
931# endif
932 return (bool)u32Ret;
933# else
934 __asm
935 {
936 mov ebx, dword ptr [u64New]
937 mov ecx, dword ptr [u64New + 4]
938 mov edi, [pu64]
939 mov eax, dword ptr [u64Old]
940 mov edx, dword ptr [u64Old + 4]
941 lock cmpxchg8b [edi]
942 setz al
943 movzx eax, al
944 mov dword ptr [u32Ret], eax
945 }
946 return !!u32Ret;
947# endif
948# endif /* !RT_ARCH_AMD64 */
949}
950#endif
951
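/* Example (illustrative sketch, not part of the original header): publishing a
 * lazily computed 64-bit value exactly once; whoever wins the compare-and-
 * exchange keeps its result, everybody else re-reads the winner's value.  The
 * variable and the expensive helper are hypothetical.
 *
 *     static volatile uint64_t g_uExampleTscHz = 0;
 *
 *     static uint64_t rtExampleGetTscHz(void)
 *     {
 *         uint64_t uHz = ASMAtomicReadU64(&g_uExampleTscHz);
 *         if (!uHz)
 *         {
 *             uHz = rtExampleCalcTscHz();   // hypothetical, slow
 *             if (!ASMAtomicCmpXchgU64(&g_uExampleTscHz, uHz, 0))
 *                 uHz = ASMAtomicReadU64(&g_uExampleTscHz);
 *         }
 *         return uHz;
 *     }
 */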
952
953/**
954 * Atomically Compare and exchange a signed 64-bit value, ordered.
955 *
956 * @returns true if xchg was done.
957 * @returns false if xchg wasn't done.
958 *
959 * @param pi64 Pointer to the 64-bit variable to update.
960 * @param i64 The 64-bit value to assign to *pi64.
961 * @param i64Old The value to compare with.
962 *
963 * @remarks x86: Requires a Pentium or later.
964 */
965DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
966{
967 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
968}
969
970
971/**
972 * Atomically Compare and Exchange a pointer value, ordered.
973 *
974 * @returns true if xchg was done.
975 * @returns false if xchg wasn't done.
976 *
977 * @param ppv Pointer to the value to update.
978 * @param pvNew The new value to assign to *ppv.
979 * @param pvOld The old value to compare *ppv with.
980 *
981 * @remarks x86: Requires a 486 or later.
982 */
983DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
984{
985#if ARCH_BITS == 32 || ARCH_BITS == 16
986 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
987#elif ARCH_BITS == 64
988 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
989#else
990# error "ARCH_BITS is bogus"
991#endif
992}
993
994
995/**
996 * Atomically Compare and Exchange a pointer value, ordered.
997 *
998 * @returns true if xchg was done.
999 * @returns false if xchg wasn't done.
1000 *
1001 * @param ppv Pointer to the value to update.
1002 * @param pvNew The new value to assign to *ppv.
1003 * @param pvOld The old value to compare *ppv with.
1004 *
1005 * @remarks This is relatively type safe on GCC platforms.
1006 * @remarks x86: Requires a 486 or later.
1007 */
1008#ifdef __GNUC__
1009# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1010 __extension__ \
1011 ({\
1012 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1013 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1014 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1015 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1016 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1017 fMacroRet; \
1018 })
1019#else
1020# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1021 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1022#endif
1023
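/* Example (illustrative sketch, not part of the original header): a lock-free
 * LIFO push using ASMAtomicCmpXchgPtr.  A matching lock-free pop needs ABA
 * protection and is deliberately not shown.  The node type and stack head are
 * hypothetical.
 *
 *     typedef struct RTEXAMPLENODE { struct RTEXAMPLENODE *pNext; } RTEXAMPLENODE;
 *     static RTEXAMPLENODE * volatile g_pExampleStack;
 *
 *     static void rtExamplePush(RTEXAMPLENODE *pNode)
 *     {
 *         RTEXAMPLENODE *pOldHead;
 *         do
 *         {
 *             pOldHead = g_pExampleStack;
 *             pNode->pNext = pOldHead;
 *         } while (!ASMAtomicCmpXchgPtr(&g_pExampleStack, pNode, pOldHead));
 *     }
 */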
1024
1025/** @def ASMAtomicCmpXchgHandle
1026 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1027 *
1028 * @param ph Pointer to the value to update.
1029 * @param hNew The new value to assign to *ph.
1030 * @param hOld The old value to compare *ph with.
1031 * @param fRc Where to store the result.
1032 *
1033 * @remarks This doesn't currently work for all handles (like RTFILE).
1034 * @remarks x86: Requires a 486 or later.
1035 */
1036#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1037# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1038 do { \
1039 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1040 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1041 } while (0)
1042#elif HC_ARCH_BITS == 64
1043# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1044 do { \
1045 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1046 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1047 } while (0)
1048#else
1049# error HC_ARCH_BITS
1050#endif
1051
1052
1053/** @def ASMAtomicCmpXchgSize
1054 * Atomically Compare and Exchange a value which size might differ
1055 * between platforms or compilers, ordered.
1056 *
1057 * @param pu Pointer to the value to update.
1058 * @param uNew The new value to assign to *pu.
1059 * @param uOld The old value to compare *pu with.
1060 * @param fRc Where to store the result.
1061 *
1062 * @remarks x86: Requires a 486 or later.
1063 */
1064#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1065 do { \
1066 switch (sizeof(*(pu))) { \
1067 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1068 break; \
1069 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1070 break; \
1071 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1072 (fRc) = false; \
1073 break; \
1074 } \
1075 } while (0)
1076
1077
1078/**
1079 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1080 * passes back old value, ordered.
1081 *
1082 * @returns true if xchg was done.
1083 * @returns false if xchg wasn't done.
1084 *
1085 * @param pu32 Pointer to the value to update.
1086 * @param u32New The new value to assign to *pu32.
1087 * @param u32Old The old value to compare *pu32 with.
1088 * @param pu32Old Pointer to store the old value at.
1089 *
1090 * @remarks x86: Requires a 486 or later.
1091 */
1092#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1093RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1094#else
1095DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1096{
1097# if RT_INLINE_ASM_GNU_STYLE
1098 uint8_t u8Ret;
1099 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1100 "setz %1\n\t"
1101 : "=m" (*pu32),
1102 "=qm" (u8Ret),
1103 "=a" (*pu32Old)
1104 : "r" (u32New),
1105 "a" (u32Old),
1106 "m" (*pu32));
1107 return (bool)u8Ret;
1108
1109# elif RT_INLINE_ASM_USES_INTRIN
1110 return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1111
1112# else
1113 uint32_t u32Ret;
1114 __asm
1115 {
1116# ifdef RT_ARCH_AMD64
1117 mov rdx, [pu32]
1118# else
1119 mov edx, [pu32]
1120# endif
1121 mov eax, [u32Old]
1122 mov ecx, [u32New]
1123# ifdef RT_ARCH_AMD64
1124 lock cmpxchg [rdx], ecx
1125 mov rdx, [pu32Old]
1126 mov [rdx], eax
1127# else
1128 lock cmpxchg [edx], ecx
1129 mov edx, [pu32Old]
1130 mov [edx], eax
1131# endif
1132 setz al
1133 movzx eax, al
1134 mov [u32Ret], eax
1135 }
1136 return !!u32Ret;
1137# endif
1138}
1139#endif
1140
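/* Example (illustrative sketch, not part of the original header): the same
 * retry pattern as for ASMAtomicCmpXchgU32, but using the Ex variant so the
 * value actually found is handed back and no explicit re-read is needed on
 * failure.  The flag-setting helper is hypothetical.
 *
 *     static bool rtExampleSetFlags(volatile uint32_t RT_FAR *pfFlags, uint32_t fToSet)
 *     {
 *         uint32_t fOld = ASMAtomicUoReadU32(pfFlags);
 *         uint32_t fNew;
 *         do
 *         {
 *             fNew = fOld | fToSet;
 *             if (fNew == fOld)
 *                 return false;              // bits already set, nothing to do
 *         } while (!ASMAtomicCmpXchgExU32(pfFlags, fNew, fOld, &fOld));
 *         return true;                       // we modified *pfFlags
 *     }
 */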
1141
1142/**
1143 * Atomically Compare and Exchange a signed 32-bit value, additionally
1144 * passes back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pi32 Pointer to the value to update.
1150 * @param i32New The new value to assign to *pi32.
1151 * @param i32Old The old value to compare *pi32 with.
1152 * @param pi32Old Pointer to store the old value at.
1153 *
1154 * @remarks x86: Requires a 486 or later.
1155 */
1156DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1157{
1158 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1159}
1160
1161
1162/**
1163 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1164 * passing back old value, ordered.
1165 *
1166 * @returns true if xchg was done.
1167 * @returns false if xchg wasn't done.
1168 *
1169 * @param pu64 Pointer to the 64-bit variable to update.
1170 * @param u64New The 64-bit value to assign to *pu64.
1171 * @param u64Old The value to compare with.
1172 * @param pu64Old Pointer to store the old value at.
1173 *
1174 * @remarks x86: Requires a Pentium or later.
1175 */
1176#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1177 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1178RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1179#else
1180DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1181{
1182# if RT_INLINE_ASM_USES_INTRIN
1183 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1184
1185# elif defined(RT_ARCH_AMD64)
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint8_t u8Ret;
1188 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1189 "setz %1\n\t"
1190 : "=m" (*pu64),
1191 "=qm" (u8Ret),
1192 "=a" (*pu64Old)
1193 : "r" (u64New),
1194 "a" (u64Old),
1195 "m" (*pu64));
1196 return (bool)u8Ret;
1197# else
1198 bool fRet;
1199 __asm
1200 {
1201 mov rdx, [pu64]
1202 mov rax, [u64Old]
1203 mov rcx, [u64New]
1204 lock cmpxchg [rdx], rcx
1205 mov rdx, [pu64Old]
1206 mov [rdx], rax
1207 setz al
1208 mov [fRet], al
1209 }
1210 return fRet;
1211# endif
1212# else /* !RT_ARCH_AMD64 */
1213# if RT_INLINE_ASM_GNU_STYLE
1214 uint64_t u64Ret;
1215# if defined(PIC) || defined(__PIC__)
1216 /* NB: this code uses a memory clobber description, because the clean
1217 * solution with an output value for *pu64 makes gcc run out of registers.
1218 * This will cause suboptimal code, and anyone with a better solution is
1219 * welcome to improve this. */
1220 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1221 "lock; cmpxchg8b %3\n\t"
1222 "xchgl %%ebx, %1\n\t"
1223 : "=A" (u64Ret)
1224 : "DS" ((uint32_t)u64New),
1225 "c" ((uint32_t)(u64New >> 32)),
1226 "m" (*pu64),
1227 "0" (u64Old)
1228 : "memory" );
1229# else /* !PIC */
1230 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1231 : "=A" (u64Ret),
1232 "=m" (*pu64)
1233 : "b" ((uint32_t)u64New),
1234 "c" ((uint32_t)(u64New >> 32)),
1235 "m" (*pu64),
1236 "0" (u64Old));
1237# endif
1238 *pu64Old = u64Ret;
1239 return u64Ret == u64Old;
1240# else
1241 uint32_t u32Ret;
1242 __asm
1243 {
1244 mov ebx, dword ptr [u64New]
1245 mov ecx, dword ptr [u64New + 4]
1246 mov edi, [pu64]
1247 mov eax, dword ptr [u64Old]
1248 mov edx, dword ptr [u64Old + 4]
1249 lock cmpxchg8b [edi]
1250 mov ebx, [pu64Old]
1251 mov [ebx], eax
1252 setz al
1253 movzx eax, al
1254 add ebx, 4
1255 mov [ebx], edx
1256 mov dword ptr [u32Ret], eax
1257 }
1258 return !!u32Ret;
1259# endif
1260# endif /* !RT_ARCH_AMD64 */
1261}
1262#endif
1263
1264
1265/**
1266 * Atomically Compare and exchange a signed 64-bit value, additionally
1267 * passing back old value, ordered.
1268 *
1269 * @returns true if xchg was done.
1270 * @returns false if xchg wasn't done.
1271 *
1272 * @param pi64 Pointer to the 64-bit variable to update.
1273 * @param i64 The 64-bit value to assign to *pi64.
1274 * @param i64Old The value to compare with.
1275 * @param pi64Old Pointer to store the old value at.
1276 *
1277 * @remarks x86: Requires a Pentium or later.
1278 */
1279DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1280{
1281 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1282}
1283
1284/** @def ASMAtomicCmpXchgExHandle
1285 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1286 *
1287 * @param ph Pointer to the value to update.
1288 * @param hNew The new value to assign to *ph.
1289 * @param hOld The old value to compare *ph with.
1290 * @param fRc Where to store the result.
1291 * @param phOldVal Pointer to where to store the old value.
1292 *
1293 * @remarks This doesn't currently work for all handles (like RTFILE).
1294 */
1295#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1296# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1297 do { \
1298 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1299 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1300 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1301 } while (0)
1302#elif HC_ARCH_BITS == 64
1303# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1304 do { \
1305 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1306 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1307 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1308 } while (0)
1309#else
1310# error HC_ARCH_BITS
1311#endif
1312
1313
1314/** @def ASMAtomicCmpXchgExSize
1315 * Atomically Compare and Exchange a value which size might differ
1316 * between platforms or compilers. Additionally passes back old value.
1317 *
1318 * @param pu Pointer to the value to update.
1319 * @param uNew The new value to assign to *pu.
1320 * @param uOld The old value to compare *pu with.
1321 * @param fRc Where to store the result.
1322 * @param puOldVal Pointer to where to store the old value.
1323 *
1324 * @remarks x86: Requires a 486 or later.
1325 */
1326#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1327 do { \
1328 switch (sizeof(*(pu))) { \
1329 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1330 break; \
1331 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1332 break; \
1333 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1334 (fRc) = false; \
1335 *(puOldVal) = 0; \
1336 break; \
1337 } \
1338 } while (0)
1339
1340
1341/**
1342 * Atomically Compare and Exchange a pointer value, additionally
1343 * passing back old value, ordered.
1344 *
1345 * @returns true if xchg was done.
1346 * @returns false if xchg wasn't done.
1347 *
1348 * @param ppv Pointer to the value to update.
1349 * @param pvNew The new value to assign to *ppv.
1350 * @param pvOld The old value to compare *ppv with.
1351 * @param ppvOld Pointer to store the old value at.
1352 *
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1356 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
1357{
1358#if ARCH_BITS == 32 || ARCH_BITS == 16
1359 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1360#elif ARCH_BITS == 64
1361 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1362#else
1363# error "ARCH_BITS is bogus"
1364#endif
1365}
1366
1367
1368/**
1369 * Atomically Compare and Exchange a pointer value, additionally
1370 * passing back old value, ordered.
1371 *
1372 * @returns true if xchg was done.
1373 * @returns false if xchg wasn't done.
1374 *
1375 * @param ppv Pointer to the value to update.
1376 * @param pvNew The new value to assign to *ppv.
1377 * @param pvOld The old value to compare *ppv with.
1378 * @param ppvOld Pointer to store the old value at.
1379 *
1380 * @remarks This is relatively type safe on GCC platforms.
1381 * @remarks x86: Requires a 486 or later.
1382 */
1383#ifdef __GNUC__
1384# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1385 __extension__ \
1386 ({\
1387 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1388 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1389 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1390 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1391 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1392 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1393 (void **)ppvOldTypeChecked); \
1394 fMacroRet; \
1395 })
1396#else
1397# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1398 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
1399#endif
1400
1401
1402/**
1403 * Virtualization unfriendly serializing instruction, always exits.
1404 */
1405#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1406RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
1407#else
1408DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
1409{
1410# if RT_INLINE_ASM_GNU_STYLE
1411 RTCCUINTREG xAX = 0;
1412# ifdef RT_ARCH_AMD64
1413 __asm__ __volatile__ ("cpuid"
1414 : "=a" (xAX)
1415 : "0" (xAX)
1416 : "rbx", "rcx", "rdx", "memory");
1417# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1418 __asm__ __volatile__ ("push %%ebx\n\t"
1419 "cpuid\n\t"
1420 "pop %%ebx\n\t"
1421 : "=a" (xAX)
1422 : "0" (xAX)
1423 : "ecx", "edx", "memory");
1424# else
1425 __asm__ __volatile__ ("cpuid"
1426 : "=a" (xAX)
1427 : "0" (xAX)
1428 : "ebx", "ecx", "edx", "memory");
1429# endif
1430
1431# elif RT_INLINE_ASM_USES_INTRIN
1432 int aInfo[4];
1433 _ReadWriteBarrier();
1434 __cpuid(aInfo, 0);
1435
1436# else
1437 __asm
1438 {
1439 push ebx
1440 xor eax, eax
1441 cpuid
1442 pop ebx
1443 }
1444# endif
1445}
1446#endif
1447
1448/**
1449 * Virtualization friendly serializing instruction, though more expensive.
1450 */
1451#if RT_INLINE_ASM_EXTERNAL
1452RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
1453#else
1454DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
1455{
1456# if RT_INLINE_ASM_GNU_STYLE
1457# ifdef RT_ARCH_AMD64
1458 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1459 "subq $128, %%rsp\n\t" /*redzone*/
1460 "mov %%ss, %%eax\n\t"
1461 "pushq %%rax\n\t"
1462 "pushq %%r10\n\t"
1463 "pushfq\n\t"
1464 "movl %%cs, %%eax\n\t"
1465 "pushq %%rax\n\t"
1466 "leaq 1f(%%rip), %%rax\n\t"
1467 "pushq %%rax\n\t"
1468 "iretq\n\t"
1469 "1:\n\t"
1470 ::: "rax", "r10", "memory");
1471# else
1472 __asm__ __volatile__ ("pushfl\n\t"
1473 "pushl %%cs\n\t"
1474 "pushl $1f\n\t"
1475 "iretl\n\t"
1476 "1:\n\t"
1477 ::: "memory");
1478# endif
1479
1480# else
1481 __asm
1482 {
1483 pushfd
1484 push cs
1485 push la_ret
1486 iretd
1487 la_ret:
1488 }
1489# endif
1490}
1491#endif
1492
1493/**
1494 * Virtualization friendlier serializing instruction, may still cause exits.
1495 */
1496#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1497RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
1498#else
1499DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
1500{
1501# if RT_INLINE_ASM_GNU_STYLE
1502 /* rdtscp is not supported by the ancient Linux build VM, of course :-( */
1503# ifdef RT_ARCH_AMD64
1504 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
1505 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1506# else
1507 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
1508 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1509# endif
1510# else
1511# if RT_INLINE_ASM_USES_INTRIN >= 15
1512 uint32_t uIgnore;
1513 _ReadWriteBarrier();
1514 (void)__rdtscp(&uIgnore);
1515 (void)uIgnore;
1516# else
1517 __asm
1518 {
1519 rdtscp
1520 }
1521# endif
1522# endif
1523}
1524#endif
1525
1526
1527/**
1528 * Serialize Instruction (both data store and instruction flush).
1529 */
1530#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1531# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1532#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
1533# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1534#elif defined(RT_ARCH_SPARC64)
1535RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
1536#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1537DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
1538{
1539 /* Note! Only armv7 and later. */
1540 __asm__ __volatile__ ("dsb\n\t" ::: "memory");
1541}
1542#else
1543# error "Port me"
1544#endif
1545
1546
1547/**
1548 * Memory fence, waits for any pending writes and reads to complete.
1549 */
1550DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
1551{
1552#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1553# if RT_INLINE_ASM_GNU_STYLE
1554 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
1555# elif RT_INLINE_ASM_USES_INTRIN
1556 _mm_mfence();
1557# else
1558 __asm
1559 {
1560 _emit 0x0f
1561 _emit 0xae
1562 _emit 0xf0
1563 }
1564# endif
1565#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1566 /* Note! Only armv7 and later. */
1567 __asm__ __volatile__ ("dsb sy\n\t");
1568#elif ARCH_BITS == 16
1569 uint16_t volatile u16;
1570 ASMAtomicXchgU16(&u16, 0);
1571#else
1572 uint32_t volatile u32;
1573 ASMAtomicXchgU32(&u32, 0);
1574#endif
1575}
1576
1577
1578/**
1579 * Write fence, waits for any pending writes to complete.
1580 */
1581DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
1582{
1583#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1584# if RT_INLINE_ASM_GNU_STYLE
1585 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
1586# elif RT_INLINE_ASM_USES_INTRIN
1587 _mm_sfence();
1588# else
1589 __asm
1590 {
1591 _emit 0x0f
1592 _emit 0xae
1593 _emit 0xf8
1594 }
1595# endif
1596#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1597 /* Note! Only armv7 and later. */
1598 __asm__ __volatile__ ("dmb sy\n\t");
1599#else
1600 ASMMemoryFence();
1601#endif
1602}
1603
1604
1605/**
1606 * Read fence, waits for any pending reads to complete.
1607 */
1608DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
1609{
1610#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
1611# if RT_INLINE_ASM_GNU_STYLE
1612 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
1613# elif RT_INLINE_ASM_USES_INTRIN
1614 _mm_lfence();
1615# else
1616 __asm
1617 {
1618 _emit 0x0f
1619 _emit 0xae
1620 _emit 0xe8
1621 }
1622# endif
1623#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1624 /* Note! Only armv7 and later. */
1625 __asm__ __volatile__ ("dmb sy\n\t");
1626#else
1627 ASMMemoryFence();
1628#endif
1629}
1630
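/* Example (illustrative sketch, not part of the original header): the classic
 * write-then-publish / check-then-consume pairing of ASMWriteFence and
 * ASMReadFence.  The payload and ready flag are hypothetical; the ordered
 * ASMAtomicReadU32 / ASMAtomicWriteU32 accessors achieve the same effect
 * without explicit fences.
 *
 *     static uint32_t          g_uExamplePayload;
 *     static volatile uint32_t g_fExampleReady;
 *
 *     static void rtExampleProduce(uint32_t uValue)
 *     {
 *         g_uExamplePayload = uValue;
 *         ASMWriteFence();                   // payload becomes visible before the flag
 *         g_fExampleReady = 1;
 *     }
 *
 *     static bool rtExampleTryConsume(uint32_t *puValue)
 *     {
 *         if (!g_fExampleReady)
 *             return false;
 *         ASMReadFence();                    // don't read the payload ahead of the flag
 *         *puValue = g_uExamplePayload;
 *         return true;
 *     }
 */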
1631
1632/**
1633 * Atomically reads an unsigned 8-bit value, ordered.
1634 *
1635 * @returns Current *pu8 value
1636 * @param pu8 Pointer to the 8-bit variable to read.
1637 */
1638DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
1639{
1640 ASMMemoryFence();
1641 return *pu8; /* byte reads are atomic on x86 */
1642}
1643
1644
1645/**
1646 * Atomically reads an unsigned 8-bit value, unordered.
1647 *
1648 * @returns Current *pu8 value
1649 * @param pu8 Pointer to the 8-bit variable to read.
1650 */
1651DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
1652{
1653 return *pu8; /* byte reads are atomic on x86 */
1654}
1655
1656
1657/**
1658 * Atomically reads a signed 8-bit value, ordered.
1659 *
1660 * @returns Current *pi8 value
1661 * @param pi8 Pointer to the 8-bit variable to read.
1662 */
1663DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
1664{
1665 ASMMemoryFence();
1666 return *pi8; /* byte reads are atomic on x86 */
1667}
1668
1669
1670/**
1671 * Atomically reads a signed 8-bit value, unordered.
1672 *
1673 * @returns Current *pi8 value
1674 * @param pi8 Pointer to the 8-bit variable to read.
1675 */
1676DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
1677{
1678 return *pi8; /* byte reads are atomic on x86 */
1679}
1680
1681
1682/**
1683 * Atomically reads an unsigned 16-bit value, ordered.
1684 *
1685 * @returns Current *pu16 value
1686 * @param pu16 Pointer to the 16-bit variable to read.
1687 */
1688DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
1689{
1690 ASMMemoryFence();
1691 Assert(!((uintptr_t)pu16 & 1));
1692 return *pu16;
1693}
1694
1695
1696/**
1697 * Atomically reads an unsigned 16-bit value, unordered.
1698 *
1699 * @returns Current *pu16 value
1700 * @param pu16 Pointer to the 16-bit variable to read.
1701 */
1702DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
1703{
1704 Assert(!((uintptr_t)pu16 & 1));
1705 return *pu16;
1706}
1707
1708
1709/**
1710 * Atomically reads a signed 16-bit value, ordered.
1711 *
1712 * @returns Current *pi16 value
1713 * @param pi16 Pointer to the 16-bit variable to read.
1714 */
1715DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
1716{
1717 ASMMemoryFence();
1718 Assert(!((uintptr_t)pi16 & 1));
1719 return *pi16;
1720}
1721
1722
1723/**
1724 * Atomically reads a signed 16-bit value, unordered.
1725 *
1726 * @returns Current *pi16 value
1727 * @param pi16 Pointer to the 16-bit variable to read.
1728 */
1729DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
1730{
1731 Assert(!((uintptr_t)pi16 & 1));
1732 return *pi16;
1733}
1734
1735
1736/**
1737 * Atomically reads an unsigned 32-bit value, ordered.
1738 *
1739 * @returns Current *pu32 value
1740 * @param pu32 Pointer to the 32-bit variable to read.
1741 */
1742DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
1743{
1744 ASMMemoryFence();
1745 Assert(!((uintptr_t)pu32 & 3));
1746#if ARCH_BITS == 16
1747 AssertFailed(); /** @todo 16-bit */
1748#endif
1749 return *pu32;
1750}
1751
1752
1753/**
1754 * Atomically reads an unsigned 32-bit value, unordered.
1755 *
1756 * @returns Current *pu32 value
1757 * @param pu32 Pointer to the 32-bit variable to read.
1758 */
1759DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
1760{
1761 Assert(!((uintptr_t)pu32 & 3));
1762#if ARCH_BITS == 16
1763 AssertFailed(); /** @todo 16-bit */
1764#endif
1765 return *pu32;
1766}
1767
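/* Example (illustrative sketch, not part of the original header): polling with
 * the cheap unordered read and only paying for a fence once the interesting
 * state shows up.  The shutdown flag is hypothetical.
 *
 *     static volatile uint32_t g_fExampleShutdown;
 *
 *     static void rtExampleWorkerWaitForShutdown(void)
 *     {
 *         while (!ASMAtomicUoReadU32(&g_fExampleShutdown))
 *             ASMNopPause();
 *         ASMMemoryFence();   // order subsequent reads after seeing the flag
 *     }
 */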
1768
1769/**
1770 * Atomically reads a signed 32-bit value, ordered.
1771 *
1772 * @returns Current *pi32 value
1773 * @param pi32 Pointer to the 32-bit variable to read.
1774 */
1775DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
1776{
1777 ASMMemoryFence();
1778 Assert(!((uintptr_t)pi32 & 3));
1779#if ARCH_BITS == 16
1780 AssertFailed(); /** @todo 16-bit */
1781#endif
1782 return *pi32;
1783}
1784
1785
1786/**
1787 * Atomically reads a signed 32-bit value, unordered.
1788 *
1789 * @returns Current *pi32 value
1790 * @param pi32 Pointer to the 32-bit variable to read.
1791 */
1792DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
1793{
1794 Assert(!((uintptr_t)pi32 & 3));
1795#if ARCH_BITS == 16
1796 AssertFailed(); /** @todo 16-bit */
1797#endif
1798 return *pi32;
1799}
1800
1801
1802/**
1803 * Atomically reads an unsigned 64-bit value, ordered.
1804 *
1805 * @returns Current *pu64 value
1806 * @param pu64 Pointer to the 64-bit variable to read.
1807 * The memory pointed to must be writable.
1808 *
1809 * @remarks This may fault if the memory is read-only!
1810 * @remarks x86: Requires a Pentium or later.
1811 */
1812#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1813 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1814RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
1815#else
1816DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
1817{
1818 uint64_t u64;
1819# ifdef RT_ARCH_AMD64
1820 Assert(!((uintptr_t)pu64 & 7));
1821/*# if RT_INLINE_ASM_GNU_STYLE
1822 __asm__ __volatile__( "mfence\n\t"
1823 "movq %1, %0\n\t"
1824 : "=r" (u64)
1825 : "m" (*pu64));
1826# else
1827 __asm
1828 {
1829 mfence
1830 mov rdx, [pu64]
1831 mov rax, [rdx]
1832 mov [u64], rax
1833 }
1834# endif*/
1835 ASMMemoryFence();
1836 u64 = *pu64;
1837# else /* !RT_ARCH_AMD64 */
1838# if RT_INLINE_ASM_GNU_STYLE
1839# if defined(PIC) || defined(__PIC__)
1840 uint32_t u32EBX = 0;
1841 Assert(!((uintptr_t)pu64 & 7));
1842 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1843 "lock; cmpxchg8b (%5)\n\t"
1844 "movl %3, %%ebx\n\t"
1845 : "=A" (u64),
1846# if RT_GNUC_PREREQ(4, 3)
1847 "+m" (*pu64)
1848# else
1849 "=m" (*pu64)
1850# endif
1851 : "0" (0ULL),
1852 "m" (u32EBX),
1853 "c" (0),
1854 "S" (pu64));
1855# else /* !PIC */
1856 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1857 : "=A" (u64),
1858 "+m" (*pu64)
1859 : "0" (0ULL),
1860 "b" (0),
1861 "c" (0));
1862# endif
1863# else
1864 Assert(!((uintptr_t)pu64 & 7));
1865 __asm
1866 {
1867 xor eax, eax
1868 xor edx, edx
1869 mov edi, pu64
1870 xor ecx, ecx
1871 xor ebx, ebx
1872 lock cmpxchg8b [edi]
1873 mov dword ptr [u64], eax
1874 mov dword ptr [u64 + 4], edx
1875 }
1876# endif
1877# endif /* !RT_ARCH_AMD64 */
1878 return u64;
1879}
1880#endif
1881
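/*
 * Usage sketch (illustrative; the global is made up): on 32-bit x86 the
 * ordered 64-bit read above falls back to lock cmpxchg8b, which is why the
 * variable must live in writable memory even though it is only being read.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint64_t volatile g_u64LastSeenTs;   // writable, never const/ROM
 *
 *     static uint64_t exampleGetLastSeenTs(void)
 *     {
 *         return ASMAtomicReadU64(&g_u64LastSeenTs);
 *     }
 */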
1882
1883/**
1884 * Atomically reads an unsigned 64-bit value, unordered.
1885 *
1886 * @returns Current *pu64 value
1887 * @param pu64 Pointer to the 64-bit variable to read.
1888 * The memory pointed to must be writable.
1889 *
1890 * @remarks This may fault if the memory is read-only!
1891 * @remarks x86: Requires a Pentium or later.
1892 */
1893#if !defined(RT_ARCH_AMD64) \
1894 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1895 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1896RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
1897#else
1898DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
1899{
1900 uint64_t u64;
1901# ifdef RT_ARCH_AMD64
1902 Assert(!((uintptr_t)pu64 & 7));
1903/*# if RT_INLINE_ASM_GNU_STYLE
1904 Assert(!((uintptr_t)pu64 & 7));
1905 __asm__ __volatile__("movq %1, %0\n\t"
1906 : "=r" (u64)
1907 : "m" (*pu64));
1908# else
1909 __asm
1910 {
1911 mov rdx, [pu64]
1912 mov rax, [rdx]
1913 mov [u64], rax
1914 }
1915# endif */
1916 u64 = *pu64;
1917# else /* !RT_ARCH_AMD64 */
1918# if RT_INLINE_ASM_GNU_STYLE
1919# if defined(PIC) || defined(__PIC__)
1920 uint32_t u32EBX = 0;
1921 uint32_t u32Spill;
1922 Assert(!((uintptr_t)pu64 & 7));
1923 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1924 "xor %%ecx,%%ecx\n\t"
1925 "xor %%edx,%%edx\n\t"
1926 "xchgl %%ebx, %3\n\t"
1927 "lock; cmpxchg8b (%4)\n\t"
1928 "movl %3, %%ebx\n\t"
1929 : "=A" (u64),
1930# if RT_GNUC_PREREQ(4, 3)
1931 "+m" (*pu64),
1932# else
1933 "=m" (*pu64),
1934# endif
1935 "=c" (u32Spill)
1936 : "m" (u32EBX),
1937 "S" (pu64));
1938# else /* !PIC */
1939 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1940 : "=A" (u64),
1941 "+m" (*pu64)
1942 : "0" (0ULL),
1943 "b" (0),
1944 "c" (0));
1945# endif
1946# else
1947 Assert(!((uintptr_t)pu64 & 7));
1948 __asm
1949 {
1950 xor eax, eax
1951 xor edx, edx
1952 mov edi, pu64
1953 xor ecx, ecx
1954 xor ebx, ebx
1955 lock cmpxchg8b [edi]
1956 mov dword ptr [u64], eax
1957 mov dword ptr [u64 + 4], edx
1958 }
1959# endif
1960# endif /* !RT_ARCH_AMD64 */
1961 return u64;
1962}
1963#endif
1964
1965
1966/**
1967 * Atomically reads a signed 64-bit value, ordered.
1968 *
1969 * @returns Current *pi64 value
1970 * @param pi64 Pointer to the 64-bit variable to read.
1971 * The memory pointed to must be writable.
1972 *
1973 * @remarks This may fault if the memory is read-only!
1974 * @remarks x86: Requires a Pentium or later.
1975 */
1976DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
1977{
1978 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1979}
1980
1981
1982/**
1983 * Atomically reads a signed 64-bit value, unordered.
1984 *
1985 * @returns Current *pi64 value
1986 * @param pi64 Pointer to the 64-bit variable to read.
1987 * The memory pointed to must be writable.
1988 *
1989 * @remarks This may fault if the memory is read-only!
1990 * @remarks x86: Requires a Pentium or later.
1991 */
1992DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
1993{
1994 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1995}
1996
1997
1998/**
1999 * Atomically reads a size_t value, ordered.
2000 *
2001 * @returns Current *pcb value
2002 * @param pcb Pointer to the size_t variable to read.
2003 */
2004DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2005{
2006#if ARCH_BITS == 64
2007 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2008#elif ARCH_BITS == 32
2009 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2010#elif ARCH_BITS == 16
2011 AssertCompileSize(size_t, 2);
2012 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2013#else
2014# error "Unsupported ARCH_BITS value"
2015#endif
2016}
2017
2018
2019/**
2020 * Atomically reads a size_t value, unordered.
2021 *
2022 * @returns Current *pcb value
2023 * @param pcb Pointer to the size_t variable to read.
2024 */
2025DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2026{
2027#if ARCH_BITS == 64
2028 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2029#elif ARCH_BITS == 32
2030 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2031#elif ARCH_BITS == 16
2032 AssertCompileSize(size_t, 2);
2033 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2034#else
2035# error "Unsupported ARCH_BITS value"
2036#endif
2037}
2038
2039
2040/**
2041 * Atomically reads a pointer value, ordered.
2042 *
2043 * @returns Current *pv value
2044 * @param ppv Pointer to the pointer variable to read.
2045 *
2046 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2047 * requires less typing (no casts).
2048 */
2049DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2050{
2051#if ARCH_BITS == 32 || ARCH_BITS == 16
2052 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2053#elif ARCH_BITS == 64
2054 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2055#else
2056# error "ARCH_BITS is bogus"
2057#endif
2058}
2059
2060/**
2061 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2062 *
2063 * @returns Current *pv value
2064 * @param ppv Pointer to the pointer variable to read.
2065 * @param Type The type of *ppv, sans volatile.
2066 */
2067#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2068# define ASMAtomicReadPtrT(ppv, Type) \
2069 __extension__ \
2070 ({\
2071 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2072 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2073 pvTypeChecked; \
2074 })
2075#else
2076# define ASMAtomicReadPtrT(ppv, Type) \
2077 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2078#endif
2079
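/*
 * Usage sketch (illustrative; EXAMPLENODE and g_pExampleHead are made-up
 * names): the typed macro spares the caller the void-pointer casts of
 * ASMAtomicReadPtr and lets the compiler check the pointer types.
 *
 *     #include <iprt/asm.h>
 *
 *     typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; uint32_t uValue; } EXAMPLENODE;
 *     static EXAMPLENODE * volatile g_pExampleHead;
 *
 *     static EXAMPLENODE *exampleGetHead(void)
 *     {
 *         return ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *     }
 */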
2080
2081/**
2082 * Atomically reads a pointer value, unordered.
2083 *
2084 * @returns Current *pv value
2085 * @param ppv Pointer to the pointer variable to read.
2086 *
2087 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2088 * requires less typing (no casts).
2089 */
2090DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2091{
2092#if ARCH_BITS == 32 || ARCH_BITS == 16
2093 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2094#elif ARCH_BITS == 64
2095 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2096#else
2097# error "ARCH_BITS is bogus"
2098#endif
2099}
2100
2101
2102/**
2103 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2104 *
2105 * @returns Current *pv value
2106 * @param ppv Pointer to the pointer variable to read.
2107 * @param Type The type of *ppv, sans volatile.
2108 */
2109#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2110# define ASMAtomicUoReadPtrT(ppv, Type) \
2111 __extension__ \
2112 ({\
2113 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2114 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2115 pvTypeChecked; \
2116 })
2117#else
2118# define ASMAtomicUoReadPtrT(ppv, Type) \
2119 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2120#endif
2121
2122
2123/**
2124 * Atomically reads a boolean value, ordered.
2125 *
2126 * @returns Current *pf value
2127 * @param pf Pointer to the boolean variable to read.
2128 */
2129DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2130{
2131 ASMMemoryFence();
2132 return *pf; /* byte reads are atomic on x86 */
2133}
2134
2135
2136/**
2137 * Atomically reads a boolean value, unordered.
2138 *
2139 * @returns Current *pf value
2140 * @param pf Pointer to the boolean variable to read.
2141 */
2142DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2143{
2144 return *pf; /* byte reads are atomic on x86 */
2145}
2146
2147
2148/**
2149 * Atomically read a typical IPRT handle value, ordered.
2150 *
2151 * @param ph Pointer to the handle variable to read.
2152 * @param phRes Where to store the result.
2153 *
2154 * @remarks This doesn't currently work for all handles (like RTFILE).
2155 */
2156#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2157# define ASMAtomicReadHandle(ph, phRes) \
2158 do { \
2159 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2160 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2161 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2162 } while (0)
2163#elif HC_ARCH_BITS == 64
2164# define ASMAtomicReadHandle(ph, phRes) \
2165 do { \
2166 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2167 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2168 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2169 } while (0)
2170#else
2171# error HC_ARCH_BITS
2172#endif
2173
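/*
 * Usage sketch (assumptions for illustration: RTSEMEVENT from
 * iprt/semaphore.h is used here purely as an example of a pointer-sized
 * handle that passes the size checks; the global and function are made up).
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/semaphore.h>
 *
 *     static RTSEMEVENT volatile g_hExampleEvt = NIL_RTSEMEVENT;
 *
 *     static RTSEMEVENT exampleGetEvent(void)
 *     {
 *         RTSEMEVENT hEvt;
 *         ASMAtomicReadHandle(&g_hExampleEvt, &hEvt);
 *         return hEvt;
 *     }
 */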
2174
2175/**
2176 * Atomically read a typical IPRT handle value, unordered.
2177 *
2178 * @param ph Pointer to the handle variable to read.
2179 * @param phRes Where to store the result.
2180 *
2181 * @remarks This doesn't currently work for all handles (like RTFILE).
2182 */
2183#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2184# define ASMAtomicUoReadHandle(ph, phRes) \
2185 do { \
2186 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2187 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2188 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2189 } while (0)
2190#elif HC_ARCH_BITS == 64
2191# define ASMAtomicUoReadHandle(ph, phRes) \
2192 do { \
2193 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2194 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2195 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2196 } while (0)
2197#else
2198# error HC_ARCH_BITS
2199#endif
2200
2201
2202/**
2203 * Atomically read a value which size might differ
2204 * between platforms or compilers, ordered.
2205 *
2206 * @param pu Pointer to the variable to read.
2207 * @param puRes Where to store the result.
2208 */
2209#define ASMAtomicReadSize(pu, puRes) \
2210 do { \
2211 switch (sizeof(*(pu))) { \
2212 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2213 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2214 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2215 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2216 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2217 } \
2218 } while (0)
2219
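/*
 * Usage sketch (illustrative; the global is made up): the Size macros pick
 * the right fixed-width read from sizeof, which helps with types such as
 * unsigned long whose width differs between platforms (4 bytes on 64-bit
 * Windows, 8 bytes on LP64 systems).
 *
 *     #include <iprt/asm.h>
 *
 *     static unsigned long volatile g_ulExampleState;
 *
 *     static unsigned long exampleReadState(void)
 *     {
 *         unsigned long ulState;
 *         ASMAtomicReadSize(&g_ulExampleState, &ulState);
 *         return ulState;
 *     }
 */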
2220
2221/**
2222 * Atomically read a value which size might differ
2223 * between platforms or compilers, unordered.
2224 *
2225 * @param pu Pointer to the variable to read.
2226 * @param puRes Where to store the result.
2227 */
2228#define ASMAtomicUoReadSize(pu, puRes) \
2229 do { \
2230 switch (sizeof(*(pu))) { \
2231 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2232 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2233 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2234 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2235 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2236 } \
2237 } while (0)
2238
2239
2240/**
2241 * Atomically writes an unsigned 8-bit value, ordered.
2242 *
2243 * @param pu8 Pointer to the 8-bit variable.
2244 * @param u8 The 8-bit value to assign to *pu8.
2245 */
2246DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2247{
2248 ASMAtomicXchgU8(pu8, u8);
2249}
2250
2251
2252/**
2253 * Atomically writes an unsigned 8-bit value, unordered.
2254 *
2255 * @param pu8 Pointer to the 8-bit variable.
2256 * @param u8 The 8-bit value to assign to *pu8.
2257 */
2258DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
2259{
2260 *pu8 = u8; /* byte writes are atomic on x86 */
2261}
2262
2263
2264/**
2265 * Atomically writes a signed 8-bit value, ordered.
2266 *
2267 * @param pi8 Pointer to the 8-bit variable to read.
2268 * @param i8 The 8-bit value to assign to *pi8.
2269 */
2270DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
2271{
2272 ASMAtomicXchgS8(pi8, i8);
2273}
2274
2275
2276/**
2277 * Atomically writes a signed 8-bit value, unordered.
2278 *
2279 * @param pi8 Pointer to the 8-bit variable to write.
2280 * @param i8 The 8-bit value to assign to *pi8.
2281 */
2282DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
2283{
2284 *pi8 = i8; /* byte writes are atomic on x86 */
2285}
2286
2287
2288/**
2289 * Atomically writes an unsigned 16-bit value, ordered.
2290 *
2291 * @param pu16 Pointer to the 16-bit variable to write.
2292 * @param u16 The 16-bit value to assign to *pu16.
2293 */
2294DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
2295{
2296 ASMAtomicXchgU16(pu16, u16);
2297}
2298
2299
2300/**
2301 * Atomically writes an unsigned 16-bit value, unordered.
2302 *
2303 * @param pu16 Pointer to the 16-bit variable to write.
2304 * @param u16 The 16-bit value to assign to *pu16.
2305 */
2306DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
2307{
2308 Assert(!((uintptr_t)pu16 & 1));
2309 *pu16 = u16;
2310}
2311
2312
2313/**
2314 * Atomically writes a signed 16-bit value, ordered.
2315 *
2316 * @param pi16 Pointer to the 16-bit variable to write.
2317 * @param i16 The 16-bit value to assign to *pi16.
2318 */
2319DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
2320{
2321 ASMAtomicXchgS16(pi16, i16);
2322}
2323
2324
2325/**
2326 * Atomically writes a signed 16-bit value, unordered.
2327 *
2328 * @param pi16 Pointer to the 16-bit variable to write.
2329 * @param i16 The 16-bit value to assign to *pi16.
2330 */
2331DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
2332{
2333 Assert(!((uintptr_t)pi16 & 1));
2334 *pi16 = i16;
2335}
2336
2337
2338/**
2339 * Atomically writes an unsigned 32-bit value, ordered.
2340 *
2341 * @param pu32 Pointer to the 32-bit variable to write.
2342 * @param u32 The 32-bit value to assign to *pu32.
2343 */
2344DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2345{
2346 ASMAtomicXchgU32(pu32, u32);
2347}
2348
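/*
 * Usage sketch (illustrative; the globals are made up): the ordered write is
 * implemented as an atomic exchange, so on x86 it serializes like any other
 * locked instruction and pairs naturally with the ordered reads above when
 * publishing data to other threads.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint32_t volatile g_uExamplePayload;
 *     static uint32_t volatile g_fExampleValid = 0;
 *
 *     static void examplePublish(uint32_t uValue)
 *     {
 *         g_uExamplePayload = uValue;                // plain store of the payload
 *         ASMAtomicWriteU32(&g_fExampleValid, 1);    // ordered store publishes it
 *     }
 */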
2349
2350/**
2351 * Atomically writes an unsigned 32-bit value, unordered.
2352 *
2353 * @param pu32 Pointer to the 32-bit variable to write.
2354 * @param u32 The 32-bit value to assign to *pu32.
2355 */
2356DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2357{
2358 Assert(!((uintptr_t)pu32 & 3));
2359#if ARCH_BITS >= 32
2360 *pu32 = u32;
2361#else
2362 ASMAtomicXchgU32(pu32, u32);
2363#endif
2364}
2365
2366
2367/**
2368 * Atomically writes a signed 32-bit value, ordered.
2369 *
2370 * @param pi32 Pointer to the 32-bit variable to write.
2371 * @param i32 The 32-bit value to assign to *pi32.
2372 */
2373DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2374{
2375 ASMAtomicXchgS32(pi32, i32);
2376}
2377
2378
2379/**
2380 * Atomically writes a signed 32-bit value, unordered.
2381 *
2382 * @param pi32 Pointer to the 32-bit variable to write.
2383 * @param i32 The 32-bit value to assign to *pi32.
2384 */
2385DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2386{
2387 Assert(!((uintptr_t)pi32 & 3));
2388#if ARCH_BITS >= 32
2389 *pi32 = i32;
2390#else
2391 ASMAtomicXchgS32(pi32, i32);
2392#endif
2393}
2394
2395
2396/**
2397 * Atomically writes an unsigned 64-bit value, ordered.
2398 *
2399 * @param pu64 Pointer to the 64-bit variable to write.
2400 * @param u64 The 64-bit value to assign to *pu64.
2401 */
2402DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2403{
2404 ASMAtomicXchgU64(pu64, u64);
2405}
2406
2407
2408/**
2409 * Atomically writes an unsigned 64-bit value, unordered.
2410 *
2411 * @param pu64 Pointer to the 64-bit variable to write.
2412 * @param u64 The 64-bit value to assign to *pu64.
2413 */
2414DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2415{
2416 Assert(!((uintptr_t)pu64 & 7));
2417#if ARCH_BITS == 64
2418 *pu64 = u64;
2419#else
2420 ASMAtomicXchgU64(pu64, u64);
2421#endif
2422}
2423
2424
2425/**
2426 * Atomically writes a signed 64-bit value, ordered.
2427 *
2428 * @param pi64 Pointer to the 64-bit variable to write.
2429 * @param i64 The 64-bit value to assign to *pi64.
2430 */
2431DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2432{
2433 ASMAtomicXchgS64(pi64, i64);
2434}
2435
2436
2437/**
2438 * Atomically writes a signed 64-bit value, unordered.
2439 *
2440 * @param pi64 Pointer to the 64-bit variable to write.
2441 * @param i64 The 64-bit value to assign to *pi64.
2442 */
2443DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2444{
2445 Assert(!((uintptr_t)pi64 & 7));
2446#if ARCH_BITS == 64
2447 *pi64 = i64;
2448#else
2449 ASMAtomicXchgS64(pi64, i64);
2450#endif
2451}
2452
2453
2454/**
2455 * Atomically writes a size_t value, ordered.
2456 *
2457 * @returns nothing.
2458 * @param pcb Pointer to the size_t variable to write.
2459 * @param cb The value to assign to *pcb.
2460 */
2461DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
2462{
2463#if ARCH_BITS == 64
2464 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2465#elif ARCH_BITS == 32
2466 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2467#elif ARCH_BITS == 16
2468 AssertCompileSize(size_t, 2);
2469 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2470#else
2471# error "Unsupported ARCH_BITS value"
2472#endif
2473}
2474
2475
2476/**
2477 * Atomically writes a boolean value, ordered.
2478 *
2479 * @param pf Pointer to the boolean variable to write.
2480 * @param f The boolean value to assign to *pf.
2481 */
2482DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
2483{
2484 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2485}
2486
2487
2488/**
2489 * Atomically writes a boolean value, unordered.
2490 *
2491 * @param pf Pointer to the boolean variable to write.
2492 * @param f The boolean value to assign to *pf.
2493 */
2494DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
2495{
2496 *pf = f; /* byte writes are atomic on x86 */
2497}
2498
2499
2500/**
2501 * Atomically writes a pointer value, ordered.
2502 *
2503 * @param ppv Pointer to the pointer variable to write.
2504 * @param pv The pointer value to assign to *ppv.
2505 */
2506DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
2507{
2508#if ARCH_BITS == 32 || ARCH_BITS == 16
2509 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2510#elif ARCH_BITS == 64
2511 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2512#else
2513# error "ARCH_BITS is bogus"
2514#endif
2515}
2516
2517
2518/**
2519 * Atomically writes a pointer value, ordered.
2520 *
2521 * @param ppv Pointer to the pointer variable to write.
2522 * @param pv The pointer value to assign to *ppv. If NULL use
2523 * ASMAtomicWriteNullPtr or you'll land in trouble.
2524 *
2525 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2526 * NULL.
2527 */
2528#ifdef __GNUC__
2529# define ASMAtomicWritePtr(ppv, pv) \
2530 do \
2531 { \
2532 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2533 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2534 \
2535 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2536 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2537 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2538 \
2539 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2540 } while (0)
2541#else
2542# define ASMAtomicWritePtr(ppv, pv) \
2543 do \
2544 { \
2545 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2546 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2547 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2548 \
2549 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2550 } while (0)
2551#endif
2552
2553
2554/**
2555 * Atomically sets a pointer to NULL, ordered.
2556 *
2557 * @param ppv Pointer to the pointer variable that should be set to NULL.
2558 *
2559 * @remarks This is relatively type safe on GCC platforms.
2560 */
2561#if RT_GNUC_PREREQ(4, 2)
2562# define ASMAtomicWriteNullPtr(ppv) \
2563 do \
2564 { \
2565 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2566 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2567 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2568 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2569 } while (0)
2570#else
2571# define ASMAtomicWriteNullPtr(ppv) \
2572 do \
2573 { \
2574 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2575 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2576 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2577 } while (0)
2578#endif
2579
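/*
 * Usage sketch (illustrative; EXAMPLENODE and g_pExampleHead are the same
 * made-up names as in the read example earlier): ASMAtomicWritePtr for
 * non-NULL values and ASMAtomicWriteNullPtr for clearing, as the remarks
 * above advise.
 *
 *     #include <iprt/asm.h>
 *
 *     typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; uint32_t uValue; } EXAMPLENODE;
 *     static EXAMPLENODE * volatile g_pExampleHead;
 *
 *     static void exampleSetHead(EXAMPLENODE *pNew)
 *     {
 *         if (pNew)
 *             ASMAtomicWritePtr(&g_pExampleHead, pNew);
 *         else
 *             ASMAtomicWriteNullPtr(&g_pExampleHead);
 *     }
 */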
2580
2581/**
2582 * Atomically writes a pointer value, unordered.
2583 *
2585 * @param ppv Pointer to the pointer variable.
2586 * @param pv The pointer value to assign to *ppv. If NULL use
2587 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2588 *
2589 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2590 * NULL.
2591 */
2592#if RT_GNUC_PREREQ(4, 2)
2593# define ASMAtomicUoWritePtr(ppv, pv) \
2594 do \
2595 { \
2596 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2597 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2598 \
2599 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2600 AssertCompile(sizeof(pv) == sizeof(void *)); \
2601 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2602 \
2603 *(ppvTypeChecked) = pvTypeChecked; \
2604 } while (0)
2605#else
2606# define ASMAtomicUoWritePtr(ppv, pv) \
2607 do \
2608 { \
2609 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2610 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2611 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2612 *(ppv) = pv; \
2613 } while (0)
2614#endif
2615
2616
2617/**
2618 * Atomically sets a pointer to NULL, unordered.
2619 *
2620 * @param ppv Pointer to the pointer variable that should be set to NULL.
2621 *
2622 * @remarks This is relatively type safe on GCC platforms.
2623 */
2624#ifdef __GNUC__
2625# define ASMAtomicUoWriteNullPtr(ppv) \
2626 do \
2627 { \
2628 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2629 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2630 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2631 *(ppvTypeChecked) = NULL; \
2632 } while (0)
2633#else
2634# define ASMAtomicUoWriteNullPtr(ppv) \
2635 do \
2636 { \
2637 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2638 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2639 *(ppv) = NULL; \
2640 } while (0)
2641#endif
2642
2643
2644/**
2645 * Atomically write a typical IPRT handle value, ordered.
2646 *
2647 * @param ph Pointer to the variable to update.
2648 * @param hNew The value to assign to *ph.
2649 *
2650 * @remarks This doesn't currently work for all handles (like RTFILE).
2651 */
2652#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2653# define ASMAtomicWriteHandle(ph, hNew) \
2654 do { \
2655 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2656 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2657 } while (0)
2658#elif HC_ARCH_BITS == 64
2659# define ASMAtomicWriteHandle(ph, hNew) \
2660 do { \
2661 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2662 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2663 } while (0)
2664#else
2665# error HC_ARCH_BITS
2666#endif
2667
2668
2669/**
2670 * Atomically write a typical IPRT handle value, unordered.
2671 *
2672 * @param ph Pointer to the variable to update.
2673 * @param hNew The value to assign to *ph.
2674 *
2675 * @remarks This doesn't currently work for all handles (like RTFILE).
2676 */
2677#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2678# define ASMAtomicUoWriteHandle(ph, hNew) \
2679 do { \
2680 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2681 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2682 } while (0)
2683#elif HC_ARCH_BITS == 64
2684# define ASMAtomicUoWriteHandle(ph, hNew) \
2685 do { \
2686 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2687 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2688 } while (0)
2689#else
2690# error HC_ARCH_BITS
2691#endif
2692
2693
2694/**
2695 * Atomically write a value which size might differ
2696 * between platforms or compilers, ordered.
2697 *
2698 * @param pu Pointer to the variable to update.
2699 * @param uNew The value to assign to *pu.
2700 */
2701#define ASMAtomicWriteSize(pu, uNew) \
2702 do { \
2703 switch (sizeof(*(pu))) { \
2704 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2705 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2706 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2707 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2708 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2709 } \
2710 } while (0)
2711
2712/**
2713 * Atomically write a value which size might differ
2714 * between platforms or compilers, unordered.
2715 *
2716 * @param pu Pointer to the variable to update.
2717 * @param uNew The value to assign to *pu.
2718 */
2719#define ASMAtomicUoWriteSize(pu, uNew) \
2720 do { \
2721 switch (sizeof(*(pu))) { \
2722 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2723 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2724 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2725 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2726 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2727 } \
2728 } while (0)
2729
2730
2731
2732/**
2733 * Atomically exchanges and adds to a 16-bit value, ordered.
2734 *
2735 * @returns The old value.
2736 * @param pu16 Pointer to the value.
2737 * @param u16 Number to add.
2738 *
2739 * @remarks Currently not implemented, just to make 16-bit code happy.
2740 * @remarks x86: Requires a 486 or later.
2741 */
2742RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
2743
2744
2745/**
2746 * Atomically exchanges and adds to a 32-bit value, ordered.
2747 *
2748 * @returns The old value.
2749 * @param pu32 Pointer to the value.
2750 * @param u32 Number to add.
2751 *
2752 * @remarks x86: Requires a 486 or later.
2753 */
2754#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2755RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
2756#else
2757DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2758{
2759# if RT_INLINE_ASM_USES_INTRIN
2760 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2761 return u32;
2762
2763# elif RT_INLINE_ASM_GNU_STYLE
2764 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2765 : "=r" (u32),
2766 "=m" (*pu32)
2767 : "0" (u32),
2768 "m" (*pu32)
2769 : "memory");
2770 return u32;
2771# else
2772 __asm
2773 {
2774 mov eax, [u32]
2775# ifdef RT_ARCH_AMD64
2776 mov rdx, [pu32]
2777 lock xadd [rdx], eax
2778# else
2779 mov edx, [pu32]
2780 lock xadd [edx], eax
2781# endif
2782 mov [u32], eax
2783 }
2784 return u32;
2785# endif
2786}
2787#endif
2788
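/*
 * Usage sketch (illustrative; the ticket counter is made up): the Add/Sub
 * family returns the value the variable held *before* the operation, unlike
 * the Inc/Dec family further down which returns the new value.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint32_t volatile g_uNextTicket = 0;
 *
 *     static uint32_t exampleTakeTicket(void)
 *     {
 *         // Every caller gets a unique, monotonically increasing number.
 *         return ASMAtomicAddU32(&g_uNextTicket, 1);
 *     }
 */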
2789
2790/**
2791 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2792 *
2793 * @returns The old value.
2794 * @param pi32 Pointer to the value.
2795 * @param i32 Number to add.
2796 *
2797 * @remarks x86: Requires a 486 or later.
2798 */
2799DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2800{
2801 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2802}
2803
2804
2805/**
2806 * Atomically exchanges and adds to a 64-bit value, ordered.
2807 *
2808 * @returns The old value.
2809 * @param pu64 Pointer to the value.
2810 * @param u64 Number to add.
2811 *
2812 * @remarks x86: Requires a Pentium or later.
2813 */
2814#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2815DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
2816#else
2817DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2818{
2819# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2820 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2821 return u64;
2822
2823# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2824 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2825 : "=r" (u64),
2826 "=m" (*pu64)
2827 : "0" (u64),
2828 "m" (*pu64)
2829 : "memory");
2830 return u64;
2831# else
2832 uint64_t u64Old;
2833 for (;;)
2834 {
2835 uint64_t u64New;
2836 u64Old = ASMAtomicUoReadU64(pu64);
2837 u64New = u64Old + u64;
2838 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2839 break;
2840 ASMNopPause();
2841 }
2842 return u64Old;
2843# endif
2844}
2845#endif
2846
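/*
 * Usage sketch (illustrative; exampleAtomicMaxU64 is not an IPRT function):
 * the compare-exchange retry loop used by the fallback path above is a
 * general pattern, and the same shape works for operations this API does not
 * provide directly, such as an atomic maximum.
 *
 *     #include <iprt/asm.h>
 *
 *     static void exampleAtomicMaxU64(uint64_t volatile *pu64, uint64_t u64New)
 *     {
 *         for (;;)
 *         {
 *             uint64_t const u64Old = ASMAtomicUoReadU64(pu64);
 *             if (u64New <= u64Old)
 *                 break;                                   // already large enough
 *             if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
 *                 break;                                   // we won the race
 *             ASMNopPause();                               // lost the race, retry
 *         }
 *     }
 */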
2847
2848/**
2849 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2850 *
2851 * @returns The old value.
2852 * @param pi64 Pointer to the value.
2853 * @param i64 Number to add.
2854 *
2855 * @remarks x86: Requires a Pentium or later.
2856 */
2857DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2858{
2859 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2860}
2861
2862
2863/**
2864 * Atomically exchanges and adds to a size_t value, ordered.
2865 *
2866 * @returns The old value.
2867 * @param pcb Pointer to the size_t value.
2868 * @param cb Number to add.
2869 */
2870DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
2871{
2872#if ARCH_BITS == 64
2873 AssertCompileSize(size_t, 8);
2874 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2875#elif ARCH_BITS == 32
2876 AssertCompileSize(size_t, 4);
2877 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2878#elif ARCH_BITS == 16
2879 AssertCompileSize(size_t, 2);
2880 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2881#else
2882# error "Unsupported ARCH_BITS value"
2883#endif
2884}
2885
2886
2887/**
2888 * Atomically exchanges and adds a value which size might differ between
2889 * platforms or compilers, ordered.
2890 *
2891 * @param pu Pointer to the variable to update.
2892 * @param uNew The value to add to *pu.
2893 * @param puOld Where to store the old value.
2894 */
2895#define ASMAtomicAddSize(pu, uNew, puOld) \
2896 do { \
2897 switch (sizeof(*(pu))) { \
2898 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2899 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2900 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2901 } \
2902 } while (0)
2903
2904
2905
2906/**
2907 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
2908 *
2909 * @returns The old value.
2910 * @param pu16 Pointer to the value.
2911 * @param u16 Number to subtract.
2912 *
2913 * @remarks x86: Requires a 486 or later.
2914 */
2915DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
2916{
2917 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2918}
2919
2920
2921/**
2922 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
2923 *
2924 * @returns The old value.
2925 * @param pi16 Pointer to the value.
2926 * @param i16 Number to subtract.
2927 *
2928 * @remarks x86: Requires a 486 or later.
2929 */
2930DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
2931{
2932 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2933}
2934
2935
2936/**
2937 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
2938 *
2939 * @returns The old value.
2940 * @param pu32 Pointer to the value.
2941 * @param u32 Number to subtract.
2942 *
2943 * @remarks x86: Requires a 486 or later.
2944 */
2945DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
2946{
2947 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2948}
2949
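/*
 * Usage sketch (illustrative; the budget counter is made up): like Add, Sub
 * returns the value held before the operation; the subtraction itself is
 * performed by adding the two's complement of the operand.
 *
 *     #include <iprt/asm.h>
 *
 *     static uint32_t volatile g_cbBudgetLeft;
 *
 *     static uint32_t exampleConsumeBudget(uint32_t cb)
 *     {
 *         // Returns how much budget remained before this call took its share.
 *         return ASMAtomicSubU32(&g_cbBudgetLeft, cb);
 *     }
 */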
2950
2951/**
2952 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
2953 *
2954 * @returns The old value.
2955 * @param pi32 Pointer to the value.
2956 * @param i32 Number to subtract.
2957 *
2958 * @remarks x86: Requires a 486 or later.
2959 */
2960DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
2961{
2962 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2963}
2964
2965
2966/**
2967 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
2968 *
2969 * @returns The old value.
2970 * @param pu64 Pointer to the value.
2971 * @param u64 Number to subtract.
2972 *
2973 * @remarks x86: Requires a Pentium or later.
2974 */
2975DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
2976{
2977 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2978}
2979
2980
2981/**
2982 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
2983 *
2984 * @returns The old value.
2985 * @param pi64 Pointer to the value.
2986 * @param i64 Number to subtract.
2987 *
2988 * @remarks x86: Requires a Pentium or later.
2989 */
2990DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
2991{
2992 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2993}
2994
2995
2996/**
2997 * Atomically exchanges and subtracts from a size_t value, ordered.
2998 *
2999 * @returns The old value.
3000 * @param pcb Pointer to the size_t value.
3001 * @param cb Number to subtract.
3002 *
3003 * @remarks x86: Requires a 486 or later.
3004 */
3005DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3006{
3007#if ARCH_BITS == 64
3008 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3009#elif ARCH_BITS == 32
3010 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3011#elif ARCH_BITS == 16
3012 AssertCompileSize(size_t, 2);
3013 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3014#else
3015# error "Unsupported ARCH_BITS value"
3016#endif
3017}
3018
3019
3020/**
3021 * Atomically exchanges and subtracts a value which size might differ between
3022 * platforms or compilers, ordered.
3023 *
3024 * @param pu Pointer to the variable to update.
3025 * @param uNew The value to subtract from *pu.
3026 * @param puOld Where to store the old value.
3027 *
3028 * @remarks x86: Requires a 486 or later.
3029 */
3030#define ASMAtomicSubSize(pu, uNew, puOld) \
3031 do { \
3032 switch (sizeof(*(pu))) { \
3033 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3034 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3035 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3036 } \
3037 } while (0)
3038
3039
3040
3041/**
3042 * Atomically increment a 16-bit value, ordered.
3043 *
3044 * @returns The new value.
3045 * @param pu16 Pointer to the value to increment.
3046 * @remarks Not implemented. Just to make 16-bit code happy.
3047 *
3048 * @remarks x86: Requires a 486 or later.
3049 */
3050RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3051
3052
3053/**
3054 * Atomically increment a 32-bit value, ordered.
3055 *
3056 * @returns The new value.
3057 * @param pu32 Pointer to the value to increment.
3058 *
3059 * @remarks x86: Requires a 486 or later.
3060 */
3061#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3062RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3063#else
3064DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3065{
3066 uint32_t u32;
3067# if RT_INLINE_ASM_USES_INTRIN
3068 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3069 return u32;
3070
3071# elif RT_INLINE_ASM_GNU_STYLE
3072 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3073 : "=r" (u32),
3074 "=m" (*pu32)
3075 : "0" (1),
3076 "m" (*pu32)
3077 : "memory");
3078 return u32+1;
3079# else
3080 __asm
3081 {
3082 mov eax, 1
3083# ifdef RT_ARCH_AMD64
3084 mov rdx, [pu32]
3085 lock xadd [rdx], eax
3086# else
3087 mov edx, [pu32]
3088 lock xadd [edx], eax
3089# endif
3090 mov u32, eax
3091 }
3092 return u32+1;
3093# endif
3094}
3095#endif
3096
3097
3098/**
3099 * Atomically increment a signed 32-bit value, ordered.
3100 *
3101 * @returns The new value.
3102 * @param pi32 Pointer to the value to increment.
3103 *
3104 * @remarks x86: Requires a 486 or later.
3105 */
3106DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3107{
3108 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3109}
3110
3111
3112/**
3113 * Atomically increment a 64-bit value, ordered.
3114 *
3115 * @returns The new value.
3116 * @param pu64 Pointer to the value to increment.
3117 *
3118 * @remarks x86: Requires a Pentium or later.
3119 */
3120#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3121DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3122#else
3123DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3124{
3125# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3126 uint64_t u64;
3127 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3128 return u64;
3129
3130# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3131 uint64_t u64;
3132 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3133 : "=r" (u64),
3134 "=m" (*pu64)
3135 : "0" (1),
3136 "m" (*pu64)
3137 : "memory");
3138 return u64 + 1;
3139# else
3140 return ASMAtomicAddU64(pu64, 1) + 1;
3141# endif
3142}
3143#endif
3144
3145
3146/**
3147 * Atomically increment a signed 64-bit value, ordered.
3148 *
3149 * @returns The new value.
3150 * @param pi64 Pointer to the value to increment.
3151 *
3152 * @remarks x86: Requires a Pentium or later.
3153 */
3154DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3155{
3156 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3157}
3158
3159
3160/**
3161 * Atomically increment a size_t value, ordered.
3162 *
3163 * @returns The new value.
3164 * @param pcb Pointer to the value to increment.
3165 *
3166 * @remarks x86: Requires a 486 or later.
3167 */
3168DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3169{
3170#if ARCH_BITS == 64
3171 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3172#elif ARCH_BITS == 32
3173 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3174#elif ARCH_BITS == 16
3175 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3176#else
3177# error "Unsupported ARCH_BITS value"
3178#endif
3179}
3180
3181
3182
3183/**
3184 * Atomically decrement an unsigned 16-bit value, ordered.
3185 *
3186 * @returns The new value.
3187 * @param pu16 Pointer to the value to decrement.
3188 * @remarks Not implemented. Just to make 16-bit code happy.
3189 *
3190 * @remarks x86: Requires a 486 or later.
3191 */
3192RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3193
3194
3195/**
3196 * Atomically decrement an unsigned 32-bit value, ordered.
3197 *
3198 * @returns The new value.
3199 * @param pu32 Pointer to the value to decrement.
3200 *
3201 * @remarks x86: Requires a 486 or later.
3202 */
3203#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3204RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3205#else
3206DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3207{
3208 uint32_t u32;
3209# if RT_INLINE_ASM_USES_INTRIN
3210 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3211 return u32;
3212
3213# elif RT_INLINE_ASM_GNU_STYLE
3214 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3215 : "=r" (u32),
3216 "=m" (*pu32)
3217 : "0" (-1),
3218 "m" (*pu32)
3219 : "memory");
3220 return u32-1;
3221# else
3222 __asm
3223 {
3224 mov eax, -1
3225# ifdef RT_ARCH_AMD64
3226 mov rdx, [pu32]
3227 lock xadd [rdx], eax
3228# else
3229 mov edx, [pu32]
3230 lock xadd [edx], eax
3231# endif
3232 mov u32, eax
3233 }
3234 return u32-1;
3235# endif
3236}
3237#endif
3238
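/*
 * Usage sketch (illustrative; EXAMPLEOBJ and the helpers are made up): the
 * classic reference-counting pattern, relying on Inc/Dec returning the new
 * count so the releasing thread knows when it dropped the last reference.
 *
 *     #include <iprt/asm.h>
 *     #include <iprt/mem.h>
 *
 *     typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; uint32_t uPayload; } EXAMPLEOBJ;
 *
 *     static void exampleRetain(EXAMPLEOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     static void exampleRelease(EXAMPLEOBJ *pObj)
 *     {
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *             RTMemFree(pObj);                 // last reference is gone
 *     }
 */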
3239
3240/**
3241 * Atomically decrement a signed 32-bit value, ordered.
3242 *
3243 * @returns The new value.
3244 * @param pi32 Pointer to the value to decrement.
3245 *
3246 * @remarks x86: Requires a 486 or later.
3247 */
3248DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
3249{
3250 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3251}
3252
3253
3254/**
3255 * Atomically decrement an unsigned 64-bit value, ordered.
3256 *
3257 * @returns The new value.
3258 * @param pu64 Pointer to the value to decrement.
3259 *
3260 * @remarks x86: Requires a Pentium or later.
3261 */
3262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3263RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
3264#else
3265DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
3266{
3267# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3268 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3269 return u64;
3270
3271# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3272 uint64_t u64;
3273 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3274 : "=r" (u64),
3275 "=m" (*pu64)
3276 : "0" (~(uint64_t)0),
3277 "m" (*pu64)
3278 : "memory");
3279 return u64-1;
3280# else
3281 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3282# endif
3283}
3284#endif
3285
3286
3287/**
3288 * Atomically decrement a signed 64-bit value, ordered.
3289 *
3290 * @returns The new value.
3291 * @param pi64 Pointer to the value to decrement.
3292 *
3293 * @remarks x86: Requires a Pentium or later.
3294 */
3295DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
3296{
3297 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3298}
3299
3300
3301/**
3302 * Atomically decrement a size_t value, ordered.
3303 *
3304 * @returns The new value.
3305 * @param pcb Pointer to the value to decrement.
3306 *
3307 * @remarks x86: Requires a 486 or later.
3308 */
3309DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
3310{
3311#if ARCH_BITS == 64
3312 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3313#elif ARCH_BITS == 32
3314 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3315#elif ARCH_BITS == 16
3316 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3317#else
3318# error "Unsupported ARCH_BITS value"
3319#endif
3320}
3321
3322
3323/**
3324 * Atomically Or an unsigned 32-bit value, ordered.
3325 *
3326 * @param pu32 Pointer to the variable to OR u32 with.
3327 * @param u32 The value to OR *pu32 with.
3328 *
3329 * @remarks x86: Requires a 386 or later.
3330 */
3331#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3332RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3333#else
3334DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3335{
3336# if RT_INLINE_ASM_USES_INTRIN
3337 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3338
3339# elif RT_INLINE_ASM_GNU_STYLE
3340 __asm__ __volatile__("lock; orl %1, %0\n\t"
3341 : "=m" (*pu32)
3342 : "ir" (u32),
3343 "m" (*pu32));
3344# else
3345 __asm
3346 {
3347 mov eax, [u32]
3348# ifdef RT_ARCH_AMD64
3349 mov rdx, [pu32]
3350 lock or [rdx], eax
3351# else
3352 mov edx, [pu32]
3353 lock or [edx], eax
3354# endif
3355 }
3356# endif
3357}
3358#endif
3359
3360
3361/**
3362 * Atomically Or a signed 32-bit value, ordered.
3363 *
3364 * @param pi32 Pointer to the variable to OR i32 with.
3365 * @param i32 The value to OR *pi32 with.
3366 *
3367 * @remarks x86: Requires a 386 or later.
3368 */
3369DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3370{
3371 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3372}
3373
3374
3375/**
3376 * Atomically Or an unsigned 64-bit value, ordered.
3377 *
3378 * @param pu64 Pointer to the variable to OR u64 with.
3379 * @param u64 The value to OR *pu64 with.
3380 *
3381 * @remarks x86: Requires a Pentium or later.
3382 */
3383#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3384DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3385#else
3386DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3387{
3388# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3389 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3390
3391# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3392 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3393 : "=m" (*pu64)
3394 : "r" (u64),
3395 "m" (*pu64));
3396# else
3397 for (;;)
3398 {
3399 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3400 uint64_t u64New = u64Old | u64;
3401 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3402 break;
3403 ASMNopPause();
3404 }
3405# endif
3406}
3407#endif
3408
3409
3410/**
3411 * Atomically Or a signed 64-bit value, ordered.
3412 *
3413 * @param pi64 Pointer to the variable to OR i64 with.
3414 * @param i64 The value to OR *pi64 with.
3415 *
3416 * @remarks x86: Requires a Pentium or later.
3417 */
3418DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3419{
3420 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3421}
3422
3423
3424/**
3425 * Atomically And an unsigned 32-bit value, ordered.
3426 *
3427 * @param pu32 Pointer to the variable to AND u32 with.
3428 * @param u32 The value to AND *pu32 with.
3429 *
3430 * @remarks x86: Requires a 386 or later.
3431 */
3432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3433RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3434#else
3435DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3436{
3437# if RT_INLINE_ASM_USES_INTRIN
3438 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3439
3440# elif RT_INLINE_ASM_GNU_STYLE
3441 __asm__ __volatile__("lock; andl %1, %0\n\t"
3442 : "=m" (*pu32)
3443 : "ir" (u32),
3444 "m" (*pu32));
3445# else
3446 __asm
3447 {
3448 mov eax, [u32]
3449# ifdef RT_ARCH_AMD64
3450 mov rdx, [pu32]
3451 lock and [rdx], eax
3452# else
3453 mov edx, [pu32]
3454 lock and [edx], eax
3455# endif
3456 }
3457# endif
3458}
3459#endif
3460
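/*
 * Usage sketch (illustrative; the flag names are made up): OR sets bits and
 * AND with the complement clears them, atomically with respect to other CPUs
 * updating the same word.
 *
 *     #include <iprt/asm.h>
 *
 *     #define EXAMPLE_F_BUSY      UINT32_C(0x00000001)
 *     #define EXAMPLE_F_SHUTDOWN  UINT32_C(0x00000002)
 *
 *     static uint32_t volatile g_fExampleFlags = 0;
 *
 *     static void exampleEnterBusy(void)
 *     {
 *         ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_F_BUSY);
 *     }
 *
 *     static void exampleLeaveBusy(void)
 *     {
 *         ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_F_BUSY);
 *     }
 */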
3461
3462/**
3463 * Atomically And a signed 32-bit value, ordered.
3464 *
3465 * @param pi32 Pointer to the variable to AND i32 with.
3466 * @param i32 The value to AND *pi32 with.
3467 *
3468 * @remarks x86: Requires a 386 or later.
3469 */
3470DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3471{
3472 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3473}
3474
3475
3476/**
3477 * Atomically And an unsigned 64-bit value, ordered.
3478 *
3479 * @param pu64 Pointer to the variable to AND u64 with.
3480 * @param u64 The value to AND *pu64 with.
3481 *
3482 * @remarks x86: Requires a Pentium or later.
3483 */
3484#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3485DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3486#else
3487DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3488{
3489# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3490 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3491
3492# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3493 __asm__ __volatile__("lock; andq %1, %0\n\t"
3494 : "=m" (*pu64)
3495 : "r" (u64),
3496 "m" (*pu64));
3497# else
3498 for (;;)
3499 {
3500 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3501 uint64_t u64New = u64Old & u64;
3502 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3503 break;
3504 ASMNopPause();
3505 }
3506# endif
3507}
3508#endif
3509
3510
3511/**
3512 * Atomically And a signed 64-bit value, ordered.
3513 *
3514 * @param pi64 Pointer to the variable to AND i64 with.
3515 * @param i64 The value to AND *pi64 with.
3516 *
3517 * @remarks x86: Requires a Pentium or later.
3518 */
3519DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3520{
3521 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3522}
3523
3524
3525/**
3526 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3527 *
3528 * @param pu32 Pointer to the variable to OR u32 with.
3529 * @param u32 The value to OR *pu32 with.
3530 *
3531 * @remarks x86: Requires a 386 or later.
3532 */
3533#if RT_INLINE_ASM_EXTERNAL
3534RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3535#else
3536DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3537{
3538# if RT_INLINE_ASM_GNU_STYLE
3539 __asm__ __volatile__("orl %1, %0\n\t"
3540 : "=m" (*pu32)
3541 : "ir" (u32),
3542 "m" (*pu32));
3543# else
3544 __asm
3545 {
3546 mov eax, [u32]
3547# ifdef RT_ARCH_AMD64
3548 mov rdx, [pu32]
3549 or [rdx], eax
3550# else
3551 mov edx, [pu32]
3552 or [edx], eax
3553# endif
3554 }
3555# endif
3556}
3557#endif
3558
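/*
 * Usage sketch (illustrative; the per-CPU structure is made up): the
 * unordered variant above still uses a single read-modify-write instruction,
 * so it is safe against interrupts on the same CPU; what it drops is the bus
 * lock and ordering needed when several CPUs update the word concurrently.
 *
 *     #include <iprt/asm.h>
 *
 *     typedef struct EXAMPLEPERCPU { uint32_t volatile fPendingWork; } EXAMPLEPERCPU;
 *
 *     static void exampleMarkWork(EXAMPLEPERCPU *pThisCpu, uint32_t fWork)
 *     {
 *         // Only this CPU (its thread and interrupt handlers) touches the word.
 *         ASMAtomicUoOrU32(&pThisCpu->fPendingWork, fWork);
 *     }
 */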
3559
3560/**
3561 * Atomically OR a signed 32-bit value, unordered.
3562 *
3563 * @param pi32 Pointer to the variable to OR i32 with.
3564 * @param i32 The value to OR *pi32 with.
3565 *
3566 * @remarks x86: Requires a 386 or later.
3567 */
3568DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3569{
3570 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3571}
3572
3573
3574/**
3575 * Atomically OR an unsigned 64-bit value, unordered.
3576 *
3577 * @param pu64 Pointer to the variable to OR u64 with.
3578 * @param u64 The value to OR *pu64 with.
3579 *
3580 * @remarks x86: Requires a Pentium or later.
3581 */
3582#if RT_INLINE_ASM_EXTERNAL
3583DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3584#else
3585DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3586{
3587# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3588 __asm__ __volatile__("orq %1, %q0\n\t"
3589 : "=m" (*pu64)
3590 : "r" (u64),
3591 "m" (*pu64));
3592# else
3593 for (;;)
3594 {
3595 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3596 uint64_t u64New = u64Old | u64;
3597 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3598 break;
3599 ASMNopPause();
3600 }
3601# endif
3602}
3603#endif
3604
3605
3606/**
3607 * Atomically Or a signed 64-bit value, unordered.
3608 *
3609 * @param pi64 Pointer to the variable to OR i64 with.
3610 * @param i64 The value to OR *pi64 with.
3611 *
3612 * @remarks x86: Requires a Pentium or later.
3613 */
3614DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3615{
3616 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3617}
3618
3619
3620/**
3621 * Atomically And an unsigned 32-bit value, unordered.
3622 *
3623 * @param pu32 Pointer to the variable to AND u32 with.
3624 * @param u32 The value to AND *pu32 with.
3625 *
3626 * @remarks x86: Requires a 386 or later.
3627 */
3628#if RT_INLINE_ASM_EXTERNAL
3629RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3630#else
3631DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3632{
3633# if RT_INLINE_ASM_GNU_STYLE
3634 __asm__ __volatile__("andl %1, %0\n\t"
3635 : "=m" (*pu32)
3636 : "ir" (u32),
3637 "m" (*pu32));
3638# else
3639 __asm
3640 {
3641 mov eax, [u32]
3642# ifdef RT_ARCH_AMD64
3643 mov rdx, [pu32]
3644 and [rdx], eax
3645# else
3646 mov edx, [pu32]
3647 and [edx], eax
3648# endif
3649 }
3650# endif
3651}
3652#endif
3653
3654
3655/**
3656 * Atomically And a signed 32-bit value, unordered.
3657 *
3658 * @param pi32 Pointer to the variable to AND i32 with.
3659 * @param i32 The value to AND *pi32 with.
3660 *
3661 * @remarks x86: Requires a 386 or later.
3662 */
3663DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3664{
3665 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3666}
3667
3668
3669/**
3670 * Atomically AND an unsigned 64-bit value, unordered.
3671 *
3672 * @param   pu64            Pointer to the variable to AND u64 with.
3673 * @param u64 The value to AND *pu64 with.
3674 *
3675 * @remarks x86: Requires a Pentium or later.
3676 */
3677#if RT_INLINE_ASM_EXTERNAL
3678DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3679#else
3680DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3681{
3682# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3683 __asm__ __volatile__("andq %1, %0\n\t"
3684 : "=m" (*pu64)
3685 : "r" (u64),
3686 "m" (*pu64));
3687# else
3688 for (;;)
3689 {
3690 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3691 uint64_t u64New = u64Old & u64;
3692 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3693 break;
3694 ASMNopPause();
3695 }
3696# endif
3697}
3698#endif
3699
3700
3701/**
3702 * Atomically AND a signed 64-bit value, unordered.
3703 *
3704 * @param   pi64            Pointer to the variable to AND i64 with.
3705 * @param i64 The value to AND *pi64 with.
3706 *
3707 * @remarks x86: Requires a Pentium or later.
3708 */
3709DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3710{
3711 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3712}
3713
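/* Usage sketch for the unordered OR/AND helpers above.  The g_fFeatures
 * variable is hypothetical caller code, not part of IPRT; the operations are
 * atomic with respect to other writers but impose no memory ordering:
 *
 *      static uint32_t volatile g_fFeatures = 0;
 *
 *      ASMAtomicUoOrU32(&g_fFeatures, RT_BIT_32(0));       // announce feature 0.
 *      ASMAtomicUoAndU32(&g_fFeatures, ~RT_BIT_32(0));     // retract it again.
 */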
3714
3715/**
3716 * Atomically increment an unsigned 32-bit value, unordered.
3717 *
3718 * @returns the new value.
3719 * @param pu32 Pointer to the variable to increment.
3720 *
3721 * @remarks x86: Requires a 486 or later.
3722 */
3723#if RT_INLINE_ASM_EXTERNAL
3724RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3725#else
3726DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3727{
3728 uint32_t u32;
3729# if RT_INLINE_ASM_GNU_STYLE
3730 __asm__ __volatile__("xaddl %0, %1\n\t"
3731 : "=r" (u32),
3732 "=m" (*pu32)
3733 : "0" (1),
3734 "m" (*pu32)
3735 : "memory");
3736 return u32 + 1;
3737# else
3738 __asm
3739 {
3740 mov eax, 1
3741# ifdef RT_ARCH_AMD64
3742 mov rdx, [pu32]
3743 xadd [rdx], eax
3744# else
3745 mov edx, [pu32]
3746 xadd [edx], eax
3747# endif
3748 mov u32, eax
3749 }
3750 return u32 + 1;
3751# endif
3752}
3753#endif
3754
3755
3756/**
3757 * Atomically decrement an unsigned 32-bit value, unordered.
3758 *
3759 * @returns the new value.
3760 * @param pu32 Pointer to the variable to decrement.
3761 *
3762 * @remarks x86: Requires a 486 or later.
3763 */
3764#if RT_INLINE_ASM_EXTERNAL
3765RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3766#else
3767DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3768{
3769 uint32_t u32;
3770# if RT_INLINE_ASM_GNU_STYLE
3771 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3772 : "=r" (u32),
3773 "=m" (*pu32)
3774 : "0" (-1),
3775 "m" (*pu32)
3776 : "memory");
3777 return u32 - 1;
3778# else
3779 __asm
3780 {
3781 mov eax, -1
3782# ifdef RT_ARCH_AMD64
3783 mov rdx, [pu32]
3784 xadd [rdx], eax
3785# else
3786 mov edx, [pu32]
3787 xadd [edx], eax
3788# endif
3789 mov u32, eax
3790 }
3791 return u32 - 1;
3792# endif
3793}
3794#endif
3795
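/* Usage sketch: an approximate statistics counter where strict ordering is
 * not required (g_cRequests is hypothetical caller code):
 *
 *      static uint32_t volatile g_cRequests = 0;
 *
 *      uint32_t cAfter = ASMAtomicUoIncU32(&g_cRequests);  // returns the new value.
 *      ...
 *      ASMAtomicUoDecU32(&g_cRequests);
 */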
3796
3797/** @def RT_ASM_PAGE_SIZE
3798 * We try to avoid dragging in iprt/param.h here.
3799 * @internal
3800 */
3801#if defined(RT_ARCH_SPARC64)
3802# define RT_ASM_PAGE_SIZE 0x2000
3803# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3804# if PAGE_SIZE != 0x2000
3805# error "PAGE_SIZE is not 0x2000!"
3806# endif
3807# endif
3808#elif defined(RT_ARCH_ARM64)
3809# define RT_ASM_PAGE_SIZE 0x4000
3810# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
3811# if PAGE_SIZE != 0x4000
3812# error "PAGE_SIZE is not 0x4000!"
3813# endif
3814# endif
3815#else
3816# define RT_ASM_PAGE_SIZE 0x1000
3817# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3818# if PAGE_SIZE != 0x1000
3819# error "PAGE_SIZE is not 0x1000!"
3820# endif
3821# endif
3822#endif
3823
3824/**
3825 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
3826 *
3827 * @param pv Pointer to the memory block. This must be page aligned.
3828 */
3829#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3830RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
3831# else
3832DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
3833{
3834# if RT_INLINE_ASM_USES_INTRIN
3835# ifdef RT_ARCH_AMD64
3836 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3837# else
3838 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3839# endif
3840
3841# elif RT_INLINE_ASM_GNU_STYLE
3842 RTCCUINTREG uDummy;
3843# ifdef RT_ARCH_AMD64
3844 __asm__ __volatile__("rep stosq"
3845 : "=D" (pv),
3846 "=c" (uDummy)
3847 : "0" (pv),
3848 "c" (RT_ASM_PAGE_SIZE >> 3),
3849 "a" (0)
3850 : "memory");
3851# else
3852 __asm__ __volatile__("rep stosl"
3853 : "=D" (pv),
3854 "=c" (uDummy)
3855 : "0" (pv),
3856 "c" (RT_ASM_PAGE_SIZE >> 2),
3857 "a" (0)
3858 : "memory");
3859# endif
3860# else
3861 __asm
3862 {
3863# ifdef RT_ARCH_AMD64
3864 xor rax, rax
3865 mov ecx, 0200h
3866 mov rdi, [pv]
3867 rep stosq
3868# else
3869 xor eax, eax
3870 mov ecx, 0400h
3871 mov edi, [pv]
3872 rep stosd
3873# endif
3874 }
3875# endif
3876}
3877# endif
3878
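/* Usage sketch, assuming the page was allocated with page alignment, e.g. by
 * RTMemPageAlloc() from iprt/mem.h (not included by this header):
 *
 *      void *pvPage = RTMemPageAlloc(RT_ASM_PAGE_SIZE);
 *      if (pvPage)
 *          ASMMemZeroPage(pvPage);     // zeros RT_ASM_PAGE_SIZE bytes.
 */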
3879
3880/**
3881 * Zeros a memory block with a 32-bit aligned size.
3882 *
3883 * @param pv Pointer to the memory block.
3884 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3885 */
3886#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3887RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
3888#else
3889DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
3890{
3891# if RT_INLINE_ASM_USES_INTRIN
3892# ifdef RT_ARCH_AMD64
3893 if (!(cb & 7))
3894 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3895 else
3896# endif
3897 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3898
3899# elif RT_INLINE_ASM_GNU_STYLE
3900 __asm__ __volatile__("rep stosl"
3901 : "=D" (pv),
3902 "=c" (cb)
3903 : "0" (pv),
3904 "1" (cb >> 2),
3905 "a" (0)
3906 : "memory");
3907# else
3908 __asm
3909 {
3910 xor eax, eax
3911# ifdef RT_ARCH_AMD64
3912 mov rcx, [cb]
3913 shr rcx, 2
3914 mov rdi, [pv]
3915# else
3916 mov ecx, [cb]
3917 shr ecx, 2
3918 mov edi, [pv]
3919# endif
3920 rep stosd
3921 }
3922# endif
3923}
3924#endif
3925
3926
3927/**
3928 * Fills a memory block with a 32-bit aligned size.
3929 *
3930 * @param pv Pointer to the memory block.
3931 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3932 * @param u32 The value to fill with.
3933 */
3934#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3935RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
3936#else
3937DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
3938{
3939# if RT_INLINE_ASM_USES_INTRIN
3940# ifdef RT_ARCH_AMD64
3941 if (!(cb & 7))
3942 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3943 else
3944# endif
3945 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3946
3947# elif RT_INLINE_ASM_GNU_STYLE
3948 __asm__ __volatile__("rep stosl"
3949 : "=D" (pv),
3950 "=c" (cb)
3951 : "0" (pv),
3952 "1" (cb >> 2),
3953 "a" (u32)
3954 : "memory");
3955# else
3956 __asm
3957 {
3958# ifdef RT_ARCH_AMD64
3959 mov rcx, [cb]
3960 shr rcx, 2
3961 mov rdi, [pv]
3962# else
3963 mov ecx, [cb]
3964 shr ecx, 2
3965 mov edi, [pv]
3966# endif
3967 mov eax, [u32]
3968 rep stosd
3969 }
3970# endif
3971}
3972#endif
3973
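/* Usage sketch for the 32-bit sized zero/fill helpers above (au32Table is
 * hypothetical caller code; the byte count must be a multiple of 4):
 *
 *      uint32_t au32Table[64];
 *      ASMMemZero32(au32Table, sizeof(au32Table));                 // all zero.
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_MAX);     // all 0xffffffff.
 */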
3974
3975/**
3976 * Checks if a memory block is all zeros.
3977 *
3978 * @returns Pointer to the first non-zero byte.
3979 * @returns NULL if all zero.
3980 *
3981 * @param pv Pointer to the memory block.
3982 * @param cb Number of bytes in the block.
3983 *
3984 * @todo Fix name, it is a predicate function but it's not returning boolean!
3985 */
3986#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3987 && !defined(RT_ARCH_SPARC64) \
3988 && !defined(RT_ARCH_SPARC)
3989DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
3990#else
3991DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
3992{
3993 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3994 for (; cb; cb--, pb++)
3995 if (RT_LIKELY(*pb == 0))
3996 { /* likely */ }
3997 else
3998 return (void RT_FAR *)pb;
3999 return NULL;
4000}
4001#endif
4002
4003
4004/**
4005 * Checks if a memory block is all zeros.
4006 *
4007 * @returns true if zero, false if not.
4008 *
4009 * @param pv Pointer to the memory block.
4010 * @param cb Number of bytes in the block.
4011 *
4012 * @sa ASMMemFirstNonZero
4013 */
4014DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
4015{
4016 return ASMMemFirstNonZero(pv, cb) == NULL;
4017}
4018
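/* Usage sketch (abBuf is hypothetical caller data):
 *
 *      uint8_t abBuf[256];
 *      ...
 *      if (!ASMMemIsZero(abBuf, sizeof(abBuf)))
 *      {
 *          uint8_t *pbFirst = (uint8_t *)ASMMemFirstNonZero(abBuf, sizeof(abBuf));
 *          // pbFirst points at the first non-zero byte.
 *      }
 */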
4019
4020/**
4021 * Checks if a memory page is all zeros.
4022 *
4023 * @returns true / false.
4024 *
4025 * @param   pvPage      Pointer to the page.  Must be aligned on a 16 byte
4026 *                      boundary.
4027 */
4028DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
4029{
4030# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
4031 union { RTCCUINTREG r; bool f; } uAX;
4032 RTCCUINTREG xCX, xDI;
4033 Assert(!((uintptr_t)pvPage & 15));
4034 __asm__ __volatile__("repe; "
4035# ifdef RT_ARCH_AMD64
4036 "scasq\n\t"
4037# else
4038 "scasl\n\t"
4039# endif
4040 "setnc %%al\n\t"
4041 : "=&c" (xCX),
4042 "=&D" (xDI),
4043 "=&a" (uAX.r)
4044 : "mr" (pvPage),
4045# ifdef RT_ARCH_AMD64
4046 "0" (RT_ASM_PAGE_SIZE/8),
4047# else
4048 "0" (RT_ASM_PAGE_SIZE/4),
4049# endif
4050 "1" (pvPage),
4051 "2" (0));
4052 return uAX.f;
4053# else
4054 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4055 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4056 Assert(!((uintptr_t)pvPage & 15));
4057 for (;;)
4058 {
4059 if (puPtr[0]) return false;
4060 if (puPtr[4]) return false;
4061
4062 if (puPtr[2]) return false;
4063 if (puPtr[6]) return false;
4064
4065 if (puPtr[1]) return false;
4066 if (puPtr[5]) return false;
4067
4068 if (puPtr[3]) return false;
4069 if (puPtr[7]) return false;
4070
4071 if (!--cLeft)
4072 return true;
4073 puPtr += 8;
4074 }
4075# endif
4076}
4077
4078
4079/**
4080 * Checks if a memory block is filled with the specified byte, returning the
4081 * first mismatch.
4082 *
4083 * This is sort of an inverted memchr.
4084 *
4085 * @returns Pointer to the byte which doesn't equal u8.
4086 * @returns NULL if all equal to u8.
4087 *
4088 * @param pv Pointer to the memory block.
4089 * @param cb Number of bytes in the block.
4090 * @param u8 The value it's supposed to be filled with.
4091 *
4092 * @remarks No alignment requirements.
4093 */
4094#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4095 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4096 && !defined(RT_ARCH_SPARC64) \
4097 && !defined(RT_ARCH_SPARC)
4098DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
4099#else
4100DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
4101{
4102 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4103 for (; cb; cb--, pb++)
4104 if (RT_LIKELY(*pb == u8))
4105 { /* likely */ }
4106 else
4107 return (void *)pb;
4108 return NULL;
4109}
4110#endif
4111
4112
4113/**
4114 * Checks if a memory block is filled with the specified byte.
4115 *
4116 * @returns true if all matching, false if not.
4117 *
4118 * @param pv Pointer to the memory block.
4119 * @param cb Number of bytes in the block.
4120 * @param u8 The value it's supposed to be filled with.
4121 *
4122 * @remarks No alignment requirements.
4123 */
4124DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
4125{
4126 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4127}
4128
4129
4130/**
4131 * Checks if a memory block is filled with the specified 32-bit value.
4132 *
4133 * This is a sort of inverted memchr.
4134 *
4135 * @returns Pointer to the first value which doesn't equal u32.
4136 * @returns NULL if all equal to u32.
4137 *
4138 * @param pv Pointer to the memory block.
4139 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4140 * @param u32 The value it's supposed to be filled with.
4141 */
4142DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
4143{
4144/** @todo rewrite this in inline assembly? */
4145 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4146 for (; cb; cb -= 4, pu32++)
4147 if (RT_LIKELY(*pu32 == u32))
4148 { /* likely */ }
4149 else
4150 return (uint32_t RT_FAR *)pu32;
4151 return NULL;
4152}
4153
4154
4155/**
4156 * Probes a byte pointer for read access.
4157 *
4158 * While the function will fault if the byte is not read accessible,
4159 * the idea is to do this in a safe place like before acquiring locks
4160 * and such like.
4161 *
4162 * Also, this function guarantees that an eager compiler is not going
4163 * to optimize the probing away.
4164 *
4165 * @param pvByte Pointer to the byte.
4166 */
4167#if RT_INLINE_ASM_EXTERNAL
4168RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
4169#else
4170DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
4171{
4172 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4173 uint8_t u8;
4174# if RT_INLINE_ASM_GNU_STYLE
4175 __asm__ __volatile__("movb (%1), %0\n\t"
4176 : "=r" (u8)
4177 : "r" (pvByte));
4178# else
4179 __asm
4180 {
4181# ifdef RT_ARCH_AMD64
4182 mov rax, [pvByte]
4183 mov al, [rax]
4184# else
4185 mov eax, [pvByte]
4186 mov al, [eax]
4187# endif
4188 mov [u8], al
4189 }
4190# endif
4191 return u8;
4192}
4193#endif
4194
4195/**
4196 * Probes a buffer for read access page by page.
4197 *
4198 * While the function will fault if the buffer is not fully read
4199 * accessible, the idea is to do this in a safe place like before
4200 * acquiring locks and such like.
4201 *
4202 * Also, this function guarantees that an eager compiler is not going
4203 * to optimize the probing away.
4204 *
4205 * @param pvBuf Pointer to the buffer.
4206 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4207 */
4208DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
4209{
4210 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4211 /* the first byte */
4212 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4213 ASMProbeReadByte(pu8);
4214
4215    /* the pages in between. */
4216 while (cbBuf > RT_ASM_PAGE_SIZE)
4217 {
4218 ASMProbeReadByte(pu8);
4219 cbBuf -= RT_ASM_PAGE_SIZE;
4220 pu8 += RT_ASM_PAGE_SIZE;
4221 }
4222
4223 /* the last byte */
4224 ASMProbeReadByte(pu8 + cbBuf - 1);
4225}
4226
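/* Usage sketch: probing a caller supplied buffer before entering a context
 * where a page fault would be inconvenient (pvReq and cbReq are hypothetical
 * caller parameters):
 *
 *      ASMProbeReadBuffer(pvReq, cbReq);   // faults here, in a safe context, if unreadable.
 *      // ... acquire the spinlock and consume the buffer ...
 */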
4227
4228
4229/** @defgroup grp_inline_bits Bit Operations
4230 * @{
4231 */
4232
4233
4234/**
4235 * Sets a bit in a bitmap.
4236 *
4237 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4238 * @param iBit The bit to set.
4239 *
4240 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4241 * However, doing so will yield better performance as well as avoiding
4242 * traps accessing the last bits in the bitmap.
4243 */
4244#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4245RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4246#else
4247DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4248{
4249# if RT_INLINE_ASM_USES_INTRIN
4250 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4251
4252# elif RT_INLINE_ASM_GNU_STYLE
4253 __asm__ __volatile__("btsl %1, %0"
4254 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4255 : "Ir" (iBit),
4256 "m" (*(volatile long RT_FAR *)pvBitmap)
4257 : "memory");
4258# else
4259 __asm
4260 {
4261# ifdef RT_ARCH_AMD64
4262 mov rax, [pvBitmap]
4263 mov edx, [iBit]
4264 bts [rax], edx
4265# else
4266 mov eax, [pvBitmap]
4267 mov edx, [iBit]
4268 bts [eax], edx
4269# endif
4270 }
4271# endif
4272}
4273#endif
4274
4275
4276/**
4277 * Atomically sets a bit in a bitmap, ordered.
4278 *
4279 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4280 * the memory access isn't atomic!
4281 * @param iBit The bit to set.
4282 *
4283 * @remarks x86: Requires a 386 or later.
4284 */
4285#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4286RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4287#else
4288DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4289{
4290 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4291# if RT_INLINE_ASM_USES_INTRIN
4292 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4293# elif RT_INLINE_ASM_GNU_STYLE
4294 __asm__ __volatile__("lock; btsl %1, %0"
4295 : "=m" (*(volatile long *)pvBitmap)
4296 : "Ir" (iBit),
4297 "m" (*(volatile long *)pvBitmap)
4298 : "memory");
4299# else
4300 __asm
4301 {
4302# ifdef RT_ARCH_AMD64
4303 mov rax, [pvBitmap]
4304 mov edx, [iBit]
4305 lock bts [rax], edx
4306# else
4307 mov eax, [pvBitmap]
4308 mov edx, [iBit]
4309 lock bts [eax], edx
4310# endif
4311 }
4312# endif
4313}
4314#endif
4315
4316
4317/**
4318 * Clears a bit in a bitmap.
4319 *
4320 * @param pvBitmap Pointer to the bitmap.
4321 * @param iBit The bit to clear.
4322 *
4323 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4324 * However, doing so will yield better performance as well as avoiding
4325 * traps accessing the last bits in the bitmap.
4326 */
4327#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4328RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4329#else
4330DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4331{
4332# if RT_INLINE_ASM_USES_INTRIN
4333 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4334
4335# elif RT_INLINE_ASM_GNU_STYLE
4336 __asm__ __volatile__("btrl %1, %0"
4337 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4338 : "Ir" (iBit),
4339 "m" (*(volatile long RT_FAR *)pvBitmap)
4340 : "memory");
4341# else
4342 __asm
4343 {
4344# ifdef RT_ARCH_AMD64
4345 mov rax, [pvBitmap]
4346 mov edx, [iBit]
4347 btr [rax], edx
4348# else
4349 mov eax, [pvBitmap]
4350 mov edx, [iBit]
4351 btr [eax], edx
4352# endif
4353 }
4354# endif
4355}
4356#endif
4357
4358
4359/**
4360 * Atomically clears a bit in a bitmap, ordered.
4361 *
4362 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4363 * the memory access isn't atomic!
4364 * @param   iBit        The bit to clear.
4365 *
4366 * @remarks No memory barrier, take care on smp.
4367 * @remarks x86: Requires a 386 or later.
4368 */
4369#if RT_INLINE_ASM_EXTERNAL
4370RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4371#else
4372DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4373{
4374 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4375# if RT_INLINE_ASM_GNU_STYLE
4376 __asm__ __volatile__("lock; btrl %1, %0"
4377 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4378 : "Ir" (iBit),
4379 "m" (*(volatile long RT_FAR *)pvBitmap)
4380 : "memory");
4381# else
4382 __asm
4383 {
4384# ifdef RT_ARCH_AMD64
4385 mov rax, [pvBitmap]
4386 mov edx, [iBit]
4387 lock btr [rax], edx
4388# else
4389 mov eax, [pvBitmap]
4390 mov edx, [iBit]
4391 lock btr [eax], edx
4392# endif
4393 }
4394# endif
4395}
4396#endif
4397
4398
4399/**
4400 * Toggles a bit in a bitmap.
4401 *
4402 * @param pvBitmap Pointer to the bitmap.
4403 * @param iBit The bit to toggle.
4404 *
4405 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4406 * However, doing so will yield better performance as well as avoiding
4407 * traps accessing the last bits in the bitmap.
4408 */
4409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4410RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4411#else
4412DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4413{
4414# if RT_INLINE_ASM_USES_INTRIN
4415 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4416# elif RT_INLINE_ASM_GNU_STYLE
4417 __asm__ __volatile__("btcl %1, %0"
4418 : "=m" (*(volatile long *)pvBitmap)
4419 : "Ir" (iBit),
4420 "m" (*(volatile long *)pvBitmap)
4421 : "memory");
4422# else
4423 __asm
4424 {
4425# ifdef RT_ARCH_AMD64
4426 mov rax, [pvBitmap]
4427 mov edx, [iBit]
4428 btc [rax], edx
4429# else
4430 mov eax, [pvBitmap]
4431 mov edx, [iBit]
4432 btc [eax], edx
4433# endif
4434 }
4435# endif
4436}
4437#endif
4438
4439
4440/**
4441 * Atomically toggles a bit in a bitmap, ordered.
4442 *
4443 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4444 * the memory access isn't atomic!
4445 * @param   iBit        The bit to toggle.
4446 *
4447 * @remarks x86: Requires a 386 or later.
4448 */
4449#if RT_INLINE_ASM_EXTERNAL
4450RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4451#else
4452DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4453{
4454 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4455# if RT_INLINE_ASM_GNU_STYLE
4456 __asm__ __volatile__("lock; btcl %1, %0"
4457 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4458 : "Ir" (iBit),
4459 "m" (*(volatile long RT_FAR *)pvBitmap)
4460 : "memory");
4461# else
4462 __asm
4463 {
4464# ifdef RT_ARCH_AMD64
4465 mov rax, [pvBitmap]
4466 mov edx, [iBit]
4467 lock btc [rax], edx
4468# else
4469 mov eax, [pvBitmap]
4470 mov edx, [iBit]
4471 lock btc [eax], edx
4472# endif
4473 }
4474# endif
4475}
4476#endif
4477
4478
4479/**
4480 * Tests and sets a bit in a bitmap.
4481 *
4482 * @returns true if the bit was set.
4483 * @returns false if the bit was clear.
4484 *
4485 * @param pvBitmap Pointer to the bitmap.
4486 * @param iBit The bit to test and set.
4487 *
4488 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4489 * However, doing so will yield better performance as well as avoiding
4490 * traps accessing the last bits in the bitmap.
4491 */
4492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4493RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4494#else
4495DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4496{
4497 union { bool f; uint32_t u32; uint8_t u8; } rc;
4498# if RT_INLINE_ASM_USES_INTRIN
4499 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4500
4501# elif RT_INLINE_ASM_GNU_STYLE
4502 __asm__ __volatile__("btsl %2, %1\n\t"
4503 "setc %b0\n\t"
4504 "andl $1, %0\n\t"
4505 : "=q" (rc.u32),
4506 "=m" (*(volatile long RT_FAR *)pvBitmap)
4507 : "Ir" (iBit),
4508 "m" (*(volatile long RT_FAR *)pvBitmap)
4509 : "memory");
4510# else
4511 __asm
4512 {
4513 mov edx, [iBit]
4514# ifdef RT_ARCH_AMD64
4515 mov rax, [pvBitmap]
4516 bts [rax], edx
4517# else
4518 mov eax, [pvBitmap]
4519 bts [eax], edx
4520# endif
4521 setc al
4522 and eax, 1
4523 mov [rc.u32], eax
4524 }
4525# endif
4526 return rc.f;
4527}
4528#endif
4529
4530
4531/**
4532 * Atomically tests and sets a bit in a bitmap, ordered.
4533 *
4534 * @returns true if the bit was set.
4535 * @returns false if the bit was clear.
4536 *
4537 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4538 * the memory access isn't atomic!
4539 * @param iBit The bit to set.
4540 *
4541 * @remarks x86: Requires a 386 or later.
4542 */
4543#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4544RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4545#else
4546DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4547{
4548 union { bool f; uint32_t u32; uint8_t u8; } rc;
4549 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4550# if RT_INLINE_ASM_USES_INTRIN
4551 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4552# elif RT_INLINE_ASM_GNU_STYLE
4553 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4554 "setc %b0\n\t"
4555 "andl $1, %0\n\t"
4556 : "=q" (rc.u32),
4557 "=m" (*(volatile long RT_FAR *)pvBitmap)
4558 : "Ir" (iBit),
4559 "m" (*(volatile long RT_FAR *)pvBitmap)
4560 : "memory");
4561# else
4562 __asm
4563 {
4564 mov edx, [iBit]
4565# ifdef RT_ARCH_AMD64
4566 mov rax, [pvBitmap]
4567 lock bts [rax], edx
4568# else
4569 mov eax, [pvBitmap]
4570 lock bts [eax], edx
4571# endif
4572 setc al
4573 and eax, 1
4574 mov [rc.u32], eax
4575 }
4576# endif
4577 return rc.f;
4578}
4579#endif
4580
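/* Usage sketch: a run-once guard built on the ordered test-and-set above
 * (g_fInitGuard is hypothetical caller code and is 32-bit aligned):
 *
 *      static uint32_t volatile g_fInitGuard = 0;
 *      if (!ASMAtomicBitTestAndSet(&g_fInitGuard, 0))
 *      {
 *          // first caller: perform the one-time initialization here.
 *      }
 */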
4581
4582/**
4583 * Tests and clears a bit in a bitmap.
4584 *
4585 * @returns true if the bit was set.
4586 * @returns false if the bit was clear.
4587 *
4588 * @param pvBitmap Pointer to the bitmap.
4589 * @param iBit The bit to test and clear.
4590 *
4591 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4592 * However, doing so will yield better performance as well as avoiding
4593 * traps accessing the last bits in the bitmap.
4594 */
4595#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4596RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4597#else
4598DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4599{
4600 union { bool f; uint32_t u32; uint8_t u8; } rc;
4601# if RT_INLINE_ASM_USES_INTRIN
4602 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4603
4604# elif RT_INLINE_ASM_GNU_STYLE
4605 __asm__ __volatile__("btrl %2, %1\n\t"
4606 "setc %b0\n\t"
4607 "andl $1, %0\n\t"
4608 : "=q" (rc.u32),
4609 "=m" (*(volatile long RT_FAR *)pvBitmap)
4610 : "Ir" (iBit),
4611 "m" (*(volatile long RT_FAR *)pvBitmap)
4612 : "memory");
4613# else
4614 __asm
4615 {
4616 mov edx, [iBit]
4617# ifdef RT_ARCH_AMD64
4618 mov rax, [pvBitmap]
4619 btr [rax], edx
4620# else
4621 mov eax, [pvBitmap]
4622 btr [eax], edx
4623# endif
4624 setc al
4625 and eax, 1
4626 mov [rc.u32], eax
4627 }
4628# endif
4629 return rc.f;
4630}
4631#endif
4632
4633
4634/**
4635 * Atomically tests and clears a bit in a bitmap, ordered.
4636 *
4637 * @returns true if the bit was set.
4638 * @returns false if the bit was clear.
4639 *
4640 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4641 * the memory access isn't atomic!
4642 * @param iBit The bit to test and clear.
4643 *
4644 * @remarks No memory barrier, take care on smp.
4645 * @remarks x86: Requires a 386 or later.
4646 */
4647#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4648RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4649#else
4650DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4651{
4652 union { bool f; uint32_t u32; uint8_t u8; } rc;
4653 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4654# if RT_INLINE_ASM_USES_INTRIN
4655 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4656
4657# elif RT_INLINE_ASM_GNU_STYLE
4658 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4659 "setc %b0\n\t"
4660 "andl $1, %0\n\t"
4661 : "=q" (rc.u32),
4662 "=m" (*(volatile long RT_FAR *)pvBitmap)
4663 : "Ir" (iBit),
4664 "m" (*(volatile long RT_FAR *)pvBitmap)
4665 : "memory");
4666# else
4667 __asm
4668 {
4669 mov edx, [iBit]
4670# ifdef RT_ARCH_AMD64
4671 mov rax, [pvBitmap]
4672 lock btr [rax], edx
4673# else
4674 mov eax, [pvBitmap]
4675 lock btr [eax], edx
4676# endif
4677 setc al
4678 and eax, 1
4679 mov [rc.u32], eax
4680 }
4681# endif
4682 return rc.f;
4683}
4684#endif
4685
4686
4687/**
4688 * Tests and toggles a bit in a bitmap.
4689 *
4690 * @returns true if the bit was set.
4691 * @returns false if the bit was clear.
4692 *
4693 * @param pvBitmap Pointer to the bitmap.
4694 * @param iBit The bit to test and toggle.
4695 *
4696 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4697 * However, doing so will yield better performance as well as avoiding
4698 * traps accessing the last bits in the bitmap.
4699 */
4700#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4701RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4702#else
4703DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4704{
4705 union { bool f; uint32_t u32; uint8_t u8; } rc;
4706# if RT_INLINE_ASM_USES_INTRIN
4707 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4708
4709# elif RT_INLINE_ASM_GNU_STYLE
4710 __asm__ __volatile__("btcl %2, %1\n\t"
4711 "setc %b0\n\t"
4712 "andl $1, %0\n\t"
4713 : "=q" (rc.u32),
4714 "=m" (*(volatile long RT_FAR *)pvBitmap)
4715 : "Ir" (iBit),
4716 "m" (*(volatile long RT_FAR *)pvBitmap)
4717 : "memory");
4718# else
4719 __asm
4720 {
4721 mov edx, [iBit]
4722# ifdef RT_ARCH_AMD64
4723 mov rax, [pvBitmap]
4724 btc [rax], edx
4725# else
4726 mov eax, [pvBitmap]
4727 btc [eax], edx
4728# endif
4729 setc al
4730 and eax, 1
4731 mov [rc.u32], eax
4732 }
4733# endif
4734 return rc.f;
4735}
4736#endif
4737
4738
4739/**
4740 * Atomically tests and toggles a bit in a bitmap, ordered.
4741 *
4742 * @returns true if the bit was set.
4743 * @returns false if the bit was clear.
4744 *
4745 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4746 * the memory access isn't atomic!
4747 * @param iBit The bit to test and toggle.
4748 *
4749 * @remarks x86: Requires a 386 or later.
4750 */
4751#if RT_INLINE_ASM_EXTERNAL
4752RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4753#else
4754DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4755{
4756 union { bool f; uint32_t u32; uint8_t u8; } rc;
4757 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4758# if RT_INLINE_ASM_GNU_STYLE
4759 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4760 "setc %b0\n\t"
4761 "andl $1, %0\n\t"
4762 : "=q" (rc.u32),
4763 "=m" (*(volatile long RT_FAR *)pvBitmap)
4764 : "Ir" (iBit),
4765 "m" (*(volatile long RT_FAR *)pvBitmap)
4766 : "memory");
4767# else
4768 __asm
4769 {
4770 mov edx, [iBit]
4771# ifdef RT_ARCH_AMD64
4772 mov rax, [pvBitmap]
4773 lock btc [rax], edx
4774# else
4775 mov eax, [pvBitmap]
4776 lock btc [eax], edx
4777# endif
4778 setc al
4779 and eax, 1
4780 mov [rc.u32], eax
4781 }
4782# endif
4783 return rc.f;
4784}
4785#endif
4786
4787
4788/**
4789 * Tests if a bit in a bitmap is set.
4790 *
4791 * @returns true if the bit is set.
4792 * @returns false if the bit is clear.
4793 *
4794 * @param pvBitmap Pointer to the bitmap.
4795 * @param iBit The bit to test.
4796 *
4797 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4798 * However, doing so will yield better performance as well as avoiding
4799 * traps accessing the last bits in the bitmap.
4800 */
4801#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4802RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
4803#else
4804DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
4805{
4806 union { bool f; uint32_t u32; uint8_t u8; } rc;
4807# if RT_INLINE_ASM_USES_INTRIN
4808 rc.u32 = _bittest((long *)pvBitmap, iBit);
4809# elif RT_INLINE_ASM_GNU_STYLE
4810
4811 __asm__ __volatile__("btl %2, %1\n\t"
4812 "setc %b0\n\t"
4813 "andl $1, %0\n\t"
4814 : "=q" (rc.u32)
4815 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4816 "Ir" (iBit)
4817 : "memory");
4818# else
4819 __asm
4820 {
4821 mov edx, [iBit]
4822# ifdef RT_ARCH_AMD64
4823 mov rax, [pvBitmap]
4824 bt [rax], edx
4825# else
4826 mov eax, [pvBitmap]
4827 bt [eax], edx
4828# endif
4829 setc al
4830 and eax, 1
4831 mov [rc.u32], eax
4832 }
4833# endif
4834 return rc.f;
4835}
4836#endif
4837
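/* Usage sketch for the plain (non-atomic) bitmap accessors above, tracking
 * 128 items in a caller defined bitmap (bmUsed is hypothetical):
 *
 *      uint32_t bmUsed[128 / 32] = { 0 };
 *      ASMBitSet(bmUsed, 42);
 *      if (ASMBitTest(bmUsed, 42))
 *          ASMBitClear(bmUsed, 42);
 */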
4838
4839/**
4840 * Clears a bit range within a bitmap.
4841 *
4842 * @param pvBitmap Pointer to the bitmap.
4843 * @param   iBitStart   The first bit to clear.
4844 * @param iBitEnd The first bit not to clear.
4845 */
4846DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
4847{
4848 if (iBitStart < iBitEnd)
4849 {
4850 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4851 int32_t iStart = iBitStart & ~31;
4852 int32_t iEnd = iBitEnd & ~31;
4853 if (iStart == iEnd)
4854 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4855 else
4856 {
4857 /* bits in first dword. */
4858 if (iBitStart & 31)
4859 {
4860 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4861 pu32++;
4862 iBitStart = iStart + 32;
4863 }
4864
4865 /* whole dwords. */
4866 if (iBitStart != iEnd)
4867 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
4868
4869 /* bits in last dword. */
4870 if (iBitEnd & 31)
4871 {
4872 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4873 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4874 }
4875 }
4876 }
4877}
4878
4879
4880/**
4881 * Sets a bit range within a bitmap.
4882 *
4883 * @param pvBitmap Pointer to the bitmap.
4884 * @param   iBitStart   The first bit to set.
4885 * @param iBitEnd The first bit not to set.
4886 */
4887DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
4888{
4889 if (iBitStart < iBitEnd)
4890 {
4891 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4892 int32_t iStart = iBitStart & ~31;
4893 int32_t iEnd = iBitEnd & ~31;
4894 if (iStart == iEnd)
4895 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4896 else
4897 {
4898 /* bits in first dword. */
4899 if (iBitStart & 31)
4900 {
4901 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4902 pu32++;
4903 iBitStart = iStart + 32;
4904 }
4905
4906            /* whole dwords. */
4907 if (iBitStart != iEnd)
4908 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
4909
4910 /* bits in last dword. */
4911 if (iBitEnd & 31)
4912 {
4913 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4914 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4915 }
4916 }
4917 }
4918}
4919
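/* Usage sketch: marking bits 8 through 23 and releasing them again
 * (bmRegions is hypothetical caller code; the end index is exclusive):
 *
 *      uint32_t bmRegions[4] = { 0 };
 *      ASMBitSetRange(bmRegions, 8, 24);       // sets bits 8..23.
 *      ASMBitClearRange(bmRegions, 8, 24);     // clears them again.
 */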
4920
4921/**
4922 * Finds the first clear bit in a bitmap.
4923 *
4924 * @returns Index of the first zero bit.
4925 * @returns -1 if no clear bit was found.
4926 * @param pvBitmap Pointer to the bitmap.
4927 * @param cBits The number of bits in the bitmap. Multiple of 32.
4928 */
4929#if RT_INLINE_ASM_EXTERNAL
4930DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
4931#else
4932DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
4933{
4934 if (cBits)
4935 {
4936 int32_t iBit;
4937# if RT_INLINE_ASM_GNU_STYLE
4938 RTCCUINTREG uEAX, uECX, uEDI;
4939 cBits = RT_ALIGN_32(cBits, 32);
4940 __asm__ __volatile__("repe; scasl\n\t"
4941 "je 1f\n\t"
4942# ifdef RT_ARCH_AMD64
4943 "lea -4(%%rdi), %%rdi\n\t"
4944 "xorl (%%rdi), %%eax\n\t"
4945 "subq %5, %%rdi\n\t"
4946# else
4947 "lea -4(%%edi), %%edi\n\t"
4948 "xorl (%%edi), %%eax\n\t"
4949 "subl %5, %%edi\n\t"
4950# endif
4951 "shll $3, %%edi\n\t"
4952 "bsfl %%eax, %%edx\n\t"
4953 "addl %%edi, %%edx\n\t"
4954 "1:\t\n"
4955 : "=d" (iBit),
4956 "=&c" (uECX),
4957 "=&D" (uEDI),
4958 "=&a" (uEAX)
4959 : "0" (0xffffffff),
4960 "mr" (pvBitmap),
4961 "1" (cBits >> 5),
4962 "2" (pvBitmap),
4963 "3" (0xffffffff));
4964# else
4965 cBits = RT_ALIGN_32(cBits, 32);
4966 __asm
4967 {
4968# ifdef RT_ARCH_AMD64
4969 mov rdi, [pvBitmap]
4970 mov rbx, rdi
4971# else
4972 mov edi, [pvBitmap]
4973 mov ebx, edi
4974# endif
4975 mov edx, 0ffffffffh
4976 mov eax, edx
4977 mov ecx, [cBits]
4978 shr ecx, 5
4979 repe scasd
4980 je done
4981
4982# ifdef RT_ARCH_AMD64
4983 lea rdi, [rdi - 4]
4984 xor eax, [rdi]
4985 sub rdi, rbx
4986# else
4987 lea edi, [edi - 4]
4988 xor eax, [edi]
4989 sub edi, ebx
4990# endif
4991 shl edi, 3
4992 bsf edx, eax
4993 add edx, edi
4994 done:
4995 mov [iBit], edx
4996 }
4997# endif
4998 return iBit;
4999 }
5000 return -1;
5001}
5002#endif
5003
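/* Usage sketch: finding and atomically claiming a free slot (g_bmSlots is
 * hypothetical caller code; the loop retries because another thread may take
 * the slot between the scan and the test-and-set):
 *
 *      static uint32_t volatile g_bmSlots[64 / 32];
 *      int32_t iSlot;
 *      do
 *          iSlot = ASMBitFirstClear(&g_bmSlots[0], 64);
 *      while (iSlot >= 0 && ASMAtomicBitTestAndSet(&g_bmSlots[0], iSlot));
 *      // iSlot >= 0 means the slot is ours; -1 means the bitmap is full.
 */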
5004
5005/**
5006 * Finds the next clear bit in a bitmap.
5007 *
5008 * @returns Index of the next clear bit.
5009 * @returns -1 if no clear bit was found.
5010 * @param pvBitmap Pointer to the bitmap.
5011 * @param cBits The number of bits in the bitmap. Multiple of 32.
5012 * @param iBitPrev The bit returned from the last search.
5013 * The search will start at iBitPrev + 1.
5014 */
5015#if RT_INLINE_ASM_EXTERNAL
5016DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
5017#else
5018DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
5019{
5020 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5021 int iBit = ++iBitPrev & 31;
5022 if (iBit)
5023 {
5024 /*
5025 * Inspect the 32-bit word containing the unaligned bit.
5026 */
5027 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5028
5029# if RT_INLINE_ASM_USES_INTRIN
5030 unsigned long ulBit = 0;
5031 if (_BitScanForward(&ulBit, u32))
5032 return ulBit + iBitPrev;
5033# else
5034# if RT_INLINE_ASM_GNU_STYLE
5035 __asm__ __volatile__("bsf %1, %0\n\t"
5036 "jnz 1f\n\t"
5037 "movl $-1, %0\n\t"
5038 "1:\n\t"
5039 : "=r" (iBit)
5040 : "r" (u32));
5041# else
5042 __asm
5043 {
5044 mov edx, [u32]
5045 bsf eax, edx
5046 jnz done
5047 mov eax, 0ffffffffh
5048 done:
5049 mov [iBit], eax
5050 }
5051# endif
5052 if (iBit >= 0)
5053 return iBit + (int)iBitPrev;
5054# endif
5055
5056 /*
5057 * Skip ahead and see if there is anything left to search.
5058 */
5059 iBitPrev |= 31;
5060 iBitPrev++;
5061 if (cBits <= (uint32_t)iBitPrev)
5062 return -1;
5063 }
5064
5065 /*
5066 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5067 */
5068 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5069 if (iBit >= 0)
5070 iBit += iBitPrev;
5071 return iBit;
5072}
5073#endif
5074
5075
5076/**
5077 * Finds the first set bit in a bitmap.
5078 *
5079 * @returns Index of the first set bit.
5080 * @returns -1 if no set bit was found.
5081 * @param pvBitmap Pointer to the bitmap.
5082 * @param cBits The number of bits in the bitmap. Multiple of 32.
5083 */
5084#if RT_INLINE_ASM_EXTERNAL
5085DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
5086#else
5087DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
5088{
5089 if (cBits)
5090 {
5091 int32_t iBit;
5092# if RT_INLINE_ASM_GNU_STYLE
5093 RTCCUINTREG uEAX, uECX, uEDI;
5094 cBits = RT_ALIGN_32(cBits, 32);
5095 __asm__ __volatile__("repe; scasl\n\t"
5096 "je 1f\n\t"
5097# ifdef RT_ARCH_AMD64
5098 "lea -4(%%rdi), %%rdi\n\t"
5099 "movl (%%rdi), %%eax\n\t"
5100 "subq %5, %%rdi\n\t"
5101# else
5102 "lea -4(%%edi), %%edi\n\t"
5103 "movl (%%edi), %%eax\n\t"
5104 "subl %5, %%edi\n\t"
5105# endif
5106 "shll $3, %%edi\n\t"
5107 "bsfl %%eax, %%edx\n\t"
5108 "addl %%edi, %%edx\n\t"
5109 "1:\t\n"
5110 : "=d" (iBit),
5111 "=&c" (uECX),
5112 "=&D" (uEDI),
5113 "=&a" (uEAX)
5114 : "0" (0xffffffff),
5115 "mr" (pvBitmap),
5116 "1" (cBits >> 5),
5117 "2" (pvBitmap),
5118 "3" (0));
5119# else
5120 cBits = RT_ALIGN_32(cBits, 32);
5121 __asm
5122 {
5123# ifdef RT_ARCH_AMD64
5124 mov rdi, [pvBitmap]
5125 mov rbx, rdi
5126# else
5127 mov edi, [pvBitmap]
5128 mov ebx, edi
5129# endif
5130 mov edx, 0ffffffffh
5131 xor eax, eax
5132 mov ecx, [cBits]
5133 shr ecx, 5
5134 repe scasd
5135 je done
5136# ifdef RT_ARCH_AMD64
5137 lea rdi, [rdi - 4]
5138 mov eax, [rdi]
5139 sub rdi, rbx
5140# else
5141 lea edi, [edi - 4]
5142 mov eax, [edi]
5143 sub edi, ebx
5144# endif
5145 shl edi, 3
5146 bsf edx, eax
5147 add edx, edi
5148 done:
5149 mov [iBit], edx
5150 }
5151# endif
5152 return iBit;
5153 }
5154 return -1;
5155}
5156#endif
5157
5158
5159/**
5160 * Finds the next set bit in a bitmap.
5161 *
5162 * @returns Index of the next set bit.
5163 * @returns -1 if no set bit was found.
5164 * @param pvBitmap Pointer to the bitmap.
5165 * @param cBits The number of bits in the bitmap. Multiple of 32.
5166 * @param iBitPrev The bit returned from the last search.
5167 * The search will start at iBitPrev + 1.
5168 */
5169#if RT_INLINE_ASM_EXTERNAL
5170DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
5171#else
5172DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
5173{
5174 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5175 int iBit = ++iBitPrev & 31;
5176 if (iBit)
5177 {
5178 /*
5179 * Inspect the 32-bit word containing the unaligned bit.
5180 */
5181 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5182
5183# if RT_INLINE_ASM_USES_INTRIN
5184 unsigned long ulBit = 0;
5185 if (_BitScanForward(&ulBit, u32))
5186 return ulBit + iBitPrev;
5187# else
5188# if RT_INLINE_ASM_GNU_STYLE
5189 __asm__ __volatile__("bsf %1, %0\n\t"
5190 "jnz 1f\n\t"
5191 "movl $-1, %0\n\t"
5192 "1:\n\t"
5193 : "=r" (iBit)
5194 : "r" (u32));
5195# else
5196 __asm
5197 {
5198 mov edx, [u32]
5199 bsf eax, edx
5200 jnz done
5201 mov eax, 0ffffffffh
5202 done:
5203 mov [iBit], eax
5204 }
5205# endif
5206 if (iBit >= 0)
5207 return iBit + (int)iBitPrev;
5208# endif
5209
5210 /*
5211 * Skip ahead and see if there is anything left to search.
5212 */
5213 iBitPrev |= 31;
5214 iBitPrev++;
5215 if (cBits <= (uint32_t)iBitPrev)
5216 return -1;
5217 }
5218
5219 /*
5220     * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
5221 */
5222 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5223 if (iBit >= 0)
5224 iBit += iBitPrev;
5225 return iBit;
5226}
5227#endif
5228
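/* Usage sketch: iterating all set bits in a bitmap (bmPending is hypothetical
 * caller code; the bit count must be a multiple of 32):
 *
 *      uint32_t const cBits = 256;
 *      int32_t iBit = ASMBitFirstSet(bmPending, cBits);
 *      while (iBit >= 0)
 *      {
 *          // ... handle bit iBit ...
 *          iBit = ASMBitNextSet(bmPending, cBits, iBit);
 *      }
 */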
5229
5230/**
5231 * Finds the first bit which is set in the given 32-bit integer.
5232 * Bits are numbered from 1 (least significant) to 32.
5233 *
5234 * @returns index [1..32] of the first set bit.
5235 * @returns 0 if all bits are cleared.
5236 * @param u32 Integer to search for set bits.
5237 * @remarks Similar to ffs() in BSD.
5238 */
5239#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5240RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
5241#else
5242DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
5243{
5244# if RT_INLINE_ASM_USES_INTRIN
5245 unsigned long iBit;
5246 if (_BitScanForward(&iBit, u32))
5247 iBit++;
5248 else
5249 iBit = 0;
5250# elif RT_INLINE_ASM_GNU_STYLE
5251 uint32_t iBit;
5252 __asm__ __volatile__("bsf %1, %0\n\t"
5253 "jnz 1f\n\t"
5254 "xorl %0, %0\n\t"
5255 "jmp 2f\n"
5256 "1:\n\t"
5257 "incl %0\n"
5258 "2:\n\t"
5259 : "=r" (iBit)
5260 : "rm" (u32));
5261# else
5262 uint32_t iBit;
5263 _asm
5264 {
5265 bsf eax, [u32]
5266 jnz found
5267 xor eax, eax
5268 jmp done
5269 found:
5270 inc eax
5271 done:
5272 mov [iBit], eax
5273 }
5274# endif
5275 return iBit;
5276}
5277#endif
5278
5279
5280/**
5281 * Finds the first bit which is set in the given 32-bit integer.
5282 * Bits are numbered from 1 (least significant) to 32.
5283 *
5284 * @returns index [1..32] of the first set bit.
5285 * @returns 0 if all bits are cleared.
5286 * @param i32 Integer to search for set bits.
5287 * @remark Similar to ffs() in BSD.
5288 */
5289DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
5290{
5291 return ASMBitFirstSetU32((uint32_t)i32);
5292}
5293
5294
5295/**
5296 * Finds the first bit which is set in the given 64-bit integer.
5297 *
5298 * Bits are numbered from 1 (least significant) to 64.
5299 *
5300 * @returns index [1..64] of the first set bit.
5301 * @returns 0 if all bits are cleared.
5302 * @param u64 Integer to search for set bits.
5303 * @remarks Similar to ffs() in BSD.
5304 */
5305#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5306RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
5307#else
5308DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
5309{
5310# if RT_INLINE_ASM_USES_INTRIN
5311 unsigned long iBit;
5312# if ARCH_BITS == 64
5313 if (_BitScanForward64(&iBit, u64))
5314 iBit++;
5315 else
5316 iBit = 0;
5317# else
5318 if (_BitScanForward(&iBit, (uint32_t)u64))
5319 iBit++;
5320 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5321 iBit += 33;
5322 else
5323 iBit = 0;
5324# endif
5325# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5326 uint64_t iBit;
5327 __asm__ __volatile__("bsfq %1, %0\n\t"
5328 "jnz 1f\n\t"
5329 "xorl %k0, %k0\n\t"
5330 "jmp 2f\n"
5331 "1:\n\t"
5332 "incl %k0\n"
5333 "2:\n\t"
5334 : "=r" (iBit)
5335 : "rm" (u64));
5336# else
5337 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5338 if (!iBit)
5339 {
5340 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5341 if (iBit)
5342 iBit += 32;
5343 }
5344# endif
5345 return (unsigned)iBit;
5346}
5347#endif
5348
5349
5350/**
5351 * Finds the first bit which is set in the given 16-bit integer.
5352 *
5353 * Bits are numbered from 1 (least significant) to 16.
5354 *
5355 * @returns index [1..16] of the first set bit.
5356 * @returns 0 if all bits are cleared.
5357 * @param u16 Integer to search for set bits.
5358 * @remarks For 16-bit bs3kit code.
5359 */
5360#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5361RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
5362#else
5363DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
5364{
5365 return ASMBitFirstSetU32((uint32_t)u16);
5366}
5367#endif
5368
5369
5370/**
5371 * Finds the last bit which is set in the given 32-bit integer.
5372 * Bits are numbered from 1 (least significant) to 32.
5373 *
5374 * @returns index [1..32] of the last set bit.
5375 * @returns 0 if all bits are cleared.
5376 * @param u32 Integer to search for set bits.
5377 * @remark Similar to fls() in BSD.
5378 */
5379#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5380RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
5381#else
5382DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
5383{
5384# if RT_INLINE_ASM_USES_INTRIN
5385 unsigned long iBit;
5386 if (_BitScanReverse(&iBit, u32))
5387 iBit++;
5388 else
5389 iBit = 0;
5390# elif RT_INLINE_ASM_GNU_STYLE
5391 uint32_t iBit;
5392 __asm__ __volatile__("bsrl %1, %0\n\t"
5393 "jnz 1f\n\t"
5394 "xorl %0, %0\n\t"
5395 "jmp 2f\n"
5396 "1:\n\t"
5397 "incl %0\n"
5398 "2:\n\t"
5399 : "=r" (iBit)
5400 : "rm" (u32));
5401# else
5402 uint32_t iBit;
5403 _asm
5404 {
5405 bsr eax, [u32]
5406 jnz found
5407 xor eax, eax
5408 jmp done
5409 found:
5410 inc eax
5411 done:
5412 mov [iBit], eax
5413 }
5414# endif
5415 return iBit;
5416}
5417#endif
5418
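/* Usage sketch: because bits are numbered from 1, the return value can be
 * turned into the position of the most significant set bit, i.e.
 * floor(log2(u32)) for non-zero input:
 *
 *      unsigned iMsb  = ASMBitLastSetU32(UINT32_C(0x00400100));  // 23, i.e. bit 22 is the MSB.
 *      unsigned iLog2 = iMsb - 1;                                // floor(log2(0x00400100)) == 22.
 */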
5419
5420/**
5421 * Finds the last bit which is set in the given 32-bit integer.
5422 * Bits are numbered from 1 (least significant) to 32.
5423 *
5424 * @returns index [1..32] of the last set bit.
5425 * @returns 0 if all bits are cleared.
5426 * @param i32 Integer to search for set bits.
5427 * @remark Similar to fls() in BSD.
5428 */
5429DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
5430{
5431 return ASMBitLastSetU32((uint32_t)i32);
5432}
5433
5434
5435/**
5436 * Finds the last bit which is set in the given 64-bit integer.
5437 *
5438 * Bits are numbered from 1 (least significant) to 64.
5439 *
5440 * @returns index [1..64] of the last set bit.
5441 * @returns 0 if all bits are cleared.
5442 * @param u64 Integer to search for set bits.
5443 * @remark Similar to fls() in BSD.
5444 */
5445#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5446RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
5447#else
5448DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
5449{
5450# if RT_INLINE_ASM_USES_INTRIN
5451 unsigned long iBit;
5452# if ARCH_BITS == 64
5453 if (_BitScanReverse64(&iBit, u64))
5454 iBit++;
5455 else
5456 iBit = 0;
5457# else
5458 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5459 iBit += 33;
5460 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5461 iBit++;
5462 else
5463 iBit = 0;
5464# endif
5465# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5466 uint64_t iBit;
5467 __asm__ __volatile__("bsrq %1, %0\n\t"
5468 "jnz 1f\n\t"
5469 "xorl %k0, %k0\n\t"
5470 "jmp 2f\n"
5471 "1:\n\t"
5472 "incl %k0\n"
5473 "2:\n\t"
5474 : "=r" (iBit)
5475 : "rm" (u64));
5476# else
5477 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5478 if (iBit)
5479 iBit += 32;
5480 else
5481 iBit = ASMBitLastSetU32((uint32_t)u64);
5482# endif
5483 return (unsigned)iBit;
5484}
5485#endif
5486
5487
5488/**
5489 * Finds the last bit which is set in the given 16-bit integer.
5490 *
5491 * Bits are numbered from 1 (least significant) to 16.
5492 *
5493 * @returns index [1..16] of the last set bit.
5494 * @returns 0 if all bits are cleared.
5495 * @param u16 Integer to search for set bits.
5496 * @remarks For 16-bit bs3kit code.
5497 */
5498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5499RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
5500#else
5501DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
5502{
5503 return ASMBitLastSetU32((uint32_t)u16);
5504}
5505#endif
5506
5507
5508/**
5509 * Reverse the byte order of the given 16-bit integer.
5510 *
5511 * @returns The input value with its byte order reversed.
5512 * @param u16 16-bit integer value.
5513 */
5514#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5515RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
5516#else
5517DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
5518{
5519# if RT_INLINE_ASM_USES_INTRIN
5520 u16 = _byteswap_ushort(u16);
5521# elif RT_INLINE_ASM_GNU_STYLE
5522 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5523# else
5524 _asm
5525 {
5526 mov ax, [u16]
5527 ror ax, 8
5528 mov [u16], ax
5529 }
5530# endif
5531 return u16;
5532}
5533#endif
5534
5535
5536/**
5537 * Reverse the byte order of the given 32-bit integer.
5538 *
5539 * @returns The input value with its byte order reversed.
5540 * @param u32 32-bit integer value.
5541 */
5542#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5543RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
5544#else
5545DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
5546{
5547# if RT_INLINE_ASM_USES_INTRIN
5548 u32 = _byteswap_ulong(u32);
5549# elif RT_INLINE_ASM_GNU_STYLE
5550 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5551# else
5552 _asm
5553 {
5554 mov eax, [u32]
5555 bswap eax
5556 mov [u32], eax
5557 }
5558# endif
5559 return u32;
5560}
5561#endif
5562
5563
5564/**
5565 * Reverse the byte order of the given 64-bit integer.
5566 *
5567 * @returns The input value with its byte order reversed.
5568 * @param u64 64-bit integer value.
5569 */
5570DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
5571{
5572#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5573 u64 = _byteswap_uint64(u64);
5574#else
5575 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5576 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5577#endif
5578 return u64;
5579}
5580
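/* Usage sketch: converting a little endian on-disk field to host byte order
 * on a big endian machine, or vice versa (u32Disk is hypothetical data):
 *
 *      uint32_t u32Disk    = UINT32_C(0x12345678);
 *      uint32_t u32Swapped = ASMByteSwapU32(u32Disk);      // 0x78563412
 */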
5581
5582/**
5583 * Rotate 32-bit unsigned value to the left by @a cShift.
5584 *
5585 * @returns Rotated value.
5586 * @param u32 The value to rotate.
5587 * @param cShift How many bits to rotate by.
5588 */
5589#ifdef __WATCOMC__
5590RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
5591#else
5592DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
5593{
5594# if RT_INLINE_ASM_USES_INTRIN
5595 return _rotl(u32, cShift);
5596# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5597 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5598 return u32;
5599# else
5600 cShift &= 31;
5601 return (u32 << cShift) | (u32 >> (32 - cShift));
5602# endif
5603}
5604#endif
5605
5606
5607/**
5608 * Rotate 32-bit unsigned value to the right by @a cShift.
5609 *
5610 * @returns Rotated value.
5611 * @param u32 The value to rotate.
5612 * @param cShift How many bits to rotate by.
5613 */
5614#ifdef __WATCOMC__
5615RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
5616#else
5617DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
5618{
5619# if RT_INLINE_ASM_USES_INTRIN
5620 return _rotr(u32, cShift);
5621# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5622 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5623 return u32;
5624# else
5625 cShift &= 31;
5626 return (u32 >> cShift) | (u32 << (32 - cShift));
5627# endif
5628}
5629#endif
5630
5631
5632/**
5633 * Rotate 64-bit unsigned value to the left by @a cShift.
5634 *
5635 * @returns Rotated value.
5636 * @param u64 The value to rotate.
5637 * @param cShift How many bits to rotate by.
5638 */
5639DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
5640{
5641#if RT_INLINE_ASM_USES_INTRIN
5642 return _rotl64(u64, cShift);
5643#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5644 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5645 return u64;
5646#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5647 uint32_t uSpill;
5648 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5649 "jz 1f\n\t"
5650 "xchgl %%eax, %%edx\n\t"
5651 "1:\n\t"
5652 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5653 "jz 2f\n\t"
5654 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5655 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5656 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5657 "2:\n\t" /* } */
5658 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5659 : "0" (u64),
5660 "1" (cShift));
5661 return u64;
5662#else
5663 cShift &= 63;
5664 return (u64 << cShift) | (u64 >> (64 - cShift));
5665#endif
5666}
5667
5668
5669/**
5670 * Rotate 64-bit unsigned value to the right by @a cShift.
5671 *
5672 * @returns Rotated value.
5673 * @param u64 The value to rotate.
5674 * @param cShift How many bits to rotate by.
5675 */
5676DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
5677{
5678#if RT_INLINE_ASM_USES_INTRIN
5679 return _rotr64(u64, cShift);
5680#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5681 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5682 return u64;
5683#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5684 uint32_t uSpill;
5685 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5686 "jz 1f\n\t"
5687 "xchgl %%eax, %%edx\n\t"
5688 "1:\n\t"
5689 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5690 "jz 2f\n\t"
5691 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5692 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5693 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5694 "2:\n\t" /* } */
5695 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5696 : "0" (u64),
5697 "1" (cShift));
5698 return u64;
5699#else
5700 cShift &= 63;
5701 return (u64 >> cShift) | (u64 << (64 - cShift));
5702#endif
5703}
5704
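/* Usage sketch: rotations are handy for hash mixing because, unlike plain
 * shifts, they do not discard bits (uKey and the odd multiplier below are
 * arbitrary example values, not IPRT definitions):
 *
 *      uint64_t uHash = uKey * UINT64_C(0x9e3779b97f4a7c15);
 *      uHash = ASMRotateRightU64(uHash, 29);
 */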
5705/** @} */
5706
5707
5708/** @} */
5709
5710/*
5711 * Include #pragma aux definitions for Watcom C/C++.
5712 */
5713#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
5714# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
5715# undef IPRT_INCLUDED_asm_watcom_x86_16_h
5716# include "asm-watcom-x86-16.h"
5717#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
5718# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
5719# undef IPRT_INCLUDED_asm_watcom_x86_32_h
5720# include "asm-watcom-x86-32.h"
5721#endif
5722
5723#endif /* !IPRT_INCLUDED_asm_h */
5724