VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 23473

Last change on this file since 23473 was 22280, checked in by vboxsync, 15 years ago

ASMIntDisableFlags() / gcc: don't use '=rm' as output constraint because this will not work with -fomit-frame-pointer

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 171.1 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
/* If an earlier header left a macro named 'u' defined, remove it so that it
 * cannot clash with identifiers named 'u' further down in this file. */
43#ifdef u
44#undef u
45#endif
46
/* Compiler intrinsic selection.
 *
 * For Microsoft compilers with _MSC_VER >= 1400 this pulls in <intrin.h>,
 * requests intrinsic expansion for every routine listed below and defines
 * RT_INLINE_ASM_USES_INTRIN as 1.  In every other case the macro is defined
 * as 0 by the fallback at the end of this block. */
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
 /* The following intrinsics are only requested when RT_ARCH_AMD64 is defined. */
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(_mm_mfence)
105# pragma intrinsic(_mm_sfence)
106# pragma intrinsic(_mm_lfence)
107# pragma intrinsic(__stosq)
108# pragma intrinsic(__readcr8)
109# pragma intrinsic(__writecr8)
110# pragma intrinsic(_byteswap_uint64)
111# pragma intrinsic(_InterlockedExchange64)
112# endif
113# endif
114#endif
/* Fallback: no intrinsics available (non-MSC compiler or _MSC_VER < 1400). */
115#ifndef RT_INLINE_ASM_USES_INTRIN
116# define RT_INLINE_ASM_USES_INTRIN 0
117#endif
118
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.
 *
 * Defined as 1 when compiling with GCC 4.3.x for 32-bit x86 (__i386__), and as
 * 0 for every other compiler, version or target.
 *
 * The condition is evaluated here, once, so that the macro always expands to a
 * plain integer constant.  Expanding to an expression containing `defined()`
 * is undefined behaviour inside `#if` and does not compile when the macro is
 * used in an ordinary C expression.
 */
#if defined(__GNUC__) && defined(__i386__)
# if __GNUC__ == 4 && __GNUC_MINOR__ == 3
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
128
129
130
131/** @defgroup grp_asm ASM - Assembly Routines
132 * @ingroup grp_rt
133 *
134 * @remarks The difference between ordered and unordered atomic operations is that
135 * the former will complete outstanding reads and writes before continuing
136 * while the latter doesn't make any promises about the order. Ordered
137 * operations don't, it seems, make any 100% promise with regard to whether
138 * the operation will complete before any subsequent memory access.
139 * (please, correct if wrong.)
140 *
141 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
142 * are unordered (note the Uo).
143 *
144 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
145 * or even optimize assembler instructions away. For instance, in the following code
146 * the second rdmsr instruction is optimized away because gcc treats that instruction
147 * as deterministic:
148 *
149 * @code
150 * static inline uint64_t rdmsr_low(int idx)
151 * {
152 * uint32_t low;
153 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
154 * }
155 * ...
156 * uint32_t msr1 = rdmsr_low(1);
157 * foo(msr1);
158 * msr1 = rdmsr_low(1);
159 * bar(msr1);
160 * @endcode
161 *
162 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
163 * use the result of the first call as input parameter for bar() as well. For rdmsr this
164 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
165 * machine status information in general.
166 *
167 * @{
168 */
169
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler offers no inline assembly, in which case the
 * ASM* functions are implemented in an external .asm file; set to 0 otherwise.
 *
 * The value is 1 only when both _MSC_VER and RT_ARCH_AMD64 are defined.
 *
 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
 *         inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
182
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler accepts GNU style inline assembly, i.e. for every
 * compiler that does not define _MSC_VER; set to 0 for Microsoft compilers.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
191
192
193/** @todo find a more proper place for this structure? */
/* Byte packing keeps pIdt directly after the 16-bit cbIdt with no padding;
 * the structure is used as the memory operand of sidt/lidt in this file. */
194#pragma pack(1)
195/** IDTR */
196typedef struct RTIDTR
197{
198 /** Size of the IDT. */
199 uint16_t cbIdt;
200 /** Address of the IDT. */
201 uintptr_t pIdt;
202} RTIDTR, *PRTIDTR;
203#pragma pack()
204
/* Byte packing keeps pGdt directly after the 16-bit cbGdt with no padding;
 * the structure is used as the memory operand of sgdt in this file. */
205#pragma pack(1)
206/** GDTR */
207typedef struct RTGDTR
208{
209 /** Size of the GDT. */
210 uint16_t cbGdt;
211 /** Address of the GDT. */
212 uintptr_t pGdt;
213} RTGDTR, *PRTGDTR;
214#pragma pack()
215
216
217/** @def ASMReturnAddress
218 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress() intrinsic for Microsoft compilers and to
 * __builtin_return_address(0) for GCC (and when Doxygen is running).  Any
 * other compiler triggers a compile-time error.
219 */
220#ifdef _MSC_VER
 /* Declare the intrinsic by hand, with C linkage when compiled as C++. */
221# ifdef __cplusplus
222extern "C"
223# endif
224void * _ReturnAddress(void);
225# pragma intrinsic(_ReturnAddress)
226# define ASMReturnAddress() _ReturnAddress()
227#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
228# define ASMReturnAddress() __builtin_return_address(0)
229#else
230# error "Unsupported compiler."
231#endif
232
233
234/**
235 * Gets the content of the IDTR CPU register.
 *
 * The inline variant executes sidt with the caller's buffer as the memory
 * operand.  When RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is
 * declared here.
 *
236 * @param pIdtr Where to store the IDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
240#else
241DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
 /* *pIdtr is the output memory operand that sidt writes to. */
244 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
245# else
 /* MSC style: load the pointer into rax/eax, then store through it. */
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 sidt [rax]
251# else
252 mov eax, [pIdtr]
253 sidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Sets the content of the IDTR CPU register.
 *
 * The inline variant executes lidt with *pIdtr as the memory operand.  When
 * RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is declared here.
 *
263 * @param pIdtr Where to load the IDTR contents from
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
267#else
268DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
 /* *pIdtr is an input-only memory operand; there are no outputs. */
271 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
272# else
 /* MSC style: load the pointer into rax/eax, then load IDTR through it. */
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pIdtr]
277 lidt [rax]
278# else
279 mov eax, [pIdtr]
280 lidt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287
288/**
289 * Gets the content of the GDTR CPU register.
 *
 * The inline variant executes sgdt with the caller's buffer as the memory
 * operand.  When RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is
 * declared here.
 *
290 * @param pGdtr Where to store the GDTR contents.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
294#else
295DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
296{
297# if RT_INLINE_ASM_GNU_STYLE
 /* *pGdtr is the output memory operand that sgdt writes to. */
298 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
299# else
 /* MSC style: load the pointer into rax/eax, then store through it. */
300 __asm
301 {
302# ifdef RT_ARCH_AMD64
303 mov rax, [pGdtr]
304 sgdt [rax]
305# else
306 mov eax, [pGdtr]
307 sgdt [eax]
308# endif
309 }
310# endif
311}
312#endif
313
314/**
315 * Get the cs register.
 *
 * The inline variant copies CS into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
316 * @returns cs.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetCS(void);
320#else
321DECLINLINE(RTSEL) ASMGetCS(void)
322{
323 RTSEL SelCS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
326# else
327 __asm
328 {
329 mov ax, cs
330 mov [SelCS], ax
331 }
332# endif
333 return SelCS;
334}
335#endif
336
337
338/**
339 * Get the DS register.
 *
 * The inline variant copies DS into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
340 * @returns DS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetDS(void);
344#else
345DECLINLINE(RTSEL) ASMGetDS(void)
346{
347 RTSEL SelDS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
350# else
351 __asm
352 {
353 mov ax, ds
354 mov [SelDS], ax
355 }
356# endif
357 return SelDS;
358}
359#endif
360
361
362/**
363 * Get the ES register.
 *
 * The inline variant copies ES into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
364 * @returns ES.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetES(void);
368#else
369DECLINLINE(RTSEL) ASMGetES(void)
370{
371 RTSEL SelES;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
374# else
375 __asm
376 {
377 mov ax, es
378 mov [SelES], ax
379 }
380# endif
381 return SelES;
382}
383#endif
384
385
386/**
387 * Get the FS register.
 *
 * The inline variant copies FS into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
388 * @returns FS.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetFS(void);
392#else
393DECLINLINE(RTSEL) ASMGetFS(void)
394{
395 RTSEL SelFS;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
398# else
399 __asm
400 {
401 mov ax, fs
402 mov [SelFS], ax
403 }
404# endif
405 return SelFS;
406}
 /* Closes the outer RT_INLINE_ASM_EXTERNAL conditional (despite the indent). */
407# endif
408
409
410/**
411 * Get the GS register.
 *
 * The inline variant copies GS into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
412 * @returns GS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTSEL) ASMGetGS(void);
416#else
417DECLINLINE(RTSEL) ASMGetGS(void)
418{
419 RTSEL SelGS;
420# if RT_INLINE_ASM_GNU_STYLE
421 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
422# else
423 __asm
424 {
425 mov ax, gs
426 mov [SelGS], ax
427 }
428# endif
429 return SelGS;
430}
431#endif
432
433
434/**
435 * Get the SS register.
 *
 * The inline variant copies SS into a general register (GNU style) or via AX
 * into the local (MSC style).  When RT_INLINE_ASM_EXTERNAL is non-zero only
 * the prototype is declared here.
 *
436 * @returns SS.
437 */
438#if RT_INLINE_ASM_EXTERNAL
439DECLASM(RTSEL) ASMGetSS(void);
440#else
441DECLINLINE(RTSEL) ASMGetSS(void)
442{
443 RTSEL SelSS;
444# if RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
446# else
447 __asm
448 {
449 mov ax, ss
450 mov [SelSS], ax
451 }
452# endif
453 return SelSS;
454}
455#endif
456
457
458/**
459 * Get the TR register.
 *
 * The inline variant uses the str instruction to store the task register
 * selector.  When RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is
 * declared here.
 *
460 * @returns TR.
461 */
462#if RT_INLINE_ASM_EXTERNAL
463DECLASM(RTSEL) ASMGetTR(void);
464#else
465DECLINLINE(RTSEL) ASMGetTR(void)
466{
467 RTSEL SelTR;
468# if RT_INLINE_ASM_GNU_STYLE
 /* %w0 prints the 16-bit name of the register chosen for operand 0. */
469 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
470# else
471 __asm
472 {
473 str ax
474 mov [SelTR], ax
475 }
476# endif
477 return SelTR;
478}
479#endif
480
481
482/**
483 * Get the [RE]FLAGS register.
 *
 * The inline variant pushes the flags register onto the stack and pops it
 * into the result (pushfq/popq on AMD64, pushfl/popl or pushfd/pop on x86).
 * When RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is declared here.
 *
484 * @returns [RE]FLAGS.
485 */
486#if RT_INLINE_ASM_EXTERNAL
487DECLASM(RTCCUINTREG) ASMGetFlags(void);
488#else
489DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
490{
491 RTCCUINTREG uFlags;
492# if RT_INLINE_ASM_GNU_STYLE
493# ifdef RT_ARCH_AMD64
494 __asm__ __volatile__("pushfq\n\t"
495 "popq %0\n\t"
496 : "=r" (uFlags));
497# else
498 __asm__ __volatile__("pushfl\n\t"
499 "popl %0\n\t"
500 : "=r" (uFlags));
501# endif
502# else
503 __asm
504 {
505# ifdef RT_ARCH_AMD64
506 pushfq
507 pop [uFlags]
508# else
509 pushfd
510 pop [uFlags]
511# endif
512 }
513# endif
514 return uFlags;
515}
516#endif
517
518
519/**
520 * Set the [RE]FLAGS register.
 *
 * The inline variant pushes uFlags onto the stack and pops it into the flags
 * register (popfq on AMD64, popfl/popfd on x86).  When RT_INLINE_ASM_EXTERNAL
 * is non-zero only the prototype is declared here.
 *
521 * @param uFlags The new [RE]FLAGS value.
522 */
523#if RT_INLINE_ASM_EXTERNAL
524DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
525#else
526DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
527{
528# if RT_INLINE_ASM_GNU_STYLE
 /* uFlags is an input-only operand with the general "g" constraint. */
529# ifdef RT_ARCH_AMD64
530 __asm__ __volatile__("pushq %0\n\t"
531 "popfq\n\t"
532 : : "g" (uFlags));
533# else
534 __asm__ __volatile__("pushl %0\n\t"
535 "popfl\n\t"
536 : : "g" (uFlags));
537# endif
538# else
539 __asm
540 {
541# ifdef RT_ARCH_AMD64
542 push [uFlags]
543 popfq
544# else
545 push [uFlags]
546 popfd
547# endif
548 }
549# endif
550}
551#endif
552
553
554/**
555 * Gets the content of the CPU timestamp counter register.
556 *
 * The inline variant uses rdtsc (GNU or MSC inline assembly) or the __rdtsc()
 * intrinsic.  Only the prototype is declared when RT_INLINE_ASM_EXTERNAL is
 * set and intrinsics are not in use.
 *
557 * @returns TSC.
558 */
559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
560DECLASM(uint64_t) ASMReadTSC(void);
561#else
562DECLINLINE(uint64_t) ASMReadTSC(void)
563{
564 RTUINT64U u;
565# if RT_INLINE_ASM_GNU_STYLE
 /* EAX goes to the low half and EDX to the high half of the result. */
566 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
567# else
568# if RT_INLINE_ASM_USES_INTRIN
569 u.u = __rdtsc();
570# else
571 __asm
572 {
573 rdtsc
574 mov [u.s.Lo], eax
575 mov [u.s.Hi], edx
576 }
577# endif
578# endif
579 return u.u;
580}
581#endif
582
583
584/**
585 * Performs the cpuid instruction returning all registers.
586 *
 * Each output pointer is written as a uint32_t.  Only the prototype is
 * declared when RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvEAX Where to store eax.
589 * @param pvEBX Where to store ebx.
590 * @param pvECX Where to store ecx.
591 * @param pvEDX Where to store edx.
592 * @remark We're using void pointers to ease the use of special bitfield structures and such.
593 */
594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
595DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
596#else
597DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
598{
599# if RT_INLINE_ASM_GNU_STYLE
600# ifdef RT_ARCH_AMD64
 /* Collect full-width registers first, then truncate to 32 bits on store. */
601 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
602 __asm__ ("cpuid\n\t"
603 : "=a" (uRAX),
604 "=b" (uRBX),
605 "=c" (uRCX),
606 "=d" (uRDX)
607 : "0" (uOperator));
608 *(uint32_t *)pvEAX = (uint32_t)uRAX;
609 *(uint32_t *)pvEBX = (uint32_t)uRBX;
610 *(uint32_t *)pvECX = (uint32_t)uRCX;
611 *(uint32_t *)pvEDX = (uint32_t)uRDX;
612# else
 /* EBX is swapped with operand 1 around cpuid, so EBX holds its original
  * value again afterwards and operand 1 receives cpuid's EBX result. */
613 __asm__ ("xchgl %%ebx, %1\n\t"
614 "cpuid\n\t"
615 "xchgl %%ebx, %1\n\t"
616 : "=a" (*(uint32_t *)pvEAX),
617 "=r" (*(uint32_t *)pvEBX),
618 "=c" (*(uint32_t *)pvECX),
619 "=d" (*(uint32_t *)pvEDX)
620 : "0" (uOperator));
621# endif
622
623# elif RT_INLINE_ASM_USES_INTRIN
 /* __cpuid fills aInfo[0..3], stored below as EAX, EBX, ECX, EDX. */
624 int aInfo[4];
625 __cpuid(aInfo, uOperator);
626 *(uint32_t *)pvEAX = aInfo[0];
627 *(uint32_t *)pvEBX = aInfo[1];
628 *(uint32_t *)pvECX = aInfo[2];
629 *(uint32_t *)pvEDX = aInfo[3];
630
631# else
 /* MSC inline assembly: EBX is saved and restored around cpuid. */
632 uint32_t uEAX;
633 uint32_t uEBX;
634 uint32_t uECX;
635 uint32_t uEDX;
636 __asm
637 {
638 push ebx
639 mov eax, [uOperator]
640 cpuid
641 mov [uEAX], eax
642 mov [uEBX], ebx
643 mov [uECX], ecx
644 mov [uEDX], edx
645 pop ebx
646 }
647 *(uint32_t *)pvEAX = uEAX;
648 *(uint32_t *)pvEBX = uEBX;
649 *(uint32_t *)pvECX = uECX;
650 *(uint32_t *)pvEDX = uEDX;
651# endif
652}
653#endif
654
655
656/**
657 * Performs the cpuid instruction returning all registers.
658 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
659 *
660 * @param uOperator CPUID operation (eax).
661 * @param uIdxECX ecx index
662 * @param pvEAX Where to store eax.
663 * @param pvEBX Where to store ebx.
664 * @param pvECX Where to store ecx.
665 * @param pvEDX Where to store edx.
666 * @remark We're using void pointers to ease the use of special bitfield structures and such.
667 */
668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
669DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
670#else
671DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
672{
673# if RT_INLINE_ASM_GNU_STYLE
674# ifdef RT_ARCH_AMD64
675 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
676 __asm__ ("cpuid\n\t"
677 : "=a" (uRAX),
678 "=b" (uRBX),
679 "=c" (uRCX),
680 "=d" (uRDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683 *(uint32_t *)pvEAX = (uint32_t)uRAX;
684 *(uint32_t *)pvEBX = (uint32_t)uRBX;
685 *(uint32_t *)pvECX = (uint32_t)uRCX;
686 *(uint32_t *)pvEDX = (uint32_t)uRDX;
687# else
688 __asm__ ("xchgl %%ebx, %1\n\t"
689 "cpuid\n\t"
690 "xchgl %%ebx, %1\n\t"
691 : "=a" (*(uint32_t *)pvEAX),
692 "=r" (*(uint32_t *)pvEBX),
693 "=c" (*(uint32_t *)pvECX),
694 "=d" (*(uint32_t *)pvEDX)
695 : "0" (uOperator),
696 "2" (uIdxECX));
697# endif
698
699# elif RT_INLINE_ASM_USES_INTRIN
700 int aInfo[4];
701 /* ??? another intrinsic ??? */
702 __cpuid(aInfo, uOperator);
703 *(uint32_t *)pvEAX = aInfo[0];
704 *(uint32_t *)pvEBX = aInfo[1];
705 *(uint32_t *)pvECX = aInfo[2];
706 *(uint32_t *)pvEDX = aInfo[3];
707
708# else
709 uint32_t uEAX;
710 uint32_t uEBX;
711 uint32_t uECX;
712 uint32_t uEDX;
713 __asm
714 {
715 push ebx
716 mov eax, [uOperator]
717 mov ecx, [uIdxECX]
718 cpuid
719 mov [uEAX], eax
720 mov [uEBX], ebx
721 mov [uECX], ecx
722 mov [uEDX], edx
723 pop ebx
724 }
725 *(uint32_t *)pvEAX = uEAX;
726 *(uint32_t *)pvEBX = uEBX;
727 *(uint32_t *)pvECX = uECX;
728 *(uint32_t *)pvEDX = uEDX;
729# endif
730}
731#endif
732
733
734/**
735 * Performs the cpuid instruction returning ecx and edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @param pvECX Where to store ecx.
739 * @param pvEDX Where to store edx.
740 * @remark We're using void pointers to ease the use of special bitfield structures and such.
741 */
742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
743DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
744#else
745DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
746{
747 uint32_t uEBX;
748 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
749}
750#endif
751
752
753/**
754 * Performs the cpuid instruction returning edx.
755 *
 * Only the prototype is declared when RT_INLINE_ASM_EXTERNAL is set and
 * intrinsics are not in use.
 *
756 * @param uOperator CPUID operation (eax).
757 * @returns EDX after cpuid operation.
758 */
759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
760DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
761#else
762DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
763{
764 RTCCUINTREG xDX;
765# if RT_INLINE_ASM_GNU_STYLE
766# ifdef RT_ARCH_AMD64
 /* EAX output goes to a scratch variable; rbx and rcx are declared clobbered. */
767 RTCCUINTREG uSpill;
768 __asm__ ("cpuid"
769 : "=a" (uSpill),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "rbx", "rcx");
773# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
 /* PIC on i386: EBX is preserved manually with push/pop instead of being
  * listed as a clobber. */
774 __asm__ ("push %%ebx\n\t"
775 "cpuid\n\t"
776 "pop %%ebx\n\t"
777 : "=a" (uOperator),
778 "=d" (xDX)
779 : "0" (uOperator)
780 : "ecx");
781# else
782 __asm__ ("cpuid"
783 : "=a" (uOperator),
784 "=d" (xDX)
785 : "0" (uOperator)
786 : "ebx", "ecx");
787# endif
788
789# elif RT_INLINE_ASM_USES_INTRIN
 /* aInfo[3] is the value stored as EDX by the sibling cpuid wrappers. */
790 int aInfo[4];
791 __cpuid(aInfo, uOperator);
792 xDX = aInfo[3];
793
794# else
795 __asm
796 {
797 push ebx
798 mov eax, [uOperator]
799 cpuid
800 mov [xDX], edx
801 pop ebx
802 }
803# endif
804 return (uint32_t)xDX;
805}
806#endif
807
808
809/**
810 * Performs the cpuid instruction returning ecx.
811 *
 * Only the prototype is declared when RT_INLINE_ASM_EXTERNAL is set and
 * intrinsics are not in use.
 *
812 * @param uOperator CPUID operation (eax).
813 * @returns ECX after cpuid operation.
814 */
815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
816DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
817#else
818DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
819{
820 RTCCUINTREG xCX;
821# if RT_INLINE_ASM_GNU_STYLE
822# ifdef RT_ARCH_AMD64
 /* EAX output goes to a scratch variable; rbx and rdx are declared clobbered. */
823 RTCCUINTREG uSpill;
824 __asm__ ("cpuid"
825 : "=a" (uSpill),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "rbx", "rdx");
829# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
 /* PIC on i386: EBX is preserved manually with push/pop instead of being
  * listed as a clobber. */
830 __asm__ ("push %%ebx\n\t"
831 "cpuid\n\t"
832 "pop %%ebx\n\t"
833 : "=a" (uOperator),
834 "=c" (xCX)
835 : "0" (uOperator)
836 : "edx");
837# else
838 __asm__ ("cpuid"
839 : "=a" (uOperator),
840 "=c" (xCX)
841 : "0" (uOperator)
842 : "ebx", "edx");
843
844# endif
845
846# elif RT_INLINE_ASM_USES_INTRIN
 /* aInfo[2] is the value stored as ECX by the sibling cpuid wrappers. */
847 int aInfo[4];
848 __cpuid(aInfo, uOperator);
849 xCX = aInfo[2];
850
851# else
852 __asm
853 {
854 push ebx
855 mov eax, [uOperator]
856 cpuid
857 mov [xCX], ecx
858 pop ebx
859 }
860# endif
861 return (uint32_t)xCX;
862}
863#endif
864
865
866/**
867 * Checks if the current CPU supports CPUID.
868 *
 * On AMD64 this returns true without probing.  Otherwise the flags register
 * is read, bit 0x200000 is toggled and written back, and the flags are read
 * again: the result is true if the two reads differ.  The original flags
 * value is restored before returning.
 *
869 * @returns true if CPUID is supported.
870 */
871DECLINLINE(bool) ASMHasCpuId(void)
872{
873#ifdef RT_ARCH_AMD64
874 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
875#else /* !RT_ARCH_AMD64 */
876 bool fRet = false;
877# if RT_INLINE_ASM_GNU_STYLE
 /* u1 is the working copy of the flags, u2 keeps the original value. */
878 uint32_t u1;
879 uint32_t u2;
880 __asm__ ("pushf\n\t"
881 "pop %1\n\t"
882 "mov %1, %2\n\t"
883 "xorl $0x200000, %1\n\t"
884 "push %1\n\t"
885 "popf\n\t"
886 "pushf\n\t"
887 "pop %1\n\t"
888 "cmpl %1, %2\n\t"
889 "setne %0\n\t"
890 "push %2\n\t"
891 "popf\n\t"
892 : "=m" (fRet), "=r" (u1), "=r" (u2));
893# else
 /* Same sequence with eax as the working copy and ebx as the saved original. */
894 __asm
895 {
896 pushfd
897 pop eax
898 mov ebx, eax
899 xor eax, 0200000h
900 push eax
901 popfd
902 pushfd
903 pop eax
904 cmp eax, ebx
905 setne fRet
906 push ebx
907 popfd
908 }
909# endif
910 return fRet;
911#endif /* !RT_ARCH_AMD64 */
912}
913
914
915/**
916 * Gets the APIC ID of the current CPU.
917 *
 * Executes cpuid with EAX=1 and returns bits 31:24 of the resulting EBX.
 * Only the prototype is declared when RT_INLINE_ASM_EXTERNAL is set and
 * intrinsics are not in use.
 *
918 * @returns the APIC ID.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(uint8_t) ASMGetApicId(void);
922#else
923DECLINLINE(uint8_t) ASMGetApicId(void)
924{
925 RTCCUINTREG xBX;
926# if RT_INLINE_ASM_GNU_STYLE
927# ifdef RT_ARCH_AMD64
928 RTCCUINTREG uSpill;
929 __asm__ ("cpuid"
930 : "=a" (uSpill),
931 "=b" (xBX)
932 : "0" (1)
933 : "rcx", "rdx");
934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
 /* PIC on i386: EBX is parked in operand 1 before cpuid and swapped back
  * afterwards, leaving cpuid's EBX result in operand 1. */
935 RTCCUINTREG uSpill;
936 __asm__ ("mov %%ebx,%1\n\t"
937 "cpuid\n\t"
938 "xchgl %%ebx,%1\n\t"
939 : "=a" (uSpill),
940 "=r" (xBX)
941 : "0" (1)
942 : "ecx", "edx");
943# else
944 RTCCUINTREG uSpill;
945 __asm__ ("cpuid"
946 : "=a" (uSpill),
947 "=b" (xBX)
948 : "0" (1)
949 : "ecx", "edx");
950# endif
951
952# elif RT_INLINE_ASM_USES_INTRIN
 /* aInfo[1] is the value stored as EBX by the sibling cpuid wrappers. */
953 int aInfo[4];
954 __cpuid(aInfo, 1);
955 xBX = aInfo[1];
956
957# else
958 __asm
959 {
960 push ebx
961 mov eax, 1
962 cpuid
963 mov [xBX], ebx
964 pop ebx
965 }
966# endif
 /* Keep only the top byte of the 32-bit EBX value. */
967 return (uint8_t)(xBX >> 24);
968}
969#endif
970
971
972/**
973 * Tests if it a genuine Intel CPU based on the ASMCpuId(0) output.
974 *
975 * @returns true/false.
976 * @param uEBX EBX return from ASMCpuId(0)
977 * @param uECX ECX return from ASMCpuId(0)
978 * @param uEDX EDX return from ASMCpuId(0)
979 */
980DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
981{
982 return uEBX == 0x756e6547
983 && uECX == 0x6c65746e
984 && uEDX == 0x49656e69;
985}
986
987
988/**
989 * Tests if this is a genuine Intel CPU.
990 *
991 * @returns true/false.
992 * @remarks ASSUMES that cpuid is supported by the CPU.
993 */
994DECLINLINE(bool) ASMIsIntelCpu(void)
995{
996 uint32_t uEAX, uEBX, uECX, uEDX;
997 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
998 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
999}
1000
1001
1002/**
1003 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1004 *
1005 * @returns Family.
1006 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1007 */
1008DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1009{
1010 return ((uEAX >> 8) & 0xf) == 0xf
1011 ? ((uEAX >> 20) & 0x7f) + 0xf
1012 : ((uEAX >> 8) & 0xf);
1013}
1014
1015
1016/**
1017 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1018 *
1019 * @returns Model.
1020 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1021 * @param fIntel Whether it's an intel CPU.
1022 */
1023DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1024{
1025 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1026 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1027 : ((uEAX >> 4) & 0xf);
1028}
1029
1030
1031/**
1032 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1033 *
1034 * @returns Model.
1035 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1036 * @param fIntel Whether it's an intel CPU.
1037 */
1038DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1039{
1040 return ((uEAX >> 8) & 0xf) == 0xf
1041 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1042 : ((uEAX >> 4) & 0xf);
1043}
1044
1045
1046/**
1047 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1048 *
1049 * @returns Model.
1050 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1051 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1052 */
1053DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1054{
1055 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1056 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1057 : ((uEAX >> 4) & 0xf);
1058}
1059
1060
1061/**
1062 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1063 *
1064 * @returns Model.
1065 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1066 */
1067DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1068{
1069 return uEAX & 0xf;
1070}
1071
1072
1073/**
1074 * Get cr0.
 *
 * Uses the __readcr0() intrinsic when available, otherwise a mov from cr0 in
 * GNU or MSC inline assembly.  Only the prototype is declared when
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1075 * @returns cr0.
1076 */
1077#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1078DECLASM(RTCCUINTREG) ASMGetCR0(void);
1079#else
1080DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1081{
1082 RTCCUINTREG uCR0;
1083# if RT_INLINE_ASM_USES_INTRIN
1084 uCR0 = __readcr0();
1085
1086# elif RT_INLINE_ASM_GNU_STYLE
1087# ifdef RT_ARCH_AMD64
1088 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1089# else
1090 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1091# endif
1092# else
1093 __asm
1094 {
1095# ifdef RT_ARCH_AMD64
1096 mov rax, cr0
1097 mov [uCR0], rax
1098# else
1099 mov eax, cr0
1100 mov [uCR0], eax
1101# endif
1102 }
1103# endif
1104 return uCR0;
1105}
1106#endif
1107
1108
1109/**
1110 * Sets the CR0 register.
 *
 * Uses the __writecr0() intrinsic when available, otherwise a mov to cr0 in
 * GNU or MSC inline assembly.  Only the prototype is declared when
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1111 * @param uCR0 The new CR0 value.
1112 */
1113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1114DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1115#else
1116DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1117{
1118# if RT_INLINE_ASM_USES_INTRIN
1119 __writecr0(uCR0);
1120
1121# elif RT_INLINE_ASM_GNU_STYLE
1122# ifdef RT_ARCH_AMD64
1123 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1124# else
1125 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1126# endif
1127# else
1128 __asm
1129 {
1130# ifdef RT_ARCH_AMD64
1131 mov rax, [uCR0]
1132 mov cr0, rax
1133# else
1134 mov eax, [uCR0]
1135 mov cr0, eax
1136# endif
1137 }
1138# endif
1139}
1140#endif
1141
1142
1143/**
1144 * Get cr2.
 *
 * Uses the __readcr2() intrinsic when available, otherwise a mov from cr2 in
 * GNU or MSC inline assembly.  Only the prototype is declared when
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1145 * @returns cr2.
1146 */
1147#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1148DECLASM(RTCCUINTREG) ASMGetCR2(void);
1149#else
1150DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1151{
1152 RTCCUINTREG uCR2;
1153# if RT_INLINE_ASM_USES_INTRIN
1154 uCR2 = __readcr2();
1155
1156# elif RT_INLINE_ASM_GNU_STYLE
1157# ifdef RT_ARCH_AMD64
1158 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1159# else
1160 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1161# endif
1162# else
1163 __asm
1164 {
1165# ifdef RT_ARCH_AMD64
1166 mov rax, cr2
1167 mov [uCR2], rax
1168# else
1169 mov eax, cr2
1170 mov [uCR2], eax
1171# endif
1172 }
1173# endif
1174 return uCR2;
1175}
1176#endif
1177
1178
1179/**
1180 * Sets the CR2 register.
 *
 * Implemented with a mov to cr2 in GNU or MSC inline assembly; unlike the
 * other CRx setters in this file there is no intrinsic variant.  When
 * RT_INLINE_ASM_EXTERNAL is non-zero only the prototype is declared here.
 *
1181 * @param uCR2 The new CR2 value.
1182 */
1183#if RT_INLINE_ASM_EXTERNAL
1184DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1185#else
1186DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1187{
1188# if RT_INLINE_ASM_GNU_STYLE
1189# ifdef RT_ARCH_AMD64
1190 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1191# else
1192 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1193# endif
1194# else
1195 __asm
1196 {
1197# ifdef RT_ARCH_AMD64
1198 mov rax, [uCR2]
1199 mov cr2, rax
1200# else
1201 mov eax, [uCR2]
1202 mov cr2, eax
1203# endif
1204 }
1205# endif
1206}
1207#endif
1208
1209
1210/**
1211 * Get cr3.
 *
 * Uses the __readcr3() intrinsic when available, otherwise a mov from cr3 in
 * GNU or MSC inline assembly.  Only the prototype is declared when
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1212 * @returns cr3.
1213 */
1214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1215DECLASM(RTCCUINTREG) ASMGetCR3(void);
1216#else
1217DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1218{
1219 RTCCUINTREG uCR3;
1220# if RT_INLINE_ASM_USES_INTRIN
1221 uCR3 = __readcr3();
1222
1223# elif RT_INLINE_ASM_GNU_STYLE
1224# ifdef RT_ARCH_AMD64
1225 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1226# else
1227 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1228# endif
1229# else
1230 __asm
1231 {
1232# ifdef RT_ARCH_AMD64
1233 mov rax, cr3
1234 mov [uCR3], rax
1235# else
1236 mov eax, cr3
1237 mov [uCR3], eax
1238# endif
1239 }
1240# endif
1241 return uCR3;
1242}
1243#endif
1244
1245
1246/**
1247 * Sets the CR3 register.
1248 *
 * Uses the __writecr3() intrinsic when available, otherwise a mov to cr3 in
 * GNU or MSC inline assembly.  Only the prototype is declared when
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1249 * @param uCR3 New CR3 value.
1250 */
1251#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1252DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1253#else
1254DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1255{
1256# if RT_INLINE_ASM_USES_INTRIN
1257 __writecr3(uCR3);
1258
1259# elif RT_INLINE_ASM_GNU_STYLE
1260# ifdef RT_ARCH_AMD64
1261 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1262# else
1263 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1264# endif
1265# else
1266 __asm
1267 {
1268# ifdef RT_ARCH_AMD64
1269 mov rax, [uCR3]
1270 mov cr3, rax
1271# else
1272 mov eax, [uCR3]
1273 mov cr3, eax
1274# endif
1275 }
1276# endif
1277}
1278#endif
1279
1280
1281/**
1282 * Reloads the CR3 register.
 *
 * Reads the current CR3 value and writes the same value straight back, using
 * the __readcr3()/__writecr3() intrinsics when available and inline assembly
 * otherwise.  Only the prototype is declared when RT_INLINE_ASM_EXTERNAL is
 * set and intrinsics are not in use.
1283 */
1284#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1285DECLASM(void) ASMReloadCR3(void);
1286#else
1287DECLINLINE(void) ASMReloadCR3(void)
1288{
1289# if RT_INLINE_ASM_USES_INTRIN
1290 __writecr3(__readcr3());
1291
1292# elif RT_INLINE_ASM_GNU_STYLE
 /* u only serves as the scratch register for the round trip. */
1293 RTCCUINTREG u;
1294# ifdef RT_ARCH_AMD64
1295 __asm__ __volatile__("movq %%cr3, %0\n\t"
1296 "movq %0, %%cr3\n\t"
1297 : "=r" (u));
1298# else
1299 __asm__ __volatile__("movl %%cr3, %0\n\t"
1300 "movl %0, %%cr3\n\t"
1301 : "=r" (u));
1302# endif
1303# else
1304 __asm
1305 {
1306# ifdef RT_ARCH_AMD64
1307 mov rax, cr3
1308 mov cr3, rax
1309# else
1310 mov eax, cr3
1311 mov cr3, eax
1312# endif
1313 }
1314# endif
1315}
1316#endif
1317
1318
1319/**
1320 * Get cr4.
 *
 * Uses the __readcr4() intrinsic when available, otherwise inline assembly.
 * The 32-bit MSC variant emits the instruction bytes for "mov eax, cr4" by
 * hand and saves/restores eax around them.  Only the prototype is declared
 * when RT_INLINE_ASM_EXTERNAL is set and intrinsics are not in use.
 *
1321 * @returns cr4.
1322 */
1323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1324DECLASM(RTCCUINTREG) ASMGetCR4(void);
1325#else
1326DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1327{
1328 RTCCUINTREG uCR4;
1329# if RT_INLINE_ASM_USES_INTRIN
1330 uCR4 = __readcr4();
1331
1332# elif RT_INLINE_ASM_GNU_STYLE
1333# ifdef RT_ARCH_AMD64
1334 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1335# else
1336 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1337# endif
1338# else
1339 __asm
1340 {
1341# ifdef RT_ARCH_AMD64
1342 mov rax, cr4
1343 mov [uCR4], rax
1344# else
1345 push eax /* just in case */
1346 /*mov eax, cr4*/
1347 _emit 0x0f
1348 _emit 0x20
1349 _emit 0xe0
1350 mov [uCR4], eax
1351 pop eax
1352# endif
1353 }
1354# endif
1355 return uCR4;
1356}
1357#endif
1358
1359
1360/**
1361 * Sets the CR4 register.
1362 *
1363 * @param uCR4 New CR4 value.
1364 */
1365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1366DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1367#else
1368DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1369{
1370# if RT_INLINE_ASM_USES_INTRIN
1371 __writecr4(uCR4);
1372
1373# elif RT_INLINE_ASM_GNU_STYLE
1374# ifdef RT_ARCH_AMD64
1375 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1376# else
1377 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1378# endif
1379# else
1380 __asm
1381 {
1382# ifdef RT_ARCH_AMD64
1383 mov rax, [uCR4]
1384 mov cr4, rax
1385# else
1386 mov eax, [uCR4]
1387 _emit 0x0F
1388 _emit 0x22
1389 _emit 0xE0 /* mov cr4, eax */
1390# endif
1391 }
1392# endif
1393}
1394#endif
1395
1396
1397/**
1398 * Get cr8.
1399 * @returns cr8.
1400 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1401 */
1402#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1403DECLASM(RTCCUINTREG) ASMGetCR8(void);
1404#else
1405DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1406{
1407# ifdef RT_ARCH_AMD64
1408 RTCCUINTREG uCR8;
1409# if RT_INLINE_ASM_USES_INTRIN
1410 uCR8 = __readcr8();
1411
1412# elif RT_INLINE_ASM_GNU_STYLE
1413 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1414# else
1415 __asm
1416 {
1417 mov rax, cr8
1418 mov [uCR8], rax
1419 }
1420# endif
1421 return uCR8;
1422# else /* !RT_ARCH_AMD64 */
1423 return 0;
1424# endif /* !RT_ARCH_AMD64 */
1425}
1426#endif
1427
1428
1429/**
1430 * Enables interrupts (EFLAGS.IF).
1431 */
1432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1433DECLASM(void) ASMIntEnable(void);
1434#else
1435DECLINLINE(void) ASMIntEnable(void)
1436{
1437# if RT_INLINE_ASM_GNU_STYLE
1438 __asm("sti\n");
1439# elif RT_INLINE_ASM_USES_INTRIN
1440 _enable();
1441# else
1442 __asm sti
1443# endif
1444}
1445#endif
1446
1447
1448/**
1449 * Disables interrupts (!EFLAGS.IF).
1450 */
1451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1452DECLASM(void) ASMIntDisable(void);
1453#else
1454DECLINLINE(void) ASMIntDisable(void)
1455{
1456# if RT_INLINE_ASM_GNU_STYLE
1457 __asm("cli\n");
1458# elif RT_INLINE_ASM_USES_INTRIN
1459 _disable();
1460# else
1461 __asm cli
1462# endif
1463}
1464#endif
1465
1466
1467/**
1468 * Disables interrupts and returns previous xFLAGS.
1469 */
1470#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1471DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1472#else
1473DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1474{
1475 RTCCUINTREG xFlags;
1476# if RT_INLINE_ASM_GNU_STYLE
1477# ifdef RT_ARCH_AMD64
1478 __asm__ __volatile__("pushfq\n\t"
1479 "cli\n\t"
1480 "popq %0\n\t"
1481 : "=r" (xFlags));
1482# else
1483 __asm__ __volatile__("pushfl\n\t"
1484 "cli\n\t"
1485 "popl %0\n\t"
1486 : "=r" (xFlags));
1487# endif
1488# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1489 xFlags = ASMGetFlags();
1490 _disable();
1491# else
1492 __asm {
1493 pushfd
1494 cli
1495 pop [xFlags]
1496 }
1497# endif
1498 return xFlags;
1499}
1500#endif
1501
1502
1503/**
1504 * Are interrupts enabled?
1505 *
1506 * @returns true / false.
1507 */
1508DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1509{
1510 RTCCUINTREG uFlags = ASMGetFlags();
1511 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1512}
1513
1514
1515/**
1516 * Halts the CPU until interrupted.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL
1519DECLASM(void) ASMHalt(void);
1520#else
1521DECLINLINE(void) ASMHalt(void)
1522{
1523# if RT_INLINE_ASM_GNU_STYLE
1524 __asm__ __volatile__("hlt\n\t");
1525# else
1526 __asm {
1527 hlt
1528 }
1529# endif
1530}
1531#endif
1532
1533
1534/**
1535 * The PAUSE variant of NOP for helping hyperthreaded CPUs detecing spin locks.
1536 */
1537#if RT_INLINE_ASM_EXTERNAL
1538DECLASM(void) ASMNopPause(void);
1539#else
1540DECLINLINE(void) ASMNopPause(void)
1541{
1542# if RT_INLINE_ASM_GNU_STYLE
1543 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1544# else
1545 __asm {
1546 _emit 0f3h
1547 _emit 090h
1548 }
1549# endif
1550}
1551#endif
1552
1553
1554/**
1555 * Reads a machine specific register.
1556 *
1557 * @returns Register content.
1558 * @param uRegister Register to read.
1559 */
1560#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1561DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1562#else
1563DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1564{
1565 RTUINT64U u;
1566# if RT_INLINE_ASM_GNU_STYLE
1567 __asm__ __volatile__("rdmsr\n\t"
1568 : "=a" (u.s.Lo),
1569 "=d" (u.s.Hi)
1570 : "c" (uRegister));
1571
1572# elif RT_INLINE_ASM_USES_INTRIN
1573 u.u = __readmsr(uRegister);
1574
1575# else
1576 __asm
1577 {
1578 mov ecx, [uRegister]
1579 rdmsr
1580 mov [u.s.Lo], eax
1581 mov [u.s.Hi], edx
1582 }
1583# endif
1584
1585 return u.u;
1586}
1587#endif
1588
1589
1590/**
1591 * Writes a machine specific register.
1592 *
1593 * @returns Register content.
1594 * @param uRegister Register to write to.
1595 * @param u64Val Value to write.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1599#else
1600DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1601{
1602 RTUINT64U u;
1603
1604 u.u = u64Val;
1605# if RT_INLINE_ASM_GNU_STYLE
1606 __asm__ __volatile__("wrmsr\n\t"
1607 ::"a" (u.s.Lo),
1608 "d" (u.s.Hi),
1609 "c" (uRegister));
1610
1611# elif RT_INLINE_ASM_USES_INTRIN
1612 __writemsr(uRegister, u.u);
1613
1614# else
1615 __asm
1616 {
1617 mov ecx, [uRegister]
1618 mov edx, [u.s.Hi]
1619 mov eax, [u.s.Lo]
1620 wrmsr
1621 }
1622# endif
1623}
1624#endif
1625
1626
1627/**
1628 * Reads low part of a machine specific register.
1629 *
1630 * @returns Register content.
1631 * @param uRegister Register to read.
1632 */
1633#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1634DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1635#else
1636DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1637{
1638 uint32_t u32;
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("rdmsr\n\t"
1641 : "=a" (u32)
1642 : "c" (uRegister)
1643 : "edx");
1644
1645# elif RT_INLINE_ASM_USES_INTRIN
1646 u32 = (uint32_t)__readmsr(uRegister);
1647
1648#else
1649 __asm
1650 {
1651 mov ecx, [uRegister]
1652 rdmsr
1653 mov [u32], eax
1654 }
1655# endif
1656
1657 return u32;
1658}
1659#endif
1660
1661
1662/**
1663 * Reads high part of a machine specific register.
1664 *
1665 * @returns Register content.
1666 * @param uRegister Register to read.
1667 */
1668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1669DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1670#else
1671DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1672{
1673 uint32_t u32;
1674# if RT_INLINE_ASM_GNU_STYLE
1675 __asm__ __volatile__("rdmsr\n\t"
1676 : "=d" (u32)
1677 : "c" (uRegister)
1678 : "eax");
1679
1680# elif RT_INLINE_ASM_USES_INTRIN
1681 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1682
1683# else
1684 __asm
1685 {
1686 mov ecx, [uRegister]
1687 rdmsr
1688 mov [u32], edx
1689 }
1690# endif
1691
1692 return u32;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr0.
1699 *
1700 * @returns dr0.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR0(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1706{
1707 RTCCUINTREG uDR0;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR0 = __readdr(0);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1713# else
1714 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr0
1721 mov [uDR0], rax
1722# else
1723 mov eax, dr0
1724 mov [uDR0], eax
1725# endif
1726 }
1727# endif
1728 return uDR0;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr1.
1735 *
1736 * @returns dr1.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR1(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1742{
1743 RTCCUINTREG uDR1;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR1 = __readdr(1);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1749# else
1750 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr1
1757 mov [uDR1], rax
1758# else
1759 mov eax, dr1
1760 mov [uDR1], eax
1761# endif
1762 }
1763# endif
1764 return uDR1;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr2.
1771 *
1772 * @returns dr2.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR2(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1778{
1779 RTCCUINTREG uDR2;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR2 = __readdr(2);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1785# else
1786 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr2
1793 mov [uDR2], rax
1794# else
1795 mov eax, dr2
1796 mov [uDR2], eax
1797# endif
1798 }
1799# endif
1800 return uDR2;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr3.
1807 *
1808 * @returns dr3.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR3(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1814{
1815 RTCCUINTREG uDR3;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR3 = __readdr(3);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1821# else
1822 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr3
1829 mov [uDR3], rax
1830# else
1831 mov eax, dr3
1832 mov [uDR3], eax
1833# endif
1834 }
1835# endif
1836 return uDR3;
1837}
1838#endif
1839
1840
1841/**
1842 * Gets dr6.
1843 *
1844 * @returns dr6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854# elif RT_INLINE_ASM_GNU_STYLE
1855# ifdef RT_ARCH_AMD64
1856 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1857# else
1858 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1859# endif
1860# else
1861 __asm
1862 {
1863# ifdef RT_ARCH_AMD64
1864 mov rax, dr6
1865 mov [uDR6], rax
1866# else
1867 mov eax, dr6
1868 mov [uDR6], eax
1869# endif
1870 }
1871# endif
1872 return uDR6;
1873}
1874#endif
1875
1876
1877/**
1878 * Reads and clears DR6.
1879 *
1880 * @returns DR6.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1883DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1884#else
1885DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1886{
1887 RTCCUINTREG uDR6;
1888# if RT_INLINE_ASM_USES_INTRIN
1889 uDR6 = __readdr(6);
1890 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1891# elif RT_INLINE_ASM_GNU_STYLE
1892 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1893# ifdef RT_ARCH_AMD64
1894 __asm__ __volatile__("movq %%dr6, %0\n\t"
1895 "movq %1, %%dr6\n\t"
1896 : "=r" (uDR6)
1897 : "r" (uNewValue));
1898# else
1899 __asm__ __volatile__("movl %%dr6, %0\n\t"
1900 "movl %1, %%dr6\n\t"
1901 : "=r" (uDR6)
1902 : "r" (uNewValue));
1903# endif
1904# else
1905 __asm
1906 {
1907# ifdef RT_ARCH_AMD64
1908 mov rax, dr6
1909 mov [uDR6], rax
1910 mov rcx, rax
1911 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1912 mov dr6, rcx
1913# else
1914 mov eax, dr6
1915 mov [uDR6], eax
1916 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1917 mov dr6, ecx
1918# endif
1919 }
1920# endif
1921 return uDR6;
1922}
1923#endif
1924
1925
1926/**
1927 * Gets dr7.
1928 *
1929 * @returns dr7.
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(RTCCUINTREG) ASMGetDR7(void);
1933#else
1934DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1935{
1936 RTCCUINTREG uDR7;
1937# if RT_INLINE_ASM_USES_INTRIN
1938 uDR7 = __readdr(7);
1939# elif RT_INLINE_ASM_GNU_STYLE
1940# ifdef RT_ARCH_AMD64
1941 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1942# else
1943 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1944# endif
1945# else
1946 __asm
1947 {
1948# ifdef RT_ARCH_AMD64
1949 mov rax, dr7
1950 mov [uDR7], rax
1951# else
1952 mov eax, dr7
1953 mov [uDR7], eax
1954# endif
1955 }
1956# endif
1957 return uDR7;
1958}
1959#endif
1960
1961
1962/**
1963 * Sets dr0.
1964 *
1965 * @param uDRVal Debug register value to write
1966 */
1967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1968DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1969#else
1970DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1971{
1972# if RT_INLINE_ASM_USES_INTRIN
1973 __writedr(0, uDRVal);
1974# elif RT_INLINE_ASM_GNU_STYLE
1975# ifdef RT_ARCH_AMD64
1976 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1977# else
1978 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1979# endif
1980# else
1981 __asm
1982 {
1983# ifdef RT_ARCH_AMD64
1984 mov rax, [uDRVal]
1985 mov dr0, rax
1986# else
1987 mov eax, [uDRVal]
1988 mov dr0, eax
1989# endif
1990 }
1991# endif
1992}
1993#endif
1994
1995
1996/**
1997 * Sets dr1.
1998 *
1999 * @param uDRVal Debug register value to write
2000 */
2001#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2002DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2003#else
2004DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2005{
2006# if RT_INLINE_ASM_USES_INTRIN
2007 __writedr(1, uDRVal);
2008# elif RT_INLINE_ASM_GNU_STYLE
2009# ifdef RT_ARCH_AMD64
2010 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2011# else
2012 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2013# endif
2014# else
2015 __asm
2016 {
2017# ifdef RT_ARCH_AMD64
2018 mov rax, [uDRVal]
2019 mov dr1, rax
2020# else
2021 mov eax, [uDRVal]
2022 mov dr1, eax
2023# endif
2024 }
2025# endif
2026}
2027#endif
2028
2029
2030/**
2031 * Sets dr2.
2032 *
2033 * @param uDRVal Debug register value to write
2034 */
2035#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2036DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2037#else
2038DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2039{
2040# if RT_INLINE_ASM_USES_INTRIN
2041 __writedr(2, uDRVal);
2042# elif RT_INLINE_ASM_GNU_STYLE
2043# ifdef RT_ARCH_AMD64
2044 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2045# else
2046 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2047# endif
2048# else
2049 __asm
2050 {
2051# ifdef RT_ARCH_AMD64
2052 mov rax, [uDRVal]
2053 mov dr2, rax
2054# else
2055 mov eax, [uDRVal]
2056 mov dr2, eax
2057# endif
2058 }
2059# endif
2060}
2061#endif
2062
2063
2064/**
2065 * Sets dr3.
2066 *
2067 * @param uDRVal Debug register value to write
2068 */
2069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2070DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2071#else
2072DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2073{
2074# if RT_INLINE_ASM_USES_INTRIN
2075 __writedr(3, uDRVal);
2076# elif RT_INLINE_ASM_GNU_STYLE
2077# ifdef RT_ARCH_AMD64
2078 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2079# else
2080 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2081# endif
2082# else
2083 __asm
2084 {
2085# ifdef RT_ARCH_AMD64
2086 mov rax, [uDRVal]
2087 mov dr3, rax
2088# else
2089 mov eax, [uDRVal]
2090 mov dr3, eax
2091# endif
2092 }
2093# endif
2094}
2095#endif
2096
2097
2098/**
2099 * Sets dr6.
2100 *
2101 * @param uDRVal Debug register value to write
2102 */
2103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2104DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2105#else
2106DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2107{
2108# if RT_INLINE_ASM_USES_INTRIN
2109 __writedr(6, uDRVal);
2110# elif RT_INLINE_ASM_GNU_STYLE
2111# ifdef RT_ARCH_AMD64
2112 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2113# else
2114 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2115# endif
2116# else
2117 __asm
2118 {
2119# ifdef RT_ARCH_AMD64
2120 mov rax, [uDRVal]
2121 mov dr6, rax
2122# else
2123 mov eax, [uDRVal]
2124 mov dr6, eax
2125# endif
2126 }
2127# endif
2128}
2129#endif
2130
2131
2132/**
2133 * Sets dr7.
2134 *
2135 * @param uDRVal Debug register value to write
2136 */
2137#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2138DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2139#else
2140DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2141{
2142# if RT_INLINE_ASM_USES_INTRIN
2143 __writedr(7, uDRVal);
2144# elif RT_INLINE_ASM_GNU_STYLE
2145# ifdef RT_ARCH_AMD64
2146 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2147# else
2148 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2149# endif
2150# else
2151 __asm
2152 {
2153# ifdef RT_ARCH_AMD64
2154 mov rax, [uDRVal]
2155 mov dr7, rax
2156# else
2157 mov eax, [uDRVal]
2158 mov dr7, eax
2159# endif
2160 }
2161# endif
2162}
2163#endif
2164
2165
2166/**
2167 * Compiler memory barrier.
2168 *
2169 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2170 * values or any outstanding writes when returning from this function.
2171 *
2172 * This function must be used if non-volatile data is modified by a
2173 * device or the VMM. Typical cases are port access, MMIO access,
2174 * trapping instruction, etc.
2175 */
2176#if RT_INLINE_ASM_GNU_STYLE
2177# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2178#elif RT_INLINE_ASM_USES_INTRIN
2179# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2180#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2181DECLINLINE(void) ASMCompilerBarrier(void)
2182{
2183 __asm
2184 {
2185 }
2186}
2187#endif
2188
2189
2190/**
2191 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2192 *
2193 * @param Port I/O port to write to.
2194 * @param u8 8-bit integer to write.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2198#else
2199DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 __asm__ __volatile__("outb %b1, %w0\n\t"
2203 :: "Nd" (Port),
2204 "a" (u8));
2205
2206# elif RT_INLINE_ASM_USES_INTRIN
2207 __outbyte(Port, u8);
2208
2209# else
2210 __asm
2211 {
2212 mov dx, [Port]
2213 mov al, [u8]
2214 out dx, al
2215 }
2216# endif
2217}
2218#endif
2219
2220
2221/**
2222 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2223 *
2224 * @returns 8-bit integer.
2225 * @param Port I/O port to read from.
2226 */
2227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2228DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2229#else
2230DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2231{
2232 uint8_t u8;
2233# if RT_INLINE_ASM_GNU_STYLE
2234 __asm__ __volatile__("inb %w1, %b0\n\t"
2235 : "=a" (u8)
2236 : "Nd" (Port));
2237
2238# elif RT_INLINE_ASM_USES_INTRIN
2239 u8 = __inbyte(Port);
2240
2241# else
2242 __asm
2243 {
2244 mov dx, [Port]
2245 in al, dx
2246 mov [u8], al
2247 }
2248# endif
2249 return u8;
2250}
2251#endif
2252
2253
2254/**
2255 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2256 *
2257 * @param Port I/O port to write to.
2258 * @param u16 16-bit integer to write.
2259 */
2260#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2261DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2262#else
2263DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2264{
2265# if RT_INLINE_ASM_GNU_STYLE
2266 __asm__ __volatile__("outw %w1, %w0\n\t"
2267 :: "Nd" (Port),
2268 "a" (u16));
2269
2270# elif RT_INLINE_ASM_USES_INTRIN
2271 __outword(Port, u16);
2272
2273# else
2274 __asm
2275 {
2276 mov dx, [Port]
2277 mov ax, [u16]
2278 out dx, ax
2279 }
2280# endif
2281}
2282#endif
2283
2284
2285/**
2286 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2287 *
2288 * @returns 16-bit integer.
2289 * @param Port I/O port to read from.
2290 */
2291#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2292DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2293#else
2294DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2295{
2296 uint16_t u16;
2297# if RT_INLINE_ASM_GNU_STYLE
2298 __asm__ __volatile__("inw %w1, %w0\n\t"
2299 : "=a" (u16)
2300 : "Nd" (Port));
2301
2302# elif RT_INLINE_ASM_USES_INTRIN
2303 u16 = __inword(Port);
2304
2305# else
2306 __asm
2307 {
2308 mov dx, [Port]
2309 in ax, dx
2310 mov [u16], ax
2311 }
2312# endif
2313 return u16;
2314}
2315#endif
2316
2317
2318/**
2319 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2320 *
2321 * @param Port I/O port to write to.
2322 * @param u32 32-bit integer to write.
2323 */
2324#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2325DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2326#else
2327DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2328{
2329# if RT_INLINE_ASM_GNU_STYLE
2330 __asm__ __volatile__("outl %1, %w0\n\t"
2331 :: "Nd" (Port),
2332 "a" (u32));
2333
2334# elif RT_INLINE_ASM_USES_INTRIN
2335 __outdword(Port, u32);
2336
2337# else
2338 __asm
2339 {
2340 mov dx, [Port]
2341 mov eax, [u32]
2342 out dx, eax
2343 }
2344# endif
2345}
2346#endif
2347
2348
2349/**
2350 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2351 *
2352 * @returns 32-bit integer.
2353 * @param Port I/O port to read from.
2354 */
2355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2356DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2357#else
2358DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2359{
2360 uint32_t u32;
2361# if RT_INLINE_ASM_GNU_STYLE
2362 __asm__ __volatile__("inl %w1, %0\n\t"
2363 : "=a" (u32)
2364 : "Nd" (Port));
2365
2366# elif RT_INLINE_ASM_USES_INTRIN
2367 u32 = __indword(Port);
2368
2369# else
2370 __asm
2371 {
2372 mov dx, [Port]
2373 in eax, dx
2374 mov [u32], eax
2375 }
2376# endif
2377 return u32;
2378}
2379#endif
2380
2381
2382/**
2383 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2384 *
2385 * @param Port I/O port to write to.
2386 * @param pau8 Pointer to the string buffer.
2387 * @param c The number of items to write.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; outsb\n\t"
2396 : "+S" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg esi, eax
2410 rep outsb
2411 xchg esi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2420 *
2421 * @param Port I/O port to read from.
2422 * @param pau8 Pointer to the string buffer (output).
2423 * @param c The number of items to read.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2427#else
2428DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; insb\n\t"
2432 : "+D" (pau8),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __inbytestring(Port, pau8, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau8]
2445 xchg edi, eax
2446 rep insb
2447 xchg edi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2456 *
2457 * @param Port I/O port to write to.
2458 * @param pau16 Pointer to the string buffer.
2459 * @param c The number of items to write.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; outsw\n\t"
2468 : "+S" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg esi, eax
2482 rep outsw
2483 xchg esi, eax
2484 }
2485# endif
2486}
2487#endif
2488
2489
2490/**
2491 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2492 *
2493 * @param Port I/O port to read from.
2494 * @param pau16 Pointer to the string buffer (output).
2495 * @param c The number of items to read.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2499#else
2500DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; insw\n\t"
2504 : "+D" (pau16),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __inwordstring(Port, pau16, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau16]
2517 xchg edi, eax
2518 rep insw
2519 xchg edi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2528 *
2529 * @param Port I/O port to write to.
2530 * @param pau32 Pointer to the string buffer.
2531 * @param c The number of items to write.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; outsl\n\t"
2540 : "+S" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg esi, eax
2554 rep outsd
2555 xchg esi, eax
2556 }
2557# endif
2558}
2559#endif
2560
2561
2562/**
2563 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2564 *
2565 * @param Port I/O port to read from.
2566 * @param pau32 Pointer to the string buffer (output).
2567 * @param c The number of items to read.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2570DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2571#else
2572DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("rep; insl\n\t"
2576 : "+D" (pau32),
2577 "+c" (c)
2578 : "d" (Port));
2579
2580# elif RT_INLINE_ASM_USES_INTRIN
2581 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2582
2583# else
2584 __asm
2585 {
2586 mov dx, [Port]
2587 mov ecx, [c]
2588 mov eax, [pau32]
2589 xchg edi, eax
2590 rep insd
2591 xchg edi, eax
2592 }
2593# endif
2594}
2595#endif
2596
2597
2598/**
2599 * Atomically Exchange an unsigned 8-bit value, ordered.
2600 *
2601 * @returns Current *pu8 value
2602 * @param pu8 Pointer to the 8-bit variable to update.
2603 * @param u8 The 8-bit value to assign to *pu8.
2604 */
2605#if RT_INLINE_ASM_EXTERNAL
2606DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2607#else
2608DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2609{
2610# if RT_INLINE_ASM_GNU_STYLE
2611 __asm__ __volatile__("xchgb %0, %1\n\t"
2612 : "=m" (*pu8),
2613 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2614 : "1" (u8),
2615 "m" (*pu8));
2616# else
2617 __asm
2618 {
2619# ifdef RT_ARCH_AMD64
2620 mov rdx, [pu8]
2621 mov al, [u8]
2622 xchg [rdx], al
2623 mov [u8], al
2624# else
2625 mov edx, [pu8]
2626 mov al, [u8]
2627 xchg [edx], al
2628 mov [u8], al
2629# endif
2630 }
2631# endif
2632 return u8;
2633}
2634#endif
2635
2636
2637/**
2638 * Atomically Exchange a signed 8-bit value, ordered.
2639 *
2640 * @returns Current *pu8 value
2641 * @param pi8 Pointer to the 8-bit variable to update.
2642 * @param i8 The 8-bit value to assign to *pi8.
2643 */
2644DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2645{
2646 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2647}
2648
2649
2650/**
2651 * Atomically Exchange a bool value, ordered.
2652 *
2653 * @returns Current *pf value
2654 * @param pf Pointer to the 8-bit variable to update.
2655 * @param f The 8-bit value to assign to *pi8.
2656 */
2657DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2658{
2659#ifdef _MSC_VER
2660 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2661#else
2662 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2663#endif
2664}
2665
2666
2667/**
2668 * Atomically Exchange an unsigned 16-bit value, ordered.
2669 *
2670 * @returns Current *pu16 value
2671 * @param pu16 Pointer to the 16-bit variable to update.
2672 * @param u16 The 16-bit value to assign to *pu16.
2673 */
2674#if RT_INLINE_ASM_EXTERNAL
2675DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2676#else
2677DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2678{
2679# if RT_INLINE_ASM_GNU_STYLE
2680 __asm__ __volatile__("xchgw %0, %1\n\t"
2681 : "=m" (*pu16),
2682 "=r" (u16)
2683 : "1" (u16),
2684 "m" (*pu16));
2685# else
2686 __asm
2687 {
2688# ifdef RT_ARCH_AMD64
2689 mov rdx, [pu16]
2690 mov ax, [u16]
2691 xchg [rdx], ax
2692 mov [u16], ax
2693# else
2694 mov edx, [pu16]
2695 mov ax, [u16]
2696 xchg [edx], ax
2697 mov [u16], ax
2698# endif
2699 }
2700# endif
2701 return u16;
2702}
2703#endif
2704
2705
2706/**
2707 * Atomically Exchange a signed 16-bit value, ordered.
2708 *
2709 * @returns Current *pu16 value
2710 * @param pi16 Pointer to the 16-bit variable to update.
2711 * @param i16 The 16-bit value to assign to *pi16.
2712 */
2713DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2714{
2715 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2716}
2717
2718
2719/**
2720 * Atomically Exchange an unsigned 32-bit value, ordered.
2721 *
2722 * @returns Current *pu32 value
2723 * @param pu32 Pointer to the 32-bit variable to update.
2724 * @param u32 The 32-bit value to assign to *pu32.
2725 */
2726#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2727DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2728#else
2729DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2730{
2731# if RT_INLINE_ASM_GNU_STYLE
2732 __asm__ __volatile__("xchgl %0, %1\n\t"
2733 : "=m" (*pu32),
2734 "=r" (u32)
2735 : "1" (u32),
2736 "m" (*pu32));
2737
2738# elif RT_INLINE_ASM_USES_INTRIN
2739 u32 = _InterlockedExchange((long *)pu32, u32);
2740
2741# else
2742 __asm
2743 {
2744# ifdef RT_ARCH_AMD64
2745 mov rdx, [pu32]
2746 mov eax, u32
2747 xchg [rdx], eax
2748 mov [u32], eax
2749# else
2750 mov edx, [pu32]
2751 mov eax, u32
2752 xchg [edx], eax
2753 mov [u32], eax
2754# endif
2755 }
2756# endif
2757 return u32;
2758}
2759#endif
2760
2761
2762/**
2763 * Atomically Exchange a signed 32-bit value, ordered.
2764 *
2765 * @returns Current *pu32 value
2766 * @param pi32 Pointer to the 32-bit variable to update.
2767 * @param i32 The 32-bit value to assign to *pi32.
2768 */
2769DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2770{
2771 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2772}
2773
2774
2775/**
2776 * Atomically Exchange an unsigned 64-bit value, ordered.
2777 *
2778 * @returns Current *pu64 value
2779 * @param pu64 Pointer to the 64-bit variable to update.
2780 * @param u64 The 64-bit value to assign to *pu64.
2781 */
2782#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2783DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2784#else
2785DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2786{
2787# if defined(RT_ARCH_AMD64)
2788# if RT_INLINE_ASM_USES_INTRIN
2789 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2790
2791# elif RT_INLINE_ASM_GNU_STYLE
2792 __asm__ __volatile__("xchgq %0, %1\n\t"
2793 : "=m" (*pu64),
2794 "=r" (u64)
2795 : "1" (u64),
2796 "m" (*pu64));
2797# else
2798 __asm
2799 {
2800 mov rdx, [pu64]
2801 mov rax, [u64]
2802 xchg [rdx], rax
2803 mov [u64], rax
2804 }
2805# endif
2806# else /* !RT_ARCH_AMD64 */
2807# if RT_INLINE_ASM_GNU_STYLE
2808# if defined(PIC) || defined(__PIC__)
2809 uint32_t u32EBX = (uint32_t)u64;
2810 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2811 "xchgl %%ebx, %3\n\t"
2812 "1:\n\t"
2813 "lock; cmpxchg8b (%5)\n\t"
2814 "jnz 1b\n\t"
2815 "movl %3, %%ebx\n\t"
2816 /*"xchgl %%esi, %5\n\t"*/
2817 : "=A" (u64),
2818 "=m" (*pu64)
2819 : "0" (*pu64),
2820 "m" ( u32EBX ),
2821 "c" ( (uint32_t)(u64 >> 32) ),
2822 "S" (pu64));
2823# else /* !PIC */
2824 __asm__ __volatile__("1:\n\t"
2825 "lock; cmpxchg8b %1\n\t"
2826 "jnz 1b\n\t"
2827 : "=A" (u64),
2828 "=m" (*pu64)
2829 : "0" (*pu64),
2830 "b" ( (uint32_t)u64 ),
2831 "c" ( (uint32_t)(u64 >> 32) ));
2832# endif
2833# else
2834 __asm
2835 {
2836 mov ebx, dword ptr [u64]
2837 mov ecx, dword ptr [u64 + 4]
2838 mov edi, pu64
2839 mov eax, dword ptr [edi]
2840 mov edx, dword ptr [edi + 4]
2841 retry:
2842 lock cmpxchg8b [edi]
2843 jnz retry
2844 mov dword ptr [u64], eax
2845 mov dword ptr [u64 + 4], edx
2846 }
2847# endif
2848# endif /* !RT_ARCH_AMD64 */
2849 return u64;
2850}
2851#endif
2852
2853
2854/**
2855 * Atomically Exchange an signed 64-bit value, ordered.
2856 *
2857 * @returns Current *pi64 value
2858 * @param pi64 Pointer to the 64-bit variable to update.
2859 * @param i64 The 64-bit value to assign to *pi64.
2860 */
2861DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2862{
2863 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2864}
2865
2866
2867/**
2868 * Atomically Exchange a pointer value, ordered.
2869 *
2870 * @returns Current *ppv value
2871 * @param ppv Pointer to the pointer variable to update.
2872 * @param pv The pointer value to assign to *ppv.
2873 */
2874DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2875{
2876#if ARCH_BITS == 32
2877 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2878#elif ARCH_BITS == 64
2879 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2880#else
2881# error "ARCH_BITS is bogus"
2882#endif
2883}
2884
2885
2886/**
2887 * Atomically Exchange a raw-mode context pointer value, ordered.
2888 *
2889 * @returns Current *ppv value
2890 * @param ppvRC Pointer to the pointer variable to update.
2891 * @param pvRC The pointer value to assign to *ppv.
2892 */
2893DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2894{
2895 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2896}
2897
2898
2899/**
2900 * Atomically Exchange a ring-0 pointer value, ordered.
2901 *
2902 * @returns Current *ppv value
2903 * @param ppvR0 Pointer to the pointer variable to update.
2904 * @param pvR0 The pointer value to assign to *ppv.
2905 */
2906DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2907{
2908#if R0_ARCH_BITS == 32
2909 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2910#elif R0_ARCH_BITS == 64
2911 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2912#else
2913# error "R0_ARCH_BITS is bogus"
2914#endif
2915}
2916
2917
2918/**
2919 * Atomically Exchange a ring-3 pointer value, ordered.
2920 *
2921 * @returns Current *ppv value
2922 * @param ppvR3 Pointer to the pointer variable to update.
2923 * @param pvR3 The pointer value to assign to *ppv.
2924 */
2925DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2926{
2927#if R3_ARCH_BITS == 32
2928 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2929#elif R3_ARCH_BITS == 64
2930 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2931#else
2932# error "R3_ARCH_BITS is bogus"
2933#endif
2934}
2935
2936
2937/** @def ASMAtomicXchgHandle
2938 * Atomically Exchange a typical IPRT handle value, ordered.
2939 *
2940 * @param ph Pointer to the value to update.
2941 * @param hNew The new value to assigned to *pu.
2942 * @param phRes Where to store the current *ph value.
2943 *
2944 * @remarks This doesn't currently work for all handles (like RTFILE).
2945 */
2946#if HC_ARCH_BITS == 32
2947# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2948 do { \
2949 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2950 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2951 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2952 } while (0)
2953#elif HC_ARCH_BITS == 64
2954# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2955 do { \
2956 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2957 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2958 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2959 } while (0)
2960#else
2961# error HC_ARCH_BITS
2962#endif
2963
2964
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * The operation is selected by sizeof(*(pu)); sizes other than 1, 2, 4 and 8
 * bytes trigger an assertion and leave *pu untouched.  The previous value is
 * discarded (see the todo below).
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo This is busted as it's missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2983
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers, ordered, and passes back the previous value.
 *
 * The operation is selected by sizeof(*(pu)); sizes other than 1, 2, 4 and 8
 * bytes trigger an assertion and leave both *pu and *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 *                  It is written with the same width as *pu.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
3002
3003
3004/**
3005 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3006 *
3007 * @returns true if xchg was done.
3008 * @returns false if xchg wasn't done.
3009 *
3010 * @param pu32 Pointer to the value to update.
3011 * @param u32New The new value to assigned to *pu32.
3012 * @param u32Old The old value to *pu32 compare with.
3013 */
3014#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3015DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3016#else
3017DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3018{
3019# if RT_INLINE_ASM_GNU_STYLE
3020 uint8_t u8Ret;
3021 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3022 "setz %1\n\t"
3023 : "=m" (*pu32),
3024 "=qm" (u8Ret),
3025 "=a" (u32Old)
3026 : "r" (u32New),
3027 "2" (u32Old),
3028 "m" (*pu32));
3029 return (bool)u8Ret;
3030
3031# elif RT_INLINE_ASM_USES_INTRIN
3032 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3033
3034# else
3035 uint32_t u32Ret;
3036 __asm
3037 {
3038# ifdef RT_ARCH_AMD64
3039 mov rdx, [pu32]
3040# else
3041 mov edx, [pu32]
3042# endif
3043 mov eax, [u32Old]
3044 mov ecx, [u32New]
3045# ifdef RT_ARCH_AMD64
3046 lock cmpxchg [rdx], ecx
3047# else
3048 lock cmpxchg [edx], ecx
3049# endif
3050 setz al
3051 movzx eax, al
3052 mov [u32Ret], eax
3053 }
3054 return !!u32Ret;
3055# endif
3056}
3057#endif
3058
3059
3060/**
3061 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3062 *
3063 * @returns true if xchg was done.
3064 * @returns false if xchg wasn't done.
3065 *
3066 * @param pi32 Pointer to the value to update.
3067 * @param i32New The new value to assigned to *pi32.
3068 * @param i32Old The old value to *pi32 compare with.
3069 */
3070DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3071{
3072 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3073}
3074
3075
3076/**
3077 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3078 *
3079 * @returns true if xchg was done.
3080 * @returns false if xchg wasn't done.
3081 *
3082 * @param pu64 Pointer to the 64-bit variable to update.
3083 * @param u64New The 64-bit value to assign to *pu64.
3084 * @param u64Old The value to compare with.
3085 */
3086#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3087 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3088DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3089#else
3090DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3091{
3092# if RT_INLINE_ASM_USES_INTRIN
3093 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3094
3095# elif defined(RT_ARCH_AMD64)
3096# if RT_INLINE_ASM_GNU_STYLE
3097 uint8_t u8Ret;
3098 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3099 "setz %1\n\t"
3100 : "=m" (*pu64),
3101 "=qm" (u8Ret),
3102 "=a" (u64Old)
3103 : "r" (u64New),
3104 "2" (u64Old),
3105 "m" (*pu64));
3106 return (bool)u8Ret;
3107# else
3108 bool fRet;
3109 __asm
3110 {
3111 mov rdx, [pu32]
3112 mov rax, [u64Old]
3113 mov rcx, [u64New]
3114 lock cmpxchg [rdx], rcx
3115 setz al
3116 mov [fRet], al
3117 }
3118 return fRet;
3119# endif
3120# else /* !RT_ARCH_AMD64 */
3121 uint32_t u32Ret;
3122# if RT_INLINE_ASM_GNU_STYLE
3123# if defined(PIC) || defined(__PIC__)
3124 uint32_t u32EBX = (uint32_t)u64New;
3125 uint32_t u32Spill;
3126 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3127 "lock; cmpxchg8b (%6)\n\t"
3128 "setz %%al\n\t"
3129 "movl %4, %%ebx\n\t"
3130 "movzbl %%al, %%eax\n\t"
3131 : "=a" (u32Ret),
3132 "=d" (u32Spill),
3133# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3134 "+m" (*pu64)
3135# else
3136 "=m" (*pu64)
3137# endif
3138 : "A" (u64Old),
3139 "m" ( u32EBX ),
3140 "c" ( (uint32_t)(u64New >> 32) ),
3141 "S" (pu64));
3142# else /* !PIC */
3143 uint32_t u32Spill;
3144 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3145 "setz %%al\n\t"
3146 "movzbl %%al, %%eax\n\t"
3147 : "=a" (u32Ret),
3148 "=d" (u32Spill),
3149 "+m" (*pu64)
3150 : "A" (u64Old),
3151 "b" ( (uint32_t)u64New ),
3152 "c" ( (uint32_t)(u64New >> 32) ));
3153# endif
3154 return (bool)u32Ret;
3155# else
3156 __asm
3157 {
3158 mov ebx, dword ptr [u64New]
3159 mov ecx, dword ptr [u64New + 4]
3160 mov edi, [pu64]
3161 mov eax, dword ptr [u64Old]
3162 mov edx, dword ptr [u64Old + 4]
3163 lock cmpxchg8b [edi]
3164 setz al
3165 movzx eax, al
3166 mov dword ptr [u32Ret], eax
3167 }
3168 return !!u32Ret;
3169# endif
3170# endif /* !RT_ARCH_AMD64 */
3171}
3172#endif
3173
3174
3175/**
3176 * Atomically Compare and exchange a signed 64-bit value, ordered.
3177 *
3178 * @returns true if xchg was done.
3179 * @returns false if xchg wasn't done.
3180 *
3181 * @param pi64 Pointer to the 64-bit variable to update.
3182 * @param i64 The 64-bit value to assign to *pu64.
3183 * @param i64Old The value to compare with.
3184 */
3185DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3186{
3187 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3188}
3189
3190
3191/**
3192 * Atomically Compare and Exchange a pointer value, ordered.
3193 *
3194 * @returns true if xchg was done.
3195 * @returns false if xchg wasn't done.
3196 *
3197 * @param ppv Pointer to the value to update.
3198 * @param pvNew The new value to assigned to *ppv.
3199 * @param pvOld The old value to *ppv compare with.
3200 */
3201DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3202{
3203#if ARCH_BITS == 32
3204 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3205#elif ARCH_BITS == 64
3206 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3207#else
3208# error "ARCH_BITS is bogus"
3209#endif
3210}
3211
3212
3213/** @def ASMAtomicCmpXchgHandle
3214 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3215 *
3216 * @param ph Pointer to the value to update.
3217 * @param hNew The new value to assigned to *pu.
3218 * @param hOld The old value to *pu compare with.
3219 * @param fRc Where to store the result.
3220 *
3221 * @remarks This doesn't currently work for all handles (like RTFILE).
3222 */
3223#if HC_ARCH_BITS == 32
3224# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3225 do { \
3226 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3227 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3228 } while (0)
3229#elif HC_ARCH_BITS == 64
3230# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3231 do { \
3232 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3233 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3234 } while (0)
3235#else
3236# error HC_ARCH_BITS
3237#endif
3238
3239
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ between
 * platforms or compilers, ordered.
 *
 * The operation is selected by sizeof(*(pu)).  Only 4 and 8 byte variables
 * are supported; any other size triggers an assertion, leaves *pu untouched
 * and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The value *pu is compared with.
 * @param   fRc     Variable receiving the result (true if exchanged).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
3261
3262
3263/**
3264 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3265 * passes back old value, ordered.
3266 *
3267 * @returns true if xchg was done.
3268 * @returns false if xchg wasn't done.
3269 *
3270 * @param pu32 Pointer to the value to update.
3271 * @param u32New The new value to assigned to *pu32.
3272 * @param u32Old The old value to *pu32 compare with.
3273 * @param pu32Old Pointer store the old value at.
3274 */
3275#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3276DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3277#else
3278DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3279{
3280# if RT_INLINE_ASM_GNU_STYLE
3281 uint8_t u8Ret;
3282 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3283 "setz %1\n\t"
3284 : "=m" (*pu32),
3285 "=qm" (u8Ret),
3286 "=a" (*pu32Old)
3287 : "r" (u32New),
3288 "a" (u32Old),
3289 "m" (*pu32));
3290 return (bool)u8Ret;
3291
3292# elif RT_INLINE_ASM_USES_INTRIN
3293 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3294
3295# else
3296 uint32_t u32Ret;
3297 __asm
3298 {
3299# ifdef RT_ARCH_AMD64
3300 mov rdx, [pu32]
3301# else
3302 mov edx, [pu32]
3303# endif
3304 mov eax, [u32Old]
3305 mov ecx, [u32New]
3306# ifdef RT_ARCH_AMD64
3307 lock cmpxchg [rdx], ecx
3308 mov rdx, [pu32Old]
3309 mov [rdx], eax
3310# else
3311 lock cmpxchg [edx], ecx
3312 mov edx, [pu32Old]
3313 mov [edx], eax
3314# endif
3315 setz al
3316 movzx eax, al
3317 mov [u32Ret], eax
3318 }
3319 return !!u32Ret;
3320# endif
3321}
3322#endif
3323
3324
3325/**
3326 * Atomically Compare and Exchange a signed 32-bit value, additionally
3327 * passes back old value, ordered.
3328 *
3329 * @returns true if xchg was done.
3330 * @returns false if xchg wasn't done.
3331 *
3332 * @param pi32 Pointer to the value to update.
3333 * @param i32New The new value to assigned to *pi32.
3334 * @param i32Old The old value to *pi32 compare with.
3335 * @param pi32Old Pointer store the old value at.
3336 */
3337DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3338{
3339 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3340}
3341
3342
3343/**
3344 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3345 * passing back old value, ordered.
3346 *
3347 * @returns true if xchg was done.
3348 * @returns false if xchg wasn't done.
3349 *
3350 * @param pu64 Pointer to the 64-bit variable to update.
3351 * @param u64New The 64-bit value to assign to *pu64.
3352 * @param u64Old The value to compare with.
3353 * @param pu64Old Pointer store the old value at.
3354 */
3355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3356DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3357#else
3358DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3359{
3360# if RT_INLINE_ASM_USES_INTRIN
3361 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3362
3363# elif defined(RT_ARCH_AMD64)
3364# if RT_INLINE_ASM_GNU_STYLE
3365 uint8_t u8Ret;
3366 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3367 "setz %1\n\t"
3368 : "=m" (*pu64),
3369 "=qm" (u8Ret),
3370 "=a" (*pu64Old)
3371 : "r" (u64New),
3372 "a" (u64Old),
3373 "m" (*pu64));
3374 return (bool)u8Ret;
3375# else
3376 bool fRet;
3377 __asm
3378 {
3379 mov rdx, [pu32]
3380 mov rax, [u64Old]
3381 mov rcx, [u64New]
3382 lock cmpxchg [rdx], rcx
3383 mov rdx, [pu64Old]
3384 mov [rdx], rax
3385 setz al
3386 mov [fRet], al
3387 }
3388 return fRet;
3389# endif
3390# else /* !RT_ARCH_AMD64 */
3391# if RT_INLINE_ASM_GNU_STYLE
3392 uint64_t u64Ret;
3393# if defined(PIC) || defined(__PIC__)
3394 /* NB: this code uses a memory clobber description, because the clean
3395 * solution with an output value for *pu64 makes gcc run out of registers.
3396 * This will cause suboptimal code, and anyone with a better solution is
3397 * welcome to improve this. */
3398 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3399 "lock; cmpxchg8b %3\n\t"
3400 "xchgl %%ebx, %1\n\t"
3401 : "=A" (u64Ret)
3402 : "DS" ((uint32_t)u64New),
3403 "c" ((uint32_t)(u64New >> 32)),
3404 "m" (*pu64),
3405 "0" (u64Old)
3406 : "memory" );
3407# else /* !PIC */
3408 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3409 : "=A" (u64Ret),
3410 "=m" (*pu64)
3411 : "b" ((uint32_t)u64New),
3412 "c" ((uint32_t)(u64New >> 32)),
3413 "m" (*pu64),
3414 "0" (u64Old));
3415# endif
3416 *pu64Old = u64Ret;
3417 return u64Ret == u64Old;
3418# else
3419 uint32_t u32Ret;
3420 __asm
3421 {
3422 mov ebx, dword ptr [u64New]
3423 mov ecx, dword ptr [u64New + 4]
3424 mov edi, [pu64]
3425 mov eax, dword ptr [u64Old]
3426 mov edx, dword ptr [u64Old + 4]
3427 lock cmpxchg8b [edi]
3428 mov ebx, [pu64Old]
3429 mov [ebx], eax
3430 setz al
3431 movzx eax, al
3432 add ebx, 4
3433 mov [ebx], edx
3434 mov dword ptr [u32Ret], eax
3435 }
3436 return !!u32Ret;
3437# endif
3438# endif /* !RT_ARCH_AMD64 */
3439}
3440#endif
3441
3442
3443/**
3444 * Atomically Compare and exchange a signed 64-bit value, additionally
3445 * passing back old value, ordered.
3446 *
3447 * @returns true if xchg was done.
3448 * @returns false if xchg wasn't done.
3449 *
3450 * @param pi64 Pointer to the 64-bit variable to update.
3451 * @param i64 The 64-bit value to assign to *pu64.
3452 * @param i64Old The value to compare with.
3453 * @param pi64Old Pointer store the old value at.
3454 */
3455DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3456{
3457 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3458}
3459
3460/** @def ASMAtomicCmpXchgExHandle
3461 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3462 *
3463 * @param ph Pointer to the value to update.
3464 * @param hNew The new value to assigned to *pu.
3465 * @param hOld The old value to *pu compare with.
3466 * @param fRc Where to store the result.
3467 * @param phOldVal Pointer to where to store the old value.
3468 *
3469 * @remarks This doesn't currently work for all handles (like RTFILE).
3470 */
3471#if HC_ARCH_BITS == 32
3472# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3473 do { \
3474 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3475 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3476 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3477 } while (0)
3478#elif HC_ARCH_BITS == 64
3479# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3480 do { \
3481 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3482 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3483 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3484 } while (0)
3485#else
3486# error HC_ARCH_BITS
3487#endif
3488
3489
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ between
 * platforms or compilers.  Additionally passes back the old value.
 *
 * The operation is selected by sizeof(*(pu)).  Only 4 and 8 byte variables
 * are supported; any other size triggers an assertion, leaves *pu untouched,
 * sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is compared with.
 * @param   fRc         Variable receiving the result (true if exchanged).
 * @param   puOldVal    Pointer to where to store the old value.  It is
 *                      written with the same width as *pu.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
3513
3514
3515/**
3516 * Atomically Compare and Exchange a pointer value, additionally
3517 * passing back old value, ordered.
3518 *
3519 * @returns true if xchg was done.
3520 * @returns false if xchg wasn't done.
3521 *
3522 * @param ppv Pointer to the value to update.
3523 * @param pvNew The new value to assigned to *ppv.
3524 * @param pvOld The old value to *ppv compare with.
3525 * @param ppvOld Pointer store the old value at.
3526 */
3527DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3528{
3529#if ARCH_BITS == 32
3530 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3531#elif ARCH_BITS == 64
3532 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3533#else
3534# error "ARCH_BITS is bogus"
3535#endif
3536}
3537
3538
3539/**
3540 * Atomically exchanges and adds to a 32-bit value, ordered.
3541 *
3542 * @returns The old value.
3543 * @param pu32 Pointer to the value.
3544 * @param u32 Number to add.
3545 */
3546#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3547DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3548#else
3549DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3550{
3551# if RT_INLINE_ASM_USES_INTRIN
3552 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3553 return u32;
3554
3555# elif RT_INLINE_ASM_GNU_STYLE
3556 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3557 : "=r" (u32),
3558 "=m" (*pu32)
3559 : "0" (u32),
3560 "m" (*pu32)
3561 : "memory");
3562 return u32;
3563# else
3564 __asm
3565 {
3566 mov eax, [u32]
3567# ifdef RT_ARCH_AMD64
3568 mov rdx, [pu32]
3569 lock xadd [rdx], eax
3570# else
3571 mov edx, [pu32]
3572 lock xadd [edx], eax
3573# endif
3574 mov [u32], eax
3575 }
3576 return u32;
3577# endif
3578}
3579#endif
3580
3581
3582/**
3583 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3584 *
3585 * @returns The old value.
3586 * @param pi32 Pointer to the value.
3587 * @param i32 Number to add.
3588 */
3589DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3590{
3591 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3592}
3593
3594
3595/**
3596 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
3597 *
3598 * @returns The old value.
3599 * @param pu32 Pointer to the value.
3600 * @param u32 Number to subtract.
3601 */
3602DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3603{
3604 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3605}
3606
3607
3608/**
3609 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
3610 *
3611 * @returns The old value.
3612 * @param pi32 Pointer to the value.
3613 * @param i32 Number to subtract.
3614 */
3615DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3616{
3617 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3618}
3619
3620
3621/**
3622 * Atomically increment a 32-bit value, ordered.
3623 *
3624 * @returns The new value.
3625 * @param pu32 Pointer to the value to increment.
3626 */
3627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3628DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3629#else
3630DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3631{
3632 uint32_t u32;
3633# if RT_INLINE_ASM_USES_INTRIN
3634 u32 = _InterlockedIncrement((long *)pu32);
3635 return u32;
3636
3637# elif RT_INLINE_ASM_GNU_STYLE
3638 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3639 : "=r" (u32),
3640 "=m" (*pu32)
3641 : "0" (1),
3642 "m" (*pu32)
3643 : "memory");
3644 return u32+1;
3645# else
3646 __asm
3647 {
3648 mov eax, 1
3649# ifdef RT_ARCH_AMD64
3650 mov rdx, [pu32]
3651 lock xadd [rdx], eax
3652# else
3653 mov edx, [pu32]
3654 lock xadd [edx], eax
3655# endif
3656 mov u32, eax
3657 }
3658 return u32+1;
3659# endif
3660}
3661#endif
3662
3663
3664/**
3665 * Atomically increment a signed 32-bit value, ordered.
3666 *
3667 * @returns The new value.
3668 * @param pi32 Pointer to the value to increment.
3669 */
3670DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3671{
3672 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3673}
3674
3675
3676/**
3677 * Atomically decrement an unsigned 32-bit value, ordered.
3678 *
3679 * @returns The new value.
3680 * @param pu32 Pointer to the value to decrement.
3681 */
3682#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3683DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3684#else
3685DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3686{
3687 uint32_t u32;
3688# if RT_INLINE_ASM_USES_INTRIN
3689 u32 = _InterlockedDecrement((long *)pu32);
3690 return u32;
3691
3692# elif RT_INLINE_ASM_GNU_STYLE
3693 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3694 : "=r" (u32),
3695 "=m" (*pu32)
3696 : "0" (-1),
3697 "m" (*pu32)
3698 : "memory");
3699 return u32-1;
3700# else
3701 __asm
3702 {
3703 mov eax, -1
3704# ifdef RT_ARCH_AMD64
3705 mov rdx, [pu32]
3706 lock xadd [rdx], eax
3707# else
3708 mov edx, [pu32]
3709 lock xadd [edx], eax
3710# endif
3711 mov u32, eax
3712 }
3713 return u32-1;
3714# endif
3715}
3716#endif
3717
3718
3719/**
3720 * Atomically decrement a signed 32-bit value, ordered.
3721 *
3722 * @returns The new value.
3723 * @param pi32 Pointer to the value to decrement.
3724 */
3725DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3726{
3727 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3728}
3729
3730
3731/**
3732 * Atomically Or an unsigned 32-bit value, ordered.
3733 *
3734 * @param pu32 Pointer to the pointer variable to OR u32 with.
3735 * @param u32 The value to OR *pu32 with.
3736 */
3737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3738DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3739#else
3740DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3741{
3742# if RT_INLINE_ASM_USES_INTRIN
3743 _InterlockedOr((long volatile *)pu32, (long)u32);
3744
3745# elif RT_INLINE_ASM_GNU_STYLE
3746 __asm__ __volatile__("lock; orl %1, %0\n\t"
3747 : "=m" (*pu32)
3748 : "ir" (u32),
3749 "m" (*pu32));
3750# else
3751 __asm
3752 {
3753 mov eax, [u32]
3754# ifdef RT_ARCH_AMD64
3755 mov rdx, [pu32]
3756 lock or [rdx], eax
3757# else
3758 mov edx, [pu32]
3759 lock or [edx], eax
3760# endif
3761 }
3762# endif
3763}
3764#endif
3765
3766
3767/**
3768 * Atomically Or a signed 32-bit value, ordered.
3769 *
3770 * @param pi32 Pointer to the pointer variable to OR u32 with.
3771 * @param i32 The value to OR *pu32 with.
3772 */
3773DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3774{
3775 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3776}
3777
3778
3779/**
3780 * Atomically And an unsigned 32-bit value, ordered.
3781 *
3782 * @param pu32 Pointer to the pointer variable to AND u32 with.
3783 * @param u32 The value to AND *pu32 with.
3784 */
3785#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3786DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3787#else
3788DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3789{
3790# if RT_INLINE_ASM_USES_INTRIN
3791 _InterlockedAnd((long volatile *)pu32, u32);
3792
3793# elif RT_INLINE_ASM_GNU_STYLE
3794 __asm__ __volatile__("lock; andl %1, %0\n\t"
3795 : "=m" (*pu32)
3796 : "ir" (u32),
3797 "m" (*pu32));
3798# else
3799 __asm
3800 {
3801 mov eax, [u32]
3802# ifdef RT_ARCH_AMD64
3803 mov rdx, [pu32]
3804 lock and [rdx], eax
3805# else
3806 mov edx, [pu32]
3807 lock and [edx], eax
3808# endif
3809 }
3810# endif
3811}
3812#endif
3813
3814
3815/**
3816 * Atomically And a signed 32-bit value, ordered.
3817 *
3818 * @param pi32 Pointer to the pointer variable to AND i32 with.
3819 * @param i32 The value to AND *pi32 with.
3820 */
3821DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3822{
3823 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3824}
3825
3826
3827/**
3828 * Serialize Instruction.
3829 */
3830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3831DECLASM(void) ASMSerializeInstruction(void);
3832#else
3833DECLINLINE(void) ASMSerializeInstruction(void)
3834{
3835# if RT_INLINE_ASM_GNU_STYLE
3836 RTCCUINTREG xAX = 0;
3837# ifdef RT_ARCH_AMD64
3838 __asm__ ("cpuid"
3839 : "=a" (xAX)
3840 : "0" (xAX)
3841 : "rbx", "rcx", "rdx");
3842# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3843 __asm__ ("push %%ebx\n\t"
3844 "cpuid\n\t"
3845 "pop %%ebx\n\t"
3846 : "=a" (xAX)
3847 : "0" (xAX)
3848 : "ecx", "edx");
3849# else
3850 __asm__ ("cpuid"
3851 : "=a" (xAX)
3852 : "0" (xAX)
3853 : "ebx", "ecx", "edx");
3854# endif
3855
3856# elif RT_INLINE_ASM_USES_INTRIN
3857 int aInfo[4];
3858 __cpuid(aInfo, 0);
3859
3860# else
3861 __asm
3862 {
3863 push ebx
3864 xor eax, eax
3865 cpuid
3866 pop ebx
3867 }
3868# endif
3869}
3870#endif
3871
3872
3873/**
3874 * Memory load/store fence, waits for any pending writes and reads to complete.
3875 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3876 */
3877DECLINLINE(void) ASMMemoryFenceSSE2(void)
3878{
3879#if RT_INLINE_ASM_GNU_STYLE
3880 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3881#elif RT_INLINE_ASM_USES_INTRIN
3882 _mm_mfence();
3883#else
3884 __asm
3885 {
3886 _emit 0x0f
3887 _emit 0xae
3888 _emit 0xf0
3889 }
3890#endif
3891}
3892
3893
3894/**
3895 * Memory store fence, waits for any writes to complete.
3896 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
3897 */
3898DECLINLINE(void) ASMWriteFenceSSE(void)
3899{
3900#if RT_INLINE_ASM_GNU_STYLE
3901 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
3902#elif RT_INLINE_ASM_USES_INTRIN
3903 _mm_sfence();
3904#else
3905 __asm
3906 {
3907 _emit 0x0f
3908 _emit 0xae
3909 _emit 0xf8
3910 }
3911#endif
3912}
3913
3914
3915/**
3916 * Memory load fence, waits for any pending reads to complete.
3917 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3918 */
3919DECLINLINE(void) ASMReadFenceSSE2(void)
3920{
3921#if RT_INLINE_ASM_GNU_STYLE
3922 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
3923#elif RT_INLINE_ASM_USES_INTRIN
3924 _mm_lfence();
3925#else
3926 __asm
3927 {
3928 _emit 0x0f
3929 _emit 0xae
3930 _emit 0xe8
3931 }
3932#endif
3933}
3934
3935
3936/**
3937 * Memory fence, waits for any pending writes and reads to complete.
3938 */
3939DECLINLINE(void) ASMMemoryFence(void)
3940{
3941 /** @todo use mfence? check if all cpus we care for support it. */
3942 uint32_t volatile u32;
3943 ASMAtomicXchgU32(&u32, 0);
3944}
3945
3946
3947/**
3948 * Write fence, waits for any pending writes to complete.
3949 */
3950DECLINLINE(void) ASMWriteFence(void)
3951{
3952 /** @todo use sfence? check if all cpus we care for support it. */
3953 ASMMemoryFence();
3954}
3955
3956
3957/**
3958 * Read fence, waits for any pending reads to complete.
3959 */
3960DECLINLINE(void) ASMReadFence(void)
3961{
3962 /** @todo use lfence? check if all cpus we care for support it. */
3963 ASMMemoryFence();
3964}
3965
3966
3967/**
3968 * Atomically reads an unsigned 8-bit value, ordered.
3969 *
3970 * @returns Current *pu8 value
3971 * @param pu8 Pointer to the 8-bit variable to read.
3972 */
3973DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3974{
3975 ASMMemoryFence();
3976 return *pu8; /* byte reads are atomic on x86 */
3977}
3978
3979
3980/**
3981 * Atomically reads an unsigned 8-bit value, unordered.
3982 *
3983 * @returns Current *pu8 value
3984 * @param pu8 Pointer to the 8-bit variable to read.
3985 */
3986DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3987{
3988 return *pu8; /* byte reads are atomic on x86 */
3989}
3990
3991
3992/**
3993 * Atomically reads a signed 8-bit value, ordered.
3994 *
3995 * @returns Current *pi8 value
3996 * @param pi8 Pointer to the 8-bit variable to read.
3997 */
3998DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3999{
4000 ASMMemoryFence();
4001 return *pi8; /* byte reads are atomic on x86 */
4002}
4003
4004
4005/**
4006 * Atomically reads a signed 8-bit value, unordered.
4007 *
4008 * @returns Current *pi8 value
4009 * @param pi8 Pointer to the 8-bit variable to read.
4010 */
4011DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4012{
4013 return *pi8; /* byte reads are atomic on x86 */
4014}
4015
4016
4017/**
4018 * Atomically reads an unsigned 16-bit value, ordered.
4019 *
4020 * @returns Current *pu16 value
4021 * @param pu16 Pointer to the 16-bit variable to read.
4022 */
4023DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4024{
4025 ASMMemoryFence();
4026 Assert(!((uintptr_t)pu16 & 1));
4027 return *pu16;
4028}
4029
4030
4031/**
4032 * Atomically reads an unsigned 16-bit value, unordered.
4033 *
4034 * @returns Current *pu16 value
4035 * @param pu16 Pointer to the 16-bit variable to read.
4036 */
4037DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4038{
4039 Assert(!((uintptr_t)pu16 & 1));
4040 return *pu16;
4041}
4042
4043
4044/**
4045 * Atomically reads a signed 16-bit value, ordered.
4046 *
4047 * @returns Current *pi16 value
4048 * @param pi16 Pointer to the 16-bit variable to read.
4049 */
4050DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4051{
4052 ASMMemoryFence();
4053 Assert(!((uintptr_t)pi16 & 1));
4054 return *pi16;
4055}
4056
4057
4058/**
4059 * Atomically reads a signed 16-bit value, unordered.
4060 *
4061 * @returns Current *pi16 value
4062 * @param pi16 Pointer to the 16-bit variable to read.
4063 */
4064DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4065{
4066 Assert(!((uintptr_t)pi16 & 1));
4067 return *pi16;
4068}
4069
4070
4071/**
4072 * Atomically reads an unsigned 32-bit value, ordered.
4073 *
4074 * @returns Current *pu32 value
4075 * @param pu32 Pointer to the 32-bit variable to read.
4076 */
4077DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4078{
4079 ASMMemoryFence();
4080 Assert(!((uintptr_t)pu32 & 3));
4081 return *pu32;
4082}
4083
4084
4085/**
4086 * Atomically reads an unsigned 32-bit value, unordered.
4087 *
4088 * @returns Current *pu32 value
4089 * @param pu32 Pointer to the 32-bit variable to read.
4090 */
4091DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4092{
4093 Assert(!((uintptr_t)pu32 & 3));
4094 return *pu32;
4095}
4096
4097
4098/**
4099 * Atomically reads a signed 32-bit value, ordered.
4100 *
4101 * @returns Current *pi32 value
4102 * @param pi32 Pointer to the 32-bit variable to read.
4103 */
4104DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4105{
4106 ASMMemoryFence();
4107 Assert(!((uintptr_t)pi32 & 3));
4108 return *pi32;
4109}
4110
4111
4112/**
4113 * Atomically reads a signed 32-bit value, unordered.
4114 *
4115 * @returns Current *pi32 value
4116 * @param pi32 Pointer to the 32-bit variable to read.
4117 */
4118DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4119{
4120 Assert(!((uintptr_t)pi32 & 3));
4121 return *pi32;
4122}
4123
4124
4125/**
4126 * Atomically reads an unsigned 64-bit value, ordered.
4127 *
4128 * @returns Current *pu64 value
4129 * @param pu64 Pointer to the 64-bit variable to read.
4130 * The memory pointed to must be writable.
4131 * @remark This will fault if the memory is read-only!
4132 */
4133#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4134 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4135DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4136#else
4137DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4138{
4139 uint64_t u64;
4140# ifdef RT_ARCH_AMD64
4141 Assert(!((uintptr_t)pu64 & 7));
4142/*# if RT_INLINE_ASM_GNU_STYLE
4143 __asm__ __volatile__( "mfence\n\t"
4144 "movq %1, %0\n\t"
4145 : "=r" (u64)
4146 : "m" (*pu64));
4147# else
4148 __asm
4149 {
4150 mfence
4151 mov rdx, [pu64]
4152 mov rax, [rdx]
4153 mov [u64], rax
4154 }
4155# endif*/
4156 ASMMemoryFence();
4157 u64 = *pu64;
4158# else /* !RT_ARCH_AMD64 */
4159# if RT_INLINE_ASM_GNU_STYLE
4160# if defined(PIC) || defined(__PIC__)
4161 uint32_t u32EBX = 0;
4162 Assert(!((uintptr_t)pu64 & 7));
4163 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4164 "lock; cmpxchg8b (%5)\n\t"
4165 "movl %3, %%ebx\n\t"
4166 : "=A" (u64),
4167# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4168 "+m" (*pu64)
4169# else
4170 "=m" (*pu64)
4171# endif
4172 : "0" (0),
4173 "m" (u32EBX),
4174 "c" (0),
4175 "S" (pu64));
4176# else /* !PIC */
4177 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4178 : "=A" (u64),
4179 "+m" (*pu64)
4180 : "0" (0),
4181 "b" (0),
4182 "c" (0));
4183# endif
4184# else
4185 Assert(!((uintptr_t)pu64 & 7));
4186 __asm
4187 {
4188 xor eax, eax
4189 xor edx, edx
4190 mov edi, pu64
4191 xor ecx, ecx
4192 xor ebx, ebx
4193 lock cmpxchg8b [edi]
4194 mov dword ptr [u64], eax
4195 mov dword ptr [u64 + 4], edx
4196 }
4197# endif
4198# endif /* !RT_ARCH_AMD64 */
4199 return u64;
4200}
4201#endif
4202
4203
4204/**
4205 * Atomically reads an unsigned 64-bit value, unordered.
4206 *
4207 * @returns Current *pu64 value
4208 * @param pu64 Pointer to the 64-bit variable to read.
4209 * The memory pointed to must be writable.
4210 * @remark This will fault if the memory is read-only!
4211 */
4212#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4213DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4214#else
4215DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4216{
4217 uint64_t u64;
4218# ifdef RT_ARCH_AMD64
4219 Assert(!((uintptr_t)pu64 & 7));
4220/*# if RT_INLINE_ASM_GNU_STYLE
4221 Assert(!((uintptr_t)pu64 & 7));
4222 __asm__ __volatile__("movq %1, %0\n\t"
4223 : "=r" (u64)
4224 : "m" (*pu64));
4225# else
4226 __asm
4227 {
4228 mov rdx, [pu64]
4229 mov rax, [rdx]
4230 mov [u64], rax
4231 }
4232# endif */
4233 u64 = *pu64;
4234# else /* !RT_ARCH_AMD64 */
4235# if RT_INLINE_ASM_GNU_STYLE
4236# if defined(PIC) || defined(__PIC__)
4237 uint32_t u32EBX = 0;
4238 uint32_t u32Spill;
4239 Assert(!((uintptr_t)pu64 & 7));
4240 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4241 "xor %%ecx,%%ecx\n\t"
4242 "xor %%edx,%%edx\n\t"
4243 "xchgl %%ebx, %3\n\t"
4244 "lock; cmpxchg8b (%4)\n\t"
4245 "movl %3, %%ebx\n\t"
4246 : "=A" (u64),
4247# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4248 "+m" (*pu64),
4249# else
4250 "=m" (*pu64),
4251# endif
4252 "=c" (u32Spill)
4253 : "m" (u32EBX),
4254 "S" (pu64));
4255# else /* !PIC */
4256 __asm__ __volatile__("cmpxchg8b %1\n\t"
4257 : "=A" (u64),
4258 "+m" (*pu64)
4259 : "0" (0),
4260 "b" (0),
4261 "c" (0));
4262# endif
4263# else
4264 Assert(!((uintptr_t)pu64 & 7));
4265 __asm
4266 {
4267 xor eax, eax
4268 xor edx, edx
4269 mov edi, pu64
4270 xor ecx, ecx
4271 xor ebx, ebx
4272 lock cmpxchg8b [edi]
4273 mov dword ptr [u64], eax
4274 mov dword ptr [u64 + 4], edx
4275 }
4276# endif
4277# endif /* !RT_ARCH_AMD64 */
4278 return u64;
4279}
4280#endif
4281
4282
4283/**
4284 * Atomically reads a signed 64-bit value, ordered.
4285 *
4286 * @returns Current *pi64 value
4287 * @param pi64 Pointer to the 64-bit variable to read.
4288 * The memory pointed to must be writable.
4289 * @remark This will fault if the memory is read-only!
4290 */
4291DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4292{
4293 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4294}
4295
4296
4297/**
4298 * Atomically reads a signed 64-bit value, unordered.
4299 *
4300 * @returns Current *pi64 value
4301 * @param pi64 Pointer to the 64-bit variable to read.
4302 * The memory pointed to must be writable.
4303 * @remark This will fault if the memory is read-only!
4304 */
4305DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4306{
4307 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4308}
4309
4310
4311/**
4312 * Atomically reads a pointer value, ordered.
4313 *
4314 * @returns Current *pv value
4315 * @param ppv Pointer to the pointer variable to read.
4316 */
4317DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4318{
4319#if ARCH_BITS == 32
4320 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4321#elif ARCH_BITS == 64
4322 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4323#else
4324# error "ARCH_BITS is bogus"
4325#endif
4326}
4327
4328
4329/**
4330 * Atomically reads a pointer value, unordered.
4331 *
4332 * @returns Current *pv value
4333 * @param ppv Pointer to the pointer variable to read.
4334 */
4335DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4336{
4337#if ARCH_BITS == 32
4338 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4339#elif ARCH_BITS == 64
4340 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4341#else
4342# error "ARCH_BITS is bogus"
4343#endif
4344}
4345
4346
4347/**
4348 * Atomically reads a boolean value, ordered.
4349 *
4350 * @returns Current *pf value
4351 * @param pf Pointer to the boolean variable to read.
4352 */
4353DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4354{
4355 ASMMemoryFence();
4356 return *pf; /* byte reads are atomic on x86 */
4357}
4358
4359
4360/**
4361 * Atomically reads a boolean value, unordered.
4362 *
4363 * @returns Current *pf value
4364 * @param pf Pointer to the boolean variable to read.
4365 */
4366DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4367{
4368 return *pf; /* byte reads are atomic on x86 */
4369}
4370
4371
4372/**
4373 * Atomically read a typical IPRT handle value, ordered.
4374 *
4375 * @param ph Pointer to the handle variable to read.
4376 * @param phRes Where to store the result.
4377 *
4378 * @remarks This doesn't currently work for all handles (like RTFILE).
4379 */
4380#if HC_ARCH_BITS == 32
4381# define ASMAtomicReadHandle(ph, phRes) \
4382 do { \
4383 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4384 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4385 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4386 } while (0)
4387#elif HC_ARCH_BITS == 64
4388# define ASMAtomicReadHandle(ph, phRes) \
4389 do { \
4390 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4391 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4392 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4393 } while (0)
4394#else
4395# error HC_ARCH_BITS
4396#endif
4397
4398
4399/**
4400 * Atomically read a typical IPRT handle value, unordered.
4401 *
4402 * @param ph Pointer to the handle variable to read.
4403 * @param phRes Where to store the result.
4404 *
4405 * @remarks This doesn't currently work for all handles (like RTFILE).
4406 */
4407#if HC_ARCH_BITS == 32
4408# define ASMAtomicUoReadHandle(ph, phRes) \
4409 do { \
4410 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4411 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4412 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4413 } while (0)
4414#elif HC_ARCH_BITS == 64
4415# define ASMAtomicUoReadHandle(ph, phRes) \
4416 do { \
4417 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4418 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4419 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4420 } while (0)
4421#else
4422# error HC_ARCH_BITS
4423#endif
4424
4425
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion and nothing is stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4443
4444
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion and nothing is stored.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4462
4463
4464/**
4465 * Atomically writes an unsigned 8-bit value, ordered.
4466 *
4467 * @param pu8 Pointer to the 8-bit variable.
4468 * @param u8 The 8-bit value to assign to *pu8.
4469 */
4470DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4471{
4472 ASMAtomicXchgU8(pu8, u8);
4473}
4474
4475
4476/**
4477 * Atomically writes an unsigned 8-bit value, unordered.
4478 *
4479 * @param pu8 Pointer to the 8-bit variable.
4480 * @param u8 The 8-bit value to assign to *pu8.
4481 */
4482DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4483{
4484 *pu8 = u8; /* byte writes are atomic on x86 */
4485}
4486
4487
4488/**
4489 * Atomically writes a signed 8-bit value, ordered.
4490 *
4491 * @param pi8 Pointer to the 8-bit variable to read.
4492 * @param i8 The 8-bit value to assign to *pi8.
4493 */
4494DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4495{
4496 ASMAtomicXchgS8(pi8, i8);
4497}
4498
4499
4500/**
4501 * Atomically writes a signed 8-bit value, unordered.
4502 *
4503 * @param pi8 Pointer to the 8-bit variable to read.
4504 * @param i8 The 8-bit value to assign to *pi8.
4505 */
4506DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4507{
4508 *pi8 = i8; /* byte writes are atomic on x86 */
4509}
4510
4511
4512/**
4513 * Atomically writes an unsigned 16-bit value, ordered.
4514 *
4515 * @param pu16 Pointer to the 16-bit variable.
4516 * @param u16 The 16-bit value to assign to *pu16.
4517 */
4518DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4519{
4520 ASMAtomicXchgU16(pu16, u16);
4521}
4522
4523
4524/**
4525 * Atomically writes an unsigned 16-bit value, unordered.
4526 *
4527 * @param pu16 Pointer to the 16-bit variable.
4528 * @param u16 The 16-bit value to assign to *pu16.
4529 */
4530DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4531{
4532 Assert(!((uintptr_t)pu16 & 1));
4533 *pu16 = u16;
4534}
4535
4536
4537/**
4538 * Atomically writes a signed 16-bit value, ordered.
4539 *
4540 * @param pi16 Pointer to the 16-bit variable to read.
4541 * @param i16 The 16-bit value to assign to *pi16.
4542 */
4543DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4544{
4545 ASMAtomicXchgS16(pi16, i16);
4546}
4547
4548
4549/**
4550 * Atomically writes a signed 16-bit value, unordered.
4551 *
4552 * @param pi16 Pointer to the 16-bit variable to read.
4553 * @param i16 The 16-bit value to assign to *pi16.
4554 */
4555DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4556{
4557 Assert(!((uintptr_t)pi16 & 1));
4558 *pi16 = i16;
4559}
4560
4561
4562/**
4563 * Atomically writes an unsigned 32-bit value, ordered.
4564 *
4565 * @param pu32 Pointer to the 32-bit variable.
4566 * @param u32 The 32-bit value to assign to *pu32.
4567 */
4568DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4569{
4570 ASMAtomicXchgU32(pu32, u32);
4571}
4572
4573
4574/**
4575 * Atomically writes an unsigned 32-bit value, unordered.
4576 *
4577 * @param pu32 Pointer to the 32-bit variable.
4578 * @param u32 The 32-bit value to assign to *pu32.
4579 */
4580DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4581{
4582 Assert(!((uintptr_t)pu32 & 3));
4583 *pu32 = u32;
4584}
4585
4586
4587/**
4588 * Atomically writes a signed 32-bit value, ordered.
4589 *
4590 * @param pi32 Pointer to the 32-bit variable to read.
4591 * @param i32 The 32-bit value to assign to *pi32.
4592 */
4593DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4594{
4595 ASMAtomicXchgS32(pi32, i32);
4596}
4597
4598
4599/**
4600 * Atomically writes a signed 32-bit value, unordered.
4601 *
4602 * @param pi32 Pointer to the 32-bit variable to read.
4603 * @param i32 The 32-bit value to assign to *pi32.
4604 */
4605DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4606{
4607 Assert(!((uintptr_t)pi32 & 3));
4608 *pi32 = i32;
4609}
4610
4611
4612/**
4613 * Atomically writes an unsigned 64-bit value, ordered.
4614 *
4615 * @param pu64 Pointer to the 64-bit variable.
4616 * @param u64 The 64-bit value to assign to *pu64.
4617 */
4618DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4619{
4620 ASMAtomicXchgU64(pu64, u64);
4621}
4622
4623
4624/**
4625 * Atomically writes an unsigned 64-bit value, unordered.
4626 *
4627 * @param pu64 Pointer to the 64-bit variable.
4628 * @param u64 The 64-bit value to assign to *pu64.
4629 */
4630DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4631{
4632 Assert(!((uintptr_t)pu64 & 7));
4633#if ARCH_BITS == 64
4634 *pu64 = u64;
4635#else
4636 ASMAtomicXchgU64(pu64, u64);
4637#endif
4638}
4639
4640
4641/**
4642 * Atomically writes a signed 64-bit value, ordered.
4643 *
4644 * @param pi64 Pointer to the 64-bit variable.
4645 * @param i64 The 64-bit value to assign to *pi64.
4646 */
4647DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4648{
4649 ASMAtomicXchgS64(pi64, i64);
4650}
4651
4652
4653/**
4654 * Atomically writes a signed 64-bit value, unordered.
4655 *
4656 * @param pi64 Pointer to the 64-bit variable.
4657 * @param i64 The 64-bit value to assign to *pi64.
4658 */
4659DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4660{
4661 Assert(!((uintptr_t)pi64 & 7));
4662#if ARCH_BITS == 64
4663 *pi64 = i64;
4664#else
4665 ASMAtomicXchgS64(pi64, i64);
4666#endif
4667}
4668
4669
4670/**
4671 * Atomically writes a boolean value, unordered.
4672 *
4673 * @param pf Pointer to the boolean variable.
4674 * @param f The boolean value to assign to *pf.
4675 */
4676DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4677{
4678 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4679}
4680
4681
4682/**
4683 * Atomically writes a boolean value, unordered.
4684 *
4685 * @param pf Pointer to the boolean variable.
4686 * @param f The boolean value to assign to *pf.
4687 */
4688DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4689{
4690 *pf = f; /* byte writes are atomic on x86 */
4691}
4692
4693
4694/**
4695 * Atomically writes a pointer value, ordered.
4696 *
4697 * @returns Current *pv value
4698 * @param ppv Pointer to the pointer variable.
4699 * @param pv The pointer value to assigne to *ppv.
4700 */
4701DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4702{
4703#if ARCH_BITS == 32
4704 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4705#elif ARCH_BITS == 64
4706 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4707#else
4708# error "ARCH_BITS is bogus"
4709#endif
4710}
4711
4712
4713/**
4714 * Atomically writes a pointer value, unordered.
4715 *
4716 * @returns Current *pv value
4717 * @param ppv Pointer to the pointer variable.
4718 * @param pv The pointer value to assigne to *ppv.
4719 */
4720DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4721{
4722#if ARCH_BITS == 32
4723 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4724#elif ARCH_BITS == 64
4725 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4726#else
4727# error "ARCH_BITS is bogus"
4728#endif
4729}
4730
4731
4732/**
4733 * Atomically write a typical IPRT handle value, ordered.
4734 *
4735 * @param ph Pointer to the variable to update.
4736 * @param hNew The value to assign to *ph.
4737 *
4738 * @remarks This doesn't currently work for all handles (like RTFILE).
4739 */
4740#if HC_ARCH_BITS == 32
4741# define ASMAtomicWriteHandle(ph, hNew) \
4742 do { \
4743 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4744 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4745 } while (0)
4746#elif HC_ARCH_BITS == 64
4747# define ASMAtomicWriteHandle(ph, hNew) \
4748 do { \
4749 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4750 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4751 } while (0)
4752#else
4753# error HC_ARCH_BITS
4754#endif
4755
4756
4757/**
4758 * Atomically write a typical IPRT handle value, unordered.
4759 *
4760 * @param ph Pointer to the variable to update.
4761 * @param hNew The value to assign to *ph.
4762 *
4763 * @remarks This doesn't currently work for all handles (like RTFILE).
4764 */
4765#if HC_ARCH_BITS == 32
4766# define ASMAtomicUoWriteHandle(ph, hNew) \
4767 do { \
4768 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4769 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4770 } while (0)
4771#elif HC_ARCH_BITS == 64
4772# define ASMAtomicUoWriteHandle(ph, hNew) \
4773 do { \
4774 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4775 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4776 } while (0)
4777#else
4778# error HC_ARCH_BITS
4779#endif
4780
4781
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4799
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion and nothing is written.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast so the argument matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4817
4818
4819
4820
4821/**
4822 * Invalidate page.
4823 *
4824 * @param pv Address of the page to invalidate.
4825 */
4826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4827DECLASM(void) ASMInvalidatePage(void *pv);
4828#else
4829DECLINLINE(void) ASMInvalidatePage(void *pv)
4830{
4831# if RT_INLINE_ASM_USES_INTRIN
4832 __invlpg(pv);
4833
4834# elif RT_INLINE_ASM_GNU_STYLE
4835 __asm__ __volatile__("invlpg %0\n\t"
4836 : : "m" (*(uint8_t *)pv));
4837# else
4838 __asm
4839 {
4840# ifdef RT_ARCH_AMD64
4841 mov rax, [pv]
4842 invlpg [rax]
4843# else
4844 mov eax, [pv]
4845 invlpg [eax]
4846# endif
4847 }
4848# endif
4849}
4850#endif
4851
4852
4853/**
4854 * Write back the internal caches and invalidate them.
4855 */
4856#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4857DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4858#else
4859DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4860{
4861# if RT_INLINE_ASM_USES_INTRIN
4862 __wbinvd();
4863
4864# elif RT_INLINE_ASM_GNU_STYLE
4865 __asm__ __volatile__("wbinvd");
4866# else
4867 __asm
4868 {
4869 wbinvd
4870 }
4871# endif
4872}
4873#endif
4874
4875
4876/**
4877 * Invalidate internal and (perhaps) external caches without first
4878 * flushing dirty cache lines. Use with extreme care.
4879 */
4880#if RT_INLINE_ASM_EXTERNAL
4881DECLASM(void) ASMInvalidateInternalCaches(void);
4882#else
4883DECLINLINE(void) ASMInvalidateInternalCaches(void)
4884{
4885# if RT_INLINE_ASM_GNU_STYLE
4886 __asm__ __volatile__("invd");
4887# else
4888 __asm
4889 {
4890 invd
4891 }
4892# endif
4893}
4894#endif
4895
4896
/* The page helpers below hard-code 0x1000; refuse to build if a visible
   PAGE_SIZE disagrees.  The value test stays in a nested #if so PAGE_SIZE is
   only evaluated when the outer condition holds. */
#if !defined(NT_INCLUDED) && defined(PAGE_SIZE)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
4902
4903/**
4904 * Zeros a 4K memory page.
4905 *
4906 * @param pv Pointer to the memory block. This must be page aligned.
4907 */
4908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4909DECLASM(void) ASMMemZeroPage(volatile void *pv);
4910# else
4911DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4912{
4913# if RT_INLINE_ASM_USES_INTRIN
4914# ifdef RT_ARCH_AMD64
4915 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4916# else
4917 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4918# endif
4919
4920# elif RT_INLINE_ASM_GNU_STYLE
4921 RTCCUINTREG uDummy;
4922# ifdef RT_ARCH_AMD64
4923 __asm__ __volatile__("rep stosq"
4924 : "=D" (pv),
4925 "=c" (uDummy)
4926 : "0" (pv),
4927 "c" (0x1000 >> 3),
4928 "a" (0)
4929 : "memory");
4930# else
4931 __asm__ __volatile__("rep stosl"
4932 : "=D" (pv),
4933 "=c" (uDummy)
4934 : "0" (pv),
4935 "c" (0x1000 >> 2),
4936 "a" (0)
4937 : "memory");
4938# endif
4939# else
4940 __asm
4941 {
4942# ifdef RT_ARCH_AMD64
4943 xor rax, rax
4944 mov ecx, 0200h
4945 mov rdi, [pv]
4946 rep stosq
4947# else
4948 xor eax, eax
4949 mov ecx, 0400h
4950 mov edi, [pv]
4951 rep stosd
4952# endif
4953 }
4954# endif
4955}
4956# endif
4957
4958
4959/**
4960 * Zeros a memory block with a 32-bit aligned size.
4961 *
4962 * @param pv Pointer to the memory block.
4963 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4964 */
4965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4966DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4967#else
4968DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4969{
4970# if RT_INLINE_ASM_USES_INTRIN
4971# ifdef RT_ARCH_AMD64
4972 if (!(cb & 7))
4973 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4974 else
4975# endif
4976 __stosd((unsigned long *)pv, 0, cb / 4);
4977
4978# elif RT_INLINE_ASM_GNU_STYLE
4979 __asm__ __volatile__("rep stosl"
4980 : "=D" (pv),
4981 "=c" (cb)
4982 : "0" (pv),
4983 "1" (cb >> 2),
4984 "a" (0)
4985 : "memory");
4986# else
4987 __asm
4988 {
4989 xor eax, eax
4990# ifdef RT_ARCH_AMD64
4991 mov rcx, [cb]
4992 shr rcx, 2
4993 mov rdi, [pv]
4994# else
4995 mov ecx, [cb]
4996 shr ecx, 2
4997 mov edi, [pv]
4998# endif
4999 rep stosd
5000 }
5001# endif
5002}
5003#endif
5004
5005
5006/**
5007 * Fills a memory block with a 32-bit aligned size.
5008 *
5009 * @param pv Pointer to the memory block.
5010 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5011 * @param u32 The value to fill with.
5012 */
5013#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5014DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5015#else
5016DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5017{
5018# if RT_INLINE_ASM_USES_INTRIN
5019# ifdef RT_ARCH_AMD64
5020 if (!(cb & 7))
5021 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5022 else
5023# endif
5024 __stosd((unsigned long *)pv, u32, cb / 4);
5025
5026# elif RT_INLINE_ASM_GNU_STYLE
5027 __asm__ __volatile__("rep stosl"
5028 : "=D" (pv),
5029 "=c" (cb)
5030 : "0" (pv),
5031 "1" (cb >> 2),
5032 "a" (u32)
5033 : "memory");
5034# else
5035 __asm
5036 {
5037# ifdef RT_ARCH_AMD64
5038 mov rcx, [cb]
5039 shr rcx, 2
5040 mov rdi, [pv]
5041# else
5042 mov ecx, [cb]
5043 shr ecx, 2
5044 mov edi, [pv]
5045# endif
5046 mov eax, [u32]
5047 rep stosd
5048 }
5049# endif
5050}
5051#endif
5052
5053
5054/**
5055 * Checks if a memory page is all zeros.
5056 *
5057 * @returns true / false.
5058 *
5059 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5060 * boundrary
5061 */
5062DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
5063{
5064# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5065 union { RTCCUINTREG r; bool f; } uAX;
5066 RTCCUINTREG xCX, xDI;
5067 Assert(!((uintptr_t)pvPage & 15));
5068 __asm__ __volatile__("repe; "
5069# ifdef RT_ARCH_AMD64
5070 "scasq\n\t"
5071# else
5072 "scasl\n\t"
5073# endif
5074 "setnc %%al\n\t"
5075 : "=&c" (xCX),
5076 "=&D" (xDI),
5077 "=&a" (uAX.r)
5078 : "mr" (pvPage),
5079# ifdef RT_ARCH_AMD64
5080 "0" (0x1000/8),
5081# else
5082 "0" (0x1000/4),
5083# endif
5084 "1" (pvPage),
5085 "2" (0));
5086 return uAX.f;
5087# else
5088 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
5089 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
5090 Assert(!((uintptr_t)pvPage & 15));
5091 for (;;)
5092 {
5093 if (puPtr[0]) return false;
5094 if (puPtr[4]) return false;
5095
5096 if (puPtr[2]) return false;
5097 if (puPtr[6]) return false;
5098
5099 if (puPtr[1]) return false;
5100 if (puPtr[5]) return false;
5101
5102 if (puPtr[3]) return false;
5103 if (puPtr[7]) return false;
5104
5105 if (!--cLeft)
5106 return true;
5107 puPtr += 8;
5108 }
5109 return true;
5110# endif
5111}
5112
5113
5114/**
5115 * Checks if a memory block is filled with the specified byte.
5116 *
5117 * This is a sort of inverted memchr.
5118 *
5119 * @returns Pointer to the byte which doesn't equal u8.
5120 * @returns NULL if all equal to u8.
5121 *
5122 * @param pv Pointer to the memory block.
5123 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5124 * @param u8 The value it's supposed to be filled with.
5125 */
5126#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5127DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5128#else
5129DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5130{
5131/** @todo rewrite this in inline assembly? */
5132 uint8_t const *pb = (uint8_t const *)pv;
5133 for (; cb; cb--, pb++)
5134 if (RT_UNLIKELY(*pb != u8))
5135 return (void *)pb;
5136 return NULL;
5137}
5138#endif
5139
5140
5141/**
5142 * Checks if a memory block is filled with the specified 32-bit value.
5143 *
5144 * This is a sort of inverted memchr.
5145 *
5146 * @returns Pointer to the first value which doesn't equal u32.
5147 * @returns NULL if all equal to u32.
5148 *
5149 * @param pv Pointer to the memory block.
5150 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5151 * @param u32 The value it's supposed to be filled with.
5152 */
5153#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5154DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5155#else
5156DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5157{
5158/** @todo rewrite this in inline assembly? */
5159 uint32_t const *pu32 = (uint32_t const *)pv;
5160 for (; cb; cb -= 4, pu32++)
5161 if (RT_UNLIKELY(*pu32 != u32))
5162 return (uint32_t *)pu32;
5163 return NULL;
5164}
5165#endif
5166
5167
5168/**
5169 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5170 *
5171 * @returns u32F1 * u32F2.
5172 */
5173#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5174DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5175#else
5176DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5177{
5178# ifdef RT_ARCH_AMD64
5179 return (uint64_t)u32F1 * u32F2;
5180# else /* !RT_ARCH_AMD64 */
5181 uint64_t u64;
5182# if RT_INLINE_ASM_GNU_STYLE
5183 __asm__ __volatile__("mull %%edx"
5184 : "=A" (u64)
5185 : "a" (u32F2), "d" (u32F1));
5186# else
5187 __asm
5188 {
5189 mov edx, [u32F1]
5190 mov eax, [u32F2]
5191 mul edx
5192 mov dword ptr [u64], eax
5193 mov dword ptr [u64 + 4], edx
5194 }
5195# endif
5196 return u64;
5197# endif /* !RT_ARCH_AMD64 */
5198}
5199#endif
5200
5201
5202/**
5203 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5204 *
5205 * @returns u32F1 * u32F2.
5206 */
5207#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5208DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5209#else
5210DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5211{
5212# ifdef RT_ARCH_AMD64
5213 return (int64_t)i32F1 * i32F2;
5214# else /* !RT_ARCH_AMD64 */
5215 int64_t i64;
5216# if RT_INLINE_ASM_GNU_STYLE
5217 __asm__ __volatile__("imull %%edx"
5218 : "=A" (i64)
5219 : "a" (i32F2), "d" (i32F1));
5220# else
5221 __asm
5222 {
5223 mov edx, [i32F1]
5224 mov eax, [i32F2]
5225 imul edx
5226 mov dword ptr [i64], eax
5227 mov dword ptr [i64 + 4], edx
5228 }
5229# endif
5230 return i64;
5231# endif /* !RT_ARCH_AMD64 */
5232}
5233#endif
5234
5235
5236/**
5237 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5238 *
5239 * @returns u64 / u32.
5240 */
5241#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5242DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5243#else
5244DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5245{
5246# ifdef RT_ARCH_AMD64
5247 return (uint32_t)(u64 / u32);
5248# else /* !RT_ARCH_AMD64 */
5249# if RT_INLINE_ASM_GNU_STYLE
5250 RTCCUINTREG uDummy;
5251 __asm__ __volatile__("divl %3"
5252 : "=a" (u32), "=d"(uDummy)
5253 : "A" (u64), "r" (u32));
5254# else
5255 __asm
5256 {
5257 mov eax, dword ptr [u64]
5258 mov edx, dword ptr [u64 + 4]
5259 mov ecx, [u32]
5260 div ecx
5261 mov [u32], eax
5262 }
5263# endif
5264 return u32;
5265# endif /* !RT_ARCH_AMD64 */
5266}
5267#endif
5268
5269
5270/**
5271 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5272 *
5273 * @returns u64 / u32.
5274 */
5275#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5276DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5277#else
5278DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5279{
5280# ifdef RT_ARCH_AMD64
5281 return (int32_t)(i64 / i32);
5282# else /* !RT_ARCH_AMD64 */
5283# if RT_INLINE_ASM_GNU_STYLE
5284 RTCCUINTREG iDummy;
5285 __asm__ __volatile__("idivl %3"
5286 : "=a" (i32), "=d"(iDummy)
5287 : "A" (i64), "r" (i32));
5288# else
5289 __asm
5290 {
5291 mov eax, dword ptr [i64]
5292 mov edx, dword ptr [i64 + 4]
5293 mov ecx, [i32]
5294 idiv ecx
5295 mov [i32], eax
5296 }
5297# endif
5298 return i32;
5299# endif /* !RT_ARCH_AMD64 */
5300}
5301#endif
5302
5303
5304/**
5305 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5306 * returning the rest.
5307 *
5308 * @returns u64 % u32.
5309 *
5310 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5311 */
5312#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5313DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5314#else
5315DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5316{
5317# ifdef RT_ARCH_AMD64
5318 return (uint32_t)(u64 % u32);
5319# else /* !RT_ARCH_AMD64 */
5320# if RT_INLINE_ASM_GNU_STYLE
5321 RTCCUINTREG uDummy;
5322 __asm__ __volatile__("divl %3"
5323 : "=a" (uDummy), "=d"(u32)
5324 : "A" (u64), "r" (u32));
5325# else
5326 __asm
5327 {
5328 mov eax, dword ptr [u64]
5329 mov edx, dword ptr [u64 + 4]
5330 mov ecx, [u32]
5331 div ecx
5332 mov [u32], edx
5333 }
5334# endif
5335 return u32;
5336# endif /* !RT_ARCH_AMD64 */
5337}
5338#endif
5339
5340
5341/**
5342 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5343 * returning the rest.
5344 *
5345 * @returns u64 % u32.
5346 *
5347 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5348 */
5349#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5350DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5351#else
5352DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5353{
5354# ifdef RT_ARCH_AMD64
5355 return (int32_t)(i64 % i32);
5356# else /* !RT_ARCH_AMD64 */
5357# if RT_INLINE_ASM_GNU_STYLE
5358 RTCCUINTREG iDummy;
5359 __asm__ __volatile__("idivl %3"
5360 : "=a" (iDummy), "=d"(i32)
5361 : "A" (i64), "r" (i32));
5362# else
5363 __asm
5364 {
5365 mov eax, dword ptr [i64]
5366 mov edx, dword ptr [i64 + 4]
5367 mov ecx, [i32]
5368 idiv ecx
5369 mov [i32], edx
5370 }
5371# endif
5372 return i32;
5373# endif /* !RT_ARCH_AMD64 */
5374}
5375#endif
5376
5377
5378/**
5379 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
5380 * using a 96 bit intermediate result.
5381 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5382 * __udivdi3 and __umoddi3 even if this inline function is not used.
5383 *
5384 * @returns (u64A * u32B) / u32C.
5385 * @param u64A The 64-bit value.
5386 * @param u32B The 32-bit value to multiple by A.
5387 * @param u32C The 32-bit value to divide A*B by.
5388 */
5389#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5390DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5391#else
5392DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5393{
5394# if RT_INLINE_ASM_GNU_STYLE
5395# ifdef RT_ARCH_AMD64
5396 uint64_t u64Result, u64Spill;
5397 __asm__ __volatile__("mulq %2\n\t"
5398 "divq %3\n\t"
5399 : "=a" (u64Result),
5400 "=d" (u64Spill)
5401 : "r" ((uint64_t)u32B),
5402 "r" ((uint64_t)u32C),
5403 "0" (u64A),
5404 "1" (0));
5405 return u64Result;
5406# else
5407 uint32_t u32Dummy;
5408 uint64_t u64Result;
5409 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5410 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5411 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5412 eax = u64A.hi */
5413 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5414 edx = u32C */
5415 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5416 edx = u32B */
5417 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5418 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5419 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5420 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5421 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5422 edx = u64Hi % u32C */
5423 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5424 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5425 "divl %%ecx \n\t" /* u64Result.lo */
5426 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5427 : "=A"(u64Result), "=c"(u32Dummy),
5428 "=S"(u32Dummy), "=D"(u32Dummy)
5429 : "a"((uint32_t)u64A),
5430 "S"((uint32_t)(u64A >> 32)),
5431 "c"(u32B),
5432 "D"(u32C));
5433 return u64Result;
5434# endif
5435# else
5436 RTUINT64U u;
5437 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5438 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5439 u64Hi += (u64Lo >> 32);
5440 u.s.Hi = (uint32_t)(u64Hi / u32C);
5441 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5442 return u.u;
5443# endif
5444}
5445#endif
5446
5447
5448/**
5449 * Probes a byte pointer for read access.
5450 *
5451 * While the function will not fault if the byte is not read accessible,
5452 * the idea is to do this in a safe place like before acquiring locks
5453 * and such like.
5454 *
5455 * Also, this functions guarantees that an eager compiler is not going
5456 * to optimize the probing away.
5457 *
5458 * @param pvByte Pointer to the byte.
5459 */
5460#if RT_INLINE_ASM_EXTERNAL
5461DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5462#else
5463DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5464{
5465 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5466 uint8_t u8;
5467# if RT_INLINE_ASM_GNU_STYLE
5468 __asm__ __volatile__("movb (%1), %0\n\t"
5469 : "=r" (u8)
5470 : "r" (pvByte));
5471# else
5472 __asm
5473 {
5474# ifdef RT_ARCH_AMD64
5475 mov rax, [pvByte]
5476 mov al, [rax]
5477# else
5478 mov eax, [pvByte]
5479 mov al, [eax]
5480# endif
5481 mov [u8], al
5482 }
5483# endif
5484 return u8;
5485}
5486#endif
5487
5488/**
5489 * Probes a buffer for read access page by page.
5490 *
5491 * While the function will fault if the buffer is not fully read
5492 * accessible, the idea is to do this in a safe place like before
5493 * acquiring locks and such like.
5494 *
5495 * Also, this functions guarantees that an eager compiler is not going
5496 * to optimize the probing away.
5497 *
5498 * @param pvBuf Pointer to the buffer.
5499 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5500 */
5501DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5502{
5503 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5504 /* the first byte */
5505 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5506 ASMProbeReadByte(pu8);
5507
5508 /* the pages in between pages. */
5509 while (cbBuf > /*PAGE_SIZE*/0x1000)
5510 {
5511 ASMProbeReadByte(pu8);
5512 cbBuf -= /*PAGE_SIZE*/0x1000;
5513 pu8 += /*PAGE_SIZE*/0x1000;
5514 }
5515
5516 /* the last byte */
5517 ASMProbeReadByte(pu8 + cbBuf - 1);
5518}
5519
5520
/** @def ASMBreakpoint
 * Triggers a debugger breakpoint.
 * @remark  With GNU style inline assembly a nop follows the int3 so that gdb
 *          stays on the source line of the int3.
 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the
 *          jmp in the L4 variant.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__("int3\n\tnop"); } while (0)
#endif
5537
5538
5539
5540/** @defgroup grp_inline_bits Bit Operations
5541 * @{
5542 */
5543
5544
5545/**
5546 * Sets a bit in a bitmap.
5547 *
5548 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5549 * @param iBit The bit to set.
5550 *
5551 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5552 * However, doing so will yield better performance as well as avoiding
5553 * traps accessing the last bits in the bitmap.
5554 */
5555#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5556DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5557#else
5558DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5559{
5560# if RT_INLINE_ASM_USES_INTRIN
5561 _bittestandset((long *)pvBitmap, iBit);
5562
5563# elif RT_INLINE_ASM_GNU_STYLE
5564 __asm__ __volatile__("btsl %1, %0"
5565 : "=m" (*(volatile long *)pvBitmap)
5566 : "Ir" (iBit),
5567 "m" (*(volatile long *)pvBitmap)
5568 : "memory");
5569# else
5570 __asm
5571 {
5572# ifdef RT_ARCH_AMD64
5573 mov rax, [pvBitmap]
5574 mov edx, [iBit]
5575 bts [rax], edx
5576# else
5577 mov eax, [pvBitmap]
5578 mov edx, [iBit]
5579 bts [eax], edx
5580# endif
5581 }
5582# endif
5583}
5584#endif
5585
5586
5587/**
5588 * Atomically sets a bit in a bitmap, ordered.
5589 *
5590 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5591 * the memory access isn't atomic!
5592 * @param iBit The bit to set.
5593 */
5594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5595DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5596#else
5597DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5598{
5599 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5600# if RT_INLINE_ASM_USES_INTRIN
5601 _interlockedbittestandset((long *)pvBitmap, iBit);
5602# elif RT_INLINE_ASM_GNU_STYLE
5603 __asm__ __volatile__("lock; btsl %1, %0"
5604 : "=m" (*(volatile long *)pvBitmap)
5605 : "Ir" (iBit),
5606 "m" (*(volatile long *)pvBitmap)
5607 : "memory");
5608# else
5609 __asm
5610 {
5611# ifdef RT_ARCH_AMD64
5612 mov rax, [pvBitmap]
5613 mov edx, [iBit]
5614 lock bts [rax], edx
5615# else
5616 mov eax, [pvBitmap]
5617 mov edx, [iBit]
5618 lock bts [eax], edx
5619# endif
5620 }
5621# endif
5622}
5623#endif
5624
5625
5626/**
5627 * Clears a bit in a bitmap.
5628 *
5629 * @param pvBitmap Pointer to the bitmap.
5630 * @param iBit The bit to clear.
5631 *
5632 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5633 * However, doing so will yield better performance as well as avoiding
5634 * traps accessing the last bits in the bitmap.
5635 */
5636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5637DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5638#else
5639DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5640{
5641# if RT_INLINE_ASM_USES_INTRIN
5642 _bittestandreset((long *)pvBitmap, iBit);
5643
5644# elif RT_INLINE_ASM_GNU_STYLE
5645 __asm__ __volatile__("btrl %1, %0"
5646 : "=m" (*(volatile long *)pvBitmap)
5647 : "Ir" (iBit),
5648 "m" (*(volatile long *)pvBitmap)
5649 : "memory");
5650# else
5651 __asm
5652 {
5653# ifdef RT_ARCH_AMD64
5654 mov rax, [pvBitmap]
5655 mov edx, [iBit]
5656 btr [rax], edx
5657# else
5658 mov eax, [pvBitmap]
5659 mov edx, [iBit]
5660 btr [eax], edx
5661# endif
5662 }
5663# endif
5664}
5665#endif
5666
5667
5668/**
5669 * Atomically clears a bit in a bitmap, ordered.
5670 *
5671 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5672 * the memory access isn't atomic!
5673 * @param iBit The bit to toggle set.
5674 * @remarks No memory barrier, take care on smp.
5675 */
5676#if RT_INLINE_ASM_EXTERNAL
5677DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5678#else
5679DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5680{
5681 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5682# if RT_INLINE_ASM_GNU_STYLE
5683 __asm__ __volatile__("lock; btrl %1, %0"
5684 : "=m" (*(volatile long *)pvBitmap)
5685 : "Ir" (iBit),
5686 "m" (*(volatile long *)pvBitmap)
5687 : "memory");
5688# else
5689 __asm
5690 {
5691# ifdef RT_ARCH_AMD64
5692 mov rax, [pvBitmap]
5693 mov edx, [iBit]
5694 lock btr [rax], edx
5695# else
5696 mov eax, [pvBitmap]
5697 mov edx, [iBit]
5698 lock btr [eax], edx
5699# endif
5700 }
5701# endif
5702}
5703#endif
5704
5705
5706/**
5707 * Toggles a bit in a bitmap.
5708 *
5709 * @param pvBitmap Pointer to the bitmap.
5710 * @param iBit The bit to toggle.
5711 *
5712 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5713 * However, doing so will yield better performance as well as avoiding
5714 * traps accessing the last bits in the bitmap.
5715 */
5716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5717DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5718#else
5719DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5720{
5721# if RT_INLINE_ASM_USES_INTRIN
5722 _bittestandcomplement((long *)pvBitmap, iBit);
5723# elif RT_INLINE_ASM_GNU_STYLE
5724 __asm__ __volatile__("btcl %1, %0"
5725 : "=m" (*(volatile long *)pvBitmap)
5726 : "Ir" (iBit),
5727 "m" (*(volatile long *)pvBitmap)
5728 : "memory");
5729# else
5730 __asm
5731 {
5732# ifdef RT_ARCH_AMD64
5733 mov rax, [pvBitmap]
5734 mov edx, [iBit]
5735 btc [rax], edx
5736# else
5737 mov eax, [pvBitmap]
5738 mov edx, [iBit]
5739 btc [eax], edx
5740# endif
5741 }
5742# endif
5743}
5744#endif
5745
5746
5747/**
5748 * Atomically toggles a bit in a bitmap, ordered.
5749 *
5750 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5751 * the memory access isn't atomic!
5752 * @param iBit The bit to test and set.
5753 */
5754#if RT_INLINE_ASM_EXTERNAL
5755DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5756#else
5757DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5758{
5759 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5760# if RT_INLINE_ASM_GNU_STYLE
5761 __asm__ __volatile__("lock; btcl %1, %0"
5762 : "=m" (*(volatile long *)pvBitmap)
5763 : "Ir" (iBit),
5764 "m" (*(volatile long *)pvBitmap)
5765 : "memory");
5766# else
5767 __asm
5768 {
5769# ifdef RT_ARCH_AMD64
5770 mov rax, [pvBitmap]
5771 mov edx, [iBit]
5772 lock btc [rax], edx
5773# else
5774 mov eax, [pvBitmap]
5775 mov edx, [iBit]
5776 lock btc [eax], edx
5777# endif
5778 }
5779# endif
5780}
5781#endif
5782
5783
5784/**
5785 * Tests and sets a bit in a bitmap.
5786 *
5787 * @returns true if the bit was set.
5788 * @returns false if the bit was clear.
5789 *
5790 * @param pvBitmap Pointer to the bitmap.
5791 * @param iBit The bit to test and set.
5792 *
5793 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5794 * However, doing so will yield better performance as well as avoiding
5795 * traps accessing the last bits in the bitmap.
5796 */
5797#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5798DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5799#else
5800DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5801{
5802 union { bool f; uint32_t u32; uint8_t u8; } rc;
5803# if RT_INLINE_ASM_USES_INTRIN
5804 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5805
5806# elif RT_INLINE_ASM_GNU_STYLE
5807 __asm__ __volatile__("btsl %2, %1\n\t"
5808 "setc %b0\n\t"
5809 "andl $1, %0\n\t"
5810 : "=q" (rc.u32),
5811 "=m" (*(volatile long *)pvBitmap)
5812 : "Ir" (iBit),
5813 "m" (*(volatile long *)pvBitmap)
5814 : "memory");
5815# else
5816 __asm
5817 {
5818 mov edx, [iBit]
5819# ifdef RT_ARCH_AMD64
5820 mov rax, [pvBitmap]
5821 bts [rax], edx
5822# else
5823 mov eax, [pvBitmap]
5824 bts [eax], edx
5825# endif
5826 setc al
5827 and eax, 1
5828 mov [rc.u32], eax
5829 }
5830# endif
5831 return rc.f;
5832}
5833#endif
5834
5835
5836/**
5837 * Atomically tests and sets a bit in a bitmap, ordered.
5838 *
5839 * @returns true if the bit was set.
5840 * @returns false if the bit was clear.
5841 *
5842 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5843 * the memory access isn't atomic!
5844 * @param iBit The bit to set.
5845 */
5846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5847DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5848#else
5849DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5850{
5851 union { bool f; uint32_t u32; uint8_t u8; } rc;
5852 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5853# if RT_INLINE_ASM_USES_INTRIN
5854 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5855# elif RT_INLINE_ASM_GNU_STYLE
5856 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5857 "setc %b0\n\t"
5858 "andl $1, %0\n\t"
5859 : "=q" (rc.u32),
5860 "=m" (*(volatile long *)pvBitmap)
5861 : "Ir" (iBit),
5862 "m" (*(volatile long *)pvBitmap)
5863 : "memory");
5864# else
5865 __asm
5866 {
5867 mov edx, [iBit]
5868# ifdef RT_ARCH_AMD64
5869 mov rax, [pvBitmap]
5870 lock bts [rax], edx
5871# else
5872 mov eax, [pvBitmap]
5873 lock bts [eax], edx
5874# endif
5875 setc al
5876 and eax, 1
5877 mov [rc.u32], eax
5878 }
5879# endif
5880 return rc.f;
5881}
5882#endif
5883
5884
5885/**
5886 * Tests and clears a bit in a bitmap.
5887 *
5888 * @returns true if the bit was set.
5889 * @returns false if the bit was clear.
5890 *
5891 * @param pvBitmap Pointer to the bitmap.
5892 * @param iBit The bit to test and clear.
5893 *
5894 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5895 * However, doing so will yield better performance as well as avoiding
5896 * traps accessing the last bits in the bitmap.
5897 */
5898#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5899DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5900#else
5901DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5902{
5903 union { bool f; uint32_t u32; uint8_t u8; } rc;
5904# if RT_INLINE_ASM_USES_INTRIN
5905 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5906
5907# elif RT_INLINE_ASM_GNU_STYLE
5908 __asm__ __volatile__("btrl %2, %1\n\t"
5909 "setc %b0\n\t"
5910 "andl $1, %0\n\t"
5911 : "=q" (rc.u32),
5912 "=m" (*(volatile long *)pvBitmap)
5913 : "Ir" (iBit),
5914 "m" (*(volatile long *)pvBitmap)
5915 : "memory");
5916# else
5917 __asm
5918 {
5919 mov edx, [iBit]
5920# ifdef RT_ARCH_AMD64
5921 mov rax, [pvBitmap]
5922 btr [rax], edx
5923# else
5924 mov eax, [pvBitmap]
5925 btr [eax], edx
5926# endif
5927 setc al
5928 and eax, 1
5929 mov [rc.u32], eax
5930 }
5931# endif
5932 return rc.f;
5933}
5934#endif
5935
5936
5937/**
5938 * Atomically tests and clears a bit in a bitmap, ordered.
5939 *
5940 * @returns true if the bit was set.
5941 * @returns false if the bit was clear.
5942 *
5943 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5944 * the memory access isn't atomic!
5945 * @param iBit The bit to test and clear.
5946 *
5947 * @remarks No memory barrier, take care on smp.
5948 */
5949#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5950DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5951#else
5952DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5953{
5954 union { bool f; uint32_t u32; uint8_t u8; } rc;
5955 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5956# if RT_INLINE_ASM_USES_INTRIN
5957 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5958
5959# elif RT_INLINE_ASM_GNU_STYLE
5960 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5961 "setc %b0\n\t"
5962 "andl $1, %0\n\t"
5963 : "=q" (rc.u32),
5964 "=m" (*(volatile long *)pvBitmap)
5965 : "Ir" (iBit),
5966 "m" (*(volatile long *)pvBitmap)
5967 : "memory");
5968# else
5969 __asm
5970 {
5971 mov edx, [iBit]
5972# ifdef RT_ARCH_AMD64
5973 mov rax, [pvBitmap]
5974 lock btr [rax], edx
5975# else
5976 mov eax, [pvBitmap]
5977 lock btr [eax], edx
5978# endif
5979 setc al
5980 and eax, 1
5981 mov [rc.u32], eax
5982 }
5983# endif
5984 return rc.f;
5985}
5986#endif
5987
5988
5989/**
5990 * Tests and toggles a bit in a bitmap.
5991 *
5992 * @returns true if the bit was set.
5993 * @returns false if the bit was clear.
5994 *
5995 * @param pvBitmap Pointer to the bitmap.
5996 * @param iBit The bit to test and toggle.
5997 *
5998 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5999 * However, doing so will yield better performance as well as avoiding
6000 * traps accessing the last bits in the bitmap.
6001 */
6002#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6003DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6004#else
6005DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6006{
6007 union { bool f; uint32_t u32; uint8_t u8; } rc;
6008# if RT_INLINE_ASM_USES_INTRIN
6009 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6010
6011# elif RT_INLINE_ASM_GNU_STYLE
6012 __asm__ __volatile__("btcl %2, %1\n\t"
6013 "setc %b0\n\t"
6014 "andl $1, %0\n\t"
6015 : "=q" (rc.u32),
6016 "=m" (*(volatile long *)pvBitmap)
6017 : "Ir" (iBit),
6018 "m" (*(volatile long *)pvBitmap)
6019 : "memory");
6020# else
6021 __asm
6022 {
6023 mov edx, [iBit]
6024# ifdef RT_ARCH_AMD64
6025 mov rax, [pvBitmap]
6026 btc [rax], edx
6027# else
6028 mov eax, [pvBitmap]
6029 btc [eax], edx
6030# endif
6031 setc al
6032 and eax, 1
6033 mov [rc.u32], eax
6034 }
6035# endif
6036 return rc.f;
6037}
6038#endif
6039
6040
6041/**
6042 * Atomically tests and toggles a bit in a bitmap, ordered.
6043 *
6044 * @returns true if the bit was set.
6045 * @returns false if the bit was clear.
6046 *
6047 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6048 * the memory access isn't atomic!
6049 * @param iBit The bit to test and toggle.
6050 */
6051#if RT_INLINE_ASM_EXTERNAL
6052DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6053#else
6054DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6055{
6056 union { bool f; uint32_t u32; uint8_t u8; } rc;
6057 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6058# if RT_INLINE_ASM_GNU_STYLE
6059 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6060 "setc %b0\n\t"
6061 "andl $1, %0\n\t"
6062 : "=q" (rc.u32),
6063 "=m" (*(volatile long *)pvBitmap)
6064 : "Ir" (iBit),
6065 "m" (*(volatile long *)pvBitmap)
6066 : "memory");
6067# else
6068 __asm
6069 {
6070 mov edx, [iBit]
6071# ifdef RT_ARCH_AMD64
6072 mov rax, [pvBitmap]
6073 lock btc [rax], edx
6074# else
6075 mov eax, [pvBitmap]
6076 lock btc [eax], edx
6077# endif
6078 setc al
6079 and eax, 1
6080 mov [rc.u32], eax
6081 }
6082# endif
6083 return rc.f;
6084}
6085#endif
6086
6087
6088/**
6089 * Tests if a bit in a bitmap is set.
6090 *
6091 * @returns true if the bit is set.
6092 * @returns false if the bit is clear.
6093 *
6094 * @param pvBitmap Pointer to the bitmap.
6095 * @param iBit The bit to test.
6096 *
6097 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6098 * However, doing so will yield better performance as well as avoiding
6099 * traps accessing the last bits in the bitmap.
6100 */
6101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6102DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6103#else
6104DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6105{
6106 union { bool f; uint32_t u32; uint8_t u8; } rc;
6107# if RT_INLINE_ASM_USES_INTRIN
6108 rc.u32 = _bittest((long *)pvBitmap, iBit);
6109# elif RT_INLINE_ASM_GNU_STYLE
6110
6111 __asm__ __volatile__("btl %2, %1\n\t"
6112 "setc %b0\n\t"
6113 "andl $1, %0\n\t"
6114 : "=q" (rc.u32)
6115 : "m" (*(const volatile long *)pvBitmap),
6116 "Ir" (iBit)
6117 : "memory");
6118# else
6119 __asm
6120 {
6121 mov edx, [iBit]
6122# ifdef RT_ARCH_AMD64
6123 mov rax, [pvBitmap]
6124 bt [rax], edx
6125# else
6126 mov eax, [pvBitmap]
6127 bt [eax], edx
6128# endif
6129 setc al
6130 and eax, 1
6131 mov [rc.u32], eax
6132 }
6133# endif
6134 return rc.f;
6135}
6136#endif
6137
6138
6139/**
6140 * Clears a bit range within a bitmap.
6141 *
6142 * @param pvBitmap Pointer to the bitmap.
6143 * @param iBitStart The First bit to clear.
6144 * @param iBitEnd The first bit not to clear.
6145 */
6146DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6147{
6148 if (iBitStart < iBitEnd)
6149 {
6150 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6151 int iStart = iBitStart & ~31;
6152 int iEnd = iBitEnd & ~31;
6153 if (iStart == iEnd)
6154 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6155 else
6156 {
6157 /* bits in first dword. */
6158 if (iBitStart & 31)
6159 {
6160 *pu32 &= (1 << (iBitStart & 31)) - 1;
6161 pu32++;
6162 iBitStart = iStart + 32;
6163 }
6164
6165 /* whole dword. */
6166 if (iBitStart != iEnd)
6167 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6168
6169 /* bits in last dword. */
6170 if (iBitEnd & 31)
6171 {
6172 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6173 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6174 }
6175 }
6176 }
6177}
6178
6179
6180/**
6181 * Sets a bit range within a bitmap.
6182 *
6183 * @param pvBitmap Pointer to the bitmap.
6184 * @param iBitStart The First bit to set.
6185 * @param iBitEnd The first bit not to set.
6186 */
6187DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6188{
6189 if (iBitStart < iBitEnd)
6190 {
6191 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6192 int iStart = iBitStart & ~31;
6193 int iEnd = iBitEnd & ~31;
6194 if (iStart == iEnd)
6195 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
6196 else
6197 {
6198 /* bits in first dword. */
6199 if (iBitStart & 31)
6200 {
6201 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6202 pu32++;
6203 iBitStart = iStart + 32;
6204 }
6205
6206 /* whole dword. */
6207 if (iBitStart != iEnd)
6208 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6209
6210 /* bits in last dword. */
6211 if (iBitEnd & 31)
6212 {
6213 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6214 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6215 }
6216 }
6217 }
6218}
6219
6220
6221/**
6222 * Finds the first clear bit in a bitmap.
6223 *
6224 * @returns Index of the first zero bit.
6225 * @returns -1 if no clear bit was found.
6226 * @param pvBitmap Pointer to the bitmap.
6227 * @param cBits The number of bits in the bitmap. Multiple of 32.
6228 */
6229#if RT_INLINE_ASM_EXTERNAL
6230DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6231#else
6232DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6233{
6234 if (cBits)
6235 {
6236 int32_t iBit;
6237# if RT_INLINE_ASM_GNU_STYLE
6238 RTCCUINTREG uEAX, uECX, uEDI;
6239 cBits = RT_ALIGN_32(cBits, 32);
6240 __asm__ __volatile__("repe; scasl\n\t"
6241 "je 1f\n\t"
6242# ifdef RT_ARCH_AMD64
6243 "lea -4(%%rdi), %%rdi\n\t"
6244 "xorl (%%rdi), %%eax\n\t"
6245 "subq %5, %%rdi\n\t"
6246# else
6247 "lea -4(%%edi), %%edi\n\t"
6248 "xorl (%%edi), %%eax\n\t"
6249 "subl %5, %%edi\n\t"
6250# endif
6251 "shll $3, %%edi\n\t"
6252 "bsfl %%eax, %%edx\n\t"
6253 "addl %%edi, %%edx\n\t"
6254 "1:\t\n"
6255 : "=d" (iBit),
6256 "=&c" (uECX),
6257 "=&D" (uEDI),
6258 "=&a" (uEAX)
6259 : "0" (0xffffffff),
6260 "mr" (pvBitmap),
6261 "1" (cBits >> 5),
6262 "2" (pvBitmap),
6263 "3" (0xffffffff));
6264# else
6265 cBits = RT_ALIGN_32(cBits, 32);
6266 __asm
6267 {
6268# ifdef RT_ARCH_AMD64
6269 mov rdi, [pvBitmap]
6270 mov rbx, rdi
6271# else
6272 mov edi, [pvBitmap]
6273 mov ebx, edi
6274# endif
6275 mov edx, 0ffffffffh
6276 mov eax, edx
6277 mov ecx, [cBits]
6278 shr ecx, 5
6279 repe scasd
6280 je done
6281
6282# ifdef RT_ARCH_AMD64
6283 lea rdi, [rdi - 4]
6284 xor eax, [rdi]
6285 sub rdi, rbx
6286# else
6287 lea edi, [edi - 4]
6288 xor eax, [edi]
6289 sub edi, ebx
6290# endif
6291 shl edi, 3
6292 bsf edx, eax
6293 add edx, edi
6294 done:
6295 mov [iBit], edx
6296 }
6297# endif
6298 return iBit;
6299 }
6300 return -1;
6301}
6302#endif
6303
6304
6305/**
6306 * Finds the next clear bit in a bitmap.
6307 *
6308 * @returns Index of the first zero bit.
6309 * @returns -1 if no clear bit was found.
6310 * @param pvBitmap Pointer to the bitmap.
6311 * @param cBits The number of bits in the bitmap. Multiple of 32.
6312 * @param iBitPrev The bit returned from the last search.
6313 * The search will start at iBitPrev + 1.
6314 */
6315#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6316DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6317#else
6318DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6319{
6320 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6321 int iBit = ++iBitPrev & 31;
6322 if (iBit)
6323 {
6324 /*
6325 * Inspect the 32-bit word containing the unaligned bit.
6326 */
6327 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6328
6329# if RT_INLINE_ASM_USES_INTRIN
6330 unsigned long ulBit = 0;
6331 if (_BitScanForward(&ulBit, u32))
6332 return ulBit + iBitPrev;
6333# else
6334# if RT_INLINE_ASM_GNU_STYLE
6335 __asm__ __volatile__("bsf %1, %0\n\t"
6336 "jnz 1f\n\t"
6337 "movl $-1, %0\n\t"
6338 "1:\n\t"
6339 : "=r" (iBit)
6340 : "r" (u32));
6341# else
6342 __asm
6343 {
6344 mov edx, [u32]
6345 bsf eax, edx
6346 jnz done
6347 mov eax, 0ffffffffh
6348 done:
6349 mov [iBit], eax
6350 }
6351# endif
6352 if (iBit >= 0)
6353 return iBit + iBitPrev;
6354# endif
6355
6356 /*
6357 * Skip ahead and see if there is anything left to search.
6358 */
6359 iBitPrev |= 31;
6360 iBitPrev++;
6361 if (cBits <= (uint32_t)iBitPrev)
6362 return -1;
6363 }
6364
6365 /*
6366 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6367 */
6368 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6369 if (iBit >= 0)
6370 iBit += iBitPrev;
6371 return iBit;
6372}
6373#endif
6374
6375
6376/**
6377 * Finds the first set bit in a bitmap.
6378 *
6379 * @returns Index of the first set bit.
6380 * @returns -1 if no clear bit was found.
6381 * @param pvBitmap Pointer to the bitmap.
6382 * @param cBits The number of bits in the bitmap. Multiple of 32.
6383 */
6384#if RT_INLINE_ASM_EXTERNAL
6385DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6386#else
6387DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6388{
6389 if (cBits)
6390 {
6391 int32_t iBit;
6392# if RT_INLINE_ASM_GNU_STYLE
6393 RTCCUINTREG uEAX, uECX, uEDI;
6394 cBits = RT_ALIGN_32(cBits, 32);
6395 __asm__ __volatile__("repe; scasl\n\t"
6396 "je 1f\n\t"
6397# ifdef RT_ARCH_AMD64
6398 "lea -4(%%rdi), %%rdi\n\t"
6399 "movl (%%rdi), %%eax\n\t"
6400 "subq %5, %%rdi\n\t"
6401# else
6402 "lea -4(%%edi), %%edi\n\t"
6403 "movl (%%edi), %%eax\n\t"
6404 "subl %5, %%edi\n\t"
6405# endif
6406 "shll $3, %%edi\n\t"
6407 "bsfl %%eax, %%edx\n\t"
6408 "addl %%edi, %%edx\n\t"
6409 "1:\t\n"
6410 : "=d" (iBit),
6411 "=&c" (uECX),
6412 "=&D" (uEDI),
6413 "=&a" (uEAX)
6414 : "0" (0xffffffff),
6415 "mr" (pvBitmap),
6416 "1" (cBits >> 5),
6417 "2" (pvBitmap),
6418 "3" (0));
6419# else
6420 cBits = RT_ALIGN_32(cBits, 32);
6421 __asm
6422 {
6423# ifdef RT_ARCH_AMD64
6424 mov rdi, [pvBitmap]
6425 mov rbx, rdi
6426# else
6427 mov edi, [pvBitmap]
6428 mov ebx, edi
6429# endif
6430 mov edx, 0ffffffffh
6431 xor eax, eax
6432 mov ecx, [cBits]
6433 shr ecx, 5
6434 repe scasd
6435 je done
6436# ifdef RT_ARCH_AMD64
6437 lea rdi, [rdi - 4]
6438 mov eax, [rdi]
6439 sub rdi, rbx
6440# else
6441 lea edi, [edi - 4]
6442 mov eax, [edi]
6443 sub edi, ebx
6444# endif
6445 shl edi, 3
6446 bsf edx, eax
6447 add edx, edi
6448 done:
6449 mov [iBit], edx
6450 }
6451# endif
6452 return iBit;
6453 }
6454 return -1;
6455}
6456#endif
6457
6458
6459/**
6460 * Finds the next set bit in a bitmap.
6461 *
6462 * @returns Index of the next set bit.
6463 * @returns -1 if no set bit was found.
6464 * @param pvBitmap Pointer to the bitmap.
6465 * @param cBits The number of bits in the bitmap. Multiple of 32.
6466 * @param iBitPrev The bit returned from the last search.
6467 * The search will start at iBitPrev + 1.
6468 */
6469#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6470DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6471#else
6472DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6473{
6474 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6475 int iBit = ++iBitPrev & 31;
6476 if (iBit)
6477 {
6478 /*
6479 * Inspect the 32-bit word containing the unaligned bit.
6480 */
6481 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6482
6483# if RT_INLINE_ASM_USES_INTRIN
6484 unsigned long ulBit = 0;
6485 if (_BitScanForward(&ulBit, u32))
6486 return ulBit + iBitPrev;
6487# else
6488# if RT_INLINE_ASM_GNU_STYLE
6489 __asm__ __volatile__("bsf %1, %0\n\t"
6490 "jnz 1f\n\t"
6491 "movl $-1, %0\n\t"
6492 "1:\n\t"
6493 : "=r" (iBit)
6494 : "r" (u32));
6495# else
6496 __asm
6497 {
6498 mov edx, [u32]
6499 bsf eax, edx
6500 jnz done
6501 mov eax, 0ffffffffh
6502 done:
6503 mov [iBit], eax
6504 }
6505# endif
6506 if (iBit >= 0)
6507 return iBit + iBitPrev;
6508# endif
6509
6510 /*
6511 * Skip ahead and see if there is anything left to search.
6512 */
6513 iBitPrev |= 31;
6514 iBitPrev++;
6515 if (cBits <= (uint32_t)iBitPrev)
6516 return -1;
6517 }
6518
6519 /*
6520 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6521 */
6522 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6523 if (iBit >= 0)
6524 iBit += iBitPrev;
6525 return iBit;
6526}
6527#endif
6528
6529
6530/**
6531 * Finds the first bit which is set in the given 32-bit integer.
6532 * Bits are numbered from 1 (least significant) to 32.
6533 *
6534 * @returns index [1..32] of the first set bit.
6535 * @returns 0 if all bits are cleared.
6536 * @param u32 Integer to search for set bits.
6537 * @remark Similar to ffs() in BSD.
6538 */
6539DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6540{
6541# if RT_INLINE_ASM_USES_INTRIN
6542 unsigned long iBit;
6543 if (_BitScanForward(&iBit, u32))
6544 iBit++;
6545 else
6546 iBit = 0;
6547# elif RT_INLINE_ASM_GNU_STYLE
6548 uint32_t iBit;
6549 __asm__ __volatile__("bsf %1, %0\n\t"
6550 "jnz 1f\n\t"
6551 "xorl %0, %0\n\t"
6552 "jmp 2f\n"
6553 "1:\n\t"
6554 "incl %0\n"
6555 "2:\n\t"
6556 : "=r" (iBit)
6557 : "rm" (u32));
6558# else
6559 uint32_t iBit;
6560 _asm
6561 {
6562 bsf eax, [u32]
6563 jnz found
6564 xor eax, eax
6565 jmp done
6566 found:
6567 inc eax
6568 done:
6569 mov [iBit], eax
6570 }
6571# endif
6572 return iBit;
6573}
6574
6575
6576/**
6577 * Finds the first bit which is set in the given 32-bit integer.
6578 * Bits are numbered from 1 (least significant) to 32.
6579 *
6580 * @returns index [1..32] of the first set bit.
6581 * @returns 0 if all bits are cleared.
6582 * @param i32 Integer to search for set bits.
6583 * @remark Similar to ffs() in BSD.
6584 */
6585DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6586{
6587 return ASMBitFirstSetU32((uint32_t)i32);
6588}
6589
6590
6591/**
6592 * Finds the last bit which is set in the given 32-bit integer.
6593 * Bits are numbered from 1 (least significant) to 32.
6594 *
6595 * @returns index [1..32] of the last set bit.
6596 * @returns 0 if all bits are cleared.
6597 * @param u32 Integer to search for set bits.
6598 * @remark Similar to fls() in BSD.
6599 */
6600DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6601{
6602# if RT_INLINE_ASM_USES_INTRIN
6603 unsigned long iBit;
6604 if (_BitScanReverse(&iBit, u32))
6605 iBit++;
6606 else
6607 iBit = 0;
6608# elif RT_INLINE_ASM_GNU_STYLE
6609 uint32_t iBit;
6610 __asm__ __volatile__("bsrl %1, %0\n\t"
6611 "jnz 1f\n\t"
6612 "xorl %0, %0\n\t"
6613 "jmp 2f\n"
6614 "1:\n\t"
6615 "incl %0\n"
6616 "2:\n\t"
6617 : "=r" (iBit)
6618 : "rm" (u32));
6619# else
6620 uint32_t iBit;
6621 _asm
6622 {
6623 bsr eax, [u32]
6624 jnz found
6625 xor eax, eax
6626 jmp done
6627 found:
6628 inc eax
6629 done:
6630 mov [iBit], eax
6631 }
6632# endif
6633 return iBit;
6634}
6635
6636
6637/**
6638 * Finds the last bit which is set in the given 32-bit integer.
6639 * Bits are numbered from 1 (least significant) to 32.
6640 *
6641 * @returns index [1..32] of the last set bit.
6642 * @returns 0 if all bits are cleared.
6643 * @param i32 Integer to search for set bits.
6644 * @remark Similar to fls() in BSD.
6645 */
6646DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6647{
6648 return ASMBitLastSetU32((uint32_t)i32);
6649}
6650
6651/**
6652 * Reverse the byte order of the given 16-bit integer.
6653 *
6654 * @returns Revert
6655 * @param u16 16-bit integer value.
6656 */
6657DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6658{
6659#if RT_INLINE_ASM_USES_INTRIN
6660 u16 = _byteswap_ushort(u16);
6661#elif RT_INLINE_ASM_GNU_STYLE
6662 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6663#else
6664 _asm
6665 {
6666 mov ax, [u16]
6667 ror ax, 8
6668 mov [u16], ax
6669 }
6670#endif
6671 return u16;
6672}
6673
6674/**
6675 * Reverse the byte order of the given 32-bit integer.
6676 *
6677 * @returns Revert
6678 * @param u32 32-bit integer value.
6679 */
6680DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6681{
6682#if RT_INLINE_ASM_USES_INTRIN
6683 u32 = _byteswap_ulong(u32);
6684#elif RT_INLINE_ASM_GNU_STYLE
6685 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6686#else
6687 _asm
6688 {
6689 mov eax, [u32]
6690 bswap eax
6691 mov [u32], eax
6692 }
6693#endif
6694 return u32;
6695}
6696
6697
6698/**
6699 * Reverse the byte order of the given 64-bit integer.
6700 *
6701 * @returns Revert
6702 * @param u64 64-bit integer value.
6703 */
6704DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6705{
6706#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6707 u64 = _byteswap_uint64(u64);
6708#else
6709 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6710 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6711#endif
6712 return u64;
6713}
6714
6715
6716/** @} */
6717
6718
6719/** @} */
6720#endif
6721
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette