VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 25647

Last change on this file since 25647 was 25645, checked in by vboxsync, 15 years ago

IPRT,DoxyFile.Core: Mopped up the errors in the IPRT doxygen run.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 173.9 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo @code #include <iprt/param.h> @endcode for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a compiler with _MSC_VER >= 1400.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(_mm_mfence)
105# pragma intrinsic(_mm_sfence)
106# pragma intrinsic(_mm_lfence)
107# pragma intrinsic(__stosq)
108# pragma intrinsic(__readcr8)
109# pragma intrinsic(__writecr8)
110# pragma intrinsic(_byteswap_uint64)
111# pragma intrinsic(_InterlockedExchange64)
112# endif
113# endif
114#endif
115#ifndef RT_INLINE_ASM_USES_INTRIN
116# define RT_INLINE_ASM_USES_INTRIN 0
117#endif
118
119/** @def RT_INLINE_ASM_GCC_4_3_X_X86
120 * Used to work around some 4.3.x register allocation issues in this version of
121 * the compiler. */
122#ifdef __GNUC__
123# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__))
124#endif
125#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
126# define RT_INLINE_ASM_GCC_4_3_X_X86 0
127#endif
128
129
130
131/** @defgroup grp_asm ASM - Assembly Routines
132 * @ingroup grp_rt
133 *
134 * @remarks The difference between ordered and unordered atomic operations is that
135 * the former will complete outstanding reads and writes before continuing,
136 * while the latter makes no promises about the order. Even ordered
137 * operations do not, it seems, make any 100% promise as to whether
138 * the operation will complete before any subsequent memory access.
139 * (Please correct if wrong.)
140 *
141 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
142 * are unordered (note the Uo).
143 *
144 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
145 * or even optimize assembler instructions away. For instance, in the following code
146 * the second rdmsr instruction is optimized away because gcc treats that instruction
147 * as deterministic:
148 *
149 * @code
150 * static inline uint32_t rdmsr_low(int idx)
151 * {
152 * uint32_t low;
153 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
154 * return low; }
155 * ...
156 * uint32_t msr1 = rdmsr_low(1);
157 * foo(msr1);
158 * msr1 = rdmsr_low(1);
159 * bar(msr1);
160 * @endcode
161 *
162 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
163 * use the result of the first call as input parameter for bar() as well. For rdmsr this
164 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
165 * machine status information in general.
166 *
167 * @{
168 */
169
170/** @def RT_INLINE_ASM_EXTERNAL
171 * Defined as 1 if the compiler does not support inline assembly.
172 * The ASM* functions will then be implemented in an external .asm file.
173 *
174 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
175 * inline assembly in their AMD64 compiler.
176 */
177#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
178# define RT_INLINE_ASM_EXTERNAL 1
179#else
180# define RT_INLINE_ASM_EXTERNAL 0
181#endif
182
183/** @def RT_INLINE_ASM_GNU_STYLE
184 * Defined as 1 if the compiler understands GNU style inline assembly.
185 */
186#if defined(_MSC_VER)
187# define RT_INLINE_ASM_GNU_STYLE 0
188#else
189# define RT_INLINE_ASM_GNU_STYLE 1
190#endif
191
192
193/** @todo find a more proper place for this structure? */
194#pragma pack(1)
195/** IDTR */
196typedef struct RTIDTR
197{
198 /** Size of the IDT. */
199 uint16_t cbIdt;
200 /** Address of the IDT. */
201 uintptr_t pIdt;
202} RTIDTR, *PRTIDTR;
203#pragma pack()
204
205#pragma pack(1)
206/** GDTR */
207typedef struct RTGDTR
208{
209 /** Size of the GDT. */
210 uint16_t cbGdt;
211 /** Address of the GDT. */
212 uintptr_t pGdt;
213} RTGDTR, *PRTGDTR;
214#pragma pack()
215
216
217/** @def ASMReturnAddress
218 * Gets the return address of the current (or calling if you like) function or method.
219 */
220#ifdef _MSC_VER
221# ifdef __cplusplus
222extern "C"
223# endif
224void * _ReturnAddress(void);
225# pragma intrinsic(_ReturnAddress)
226# define ASMReturnAddress() _ReturnAddress()
227#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
228# define ASMReturnAddress() __builtin_return_address(0)
229#else
230# error "Unsupported compiler."
231#endif
232
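/* Usage sketch (illustrative only): ASMReturnAddress() is typically used to tag log
 * entries or assertions with the caller's address; the RTPrintf call below is just a
 * stand-in for whatever logging facility is at hand.
 *
 *     static void logWhoCalledMe(void)
 *     {
 *         void *pvCaller = ASMReturnAddress();
 *         RTPrintf("called from %p\n", pvCaller);
 *     }
 */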
233
234/**
235 * Gets the content of the IDTR CPU register.
236 * @param pIdtr Where to store the IDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
240#else
241DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 sidt [rax]
251# else
252 mov eax, [pIdtr]
253 sidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Sets the content of the IDTR CPU register.
263 * @param pIdtr Where to load the IDTR contents from
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
267#else
268DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pIdtr]
277 lidt [rax]
278# else
279 mov eax, [pIdtr]
280 lidt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287
288/**
289 * Gets the content of the GDTR CPU register.
290 * @param pGdtr Where to store the GDTR contents.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
294#else
295DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
296{
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
299# else
300 __asm
301 {
302# ifdef RT_ARCH_AMD64
303 mov rax, [pGdtr]
304 sgdt [rax]
305# else
306 mov eax, [pGdtr]
307 sgdt [eax]
308# endif
309 }
310# endif
311}
312#endif
313
314/**
315 * Get the cs register.
316 * @returns cs.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetCS(void);
320#else
321DECLINLINE(RTSEL) ASMGetCS(void)
322{
323 RTSEL SelCS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
326# else
327 __asm
328 {
329 mov ax, cs
330 mov [SelCS], ax
331 }
332# endif
333 return SelCS;
334}
335#endif
336
337
338/**
339 * Get the DS register.
340 * @returns DS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetDS(void);
344#else
345DECLINLINE(RTSEL) ASMGetDS(void)
346{
347 RTSEL SelDS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
350# else
351 __asm
352 {
353 mov ax, ds
354 mov [SelDS], ax
355 }
356# endif
357 return SelDS;
358}
359#endif
360
361
362/**
363 * Get the ES register.
364 * @returns ES.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetES(void);
368#else
369DECLINLINE(RTSEL) ASMGetES(void)
370{
371 RTSEL SelES;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
374# else
375 __asm
376 {
377 mov ax, es
378 mov [SelES], ax
379 }
380# endif
381 return SelES;
382}
383#endif
384
385
386/**
387 * Get the FS register.
388 * @returns FS.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetFS(void);
392#else
393DECLINLINE(RTSEL) ASMGetFS(void)
394{
395 RTSEL SelFS;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
398# else
399 __asm
400 {
401 mov ax, fs
402 mov [SelFS], ax
403 }
404# endif
405 return SelFS;
406}
407#endif
408
409
410/**
411 * Get the GS register.
412 * @returns GS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTSEL) ASMGetGS(void);
416#else
417DECLINLINE(RTSEL) ASMGetGS(void)
418{
419 RTSEL SelGS;
420# if RT_INLINE_ASM_GNU_STYLE
421 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
422# else
423 __asm
424 {
425 mov ax, gs
426 mov [SelGS], ax
427 }
428# endif
429 return SelGS;
430}
431#endif
432
433
434/**
435 * Get the SS register.
436 * @returns SS.
437 */
438#if RT_INLINE_ASM_EXTERNAL
439DECLASM(RTSEL) ASMGetSS(void);
440#else
441DECLINLINE(RTSEL) ASMGetSS(void)
442{
443 RTSEL SelSS;
444# if RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
446# else
447 __asm
448 {
449 mov ax, ss
450 mov [SelSS], ax
451 }
452# endif
453 return SelSS;
454}
455#endif
456
457
458/**
459 * Get the TR register.
460 * @returns TR.
461 */
462#if RT_INLINE_ASM_EXTERNAL
463DECLASM(RTSEL) ASMGetTR(void);
464#else
465DECLINLINE(RTSEL) ASMGetTR(void)
466{
467 RTSEL SelTR;
468# if RT_INLINE_ASM_GNU_STYLE
469 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
470# else
471 __asm
472 {
473 str ax
474 mov [SelTR], ax
475 }
476# endif
477 return SelTR;
478}
479#endif
480
481
482/**
483 * Get the [RE]FLAGS register.
484 * @returns [RE]FLAGS.
485 */
486#if RT_INLINE_ASM_EXTERNAL
487DECLASM(RTCCUINTREG) ASMGetFlags(void);
488#else
489DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
490{
491 RTCCUINTREG uFlags;
492# if RT_INLINE_ASM_GNU_STYLE
493# ifdef RT_ARCH_AMD64
494 __asm__ __volatile__("pushfq\n\t"
495 "popq %0\n\t"
496 : "=r" (uFlags));
497# else
498 __asm__ __volatile__("pushfl\n\t"
499 "popl %0\n\t"
500 : "=r" (uFlags));
501# endif
502# else
503 __asm
504 {
505# ifdef RT_ARCH_AMD64
506 pushfq
507 pop [uFlags]
508# else
509 pushfd
510 pop [uFlags]
511# endif
512 }
513# endif
514 return uFlags;
515}
516#endif
517
518
519/**
520 * Set the [RE]FLAGS register.
521 * @param uFlags The new [RE]FLAGS value.
522 */
523#if RT_INLINE_ASM_EXTERNAL
524DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
525#else
526DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
527{
528# if RT_INLINE_ASM_GNU_STYLE
529# ifdef RT_ARCH_AMD64
530 __asm__ __volatile__("pushq %0\n\t"
531 "popfq\n\t"
532 : : "g" (uFlags));
533# else
534 __asm__ __volatile__("pushl %0\n\t"
535 "popfl\n\t"
536 : : "g" (uFlags));
537# endif
538# else
539 __asm
540 {
541# ifdef RT_ARCH_AMD64
542 push [uFlags]
543 popfq
544# else
545 push [uFlags]
546 popfd
547# endif
548 }
549# endif
550}
551#endif
552
553
554/**
555 * Gets the content of the CPU timestamp counter register.
556 *
557 * @returns TSC.
558 */
559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
560DECLASM(uint64_t) ASMReadTSC(void);
561#else
562DECLINLINE(uint64_t) ASMReadTSC(void)
563{
564 RTUINT64U u;
565# if RT_INLINE_ASM_GNU_STYLE
566 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
567# else
568# if RT_INLINE_ASM_USES_INTRIN
569 u.u = __rdtsc();
570# else
571 __asm
572 {
573 rdtsc
574 mov [u.s.Lo], eax
575 mov [u.s.Hi], edx
576 }
577# endif
578# endif
579 return u.u;
580}
581#endif
582
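/* Usage sketch (illustrative only): measuring how many TSC ticks a piece of code takes.
 * This assumes the TSC is stable enough for the measurement at hand, i.e. no migration
 * between CPUs with unsynchronized TSCs while pfnWork runs.
 *
 *     static uint64_t measureTicks(void (*pfnWork)(void))
 *     {
 *         uint64_t const uStart = ASMReadTSC();
 *         pfnWork();
 *         return ASMReadTSC() - uStart;
 *     }
 */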
583
584/**
585 * Performs the cpuid instruction returning all registers.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvEAX Where to store eax.
589 * @param pvEBX Where to store ebx.
590 * @param pvECX Where to store ecx.
591 * @param pvEDX Where to store edx.
592 * @remark We're using void pointers to ease the use of special bitfield structures and such.
593 */
594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
595DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
596#else
597DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
598{
599# if RT_INLINE_ASM_GNU_STYLE
600# ifdef RT_ARCH_AMD64
601 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
602 __asm__ ("cpuid\n\t"
603 : "=a" (uRAX),
604 "=b" (uRBX),
605 "=c" (uRCX),
606 "=d" (uRDX)
607 : "0" (uOperator));
608 *(uint32_t *)pvEAX = (uint32_t)uRAX;
609 *(uint32_t *)pvEBX = (uint32_t)uRBX;
610 *(uint32_t *)pvECX = (uint32_t)uRCX;
611 *(uint32_t *)pvEDX = (uint32_t)uRDX;
612# else
613 __asm__ ("xchgl %%ebx, %1\n\t"
614 "cpuid\n\t"
615 "xchgl %%ebx, %1\n\t"
616 : "=a" (*(uint32_t *)pvEAX),
617 "=r" (*(uint32_t *)pvEBX),
618 "=c" (*(uint32_t *)pvECX),
619 "=d" (*(uint32_t *)pvEDX)
620 : "0" (uOperator));
621# endif
622
623# elif RT_INLINE_ASM_USES_INTRIN
624 int aInfo[4];
625 __cpuid(aInfo, uOperator);
626 *(uint32_t *)pvEAX = aInfo[0];
627 *(uint32_t *)pvEBX = aInfo[1];
628 *(uint32_t *)pvECX = aInfo[2];
629 *(uint32_t *)pvEDX = aInfo[3];
630
631# else
632 uint32_t uEAX;
633 uint32_t uEBX;
634 uint32_t uECX;
635 uint32_t uEDX;
636 __asm
637 {
638 push ebx
639 mov eax, [uOperator]
640 cpuid
641 mov [uEAX], eax
642 mov [uEBX], ebx
643 mov [uECX], ecx
644 mov [uEDX], edx
645 pop ebx
646 }
647 *(uint32_t *)pvEAX = uEAX;
648 *(uint32_t *)pvEBX = uEBX;
649 *(uint32_t *)pvECX = uECX;
650 *(uint32_t *)pvEDX = uEDX;
651# endif
652}
653#endif
654
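/* Usage sketch (illustrative only): fetching the 12-character CPU vendor string from
 * leaf 0. The vendor bytes come back in EBX, EDX, ECX (in that order), which is why the
 * output pointers are interleaved like this. Assumes memcpy from <string.h>.
 *
 *     static void getCpuVendor(char pszVendor[13])
 *     {
 *         uint32_t uEAX;
 *         uint32_t au32Vendor[3];
 *         ASMCpuId(0, &uEAX, &au32Vendor[0], &au32Vendor[2], &au32Vendor[1]);
 *         memcpy(pszVendor, au32Vendor, 12);
 *         pszVendor[12] = '\0';
 *     }
 */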
655
656/**
657 * Performs the cpuid instruction returning all registers.
658 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
659 *
660 * @param uOperator CPUID operation (eax).
661 * @param uIdxECX ecx index (sub-leaf).
662 * @param pvEAX Where to store eax.
663 * @param pvEBX Where to store ebx.
664 * @param pvECX Where to store ecx.
665 * @param pvEDX Where to store edx.
666 * @remark We're using void pointers to ease the use of special bitfield structures and such.
667 */
668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
669DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
670#else
671DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
672{
673# if RT_INLINE_ASM_GNU_STYLE
674# ifdef RT_ARCH_AMD64
675 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
676 __asm__ ("cpuid\n\t"
677 : "=a" (uRAX),
678 "=b" (uRBX),
679 "=c" (uRCX),
680 "=d" (uRDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683 *(uint32_t *)pvEAX = (uint32_t)uRAX;
684 *(uint32_t *)pvEBX = (uint32_t)uRBX;
685 *(uint32_t *)pvECX = (uint32_t)uRCX;
686 *(uint32_t *)pvEDX = (uint32_t)uRDX;
687# else
688 __asm__ ("xchgl %%ebx, %1\n\t"
689 "cpuid\n\t"
690 "xchgl %%ebx, %1\n\t"
691 : "=a" (*(uint32_t *)pvEAX),
692 "=r" (*(uint32_t *)pvEBX),
693 "=c" (*(uint32_t *)pvECX),
694 "=d" (*(uint32_t *)pvEDX)
695 : "0" (uOperator),
696 "2" (uIdxECX));
697# endif
698
699# elif RT_INLINE_ASM_USES_INTRIN
700 int aInfo[4];
701 /* ??? another intrinsic ??? The __cpuid intrinsic takes no ECX sub-leaf, so uIdxECX is not passed on here. */
702 __cpuid(aInfo, uOperator);
703 *(uint32_t *)pvEAX = aInfo[0];
704 *(uint32_t *)pvEBX = aInfo[1];
705 *(uint32_t *)pvECX = aInfo[2];
706 *(uint32_t *)pvEDX = aInfo[3];
707
708# else
709 uint32_t uEAX;
710 uint32_t uEBX;
711 uint32_t uECX;
712 uint32_t uEDX;
713 __asm
714 {
715 push ebx
716 mov eax, [uOperator]
717 mov ecx, [uIdxECX]
718 cpuid
719 mov [uEAX], eax
720 mov [uEBX], ebx
721 mov [uECX], ecx
722 mov [uEDX], edx
723 pop ebx
724 }
725 *(uint32_t *)pvEAX = uEAX;
726 *(uint32_t *)pvEBX = uEBX;
727 *(uint32_t *)pvECX = uECX;
728 *(uint32_t *)pvEDX = uEDX;
729# endif
730}
731#endif
732
733
734/**
735 * Performs the cpuid instruction returning ecx and edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @param pvECX Where to store ecx.
739 * @param pvEDX Where to store edx.
740 * @remark We're using void pointers to ease the use of special bitfield structures and such.
741 */
742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
743DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
744#else
745DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
746{
747 uint32_t uEBX;
748 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
749}
750#endif
751
752
753/**
754 * Performs the cpuid instruction returning edx.
755 *
756 * @param uOperator CPUID operation (eax).
757 * @returns EDX after cpuid operation.
758 */
759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
760DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
761#else
762DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
763{
764 RTCCUINTREG xDX;
765# if RT_INLINE_ASM_GNU_STYLE
766# ifdef RT_ARCH_AMD64
767 RTCCUINTREG uSpill;
768 __asm__ ("cpuid"
769 : "=a" (uSpill),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "rbx", "rcx");
773# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
774 __asm__ ("push %%ebx\n\t"
775 "cpuid\n\t"
776 "pop %%ebx\n\t"
777 : "=a" (uOperator),
778 "=d" (xDX)
779 : "0" (uOperator)
780 : "ecx");
781# else
782 __asm__ ("cpuid"
783 : "=a" (uOperator),
784 "=d" (xDX)
785 : "0" (uOperator)
786 : "ebx", "ecx");
787# endif
788
789# elif RT_INLINE_ASM_USES_INTRIN
790 int aInfo[4];
791 __cpuid(aInfo, uOperator);
792 xDX = aInfo[3];
793
794# else
795 __asm
796 {
797 push ebx
798 mov eax, [uOperator]
799 cpuid
800 mov [xDX], edx
801 pop ebx
802 }
803# endif
804 return (uint32_t)xDX;
805}
806#endif
807
808
809/**
810 * Performs the cpuid instruction returning ecx.
811 *
812 * @param uOperator CPUID operation (eax).
813 * @returns ECX after cpuid operation.
814 */
815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
816DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
817#else
818DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
819{
820 RTCCUINTREG xCX;
821# if RT_INLINE_ASM_GNU_STYLE
822# ifdef RT_ARCH_AMD64
823 RTCCUINTREG uSpill;
824 __asm__ ("cpuid"
825 : "=a" (uSpill),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "rbx", "rdx");
829# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
830 __asm__ ("push %%ebx\n\t"
831 "cpuid\n\t"
832 "pop %%ebx\n\t"
833 : "=a" (uOperator),
834 "=c" (xCX)
835 : "0" (uOperator)
836 : "edx");
837# else
838 __asm__ ("cpuid"
839 : "=a" (uOperator),
840 "=c" (xCX)
841 : "0" (uOperator)
842 : "ebx", "edx");
843
844# endif
845
846# elif RT_INLINE_ASM_USES_INTRIN
847 int aInfo[4];
848 __cpuid(aInfo, uOperator);
849 xCX = aInfo[2];
850
851# else
852 __asm
853 {
854 push ebx
855 mov eax, [uOperator]
856 cpuid
857 mov [xCX], ecx
858 pop ebx
859 }
860# endif
861 return (uint32_t)xCX;
862}
863#endif
864
865
866/**
867 * Checks if the current CPU supports CPUID.
868 *
869 * @returns true if CPUID is supported.
870 */
871DECLINLINE(bool) ASMHasCpuId(void)
872{
873#ifdef RT_ARCH_AMD64
874 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
875#else /* !RT_ARCH_AMD64 */
876 bool fRet = false;
877# if RT_INLINE_ASM_GNU_STYLE
878 uint32_t u1;
879 uint32_t u2;
880 __asm__ ("pushf\n\t"
881 "pop %1\n\t"
882 "mov %1, %2\n\t"
883 "xorl $0x200000, %1\n\t"
884 "push %1\n\t"
885 "popf\n\t"
886 "pushf\n\t"
887 "pop %1\n\t"
888 "cmpl %1, %2\n\t"
889 "setne %0\n\t"
890 "push %2\n\t"
891 "popf\n\t"
892 : "=m" (fRet), "=r" (u1), "=r" (u2));
893# else
894 __asm
895 {
896 pushfd
897 pop eax
898 mov ebx, eax
899 xor eax, 0200000h
900 push eax
901 popfd
902 pushfd
903 pop eax
904 cmp eax, ebx
905 setne fRet
906 push ebx
907 popfd
908 }
909# endif
910 return fRet;
911#endif /* !RT_ARCH_AMD64 */
912}
913
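/* Usage sketch (illustrative only): on 32-bit systems CPUID availability should be
 * checked with ASMHasCpuId() before calling any of the ASMCpuId* helpers. Leaf 0
 * returns the highest supported standard leaf in EAX.
 *
 *     static bool canUseCpuIdLeaf1(void)
 *     {
 *         uint32_t uEAX, uEBX, uECX, uEDX;
 *         if (!ASMHasCpuId())
 *             return false;
 *         ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *         return uEAX >= 1;
 *     }
 */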
914
915/**
916 * Gets the APIC ID of the current CPU.
917 *
918 * @returns the APIC ID.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(uint8_t) ASMGetApicId(void);
922#else
923DECLINLINE(uint8_t) ASMGetApicId(void)
924{
925 RTCCUINTREG xBX;
926# if RT_INLINE_ASM_GNU_STYLE
927# ifdef RT_ARCH_AMD64
928 RTCCUINTREG uSpill;
929 __asm__ ("cpuid"
930 : "=a" (uSpill),
931 "=b" (xBX)
932 : "0" (1)
933 : "rcx", "rdx");
934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
935 RTCCUINTREG uSpill;
936 __asm__ ("mov %%ebx,%1\n\t"
937 "cpuid\n\t"
938 "xchgl %%ebx,%1\n\t"
939 : "=a" (uSpill),
940 "=r" (xBX)
941 : "0" (1)
942 : "ecx", "edx");
943# else
944 RTCCUINTREG uSpill;
945 __asm__ ("cpuid"
946 : "=a" (uSpill),
947 "=b" (xBX)
948 : "0" (1)
949 : "ecx", "edx");
950# endif
951
952# elif RT_INLINE_ASM_USES_INTRIN
953 int aInfo[4];
954 __cpuid(aInfo, 1);
955 xBX = aInfo[1];
956
957# else
958 __asm
959 {
960 push ebx
961 mov eax, 1
962 cpuid
963 mov [xBX], ebx
964 pop ebx
965 }
966# endif
967 return (uint8_t)(xBX >> 24);
968}
969#endif
970
971
972/**
973 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
974 *
975 * @returns true/false.
976 * @param uEBX EBX return from ASMCpuId(0)
977 * @param uECX ECX return from ASMCpuId(0)
978 * @param uEDX EDX return from ASMCpuId(0)
979 */
980DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
981{
982 return uEBX == UINT32_C(0x756e6547)
983 && uECX == UINT32_C(0x6c65746e)
984 && uEDX == UINT32_C(0x49656e69);
985}
986
987
988/**
989 * Tests if this is a genuine Intel CPU.
990 *
991 * @returns true/false.
992 * @remarks ASSUMES that cpuid is supported by the CPU.
993 */
994DECLINLINE(bool) ASMIsIntelCpu(void)
995{
996 uint32_t uEAX, uEBX, uECX, uEDX;
997 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
998 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
999}
1000
1001
1002/**
1003 * Tests if this is an authentic AMD CPU based on the ASMCpuId(0) output.
1004 *
1005 * @returns true/false.
1006 * @param uEBX EBX return from ASMCpuId(0)
1007 * @param uECX ECX return from ASMCpuId(0)
1008 * @param uEDX EDX return from ASMCpuId(0)
1009 */
1010DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
1011{
1012 return uEBX == UINT32_C(0x68747541)
1013 && uECX == UINT32_C(0x444d4163)
1014 && uEDX == UINT32_C(0x69746e65);
1015}
1016
1017
1018/**
1019 * Tests if this is an authentic AMD CPU.
1020 *
1021 * @returns true/false.
1022 * @remarks ASSUMES that cpuid is supported by the CPU.
1023 */
1024DECLINLINE(bool) ASMIsAmdCpu(void)
1025{
1026 uint32_t uEAX, uEBX, uECX, uEDX;
1027 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1028 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
1029}
1030
1031
1032/**
1033 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1034 *
1035 * @returns Family.
1036 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1037 */
1038DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1039{
1040 return ((uEAX >> 8) & 0xf) == 0xf
1041 ? ((uEAX >> 20) & 0x7f) + 0xf
1042 : ((uEAX >> 8) & 0xf);
1043}
1044
1045
1046/**
1047 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1048 *
1049 * @returns Model.
1050 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1051 */
1052DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1053{
1054 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1055 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1056 : ((uEAX >> 4) & 0xf);
1057}
1058
1059
1060/**
1061 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1062 *
1063 * @returns Model.
1064 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1065 */
1066DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1067{
1068 return ((uEAX >> 8) & 0xf) == 0xf
1069 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1070 : ((uEAX >> 4) & 0xf);
1071}
1072
1073
1074/**
1075 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1076 *
1077 * @returns Model.
1078 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1079 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1080 */
1081DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1082{
1083 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1084 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1085 : ((uEAX >> 4) & 0xf);
1086}
1087
1088
1089/**
1090 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1091 *
1092 * @returns Stepping.
1093 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1094 */
1095DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1096{
1097 return uEAX & 0xf;
1098}
1099
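/* Usage sketch (illustrative only): decoding family, model and stepping from standard
 * leaf 1, using the vendor from leaf 0 to pick the right model encoding. Assumes CPUID
 * is available (see ASMHasCpuId()).
 *
 *     static void getFamilyModelStepping(uint32_t *puFamily, uint32_t *puModel, uint32_t *puStepping)
 *     {
 *         uint32_t uEAX, uEBX, uECX, uEDX;
 *         bool     fIntel;
 *         ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *         fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *         ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *         *puFamily   = ASMGetCpuFamily(uEAX);
 *         *puModel    = ASMGetCpuModel(uEAX, fIntel);
 *         *puStepping = ASMGetCpuStepping(uEAX);
 *     }
 */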
1100
1101/**
1102 * Get cr0.
1103 * @returns cr0.
1104 */
1105#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1106DECLASM(RTCCUINTREG) ASMGetCR0(void);
1107#else
1108DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1109{
1110 RTCCUINTREG uCR0;
1111# if RT_INLINE_ASM_USES_INTRIN
1112 uCR0 = __readcr0();
1113
1114# elif RT_INLINE_ASM_GNU_STYLE
1115# ifdef RT_ARCH_AMD64
1116 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1117# else
1118 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1119# endif
1120# else
1121 __asm
1122 {
1123# ifdef RT_ARCH_AMD64
1124 mov rax, cr0
1125 mov [uCR0], rax
1126# else
1127 mov eax, cr0
1128 mov [uCR0], eax
1129# endif
1130 }
1131# endif
1132 return uCR0;
1133}
1134#endif
1135
1136
1137/**
1138 * Sets the CR0 register.
1139 * @param uCR0 The new CR0 value.
1140 */
1141#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1142DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1143#else
1144DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1145{
1146# if RT_INLINE_ASM_USES_INTRIN
1147 __writecr0(uCR0);
1148
1149# elif RT_INLINE_ASM_GNU_STYLE
1150# ifdef RT_ARCH_AMD64
1151 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1152# else
1153 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1154# endif
1155# else
1156 __asm
1157 {
1158# ifdef RT_ARCH_AMD64
1159 mov rax, [uCR0]
1160 mov cr0, rax
1161# else
1162 mov eax, [uCR0]
1163 mov cr0, eax
1164# endif
1165 }
1166# endif
1167}
1168#endif
1169
1170
1171/**
1172 * Get cr2.
1173 * @returns cr2.
1174 */
1175#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1176DECLASM(RTCCUINTREG) ASMGetCR2(void);
1177#else
1178DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1179{
1180 RTCCUINTREG uCR2;
1181# if RT_INLINE_ASM_USES_INTRIN
1182 uCR2 = __readcr2();
1183
1184# elif RT_INLINE_ASM_GNU_STYLE
1185# ifdef RT_ARCH_AMD64
1186 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1187# else
1188 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1189# endif
1190# else
1191 __asm
1192 {
1193# ifdef RT_ARCH_AMD64
1194 mov rax, cr2
1195 mov [uCR2], rax
1196# else
1197 mov eax, cr2
1198 mov [uCR2], eax
1199# endif
1200 }
1201# endif
1202 return uCR2;
1203}
1204#endif
1205
1206
1207/**
1208 * Sets the CR2 register.
1209 * @param uCR2 The new CR2 value.
1210 */
1211#if RT_INLINE_ASM_EXTERNAL
1212DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1213#else
1214DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1215{
1216# if RT_INLINE_ASM_GNU_STYLE
1217# ifdef RT_ARCH_AMD64
1218 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1219# else
1220 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1221# endif
1222# else
1223 __asm
1224 {
1225# ifdef RT_ARCH_AMD64
1226 mov rax, [uCR2]
1227 mov cr2, rax
1228# else
1229 mov eax, [uCR2]
1230 mov cr2, eax
1231# endif
1232 }
1233# endif
1234}
1235#endif
1236
1237
1238/**
1239 * Get cr3.
1240 * @returns cr3.
1241 */
1242#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1243DECLASM(RTCCUINTREG) ASMGetCR3(void);
1244#else
1245DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1246{
1247 RTCCUINTREG uCR3;
1248# if RT_INLINE_ASM_USES_INTRIN
1249 uCR3 = __readcr3();
1250
1251# elif RT_INLINE_ASM_GNU_STYLE
1252# ifdef RT_ARCH_AMD64
1253 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1254# else
1255 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1256# endif
1257# else
1258 __asm
1259 {
1260# ifdef RT_ARCH_AMD64
1261 mov rax, cr3
1262 mov [uCR3], rax
1263# else
1264 mov eax, cr3
1265 mov [uCR3], eax
1266# endif
1267 }
1268# endif
1269 return uCR3;
1270}
1271#endif
1272
1273
1274/**
1275 * Sets the CR3 register.
1276 *
1277 * @param uCR3 New CR3 value.
1278 */
1279#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1280DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1281#else
1282DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1283{
1284# if RT_INLINE_ASM_USES_INTRIN
1285 __writecr3(uCR3);
1286
1287# elif RT_INLINE_ASM_GNU_STYLE
1288# ifdef RT_ARCH_AMD64
1289 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1290# else
1291 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1292# endif
1293# else
1294 __asm
1295 {
1296# ifdef RT_ARCH_AMD64
1297 mov rax, [uCR3]
1298 mov cr3, rax
1299# else
1300 mov eax, [uCR3]
1301 mov cr3, eax
1302# endif
1303 }
1304# endif
1305}
1306#endif
1307
1308
1309/**
1310 * Reloads the CR3 register.
1311 */
1312#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1313DECLASM(void) ASMReloadCR3(void);
1314#else
1315DECLINLINE(void) ASMReloadCR3(void)
1316{
1317# if RT_INLINE_ASM_USES_INTRIN
1318 __writecr3(__readcr3());
1319
1320# elif RT_INLINE_ASM_GNU_STYLE
1321 RTCCUINTREG u;
1322# ifdef RT_ARCH_AMD64
1323 __asm__ __volatile__("movq %%cr3, %0\n\t"
1324 "movq %0, %%cr3\n\t"
1325 : "=r" (u));
1326# else
1327 __asm__ __volatile__("movl %%cr3, %0\n\t"
1328 "movl %0, %%cr3\n\t"
1329 : "=r" (u));
1330# endif
1331# else
1332 __asm
1333 {
1334# ifdef RT_ARCH_AMD64
1335 mov rax, cr3
1336 mov cr3, rax
1337# else
1338 mov eax, cr3
1339 mov cr3, eax
1340# endif
1341 }
1342# endif
1343}
1344#endif
1345
1346
1347/**
1348 * Get cr4.
1349 * @returns cr4.
1350 */
1351#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1352DECLASM(RTCCUINTREG) ASMGetCR4(void);
1353#else
1354DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1355{
1356 RTCCUINTREG uCR4;
1357# if RT_INLINE_ASM_USES_INTRIN
1358 uCR4 = __readcr4();
1359
1360# elif RT_INLINE_ASM_GNU_STYLE
1361# ifdef RT_ARCH_AMD64
1362 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1363# else
1364 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1365# endif
1366# else
1367 __asm
1368 {
1369# ifdef RT_ARCH_AMD64
1370 mov rax, cr4
1371 mov [uCR4], rax
1372# else
1373 push eax /* just in case */
1374 /*mov eax, cr4*/
1375 _emit 0x0f
1376 _emit 0x20
1377 _emit 0xe0
1378 mov [uCR4], eax
1379 pop eax
1380# endif
1381 }
1382# endif
1383 return uCR4;
1384}
1385#endif
1386
1387
1388/**
1389 * Sets the CR4 register.
1390 *
1391 * @param uCR4 New CR4 value.
1392 */
1393#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1394DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1395#else
1396DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1397{
1398# if RT_INLINE_ASM_USES_INTRIN
1399 __writecr4(uCR4);
1400
1401# elif RT_INLINE_ASM_GNU_STYLE
1402# ifdef RT_ARCH_AMD64
1403 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1404# else
1405 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1406# endif
1407# else
1408 __asm
1409 {
1410# ifdef RT_ARCH_AMD64
1411 mov rax, [uCR4]
1412 mov cr4, rax
1413# else
1414 mov eax, [uCR4]
1415 _emit 0x0F
1416 _emit 0x22
1417 _emit 0xE0 /* mov cr4, eax */
1418# endif
1419 }
1420# endif
1421}
1422#endif
1423
1424
1425/**
1426 * Get cr8.
1427 * @returns cr8.
1428 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1429 */
1430#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1431DECLASM(RTCCUINTREG) ASMGetCR8(void);
1432#else
1433DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1434{
1435# ifdef RT_ARCH_AMD64
1436 RTCCUINTREG uCR8;
1437# if RT_INLINE_ASM_USES_INTRIN
1438 uCR8 = __readcr8();
1439
1440# elif RT_INLINE_ASM_GNU_STYLE
1441 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1442# else
1443 __asm
1444 {
1445 mov rax, cr8
1446 mov [uCR8], rax
1447 }
1448# endif
1449 return uCR8;
1450# else /* !RT_ARCH_AMD64 */
1451 return 0;
1452# endif /* !RT_ARCH_AMD64 */
1453}
1454#endif
1455
1456
1457/**
1458 * Enables interrupts (EFLAGS.IF).
1459 */
1460#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1461DECLASM(void) ASMIntEnable(void);
1462#else
1463DECLINLINE(void) ASMIntEnable(void)
1464{
1465# if RT_INLINE_ASM_GNU_STYLE
1466 __asm("sti\n");
1467# elif RT_INLINE_ASM_USES_INTRIN
1468 _enable();
1469# else
1470 __asm sti
1471# endif
1472}
1473#endif
1474
1475
1476/**
1477 * Disables interrupts (!EFLAGS.IF).
1478 */
1479#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1480DECLASM(void) ASMIntDisable(void);
1481#else
1482DECLINLINE(void) ASMIntDisable(void)
1483{
1484# if RT_INLINE_ASM_GNU_STYLE
1485 __asm("cli\n");
1486# elif RT_INLINE_ASM_USES_INTRIN
1487 _disable();
1488# else
1489 __asm cli
1490# endif
1491}
1492#endif
1493
1494
1495/**
1496 * Disables interrupts and returns previous xFLAGS.
1497 */
1498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1499DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1500#else
1501DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1502{
1503 RTCCUINTREG xFlags;
1504# if RT_INLINE_ASM_GNU_STYLE
1505# ifdef RT_ARCH_AMD64
1506 __asm__ __volatile__("pushfq\n\t"
1507 "cli\n\t"
1508 "popq %0\n\t"
1509 : "=r" (xFlags));
1510# else
1511 __asm__ __volatile__("pushfl\n\t"
1512 "cli\n\t"
1513 "popl %0\n\t"
1514 : "=r" (xFlags));
1515# endif
1516# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1517 xFlags = ASMGetFlags();
1518 _disable();
1519# else
1520 __asm {
1521 pushfd
1522 cli
1523 pop [xFlags]
1524 }
1525# endif
1526 return xFlags;
1527}
1528#endif
1529
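/* Usage sketch (illustrative only): the value returned by ASMIntDisableFlags() is meant
 * to be handed back to ASMSetFlags() so the interrupt flag is restored to whatever it
 * was before the critical section. The body of the critical section is a placeholder.
 *
 *     RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
 *     updateDataSharedWithInterruptHandler();    // placeholder for the protected work
 *     ASMSetFlags(fSavedFlags);
 */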
1530
1531/**
1532 * Are interrupts enabled?
1533 *
1534 * @returns true / false.
1535 */
1536DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1537{
1538 RTCCUINTREG uFlags = ASMGetFlags();
1539 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1540}
1541
1542
1543/**
1544 * Halts the CPU until interrupted.
1545 */
1546#if RT_INLINE_ASM_EXTERNAL
1547DECLASM(void) ASMHalt(void);
1548#else
1549DECLINLINE(void) ASMHalt(void)
1550{
1551# if RT_INLINE_ASM_GNU_STYLE
1552 __asm__ __volatile__("hlt\n\t");
1553# else
1554 __asm {
1555 hlt
1556 }
1557# endif
1558}
1559#endif
1560
1561
1562/**
1563 * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
1564 */
1565#if RT_INLINE_ASM_EXTERNAL
1566DECLASM(void) ASMNopPause(void);
1567#else
1568DECLINLINE(void) ASMNopPause(void)
1569{
1570# if RT_INLINE_ASM_GNU_STYLE
1571 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1572# else
1573 __asm {
1574 _emit 0f3h
1575 _emit 090h
1576 }
1577# endif
1578}
1579#endif
1580
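/* Usage sketch (illustrative only): a simple spin-wait that issues PAUSE on every
 * iteration so a hyperthreaded sibling gets execution resources and the spinning
 * thread leaves the loop faster once the flag changes.
 *
 *     static void spinUntilSet(bool volatile *pfFlag)
 *     {
 *         while (!*pfFlag)
 *             ASMNopPause();
 *     }
 */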
1581
1582/**
1583 * Reads a machine specific register.
1584 *
1585 * @returns Register content.
1586 * @param uRegister Register to read.
1587 */
1588#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1589DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1590#else
1591DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1592{
1593 RTUINT64U u;
1594# if RT_INLINE_ASM_GNU_STYLE
1595 __asm__ __volatile__("rdmsr\n\t"
1596 : "=a" (u.s.Lo),
1597 "=d" (u.s.Hi)
1598 : "c" (uRegister));
1599
1600# elif RT_INLINE_ASM_USES_INTRIN
1601 u.u = __readmsr(uRegister);
1602
1603# else
1604 __asm
1605 {
1606 mov ecx, [uRegister]
1607 rdmsr
1608 mov [u.s.Lo], eax
1609 mov [u.s.Hi], edx
1610 }
1611# endif
1612
1613 return u.u;
1614}
1615#endif
1616
1617
1618/**
1619 * Writes a machine specific register.
1620 *
1621 *
1622 * @param uRegister Register to write to.
1623 * @param u64Val Value to write.
1624 */
1625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1626DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1627#else
1628DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1629{
1630 RTUINT64U u;
1631
1632 u.u = u64Val;
1633# if RT_INLINE_ASM_GNU_STYLE
1634 __asm__ __volatile__("wrmsr\n\t"
1635 ::"a" (u.s.Lo),
1636 "d" (u.s.Hi),
1637 "c" (uRegister));
1638
1639# elif RT_INLINE_ASM_USES_INTRIN
1640 __writemsr(uRegister, u.u);
1641
1642# else
1643 __asm
1644 {
1645 mov ecx, [uRegister]
1646 mov edx, [u.s.Hi]
1647 mov eax, [u.s.Lo]
1648 wrmsr
1649 }
1650# endif
1651}
1652#endif
1653
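/* Usage sketch (illustrative only): a read-modify-write of a machine specific register
 * using ASMRdMsr() and ASMWrMsr(). Which MSR and which bits to set is entirely up to
 * the caller; the helper is generic on purpose.
 *
 *     static void msrSetBits(uint32_t uMsr, uint64_t fBits)
 *     {
 *         uint64_t uValue = ASMRdMsr(uMsr);
 *         if ((uValue & fBits) != fBits)
 *             ASMWrMsr(uMsr, uValue | fBits);
 *     }
 */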
1654
1655/**
1656 * Reads low part of a machine specific register.
1657 *
1658 * @returns Register content.
1659 * @param uRegister Register to read.
1660 */
1661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1662DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1663#else
1664DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1665{
1666 uint32_t u32;
1667# if RT_INLINE_ASM_GNU_STYLE
1668 __asm__ __volatile__("rdmsr\n\t"
1669 : "=a" (u32)
1670 : "c" (uRegister)
1671 : "edx");
1672
1673# elif RT_INLINE_ASM_USES_INTRIN
1674 u32 = (uint32_t)__readmsr(uRegister);
1675
1676#else
1677 __asm
1678 {
1679 mov ecx, [uRegister]
1680 rdmsr
1681 mov [u32], eax
1682 }
1683# endif
1684
1685 return u32;
1686}
1687#endif
1688
1689
1690/**
1691 * Reads high part of a machine specific register.
1692 *
1693 * @returns Register content.
1694 * @param uRegister Register to read.
1695 */
1696#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1697DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1698#else
1699DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1700{
1701 uint32_t u32;
1702# if RT_INLINE_ASM_GNU_STYLE
1703 __asm__ __volatile__("rdmsr\n\t"
1704 : "=d" (u32)
1705 : "c" (uRegister)
1706 : "eax");
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1710
1711# else
1712 __asm
1713 {
1714 mov ecx, [uRegister]
1715 rdmsr
1716 mov [u32], edx
1717 }
1718# endif
1719
1720 return u32;
1721}
1722#endif
1723
1724
1725/**
1726 * Gets dr0.
1727 *
1728 * @returns dr0.
1729 */
1730#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1731DECLASM(RTCCUINTREG) ASMGetDR0(void);
1732#else
1733DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1734{
1735 RTCCUINTREG uDR0;
1736# if RT_INLINE_ASM_USES_INTRIN
1737 uDR0 = __readdr(0);
1738# elif RT_INLINE_ASM_GNU_STYLE
1739# ifdef RT_ARCH_AMD64
1740 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1741# else
1742 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1743# endif
1744# else
1745 __asm
1746 {
1747# ifdef RT_ARCH_AMD64
1748 mov rax, dr0
1749 mov [uDR0], rax
1750# else
1751 mov eax, dr0
1752 mov [uDR0], eax
1753# endif
1754 }
1755# endif
1756 return uDR0;
1757}
1758#endif
1759
1760
1761/**
1762 * Gets dr1.
1763 *
1764 * @returns dr1.
1765 */
1766#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1767DECLASM(RTCCUINTREG) ASMGetDR1(void);
1768#else
1769DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1770{
1771 RTCCUINTREG uDR1;
1772# if RT_INLINE_ASM_USES_INTRIN
1773 uDR1 = __readdr(1);
1774# elif RT_INLINE_ASM_GNU_STYLE
1775# ifdef RT_ARCH_AMD64
1776 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1777# else
1778 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1779# endif
1780# else
1781 __asm
1782 {
1783# ifdef RT_ARCH_AMD64
1784 mov rax, dr1
1785 mov [uDR1], rax
1786# else
1787 mov eax, dr1
1788 mov [uDR1], eax
1789# endif
1790 }
1791# endif
1792 return uDR1;
1793}
1794#endif
1795
1796
1797/**
1798 * Gets dr2.
1799 *
1800 * @returns dr2.
1801 */
1802#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1803DECLASM(RTCCUINTREG) ASMGetDR2(void);
1804#else
1805DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1806{
1807 RTCCUINTREG uDR2;
1808# if RT_INLINE_ASM_USES_INTRIN
1809 uDR2 = __readdr(2);
1810# elif RT_INLINE_ASM_GNU_STYLE
1811# ifdef RT_ARCH_AMD64
1812 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1813# else
1814 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1815# endif
1816# else
1817 __asm
1818 {
1819# ifdef RT_ARCH_AMD64
1820 mov rax, dr2
1821 mov [uDR2], rax
1822# else
1823 mov eax, dr2
1824 mov [uDR2], eax
1825# endif
1826 }
1827# endif
1828 return uDR2;
1829}
1830#endif
1831
1832
1833/**
1834 * Gets dr3.
1835 *
1836 * @returns dr3.
1837 */
1838#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1839DECLASM(RTCCUINTREG) ASMGetDR3(void);
1840#else
1841DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1842{
1843 RTCCUINTREG uDR3;
1844# if RT_INLINE_ASM_USES_INTRIN
1845 uDR3 = __readdr(3);
1846# elif RT_INLINE_ASM_GNU_STYLE
1847# ifdef RT_ARCH_AMD64
1848 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1849# else
1850 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1851# endif
1852# else
1853 __asm
1854 {
1855# ifdef RT_ARCH_AMD64
1856 mov rax, dr3
1857 mov [uDR3], rax
1858# else
1859 mov eax, dr3
1860 mov [uDR3], eax
1861# endif
1862 }
1863# endif
1864 return uDR3;
1865}
1866#endif
1867
1868
1869/**
1870 * Gets dr6.
1871 *
1872 * @returns dr6.
1873 */
1874#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1875DECLASM(RTCCUINTREG) ASMGetDR6(void);
1876#else
1877DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1878{
1879 RTCCUINTREG uDR6;
1880# if RT_INLINE_ASM_USES_INTRIN
1881 uDR6 = __readdr(6);
1882# elif RT_INLINE_ASM_GNU_STYLE
1883# ifdef RT_ARCH_AMD64
1884 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1885# else
1886 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1887# endif
1888# else
1889 __asm
1890 {
1891# ifdef RT_ARCH_AMD64
1892 mov rax, dr6
1893 mov [uDR6], rax
1894# else
1895 mov eax, dr6
1896 mov [uDR6], eax
1897# endif
1898 }
1899# endif
1900 return uDR6;
1901}
1902#endif
1903
1904
1905/**
1906 * Reads and clears DR6.
1907 *
1908 * @returns DR6.
1909 */
1910#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1911DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1912#else
1913DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1914{
1915 RTCCUINTREG uDR6;
1916# if RT_INLINE_ASM_USES_INTRIN
1917 uDR6 = __readdr(6);
1918 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1919# elif RT_INLINE_ASM_GNU_STYLE
1920 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1921# ifdef RT_ARCH_AMD64
1922 __asm__ __volatile__("movq %%dr6, %0\n\t"
1923 "movq %1, %%dr6\n\t"
1924 : "=r" (uDR6)
1925 : "r" (uNewValue));
1926# else
1927 __asm__ __volatile__("movl %%dr6, %0\n\t"
1928 "movl %1, %%dr6\n\t"
1929 : "=r" (uDR6)
1930 : "r" (uNewValue));
1931# endif
1932# else
1933 __asm
1934 {
1935# ifdef RT_ARCH_AMD64
1936 mov rax, dr6
1937 mov [uDR6], rax
1938 mov rcx, rax
1939 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1940 mov dr6, rcx
1941# else
1942 mov eax, dr6
1943 mov [uDR6], eax
1944 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1945 mov dr6, ecx
1946# endif
1947 }
1948# endif
1949 return uDR6;
1950}
1951#endif
1952
1953
1954/**
1955 * Gets dr7.
1956 *
1957 * @returns dr7.
1958 */
1959#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1960DECLASM(RTCCUINTREG) ASMGetDR7(void);
1961#else
1962DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1963{
1964 RTCCUINTREG uDR7;
1965# if RT_INLINE_ASM_USES_INTRIN
1966 uDR7 = __readdr(7);
1967# elif RT_INLINE_ASM_GNU_STYLE
1968# ifdef RT_ARCH_AMD64
1969 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1970# else
1971 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1972# endif
1973# else
1974 __asm
1975 {
1976# ifdef RT_ARCH_AMD64
1977 mov rax, dr7
1978 mov [uDR7], rax
1979# else
1980 mov eax, dr7
1981 mov [uDR7], eax
1982# endif
1983 }
1984# endif
1985 return uDR7;
1986}
1987#endif
1988
1989
1990/**
1991 * Sets dr0.
1992 *
1993 * @param uDRVal Debug register value to write
1994 */
1995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1996DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1997#else
1998DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1999{
2000# if RT_INLINE_ASM_USES_INTRIN
2001 __writedr(0, uDRVal);
2002# elif RT_INLINE_ASM_GNU_STYLE
2003# ifdef RT_ARCH_AMD64
2004 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
2005# else
2006 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
2007# endif
2008# else
2009 __asm
2010 {
2011# ifdef RT_ARCH_AMD64
2012 mov rax, [uDRVal]
2013 mov dr0, rax
2014# else
2015 mov eax, [uDRVal]
2016 mov dr0, eax
2017# endif
2018 }
2019# endif
2020}
2021#endif
2022
2023
2024/**
2025 * Sets dr1.
2026 *
2027 * @param uDRVal Debug register value to write
2028 */
2029#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2030DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2031#else
2032DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2033{
2034# if RT_INLINE_ASM_USES_INTRIN
2035 __writedr(1, uDRVal);
2036# elif RT_INLINE_ASM_GNU_STYLE
2037# ifdef RT_ARCH_AMD64
2038 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2039# else
2040 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2041# endif
2042# else
2043 __asm
2044 {
2045# ifdef RT_ARCH_AMD64
2046 mov rax, [uDRVal]
2047 mov dr1, rax
2048# else
2049 mov eax, [uDRVal]
2050 mov dr1, eax
2051# endif
2052 }
2053# endif
2054}
2055#endif
2056
2057
2058/**
2059 * Sets dr2.
2060 *
2061 * @param uDRVal Debug register value to write
2062 */
2063#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2064DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2065#else
2066DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2067{
2068# if RT_INLINE_ASM_USES_INTRIN
2069 __writedr(2, uDRVal);
2070# elif RT_INLINE_ASM_GNU_STYLE
2071# ifdef RT_ARCH_AMD64
2072 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2073# else
2074 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2075# endif
2076# else
2077 __asm
2078 {
2079# ifdef RT_ARCH_AMD64
2080 mov rax, [uDRVal]
2081 mov dr2, rax
2082# else
2083 mov eax, [uDRVal]
2084 mov dr2, eax
2085# endif
2086 }
2087# endif
2088}
2089#endif
2090
2091
2092/**
2093 * Sets dr3.
2094 *
2095 * @param uDRVal Debug register value to write
2096 */
2097#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2098DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2099#else
2100DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2101{
2102# if RT_INLINE_ASM_USES_INTRIN
2103 __writedr(3, uDRVal);
2104# elif RT_INLINE_ASM_GNU_STYLE
2105# ifdef RT_ARCH_AMD64
2106 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2107# else
2108 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2109# endif
2110# else
2111 __asm
2112 {
2113# ifdef RT_ARCH_AMD64
2114 mov rax, [uDRVal]
2115 mov dr3, rax
2116# else
2117 mov eax, [uDRVal]
2118 mov dr3, eax
2119# endif
2120 }
2121# endif
2122}
2123#endif
2124
2125
2126/**
2127 * Sets dr6.
2128 *
2129 * @param uDRVal Debug register value to write
2130 */
2131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2132DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2133#else
2134DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2135{
2136# if RT_INLINE_ASM_USES_INTRIN
2137 __writedr(6, uDRVal);
2138# elif RT_INLINE_ASM_GNU_STYLE
2139# ifdef RT_ARCH_AMD64
2140 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2141# else
2142 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2143# endif
2144# else
2145 __asm
2146 {
2147# ifdef RT_ARCH_AMD64
2148 mov rax, [uDRVal]
2149 mov dr6, rax
2150# else
2151 mov eax, [uDRVal]
2152 mov dr6, eax
2153# endif
2154 }
2155# endif
2156}
2157#endif
2158
2159
2160/**
2161 * Sets dr7.
2162 *
2163 * @param uDRVal Debug register value to write
2164 */
2165#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2166DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2167#else
2168DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2169{
2170# if RT_INLINE_ASM_USES_INTRIN
2171 __writedr(7, uDRVal);
2172# elif RT_INLINE_ASM_GNU_STYLE
2173# ifdef RT_ARCH_AMD64
2174 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2175# else
2176 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2177# endif
2178# else
2179 __asm
2180 {
2181# ifdef RT_ARCH_AMD64
2182 mov rax, [uDRVal]
2183 mov dr7, rax
2184# else
2185 mov eax, [uDRVal]
2186 mov dr7, eax
2187# endif
2188 }
2189# endif
2190}
2191#endif
2192
2193
2194/**
2195 * Compiler memory barrier.
2196 *
2197 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2198 * values or any outstanding writes when returning from this function.
2199 *
2200 * This function must be used if non-volatile data is modified by a
2201 * device or the VMM. Typical cases are port access, MMIO access,
2202 * trapping instructions, etc.
2203 */
2204#if RT_INLINE_ASM_GNU_STYLE
2205# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2206#elif RT_INLINE_ASM_USES_INTRIN
2207# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2209DECLINLINE(void) ASMCompilerBarrier(void)
2210{
2211 __asm
2212 {
2213 }
2214}
2215#endif
2216
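/* Usage sketch (illustrative only): forcing the compiler to re-read a plain
 * (non-volatile) flag that is updated behind its back, e.g. by an interrupt handler or
 * by a device via DMA.
 *
 *     static void waitUntilDone(uint8_t *pfDone)
 *     {
 *         while (!*pfDone)
 *         {
 *             ASMCompilerBarrier();   // make the compiler reload *pfDone from memory
 *             ASMNopPause();
 *         }
 *     }
 */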
2217
2218/**
2219 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2220 *
2221 * @param Port I/O port to write to.
2222 * @param u8 8-bit integer to write.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2226#else
2227DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2228{
2229# if RT_INLINE_ASM_GNU_STYLE
2230 __asm__ __volatile__("outb %b1, %w0\n\t"
2231 :: "Nd" (Port),
2232 "a" (u8));
2233
2234# elif RT_INLINE_ASM_USES_INTRIN
2235 __outbyte(Port, u8);
2236
2237# else
2238 __asm
2239 {
2240 mov dx, [Port]
2241 mov al, [u8]
2242 out dx, al
2243 }
2244# endif
2245}
2246#endif
2247
2248
2249/**
2250 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2251 *
2252 * @returns 8-bit integer.
2253 * @param Port I/O port to read from.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2257#else
2258DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2259{
2260 uint8_t u8;
2261# if RT_INLINE_ASM_GNU_STYLE
2262 __asm__ __volatile__("inb %w1, %b0\n\t"
2263 : "=a" (u8)
2264 : "Nd" (Port));
2265
2266# elif RT_INLINE_ASM_USES_INTRIN
2267 u8 = __inbyte(Port);
2268
2269# else
2270 __asm
2271 {
2272 mov dx, [Port]
2273 in al, dx
2274 mov [u8], al
2275 }
2276# endif
2277 return u8;
2278}
2279#endif
2280
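/* Usage sketch (illustrative only): reading a CMOS/RTC register through the classic PC
 * index/data port pair. The port numbers 0x70/0x71 are the standard CMOS ports on PC
 * hardware and are used here purely as an example; the NMI disable bit in the index is
 * ignored.
 *
 *     static uint8_t cmosReadReg(uint8_t bReg)
 *     {
 *         ASMOutU8(0x70, bReg);       // select the CMOS register
 *         return ASMInU8(0x71);       // read its value
 *     }
 */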
2281
2282/**
2283 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2284 *
2285 * @param Port I/O port to write to.
2286 * @param u16 16-bit integer to write.
2287 */
2288#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2289DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2290#else
2291DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2292{
2293# if RT_INLINE_ASM_GNU_STYLE
2294 __asm__ __volatile__("outw %w1, %w0\n\t"
2295 :: "Nd" (Port),
2296 "a" (u16));
2297
2298# elif RT_INLINE_ASM_USES_INTRIN
2299 __outword(Port, u16);
2300
2301# else
2302 __asm
2303 {
2304 mov dx, [Port]
2305 mov ax, [u16]
2306 out dx, ax
2307 }
2308# endif
2309}
2310#endif
2311
2312
2313/**
2314 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2315 *
2316 * @returns 16-bit integer.
2317 * @param Port I/O port to read from.
2318 */
2319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2320DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2321#else
2322DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2323{
2324 uint16_t u16;
2325# if RT_INLINE_ASM_GNU_STYLE
2326 __asm__ __volatile__("inw %w1, %w0\n\t"
2327 : "=a" (u16)
2328 : "Nd" (Port));
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 u16 = __inword(Port);
2332
2333# else
2334 __asm
2335 {
2336 mov dx, [Port]
2337 in ax, dx
2338 mov [u16], ax
2339 }
2340# endif
2341 return u16;
2342}
2343#endif
2344
2345
2346/**
2347 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2348 *
2349 * @param Port I/O port to write to.
2350 * @param u32 32-bit integer to write.
2351 */
2352#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2353DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2354#else
2355DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2356{
2357# if RT_INLINE_ASM_GNU_STYLE
2358 __asm__ __volatile__("outl %1, %w0\n\t"
2359 :: "Nd" (Port),
2360 "a" (u32));
2361
2362# elif RT_INLINE_ASM_USES_INTRIN
2363 __outdword(Port, u32);
2364
2365# else
2366 __asm
2367 {
2368 mov dx, [Port]
2369 mov eax, [u32]
2370 out dx, eax
2371 }
2372# endif
2373}
2374#endif
2375
2376
2377/**
2378 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2379 *
2380 * @returns 32-bit integer.
2381 * @param Port I/O port to read from.
2382 */
2383#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2384DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2385#else
2386DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2387{
2388 uint32_t u32;
2389# if RT_INLINE_ASM_GNU_STYLE
2390 __asm__ __volatile__("inl %w1, %0\n\t"
2391 : "=a" (u32)
2392 : "Nd" (Port));
2393
2394# elif RT_INLINE_ASM_USES_INTRIN
2395 u32 = __indword(Port);
2396
2397# else
2398 __asm
2399 {
2400 mov dx, [Port]
2401 in eax, dx
2402 mov [u32], eax
2403 }
2404# endif
2405 return u32;
2406}
2407#endif
2408
2409
2410/**
2411 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2412 *
2413 * @param Port I/O port to write to.
2414 * @param pau8 Pointer to the string buffer.
2415 * @param c The number of items to write.
2416 */
2417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2418DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2419#else
2420DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2421{
2422# if RT_INLINE_ASM_GNU_STYLE
2423 __asm__ __volatile__("rep; outsb\n\t"
2424 : "+S" (pau8),
2425 "+c" (c)
2426 : "d" (Port));
2427
2428# elif RT_INLINE_ASM_USES_INTRIN
2429 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2430
2431# else
2432 __asm
2433 {
2434 mov dx, [Port]
2435 mov ecx, [c]
2436 mov eax, [pau8]
2437 xchg esi, eax
2438 rep outsb
2439 xchg esi, eax
2440 }
2441# endif
2442}
2443#endif
2444
2445
2446/**
2447 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2448 *
2449 * @param Port I/O port to read from.
2450 * @param pau8 Pointer to the string buffer (output).
2451 * @param c The number of items to read.
2452 */
2453#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2454DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2455#else
2456DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2457{
2458# if RT_INLINE_ASM_GNU_STYLE
2459 __asm__ __volatile__("rep; insb\n\t"
2460 : "+D" (pau8),
2461 "+c" (c)
2462 : "d" (Port));
2463
2464# elif RT_INLINE_ASM_USES_INTRIN
2465 __inbytestring(Port, pau8, (unsigned long)c);
2466
2467# else
2468 __asm
2469 {
2470 mov dx, [Port]
2471 mov ecx, [c]
2472 mov eax, [pau8]
2473 xchg edi, eax
2474 rep insb
2475 xchg edi, eax
2476 }
2477# endif
2478}
2479#endif
2480
2481
2482/**
2483 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2484 *
2485 * @param Port I/O port to write to.
2486 * @param pau16 Pointer to the string buffer.
2487 * @param c The number of items to write.
2488 */
2489#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2490DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2491#else
2492DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2493{
2494# if RT_INLINE_ASM_GNU_STYLE
2495 __asm__ __volatile__("rep; outsw\n\t"
2496 : "+S" (pau16),
2497 "+c" (c)
2498 : "d" (Port));
2499
2500# elif RT_INLINE_ASM_USES_INTRIN
2501 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2502
2503# else
2504 __asm
2505 {
2506 mov dx, [Port]
2507 mov ecx, [c]
2508 mov eax, [pau16]
2509 xchg esi, eax
2510 rep outsw
2511 xchg esi, eax
2512 }
2513# endif
2514}
2515#endif
2516
2517
2518/**
2519 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2520 *
2521 * @param Port I/O port to read from.
2522 * @param pau16 Pointer to the string buffer (output).
2523 * @param c The number of items to read.
2524 */
2525#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2526DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2527#else
2528DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2529{
2530# if RT_INLINE_ASM_GNU_STYLE
2531 __asm__ __volatile__("rep; insw\n\t"
2532 : "+D" (pau16),
2533 "+c" (c)
2534 : "d" (Port));
2535
2536# elif RT_INLINE_ASM_USES_INTRIN
2537 __inwordstring(Port, pau16, (unsigned long)c);
2538
2539# else
2540 __asm
2541 {
2542 mov dx, [Port]
2543 mov ecx, [c]
2544 mov eax, [pau16]
2545 xchg edi, eax
2546 rep insw
2547 xchg edi, eax
2548 }
2549# endif
2550}
2551#endif
2552
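/* A usage sketch, assuming a legacy ATA PIO read: one 512-byte sector
 * arrives as 256 16-bit words on the primary channel data port 0x1f0;
 * au16Sector is a hypothetical local buffer.
 *
 *     uint16_t au16Sector[256];
 *     ASMInStrU16(0x1f0, au16Sector, RT_ELEMENTS(au16Sector));
 */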
2553
2554/**
2555 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2556 *
2557 * @param Port I/O port to write to.
2558 * @param pau32 Pointer to the string buffer.
2559 * @param c The number of items to write.
2560 */
2561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2562DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2563#else
2564DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2565{
2566# if RT_INLINE_ASM_GNU_STYLE
2567 __asm__ __volatile__("rep; outsl\n\t"
2568 : "+S" (pau32),
2569 "+c" (c)
2570 : "d" (Port));
2571
2572# elif RT_INLINE_ASM_USES_INTRIN
2573 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2574
2575# else
2576 __asm
2577 {
2578 mov dx, [Port]
2579 mov ecx, [c]
2580 mov eax, [pau32]
2581 xchg esi, eax
2582 rep outsd
2583 xchg esi, eax
2584 }
2585# endif
2586}
2587#endif
2588
2589
2590/**
2591 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2592 *
2593 * @param Port I/O port to read from.
2594 * @param pau32 Pointer to the string buffer (output).
2595 * @param c The number of items to read.
2596 */
2597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2598DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2599#else
2600DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2601{
2602# if RT_INLINE_ASM_GNU_STYLE
2603 __asm__ __volatile__("rep; insl\n\t"
2604 : "+D" (pau32),
2605 "+c" (c)
2606 : "d" (Port));
2607
2608# elif RT_INLINE_ASM_USES_INTRIN
2609 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2610
2611# else
2612 __asm
2613 {
2614 mov dx, [Port]
2615 mov ecx, [c]
2616 mov eax, [pau32]
2617 xchg edi, eax
2618 rep insd
2619 xchg edi, eax
2620 }
2621# endif
2622}
2623#endif
2624
2625
2626/**
2627 * Atomically Exchange an unsigned 8-bit value, ordered.
2628 *
2629 * @returns Current *pu8 value
2630 * @param pu8 Pointer to the 8-bit variable to update.
2631 * @param u8 The 8-bit value to assign to *pu8.
2632 */
2633#if RT_INLINE_ASM_EXTERNAL
2634DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2635#else
2636DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2637{
2638# if RT_INLINE_ASM_GNU_STYLE
2639 __asm__ __volatile__("xchgb %0, %1\n\t"
2640 : "=m" (*pu8),
2641 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2642 : "1" (u8),
2643 "m" (*pu8));
2644# else
2645 __asm
2646 {
2647# ifdef RT_ARCH_AMD64
2648 mov rdx, [pu8]
2649 mov al, [u8]
2650 xchg [rdx], al
2651 mov [u8], al
2652# else
2653 mov edx, [pu8]
2654 mov al, [u8]
2655 xchg [edx], al
2656 mov [u8], al
2657# endif
2658 }
2659# endif
2660 return u8;
2661}
2662#endif
2663
2664
2665/**
2666 * Atomically Exchange a signed 8-bit value, ordered.
2667 *
2668 * @returns Current *pi8 value
2669 * @param pi8 Pointer to the 8-bit variable to update.
2670 * @param i8 The 8-bit value to assign to *pi8.
2671 */
2672DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2673{
2674 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2675}
2676
2677
2678/**
2679 * Atomically Exchange a bool value, ordered.
2680 *
2681 * @returns Current *pf value
2682 * @param pf Pointer to the boolean variable to update.
2683 * @param f The boolean value to assign to *pf.
2684 */
2685DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2686{
2687#ifdef _MSC_VER
2688 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2689#else
2690 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2691#endif
2692}
2693
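/* A minimal sketch of a test-and-set style guard built on ASMAtomicXchgBool;
 * g_fBusy is a hypothetical flag, not part of this API.
 *
 *     static volatile bool g_fBusy = false;
 *     if (!ASMAtomicXchgBool(&g_fBusy, true))
 *     {
 *         // the old value was false, so this caller won the race ...
 *         ASMAtomicXchgBool(&g_fBusy, false);   // release the guard
 *     }
 */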
2694
2695/**
2696 * Atomically Exchange an unsigned 16-bit value, ordered.
2697 *
2698 * @returns Current *pu16 value
2699 * @param pu16 Pointer to the 16-bit variable to update.
2700 * @param u16 The 16-bit value to assign to *pu16.
2701 */
2702#if RT_INLINE_ASM_EXTERNAL
2703DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2704#else
2705DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2706{
2707# if RT_INLINE_ASM_GNU_STYLE
2708 __asm__ __volatile__("xchgw %0, %1\n\t"
2709 : "=m" (*pu16),
2710 "=r" (u16)
2711 : "1" (u16),
2712 "m" (*pu16));
2713# else
2714 __asm
2715 {
2716# ifdef RT_ARCH_AMD64
2717 mov rdx, [pu16]
2718 mov ax, [u16]
2719 xchg [rdx], ax
2720 mov [u16], ax
2721# else
2722 mov edx, [pu16]
2723 mov ax, [u16]
2724 xchg [edx], ax
2725 mov [u16], ax
2726# endif
2727 }
2728# endif
2729 return u16;
2730}
2731#endif
2732
2733
2734/**
2735 * Atomically Exchange a signed 16-bit value, ordered.
2736 *
2737 * @returns Current *pi16 value
2738 * @param pi16 Pointer to the 16-bit variable to update.
2739 * @param i16 The 16-bit value to assign to *pi16.
2740 */
2741DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2742{
2743 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2744}
2745
2746
2747/**
2748 * Atomically Exchange an unsigned 32-bit value, ordered.
2749 *
2750 * @returns Current *pu32 value
2751 * @param pu32 Pointer to the 32-bit variable to update.
2752 * @param u32 The 32-bit value to assign to *pu32.
2753 */
2754#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2755DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2756#else
2757DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2758{
2759# if RT_INLINE_ASM_GNU_STYLE
2760 __asm__ __volatile__("xchgl %0, %1\n\t"
2761 : "=m" (*pu32),
2762 "=r" (u32)
2763 : "1" (u32),
2764 "m" (*pu32));
2765
2766# elif RT_INLINE_ASM_USES_INTRIN
2767 u32 = _InterlockedExchange((long *)pu32, u32);
2768
2769# else
2770 __asm
2771 {
2772# ifdef RT_ARCH_AMD64
2773 mov rdx, [pu32]
2774 mov eax, u32
2775 xchg [rdx], eax
2776 mov [u32], eax
2777# else
2778 mov edx, [pu32]
2779 mov eax, u32
2780 xchg [edx], eax
2781 mov [u32], eax
2782# endif
2783 }
2784# endif
2785 return u32;
2786}
2787#endif
2788
2789
2790/**
2791 * Atomically Exchange a signed 32-bit value, ordered.
2792 *
2793 * @returns Current *pi32 value
2794 * @param pi32 Pointer to the 32-bit variable to update.
2795 * @param i32 The 32-bit value to assign to *pi32.
2796 */
2797DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2798{
2799 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2800}
2801
2802
2803/**
2804 * Atomically Exchange an unsigned 64-bit value, ordered.
2805 *
2806 * @returns Current *pu64 value
2807 * @param pu64 Pointer to the 64-bit variable to update.
2808 * @param u64 The 64-bit value to assign to *pu64.
2809 */
2810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2811DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2812#else
2813DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2814{
2815# if defined(RT_ARCH_AMD64)
2816# if RT_INLINE_ASM_USES_INTRIN
2817 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2818
2819# elif RT_INLINE_ASM_GNU_STYLE
2820 __asm__ __volatile__("xchgq %0, %1\n\t"
2821 : "=m" (*pu64),
2822 "=r" (u64)
2823 : "1" (u64),
2824 "m" (*pu64));
2825# else
2826 __asm
2827 {
2828 mov rdx, [pu64]
2829 mov rax, [u64]
2830 xchg [rdx], rax
2831 mov [u64], rax
2832 }
2833# endif
2834# else /* !RT_ARCH_AMD64 */
2835# if RT_INLINE_ASM_GNU_STYLE
2836# if defined(PIC) || defined(__PIC__)
2837 uint32_t u32EBX = (uint32_t)u64;
2838 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2839 "xchgl %%ebx, %3\n\t"
2840 "1:\n\t"
2841 "lock; cmpxchg8b (%5)\n\t"
2842 "jnz 1b\n\t"
2843 "movl %3, %%ebx\n\t"
2844 /*"xchgl %%esi, %5\n\t"*/
2845 : "=A" (u64),
2846 "=m" (*pu64)
2847 : "0" (*pu64),
2848 "m" ( u32EBX ),
2849 "c" ( (uint32_t)(u64 >> 32) ),
2850 "S" (pu64));
2851# else /* !PIC */
2852 __asm__ __volatile__("1:\n\t"
2853 "lock; cmpxchg8b %1\n\t"
2854 "jnz 1b\n\t"
2855 : "=A" (u64),
2856 "=m" (*pu64)
2857 : "0" (*pu64),
2858 "b" ( (uint32_t)u64 ),
2859 "c" ( (uint32_t)(u64 >> 32) ));
2860# endif
2861# else
2862 __asm
2863 {
2864 mov ebx, dword ptr [u64]
2865 mov ecx, dword ptr [u64 + 4]
2866 mov edi, pu64
2867 mov eax, dword ptr [edi]
2868 mov edx, dword ptr [edi + 4]
2869 retry:
2870 lock cmpxchg8b [edi]
2871 jnz retry
2872 mov dword ptr [u64], eax
2873 mov dword ptr [u64 + 4], edx
2874 }
2875# endif
2876# endif /* !RT_ARCH_AMD64 */
2877 return u64;
2878}
2879#endif
2880
2881
2882/**
2883 * Atomically Exchange a signed 64-bit value, ordered.
2884 *
2885 * @returns Current *pi64 value
2886 * @param pi64 Pointer to the 64-bit variable to update.
2887 * @param i64 The 64-bit value to assign to *pi64.
2888 */
2889DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2890{
2891 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2892}
2893
2894
2895/**
2896 * Atomically Exchange a pointer value, ordered.
2897 *
2898 * @returns Current *ppv value
2899 * @param ppv Pointer to the pointer variable to update.
2900 * @param pv The pointer value to assign to *ppv.
2901 */
2902DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2903{
2904#if ARCH_BITS == 32
2905 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2906#elif ARCH_BITS == 64
2907 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2908#else
2909# error "ARCH_BITS is bogus"
2910#endif
2911}
2912
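/* A minimal sketch, assuming a simple hand-off pattern: a consumer detaches
 * whatever is pending in one shot and leaves NULL behind. g_pvPending and
 * PMYWORKITEM are hypothetical.
 *
 *     static void * volatile g_pvPending;
 *     PMYWORKITEM pWork = (PMYWORKITEM)ASMAtomicXchgPtr(&g_pvPending, NULL);
 *     if (pWork)
 *     {
 *         // process the detached work without further synchronization
 *     }
 */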
2913
2914/**
2915 * Atomically Exchange a raw-mode context pointer value, ordered.
2916 *
2917 * @returns Current *ppvRC value
2918 * @param ppvRC Pointer to the pointer variable to update.
2919 * @param pvRC The pointer value to assign to *ppvRC.
2920 */
2921DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2922{
2923 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2924}
2925
2926
2927/**
2928 * Atomically Exchange a ring-0 pointer value, ordered.
2929 *
2930 * @returns Current *ppvR0 value
2931 * @param ppvR0 Pointer to the pointer variable to update.
2932 * @param pvR0 The pointer value to assign to *ppvR0.
2933 */
2934DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2935{
2936#if R0_ARCH_BITS == 32
2937 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2938#elif R0_ARCH_BITS == 64
2939 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2940#else
2941# error "R0_ARCH_BITS is bogus"
2942#endif
2943}
2944
2945
2946/**
2947 * Atomically Exchange a ring-3 pointer value, ordered.
2948 *
2949 * @returns Current *ppvR3 value
2950 * @param ppvR3 Pointer to the pointer variable to update.
2951 * @param pvR3 The pointer value to assign to *ppvR3.
2952 */
2953DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2954{
2955#if R3_ARCH_BITS == 32
2956 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2957#elif R3_ARCH_BITS == 64
2958 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2959#else
2960# error "R3_ARCH_BITS is bogus"
2961#endif
2962}
2963
2964
2965/** @def ASMAtomicXchgHandle
2966 * Atomically Exchange a typical IPRT handle value, ordered.
2967 *
2968 * @param ph Pointer to the value to update.
2969 * @param hNew The new value to assign to *ph.
2970 * @param phRes Where to store the current *ph value.
2971 *
2972 * @remarks This doesn't currently work for all handles (like RTFILE).
2973 */
2974#if HC_ARCH_BITS == 32
2975# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2976 do { \
2977 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2978 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2979 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2980 } while (0)
2981#elif HC_ARCH_BITS == 64
2982# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2983 do { \
2984 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2985 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2986 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2987 } while (0)
2988#else
2989# error HC_ARCH_BITS
2990#endif
2991
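/* A usage sketch, assuming a pointer-sized handle type such as RTSEMEVENT
 * (see the RTFILE remark above); hEventOld is a hypothetical local and
 * RTSemEventDestroy comes from iprt/semaphore.h.
 *
 *     RTSEMEVENT hEventOld;
 *     ASMAtomicXchgHandle(&pThis->hEvent, NIL_RTSEMEVENT, &hEventOld);
 *     if (hEventOld != NIL_RTSEMEVENT)
 *         RTSemEventDestroy(hEventOld);
 */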
2992
2993/**
2994 * Atomically Exchange a value whose size might differ
2995 * between platforms or compilers, ordered.
2996 *
2997 * @param pu Pointer to the variable to update.
2998 * @param uNew The value to assign to *pu.
2999 * @todo This is busted as it's missing the result argument.
3000 */
3001#define ASMAtomicXchgSize(pu, uNew) \
3002 do { \
3003 switch (sizeof(*(pu))) { \
3004 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3005 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3006 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3007 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3008 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3009 } \
3010 } while (0)
3011
3012/**
3013 * Atomically Exchange a value whose size might differ
3014 * between platforms or compilers, ordered.
3015 *
3016 * @param pu Pointer to the variable to update.
3017 * @param uNew The value to assign to *pu.
3018 * @param puRes Where to store the current *pu value.
3019 */
3020#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3021 do { \
3022 switch (sizeof(*(pu))) { \
3023 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3024 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3025 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3026 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3027 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
3028 } \
3029 } while (0)
3030
3031
3032
3033/**
3034 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
3035 *
3036 * @returns true if xchg was done.
3037 * @returns false if xchg wasn't done.
3038 *
3039 * @param pu8 Pointer to the value to update.
3040 * @param u8New The new value to assign to *pu8.
3041 * @param u8Old The old value to compare *pu8 with.
3042 */
3043#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
3044DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
3045#else
3046DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
3047{
3048 uint8_t u8Ret;
3049 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
3050 "setz %1\n\t"
3051 : "=m" (*pu8),
3052 "=qm" (u8Ret),
3053 "=a" (u8Old)
3054 : "q" (u8New),
3055 "2" (u8Old),
3056 "m" (*pu8));
3057 return (bool)u8Ret;
3058}
3059#endif
3060
3061
3062/**
3063 * Atomically Compare and Exchange a signed 8-bit value, ordered.
3064 *
3065 * @returns true if xchg was done.
3066 * @returns false if xchg wasn't done.
3067 *
3068 * @param pi8 Pointer to the value to update.
3069 * @param i8New The new value to assign to *pi8.
3070 * @param i8Old The old value to compare *pi8 with.
3071 */
3072DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
3073{
3074 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
3075}
3076
3077
3078/**
3079 * Atomically Compare and Exchange a bool value, ordered.
3080 *
3081 * @returns true if xchg was done.
3082 * @returns false if xchg wasn't done.
3083 *
3084 * @param pf Pointer to the value to update.
3085 * @param fNew The new value to assign to *pf.
3086 * @param fOld The old value to compare *pf with.
3087 */
3088DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
3089{
3090 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
3091}
3092
3093
3094/**
3095 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3096 *
3097 * @returns true if xchg was done.
3098 * @returns false if xchg wasn't done.
3099 *
3100 * @param pu32 Pointer to the value to update.
3101 * @param u32New The new value to assign to *pu32.
3102 * @param u32Old The old value to compare *pu32 with.
3103 */
3104#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3105DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3106#else
3107DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3108{
3109# if RT_INLINE_ASM_GNU_STYLE
3110 uint8_t u8Ret;
3111 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3112 "setz %1\n\t"
3113 : "=m" (*pu32),
3114 "=qm" (u8Ret),
3115 "=a" (u32Old)
3116 : "r" (u32New),
3117 "2" (u32Old),
3118 "m" (*pu32));
3119 return (bool)u8Ret;
3120
3121# elif RT_INLINE_ASM_USES_INTRIN
3122 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3123
3124# else
3125 uint32_t u32Ret;
3126 __asm
3127 {
3128# ifdef RT_ARCH_AMD64
3129 mov rdx, [pu32]
3130# else
3131 mov edx, [pu32]
3132# endif
3133 mov eax, [u32Old]
3134 mov ecx, [u32New]
3135# ifdef RT_ARCH_AMD64
3136 lock cmpxchg [rdx], ecx
3137# else
3138 lock cmpxchg [edx], ecx
3139# endif
3140 setz al
3141 movzx eax, al
3142 mov [u32Ret], eax
3143 }
3144 return !!u32Ret;
3145# endif
3146}
3147#endif
3148
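/* A minimal compare-and-exchange retry loop built on ASMAtomicCmpXchgU32;
 * u32Shared (a volatile uint32_t visible to other threads) and the cap of
 * 100 are hypothetical.
 *
 *     uint32_t u32Old, u32New;
 *     do
 *     {
 *         u32Old = ASMAtomicReadU32(&u32Shared);
 *         u32New = RT_MIN(u32Old + 1, 100);     // saturating increment
 *     } while (!ASMAtomicCmpXchgU32(&u32Shared, u32New, u32Old));
 */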
3149
3150/**
3151 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3152 *
3153 * @returns true if xchg was done.
3154 * @returns false if xchg wasn't done.
3155 *
3156 * @param pi32 Pointer to the value to update.
3157 * @param i32New The new value to assign to *pi32.
3158 * @param i32Old The old value to compare *pi32 with.
3159 */
3160DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3161{
3162 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3163}
3164
3165
3166/**
3167 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3168 *
3169 * @returns true if xchg was done.
3170 * @returns false if xchg wasn't done.
3171 *
3172 * @param pu64 Pointer to the 64-bit variable to update.
3173 * @param u64New The 64-bit value to assign to *pu64.
3174 * @param u64Old The value to compare with.
3175 */
3176#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3177 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3178DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3179#else
3180DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3181{
3182# if RT_INLINE_ASM_USES_INTRIN
3183 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3184
3185# elif defined(RT_ARCH_AMD64)
3186# if RT_INLINE_ASM_GNU_STYLE
3187 uint8_t u8Ret;
3188 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3189 "setz %1\n\t"
3190 : "=m" (*pu64),
3191 "=qm" (u8Ret),
3192 "=a" (u64Old)
3193 : "r" (u64New),
3194 "2" (u64Old),
3195 "m" (*pu64));
3196 return (bool)u8Ret;
3197# else
3198 bool fRet;
3199 __asm
3200 {
3201 mov rdx, [pu64]
3202 mov rax, [u64Old]
3203 mov rcx, [u64New]
3204 lock cmpxchg [rdx], rcx
3205 setz al
3206 mov [fRet], al
3207 }
3208 return fRet;
3209# endif
3210# else /* !RT_ARCH_AMD64 */
3211 uint32_t u32Ret;
3212# if RT_INLINE_ASM_GNU_STYLE
3213# if defined(PIC) || defined(__PIC__)
3214 uint32_t u32EBX = (uint32_t)u64New;
3215 uint32_t u32Spill;
3216 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3217 "lock; cmpxchg8b (%6)\n\t"
3218 "setz %%al\n\t"
3219 "movl %4, %%ebx\n\t"
3220 "movzbl %%al, %%eax\n\t"
3221 : "=a" (u32Ret),
3222 "=d" (u32Spill),
3223# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3224 "+m" (*pu64)
3225# else
3226 "=m" (*pu64)
3227# endif
3228 : "A" (u64Old),
3229 "m" ( u32EBX ),
3230 "c" ( (uint32_t)(u64New >> 32) ),
3231 "S" (pu64));
3232# else /* !PIC */
3233 uint32_t u32Spill;
3234 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3235 "setz %%al\n\t"
3236 "movzbl %%al, %%eax\n\t"
3237 : "=a" (u32Ret),
3238 "=d" (u32Spill),
3239 "+m" (*pu64)
3240 : "A" (u64Old),
3241 "b" ( (uint32_t)u64New ),
3242 "c" ( (uint32_t)(u64New >> 32) ));
3243# endif
3244 return (bool)u32Ret;
3245# else
3246 __asm
3247 {
3248 mov ebx, dword ptr [u64New]
3249 mov ecx, dword ptr [u64New + 4]
3250 mov edi, [pu64]
3251 mov eax, dword ptr [u64Old]
3252 mov edx, dword ptr [u64Old + 4]
3253 lock cmpxchg8b [edi]
3254 setz al
3255 movzx eax, al
3256 mov dword ptr [u32Ret], eax
3257 }
3258 return !!u32Ret;
3259# endif
3260# endif /* !RT_ARCH_AMD64 */
3261}
3262#endif
3263
3264
3265/**
3266 * Atomically Compare and exchange a signed 64-bit value, ordered.
3267 *
3268 * @returns true if xchg was done.
3269 * @returns false if xchg wasn't done.
3270 *
3271 * @param pi64 Pointer to the 64-bit variable to update.
3272 * @param i64 The 64-bit value to assign to *pi64.
3273 * @param i64Old The value to compare with.
3274 */
3275DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3276{
3277 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3278}
3279
3280
3281/**
3282 * Atomically Compare and Exchange a pointer value, ordered.
3283 *
3284 * @returns true if xchg was done.
3285 * @returns false if xchg wasn't done.
3286 *
3287 * @param ppv Pointer to the value to update.
3288 * @param pvNew The new value to assign to *ppv.
3289 * @param pvOld The old value to compare *ppv with.
3290 */
3291DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3292{
3293#if ARCH_BITS == 32
3294 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3295#elif ARCH_BITS == 64
3296 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3297#else
3298# error "ARCH_BITS is bogus"
3299#endif
3300}
3301
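/* A minimal sketch of a lock-free LIFO push using ASMAtomicCmpXchgPtr;
 * MYNODE, pNode and g_pvHead (a void * volatile list head) are hypothetical.
 *
 *     void *pvHead;
 *     do
 *     {
 *         pvHead = ASMAtomicReadPtr(&g_pvHead);
 *         pNode->pNext = (MYNODE *)pvHead;
 *     } while (!ASMAtomicCmpXchgPtr(&g_pvHead, pNode, pvHead));
 */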
3302
3303/** @def ASMAtomicCmpXchgHandle
3304 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3305 *
3306 * @param ph Pointer to the value to update.
3307 * @param hNew The new value to assign to *ph.
3308 * @param hOld The old value to compare *ph with.
3309 * @param fRc Where to store the result.
3310 *
3311 * @remarks This doesn't currently work for all handles (like RTFILE).
3312 */
3313#if HC_ARCH_BITS == 32
3314# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3315 do { \
3316 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3317 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3318 } while (0)
3319#elif HC_ARCH_BITS == 64
3320# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3321 do { \
3322 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3323 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3324 } while (0)
3325#else
3326# error HC_ARCH_BITS
3327#endif
3328
3329
3330/** @def ASMAtomicCmpXchgSize
3331 * Atomically Compare and Exchange a value whose size might differ
3332 * between platforms or compilers, ordered.
3333 *
3334 * @param pu Pointer to the value to update.
3335 * @param uNew The new value to assign to *pu.
3336 * @param uOld The old value to compare *pu with.
3337 * @param fRc Where to store the result.
3338 */
3339#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3340 do { \
3341 switch (sizeof(*(pu))) { \
3342 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3343 break; \
3344 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3345 break; \
3346 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3347 (fRc) = false; \
3348 break; \
3349 } \
3350 } while (0)
3351
3352
3353/**
3354 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3355 * passes back old value, ordered.
3356 *
3357 * @returns true if xchg was done.
3358 * @returns false if xchg wasn't done.
3359 *
3360 * @param pu32 Pointer to the value to update.
3361 * @param u32New The new value to assign to *pu32.
3362 * @param u32Old The old value to compare *pu32 with.
3363 * @param pu32Old Pointer to store the old value at.
3364 */
3365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3366DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3367#else
3368DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3369{
3370# if RT_INLINE_ASM_GNU_STYLE
3371 uint8_t u8Ret;
3372 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3373 "setz %1\n\t"
3374 : "=m" (*pu32),
3375 "=qm" (u8Ret),
3376 "=a" (*pu32Old)
3377 : "r" (u32New),
3378 "a" (u32Old),
3379 "m" (*pu32));
3380 return (bool)u8Ret;
3381
3382# elif RT_INLINE_ASM_USES_INTRIN
3383 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3384
3385# else
3386 uint32_t u32Ret;
3387 __asm
3388 {
3389# ifdef RT_ARCH_AMD64
3390 mov rdx, [pu32]
3391# else
3392 mov edx, [pu32]
3393# endif
3394 mov eax, [u32Old]
3395 mov ecx, [u32New]
3396# ifdef RT_ARCH_AMD64
3397 lock cmpxchg [rdx], ecx
3398 mov rdx, [pu32Old]
3399 mov [rdx], eax
3400# else
3401 lock cmpxchg [edx], ecx
3402 mov edx, [pu32Old]
3403 mov [edx], eax
3404# endif
3405 setz al
3406 movzx eax, al
3407 mov [u32Ret], eax
3408 }
3409 return !!u32Ret;
3410# endif
3411}
3412#endif
3413
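/* A sketch of the same retry pattern using the Ex variant: a failed attempt
 * already returns the current value through pu32Old, so no separate re-read
 * is needed. u32Shared and fFlagBit are hypothetical.
 *
 *     uint32_t u32Old = ASMAtomicReadU32(&u32Shared);
 *     while (!ASMAtomicCmpXchgExU32(&u32Shared, u32Old | fFlagBit, u32Old, &u32Old))
 *     {   // u32Old was refreshed with the current value; just retry.
 *     }
 */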
3414
3415/**
3416 * Atomically Compare and Exchange a signed 32-bit value, additionally
3417 * passes back old value, ordered.
3418 *
3419 * @returns true if xchg was done.
3420 * @returns false if xchg wasn't done.
3421 *
3422 * @param pi32 Pointer to the value to update.
3423 * @param i32New The new value to assign to *pi32.
3424 * @param i32Old The old value to compare *pi32 with.
3425 * @param pi32Old Pointer to store the old value at.
3426 */
3427DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3428{
3429 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3430}
3431
3432
3433/**
3434 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3435 * passing back old value, ordered.
3436 *
3437 * @returns true if xchg was done.
3438 * @returns false if xchg wasn't done.
3439 *
3440 * @param pu64 Pointer to the 64-bit variable to update.
3441 * @param u64New The 64-bit value to assign to *pu64.
3442 * @param u64Old The value to compare with.
3443 * @param pu64Old Pointer to store the old value at.
3444 */
3445#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3446DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3447#else
3448DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3449{
3450# if RT_INLINE_ASM_USES_INTRIN
3451 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3452
3453# elif defined(RT_ARCH_AMD64)
3454# if RT_INLINE_ASM_GNU_STYLE
3455 uint8_t u8Ret;
3456 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3457 "setz %1\n\t"
3458 : "=m" (*pu64),
3459 "=qm" (u8Ret),
3460 "=a" (*pu64Old)
3461 : "r" (u64New),
3462 "a" (u64Old),
3463 "m" (*pu64));
3464 return (bool)u8Ret;
3465# else
3466 bool fRet;
3467 __asm
3468 {
3469 mov rdx, [pu64]
3470 mov rax, [u64Old]
3471 mov rcx, [u64New]
3472 lock cmpxchg [rdx], rcx
3473 mov rdx, [pu64Old]
3474 mov [rdx], rax
3475 setz al
3476 mov [fRet], al
3477 }
3478 return fRet;
3479# endif
3480# else /* !RT_ARCH_AMD64 */
3481# if RT_INLINE_ASM_GNU_STYLE
3482 uint64_t u64Ret;
3483# if defined(PIC) || defined(__PIC__)
3484 /* NB: this code uses a memory clobber description, because the clean
3485 * solution with an output value for *pu64 makes gcc run out of registers.
3486 * This will cause suboptimal code, and anyone with a better solution is
3487 * welcome to improve this. */
3488 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3489 "lock; cmpxchg8b %3\n\t"
3490 "xchgl %%ebx, %1\n\t"
3491 : "=A" (u64Ret)
3492 : "DS" ((uint32_t)u64New),
3493 "c" ((uint32_t)(u64New >> 32)),
3494 "m" (*pu64),
3495 "0" (u64Old)
3496 : "memory" );
3497# else /* !PIC */
3498 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3499 : "=A" (u64Ret),
3500 "=m" (*pu64)
3501 : "b" ((uint32_t)u64New),
3502 "c" ((uint32_t)(u64New >> 32)),
3503 "m" (*pu64),
3504 "0" (u64Old));
3505# endif
3506 *pu64Old = u64Ret;
3507 return u64Ret == u64Old;
3508# else
3509 uint32_t u32Ret;
3510 __asm
3511 {
3512 mov ebx, dword ptr [u64New]
3513 mov ecx, dword ptr [u64New + 4]
3514 mov edi, [pu64]
3515 mov eax, dword ptr [u64Old]
3516 mov edx, dword ptr [u64Old + 4]
3517 lock cmpxchg8b [edi]
3518 mov ebx, [pu64Old]
3519 mov [ebx], eax
3520 setz al
3521 movzx eax, al
3522 add ebx, 4
3523 mov [ebx], edx
3524 mov dword ptr [u32Ret], eax
3525 }
3526 return !!u32Ret;
3527# endif
3528# endif /* !RT_ARCH_AMD64 */
3529}
3530#endif
3531
3532
3533/**
3534 * Atomically Compare and exchange a signed 64-bit value, additionally
3535 * passing back old value, ordered.
3536 *
3537 * @returns true if xchg was done.
3538 * @returns false if xchg wasn't done.
3539 *
3540 * @param pi64 Pointer to the 64-bit variable to update.
3541 * @param i64 The 64-bit value to assign to *pi64.
3542 * @param i64Old The value to compare with.
3543 * @param pi64Old Pointer to store the old value at.
3544 */
3545DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3546{
3547 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3548}
3549
3550/** @def ASMAtomicCmpXchgExHandle
3551 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3552 *
3553 * @param ph Pointer to the value to update.
3554 * @param hNew The new value to assign to *ph.
3555 * @param hOld The old value to compare *ph with.
3556 * @param fRc Where to store the result.
3557 * @param phOldVal Pointer to where to store the old value.
3558 *
3559 * @remarks This doesn't currently work for all handles (like RTFILE).
3560 */
3561#if HC_ARCH_BITS == 32
3562# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3563 do { \
3564 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3565 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
3566 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3567 } while (0)
3568#elif HC_ARCH_BITS == 64
3569# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3570 do { \
3571 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3572 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3573 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3574 } while (0)
3575#else
3576# error HC_ARCH_BITS
3577#endif
3578
3579
3580/** @def ASMAtomicCmpXchgExSize
3581 * Atomically Compare and Exchange a value whose size might differ
3582 * between platforms or compilers. Additionally passes back old value.
3583 *
3584 * @param pu Pointer to the value to update.
3585 * @param uNew The new value to assign to *pu.
3586 * @param uOld The old value to compare *pu with.
3587 * @param fRc Where to store the result.
3588 * @param puOldVal Pointer to where to store the old value.
3589 */
3590#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3591 do { \
3592 switch (sizeof(*(pu))) { \
3593 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3594 break; \
3595 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3596 break; \
3597 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3598 (fRc) = false; \
3599 *(puOldVal) = 0; \
3600 break; \
3601 } \
3602 } while (0)
3603
3604
3605/**
3606 * Atomically Compare and Exchange a pointer value, additionally
3607 * passing back old value, ordered.
3608 *
3609 * @returns true if xchg was done.
3610 * @returns false if xchg wasn't done.
3611 *
3612 * @param ppv Pointer to the value to update.
3613 * @param pvNew The new value to assign to *ppv.
3614 * @param pvOld The old value to compare *ppv with.
3615 * @param ppvOld Pointer to store the old value at.
3616 */
3617DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3618{
3619#if ARCH_BITS == 32
3620 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3621#elif ARCH_BITS == 64
3622 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3623#else
3624# error "ARCH_BITS is bogus"
3625#endif
3626}
3627
3628
3629/**
3630 * Atomically exchanges and adds to a 32-bit value, ordered.
3631 *
3632 * @returns The old value.
3633 * @param pu32 Pointer to the value.
3634 * @param u32 Number to add.
3635 */
3636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3637DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3638#else
3639DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3640{
3641# if RT_INLINE_ASM_USES_INTRIN
3642 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3643 return u32;
3644
3645# elif RT_INLINE_ASM_GNU_STYLE
3646 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3647 : "=r" (u32),
3648 "=m" (*pu32)
3649 : "0" (u32),
3650 "m" (*pu32)
3651 : "memory");
3652 return u32;
3653# else
3654 __asm
3655 {
3656 mov eax, [u32]
3657# ifdef RT_ARCH_AMD64
3658 mov rdx, [pu32]
3659 lock xadd [rdx], eax
3660# else
3661 mov edx, [pu32]
3662 lock xadd [edx], eax
3663# endif
3664 mov [u32], eax
3665 }
3666 return u32;
3667# endif
3668}
3669#endif
3670
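/* A minimal sketch: reserving a batch of sequence numbers; the return value
 * is the value before the addition. g_u32NextSeq and cSeqNos are
 * hypothetical.
 *
 *     uint32_t uFirst = ASMAtomicAddU32(&g_u32NextSeq, cSeqNos);
 *     // this caller owns sequence numbers uFirst .. uFirst + cSeqNos - 1
 */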
3671
3672/**
3673 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3674 *
3675 * @returns The old value.
3676 * @param pi32 Pointer to the value.
3677 * @param i32 Number to add.
3678 */
3679DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3680{
3681 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3682}
3683
3684
3685/**
3686 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3687 *
3688 * @returns The old value.
3689 * @param pi32 Pointer to the value.
3690 * @param i32 Number to subtract.
3691 */
3692DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t i32)
3693{
3694 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)i32);
3695}
3696
3697
3698/**
3699 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3700 *
3701 * @returns The old value.
3702 * @param pi32 Pointer to the value.
3703 * @param i32 Number to subtract.
3704 */
3705DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3706{
3707 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3708}
3709
3710
3711/**
3712 * Atomically increment a 32-bit value, ordered.
3713 *
3714 * @returns The new value.
3715 * @param pu32 Pointer to the value to increment.
3716 */
3717#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3718DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3719#else
3720DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3721{
3722 uint32_t u32;
3723# if RT_INLINE_ASM_USES_INTRIN
3724 u32 = _InterlockedIncrement((long *)pu32);
3725 return u32;
3726
3727# elif RT_INLINE_ASM_GNU_STYLE
3728 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3729 : "=r" (u32),
3730 "=m" (*pu32)
3731 : "0" (1),
3732 "m" (*pu32)
3733 : "memory");
3734 return u32+1;
3735# else
3736 __asm
3737 {
3738 mov eax, 1
3739# ifdef RT_ARCH_AMD64
3740 mov rdx, [pu32]
3741 lock xadd [rdx], eax
3742# else
3743 mov edx, [pu32]
3744 lock xadd [edx], eax
3745# endif
3746 mov u32, eax
3747 }
3748 return u32+1;
3749# endif
3750}
3751#endif
3752
3753
3754/**
3755 * Atomically increment a signed 32-bit value, ordered.
3756 *
3757 * @returns The new value.
3758 * @param pi32 Pointer to the value to increment.
3759 */
3760DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3761{
3762 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3763}
3764
3765
3766/**
3767 * Atomically decrement an unsigned 32-bit value, ordered.
3768 *
3769 * @returns The new value.
3770 * @param pu32 Pointer to the value to decrement.
3771 */
3772#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3773DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3774#else
3775DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3776{
3777 uint32_t u32;
3778# if RT_INLINE_ASM_USES_INTRIN
3779 u32 = _InterlockedDecrement((long *)pu32);
3780 return u32;
3781
3782# elif RT_INLINE_ASM_GNU_STYLE
3783 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3784 : "=r" (u32),
3785 "=m" (*pu32)
3786 : "0" (-1),
3787 "m" (*pu32)
3788 : "memory");
3789 return u32-1;
3790# else
3791 __asm
3792 {
3793 mov eax, -1
3794# ifdef RT_ARCH_AMD64
3795 mov rdx, [pu32]
3796 lock xadd [rdx], eax
3797# else
3798 mov edx, [pu32]
3799 lock xadd [edx], eax
3800# endif
3801 mov u32, eax
3802 }
3803 return u32-1;
3804# endif
3805}
3806#endif
3807
3808
3809/**
3810 * Atomically decrement a signed 32-bit value, ordered.
3811 *
3812 * @returns The new value.
3813 * @param pi32 Pointer to the value to decrement.
3814 */
3815DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3816{
3817 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3818}
3819
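/* A minimal reference counting sketch: both functions return the new value,
 * so a decrement that reaches zero identifies the last reference.
 * pThis->cRefs and myObjDestroy() are hypothetical.
 *
 *     ASMAtomicIncU32(&pThis->cRefs);              // retain
 *     // ...
 *     if (ASMAtomicDecU32(&pThis->cRefs) == 0)     // release
 *         myObjDestroy(pThis);
 */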
3820
3821/**
3822 * Atomically Or an unsigned 32-bit value, ordered.
3823 *
3824 * @param pu32 Pointer to the variable to OR u32 with.
3825 * @param u32 The value to OR *pu32 with.
3826 */
3827#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3828DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3829#else
3830DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3831{
3832# if RT_INLINE_ASM_USES_INTRIN
3833 _InterlockedOr((long volatile *)pu32, (long)u32);
3834
3835# elif RT_INLINE_ASM_GNU_STYLE
3836 __asm__ __volatile__("lock; orl %1, %0\n\t"
3837 : "=m" (*pu32)
3838 : "ir" (u32),
3839 "m" (*pu32));
3840# else
3841 __asm
3842 {
3843 mov eax, [u32]
3844# ifdef RT_ARCH_AMD64
3845 mov rdx, [pu32]
3846 lock or [rdx], eax
3847# else
3848 mov edx, [pu32]
3849 lock or [edx], eax
3850# endif
3851 }
3852# endif
3853}
3854#endif
3855
3856
3857/**
3858 * Atomically Or a signed 32-bit value, ordered.
3859 *
3860 * @param pi32 Pointer to the variable to OR i32 with.
3861 * @param i32 The value to OR *pi32 with.
3862 */
3863DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3864{
3865 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3866}
3867
3868
3869/**
3870 * Atomically And an unsigned 32-bit value, ordered.
3871 *
3872 * @param pu32 Pointer to the variable to AND u32 with.
3873 * @param u32 The value to AND *pu32 with.
3874 */
3875#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3876DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3877#else
3878DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3879{
3880# if RT_INLINE_ASM_USES_INTRIN
3881 _InterlockedAnd((long volatile *)pu32, u32);
3882
3883# elif RT_INLINE_ASM_GNU_STYLE
3884 __asm__ __volatile__("lock; andl %1, %0\n\t"
3885 : "=m" (*pu32)
3886 : "ir" (u32),
3887 "m" (*pu32));
3888# else
3889 __asm
3890 {
3891 mov eax, [u32]
3892# ifdef RT_ARCH_AMD64
3893 mov rdx, [pu32]
3894 lock and [rdx], eax
3895# else
3896 mov edx, [pu32]
3897 lock and [edx], eax
3898# endif
3899 }
3900# endif
3901}
3902#endif
3903
3904
3905/**
3906 * Atomically And a signed 32-bit value, ordered.
3907 *
3908 * @param pi32 Pointer to the variable to AND i32 with.
3909 * @param i32 The value to AND *pi32 with.
3910 */
3911DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3912{
3913 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3914}
3915
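/* A minimal sketch for manipulating flag bits shared between threads;
 * g_fFlags and the MYFLAG_* bit values are hypothetical.
 *
 *     ASMAtomicOrU32(&g_fFlags, MYFLAG_SHUTDOWN);   // set a bit
 *     ASMAtomicAndU32(&g_fFlags, ~MYFLAG_BUSY);     // clear a bit
 *     bool fShutdown = (ASMAtomicReadU32(&g_fFlags) & MYFLAG_SHUTDOWN) != 0;
 */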
3916
3917/**
3918 * Serialize Instruction.
3919 */
3920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3921DECLASM(void) ASMSerializeInstruction(void);
3922#else
3923DECLINLINE(void) ASMSerializeInstruction(void)
3924{
3925# if RT_INLINE_ASM_GNU_STYLE
3926 RTCCUINTREG xAX = 0;
3927# ifdef RT_ARCH_AMD64
3928 __asm__ ("cpuid"
3929 : "=a" (xAX)
3930 : "0" (xAX)
3931 : "rbx", "rcx", "rdx");
3932# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3933 __asm__ ("push %%ebx\n\t"
3934 "cpuid\n\t"
3935 "pop %%ebx\n\t"
3936 : "=a" (xAX)
3937 : "0" (xAX)
3938 : "ecx", "edx");
3939# else
3940 __asm__ ("cpuid"
3941 : "=a" (xAX)
3942 : "0" (xAX)
3943 : "ebx", "ecx", "edx");
3944# endif
3945
3946# elif RT_INLINE_ASM_USES_INTRIN
3947 int aInfo[4];
3948 __cpuid(aInfo, 0);
3949
3950# else
3951 __asm
3952 {
3953 push ebx
3954 xor eax, eax
3955 cpuid
3956 pop ebx
3957 }
3958# endif
3959}
3960#endif
3961
3962
3963/**
3964 * Memory load/store fence, waits for any pending writes and reads to complete.
3965 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3966 */
3967DECLINLINE(void) ASMMemoryFenceSSE2(void)
3968{
3969#if RT_INLINE_ASM_GNU_STYLE
3970 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3971#elif RT_INLINE_ASM_USES_INTRIN
3972 _mm_mfence();
3973#else
3974 __asm
3975 {
3976 _emit 0x0f
3977 _emit 0xae
3978 _emit 0xf0
3979 }
3980#endif
3981}
3982
3983
3984/**
3985 * Memory store fence, waits for any writes to complete.
3986 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
3987 */
3988DECLINLINE(void) ASMWriteFenceSSE(void)
3989{
3990#if RT_INLINE_ASM_GNU_STYLE
3991 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
3992#elif RT_INLINE_ASM_USES_INTRIN
3993 _mm_sfence();
3994#else
3995 __asm
3996 {
3997 _emit 0x0f
3998 _emit 0xae
3999 _emit 0xf8
4000 }
4001#endif
4002}
4003
4004
4005/**
4006 * Memory load fence, waits for any pending reads to complete.
4007 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
4008 */
4009DECLINLINE(void) ASMReadFenceSSE2(void)
4010{
4011#if RT_INLINE_ASM_GNU_STYLE
4012 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
4013#elif RT_INLINE_ASM_USES_INTRIN
4014 _mm_lfence();
4015#else
4016 __asm
4017 {
4018 _emit 0x0f
4019 _emit 0xae
4020 _emit 0xe8
4021 }
4022#endif
4023}
4024
4025
4026/**
4027 * Memory fence, waits for any pending writes and reads to complete.
4028 */
4029DECLINLINE(void) ASMMemoryFence(void)
4030{
4031 /** @todo use mfence? check if all cpus we care for support it. */
4032 uint32_t volatile u32;
4033 ASMAtomicXchgU32(&u32, 0);
4034}
4035
4036
4037/**
4038 * Write fence, waits for any pending writes to complete.
4039 */
4040DECLINLINE(void) ASMWriteFence(void)
4041{
4042 /** @todo use sfence? check if all cpus we care for support it. */
4043 ASMMemoryFence();
4044}
4045
4046
4047/**
4048 * Read fence, waits for any pending reads to complete.
4049 */
4050DECLINLINE(void) ASMReadFence(void)
4051{
4052 /** @todo use lfence? check if all cpus we care for support it. */
4053 ASMMemoryFence();
4054}
4055
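/* A minimal producer/consumer sketch for the plain fences above; g_uPayload
 * and g_fReady are hypothetical volatile variables shared between threads.
 *
 *     // producer
 *     g_uPayload = uValue;
 *     ASMWriteFence();        // make the payload visible before ...
 *     g_fReady = true;        // ... the flag that publishes it
 *
 *     // consumer
 *     if (g_fReady)
 *     {
 *         ASMReadFence();     // don't read the payload ahead of the flag
 *         uValue = g_uPayload;
 *     }
 */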
4056
4057/**
4058 * Atomically reads an unsigned 8-bit value, ordered.
4059 *
4060 * @returns Current *pu8 value
4061 * @param pu8 Pointer to the 8-bit variable to read.
4062 */
4063DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
4064{
4065 ASMMemoryFence();
4066 return *pu8; /* byte reads are atomic on x86 */
4067}
4068
4069
4070/**
4071 * Atomically reads an unsigned 8-bit value, unordered.
4072 *
4073 * @returns Current *pu8 value
4074 * @param pu8 Pointer to the 8-bit variable to read.
4075 */
4076DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
4077{
4078 return *pu8; /* byte reads are atomic on x86 */
4079}
4080
4081
4082/**
4083 * Atomically reads a signed 8-bit value, ordered.
4084 *
4085 * @returns Current *pi8 value
4086 * @param pi8 Pointer to the 8-bit variable to read.
4087 */
4088DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
4089{
4090 ASMMemoryFence();
4091 return *pi8; /* byte reads are atomic on x86 */
4092}
4093
4094
4095/**
4096 * Atomically reads a signed 8-bit value, unordered.
4097 *
4098 * @returns Current *pi8 value
4099 * @param pi8 Pointer to the 8-bit variable to read.
4100 */
4101DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4102{
4103 return *pi8; /* byte reads are atomic on x86 */
4104}
4105
4106
4107/**
4108 * Atomically reads an unsigned 16-bit value, ordered.
4109 *
4110 * @returns Current *pu16 value
4111 * @param pu16 Pointer to the 16-bit variable to read.
4112 */
4113DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4114{
4115 ASMMemoryFence();
4116 Assert(!((uintptr_t)pu16 & 1));
4117 return *pu16;
4118}
4119
4120
4121/**
4122 * Atomically reads an unsigned 16-bit value, unordered.
4123 *
4124 * @returns Current *pu16 value
4125 * @param pu16 Pointer to the 16-bit variable to read.
4126 */
4127DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4128{
4129 Assert(!((uintptr_t)pu16 & 1));
4130 return *pu16;
4131}
4132
4133
4134/**
4135 * Atomically reads a signed 16-bit value, ordered.
4136 *
4137 * @returns Current *pi16 value
4138 * @param pi16 Pointer to the 16-bit variable to read.
4139 */
4140DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4141{
4142 ASMMemoryFence();
4143 Assert(!((uintptr_t)pi16 & 1));
4144 return *pi16;
4145}
4146
4147
4148/**
4149 * Atomically reads a signed 16-bit value, unordered.
4150 *
4151 * @returns Current *pi16 value
4152 * @param pi16 Pointer to the 16-bit variable to read.
4153 */
4154DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4155{
4156 Assert(!((uintptr_t)pi16 & 1));
4157 return *pi16;
4158}
4159
4160
4161/**
4162 * Atomically reads an unsigned 32-bit value, ordered.
4163 *
4164 * @returns Current *pu32 value
4165 * @param pu32 Pointer to the 32-bit variable to read.
4166 */
4167DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4168{
4169 ASMMemoryFence();
4170 Assert(!((uintptr_t)pu32 & 3));
4171 return *pu32;
4172}
4173
4174
4175/**
4176 * Atomically reads an unsigned 32-bit value, unordered.
4177 *
4178 * @returns Current *pu32 value
4179 * @param pu32 Pointer to the 32-bit variable to read.
4180 */
4181DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4182{
4183 Assert(!((uintptr_t)pu32 & 3));
4184 return *pu32;
4185}
4186
4187
4188/**
4189 * Atomically reads a signed 32-bit value, ordered.
4190 *
4191 * @returns Current *pi32 value
4192 * @param pi32 Pointer to the 32-bit variable to read.
4193 */
4194DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4195{
4196 ASMMemoryFence();
4197 Assert(!((uintptr_t)pi32 & 3));
4198 return *pi32;
4199}
4200
4201
4202/**
4203 * Atomically reads a signed 32-bit value, unordered.
4204 *
4205 * @returns Current *pi32 value
4206 * @param pi32 Pointer to the 32-bit variable to read.
4207 */
4208DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4209{
4210 Assert(!((uintptr_t)pi32 & 3));
4211 return *pi32;
4212}
4213
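/* A sketch of where the unordered read variants fit: polling cheaply and
 * adding explicit ordering only once the awaited state shows up.
 * g_u32State, MYSTATE_READY and g_uResult are hypothetical.
 *
 *     while (ASMAtomicUoReadU32(&g_u32State) != MYSTATE_READY)
 *     {   // cheap poll, no fence on every iteration
 *     }
 *     ASMReadFence();                 // order the result read after the poll
 *     uint32_t uResult = g_uResult;
 */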
4214
4215/**
4216 * Atomically reads an unsigned 64-bit value, ordered.
4217 *
4218 * @returns Current *pu64 value
4219 * @param pu64 Pointer to the 64-bit variable to read.
4220 * The memory pointed to must be writable.
4221 * @remark This will fault if the memory is read-only!
4222 */
4223#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4224 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4225DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4226#else
4227DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4228{
4229 uint64_t u64;
4230# ifdef RT_ARCH_AMD64
4231 Assert(!((uintptr_t)pu64 & 7));
4232/*# if RT_INLINE_ASM_GNU_STYLE
4233 __asm__ __volatile__( "mfence\n\t"
4234 "movq %1, %0\n\t"
4235 : "=r" (u64)
4236 : "m" (*pu64));
4237# else
4238 __asm
4239 {
4240 mfence
4241 mov rdx, [pu64]
4242 mov rax, [rdx]
4243 mov [u64], rax
4244 }
4245# endif*/
4246 ASMMemoryFence();
4247 u64 = *pu64;
4248# else /* !RT_ARCH_AMD64 */
4249# if RT_INLINE_ASM_GNU_STYLE
4250# if defined(PIC) || defined(__PIC__)
4251 uint32_t u32EBX = 0;
4252 Assert(!((uintptr_t)pu64 & 7));
4253 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4254 "lock; cmpxchg8b (%5)\n\t"
4255 "movl %3, %%ebx\n\t"
4256 : "=A" (u64),
4257# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4258 "+m" (*pu64)
4259# else
4260 "=m" (*pu64)
4261# endif
4262 : "0" (0),
4263 "m" (u32EBX),
4264 "c" (0),
4265 "S" (pu64));
4266# else /* !PIC */
4267 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4268 : "=A" (u64),
4269 "+m" (*pu64)
4270 : "0" (0),
4271 "b" (0),
4272 "c" (0));
4273# endif
4274# else
4275 Assert(!((uintptr_t)pu64 & 7));
4276 __asm
4277 {
4278 xor eax, eax
4279 xor edx, edx
4280 mov edi, pu64
4281 xor ecx, ecx
4282 xor ebx, ebx
4283 lock cmpxchg8b [edi]
4284 mov dword ptr [u64], eax
4285 mov dword ptr [u64 + 4], edx
4286 }
4287# endif
4288# endif /* !RT_ARCH_AMD64 */
4289 return u64;
4290}
4291#endif
4292
4293
4294/**
4295 * Atomically reads an unsigned 64-bit value, unordered.
4296 *
4297 * @returns Current *pu64 value
4298 * @param pu64 Pointer to the 64-bit variable to read.
4299 * The memory pointed to must be writable.
4300 * @remark This will fault if the memory is read-only!
4301 */
4302#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4303DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4304#else
4305DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4306{
4307 uint64_t u64;
4308# ifdef RT_ARCH_AMD64
4309 Assert(!((uintptr_t)pu64 & 7));
4310/*# if RT_INLINE_ASM_GNU_STYLE
4311 Assert(!((uintptr_t)pu64 & 7));
4312 __asm__ __volatile__("movq %1, %0\n\t"
4313 : "=r" (u64)
4314 : "m" (*pu64));
4315# else
4316 __asm
4317 {
4318 mov rdx, [pu64]
4319 mov rax, [rdx]
4320 mov [u64], rax
4321 }
4322# endif */
4323 u64 = *pu64;
4324# else /* !RT_ARCH_AMD64 */
4325# if RT_INLINE_ASM_GNU_STYLE
4326# if defined(PIC) || defined(__PIC__)
4327 uint32_t u32EBX = 0;
4328 uint32_t u32Spill;
4329 Assert(!((uintptr_t)pu64 & 7));
4330 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4331 "xor %%ecx,%%ecx\n\t"
4332 "xor %%edx,%%edx\n\t"
4333 "xchgl %%ebx, %3\n\t"
4334 "lock; cmpxchg8b (%4)\n\t"
4335 "movl %3, %%ebx\n\t"
4336 : "=A" (u64),
4337# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4338 "+m" (*pu64),
4339# else
4340 "=m" (*pu64),
4341# endif
4342 "=c" (u32Spill)
4343 : "m" (u32EBX),
4344 "S" (pu64));
4345# else /* !PIC */
4346 __asm__ __volatile__("cmpxchg8b %1\n\t"
4347 : "=A" (u64),
4348 "+m" (*pu64)
4349 : "0" (0),
4350 "b" (0),
4351 "c" (0));
4352# endif
4353# else
4354 Assert(!((uintptr_t)pu64 & 7));
4355 __asm
4356 {
4357 xor eax, eax
4358 xor edx, edx
4359 mov edi, pu64
4360 xor ecx, ecx
4361 xor ebx, ebx
4362 lock cmpxchg8b [edi]
4363 mov dword ptr [u64], eax
4364 mov dword ptr [u64 + 4], edx
4365 }
4366# endif
4367# endif /* !RT_ARCH_AMD64 */
4368 return u64;
4369}
4370#endif
4371
4372
4373/**
4374 * Atomically reads a signed 64-bit value, ordered.
4375 *
4376 * @returns Current *pi64 value
4377 * @param pi64 Pointer to the 64-bit variable to read.
4378 * The memory pointed to must be writable.
4379 * @remark This will fault if the memory is read-only!
4380 */
4381DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4382{
4383 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4384}
4385
4386
4387/**
4388 * Atomically reads a signed 64-bit value, unordered.
4389 *
4390 * @returns Current *pi64 value
4391 * @param pi64 Pointer to the 64-bit variable to read.
4392 * The memory pointed to must be writable.
4393 * @remark This will fault if the memory is read-only!
4394 */
4395DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4396{
4397 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4398}
4399
4400
4401/**
4402 * Atomically reads a pointer value, ordered.
4403 *
4404 * @returns Current *pv value
4405 * @param ppv Pointer to the pointer variable to read.
4406 */
4407DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4408{
4409#if ARCH_BITS == 32
4410 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4411#elif ARCH_BITS == 64
4412 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4413#else
4414# error "ARCH_BITS is bogus"
4415#endif
4416}
4417
4418
4419/**
4420 * Atomically reads a pointer value, unordered.
4421 *
4422 * @returns Current *pv value
4423 * @param ppv Pointer to the pointer variable to read.
4424 */
4425DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4426{
4427#if ARCH_BITS == 32
4428 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4429#elif ARCH_BITS == 64
4430 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4431#else
4432# error "ARCH_BITS is bogus"
4433#endif
4434}
4435
4436
4437/**
4438 * Atomically reads a boolean value, ordered.
4439 *
4440 * @returns Current *pf value
4441 * @param pf Pointer to the boolean variable to read.
4442 */
4443DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4444{
4445 ASMMemoryFence();
4446 return *pf; /* byte reads are atomic on x86 */
4447}
4448
4449
4450/**
4451 * Atomically reads a boolean value, unordered.
4452 *
4453 * @returns Current *pf value
4454 * @param pf Pointer to the boolean variable to read.
4455 */
4456DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4457{
4458 return *pf; /* byte reads are atomic on x86 */
4459}
4460
4461
4462/**
4463 * Atomically read a typical IPRT handle value, ordered.
4464 *
4465 * @param ph Pointer to the handle variable to read.
4466 * @param phRes Where to store the result.
4467 *
4468 * @remarks This doesn't currently work for all handles (like RTFILE).
4469 */
4470#if HC_ARCH_BITS == 32
4471# define ASMAtomicReadHandle(ph, phRes) \
4472 do { \
4473 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4474 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4475 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4476 } while (0)
4477#elif HC_ARCH_BITS == 64
4478# define ASMAtomicReadHandle(ph, phRes) \
4479 do { \
4480 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4481 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4482 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4483 } while (0)
4484#else
4485# error HC_ARCH_BITS
4486#endif
4487
4488
4489/**
4490 * Atomically read a typical IPRT handle value, unordered.
4491 *
4492 * @param ph Pointer to the handle variable to read.
4493 * @param phRes Where to store the result.
4494 *
4495 * @remarks This doesn't currently work for all handles (like RTFILE).
4496 */
4497#if HC_ARCH_BITS == 32
4498# define ASMAtomicUoReadHandle(ph, phRes) \
4499 do { \
4500 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4501 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4502 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4503 } while (0)
4504#elif HC_ARCH_BITS == 64
4505# define ASMAtomicUoReadHandle(ph, phRes) \
4506 do { \
4507 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4508 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4509 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4510 } while (0)
4511#else
4512# error HC_ARCH_BITS
4513#endif
4514
4515
4516/**
4517 * Atomically reads a value whose size might differ
4518 * between platforms or compilers, ordered.
4519 *
4520 * @param pu Pointer to the variable to read.
4521 * @param puRes Where to store the result.
4522 */
4523#define ASMAtomicReadSize(pu, puRes) \
4524 do { \
4525 switch (sizeof(*(pu))) { \
4526 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4527 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4528 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4529 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4530 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4531 } \
4532 } while (0)
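
/* Usage sketch (cbShared and cb are hypothetical names): the macro picks the
 * right read width at compile time, which is handy for types like size_t whose
 * size differs between 32-bit and 64-bit targets.
 *
 *      size_t volatile cbShared;
 *      ...
 *      size_t cb;
 *      ASMAtomicReadSize(&cbShared, &cb);
 */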
4533
4534
4535/**
4536 * Atomically reads a value whose size might differ
4537 * between platforms or compilers, unordered.
4538 *
4539 * @param pu Pointer to the variable to read.
4540 * @param puRes Where to store the result.
4541 */
4542#define ASMAtomicUoReadSize(pu, puRes) \
4543 do { \
4544 switch (sizeof(*(pu))) { \
4545 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4546 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4547 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4548 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4549 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4550 } \
4551 } while (0)
4552
4553
4554/**
4555 * Atomically writes an unsigned 8-bit value, ordered.
4556 *
4557 * @param pu8 Pointer to the 8-bit variable.
4558 * @param u8 The 8-bit value to assign to *pu8.
4559 */
4560DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4561{
4562 ASMAtomicXchgU8(pu8, u8);
4563}
4564
4565
4566/**
4567 * Atomically writes an unsigned 8-bit value, unordered.
4568 *
4569 * @param pu8 Pointer to the 8-bit variable.
4570 * @param u8 The 8-bit value to assign to *pu8.
4571 */
4572DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4573{
4574 *pu8 = u8; /* byte writes are atomic on x86 */
4575}
4576
4577
4578/**
4579 * Atomically writes a signed 8-bit value, ordered.
4580 *
4581 * @param pi8 Pointer to the 8-bit variable.
4582 * @param i8 The 8-bit value to assign to *pi8.
4583 */
4584DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4585{
4586 ASMAtomicXchgS8(pi8, i8);
4587}
4588
4589
4590/**
4591 * Atomically writes a signed 8-bit value, unordered.
4592 *
4593 * @param pi8 Pointer to the 8-bit variable.
4594 * @param i8 The 8-bit value to assign to *pi8.
4595 */
4596DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4597{
4598 *pi8 = i8; /* byte writes are atomic on x86 */
4599}
4600
4601
4602/**
4603 * Atomically writes an unsigned 16-bit value, ordered.
4604 *
4605 * @param pu16 Pointer to the 16-bit variable.
4606 * @param u16 The 16-bit value to assign to *pu16.
4607 */
4608DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4609{
4610 ASMAtomicXchgU16(pu16, u16);
4611}
4612
4613
4614/**
4615 * Atomically writes an unsigned 16-bit value, unordered.
4616 *
4617 * @param pu16 Pointer to the 16-bit variable.
4618 * @param u16 The 16-bit value to assign to *pu16.
4619 */
4620DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4621{
4622 Assert(!((uintptr_t)pu16 & 1));
4623 *pu16 = u16;
4624}
4625
4626
4627/**
4628 * Atomically writes a signed 16-bit value, ordered.
4629 *
4630 * @param pi16 Pointer to the 16-bit variable.
4631 * @param i16 The 16-bit value to assign to *pi16.
4632 */
4633DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4634{
4635 ASMAtomicXchgS16(pi16, i16);
4636}
4637
4638
4639/**
4640 * Atomically writes a signed 16-bit value, unordered.
4641 *
4642 * @param pi16 Pointer to the 16-bit variable.
4643 * @param i16 The 16-bit value to assign to *pi16.
4644 */
4645DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4646{
4647 Assert(!((uintptr_t)pi16 & 1));
4648 *pi16 = i16;
4649}
4650
4651
4652/**
4653 * Atomically writes an unsigned 32-bit value, ordered.
4654 *
4655 * @param pu32 Pointer to the 32-bit variable.
4656 * @param u32 The 32-bit value to assign to *pu32.
4657 */
4658DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4659{
4660 ASMAtomicXchgU32(pu32, u32);
4661}
4662
4663
4664/**
4665 * Atomically writes an unsigned 32-bit value, unordered.
4666 *
4667 * @param pu32 Pointer to the 32-bit variable.
4668 * @param u32 The 32-bit value to assign to *pu32.
4669 */
4670DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4671{
4672 Assert(!((uintptr_t)pu32 & 3));
4673 *pu32 = u32;
4674}
4675
4676
4677/**
4678 * Atomically writes a signed 32-bit value, ordered.
4679 *
4680 * @param pi32 Pointer to the 32-bit variable.
4681 * @param i32 The 32-bit value to assign to *pi32.
4682 */
4683DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4684{
4685 ASMAtomicXchgS32(pi32, i32);
4686}
4687
4688
4689/**
4690 * Atomically writes a signed 32-bit value, unordered.
4691 *
4692 * @param pi32 Pointer to the 32-bit variable.
4693 * @param i32 The 32-bit value to assign to *pi32.
4694 */
4695DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4696{
4697 Assert(!((uintptr_t)pi32 & 3));
4698 *pi32 = i32;
4699}
4700
4701
4702/**
4703 * Atomically writes an unsigned 64-bit value, ordered.
4704 *
4705 * @param pu64 Pointer to the 64-bit variable.
4706 * @param u64 The 64-bit value to assign to *pu64.
4707 */
4708DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4709{
4710 ASMAtomicXchgU64(pu64, u64);
4711}
4712
4713
4714/**
4715 * Atomically writes an unsigned 64-bit value, unordered.
4716 *
4717 * @param pu64 Pointer to the 64-bit variable.
4718 * @param u64 The 64-bit value to assign to *pu64.
4719 */
4720DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4721{
4722 Assert(!((uintptr_t)pu64 & 7));
4723#if ARCH_BITS == 64
4724 *pu64 = u64;
4725#else
4726 ASMAtomicXchgU64(pu64, u64);
4727#endif
4728}
4729
4730
4731/**
4732 * Atomically writes a signed 64-bit value, ordered.
4733 *
4734 * @param pi64 Pointer to the 64-bit variable.
4735 * @param i64 The 64-bit value to assign to *pi64.
4736 */
4737DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4738{
4739 ASMAtomicXchgS64(pi64, i64);
4740}
4741
4742
4743/**
4744 * Atomically writes a signed 64-bit value, unordered.
4745 *
4746 * @param pi64 Pointer to the 64-bit variable.
4747 * @param i64 The 64-bit value to assign to *pi64.
4748 */
4749DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4750{
4751 Assert(!((uintptr_t)pi64 & 7));
4752#if ARCH_BITS == 64
4753 *pi64 = i64;
4754#else
4755 ASMAtomicXchgS64(pi64, i64);
4756#endif
4757}
4758
4759
4760/**
4761 * Atomically writes a boolean value, ordered.
4762 *
4763 * @param pf Pointer to the boolean variable.
4764 * @param f The boolean value to assign to *pf.
4765 */
4766DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4767{
4768 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4769}
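
/* Usage sketch (hypothetical names): publish data, then raise a flag with an
 * ordered write; the reader checks the flag with an ordered read before it
 * touches the data.
 *
 *      static uint32_t volatile g_uPayload;
 *      static bool     volatile g_fReady;
 *
 *      // writer
 *      ASMAtomicWriteU32(&g_uPayload, 42);
 *      ASMAtomicWriteBool(&g_fReady, true);
 *
 *      // reader
 *      if (ASMAtomicReadBool(&g_fReady))
 *          consume(ASMAtomicReadU32(&g_uPayload));   // consume() is a placeholder
 */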
4770
4771
4772/**
4773 * Atomically writes a boolean value, unordered.
4774 *
4775 * @param pf Pointer to the boolean variable.
4776 * @param f The boolean value to assign to *pf.
4777 */
4778DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4779{
4780 *pf = f; /* byte writes are atomic on x86 */
4781}
4782
4783
4784/**
4785 * Atomically writes a pointer value, ordered.
4786 *
4788 * @param ppv Pointer to the pointer variable.
4789 * @param pv The pointer value to assign to *ppv.
4790 */
4791DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4792{
4793#if ARCH_BITS == 32
4794 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4795#elif ARCH_BITS == 64
4796 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4797#else
4798# error "ARCH_BITS is bogus"
4799#endif
4800}
4801
4802
4803/**
4804 * Atomically writes a pointer value, unordered.
4805 *
4807 * @param ppv Pointer to the pointer variable.
4808 * @param pv The pointer value to assign to *ppv.
4809 */
4810DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4811{
4812#if ARCH_BITS == 32
4813 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4814#elif ARCH_BITS == 64
4815 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4816#else
4817# error "ARCH_BITS is bogus"
4818#endif
4819}
4820
4821
4822/**
4823 * Atomically write a typical IPRT handle value, ordered.
4824 *
4825 * @param ph Pointer to the variable to update.
4826 * @param hNew The value to assign to *ph.
4827 *
4828 * @remarks This doesn't currently work for all handles (like RTFILE).
4829 */
4830#if HC_ARCH_BITS == 32
4831# define ASMAtomicWriteHandle(ph, hNew) \
4832 do { \
4833 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4834 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4835 } while (0)
4836#elif HC_ARCH_BITS == 64
4837# define ASMAtomicWriteHandle(ph, hNew) \
4838 do { \
4839 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4840 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4841 } while (0)
4842#else
4843# error HC_ARCH_BITS
4844#endif
4845
4846
4847/**
4848 * Atomically write a typical IPRT handle value, unordered.
4849 *
4850 * @param ph Pointer to the variable to update.
4851 * @param hNew The value to assign to *ph.
4852 *
4853 * @remarks This doesn't currently work for all handles (like RTFILE).
4854 */
4855#if HC_ARCH_BITS == 32
4856# define ASMAtomicUoWriteHandle(ph, hNew) \
4857 do { \
4858 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4859 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4860 } while (0)
4861#elif HC_ARCH_BITS == 64
4862# define ASMAtomicUoWriteHandle(ph, hNew) \
4863 do { \
4864 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4865 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4866 } while (0)
4867#else
4868# error HC_ARCH_BITS
4869#endif
4870
4871
4872/**
4873 * Atomically writes a value whose size might differ
4874 * between platforms or compilers, ordered.
4875 *
4876 * @param pu Pointer to the variable to update.
4877 * @param uNew The value to assign to *pu.
4878 */
4879#define ASMAtomicWriteSize(pu, uNew) \
4880 do { \
4881 switch (sizeof(*(pu))) { \
4882 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4883 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4884 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4885 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4886 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4887 } \
4888 } while (0)
4889
4890/**
4891 * Atomically writes a value whose size might differ
4892 * between platforms or compilers, unordered.
4893 *
4894 * @param pu Pointer to the variable to update.
4895 * @param uNew The value to assign to *pu.
4896 */
4897#define ASMAtomicUoWriteSize(pu, uNew) \
4898 do { \
4899 switch (sizeof(*(pu))) { \
4900 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4901 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4902 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4903 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4904 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4905 } \
4906 } while (0)
4907
4908
4909
4910
4911/**
4912 * Invalidate page.
4913 *
4914 * @param pv Address of the page to invalidate.
4915 */
4916#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4917DECLASM(void) ASMInvalidatePage(void *pv);
4918#else
4919DECLINLINE(void) ASMInvalidatePage(void *pv)
4920{
4921# if RT_INLINE_ASM_USES_INTRIN
4922 __invlpg(pv);
4923
4924# elif RT_INLINE_ASM_GNU_STYLE
4925 __asm__ __volatile__("invlpg %0\n\t"
4926 : : "m" (*(uint8_t *)pv));
4927# else
4928 __asm
4929 {
4930# ifdef RT_ARCH_AMD64
4931 mov rax, [pv]
4932 invlpg [rax]
4933# else
4934 mov eax, [pv]
4935 invlpg [eax]
4936# endif
4937 }
4938# endif
4939}
4940#endif
4941
4942
4943/**
4944 * Write back the internal caches and invalidate them.
4945 */
4946#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4947DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4948#else
4949DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4950{
4951# if RT_INLINE_ASM_USES_INTRIN
4952 __wbinvd();
4953
4954# elif RT_INLINE_ASM_GNU_STYLE
4955 __asm__ __volatile__("wbinvd");
4956# else
4957 __asm
4958 {
4959 wbinvd
4960 }
4961# endif
4962}
4963#endif
4964
4965
4966/**
4967 * Invalidate internal and (perhaps) external caches without first
4968 * flushing dirty cache lines. Use with extreme care.
4969 */
4970#if RT_INLINE_ASM_EXTERNAL
4971DECLASM(void) ASMInvalidateInternalCaches(void);
4972#else
4973DECLINLINE(void) ASMInvalidateInternalCaches(void)
4974{
4975# if RT_INLINE_ASM_GNU_STYLE
4976 __asm__ __volatile__("invd");
4977# else
4978 __asm
4979 {
4980 invd
4981 }
4982# endif
4983}
4984#endif
4985
4986
4987#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4988# if PAGE_SIZE != 0x1000
4989# error "PAGE_SIZE is not 0x1000!"
4990# endif
4991#endif
4992
4993/**
4994 * Zeros a 4K memory page.
4995 *
4996 * @param pv Pointer to the memory block. This must be page aligned.
4997 */
4998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4999DECLASM(void) ASMMemZeroPage(volatile void *pv);
5000# else
5001DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
5002{
5003# if RT_INLINE_ASM_USES_INTRIN
5004# ifdef RT_ARCH_AMD64
5005 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
5006# else
5007 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
5008# endif
5009
5010# elif RT_INLINE_ASM_GNU_STYLE
5011 RTCCUINTREG uDummy;
5012# ifdef RT_ARCH_AMD64
5013 __asm__ __volatile__("rep stosq"
5014 : "=D" (pv),
5015 "=c" (uDummy)
5016 : "0" (pv),
5017 "c" (0x1000 >> 3),
5018 "a" (0)
5019 : "memory");
5020# else
5021 __asm__ __volatile__("rep stosl"
5022 : "=D" (pv),
5023 "=c" (uDummy)
5024 : "0" (pv),
5025 "c" (0x1000 >> 2),
5026 "a" (0)
5027 : "memory");
5028# endif
5029# else
5030 __asm
5031 {
5032# ifdef RT_ARCH_AMD64
5033 xor rax, rax
5034 mov ecx, 0200h
5035 mov rdi, [pv]
5036 rep stosq
5037# else
5038 xor eax, eax
5039 mov ecx, 0400h
5040 mov edi, [pv]
5041 rep stosd
5042# endif
5043 }
5044# endif
5045}
5046# endif
5047
5048
5049/**
5050 * Zeros a memory block with a 32-bit aligned size.
5051 *
5052 * @param pv Pointer to the memory block.
5053 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5054 */
5055#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5056DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
5057#else
5058DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
5059{
5060# if RT_INLINE_ASM_USES_INTRIN
5061# ifdef RT_ARCH_AMD64
5062 if (!(cb & 7))
5063 __stosq((unsigned __int64 *)pv, 0, cb / 8);
5064 else
5065# endif
5066 __stosd((unsigned long *)pv, 0, cb / 4);
5067
5068# elif RT_INLINE_ASM_GNU_STYLE
5069 __asm__ __volatile__("rep stosl"
5070 : "=D" (pv),
5071 "=c" (cb)
5072 : "0" (pv),
5073 "1" (cb >> 2),
5074 "a" (0)
5075 : "memory");
5076# else
5077 __asm
5078 {
5079 xor eax, eax
5080# ifdef RT_ARCH_AMD64
5081 mov rcx, [cb]
5082 shr rcx, 2
5083 mov rdi, [pv]
5084# else
5085 mov ecx, [cb]
5086 shr ecx, 2
5087 mov edi, [pv]
5088# endif
5089 rep stosd
5090 }
5091# endif
5092}
5093#endif
5094
5095
5096/**
5097 * Fills a memory block with a 32-bit aligned size.
5098 *
5099 * @param pv Pointer to the memory block.
5100 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5101 * @param u32 The value to fill with.
5102 */
5103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5104DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5105#else
5106DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5107{
5108# if RT_INLINE_ASM_USES_INTRIN
5109# ifdef RT_ARCH_AMD64
5110 if (!(cb & 7))
5111 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5112 else
5113# endif
5114 __stosd((unsigned long *)pv, u32, cb / 4);
5115
5116# elif RT_INLINE_ASM_GNU_STYLE
5117 __asm__ __volatile__("rep stosl"
5118 : "=D" (pv),
5119 "=c" (cb)
5120 : "0" (pv),
5121 "1" (cb >> 2),
5122 "a" (u32)
5123 : "memory");
5124# else
5125 __asm
5126 {
5127# ifdef RT_ARCH_AMD64
5128 mov rcx, [cb]
5129 shr rcx, 2
5130 mov rdi, [pv]
5131# else
5132 mov ecx, [cb]
5133 shr ecx, 2
5134 mov edi, [pv]
5135# endif
5136 mov eax, [u32]
5137 rep stosd
5138 }
5139# endif
5140}
5141#endif
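
/* Usage sketch: both ASMMemZero32 and ASMMemFill32 take a byte count that must
 * be a multiple of 4 (au32Table is a hypothetical variable).
 *
 *      uint32_t au32Table[256];
 *      ASMMemZero32(au32Table, sizeof(au32Table));
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_C(0xffffffff));
 */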
5142
5143
5144/**
5145 * Checks if a memory page is all zeros.
5146 *
5147 * @returns true / false.
5148 *
5149 * @param pvPage Pointer to the page. Must be aligned on 16 byte
5150 * boundary.
5151 */
5152DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
5153{
5154# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5155 union { RTCCUINTREG r; bool f; } uAX;
5156 RTCCUINTREG xCX, xDI;
5157 Assert(!((uintptr_t)pvPage & 15));
5158 __asm__ __volatile__("repe; "
5159# ifdef RT_ARCH_AMD64
5160 "scasq\n\t"
5161# else
5162 "scasl\n\t"
5163# endif
5164 "setnc %%al\n\t"
5165 : "=&c" (xCX),
5166 "=&D" (xDI),
5167 "=&a" (uAX.r)
5168 : "mr" (pvPage),
5169# ifdef RT_ARCH_AMD64
5170 "0" (0x1000/8),
5171# else
5172 "0" (0x1000/4),
5173# endif
5174 "1" (pvPage),
5175 "2" (0));
5176 return uAX.f;
5177# else
5178 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
5179 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
5180 Assert(!((uintptr_t)pvPage & 15));
5181 for (;;)
5182 {
5183 if (puPtr[0]) return false;
5184 if (puPtr[4]) return false;
5185
5186 if (puPtr[2]) return false;
5187 if (puPtr[6]) return false;
5188
5189 if (puPtr[1]) return false;
5190 if (puPtr[5]) return false;
5191
5192 if (puPtr[3]) return false;
5193 if (puPtr[7]) return false;
5194
5195 if (!--cLeft)
5196 return true;
5197 puPtr += 8;
5198 }
5199 return true;
5200# endif
5201}
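
/* Usage sketch (pvPage and savePage() are hypothetical names): skip pages that
 * are entirely zero, e.g. to avoid copying them.
 *
 *      if (!ASMMemIsZeroPage(pvPage))   // pvPage must be 16-byte aligned
 *          savePage(pvPage);
 */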
5202
5203
5204/**
5205 * Checks if a memory block is filled with the specified byte.
5206 *
5207 * This is a sort of inverted memchr.
5208 *
5209 * @returns Pointer to the byte which doesn't equal u8.
5210 * @returns NULL if all equal to u8.
5211 *
5212 * @param pv Pointer to the memory block.
5213 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5214 * @param u8 The value it's supposed to be filled with.
5215 *
5216 * @todo Fix name, it is a predicate function but it's not returning boolean!
5217 */
5218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5219DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5220#else
5221DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5222{
5223/** @todo rewrite this in inline assembly? */
5224 uint8_t const *pb = (uint8_t const *)pv;
5225 for (; cb; cb--, pb++)
5226 if (RT_UNLIKELY(*pb != u8))
5227 return (void *)pb;
5228 return NULL;
5229}
5230#endif
5231
5232
5233/**
5234 * Checks if a memory block is filled with the specified 32-bit value.
5235 *
5236 * This is a sort of inverted memchr.
5237 *
5238 * @returns Pointer to the first value which doesn't equal u32.
5239 * @returns NULL if all equal to u32.
5240 *
5241 * @param pv Pointer to the memory block.
5242 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5243 * @param u32 The value it's supposed to be filled with.
5244 *
5245 * @todo Fix name, it is a predicate function but it's not returning boolean!
5246 */
5247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5248DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5249#else
5250DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5251{
5252/** @todo rewrite this in inline assembly? */
5253 uint32_t const *pu32 = (uint32_t const *)pv;
5254 for (; cb; cb -= 4, pu32++)
5255 if (RT_UNLIKELY(*pu32 != u32))
5256 return (uint32_t *)pu32;
5257 return NULL;
5258}
5259#endif
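
/* Usage sketch (hypothetical names): verify that a buffer still contains its
 * fill pattern and locate the first deviating dword.
 *
 *      uint32_t *pu32Bad = ASMMemIsAllU32(pvBuf, cbBuf, UINT32_C(0xdeadbeef));
 *      if (pu32Bad)
 *          offBad = (uintptr_t)pu32Bad - (uintptr_t)pvBuf;  // offset of the mismatch
 */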
5260
5261
5262/**
5263 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5264 *
5265 * @returns u32F1 * u32F2.
5266 */
5267#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5268DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5269#else
5270DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5271{
5272# ifdef RT_ARCH_AMD64
5273 return (uint64_t)u32F1 * u32F2;
5274# else /* !RT_ARCH_AMD64 */
5275 uint64_t u64;
5276# if RT_INLINE_ASM_GNU_STYLE
5277 __asm__ __volatile__("mull %%edx"
5278 : "=A" (u64)
5279 : "a" (u32F2), "d" (u32F1));
5280# else
5281 __asm
5282 {
5283 mov edx, [u32F1]
5284 mov eax, [u32F2]
5285 mul edx
5286 mov dword ptr [u64], eax
5287 mov dword ptr [u64 + 4], edx
5288 }
5289# endif
5290 return u64;
5291# endif /* !RT_ARCH_AMD64 */
5292}
5293#endif
5294
5295
5296/**
5297 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5298 *
5299 * @returns u32F1 * u32F2.
5300 */
5301#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5302DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5303#else
5304DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5305{
5306# ifdef RT_ARCH_AMD64
5307 return (int64_t)i32F1 * i32F2;
5308# else /* !RT_ARCH_AMD64 */
5309 int64_t i64;
5310# if RT_INLINE_ASM_GNU_STYLE
5311 __asm__ __volatile__("imull %%edx"
5312 : "=A" (i64)
5313 : "a" (i32F2), "d" (i32F1));
5314# else
5315 __asm
5316 {
5317 mov edx, [i32F1]
5318 mov eax, [i32F2]
5319 imul edx
5320 mov dword ptr [i64], eax
5321 mov dword ptr [i64 + 4], edx
5322 }
5323# endif
5324 return i64;
5325# endif /* !RT_ARCH_AMD64 */
5326}
5327#endif
5328
5329
5330/**
5331 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5332 *
5333 * @returns u64 / u32.
5334 */
5335#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5336DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5337#else
5338DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5339{
5340# ifdef RT_ARCH_AMD64
5341 return (uint32_t)(u64 / u32);
5342# else /* !RT_ARCH_AMD64 */
5343# if RT_INLINE_ASM_GNU_STYLE
5344 RTCCUINTREG uDummy;
5345 __asm__ __volatile__("divl %3"
5346 : "=a" (u32), "=d"(uDummy)
5347 : "A" (u64), "r" (u32));
5348# else
5349 __asm
5350 {
5351 mov eax, dword ptr [u64]
5352 mov edx, dword ptr [u64 + 4]
5353 mov ecx, [u32]
5354 div ecx
5355 mov [u32], eax
5356 }
5357# endif
5358 return u32;
5359# endif /* !RT_ARCH_AMD64 */
5360}
5361#endif
5362
5363
5364/**
5365 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5366 *
5367 * @returns i64 / i32.
5368 */
5369#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5370DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5371#else
5372DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5373{
5374# ifdef RT_ARCH_AMD64
5375 return (int32_t)(i64 / i32);
5376# else /* !RT_ARCH_AMD64 */
5377# if RT_INLINE_ASM_GNU_STYLE
5378 RTCCUINTREG iDummy;
5379 __asm__ __volatile__("idivl %3"
5380 : "=a" (i32), "=d"(iDummy)
5381 : "A" (i64), "r" (i32));
5382# else
5383 __asm
5384 {
5385 mov eax, dword ptr [i64]
5386 mov edx, dword ptr [i64 + 4]
5387 mov ecx, [i32]
5388 idiv ecx
5389 mov [i32], eax
5390 }
5391# endif
5392 return i32;
5393# endif /* !RT_ARCH_AMD64 */
5394}
5395#endif
5396
5397
5398/**
5399 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5400 * returning the remainder.
5401 *
5402 * @returns u64 % u32.
5403 *
5404 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits, or the division will overflow and crash.
5405 */
5406#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5407DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5408#else
5409DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5410{
5411# ifdef RT_ARCH_AMD64
5412 return (uint32_t)(u64 % u32);
5413# else /* !RT_ARCH_AMD64 */
5414# if RT_INLINE_ASM_GNU_STYLE
5415 RTCCUINTREG uDummy;
5416 __asm__ __volatile__("divl %3"
5417 : "=a" (uDummy), "=d"(u32)
5418 : "A" (u64), "r" (u32));
5419# else
5420 __asm
5421 {
5422 mov eax, dword ptr [u64]
5423 mov edx, dword ptr [u64 + 4]
5424 mov ecx, [u32]
5425 div ecx
5426 mov [u32], edx
5427 }
5428# endif
5429 return u32;
5430# endif /* !RT_ARCH_AMD64 */
5431}
5432#endif
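
/* Worked example for the overflow remark above (cb is a hypothetical byte
 * count): splitting it into 512-byte sectors is safe as long as the quotient
 * fits in 32 bits, i.e. cb < 512 * 2^32; for larger inputs use plain 64-bit
 * C division instead.
 *
 *      uint32_t cSectors  = ASMDivU64ByU32RetU32(cb, 512);
 *      uint32_t offInSect = ASMModU64ByU32RetU32(cb, 512);
 */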
5433
5434
5435/**
5436 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5437 * returning the remainder.
5438 *
5439 * @returns i64 % i32.
5440 *
5441 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits, or the division will overflow and crash.
5442 */
5443#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5444DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5445#else
5446DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5447{
5448# ifdef RT_ARCH_AMD64
5449 return (int32_t)(i64 % i32);
5450# else /* !RT_ARCH_AMD64 */
5451# if RT_INLINE_ASM_GNU_STYLE
5452 RTCCUINTREG iDummy;
5453 __asm__ __volatile__("idivl %3"
5454 : "=a" (iDummy), "=d"(i32)
5455 : "A" (i64), "r" (i32));
5456# else
5457 __asm
5458 {
5459 mov eax, dword ptr [i64]
5460 mov edx, dword ptr [i64 + 4]
5461 mov ecx, [i32]
5462 idiv ecx
5463 mov [i32], edx
5464 }
5465# endif
5466 return i32;
5467# endif /* !RT_ARCH_AMD64 */
5468}
5469#endif
5470
5471
5472/**
5473 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
5474 * using a 96 bit intermediate result.
5475 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5476 * __udivdi3 and __umoddi3 even if this inline function is not used.
5477 *
5478 * @returns (u64A * u32B) / u32C.
5479 * @param u64A The 64-bit value.
5480 * @param u32B The 32-bit value to multiply A by.
5481 * @param u32C The 32-bit value to divide A*B by.
5482 */
5483#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5484DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5485#else
5486DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5487{
5488# if RT_INLINE_ASM_GNU_STYLE
5489# ifdef RT_ARCH_AMD64
5490 uint64_t u64Result, u64Spill;
5491 __asm__ __volatile__("mulq %2\n\t"
5492 "divq %3\n\t"
5493 : "=a" (u64Result),
5494 "=d" (u64Spill)
5495 : "r" ((uint64_t)u32B),
5496 "r" ((uint64_t)u32C),
5497 "0" (u64A),
5498 "1" (0));
5499 return u64Result;
5500# else
5501 uint32_t u32Dummy;
5502 uint64_t u64Result;
5503 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5504 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5505 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5506 eax = u64A.hi */
5507 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5508 edx = u32C */
5509 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5510 edx = u32B */
5511 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5512 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5513 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5514 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5515 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5516 edx = u64Hi % u32C */
5517 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5518 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5519 "divl %%ecx \n\t" /* u64Result.lo */
5520 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5521 : "=A"(u64Result), "=c"(u32Dummy),
5522 "=S"(u32Dummy), "=D"(u32Dummy)
5523 : "a"((uint32_t)u64A),
5524 "S"((uint32_t)(u64A >> 32)),
5525 "c"(u32B),
5526 "D"(u32C));
5527 return u64Result;
5528# endif
5529# else
5530 RTUINT64U u;
5531 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5532 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5533 u64Hi += (u64Lo >> 32);
5534 u.s.Hi = (uint32_t)(u64Hi / u32C);
5535 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5536 return u.u;
5537# endif
5538}
5539#endif
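
/* Usage sketch (hypothetical names; assumes the CPU frequency fits in 32 bits):
 * convert a TSC delta to nanoseconds. The 96-bit intermediate avoids the
 * overflow that (cTicks * 1000000000) / uCpuHz would hit in plain 64-bit math.
 *
 *      uint64_t cNanoSecs = ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uCpuHz);
 */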
5540
5541
5542/**
5543 * Probes a byte pointer for read access.
5544 *
5545 * While the function will fault if the byte is not read accessible,
5546 * the idea is to do this in a safe place like before acquiring locks
5547 * and such like.
5548 *
5549 * Also, this function guarantees that an eager compiler is not going
5550 * to optimize the probing away.
5551 *
5552 * @param pvByte Pointer to the byte.
5553 */
5554#if RT_INLINE_ASM_EXTERNAL
5555DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5556#else
5557DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5558{
5559 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5560 uint8_t u8;
5561# if RT_INLINE_ASM_GNU_STYLE
5562 __asm__ __volatile__("movb (%1), %0\n\t"
5563 : "=r" (u8)
5564 : "r" (pvByte));
5565# else
5566 __asm
5567 {
5568# ifdef RT_ARCH_AMD64
5569 mov rax, [pvByte]
5570 mov al, [rax]
5571# else
5572 mov eax, [pvByte]
5573 mov al, [eax]
5574# endif
5575 mov [u8], al
5576 }
5577# endif
5578 return u8;
5579}
5580#endif
5581
5582/**
5583 * Probes a buffer for read access page by page.
5584 *
5585 * While the function will fault if the buffer is not fully read
5586 * accessible, the idea is to do this in a safe place like before
5587 * acquiring locks and such like.
5588 *
5589 * Also, this function guarantees that an eager compiler is not going
5590 * to optimize the probing away.
5591 *
5592 * @param pvBuf Pointer to the buffer.
5593 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5594 */
5595DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5596{
5597 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5598 /* the first byte */
5599 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5600 ASMProbeReadByte(pu8);
5601
5602 /* the pages in between. */
5603 while (cbBuf > /*PAGE_SIZE*/0x1000)
5604 {
5605 ASMProbeReadByte(pu8);
5606 cbBuf -= /*PAGE_SIZE*/0x1000;
5607 pu8 += /*PAGE_SIZE*/0x1000;
5608 }
5609
5610 /* the last byte */
5611 ASMProbeReadByte(pu8 + cbBuf - 1);
5612}
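
/* Usage sketch (hypothetical names): touch every page of a request buffer up
 * front so that any page fault happens here, in a safe context, rather than in
 * a section where faulting is not acceptable.
 *
 *      ASMProbeReadBuffer(pvReq, cbReq);
 *      enterNoFaultSection();           // placeholder for e.g. taking a spinlock
 *      copyFrom(pvReq, cbReq);          // placeholder
 *      leaveNoFaultSection();           // placeholder
 */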
5613
5614
5615/** @def ASMBreakpoint
5616 * Debugger Breakpoint.
5617 * @remark In the gnu world we add a nop instruction after the int3 to
5618 * force gdb to remain at the int3 source line.
5619 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5620 * @internal
5621 */
5622#if RT_INLINE_ASM_GNU_STYLE
5623# ifndef __L4ENV__
5624# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5625# else
5626# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5627# endif
5628#else
5629# define ASMBreakpoint() __debugbreak()
5630#endif
5631
5632
5633
5634/** @defgroup grp_inline_bits Bit Operations
5635 * @{
5636 */
5637
5638
5639/**
5640 * Sets a bit in a bitmap.
5641 *
5642 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5643 * @param iBit The bit to set.
5644 *
5645 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
5646 * however, aligning it yields better performance and avoids traps
5647 * when accessing the last bits in the bitmap.
5648 */
5649#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5650DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5651#else
5652DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5653{
5654# if RT_INLINE_ASM_USES_INTRIN
5655 _bittestandset((long *)pvBitmap, iBit);
5656
5657# elif RT_INLINE_ASM_GNU_STYLE
5658 __asm__ __volatile__("btsl %1, %0"
5659 : "=m" (*(volatile long *)pvBitmap)
5660 : "Ir" (iBit),
5661 "m" (*(volatile long *)pvBitmap)
5662 : "memory");
5663# else
5664 __asm
5665 {
5666# ifdef RT_ARCH_AMD64
5667 mov rax, [pvBitmap]
5668 mov edx, [iBit]
5669 bts [rax], edx
5670# else
5671 mov eax, [pvBitmap]
5672 mov edx, [iBit]
5673 bts [eax], edx
5674# endif
5675 }
5676# endif
5677}
5678#endif
5679
5680
5681/**
5682 * Atomically sets a bit in a bitmap, ordered.
5683 *
5684 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5685 * the memory access isn't atomic!
5686 * @param iBit The bit to set.
5687 */
5688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5689DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5690#else
5691DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5692{
5693 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5694# if RT_INLINE_ASM_USES_INTRIN
5695 _interlockedbittestandset((long *)pvBitmap, iBit);
5696# elif RT_INLINE_ASM_GNU_STYLE
5697 __asm__ __volatile__("lock; btsl %1, %0"
5698 : "=m" (*(volatile long *)pvBitmap)
5699 : "Ir" (iBit),
5700 "m" (*(volatile long *)pvBitmap)
5701 : "memory");
5702# else
5703 __asm
5704 {
5705# ifdef RT_ARCH_AMD64
5706 mov rax, [pvBitmap]
5707 mov edx, [iBit]
5708 lock bts [rax], edx
5709# else
5710 mov eax, [pvBitmap]
5711 mov edx, [iBit]
5712 lock bts [eax], edx
5713# endif
5714 }
5715# endif
5716}
5717#endif
5718
5719
5720/**
5721 * Clears a bit in a bitmap.
5722 *
5723 * @param pvBitmap Pointer to the bitmap.
5724 * @param iBit The bit to clear.
5725 *
5726 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
5727 * however, aligning it yields better performance and avoids traps
5728 * when accessing the last bits in the bitmap.
5729 */
5730#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5731DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5732#else
5733DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5734{
5735# if RT_INLINE_ASM_USES_INTRIN
5736 _bittestandreset((long *)pvBitmap, iBit);
5737
5738# elif RT_INLINE_ASM_GNU_STYLE
5739 __asm__ __volatile__("btrl %1, %0"
5740 : "=m" (*(volatile long *)pvBitmap)
5741 : "Ir" (iBit),
5742 "m" (*(volatile long *)pvBitmap)
5743 : "memory");
5744# else
5745 __asm
5746 {
5747# ifdef RT_ARCH_AMD64
5748 mov rax, [pvBitmap]
5749 mov edx, [iBit]
5750 btr [rax], edx
5751# else
5752 mov eax, [pvBitmap]
5753 mov edx, [iBit]
5754 btr [eax], edx
5755# endif
5756 }
5757# endif
5758}
5759#endif
5760
5761
5762/**
5763 * Atomically clears a bit in a bitmap, ordered.
5764 *
5765 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5766 * the memory access isn't atomic!
5767 * @param iBit The bit to clear.
5768 * @remarks No memory barrier, take care on SMP.
5769 */
5770#if RT_INLINE_ASM_EXTERNAL
5771DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5772#else
5773DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5774{
5775 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5776# if RT_INLINE_ASM_GNU_STYLE
5777 __asm__ __volatile__("lock; btrl %1, %0"
5778 : "=m" (*(volatile long *)pvBitmap)
5779 : "Ir" (iBit),
5780 "m" (*(volatile long *)pvBitmap)
5781 : "memory");
5782# else
5783 __asm
5784 {
5785# ifdef RT_ARCH_AMD64
5786 mov rax, [pvBitmap]
5787 mov edx, [iBit]
5788 lock btr [rax], edx
5789# else
5790 mov eax, [pvBitmap]
5791 mov edx, [iBit]
5792 lock btr [eax], edx
5793# endif
5794 }
5795# endif
5796}
5797#endif
5798
5799
5800/**
5801 * Toggles a bit in a bitmap.
5802 *
5803 * @param pvBitmap Pointer to the bitmap.
5804 * @param iBit The bit to toggle.
5805 *
5806 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
5807 * however, aligning it yields better performance and avoids traps
5808 * when accessing the last bits in the bitmap.
5809 */
5810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5811DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5812#else
5813DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5814{
5815# if RT_INLINE_ASM_USES_INTRIN
5816 _bittestandcomplement((long *)pvBitmap, iBit);
5817# elif RT_INLINE_ASM_GNU_STYLE
5818 __asm__ __volatile__("btcl %1, %0"
5819 : "=m" (*(volatile long *)pvBitmap)
5820 : "Ir" (iBit),
5821 "m" (*(volatile long *)pvBitmap)
5822 : "memory");
5823# else
5824 __asm
5825 {
5826# ifdef RT_ARCH_AMD64
5827 mov rax, [pvBitmap]
5828 mov edx, [iBit]
5829 btc [rax], edx
5830# else
5831 mov eax, [pvBitmap]
5832 mov edx, [iBit]
5833 btc [eax], edx
5834# endif
5835 }
5836# endif
5837}
5838#endif
5839
5840
5841/**
5842 * Atomically toggles a bit in a bitmap, ordered.
5843 *
5844 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5845 * the memory access isn't atomic!
5846 * @param iBit The bit to toggle.
5847 */
5848#if RT_INLINE_ASM_EXTERNAL
5849DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5850#else
5851DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5852{
5853 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5854# if RT_INLINE_ASM_GNU_STYLE
5855 __asm__ __volatile__("lock; btcl %1, %0"
5856 : "=m" (*(volatile long *)pvBitmap)
5857 : "Ir" (iBit),
5858 "m" (*(volatile long *)pvBitmap)
5859 : "memory");
5860# else
5861 __asm
5862 {
5863# ifdef RT_ARCH_AMD64
5864 mov rax, [pvBitmap]
5865 mov edx, [iBit]
5866 lock btc [rax], edx
5867# else
5868 mov eax, [pvBitmap]
5869 mov edx, [iBit]
5870 lock btc [eax], edx
5871# endif
5872 }
5873# endif
5874}
5875#endif
5876
5877
5878/**
5879 * Tests and sets a bit in a bitmap.
5880 *
5881 * @returns true if the bit was set.
5882 * @returns false if the bit was clear.
5883 *
5884 * @param pvBitmap Pointer to the bitmap.
5885 * @param iBit The bit to test and set.
5886 *
5887 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
5888 * however, aligning it yields better performance and avoids traps
5889 * when accessing the last bits in the bitmap.
5890 */
5891#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5892DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5893#else
5894DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5895{
5896 union { bool f; uint32_t u32; uint8_t u8; } rc;
5897# if RT_INLINE_ASM_USES_INTRIN
5898 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5899
5900# elif RT_INLINE_ASM_GNU_STYLE
5901 __asm__ __volatile__("btsl %2, %1\n\t"
5902 "setc %b0\n\t"
5903 "andl $1, %0\n\t"
5904 : "=q" (rc.u32),
5905 "=m" (*(volatile long *)pvBitmap)
5906 : "Ir" (iBit),
5907 "m" (*(volatile long *)pvBitmap)
5908 : "memory");
5909# else
5910 __asm
5911 {
5912 mov edx, [iBit]
5913# ifdef RT_ARCH_AMD64
5914 mov rax, [pvBitmap]
5915 bts [rax], edx
5916# else
5917 mov eax, [pvBitmap]
5918 bts [eax], edx
5919# endif
5920 setc al
5921 and eax, 1
5922 mov [rc.u32], eax
5923 }
5924# endif
5925 return rc.f;
5926}
5927#endif
5928
5929
5930/**
5931 * Atomically tests and sets a bit in a bitmap, ordered.
5932 *
5933 * @returns true if the bit was set.
5934 * @returns false if the bit was clear.
5935 *
5936 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5937 * the memory access isn't atomic!
5938 * @param iBit The bit to set.
5939 */
5940#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5941DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5942#else
5943DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5944{
5945 union { bool f; uint32_t u32; uint8_t u8; } rc;
5946 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5947# if RT_INLINE_ASM_USES_INTRIN
5948 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5949# elif RT_INLINE_ASM_GNU_STYLE
5950 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5951 "setc %b0\n\t"
5952 "andl $1, %0\n\t"
5953 : "=q" (rc.u32),
5954 "=m" (*(volatile long *)pvBitmap)
5955 : "Ir" (iBit),
5956 "m" (*(volatile long *)pvBitmap)
5957 : "memory");
5958# else
5959 __asm
5960 {
5961 mov edx, [iBit]
5962# ifdef RT_ARCH_AMD64
5963 mov rax, [pvBitmap]
5964 lock bts [rax], edx
5965# else
5966 mov eax, [pvBitmap]
5967 lock bts [eax], edx
5968# endif
5969 setc al
5970 and eax, 1
5971 mov [rc.u32], eax
5972 }
5973# endif
5974 return rc.f;
5975}
5976#endif
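
/* Usage sketch (hypothetical names): claim slot iSlot in a shared allocation
 * bitmap; the return value tells us whether we won the race.
 *
 *      static uint32_t volatile g_bmSlots[2];   // 64 slots
 *      ...
 *      if (!ASMAtomicBitTestAndSet(&g_bmSlots[0], iSlot))
 *          useSlot(iSlot);   // the bit was clear and we set it - the slot is ours
 */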
5977
5978
5979/**
5980 * Tests and clears a bit in a bitmap.
5981 *
5982 * @returns true if the bit was set.
5983 * @returns false if the bit was clear.
5984 *
5985 * @param pvBitmap Pointer to the bitmap.
5986 * @param iBit The bit to test and clear.
5987 *
5988 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
5989 * however, aligning it yields better performance and avoids traps
5990 * when accessing the last bits in the bitmap.
5991 */
5992#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5993DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5994#else
5995DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5996{
5997 union { bool f; uint32_t u32; uint8_t u8; } rc;
5998# if RT_INLINE_ASM_USES_INTRIN
5999 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
6000
6001# elif RT_INLINE_ASM_GNU_STYLE
6002 __asm__ __volatile__("btrl %2, %1\n\t"
6003 "setc %b0\n\t"
6004 "andl $1, %0\n\t"
6005 : "=q" (rc.u32),
6006 "=m" (*(volatile long *)pvBitmap)
6007 : "Ir" (iBit),
6008 "m" (*(volatile long *)pvBitmap)
6009 : "memory");
6010# else
6011 __asm
6012 {
6013 mov edx, [iBit]
6014# ifdef RT_ARCH_AMD64
6015 mov rax, [pvBitmap]
6016 btr [rax], edx
6017# else
6018 mov eax, [pvBitmap]
6019 btr [eax], edx
6020# endif
6021 setc al
6022 and eax, 1
6023 mov [rc.u32], eax
6024 }
6025# endif
6026 return rc.f;
6027}
6028#endif
6029
6030
6031/**
6032 * Atomically tests and clears a bit in a bitmap, ordered.
6033 *
6034 * @returns true if the bit was set.
6035 * @returns false if the bit was clear.
6036 *
6037 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6038 * the memory access isn't atomic!
6039 * @param iBit The bit to test and clear.
6040 *
6041 * @remarks No memory barrier, take care on SMP.
6042 */
6043#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6044DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6045#else
6046DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6047{
6048 union { bool f; uint32_t u32; uint8_t u8; } rc;
6049 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6050# if RT_INLINE_ASM_USES_INTRIN
6051 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
6052
6053# elif RT_INLINE_ASM_GNU_STYLE
6054 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6055 "setc %b0\n\t"
6056 "andl $1, %0\n\t"
6057 : "=q" (rc.u32),
6058 "=m" (*(volatile long *)pvBitmap)
6059 : "Ir" (iBit),
6060 "m" (*(volatile long *)pvBitmap)
6061 : "memory");
6062# else
6063 __asm
6064 {
6065 mov edx, [iBit]
6066# ifdef RT_ARCH_AMD64
6067 mov rax, [pvBitmap]
6068 lock btr [rax], edx
6069# else
6070 mov eax, [pvBitmap]
6071 lock btr [eax], edx
6072# endif
6073 setc al
6074 and eax, 1
6075 mov [rc.u32], eax
6076 }
6077# endif
6078 return rc.f;
6079}
6080#endif
6081
6082
6083/**
6084 * Tests and toggles a bit in a bitmap.
6085 *
6086 * @returns true if the bit was set.
6087 * @returns false if the bit was clear.
6088 *
6089 * @param pvBitmap Pointer to the bitmap.
6090 * @param iBit The bit to test and toggle.
6091 *
6092 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
6093 * however, aligning it yields better performance and avoids traps
6094 * when accessing the last bits in the bitmap.
6095 */
6096#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6097DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6098#else
6099DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6100{
6101 union { bool f; uint32_t u32; uint8_t u8; } rc;
6102# if RT_INLINE_ASM_USES_INTRIN
6103 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6104
6105# elif RT_INLINE_ASM_GNU_STYLE
6106 __asm__ __volatile__("btcl %2, %1\n\t"
6107 "setc %b0\n\t"
6108 "andl $1, %0\n\t"
6109 : "=q" (rc.u32),
6110 "=m" (*(volatile long *)pvBitmap)
6111 : "Ir" (iBit),
6112 "m" (*(volatile long *)pvBitmap)
6113 : "memory");
6114# else
6115 __asm
6116 {
6117 mov edx, [iBit]
6118# ifdef RT_ARCH_AMD64
6119 mov rax, [pvBitmap]
6120 btc [rax], edx
6121# else
6122 mov eax, [pvBitmap]
6123 btc [eax], edx
6124# endif
6125 setc al
6126 and eax, 1
6127 mov [rc.u32], eax
6128 }
6129# endif
6130 return rc.f;
6131}
6132#endif
6133
6134
6135/**
6136 * Atomically tests and toggles a bit in a bitmap, ordered.
6137 *
6138 * @returns true if the bit was set.
6139 * @returns false if the bit was clear.
6140 *
6141 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6142 * the memory access isn't atomic!
6143 * @param iBit The bit to test and toggle.
6144 */
6145#if RT_INLINE_ASM_EXTERNAL
6146DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6147#else
6148DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6149{
6150 union { bool f; uint32_t u32; uint8_t u8; } rc;
6151 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6152# if RT_INLINE_ASM_GNU_STYLE
6153 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6154 "setc %b0\n\t"
6155 "andl $1, %0\n\t"
6156 : "=q" (rc.u32),
6157 "=m" (*(volatile long *)pvBitmap)
6158 : "Ir" (iBit),
6159 "m" (*(volatile long *)pvBitmap)
6160 : "memory");
6161# else
6162 __asm
6163 {
6164 mov edx, [iBit]
6165# ifdef RT_ARCH_AMD64
6166 mov rax, [pvBitmap]
6167 lock btc [rax], edx
6168# else
6169 mov eax, [pvBitmap]
6170 lock btc [eax], edx
6171# endif
6172 setc al
6173 and eax, 1
6174 mov [rc.u32], eax
6175 }
6176# endif
6177 return rc.f;
6178}
6179#endif
6180
6181
6182/**
6183 * Tests if a bit in a bitmap is set.
6184 *
6185 * @returns true if the bit is set.
6186 * @returns false if the bit is clear.
6187 *
6188 * @param pvBitmap Pointer to the bitmap.
6189 * @param iBit The bit to test.
6190 *
6191 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement;
6192 * however, aligning it yields better performance and avoids traps
6193 * when accessing the last bits in the bitmap.
6194 */
6195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6196DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6197#else
6198DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6199{
6200 union { bool f; uint32_t u32; uint8_t u8; } rc;
6201# if RT_INLINE_ASM_USES_INTRIN
6202 rc.u32 = _bittest((long *)pvBitmap, iBit);
6203# elif RT_INLINE_ASM_GNU_STYLE
6204
6205 __asm__ __volatile__("btl %2, %1\n\t"
6206 "setc %b0\n\t"
6207 "andl $1, %0\n\t"
6208 : "=q" (rc.u32)
6209 : "m" (*(const volatile long *)pvBitmap),
6210 "Ir" (iBit)
6211 : "memory");
6212# else
6213 __asm
6214 {
6215 mov edx, [iBit]
6216# ifdef RT_ARCH_AMD64
6217 mov rax, [pvBitmap]
6218 bt [rax], edx
6219# else
6220 mov eax, [pvBitmap]
6221 bt [eax], edx
6222# endif
6223 setc al
6224 and eax, 1
6225 mov [rc.u32], eax
6226 }
6227# endif
6228 return rc.f;
6229}
6230#endif
6231
6232
6233/**
6234 * Clears a bit range within a bitmap.
6235 *
6236 * @param pvBitmap Pointer to the bitmap.
6237 * @param iBitStart The first bit to clear.
6238 * @param iBitEnd The first bit not to clear.
6239 */
6240DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6241{
6242 if (iBitStart < iBitEnd)
6243 {
6244 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6245 int iStart = iBitStart & ~31;
6246 int iEnd = iBitEnd & ~31;
6247 if (iStart == iEnd)
6248 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6249 else
6250 {
6251 /* bits in first dword. */
6252 if (iBitStart & 31)
6253 {
6254 *pu32 &= (1 << (iBitStart & 31)) - 1;
6255 pu32++;
6256 iBitStart = iStart + 32;
6257 }
6258
6259 /* whole dword. */
6260 if (iBitStart != iEnd)
6261 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6262
6263 /* bits in last dword. */
6264 if (iBitEnd & 31)
6265 {
6266 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6267 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6268 }
6269 }
6270 }
6271}
6272
6273
6274/**
6275 * Sets a bit range within a bitmap.
6276 *
6277 * @param pvBitmap Pointer to the bitmap.
6278 * @param iBitStart The first bit to set.
6279 * @param iBitEnd The first bit not to set.
6280 */
6281DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6282{
6283 if (iBitStart < iBitEnd)
6284 {
6285 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6286 int iStart = iBitStart & ~31;
6287 int iEnd = iBitEnd & ~31;
6288 if (iStart == iEnd)
6289 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
6290 else
6291 {
6292 /* bits in first dword. */
6293 if (iBitStart & 31)
6294 {
6295 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6296 pu32++;
6297 iBitStart = iStart + 32;
6298 }
6299
6300 /* whole dword. */
6301 if (iBitStart != iEnd)
6302 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6303
6304 /* bits in last dword. */
6305 if (iBitEnd & 31)
6306 {
6307 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6308 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6309 }
6310 }
6311 }
6312}
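
/* Usage sketch: iBitEnd is exclusive in both range helpers, so the calls below
 * touch bits 12 through 63 of a 128-bit map (bm is a hypothetical variable).
 *
 *      uint32_t bm[4] = {0};
 *      ASMBitSetRange(bm, 12, 64);
 *      ASMBitClearRange(bm, 12, 64);
 */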
6313
6314
6315/**
6316 * Finds the first clear bit in a bitmap.
6317 *
6318 * @returns Index of the first zero bit.
6319 * @returns -1 if no clear bit was found.
6320 * @param pvBitmap Pointer to the bitmap.
6321 * @param cBits The number of bits in the bitmap. Multiple of 32.
6322 */
6323#if RT_INLINE_ASM_EXTERNAL
6324DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6325#else
6326DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6327{
6328 if (cBits)
6329 {
6330 int32_t iBit;
6331# if RT_INLINE_ASM_GNU_STYLE
6332 RTCCUINTREG uEAX, uECX, uEDI;
6333 cBits = RT_ALIGN_32(cBits, 32);
6334 __asm__ __volatile__("repe; scasl\n\t"
6335 "je 1f\n\t"
6336# ifdef RT_ARCH_AMD64
6337 "lea -4(%%rdi), %%rdi\n\t"
6338 "xorl (%%rdi), %%eax\n\t"
6339 "subq %5, %%rdi\n\t"
6340# else
6341 "lea -4(%%edi), %%edi\n\t"
6342 "xorl (%%edi), %%eax\n\t"
6343 "subl %5, %%edi\n\t"
6344# endif
6345 "shll $3, %%edi\n\t"
6346 "bsfl %%eax, %%edx\n\t"
6347 "addl %%edi, %%edx\n\t"
6348 "1:\t\n"
6349 : "=d" (iBit),
6350 "=&c" (uECX),
6351 "=&D" (uEDI),
6352 "=&a" (uEAX)
6353 : "0" (0xffffffff),
6354 "mr" (pvBitmap),
6355 "1" (cBits >> 5),
6356 "2" (pvBitmap),
6357 "3" (0xffffffff));
6358# else
6359 cBits = RT_ALIGN_32(cBits, 32);
6360 __asm
6361 {
6362# ifdef RT_ARCH_AMD64
6363 mov rdi, [pvBitmap]
6364 mov rbx, rdi
6365# else
6366 mov edi, [pvBitmap]
6367 mov ebx, edi
6368# endif
6369 mov edx, 0ffffffffh
6370 mov eax, edx
6371 mov ecx, [cBits]
6372 shr ecx, 5
6373 repe scasd
6374 je done
6375
6376# ifdef RT_ARCH_AMD64
6377 lea rdi, [rdi - 4]
6378 xor eax, [rdi]
6379 sub rdi, rbx
6380# else
6381 lea edi, [edi - 4]
6382 xor eax, [edi]
6383 sub edi, ebx
6384# endif
6385 shl edi, 3
6386 bsf edx, eax
6387 add edx, edi
6388 done:
6389 mov [iBit], edx
6390 }
6391# endif
6392 return iBit;
6393 }
6394 return -1;
6395}
6396#endif
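
/* Usage sketch (hypothetical names): find a free slot and try to claim it
 * atomically, retrying if another thread grabbed it first. cBits (256 here)
 * must be a multiple of 32.
 *
 *      int iBit;
 *      while ((iBit = ASMBitFirstClear(g_bmSlots, 256)) >= 0)
 *          if (!ASMAtomicBitTestAndSet(g_bmSlots, iBit))
 *              break;   // claimed slot iBit (iBit == -1 after the loop means no free slot)
 */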
6397
6398
6399/**
6400 * Finds the next clear bit in a bitmap.
6401 *
6402 * @returns Index of the first zero bit.
6403 * @returns -1 if no clear bit was found.
6404 * @param pvBitmap Pointer to the bitmap.
6405 * @param cBits The number of bits in the bitmap. Multiple of 32.
6406 * @param iBitPrev The bit returned from the last search.
6407 * The search will start at iBitPrev + 1.
6408 */
6409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6410DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6411#else
6412DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6413{
6414 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6415 int iBit = ++iBitPrev & 31;
6416 if (iBit)
6417 {
6418 /*
6419 * Inspect the 32-bit word containing the unaligned bit.
6420 */
6421 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6422
6423# if RT_INLINE_ASM_USES_INTRIN
6424 unsigned long ulBit = 0;
6425 if (_BitScanForward(&ulBit, u32))
6426 return ulBit + iBitPrev;
6427# else
6428# if RT_INLINE_ASM_GNU_STYLE
6429 __asm__ __volatile__("bsf %1, %0\n\t"
6430 "jnz 1f\n\t"
6431 "movl $-1, %0\n\t"
6432 "1:\n\t"
6433 : "=r" (iBit)
6434 : "r" (u32));
6435# else
6436 __asm
6437 {
6438 mov edx, [u32]
6439 bsf eax, edx
6440 jnz done
6441 mov eax, 0ffffffffh
6442 done:
6443 mov [iBit], eax
6444 }
6445# endif
6446 if (iBit >= 0)
6447 return iBit + iBitPrev;
6448# endif
6449
6450 /*
6451 * Skip ahead and see if there is anything left to search.
6452 */
6453 iBitPrev |= 31;
6454 iBitPrev++;
6455 if (cBits <= (uint32_t)iBitPrev)
6456 return -1;
6457 }
6458
6459 /*
6460 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6461 */
6462 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6463 if (iBit >= 0)
6464 iBit += iBitPrev;
6465 return iBit;
6466}
6467#endif
6468
6469
6470/**
6471 * Finds the first set bit in a bitmap.
6472 *
6473 * @returns Index of the first set bit.
6474 * @returns -1 if no set bit was found.
6475 * @param pvBitmap Pointer to the bitmap.
6476 * @param cBits The number of bits in the bitmap. Multiple of 32.
6477 */
6478#if RT_INLINE_ASM_EXTERNAL
6479DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6480#else
6481DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6482{
6483 if (cBits)
6484 {
6485 int32_t iBit;
6486# if RT_INLINE_ASM_GNU_STYLE
6487 RTCCUINTREG uEAX, uECX, uEDI;
6488 cBits = RT_ALIGN_32(cBits, 32);
6489 __asm__ __volatile__("repe; scasl\n\t"
6490 "je 1f\n\t"
6491# ifdef RT_ARCH_AMD64
6492 "lea -4(%%rdi), %%rdi\n\t"
6493 "movl (%%rdi), %%eax\n\t"
6494 "subq %5, %%rdi\n\t"
6495# else
6496 "lea -4(%%edi), %%edi\n\t"
6497 "movl (%%edi), %%eax\n\t"
6498 "subl %5, %%edi\n\t"
6499# endif
6500 "shll $3, %%edi\n\t"
6501 "bsfl %%eax, %%edx\n\t"
6502 "addl %%edi, %%edx\n\t"
6503 "1:\t\n"
6504 : "=d" (iBit),
6505 "=&c" (uECX),
6506 "=&D" (uEDI),
6507 "=&a" (uEAX)
6508 : "0" (0xffffffff),
6509 "mr" (pvBitmap),
6510 "1" (cBits >> 5),
6511 "2" (pvBitmap),
6512 "3" (0));
6513# else
6514 cBits = RT_ALIGN_32(cBits, 32);
6515 __asm
6516 {
6517# ifdef RT_ARCH_AMD64
6518 mov rdi, [pvBitmap]
6519 mov rbx, rdi
6520# else
6521 mov edi, [pvBitmap]
6522 mov ebx, edi
6523# endif
6524 mov edx, 0ffffffffh
6525 xor eax, eax
6526 mov ecx, [cBits]
6527 shr ecx, 5
6528 repe scasd
6529 je done
6530# ifdef RT_ARCH_AMD64
6531 lea rdi, [rdi - 4]
6532 mov eax, [rdi]
6533 sub rdi, rbx
6534# else
6535 lea edi, [edi - 4]
6536 mov eax, [edi]
6537 sub edi, ebx
6538# endif
6539 shl edi, 3
6540 bsf edx, eax
6541 add edx, edi
6542 done:
6543 mov [iBit], edx
6544 }
6545# endif
6546 return iBit;
6547 }
6548 return -1;
6549}
6550#endif
6551
6552
6553/**
6554 * Finds the next set bit in a bitmap.
6555 *
6556 * @returns Index of the next set bit.
6557 * @returns -1 if no set bit was found.
6558 * @param pvBitmap Pointer to the bitmap.
6559 * @param cBits The number of bits in the bitmap. Multiple of 32.
6560 * @param iBitPrev The bit returned from the last search.
6561 * The search will start at iBitPrev + 1.
6562 */
6563#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6564DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6565#else
6566DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6567{
6568 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6569 int iBit = ++iBitPrev & 31;
6570 if (iBit)
6571 {
6572 /*
6573 * Inspect the 32-bit word containing the unaligned bit.
6574 */
6575 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6576
6577# if RT_INLINE_ASM_USES_INTRIN
6578 unsigned long ulBit = 0;
6579 if (_BitScanForward(&ulBit, u32))
6580 return ulBit + iBitPrev;
6581# else
6582# if RT_INLINE_ASM_GNU_STYLE
6583 __asm__ __volatile__("bsf %1, %0\n\t"
6584 "jnz 1f\n\t"
6585 "movl $-1, %0\n\t"
6586 "1:\n\t"
6587 : "=r" (iBit)
6588 : "r" (u32));
6589# else
6590 __asm
6591 {
6592 mov edx, [u32]
6593 bsf eax, edx
6594 jnz done
6595 mov eax, 0ffffffffh
6596 done:
6597 mov [iBit], eax
6598 }
6599# endif
6600 if (iBit >= 0)
6601 return iBit + iBitPrev;
6602# endif
6603
6604 /*
6605 * Skip ahead and see if there is anything left to search.
6606 */
6607 iBitPrev |= 31;
6608 iBitPrev++;
6609 if (cBits <= (uint32_t)iBitPrev)
6610 return -1;
6611 }
6612
6613 /*
6614 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6615 */
6616 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6617 if (iBit >= 0)
6618 iBit += iBitPrev;
6619 return iBit;
6620}
6621#endif
6622
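/*
 * Typical pattern for walking every set bit with ASMBitFirstSet and
 * ASMBitNextSet.  The bitmap contents are made-up example values and
 * ProcessBit() is a placeholder for whatever the caller does per bit.
 *
 * @code
 * uint32_t au32Bitmap[2] = { 0x00000011, 0x00010000 };   // bits 0, 4 and 48
 * uint32_t cBits         = sizeof(au32Bitmap) * 8;
 * int      iBit          = ASMBitFirstSet(&au32Bitmap[0], cBits);
 * while (iBit >= 0)
 * {
 *     ProcessBit(iBit);                                   // called with 0, 4, 48
 *     iBit = ASMBitNextSet(&au32Bitmap[0], cBits, iBit);
 * }
 * @endcode
 */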
6623
6624/**
6625 * Finds the first bit which is set in the given 32-bit integer.
6626 * Bits are numbered from 1 (least significant) to 32.
6627 *
6628 * @returns index [1..32] of the first set bit.
6629 * @returns 0 if all bits are cleared.
6630 * @param u32 Integer to search for set bits.
6631 * @remark Similar to ffs() in BSD.
6632 */
6633DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6634{
6635# if RT_INLINE_ASM_USES_INTRIN
6636 unsigned long iBit;
6637 if (_BitScanForward(&iBit, u32))
6638 iBit++;
6639 else
6640 iBit = 0;
6641# elif RT_INLINE_ASM_GNU_STYLE
6642 uint32_t iBit;
6643 __asm__ __volatile__("bsf %1, %0\n\t"
6644 "jnz 1f\n\t"
6645 "xorl %0, %0\n\t"
6646 "jmp 2f\n"
6647 "1:\n\t"
6648 "incl %0\n"
6649 "2:\n\t"
6650 : "=r" (iBit)
6651 : "rm" (u32));
6652# else
6653 uint32_t iBit;
6654 _asm
6655 {
6656 bsf eax, [u32]
6657 jnz found
6658 xor eax, eax
6659 jmp done
6660 found:
6661 inc eax
6662 done:
6663 mov [iBit], eax
6664 }
6665# endif
6666 return iBit;
6667}
6668
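/*
 * A few worked examples of the 1-based indexing used by ASMBitFirstSetU32
 * (the input values are arbitrary illustrations):
 *
 * @code
 * unsigned i1 = ASMBitFirstSetU32(0x00000001);   // == 1  (bit 0 is the lowest set bit)
 * unsigned i2 = ASMBitFirstSetU32(0x00000100);   // == 9  (bit 8 is the lowest set bit)
 * unsigned i3 = ASMBitFirstSetU32(0);            // == 0  (no bits set)
 * @endcode
 */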
6669
6670/**
6671 * Finds the first bit which is set in the given 32-bit integer.
6672 * Bits are numbered from 1 (least significant) to 32.
6673 *
6674 * @returns index [1..32] of the first set bit.
6675 * @returns 0 if all bits are cleared.
6676 * @param i32 Integer to search for set bits.
6677 * @remark Similar to ffs() in BSD.
6678 */
6679DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6680{
6681 return ASMBitFirstSetU32((uint32_t)i32);
6682}
6683
6684
6685/**
6686 * Finds the last bit which is set in the given 32-bit integer.
6687 * Bits are numbered from 1 (least significant) to 32.
6688 *
6689 * @returns index [1..32] of the last set bit.
6690 * @returns 0 if all bits are cleared.
6691 * @param u32 Integer to search for set bits.
6692 * @remark Similar to fls() in BSD.
6693 */
6694DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6695{
6696# if RT_INLINE_ASM_USES_INTRIN
6697 unsigned long iBit;
6698 if (_BitScanReverse(&iBit, u32))
6699 iBit++;
6700 else
6701 iBit = 0;
6702# elif RT_INLINE_ASM_GNU_STYLE
6703 uint32_t iBit;
6704 __asm__ __volatile__("bsrl %1, %0\n\t"
6705 "jnz 1f\n\t"
6706 "xorl %0, %0\n\t"
6707 "jmp 2f\n"
6708 "1:\n\t"
6709 "incl %0\n"
6710 "2:\n\t"
6711 : "=r" (iBit)
6712 : "rm" (u32));
6713# else
6714 uint32_t iBit;
6715 _asm
6716 {
6717 bsr eax, [u32]
6718 jnz found
6719 xor eax, eax
6720 jmp done
6721 found:
6722 inc eax
6723 done:
6724 mov [iBit], eax
6725 }
6726# endif
6727 return iBit;
6728}
6729
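/*
 * Worked examples for ASMBitLastSetU32; for non-zero input the result is
 * floor(log2(u32)) + 1 (the input values are arbitrary illustrations):
 *
 * @code
 * unsigned i1 = ASMBitLastSetU32(0x00000001);    // == 1   (bit 0 is the highest set bit)
 * unsigned i2 = ASMBitLastSetU32(0x80000100);    // == 32  (bit 31 is the highest set bit)
 * unsigned i3 = ASMBitLastSetU32(0);             // == 0   (no bits set)
 * @endcode
 */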
6730
6731/**
6732 * Finds the last bit which is set in the given 32-bit integer.
6733 * Bits are numbered from 1 (least significant) to 32.
6734 *
6735 * @returns index [1..32] of the last set bit.
6736 * @returns 0 if all bits are cleared.
6737 * @param i32 Integer to search for set bits.
6738 * @remark Similar to fls() in BSD.
6739 */
6740DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6741{
6742 return ASMBitLastSetU32((uint32_t)i32);
6743}
6744
6745/**
6746 * Reverse the byte order of the given 16-bit integer.
6747 *
6748 * @returns The byte-swapped value.
6749 * @param u16 16-bit integer value.
6750 */
6751DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6752{
6753#if RT_INLINE_ASM_USES_INTRIN
6754 u16 = _byteswap_ushort(u16);
6755#elif RT_INLINE_ASM_GNU_STYLE
6756 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6757#else
6758 _asm
6759 {
6760 mov ax, [u16]
6761 ror ax, 8
6762 mov [u16], ax
6763 }
6764#endif
6765 return u16;
6766}
6767
6768/**
6769 * Reverse the byte order of the given 32-bit integer.
6770 *
6771 * @returns The byte-swapped value.
6772 * @param u32 32-bit integer value.
6773 */
6774DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6775{
6776#if RT_INLINE_ASM_USES_INTRIN
6777 u32 = _byteswap_ulong(u32);
6778#elif RT_INLINE_ASM_GNU_STYLE
6779 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6780#else
6781 _asm
6782 {
6783 mov eax, [u32]
6784 bswap eax
6785 mov [u32], eax
6786 }
6787#endif
6788 return u32;
6789}
6790
6791
6792/**
6793 * Reverse the byte order of the given 64-bit integer.
6794 *
6795 * @returns The byte-swapped value.
6796 * @param u64 64-bit integer value.
6797 */
6798DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6799{
6800#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6801 u64 = _byteswap_uint64(u64);
6802#else
6803 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6804 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6805#endif
6806 return u64;
6807}
6808
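/*
 * Worked examples for the byte swappers; a common use is converting between
 * little endian and big endian (network) byte order.  The constants are
 * arbitrary illustrations:
 *
 * @code
 * uint16_t u16 = ASMByteSwapU16(UINT16_C(0x1234));               // 0x3412
 * uint32_t u32 = ASMByteSwapU32(UINT32_C(0x12345678));           // 0x78563412
 * uint64_t u64 = ASMByteSwapU64(UINT64_C(0x0123456789abcdef));   // 0xefcdab8967452301
 * @endcode
 */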
6809
6810/** @} */
6811
6812
6813/** @} */
6814#endif
6815