VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 25559

Last change on this file since 25559 was 25496, checked in by vboxsync, 15 years ago

ASMAtomicCmpXchgU8: 32-bit build fix.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 174.0 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a _MSC_VER 1400 (Visual C++ 2005) or newer compiler.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(_mm_mfence)
105# pragma intrinsic(_mm_sfence)
106# pragma intrinsic(_mm_lfence)
107# pragma intrinsic(__stosq)
108# pragma intrinsic(__readcr8)
109# pragma intrinsic(__writecr8)
110# pragma intrinsic(_byteswap_uint64)
111# pragma intrinsic(_InterlockedExchange64)
112# endif
113# endif
114#endif
115#ifndef RT_INLINE_ASM_USES_INTRIN
116# define RT_INLINE_ASM_USES_INTRIN 0
117#endif
118
119/** @def RT_INLINE_ASM_GCC_4_3_X_X86
120 * Used to work around some 4.3.x register allocation issues in this version of
121 * the compiler. */
122#ifdef __GNUC__
123# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__))
124#endif
125#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
126# define RT_INLINE_ASM_GCC_4_3_X_X86 0
127#endif
128
129
130
131/** @defgroup grp_asm ASM - Assembly Routines
132 * @ingroup grp_rt
133 *
134 * @remarks The difference between ordered and unordered atomic operations is that
135 * the former complete outstanding reads and writes before continuing,
136 * while the latter make no promises about the order. Even ordered
137 * operations do not, it seems, make any 100% promise as to whether
138 * the operation will complete before any subsequent memory access.
139 * (Please correct if wrong.)
140 *
141 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
142 * are unordered (note the Uo); see the usage example following this comment.
143 *
144 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
145 * or even optimize assembler instructions away. For instance, in the following code
146 * the second rdmsr instruction is optimized away because gcc treats that instruction
147 * as deterministic:
148 *
149 * @code
150 * static inline uint32_t rdmsr_low(int idx)
151 * {
152 * uint32_t low;
153 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
154 * }
155 * ...
156 * uint32_t msr1 = rdmsr_low(1);
157 * foo(msr1);
158 * msr1 = rdmsr_low(1);
159 * bar(msr1);
160 * @endcode
161 *
162 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
163 * use the result of the first call as input parameter for bar() as well. For rdmsr this
164 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
165 * machine status information in general.
166 *
167 * @{
168 */
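
/** @remarks Usage sketch for the ordered/unordered naming convention described
 * above (illustration only: it assumes the ASMAtomicWriteU32 and
 * ASMAtomicUoWriteU32 functions declared further down in this file, and
 * s_u32Data / s_fDataReady are hypothetical volatile uint32_t variables shared
 * with another CPU or thread):
 * @code
 *     // Publish data, then signal readiness. The ordered flag write completes
 *     // the preceding (unordered) data write before it takes effect, so a
 *     // consumer that sees the flag set also sees the data.
 *     ASMAtomicUoWriteU32(&s_u32Data, 42);
 *     ASMAtomicWriteU32(&s_fDataReady, 1);
 * @endcode
 */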
169
170/** @def RT_INLINE_ASM_EXTERNAL
171 * Defined as 1 if the compiler does not support inline assembly.
172 * The ASM* functions will then be implemented in an external .asm file.
173 *
174 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
175 * inline assembly in their AMD64 compiler.
176 */
177#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
178# define RT_INLINE_ASM_EXTERNAL 1
179#else
180# define RT_INLINE_ASM_EXTERNAL 0
181#endif
182
183/** @def RT_INLINE_ASM_GNU_STYLE
184 * Defined as 1 if the compiler understands GNU style inline assembly.
185 */
186#if defined(_MSC_VER)
187# define RT_INLINE_ASM_GNU_STYLE 0
188#else
189# define RT_INLINE_ASM_GNU_STYLE 1
190#endif
191
192
193/** @todo find a more proper place for this structure? */
194#pragma pack(1)
195/** IDTR */
196typedef struct RTIDTR
197{
198 /** Size of the IDT. */
199 uint16_t cbIdt;
200 /** Address of the IDT. */
201 uintptr_t pIdt;
202} RTIDTR, *PRTIDTR;
203#pragma pack()
204
205#pragma pack(1)
206/** GDTR */
207typedef struct RTGDTR
208{
209 /** Size of the GDT. */
210 uint16_t cbGdt;
211 /** Address of the GDT. */
212 uintptr_t pGdt;
213} RTGDTR, *PRTGDTR;
214#pragma pack()
215
216
217/** @def ASMReturnAddress
218 * Gets the return address of the current (or calling if you like) function or method.
219 */
220#ifdef _MSC_VER
221# ifdef __cplusplus
222extern "C"
223# endif
224void * _ReturnAddress(void);
225# pragma intrinsic(_ReturnAddress)
226# define ASMReturnAddress() _ReturnAddress()
227#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
228# define ASMReturnAddress() __builtin_return_address(0)
229#else
230# error "Unsupported compiler."
231#endif
232
233
234/**
235 * Gets the content of the IDTR CPU register.
236 * @param pIdtr Where to store the IDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
240#else
241DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 sidt [rax]
251# else
252 mov eax, [pIdtr]
253 sidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Sets the content of the IDTR CPU register.
263 * @param pIdtr Where to load the IDTR contents from
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
267#else
268DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pIdtr]
277 lidt [rax]
278# else
279 mov eax, [pIdtr]
280 lidt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287
288/**
289 * Gets the content of the GDTR CPU register.
290 * @param pGdtr Where to store the GDTR contents.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
294#else
295DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
296{
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
299# else
300 __asm
301 {
302# ifdef RT_ARCH_AMD64
303 mov rax, [pGdtr]
304 sgdt [rax]
305# else
306 mov eax, [pGdtr]
307 sgdt [eax]
308# endif
309 }
310# endif
311}
312#endif
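
/** @remarks Usage sketch for the descriptor table getters above (ring-0 only,
 * of course):
 * @code
 *     RTIDTR Idtr;
 *     RTGDTR Gdtr;
 *     ASMGetIDTR(&Idtr);
 *     ASMGetGDTR(&Gdtr);
 *     // Idtr.cbIdt / Gdtr.cbGdt now hold the table limits and
 *     // Idtr.pIdt / Gdtr.pGdt the linear base addresses.
 * @endcode
 */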
313
314/**
315 * Get the cs register.
316 * @returns cs.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetCS(void);
320#else
321DECLINLINE(RTSEL) ASMGetCS(void)
322{
323 RTSEL SelCS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
326# else
327 __asm
328 {
329 mov ax, cs
330 mov [SelCS], ax
331 }
332# endif
333 return SelCS;
334}
335#endif
336
337
338/**
339 * Get the DS register.
340 * @returns DS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetDS(void);
344#else
345DECLINLINE(RTSEL) ASMGetDS(void)
346{
347 RTSEL SelDS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
350# else
351 __asm
352 {
353 mov ax, ds
354 mov [SelDS], ax
355 }
356# endif
357 return SelDS;
358}
359#endif
360
361
362/**
363 * Get the ES register.
364 * @returns ES.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetES(void);
368#else
369DECLINLINE(RTSEL) ASMGetES(void)
370{
371 RTSEL SelES;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
374# else
375 __asm
376 {
377 mov ax, es
378 mov [SelES], ax
379 }
380# endif
381 return SelES;
382}
383#endif
384
385
386/**
387 * Get the FS register.
388 * @returns FS.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetFS(void);
392#else
393DECLINLINE(RTSEL) ASMGetFS(void)
394{
395 RTSEL SelFS;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
398# else
399 __asm
400 {
401 mov ax, fs
402 mov [SelFS], ax
403 }
404# endif
405 return SelFS;
406}
407#endif
408
409
410/**
411 * Get the GS register.
412 * @returns GS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTSEL) ASMGetGS(void);
416#else
417DECLINLINE(RTSEL) ASMGetGS(void)
418{
419 RTSEL SelGS;
420# if RT_INLINE_ASM_GNU_STYLE
421 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
422# else
423 __asm
424 {
425 mov ax, gs
426 mov [SelGS], ax
427 }
428# endif
429 return SelGS;
430}
431#endif
432
433
434/**
435 * Get the SS register.
436 * @returns SS.
437 */
438#if RT_INLINE_ASM_EXTERNAL
439DECLASM(RTSEL) ASMGetSS(void);
440#else
441DECLINLINE(RTSEL) ASMGetSS(void)
442{
443 RTSEL SelSS;
444# if RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
446# else
447 __asm
448 {
449 mov ax, ss
450 mov [SelSS], ax
451 }
452# endif
453 return SelSS;
454}
455#endif
456
457
458/**
459 * Get the TR register.
460 * @returns TR.
461 */
462#if RT_INLINE_ASM_EXTERNAL
463DECLASM(RTSEL) ASMGetTR(void);
464#else
465DECLINLINE(RTSEL) ASMGetTR(void)
466{
467 RTSEL SelTR;
468# if RT_INLINE_ASM_GNU_STYLE
469 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
470# else
471 __asm
472 {
473 str ax
474 mov [SelTR], ax
475 }
476# endif
477 return SelTR;
478}
479#endif
480
481
482/**
483 * Get the [RE]FLAGS register.
484 * @returns [RE]FLAGS.
485 */
486#if RT_INLINE_ASM_EXTERNAL
487DECLASM(RTCCUINTREG) ASMGetFlags(void);
488#else
489DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
490{
491 RTCCUINTREG uFlags;
492# if RT_INLINE_ASM_GNU_STYLE
493# ifdef RT_ARCH_AMD64
494 __asm__ __volatile__("pushfq\n\t"
495 "popq %0\n\t"
496 : "=r" (uFlags));
497# else
498 __asm__ __volatile__("pushfl\n\t"
499 "popl %0\n\t"
500 : "=r" (uFlags));
501# endif
502# else
503 __asm
504 {
505# ifdef RT_ARCH_AMD64
506 pushfq
507 pop [uFlags]
508# else
509 pushfd
510 pop [uFlags]
511# endif
512 }
513# endif
514 return uFlags;
515}
516#endif
517
518
519/**
520 * Set the [RE]FLAGS register.
521 * @param uFlags The new [RE]FLAGS value.
522 */
523#if RT_INLINE_ASM_EXTERNAL
524DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
525#else
526DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
527{
528# if RT_INLINE_ASM_GNU_STYLE
529# ifdef RT_ARCH_AMD64
530 __asm__ __volatile__("pushq %0\n\t"
531 "popfq\n\t"
532 : : "g" (uFlags));
533# else
534 __asm__ __volatile__("pushl %0\n\t"
535 "popfl\n\t"
536 : : "g" (uFlags));
537# endif
538# else
539 __asm
540 {
541# ifdef RT_ARCH_AMD64
542 push [uFlags]
543 popfq
544# else
545 push [uFlags]
546 popfd
547# endif
548 }
549# endif
550}
551#endif
552
553
554/**
555 * Gets the content of the CPU timestamp counter register.
556 *
557 * @returns TSC.
558 */
559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
560DECLASM(uint64_t) ASMReadTSC(void);
561#else
562DECLINLINE(uint64_t) ASMReadTSC(void)
563{
564 RTUINT64U u;
565# if RT_INLINE_ASM_GNU_STYLE
566 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
567# else
568# if RT_INLINE_ASM_USES_INTRIN
569 u.u = __rdtsc();
570# else
571 __asm
572 {
573 rdtsc
574 mov [u.s.Lo], eax
575 mov [u.s.Hi], edx
576 }
577# endif
578# endif
579 return u.u;
580}
581#endif
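
/** @remarks Sketch of timing a code sequence in CPU cycles with ASMReadTSC().
 * do_something() stands for whatever is being measured; the result is only
 * meaningful with a stable TSC and when staying on one CPU.
 * @code
 *     uint64_t const uTscStart = ASMReadTSC();
 *     do_something();
 *     uint64_t const cTicks = ASMReadTSC() - uTscStart;
 * @endcode
 */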
582
583
584/**
585 * Performs the cpuid instruction returning all registers.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvEAX Where to store eax.
589 * @param pvEBX Where to store ebx.
590 * @param pvECX Where to store ecx.
591 * @param pvEDX Where to store edx.
592 * @remark We're using void pointers to ease the use of special bitfield structures and such.
593 */
594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
595DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
596#else
597DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
598{
599# if RT_INLINE_ASM_GNU_STYLE
600# ifdef RT_ARCH_AMD64
601 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
602 __asm__ ("cpuid\n\t"
603 : "=a" (uRAX),
604 "=b" (uRBX),
605 "=c" (uRCX),
606 "=d" (uRDX)
607 : "0" (uOperator));
608 *(uint32_t *)pvEAX = (uint32_t)uRAX;
609 *(uint32_t *)pvEBX = (uint32_t)uRBX;
610 *(uint32_t *)pvECX = (uint32_t)uRCX;
611 *(uint32_t *)pvEDX = (uint32_t)uRDX;
612# else
613 __asm__ ("xchgl %%ebx, %1\n\t"
614 "cpuid\n\t"
615 "xchgl %%ebx, %1\n\t"
616 : "=a" (*(uint32_t *)pvEAX),
617 "=r" (*(uint32_t *)pvEBX),
618 "=c" (*(uint32_t *)pvECX),
619 "=d" (*(uint32_t *)pvEDX)
620 : "0" (uOperator));
621# endif
622
623# elif RT_INLINE_ASM_USES_INTRIN
624 int aInfo[4];
625 __cpuid(aInfo, uOperator);
626 *(uint32_t *)pvEAX = aInfo[0];
627 *(uint32_t *)pvEBX = aInfo[1];
628 *(uint32_t *)pvECX = aInfo[2];
629 *(uint32_t *)pvEDX = aInfo[3];
630
631# else
632 uint32_t uEAX;
633 uint32_t uEBX;
634 uint32_t uECX;
635 uint32_t uEDX;
636 __asm
637 {
638 push ebx
639 mov eax, [uOperator]
640 cpuid
641 mov [uEAX], eax
642 mov [uEBX], ebx
643 mov [uECX], ecx
644 mov [uEDX], edx
645 pop ebx
646 }
647 *(uint32_t *)pvEAX = uEAX;
648 *(uint32_t *)pvEBX = uEBX;
649 *(uint32_t *)pvECX = uECX;
650 *(uint32_t *)pvEDX = uEDX;
651# endif
652}
653#endif
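
/** @remarks Sketch of querying the standard feature leaf with ASMCpuId() and
 * testing a feature bit (CPUID leaf 1, EDX bit 25 indicates SSE):
 * @code
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fSse = (uEDX & UINT32_C(0x02000000)) != 0; // bit 25
 * @endcode
 */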
654
655
656/**
657 * Performs the cpuid instruction returning all registers.
658 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
659 *
660 * @param uOperator CPUID operation (eax).
661 * @param uIdxECX ecx index
662 * @param pvEAX Where to store eax.
663 * @param pvEBX Where to store ebx.
664 * @param pvECX Where to store ecx.
665 * @param pvEDX Where to store edx.
666 * @remark We're using void pointers to ease the use of special bitfield structures and such.
667 */
668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
669DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
670#else
671DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
672{
673# if RT_INLINE_ASM_GNU_STYLE
674# ifdef RT_ARCH_AMD64
675 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
676 __asm__ ("cpuid\n\t"
677 : "=a" (uRAX),
678 "=b" (uRBX),
679 "=c" (uRCX),
680 "=d" (uRDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683 *(uint32_t *)pvEAX = (uint32_t)uRAX;
684 *(uint32_t *)pvEBX = (uint32_t)uRBX;
685 *(uint32_t *)pvECX = (uint32_t)uRCX;
686 *(uint32_t *)pvEDX = (uint32_t)uRDX;
687# else
688 __asm__ ("xchgl %%ebx, %1\n\t"
689 "cpuid\n\t"
690 "xchgl %%ebx, %1\n\t"
691 : "=a" (*(uint32_t *)pvEAX),
692 "=r" (*(uint32_t *)pvEBX),
693 "=c" (*(uint32_t *)pvECX),
694 "=d" (*(uint32_t *)pvEDX)
695 : "0" (uOperator),
696 "2" (uIdxECX));
697# endif
698
699# elif RT_INLINE_ASM_USES_INTRIN
700 int aInfo[4];
701 /** @todo This needs an ECX sub-leaf aware intrinsic (e.g. __cpuidex); as it stands, uIdxECX is ignored by this path. */
702 __cpuid(aInfo, uOperator);
703 *(uint32_t *)pvEAX = aInfo[0];
704 *(uint32_t *)pvEBX = aInfo[1];
705 *(uint32_t *)pvECX = aInfo[2];
706 *(uint32_t *)pvEDX = aInfo[3];
707
708# else
709 uint32_t uEAX;
710 uint32_t uEBX;
711 uint32_t uECX;
712 uint32_t uEDX;
713 __asm
714 {
715 push ebx
716 mov eax, [uOperator]
717 mov ecx, [uIdxECX]
718 cpuid
719 mov [uEAX], eax
720 mov [uEBX], ebx
721 mov [uECX], ecx
722 mov [uEDX], edx
723 pop ebx
724 }
725 *(uint32_t *)pvEAX = uEAX;
726 *(uint32_t *)pvEBX = uEBX;
727 *(uint32_t *)pvECX = uECX;
728 *(uint32_t *)pvEDX = uEDX;
729# endif
730}
731#endif
732
733
734/**
735 * Performs the cpuid instruction returning ecx and edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @param pvECX Where to store ecx.
739 * @param pvEDX Where to store edx.
740 * @remark We're using void pointers to ease the use of special bitfield structures and such.
741 */
742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
743DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
744#else
745DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
746{
747 uint32_t uEBX;
748 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
749}
750#endif
751
752
753/**
754 * Performs the cpuid instruction returning edx.
755 *
756 * @param uOperator CPUID operation (eax).
757 * @returns EDX after cpuid operation.
758 */
759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
760DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
761#else
762DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
763{
764 RTCCUINTREG xDX;
765# if RT_INLINE_ASM_GNU_STYLE
766# ifdef RT_ARCH_AMD64
767 RTCCUINTREG uSpill;
768 __asm__ ("cpuid"
769 : "=a" (uSpill),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "rbx", "rcx");
773# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
774 __asm__ ("push %%ebx\n\t"
775 "cpuid\n\t"
776 "pop %%ebx\n\t"
777 : "=a" (uOperator),
778 "=d" (xDX)
779 : "0" (uOperator)
780 : "ecx");
781# else
782 __asm__ ("cpuid"
783 : "=a" (uOperator),
784 "=d" (xDX)
785 : "0" (uOperator)
786 : "ebx", "ecx");
787# endif
788
789# elif RT_INLINE_ASM_USES_INTRIN
790 int aInfo[4];
791 __cpuid(aInfo, uOperator);
792 xDX = aInfo[3];
793
794# else
795 __asm
796 {
797 push ebx
798 mov eax, [uOperator]
799 cpuid
800 mov [xDX], edx
801 pop ebx
802 }
803# endif
804 return (uint32_t)xDX;
805}
806#endif
807
808
809/**
810 * Performs the cpuid instruction returning ecx.
811 *
812 * @param uOperator CPUID operation (eax).
813 * @returns ECX after cpuid operation.
814 */
815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
816DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
817#else
818DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
819{
820 RTCCUINTREG xCX;
821# if RT_INLINE_ASM_GNU_STYLE
822# ifdef RT_ARCH_AMD64
823 RTCCUINTREG uSpill;
824 __asm__ ("cpuid"
825 : "=a" (uSpill),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "rbx", "rdx");
829# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
830 __asm__ ("push %%ebx\n\t"
831 "cpuid\n\t"
832 "pop %%ebx\n\t"
833 : "=a" (uOperator),
834 "=c" (xCX)
835 : "0" (uOperator)
836 : "edx");
837# else
838 __asm__ ("cpuid"
839 : "=a" (uOperator),
840 "=c" (xCX)
841 : "0" (uOperator)
842 : "ebx", "edx");
843
844# endif
845
846# elif RT_INLINE_ASM_USES_INTRIN
847 int aInfo[4];
848 __cpuid(aInfo, uOperator);
849 xCX = aInfo[2];
850
851# else
852 __asm
853 {
854 push ebx
855 mov eax, [uOperator]
856 cpuid
857 mov [xCX], ecx
858 pop ebx
859 }
860# endif
861 return (uint32_t)xCX;
862}
863#endif
864
865
866/**
867 * Checks if the current CPU supports CPUID.
868 *
869 * @returns true if CPUID is supported.
870 */
871DECLINLINE(bool) ASMHasCpuId(void)
872{
873#ifdef RT_ARCH_AMD64
874 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
875#else /* !RT_ARCH_AMD64 */
876 bool fRet = false;
877# if RT_INLINE_ASM_GNU_STYLE
878 uint32_t u1;
879 uint32_t u2;
880 __asm__ ("pushf\n\t"
881 "pop %1\n\t"
882 "mov %1, %2\n\t"
883 "xorl $0x200000, %1\n\t"
884 "push %1\n\t"
885 "popf\n\t"
886 "pushf\n\t"
887 "pop %1\n\t"
888 "cmpl %1, %2\n\t"
889 "setne %0\n\t"
890 "push %2\n\t"
891 "popf\n\t"
892 : "=m" (fRet), "=r" (u1), "=r" (u2));
893# else
894 __asm
895 {
896 pushfd
897 pop eax
898 mov ebx, eax
899 xor eax, 0200000h
900 push eax
901 popfd
902 pushfd
903 pop eax
904 cmp eax, ebx
905 setne fRet
906 push ebx
907 popfd
908 }
909# endif
910 return fRet;
911#endif /* !RT_ARCH_AMD64 */
912}
913
914
915/**
916 * Gets the APIC ID of the current CPU.
917 *
918 * @returns the APIC ID.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(uint8_t) ASMGetApicId(void);
922#else
923DECLINLINE(uint8_t) ASMGetApicId(void)
924{
925 RTCCUINTREG xBX;
926# if RT_INLINE_ASM_GNU_STYLE
927# ifdef RT_ARCH_AMD64
928 RTCCUINTREG uSpill;
929 __asm__ ("cpuid"
930 : "=a" (uSpill),
931 "=b" (xBX)
932 : "0" (1)
933 : "rcx", "rdx");
934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
935 RTCCUINTREG uSpill;
936 __asm__ ("mov %%ebx,%1\n\t"
937 "cpuid\n\t"
938 "xchgl %%ebx,%1\n\t"
939 : "=a" (uSpill),
940 "=r" (xBX)
941 : "0" (1)
942 : "ecx", "edx");
943# else
944 RTCCUINTREG uSpill;
945 __asm__ ("cpuid"
946 : "=a" (uSpill),
947 "=b" (xBX)
948 : "0" (1)
949 : "ecx", "edx");
950# endif
951
952# elif RT_INLINE_ASM_USES_INTRIN
953 int aInfo[4];
954 __cpuid(aInfo, 1);
955 xBX = aInfo[1];
956
957# else
958 __asm
959 {
960 push ebx
961 mov eax, 1
962 cpuid
963 mov [xBX], ebx
964 pop ebx
965 }
966# endif
967 return (uint8_t)(xBX >> 24);
968}
969#endif
970
971
972/**
973 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
974 *
975 * @returns true/false.
976 * @param uEBX EBX return from ASMCpuId(0)
977 * @param uECX ECX return from ASMCpuId(0)
978 * @param uEDX EDX return from ASMCpuId(0)
979 */
980DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
981{
982 return uEBX == UINT32_C(0x756e6547)
983 && uECX == UINT32_C(0x6c65746e)
984 && uEDX == UINT32_C(0x49656e69);
985}
986
987
988/**
989 * Tests if this is a genuine Intel CPU.
990 *
991 * @returns true/false.
992 * @remarks ASSUMES that cpuid is supported by the CPU.
993 */
994DECLINLINE(bool) ASMIsIntelCpu(void)
995{
996 uint32_t uEAX, uEBX, uECX, uEDX;
997 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
998 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
999}
1000
1001
1002/**
1003 * Tests if this is an authentic AMD CPU based on the ASMCpuId(0) output.
1004 *
1005 * @returns true/false.
1006 * @param uEBX EBX return from ASMCpuId(0)
1007 * @param uECX ECX return from ASMCpuId(0)
1008 * @param uEDX EDX return from ASMCpuId(0)
1009 */
1010DECLINLINE(bool) ASMIsAmdCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
1011{
1012 return uEBX == UINT32_C(0x68747541)
1013 && uECX == UINT32_C(0x444d4163)
1014 && uEDX == UINT32_C(0x69746e65);
1015}
1016
1017
1018/**
1019 * Tests if this is an authentic AMD CPU.
1020 *
1021 * @returns true/false.
1022 * @remarks ASSUMES that cpuid is supported by the CPU.
1023 */
1024DECLINLINE(bool) ASMIsAmdCpu(void)
1025{
1026 uint32_t uEAX, uEBX, uECX, uEDX;
1027 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1028 return ASMIsAmdCpuEx(uEBX, uECX, uEDX);
1029}
1030
1031
1032/**
1033 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1034 *
1035 * @returns Family.
1036 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1037 */
1038DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1039{
1040 return ((uEAX >> 8) & 0xf) == 0xf
1041 ? ((uEAX >> 20) & 0x7f) + 0xf
1042 : ((uEAX >> 8) & 0xf);
1043}
1044
1045
1046/**
1047 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1048 *
1049 * @returns Model.
1050 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1051 * @remarks Applies the Intel extended-model encoding (families 6 and 15).
1052 */
1053DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1054{
1055 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1056 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1057 : ((uEAX >> 4) & 0xf);
1058}
1059
1060
1061/**
1062 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1063 *
1064 * @returns Model.
1065 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1066 * @remarks Applies the AMD extended-model encoding (family 15 only).
1067 */
1068DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1069{
1070 return ((uEAX >> 8) & 0xf) == 0xf
1071 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1072 : ((uEAX >> 4) & 0xf);
1073}
1074
1075
1076/**
1077 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1078 *
1079 * @returns Model.
1080 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1081 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1082 */
1083DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1084{
1085 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1086 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1087 : ((uEAX >> 4) & 0xf);
1088}
1089
1090
1091/**
1092 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1093 *
1094 * @returns Stepping.
1095 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1096 */
1097DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1098{
1099 return uEAX & 0xf;
1100}
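
/** @remarks Sketch tying the CPUID helpers above together:
 * @code
 *     if (ASMHasCpuId())
 *     {
 *         uint32_t uEAX, uEBX, uECX, uEDX;
 *         ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *         bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *         ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *         uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *         uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *         uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 *     }
 * @endcode
 */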
1101
1102
1103/**
1104 * Get cr0.
1105 * @returns cr0.
1106 */
1107#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1108DECLASM(RTCCUINTREG) ASMGetCR0(void);
1109#else
1110DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1111{
1112 RTCCUINTREG uCR0;
1113# if RT_INLINE_ASM_USES_INTRIN
1114 uCR0 = __readcr0();
1115
1116# elif RT_INLINE_ASM_GNU_STYLE
1117# ifdef RT_ARCH_AMD64
1118 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1119# else
1120 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1121# endif
1122# else
1123 __asm
1124 {
1125# ifdef RT_ARCH_AMD64
1126 mov rax, cr0
1127 mov [uCR0], rax
1128# else
1129 mov eax, cr0
1130 mov [uCR0], eax
1131# endif
1132 }
1133# endif
1134 return uCR0;
1135}
1136#endif
1137
1138
1139/**
1140 * Sets the CR0 register.
1141 * @param uCR0 The new CR0 value.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1145#else
1146DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1147{
1148# if RT_INLINE_ASM_USES_INTRIN
1149 __writecr0(uCR0);
1150
1151# elif RT_INLINE_ASM_GNU_STYLE
1152# ifdef RT_ARCH_AMD64
1153 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1154# else
1155 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1156# endif
1157# else
1158 __asm
1159 {
1160# ifdef RT_ARCH_AMD64
1161 mov rax, [uCR0]
1162 mov cr0, rax
1163# else
1164 mov eax, [uCR0]
1165 mov cr0, eax
1166# endif
1167 }
1168# endif
1169}
1170#endif
1171
1172
1173/**
1174 * Get cr2.
1175 * @returns cr2.
1176 */
1177#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1178DECLASM(RTCCUINTREG) ASMGetCR2(void);
1179#else
1180DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1181{
1182 RTCCUINTREG uCR2;
1183# if RT_INLINE_ASM_USES_INTRIN
1184 uCR2 = __readcr2();
1185
1186# elif RT_INLINE_ASM_GNU_STYLE
1187# ifdef RT_ARCH_AMD64
1188 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1189# else
1190 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1191# endif
1192# else
1193 __asm
1194 {
1195# ifdef RT_ARCH_AMD64
1196 mov rax, cr2
1197 mov [uCR2], rax
1198# else
1199 mov eax, cr2
1200 mov [uCR2], eax
1201# endif
1202 }
1203# endif
1204 return uCR2;
1205}
1206#endif
1207
1208
1209/**
1210 * Sets the CR2 register.
1211 * @param uCR2 The new CR2 value.
1212 */
1213#if RT_INLINE_ASM_EXTERNAL
1214DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1215#else
1216DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1217{
1218# if RT_INLINE_ASM_GNU_STYLE
1219# ifdef RT_ARCH_AMD64
1220 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1221# else
1222 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1223# endif
1224# else
1225 __asm
1226 {
1227# ifdef RT_ARCH_AMD64
1228 mov rax, [uCR2]
1229 mov cr2, rax
1230# else
1231 mov eax, [uCR2]
1232 mov cr2, eax
1233# endif
1234 }
1235# endif
1236}
1237#endif
1238
1239
1240/**
1241 * Get cr3.
1242 * @returns cr3.
1243 */
1244#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1245DECLASM(RTCCUINTREG) ASMGetCR3(void);
1246#else
1247DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1248{
1249 RTCCUINTREG uCR3;
1250# if RT_INLINE_ASM_USES_INTRIN
1251 uCR3 = __readcr3();
1252
1253# elif RT_INLINE_ASM_GNU_STYLE
1254# ifdef RT_ARCH_AMD64
1255 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1256# else
1257 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1258# endif
1259# else
1260 __asm
1261 {
1262# ifdef RT_ARCH_AMD64
1263 mov rax, cr3
1264 mov [uCR3], rax
1265# else
1266 mov eax, cr3
1267 mov [uCR3], eax
1268# endif
1269 }
1270# endif
1271 return uCR3;
1272}
1273#endif
1274
1275
1276/**
1277 * Sets the CR3 register.
1278 *
1279 * @param uCR3 New CR3 value.
1280 */
1281#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1282DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1283#else
1284DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1285{
1286# if RT_INLINE_ASM_USES_INTRIN
1287 __writecr3(uCR3);
1288
1289# elif RT_INLINE_ASM_GNU_STYLE
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1292# else
1293 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1294# endif
1295# else
1296 __asm
1297 {
1298# ifdef RT_ARCH_AMD64
1299 mov rax, [uCR3]
1300 mov cr3, rax
1301# else
1302 mov eax, [uCR3]
1303 mov cr3, eax
1304# endif
1305 }
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Reloads the CR3 register.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(void) ASMReloadCR3(void);
1316#else
1317DECLINLINE(void) ASMReloadCR3(void)
1318{
1319# if RT_INLINE_ASM_USES_INTRIN
1320 __writecr3(__readcr3());
1321
1322# elif RT_INLINE_ASM_GNU_STYLE
1323 RTCCUINTREG u;
1324# ifdef RT_ARCH_AMD64
1325 __asm__ __volatile__("movq %%cr3, %0\n\t"
1326 "movq %0, %%cr3\n\t"
1327 : "=r" (u));
1328# else
1329 __asm__ __volatile__("movl %%cr3, %0\n\t"
1330 "movl %0, %%cr3\n\t"
1331 : "=r" (u));
1332# endif
1333# else
1334 __asm
1335 {
1336# ifdef RT_ARCH_AMD64
1337 mov rax, cr3
1338 mov cr3, rax
1339# else
1340 mov eax, cr3
1341 mov cr3, eax
1342# endif
1343 }
1344# endif
1345}
1346#endif
1347
1348
1349/**
1350 * Get cr4.
1351 * @returns cr4.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(RTCCUINTREG) ASMGetCR4(void);
1355#else
1356DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1357{
1358 RTCCUINTREG uCR4;
1359# if RT_INLINE_ASM_USES_INTRIN
1360 uCR4 = __readcr4();
1361
1362# elif RT_INLINE_ASM_GNU_STYLE
1363# ifdef RT_ARCH_AMD64
1364 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1365# else
1366 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1367# endif
1368# else
1369 __asm
1370 {
1371# ifdef RT_ARCH_AMD64
1372 mov rax, cr4
1373 mov [uCR4], rax
1374# else
1375 push eax /* just in case */
1376 /*mov eax, cr4*/
1377 _emit 0x0f
1378 _emit 0x20
1379 _emit 0xe0
1380 mov [uCR4], eax
1381 pop eax
1382# endif
1383 }
1384# endif
1385 return uCR4;
1386}
1387#endif
1388
1389
1390/**
1391 * Sets the CR4 register.
1392 *
1393 * @param uCR4 New CR4 value.
1394 */
1395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1396DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1397#else
1398DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1399{
1400# if RT_INLINE_ASM_USES_INTRIN
1401 __writecr4(uCR4);
1402
1403# elif RT_INLINE_ASM_GNU_STYLE
1404# ifdef RT_ARCH_AMD64
1405 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1406# else
1407 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1408# endif
1409# else
1410 __asm
1411 {
1412# ifdef RT_ARCH_AMD64
1413 mov rax, [uCR4]
1414 mov cr4, rax
1415# else
1416 mov eax, [uCR4]
1417 _emit 0x0F
1418 _emit 0x22
1419 _emit 0xE0 /* mov cr4, eax */
1420# endif
1421 }
1422# endif
1423}
1424#endif
1425
1426
1427/**
1428 * Get cr8.
1429 * @returns cr8.
1430 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1433DECLASM(RTCCUINTREG) ASMGetCR8(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1436{
1437# ifdef RT_ARCH_AMD64
1438 RTCCUINTREG uCR8;
1439# if RT_INLINE_ASM_USES_INTRIN
1440 uCR8 = __readcr8();
1441
1442# elif RT_INLINE_ASM_GNU_STYLE
1443 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1444# else
1445 __asm
1446 {
1447 mov rax, cr8
1448 mov [uCR8], rax
1449 }
1450# endif
1451 return uCR8;
1452# else /* !RT_ARCH_AMD64 */
1453 return 0;
1454# endif /* !RT_ARCH_AMD64 */
1455}
1456#endif
1457
1458
1459/**
1460 * Enables interrupts (EFLAGS.IF).
1461 */
1462#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1463DECLASM(void) ASMIntEnable(void);
1464#else
1465DECLINLINE(void) ASMIntEnable(void)
1466{
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm("sti\n");
1469# elif RT_INLINE_ASM_USES_INTRIN
1470 _enable();
1471# else
1472 __asm sti
1473# endif
1474}
1475#endif
1476
1477
1478/**
1479 * Disables interrupts (!EFLAGS.IF).
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(void) ASMIntDisable(void);
1483#else
1484DECLINLINE(void) ASMIntDisable(void)
1485{
1486# if RT_INLINE_ASM_GNU_STYLE
1487 __asm("cli\n");
1488# elif RT_INLINE_ASM_USES_INTRIN
1489 _disable();
1490# else
1491 __asm cli
1492# endif
1493}
1494#endif
1495
1496
1497/**
1498 * Disables interrupts and returns previous xFLAGS.
1499 */
1500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1501DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1502#else
1503DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1504{
1505 RTCCUINTREG xFlags;
1506# if RT_INLINE_ASM_GNU_STYLE
1507# ifdef RT_ARCH_AMD64
1508 __asm__ __volatile__("pushfq\n\t"
1509 "cli\n\t"
1510 "popq %0\n\t"
1511 : "=r" (xFlags));
1512# else
1513 __asm__ __volatile__("pushfl\n\t"
1514 "cli\n\t"
1515 "popl %0\n\t"
1516 : "=r" (xFlags));
1517# endif
1518# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1519 xFlags = ASMGetFlags();
1520 _disable();
1521# else
1522 __asm {
1523 pushfd
1524 cli
1525 pop [xFlags]
1526 }
1527# endif
1528 return xFlags;
1529}
1530#endif
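
/** @remarks Sketch of the usual save/disable/restore pattern built from
 * ASMIntDisableFlags() and ASMSetFlags():
 * @code
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     // ... short section that must run with interrupts disabled ...
 *     ASMSetFlags(fSavedFlags);   // restores the previous interrupt state
 * @endcode
 */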
1531
1532
1533/**
1534 * Are interrupts enabled?
1535 *
1536 * @returns true / false.
1537 */
1538DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1539{
1540 RTCCUINTREG uFlags = ASMGetFlags();
1541 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1542}
1543
1544
1545/**
1546 * Halts the CPU until interrupted.
1547 */
1548#if RT_INLINE_ASM_EXTERNAL
1549DECLASM(void) ASMHalt(void);
1550#else
1551DECLINLINE(void) ASMHalt(void)
1552{
1553# if RT_INLINE_ASM_GNU_STYLE
1554 __asm__ __volatile__("hlt\n\t");
1555# else
1556 __asm {
1557 hlt
1558 }
1559# endif
1560}
1561#endif
1562
1563
1564/**
1565 * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
1566 */
1567#if RT_INLINE_ASM_EXTERNAL
1568DECLASM(void) ASMNopPause(void);
1569#else
1570DECLINLINE(void) ASMNopPause(void)
1571{
1572# if RT_INLINE_ASM_GNU_STYLE
1573 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1574# else
1575 __asm {
1576 _emit 0f3h
1577 _emit 090h
1578 }
1579# endif
1580}
1581#endif
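
/** @remarks Sketch of a polling loop that is friendly to hyperthreaded CPUs by
 * pausing between checks. g_fDone is a hypothetical 'bool volatile' flag set
 * by another thread or CPU.
 * @code
 *     while (!g_fDone)
 *         ASMNopPause();
 * @endcode
 */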
1582
1583
1584/**
1585 * Reads a machine specific register.
1586 *
1587 * @returns Register content.
1588 * @param uRegister Register to read.
1589 */
1590#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1591DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1592#else
1593DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1594{
1595 RTUINT64U u;
1596# if RT_INLINE_ASM_GNU_STYLE
1597 __asm__ __volatile__("rdmsr\n\t"
1598 : "=a" (u.s.Lo),
1599 "=d" (u.s.Hi)
1600 : "c" (uRegister));
1601
1602# elif RT_INLINE_ASM_USES_INTRIN
1603 u.u = __readmsr(uRegister);
1604
1605# else
1606 __asm
1607 {
1608 mov ecx, [uRegister]
1609 rdmsr
1610 mov [u.s.Lo], eax
1611 mov [u.s.Hi], edx
1612 }
1613# endif
1614
1615 return u.u;
1616}
1617#endif
1618
1619
1620/**
1621 * Writes a machine specific register.
1622 *
1623 * @returns Register content.
1624 * @param uRegister Register to write to.
1625 * @param u64Val Value to write.
1626 */
1627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1628DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1629#else
1630DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1631{
1632 RTUINT64U u;
1633
1634 u.u = u64Val;
1635# if RT_INLINE_ASM_GNU_STYLE
1636 __asm__ __volatile__("wrmsr\n\t"
1637 ::"a" (u.s.Lo),
1638 "d" (u.s.Hi),
1639 "c" (uRegister));
1640
1641# elif RT_INLINE_ASM_USES_INTRIN
1642 __writemsr(uRegister, u.u);
1643
1644# else
1645 __asm
1646 {
1647 mov ecx, [uRegister]
1648 mov edx, [u.s.Hi]
1649 mov eax, [u.s.Lo]
1650 wrmsr
1651 }
1652# endif
1653}
1654#endif
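
/** @remarks Sketch using the two MSR accessors above (ring-0 only;
 * IA32_APIC_BASE, MSR 0x1b, merely serves as a well known example register):
 * @code
 *     uint64_t const uApicBase = ASMRdMsr(0x1b);
 *     // ... inspect or adjust bits as appropriate ...
 *     ASMWrMsr(0x1b, uApicBase);  // writing the unmodified value back changes nothing
 * @endcode
 */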
1655
1656
1657/**
1658 * Reads low part of a machine specific register.
1659 *
1660 * @returns Register content.
1661 * @param uRegister Register to read.
1662 */
1663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1664DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1665#else
1666DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1667{
1668 uint32_t u32;
1669# if RT_INLINE_ASM_GNU_STYLE
1670 __asm__ __volatile__("rdmsr\n\t"
1671 : "=a" (u32)
1672 : "c" (uRegister)
1673 : "edx");
1674
1675# elif RT_INLINE_ASM_USES_INTRIN
1676 u32 = (uint32_t)__readmsr(uRegister);
1677
1678# else
1679 __asm
1680 {
1681 mov ecx, [uRegister]
1682 rdmsr
1683 mov [u32], eax
1684 }
1685# endif
1686
1687 return u32;
1688}
1689#endif
1690
1691
1692/**
1693 * Reads high part of a machine specific register.
1694 *
1695 * @returns Register content.
1696 * @param uRegister Register to read.
1697 */
1698#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1699DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1700#else
1701DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1702{
1703 uint32_t u32;
1704# if RT_INLINE_ASM_GNU_STYLE
1705 __asm__ __volatile__("rdmsr\n\t"
1706 : "=d" (u32)
1707 : "c" (uRegister)
1708 : "eax");
1709
1710# elif RT_INLINE_ASM_USES_INTRIN
1711 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1712
1713# else
1714 __asm
1715 {
1716 mov ecx, [uRegister]
1717 rdmsr
1718 mov [u32], edx
1719 }
1720# endif
1721
1722 return u32;
1723}
1724#endif
1725
1726
1727/**
1728 * Gets dr0.
1729 *
1730 * @returns dr0.
1731 */
1732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1733DECLASM(RTCCUINTREG) ASMGetDR0(void);
1734#else
1735DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1736{
1737 RTCCUINTREG uDR0;
1738# if RT_INLINE_ASM_USES_INTRIN
1739 uDR0 = __readdr(0);
1740# elif RT_INLINE_ASM_GNU_STYLE
1741# ifdef RT_ARCH_AMD64
1742 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1743# else
1744 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1745# endif
1746# else
1747 __asm
1748 {
1749# ifdef RT_ARCH_AMD64
1750 mov rax, dr0
1751 mov [uDR0], rax
1752# else
1753 mov eax, dr0
1754 mov [uDR0], eax
1755# endif
1756 }
1757# endif
1758 return uDR0;
1759}
1760#endif
1761
1762
1763/**
1764 * Gets dr1.
1765 *
1766 * @returns dr1.
1767 */
1768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1769DECLASM(RTCCUINTREG) ASMGetDR1(void);
1770#else
1771DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1772{
1773 RTCCUINTREG uDR1;
1774# if RT_INLINE_ASM_USES_INTRIN
1775 uDR1 = __readdr(1);
1776# elif RT_INLINE_ASM_GNU_STYLE
1777# ifdef RT_ARCH_AMD64
1778 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1779# else
1780 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1781# endif
1782# else
1783 __asm
1784 {
1785# ifdef RT_ARCH_AMD64
1786 mov rax, dr1
1787 mov [uDR1], rax
1788# else
1789 mov eax, dr1
1790 mov [uDR1], eax
1791# endif
1792 }
1793# endif
1794 return uDR1;
1795}
1796#endif
1797
1798
1799/**
1800 * Gets dr2.
1801 *
1802 * @returns dr2.
1803 */
1804#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1805DECLASM(RTCCUINTREG) ASMGetDR2(void);
1806#else
1807DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1808{
1809 RTCCUINTREG uDR2;
1810# if RT_INLINE_ASM_USES_INTRIN
1811 uDR2 = __readdr(2);
1812# elif RT_INLINE_ASM_GNU_STYLE
1813# ifdef RT_ARCH_AMD64
1814 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1815# else
1816 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1817# endif
1818# else
1819 __asm
1820 {
1821# ifdef RT_ARCH_AMD64
1822 mov rax, dr2
1823 mov [uDR2], rax
1824# else
1825 mov eax, dr2
1826 mov [uDR2], eax
1827# endif
1828 }
1829# endif
1830 return uDR2;
1831}
1832#endif
1833
1834
1835/**
1836 * Gets dr3.
1837 *
1838 * @returns dr3.
1839 */
1840#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1841DECLASM(RTCCUINTREG) ASMGetDR3(void);
1842#else
1843DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1844{
1845 RTCCUINTREG uDR3;
1846# if RT_INLINE_ASM_USES_INTRIN
1847 uDR3 = __readdr(3);
1848# elif RT_INLINE_ASM_GNU_STYLE
1849# ifdef RT_ARCH_AMD64
1850 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1851# else
1852 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1853# endif
1854# else
1855 __asm
1856 {
1857# ifdef RT_ARCH_AMD64
1858 mov rax, dr3
1859 mov [uDR3], rax
1860# else
1861 mov eax, dr3
1862 mov [uDR3], eax
1863# endif
1864 }
1865# endif
1866 return uDR3;
1867}
1868#endif
1869
1870
1871/**
1872 * Gets dr6.
1873 *
1874 * @returns dr6.
1875 */
1876#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1877DECLASM(RTCCUINTREG) ASMGetDR6(void);
1878#else
1879DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1880{
1881 RTCCUINTREG uDR6;
1882# if RT_INLINE_ASM_USES_INTRIN
1883 uDR6 = __readdr(6);
1884# elif RT_INLINE_ASM_GNU_STYLE
1885# ifdef RT_ARCH_AMD64
1886 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1887# else
1888 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1889# endif
1890# else
1891 __asm
1892 {
1893# ifdef RT_ARCH_AMD64
1894 mov rax, dr6
1895 mov [uDR6], rax
1896# else
1897 mov eax, dr6
1898 mov [uDR6], eax
1899# endif
1900 }
1901# endif
1902 return uDR6;
1903}
1904#endif
1905
1906
1907/**
1908 * Reads and clears DR6.
1909 *
1910 * @returns DR6.
1911 */
1912#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1913DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1914#else
1915DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1916{
1917 RTCCUINTREG uDR6;
1918# if RT_INLINE_ASM_USES_INTRIN
1919 uDR6 = __readdr(6);
1920 __writedr(6, 0xffff0ff0U); /* 31-16 and 11-4 are 1's, 15-12, 3-0 and 63-32 are zeroes. */
1921# elif RT_INLINE_ASM_GNU_STYLE
1922 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 11-4 are 1's, 15-12, 3-0 and 63-32 are zeroes. */
1923# ifdef RT_ARCH_AMD64
1924 __asm__ __volatile__("movq %%dr6, %0\n\t"
1925 "movq %1, %%dr6\n\t"
1926 : "=r" (uDR6)
1927 : "r" (uNewValue));
1928# else
1929 __asm__ __volatile__("movl %%dr6, %0\n\t"
1930 "movl %1, %%dr6\n\t"
1931 : "=r" (uDR6)
1932 : "r" (uNewValue));
1933# endif
1934# else
1935 __asm
1936 {
1937# ifdef RT_ARCH_AMD64
1938 mov rax, dr6
1939 mov [uDR6], rax
1940 mov rcx, rax
1941 mov ecx, 0ffff0ff0h; /* 31-16 and 11-4 are 1's, 15-12, 3-0 and 63-32 are zeroes. */
1942 mov dr6, rcx
1943# else
1944 mov eax, dr6
1945 mov [uDR6], eax
1946 mov ecx, 0ffff0ff0h; /* 31-16 and 11-4 are 1's, 15-12 and 3-0 are zeroes. */
1947 mov dr6, ecx
1948# endif
1949 }
1950# endif
1951 return uDR6;
1952}
1953#endif
1954
1955
1956/**
1957 * Gets dr7.
1958 *
1959 * @returns dr7.
1960 */
1961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1962DECLASM(RTCCUINTREG) ASMGetDR7(void);
1963#else
1964DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1965{
1966 RTCCUINTREG uDR7;
1967# if RT_INLINE_ASM_USES_INTRIN
1968 uDR7 = __readdr(7);
1969# elif RT_INLINE_ASM_GNU_STYLE
1970# ifdef RT_ARCH_AMD64
1971 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1972# else
1973 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1974# endif
1975# else
1976 __asm
1977 {
1978# ifdef RT_ARCH_AMD64
1979 mov rax, dr7
1980 mov [uDR7], rax
1981# else
1982 mov eax, dr7
1983 mov [uDR7], eax
1984# endif
1985 }
1986# endif
1987 return uDR7;
1988}
1989#endif
1990
1991
1992/**
1993 * Sets dr0.
1994 *
1995 * @param uDRVal Debug register value to write
1996 */
1997#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1998DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1999#else
2000DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
2001{
2002# if RT_INLINE_ASM_USES_INTRIN
2003 __writedr(0, uDRVal);
2004# elif RT_INLINE_ASM_GNU_STYLE
2005# ifdef RT_ARCH_AMD64
2006 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
2007# else
2008 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
2009# endif
2010# else
2011 __asm
2012 {
2013# ifdef RT_ARCH_AMD64
2014 mov rax, [uDRVal]
2015 mov dr0, rax
2016# else
2017 mov eax, [uDRVal]
2018 mov dr0, eax
2019# endif
2020 }
2021# endif
2022}
2023#endif
2024
2025
2026/**
2027 * Sets dr1.
2028 *
2029 * @param uDRVal Debug register value to write
2030 */
2031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2032DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2033#else
2034DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2035{
2036# if RT_INLINE_ASM_USES_INTRIN
2037 __writedr(1, uDRVal);
2038# elif RT_INLINE_ASM_GNU_STYLE
2039# ifdef RT_ARCH_AMD64
2040 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2041# else
2042 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2043# endif
2044# else
2045 __asm
2046 {
2047# ifdef RT_ARCH_AMD64
2048 mov rax, [uDRVal]
2049 mov dr1, rax
2050# else
2051 mov eax, [uDRVal]
2052 mov dr1, eax
2053# endif
2054 }
2055# endif
2056}
2057#endif
2058
2059
2060/**
2061 * Sets dr2.
2062 *
2063 * @param uDRVal Debug register value to write
2064 */
2065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2066DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2067#else
2068DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2069{
2070# if RT_INLINE_ASM_USES_INTRIN
2071 __writedr(2, uDRVal);
2072# elif RT_INLINE_ASM_GNU_STYLE
2073# ifdef RT_ARCH_AMD64
2074 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2075# else
2076 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2077# endif
2078# else
2079 __asm
2080 {
2081# ifdef RT_ARCH_AMD64
2082 mov rax, [uDRVal]
2083 mov dr2, rax
2084# else
2085 mov eax, [uDRVal]
2086 mov dr2, eax
2087# endif
2088 }
2089# endif
2090}
2091#endif
2092
2093
2094/**
2095 * Sets dr3.
2096 *
2097 * @param uDRVal Debug register value to write
2098 */
2099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2100DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2101#else
2102DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2103{
2104# if RT_INLINE_ASM_USES_INTRIN
2105 __writedr(3, uDRVal);
2106# elif RT_INLINE_ASM_GNU_STYLE
2107# ifdef RT_ARCH_AMD64
2108 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2109# else
2110 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2111# endif
2112# else
2113 __asm
2114 {
2115# ifdef RT_ARCH_AMD64
2116 mov rax, [uDRVal]
2117 mov dr3, rax
2118# else
2119 mov eax, [uDRVal]
2120 mov dr3, eax
2121# endif
2122 }
2123# endif
2124}
2125#endif
2126
2127
2128/**
2129 * Sets dr6.
2130 *
2131 * @param uDRVal Debug register value to write
2132 */
2133#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2134DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2135#else
2136DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2137{
2138# if RT_INLINE_ASM_USES_INTRIN
2139 __writedr(6, uDRVal);
2140# elif RT_INLINE_ASM_GNU_STYLE
2141# ifdef RT_ARCH_AMD64
2142 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2143# else
2144 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2145# endif
2146# else
2147 __asm
2148 {
2149# ifdef RT_ARCH_AMD64
2150 mov rax, [uDRVal]
2151 mov dr6, rax
2152# else
2153 mov eax, [uDRVal]
2154 mov dr6, eax
2155# endif
2156 }
2157# endif
2158}
2159#endif
2160
2161
2162/**
2163 * Sets dr7.
2164 *
2165 * @param uDRVal Debug register value to write
2166 */
2167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2168DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2169#else
2170DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2171{
2172# if RT_INLINE_ASM_USES_INTRIN
2173 __writedr(7, uDRVal);
2174# elif RT_INLINE_ASM_GNU_STYLE
2175# ifdef RT_ARCH_AMD64
2176 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2177# else
2178 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2179# endif
2180# else
2181 __asm
2182 {
2183# ifdef RT_ARCH_AMD64
2184 mov rax, [uDRVal]
2185 mov dr7, rax
2186# else
2187 mov eax, [uDRVal]
2188 mov dr7, eax
2189# endif
2190 }
2191# endif
2192}
2193#endif
2194
2195
2196/**
2197 * Compiler memory barrier.
2198 *
2199 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2200 * values or any outstanding writes when returning from this function.
2201 *
2202 * This function must be used if non-volatile data is modified by a
2203 * device or the VMM. Typical cases are port access, MMIO access,
2204 * trapping instruction, etc.
2205 */
2206#if RT_INLINE_ASM_GNU_STYLE
2207# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2208#elif RT_INLINE_ASM_USES_INTRIN
2209# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2210#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2211DECLINLINE(void) ASMCompilerBarrier(void)
2212{
2213 __asm
2214 {
2215 }
2216}
2217#endif
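
/** @remarks Sketch of where the compiler barrier matters: polling a flag that
 * is changed behind the compiler's back (by a device, another CPU or the VMM)
 * without declaring it volatile. g_fStatus is a hypothetical global here.
 * @code
 *     while (!(g_fStatus & 1))
 *         ASMCompilerBarrier();   // forces g_fStatus to be re-read each iteration
 * @endcode
 */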
2218
2219
2220/**
2221 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2222 *
2223 * @param Port I/O port to write to.
2224 * @param u8 8-bit integer to write.
2225 */
2226#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2227DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2228#else
2229DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2230{
2231# if RT_INLINE_ASM_GNU_STYLE
2232 __asm__ __volatile__("outb %b1, %w0\n\t"
2233 :: "Nd" (Port),
2234 "a" (u8));
2235
2236# elif RT_INLINE_ASM_USES_INTRIN
2237 __outbyte(Port, u8);
2238
2239# else
2240 __asm
2241 {
2242 mov dx, [Port]
2243 mov al, [u8]
2244 out dx, al
2245 }
2246# endif
2247}
2248#endif
2249
2250
2251/**
2252 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2253 *
2254 * @returns 8-bit integer.
2255 * @param Port I/O port to read from.
2256 */
2257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2258DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2259#else
2260DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2261{
2262 uint8_t u8;
2263# if RT_INLINE_ASM_GNU_STYLE
2264 __asm__ __volatile__("inb %w1, %b0\n\t"
2265 : "=a" (u8)
2266 : "Nd" (Port));
2267
2268# elif RT_INLINE_ASM_USES_INTRIN
2269 u8 = __inbyte(Port);
2270
2271# else
2272 __asm
2273 {
2274 mov dx, [Port]
2275 in al, dx
2276 mov [u8], al
2277 }
2278# endif
2279 return u8;
2280}
2281#endif
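
/** @remarks Sketch using the byte port I/O helpers above: reading the CMOS/RTC
 * seconds register (index port 0x70, data port 0x71). Purely illustrative and
 * ring-0 only.
 * @code
 *     ASMOutU8(0x70, 0x00);                   // select CMOS register 0x00 (seconds)
 *     uint8_t const bSeconds = ASMInU8(0x71);
 * @endcode
 */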
2282
2283
2284/**
2285 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2286 *
2287 * @param Port I/O port to write to.
2288 * @param u16 16-bit integer to write.
2289 */
2290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2291DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2292#else
2293DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2294{
2295# if RT_INLINE_ASM_GNU_STYLE
2296 __asm__ __volatile__("outw %w1, %w0\n\t"
2297 :: "Nd" (Port),
2298 "a" (u16));
2299
2300# elif RT_INLINE_ASM_USES_INTRIN
2301 __outword(Port, u16);
2302
2303# else
2304 __asm
2305 {
2306 mov dx, [Port]
2307 mov ax, [u16]
2308 out dx, ax
2309 }
2310# endif
2311}
2312#endif
2313
2314
2315/**
2316 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2317 *
2318 * @returns 16-bit integer.
2319 * @param Port I/O port to read from.
2320 */
2321#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2322DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2323#else
2324DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2325{
2326 uint16_t u16;
2327# if RT_INLINE_ASM_GNU_STYLE
2328 __asm__ __volatile__("inw %w1, %w0\n\t"
2329 : "=a" (u16)
2330 : "Nd" (Port));
2331
2332# elif RT_INLINE_ASM_USES_INTRIN
2333 u16 = __inword(Port);
2334
2335# else
2336 __asm
2337 {
2338 mov dx, [Port]
2339 in ax, dx
2340 mov [u16], ax
2341 }
2342# endif
2343 return u16;
2344}
2345#endif
2346
2347
2348/**
2349 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2350 *
2351 * @param Port I/O port to write to.
2352 * @param u32 32-bit integer to write.
2353 */
2354#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2355DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2356#else
2357DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2358{
2359# if RT_INLINE_ASM_GNU_STYLE
2360 __asm__ __volatile__("outl %1, %w0\n\t"
2361 :: "Nd" (Port),
2362 "a" (u32));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outdword(Port, u32);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov eax, [u32]
2372 out dx, eax
2373 }
2374# endif
2375}
2376#endif
2377
2378
2379/**
2380 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2381 *
2382 * @returns 32-bit integer.
2383 * @param Port I/O port to read from.
2384 */
2385#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2386DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2387#else
2388DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2389{
2390 uint32_t u32;
2391# if RT_INLINE_ASM_GNU_STYLE
2392 __asm__ __volatile__("inl %w1, %0\n\t"
2393 : "=a" (u32)
2394 : "Nd" (Port));
2395
2396# elif RT_INLINE_ASM_USES_INTRIN
2397 u32 = __indword(Port);
2398
2399# else
2400 __asm
2401 {
2402 mov dx, [Port]
2403 in eax, dx
2404 mov [u32], eax
2405 }
2406# endif
2407 return u32;
2408}
2409#endif
2410
2411
2412/**
2413 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2414 *
2415 * @param Port I/O port to write to.
2416 * @param pau8 Pointer to the string buffer.
2417 * @param c The number of items to write.
2418 */
2419#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2420DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2421#else
2422DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2423{
2424# if RT_INLINE_ASM_GNU_STYLE
2425 __asm__ __volatile__("rep; outsb\n\t"
2426 : "+S" (pau8),
2427 "+c" (c)
2428 : "d" (Port));
2429
2430# elif RT_INLINE_ASM_USES_INTRIN
2431 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2432
2433# else
2434 __asm
2435 {
2436 mov dx, [Port]
2437 mov ecx, [c]
2438 mov eax, [pau8]
2439 xchg esi, eax
2440 rep outsb
2441 xchg esi, eax
2442 }
2443# endif
2444}
2445#endif
2446
2447
2448/**
2449 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2450 *
2451 * @param Port I/O port to read from.
2452 * @param pau8 Pointer to the string buffer (output).
2453 * @param c The number of items to read.
2454 */
2455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2456DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2457#else
2458DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2459{
2460# if RT_INLINE_ASM_GNU_STYLE
2461 __asm__ __volatile__("rep; insb\n\t"
2462 : "+D" (pau8),
2463 "+c" (c)
2464 : "d" (Port));
2465
2466# elif RT_INLINE_ASM_USES_INTRIN
2467 __inbytestring(Port, pau8, (unsigned long)c);
2468
2469# else
2470 __asm
2471 {
2472 mov dx, [Port]
2473 mov ecx, [c]
2474 mov eax, [pau8]
2475 xchg edi, eax
2476 rep insb
2477 xchg edi, eax
2478 }
2479# endif
2480}
2481#endif
2482
2483
2484/**
2485 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2486 *
2487 * @param Port I/O port to write to.
2488 * @param pau16 Pointer to the string buffer.
2489 * @param c The number of items to write.
2490 */
2491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2492DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2493#else
2494DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2495{
2496# if RT_INLINE_ASM_GNU_STYLE
2497 __asm__ __volatile__("rep; outsw\n\t"
2498 : "+S" (pau16),
2499 "+c" (c)
2500 : "d" (Port));
2501
2502# elif RT_INLINE_ASM_USES_INTRIN
2503 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2504
2505# else
2506 __asm
2507 {
2508 mov dx, [Port]
2509 mov ecx, [c]
2510 mov eax, [pau16]
2511 xchg esi, eax
2512 rep outsw
2513 xchg esi, eax
2514 }
2515# endif
2516}
2517#endif
2518
2519
2520/**
2521 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2522 *
2523 * @param Port I/O port to read from.
2524 * @param pau16 Pointer to the string buffer (output).
2525 * @param c The number of items to read.
2526 */
2527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2528DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2529#else
2530DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2531{
2532# if RT_INLINE_ASM_GNU_STYLE
2533 __asm__ __volatile__("rep; insw\n\t"
2534 : "+D" (pau16),
2535 "+c" (c)
2536 : "d" (Port));
2537
2538# elif RT_INLINE_ASM_USES_INTRIN
2539 __inwordstring(Port, pau16, (unsigned long)c);
2540
2541# else
2542 __asm
2543 {
2544 mov dx, [Port]
2545 mov ecx, [c]
2546 mov eax, [pau16]
2547 xchg edi, eax
2548 rep insw
2549 xchg edi, eax
2550 }
2551# endif
2552}
2553#endif
2554
2555
2556/**
2557 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2558 *
2559 * @param Port I/O port to write to.
2560 * @param pau32 Pointer to the string buffer.
2561 * @param c The number of items to write.
2562 */
2563#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2564DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2565#else
2566DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2567{
2568# if RT_INLINE_ASM_GNU_STYLE
2569 __asm__ __volatile__("rep; outsl\n\t"
2570 : "+S" (pau32),
2571 "+c" (c)
2572 : "d" (Port));
2573
2574# elif RT_INLINE_ASM_USES_INTRIN
2575 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2576
2577# else
2578 __asm
2579 {
2580 mov dx, [Port]
2581 mov ecx, [c]
2582 mov eax, [pau32]
2583 xchg esi, eax
2584 rep outsd
2585 xchg esi, eax
2586 }
2587# endif
2588}
2589#endif
2590
2591
2592/**
2593 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2594 *
2595 * @param Port I/O port to read from.
2596 * @param pau32 Pointer to the string buffer (output).
2597 * @param c The number of items to read.
2598 */
2599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2600DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2601#else
2602DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2603{
2604# if RT_INLINE_ASM_GNU_STYLE
2605 __asm__ __volatile__("rep; insl\n\t"
2606 : "+D" (pau32),
2607 "+c" (c)
2608 : "d" (Port));
2609
2610# elif RT_INLINE_ASM_USES_INTRIN
2611 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2612
2613# else
2614 __asm
2615 {
2616 mov dx, [Port]
2617 mov ecx, [c]
2618 mov eax, [pau32]
2619 xchg edi, eax
2620 rep insd
2621 xchg edi, eax
2622 }
2623# endif
2624}
2625#endif
2626
2627
2628/**
2629 * Atomically Exchange an unsigned 8-bit value, ordered.
2630 *
2631 * @returns Current *pu8 value
2632 * @param pu8 Pointer to the 8-bit variable to update.
2633 * @param u8 The 8-bit value to assign to *pu8.
2634 */
2635#if RT_INLINE_ASM_EXTERNAL
2636DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2637#else
2638DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2639{
2640# if RT_INLINE_ASM_GNU_STYLE
2641 __asm__ __volatile__("xchgb %0, %1\n\t"
2642 : "=m" (*pu8),
2643 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2644 : "1" (u8),
2645 "m" (*pu8));
2646# else
2647 __asm
2648 {
2649# ifdef RT_ARCH_AMD64
2650 mov rdx, [pu8]
2651 mov al, [u8]
2652 xchg [rdx], al
2653 mov [u8], al
2654# else
2655 mov edx, [pu8]
2656 mov al, [u8]
2657 xchg [edx], al
2658 mov [u8], al
2659# endif
2660 }
2661# endif
2662 return u8;
2663}
2664#endif
2665
2666
2667/**
2668 * Atomically Exchange a signed 8-bit value, ordered.
2669 *
2670 * @returns Current *pi8 value
2671 * @param pi8 Pointer to the 8-bit variable to update.
2672 * @param i8 The 8-bit value to assign to *pi8.
2673 */
2674DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2675{
2676 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2677}
2678
2679
2680/**
2681 * Atomically Exchange a bool value, ordered.
2682 *
2683 * @returns Current *pf value
2684 * @param pf Pointer to the boolean variable to update.
2685 * @param f The boolean value to assign to *pf.
2686 */
2687DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2688{
2689#ifdef _MSC_VER
2690 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2691#else
2692 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2693#endif
2694}
2695
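/**
 * Usage sketch (illustrative only, names are not IPRT API): ASMAtomicXchgBool
 * returns the previous flag value, so it can serve as a simple "claim once"
 * primitive. Note that a real init-once also has to wait for the winner to
 * finish the initialization; that part is omitted here.
 *
 * @code
 * static volatile bool g_fInitClaimed = false;
 *
 * void myInitOnce(void)
 * {
 *     // Only the caller that flips the flag from false to true does the work.
 *     if (!ASMAtomicXchgBool(&g_fInitClaimed, true))
 *         myDoTheOneTimeInit(); // hypothetical helper
 * }
 * @endcode
 */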
2696
2697/**
2698 * Atomically Exchange an unsigned 16-bit value, ordered.
2699 *
2700 * @returns Current *pu16 value
2701 * @param pu16 Pointer to the 16-bit variable to update.
2702 * @param u16 The 16-bit value to assign to *pu16.
2703 */
2704#if RT_INLINE_ASM_EXTERNAL
2705DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2706#else
2707DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2708{
2709# if RT_INLINE_ASM_GNU_STYLE
2710 __asm__ __volatile__("xchgw %0, %1\n\t"
2711 : "=m" (*pu16),
2712 "=r" (u16)
2713 : "1" (u16),
2714 "m" (*pu16));
2715# else
2716 __asm
2717 {
2718# ifdef RT_ARCH_AMD64
2719 mov rdx, [pu16]
2720 mov ax, [u16]
2721 xchg [rdx], ax
2722 mov [u16], ax
2723# else
2724 mov edx, [pu16]
2725 mov ax, [u16]
2726 xchg [edx], ax
2727 mov [u16], ax
2728# endif
2729 }
2730# endif
2731 return u16;
2732}
2733#endif
2734
2735
2736/**
2737 * Atomically Exchange a signed 16-bit value, ordered.
2738 *
2739 * @returns Current *pi16 value
2740 * @param pi16 Pointer to the 16-bit variable to update.
2741 * @param i16 The 16-bit value to assign to *pi16.
2742 */
2743DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2744{
2745 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2746}
2747
2748
2749/**
2750 * Atomically Exchange an unsigned 32-bit value, ordered.
2751 *
2752 * @returns Current *pu32 value
2753 * @param pu32 Pointer to the 32-bit variable to update.
2754 * @param u32 The 32-bit value to assign to *pu32.
2755 */
2756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2757DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2758#else
2759DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2760{
2761# if RT_INLINE_ASM_GNU_STYLE
2762 __asm__ __volatile__("xchgl %0, %1\n\t"
2763 : "=m" (*pu32),
2764 "=r" (u32)
2765 : "1" (u32),
2766 "m" (*pu32));
2767
2768# elif RT_INLINE_ASM_USES_INTRIN
2769 u32 = _InterlockedExchange((long *)pu32, u32);
2770
2771# else
2772 __asm
2773 {
2774# ifdef RT_ARCH_AMD64
2775 mov rdx, [pu32]
2776 mov eax, u32
2777 xchg [rdx], eax
2778 mov [u32], eax
2779# else
2780 mov edx, [pu32]
2781 mov eax, u32
2782 xchg [edx], eax
2783 mov [u32], eax
2784# endif
2785 }
2786# endif
2787 return u32;
2788}
2789#endif
2790
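/**
 * Usage sketch (illustrative only): since ASMAtomicXchgU32 hands back the
 * previous value, it suits "grab and clear" patterns such as draining an
 * accumulated event mask. g_fPendingEvents is a made-up variable.
 *
 * @code
 * static volatile uint32_t g_fPendingEvents = 0;
 *
 * uint32_t myDrainPendingEvents(void)
 * {
 *     // Atomically fetch whatever bits have accumulated and reset the mask.
 *     return ASMAtomicXchgU32(&g_fPendingEvents, 0);
 * }
 * @endcode
 */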
2791
2792/**
2793 * Atomically Exchange a signed 32-bit value, ordered.
2794 *
2795 * @returns Current *pi32 value
2796 * @param pi32 Pointer to the 32-bit variable to update.
2797 * @param i32 The 32-bit value to assign to *pi32.
2798 */
2799DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2800{
2801 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2802}
2803
2804
2805/**
2806 * Atomically Exchange an unsigned 64-bit value, ordered.
2807 *
2808 * @returns Current *pu64 value
2809 * @param pu64 Pointer to the 64-bit variable to update.
2810 * @param u64 The 64-bit value to assign to *pu64.
2811 */
2812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2813DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2814#else
2815DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2816{
2817# if defined(RT_ARCH_AMD64)
2818# if RT_INLINE_ASM_USES_INTRIN
2819 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2820
2821# elif RT_INLINE_ASM_GNU_STYLE
2822 __asm__ __volatile__("xchgq %0, %1\n\t"
2823 : "=m" (*pu64),
2824 "=r" (u64)
2825 : "1" (u64),
2826 "m" (*pu64));
2827# else
2828 __asm
2829 {
2830 mov rdx, [pu64]
2831 mov rax, [u64]
2832 xchg [rdx], rax
2833 mov [u64], rax
2834 }
2835# endif
2836# else /* !RT_ARCH_AMD64 */
2837# if RT_INLINE_ASM_GNU_STYLE
2838# if defined(PIC) || defined(__PIC__)
2839 uint32_t u32EBX = (uint32_t)u64;
2840 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2841 "xchgl %%ebx, %3\n\t"
2842 "1:\n\t"
2843 "lock; cmpxchg8b (%5)\n\t"
2844 "jnz 1b\n\t"
2845 "movl %3, %%ebx\n\t"
2846 /*"xchgl %%esi, %5\n\t"*/
2847 : "=A" (u64),
2848 "=m" (*pu64)
2849 : "0" (*pu64),
2850 "m" ( u32EBX ),
2851 "c" ( (uint32_t)(u64 >> 32) ),
2852 "S" (pu64));
2853# else /* !PIC */
2854 __asm__ __volatile__("1:\n\t"
2855 "lock; cmpxchg8b %1\n\t"
2856 "jnz 1b\n\t"
2857 : "=A" (u64),
2858 "=m" (*pu64)
2859 : "0" (*pu64),
2860 "b" ( (uint32_t)u64 ),
2861 "c" ( (uint32_t)(u64 >> 32) ));
2862# endif
2863# else
2864 __asm
2865 {
2866 mov ebx, dword ptr [u64]
2867 mov ecx, dword ptr [u64 + 4]
2868 mov edi, pu64
2869 mov eax, dword ptr [edi]
2870 mov edx, dword ptr [edi + 4]
2871 retry:
2872 lock cmpxchg8b [edi]
2873 jnz retry
2874 mov dword ptr [u64], eax
2875 mov dword ptr [u64 + 4], edx
2876 }
2877# endif
2878# endif /* !RT_ARCH_AMD64 */
2879 return u64;
2880}
2881#endif
2882
2883
2884/**
2885 * Atomically Exchange a signed 64-bit value, ordered.
2886 *
2887 * @returns Current *pi64 value
2888 * @param pi64 Pointer to the 64-bit variable to update.
2889 * @param i64 The 64-bit value to assign to *pi64.
2890 */
2891DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2892{
2893 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2894}
2895
2896
2897/**
2898 * Atomically Exchange a pointer value, ordered.
2899 *
2900 * @returns Current *ppv value
2901 * @param ppv Pointer to the pointer variable to update.
2902 * @param pv The pointer value to assign to *ppv.
2903 */
2904DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2905{
2906#if ARCH_BITS == 32
2907 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2908#elif ARCH_BITS == 64
2909 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2910#else
2911# error "ARCH_BITS is bogus"
2912#endif
2913}
2914
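/**
 * Usage sketch (illustrative only): swapping in a replacement object and
 * receiving the old one in a single atomic step, e.g. when retiring a buffer.
 *
 * @code
 * static void * volatile g_pvCurrentBuf = NULL;
 *
 * void *myInstallBuffer(void *pvNew)
 * {
 *     // The previous buffer is returned to the caller, who may free it once
 *     // it is known that no other thread can still be using it.
 *     return ASMAtomicXchgPtr(&g_pvCurrentBuf, pvNew);
 * }
 * @endcode
 */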
2915
2916/**
2917 * Atomically Exchange a raw-mode context pointer value, ordered.
2918 *
2919 * @returns Current *ppvRC value
2920 * @param ppvRC Pointer to the pointer variable to update.
2921 * @param pvRC The pointer value to assign to *ppvRC.
2922 */
2923DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2924{
2925 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2926}
2927
2928
2929/**
2930 * Atomically Exchange a ring-0 pointer value, ordered.
2931 *
2932 * @returns Current *ppvR0 value
2933 * @param ppvR0 Pointer to the pointer variable to update.
2934 * @param pvR0 The pointer value to assign to *ppvR0.
2935 */
2936DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2937{
2938#if R0_ARCH_BITS == 32
2939 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2940#elif R0_ARCH_BITS == 64
2941 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2942#else
2943# error "R0_ARCH_BITS is bogus"
2944#endif
2945}
2946
2947
2948/**
2949 * Atomically Exchange a ring-3 pointer value, ordered.
2950 *
2951 * @returns Current *ppvR3 value
2952 * @param ppvR3 Pointer to the pointer variable to update.
2953 * @param pvR3 The pointer value to assign to *ppvR3.
2954 */
2955DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2956{
2957#if R3_ARCH_BITS == 32
2958 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2959#elif R3_ARCH_BITS == 64
2960 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2961#else
2962# error "R3_ARCH_BITS is bogus"
2963#endif
2964}
2965
2966
2967/** @def ASMAtomicXchgHandle
2968 * Atomically Exchange a typical IPRT handle value, ordered.
2969 *
2970 * @param ph Pointer to the value to update.
2971 * @param hNew The new value to assign to *ph.
2972 * @param phRes Where to store the current *ph value.
2973 *
2974 * @remarks This doesn't currently work for all handles (like RTFILE).
2975 */
2976#if HC_ARCH_BITS == 32
2977# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2978 do { \
2979 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2980 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2981 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2982 } while (0)
2983#elif HC_ARCH_BITS == 64
2984# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2985 do { \
2986 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2987 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2988 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2989 } while (0)
2990#else
2991# error HC_ARCH_BITS
2992#endif
2993
2994
2995/**
2996 * Atomically Exchange a value whose size might differ
2997 * between platforms or compilers, ordered.
2998 *
2999 * @param pu Pointer to the variable to update.
3000 * @param uNew The value to assign to *pu.
3001 * @todo This is busted as it's missing the result argument.
3002 */
3003#define ASMAtomicXchgSize(pu, uNew) \
3004 do { \
3005 switch (sizeof(*(pu))) { \
3006 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3007 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3008 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3009 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3010 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3011 } \
3012 } while (0)
3013
3014/**
3015 * Atomically Exchange a value whose size might differ
3016 * between platforms or compilers, ordered.
3017 *
3018 * @param pu Pointer to the variable to update.
3019 * @param uNew The value to assign to *pu.
3020 * @param puRes Where to store the current *pu value.
3021 */
3022#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3023 do { \
3024 switch (sizeof(*(pu))) { \
3025 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3026 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3027 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3028 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3029 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
3030 } \
3031 } while (0)
3032
3033
3034
3035/**
3036 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
3037 *
3038 * @returns true if xchg was done.
3039 * @returns false if xchg wasn't done.
3040 *
3041 * @param pu8 Pointer to the value to update.
3042 * @param u8New The new value to assign to *pu8.
3043 * @param u8Old The old value to compare *pu8 with.
3044 */
3045#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
3046DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
3047#else
3048DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
3049{
3050 uint8_t u8Ret;
3051 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
3052 "setz %1\n\t"
3053 : "=m" (*pu8),
3054 "=qm" (u8Ret),
3055 "=a" (u8Old)
3056 : "q" (u8New),
3057 "2" (u8Old),
3058 "m" (*pu8));
3059 return (bool)u8Ret;
3060}
3061#endif
3062
3063
3064/**
3065 * Atomically Compare and Exchange a signed 8-bit value, ordered.
3066 *
3067 * @returns true if xchg was done.
3068 * @returns false if xchg wasn't done.
3069 *
3070 * @param pi8 Pointer to the value to update.
3071 * @param i8New The new value to assign to *pi8.
3072 * @param i8Old The old value to compare *pi8 with.
3073 */
3074DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
3075{
3076 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
3077}
3078
3079
3080/**
3081 * Atomically Compare and Exchange a bool value, ordered.
3082 *
3083 * @returns true if xchg was done.
3084 * @returns false if xchg wasn't done.
3085 *
3086 * @param pf Pointer to the value to update.
3087 * @param fNew The new value to assign to *pf.
3088 * @param fOld The old value to compare *pf with.
3089 */
3090DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
3091{
3092 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
3093}
3094
3095
3096/**
3097 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3098 *
3099 * @returns true if xchg was done.
3100 * @returns false if xchg wasn't done.
3101 *
3102 * @param pu32 Pointer to the value to update.
3103 * @param u32New The new value to assign to *pu32.
3104 * @param u32Old The old value to compare *pu32 with.
3105 */
3106#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3107DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3108#else
3109DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3110{
3111# if RT_INLINE_ASM_GNU_STYLE
3112 uint8_t u8Ret;
3113 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3114 "setz %1\n\t"
3115 : "=m" (*pu32),
3116 "=qm" (u8Ret),
3117 "=a" (u32Old)
3118 : "r" (u32New),
3119 "2" (u32Old),
3120 "m" (*pu32));
3121 return (bool)u8Ret;
3122
3123# elif RT_INLINE_ASM_USES_INTRIN
3124 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3125
3126# else
3127 uint32_t u32Ret;
3128 __asm
3129 {
3130# ifdef RT_ARCH_AMD64
3131 mov rdx, [pu32]
3132# else
3133 mov edx, [pu32]
3134# endif
3135 mov eax, [u32Old]
3136 mov ecx, [u32New]
3137# ifdef RT_ARCH_AMD64
3138 lock cmpxchg [rdx], ecx
3139# else
3140 lock cmpxchg [edx], ecx
3141# endif
3142 setz al
3143 movzx eax, al
3144 mov [u32Ret], eax
3145 }
3146 return !!u32Ret;
3147# endif
3148}
3149#endif
3150
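/**
 * Usage sketch (illustrative only): the typical compare-and-swap retry loop.
 * This one raises a shared maximum; myAtomicMaxU32 is not an IPRT function.
 *
 * @code
 * void myAtomicMaxU32(volatile uint32_t *pu32, uint32_t u32New)
 * {
 *     uint32_t u32Cur = ASMAtomicReadU32(pu32);
 *     while (   u32New > u32Cur
 *            && !ASMAtomicCmpXchgU32(pu32, u32New, u32Cur))
 *         u32Cur = ASMAtomicReadU32(pu32); // lost the race, reload and retry
 * }
 * @endcode
 */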
3151
3152/**
3153 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3154 *
3155 * @returns true if xchg was done.
3156 * @returns false if xchg wasn't done.
3157 *
3158 * @param pi32 Pointer to the value to update.
3159 * @param i32New The new value to assign to *pi32.
3160 * @param i32Old The old value to compare *pi32 with.
3161 */
3162DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3163{
3164 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3165}
3166
3167
3168/**
3169 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3170 *
3171 * @returns true if xchg was done.
3172 * @returns false if xchg wasn't done.
3173 *
3174 * @param pu64 Pointer to the 64-bit variable to update.
3175 * @param u64New The 64-bit value to assign to *pu64.
3176 * @param u64Old The value to compare with.
3177 */
3178#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3179 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3180DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3181#else
3182DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3183{
3184# if RT_INLINE_ASM_USES_INTRIN
3185 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3186
3187# elif defined(RT_ARCH_AMD64)
3188# if RT_INLINE_ASM_GNU_STYLE
3189 uint8_t u8Ret;
3190 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3191 "setz %1\n\t"
3192 : "=m" (*pu64),
3193 "=qm" (u8Ret),
3194 "=a" (u64Old)
3195 : "r" (u64New),
3196 "2" (u64Old),
3197 "m" (*pu64));
3198 return (bool)u8Ret;
3199# else
3200 bool fRet;
3201 __asm
3202 {
3203 mov rdx, [pu64]
3204 mov rax, [u64Old]
3205 mov rcx, [u64New]
3206 lock cmpxchg [rdx], rcx
3207 setz al
3208 mov [fRet], al
3209 }
3210 return fRet;
3211# endif
3212# else /* !RT_ARCH_AMD64 */
3213 uint32_t u32Ret;
3214# if RT_INLINE_ASM_GNU_STYLE
3215# if defined(PIC) || defined(__PIC__)
3216 uint32_t u32EBX = (uint32_t)u64New;
3217 uint32_t u32Spill;
3218 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3219 "lock; cmpxchg8b (%6)\n\t"
3220 "setz %%al\n\t"
3221 "movl %4, %%ebx\n\t"
3222 "movzbl %%al, %%eax\n\t"
3223 : "=a" (u32Ret),
3224 "=d" (u32Spill),
3225# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3226 "+m" (*pu64)
3227# else
3228 "=m" (*pu64)
3229# endif
3230 : "A" (u64Old),
3231 "m" ( u32EBX ),
3232 "c" ( (uint32_t)(u64New >> 32) ),
3233 "S" (pu64));
3234# else /* !PIC */
3235 uint32_t u32Spill;
3236 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3237 "setz %%al\n\t"
3238 "movzbl %%al, %%eax\n\t"
3239 : "=a" (u32Ret),
3240 "=d" (u32Spill),
3241 "+m" (*pu64)
3242 : "A" (u64Old),
3243 "b" ( (uint32_t)u64New ),
3244 "c" ( (uint32_t)(u64New >> 32) ));
3245# endif
3246 return (bool)u32Ret;
3247# else
3248 __asm
3249 {
3250 mov ebx, dword ptr [u64New]
3251 mov ecx, dword ptr [u64New + 4]
3252 mov edi, [pu64]
3253 mov eax, dword ptr [u64Old]
3254 mov edx, dword ptr [u64Old + 4]
3255 lock cmpxchg8b [edi]
3256 setz al
3257 movzx eax, al
3258 mov dword ptr [u32Ret], eax
3259 }
3260 return !!u32Ret;
3261# endif
3262# endif /* !RT_ARCH_AMD64 */
3263}
3264#endif
3265
3266
3267/**
3268 * Atomically Compare and exchange a signed 64-bit value, ordered.
3269 *
3270 * @returns true if xchg was done.
3271 * @returns false if xchg wasn't done.
3272 *
3273 * @param pi64 Pointer to the 64-bit variable to update.
3274 * @param i64 The 64-bit value to assign to *pi64.
3275 * @param i64Old The value to compare with.
3276 */
3277DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3278{
3279 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3280}
3281
3282
3283/**
3284 * Atomically Compare and Exchange a pointer value, ordered.
3285 *
3286 * @returns true if xchg was done.
3287 * @returns false if xchg wasn't done.
3288 *
3289 * @param ppv Pointer to the value to update.
3290 * @param pvNew The new value to assign to *ppv.
3291 * @param pvOld The old value to compare *ppv with.
3292 */
3293DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3294{
3295#if ARCH_BITS == 32
3296 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3297#elif ARCH_BITS == 64
3298 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3299#else
3300# error "ARCH_BITS is bogus"
3301#endif
3302}
3303
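/**
 * Usage sketch (illustrative only): a lock-free LIFO push built on
 * ASMAtomicCmpXchgPtr. MYNODE and g_pMyStackHead are made up; popping safely
 * needs additional care (the ABA problem) and is not shown.
 *
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; int iPayload; } MYNODE;
 * static MYNODE * volatile g_pMyStackHead = NULL;
 *
 * void myStackPush(MYNODE *pNode)
 * {
 *     void *pvHead;
 *     do
 *     {
 *         pvHead = ASMAtomicReadPtr((void * volatile *)&g_pMyStackHead);
 *         pNode->pNext = (MYNODE *)pvHead;
 *     } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pMyStackHead, pNode, pvHead));
 * }
 * @endcode
 */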
3304
3305/** @def ASMAtomicCmpXchgHandle
3306 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3307 *
3308 * @param ph Pointer to the value to update.
3309 * @param hNew The new value to assign to *ph.
3310 * @param hOld The old value to compare *ph with.
3311 * @param fRc Where to store the result.
3312 *
3313 * @remarks This doesn't currently work for all handles (like RTFILE).
3314 */
3315#if HC_ARCH_BITS == 32
3316# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3317 do { \
3318 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3319 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3320 } while (0)
3321#elif HC_ARCH_BITS == 64
3322# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3323 do { \
3324 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3325 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3326 } while (0)
3327#else
3328# error HC_ARCH_BITS
3329#endif
3330
3331
3332/** @def ASMAtomicCmpXchgSize
3333 * Atomically Compare and Exchange a value whose size might differ
3334 * between platforms or compilers, ordered.
3335 *
3336 * @param pu Pointer to the value to update.
3337 * @param uNew The new value to assign to *pu.
3338 * @param uOld The old value to compare *pu with.
3339 * @param fRc Where to store the result.
3340 */
3341#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3342 do { \
3343 switch (sizeof(*(pu))) { \
3344 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3345 break; \
3346 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3347 break; \
3348 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3349 (fRc) = false; \
3350 break; \
3351 } \
3352 } while (0)
3353
3354
3355/**
3356 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3357 * passes back old value, ordered.
3358 *
3359 * @returns true if xchg was done.
3360 * @returns false if xchg wasn't done.
3361 *
3362 * @param pu32 Pointer to the value to update.
3363 * @param u32New The new value to assign to *pu32.
3364 * @param u32Old The old value to compare *pu32 with.
3365 * @param pu32Old Pointer to store the old value at.
3366 */
3367#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3368DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3369#else
3370DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3371{
3372# if RT_INLINE_ASM_GNU_STYLE
3373 uint8_t u8Ret;
3374 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3375 "setz %1\n\t"
3376 : "=m" (*pu32),
3377 "=qm" (u8Ret),
3378 "=a" (*pu32Old)
3379 : "r" (u32New),
3380 "a" (u32Old),
3381 "m" (*pu32));
3382 return (bool)u8Ret;
3383
3384# elif RT_INLINE_ASM_USES_INTRIN
3385 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3386
3387# else
3388 uint32_t u32Ret;
3389 __asm
3390 {
3391# ifdef RT_ARCH_AMD64
3392 mov rdx, [pu32]
3393# else
3394 mov edx, [pu32]
3395# endif
3396 mov eax, [u32Old]
3397 mov ecx, [u32New]
3398# ifdef RT_ARCH_AMD64
3399 lock cmpxchg [rdx], ecx
3400 mov rdx, [pu32Old]
3401 mov [rdx], eax
3402# else
3403 lock cmpxchg [edx], ecx
3404 mov edx, [pu32Old]
3405 mov [edx], eax
3406# endif
3407 setz al
3408 movzx eax, al
3409 mov [u32Ret], eax
3410 }
3411 return !!u32Ret;
3412# endif
3413}
3414#endif
3415
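/**
 * Usage sketch (illustrative only): the Ex variant stores the value actually
 * found back into *pu32Old, so a retry loop needs no separate re-read.
 * myTryIncBelowLimit is not an IPRT function.
 *
 * @code
 * bool myTryIncBelowLimit(volatile uint32_t *pu32, uint32_t u32Limit)
 * {
 *     uint32_t u32Old = ASMAtomicReadU32(pu32);
 *     for (;;)
 *     {
 *         if (u32Old >= u32Limit)
 *             return false; // limit reached, leave the counter alone
 *         if (ASMAtomicCmpXchgExU32(pu32, u32Old + 1, u32Old, &u32Old))
 *             return true;  // incremented; u32Old holds the pre-increment value
 *         // on failure u32Old was refreshed by the call, just go around again
 *     }
 * }
 * @endcode
 */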
3416
3417/**
3418 * Atomically Compare and Exchange a signed 32-bit value, additionally
3419 * passes back old value, ordered.
3420 *
3421 * @returns true if xchg was done.
3422 * @returns false if xchg wasn't done.
3423 *
3424 * @param pi32 Pointer to the value to update.
3425 * @param i32New The new value to assign to *pi32.
3426 * @param i32Old The old value to compare *pi32 with.
3427 * @param pi32Old Pointer to store the old value at.
3428 */
3429DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3430{
3431 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3432}
3433
3434
3435/**
3436 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3437 * passing back old value, ordered.
3438 *
3439 * @returns true if xchg was done.
3440 * @returns false if xchg wasn't done.
3441 *
3442 * @param pu64 Pointer to the 64-bit variable to update.
3443 * @param u64New The 64-bit value to assign to *pu64.
3444 * @param u64Old The value to compare with.
3445 * @param pu64Old Pointer to store the old value at.
3446 */
3447#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3448DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3449#else
3450DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3451{
3452# if RT_INLINE_ASM_USES_INTRIN
3453 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3454
3455# elif defined(RT_ARCH_AMD64)
3456# if RT_INLINE_ASM_GNU_STYLE
3457 uint8_t u8Ret;
3458 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3459 "setz %1\n\t"
3460 : "=m" (*pu64),
3461 "=qm" (u8Ret),
3462 "=a" (*pu64Old)
3463 : "r" (u64New),
3464 "a" (u64Old),
3465 "m" (*pu64));
3466 return (bool)u8Ret;
3467# else
3468 bool fRet;
3469 __asm
3470 {
3471 mov rdx, [pu64]
3472 mov rax, [u64Old]
3473 mov rcx, [u64New]
3474 lock cmpxchg [rdx], rcx
3475 mov rdx, [pu64Old]
3476 mov [rdx], rax
3477 setz al
3478 mov [fRet], al
3479 }
3480 return fRet;
3481# endif
3482# else /* !RT_ARCH_AMD64 */
3483# if RT_INLINE_ASM_GNU_STYLE
3484 uint64_t u64Ret;
3485# if defined(PIC) || defined(__PIC__)
3486 /* NB: this code uses a memory clobber description, because the clean
3487 * solution with an output value for *pu64 makes gcc run out of registers.
3488 * This will cause suboptimal code, and anyone with a better solution is
3489 * welcome to improve this. */
3490 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3491 "lock; cmpxchg8b %3\n\t"
3492 "xchgl %%ebx, %1\n\t"
3493 : "=A" (u64Ret)
3494 : "DS" ((uint32_t)u64New),
3495 "c" ((uint32_t)(u64New >> 32)),
3496 "m" (*pu64),
3497 "0" (u64Old)
3498 : "memory" );
3499# else /* !PIC */
3500 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3501 : "=A" (u64Ret),
3502 "=m" (*pu64)
3503 : "b" ((uint32_t)u64New),
3504 "c" ((uint32_t)(u64New >> 32)),
3505 "m" (*pu64),
3506 "0" (u64Old));
3507# endif
3508 *pu64Old = u64Ret;
3509 return u64Ret == u64Old;
3510# else
3511 uint32_t u32Ret;
3512 __asm
3513 {
3514 mov ebx, dword ptr [u64New]
3515 mov ecx, dword ptr [u64New + 4]
3516 mov edi, [pu64]
3517 mov eax, dword ptr [u64Old]
3518 mov edx, dword ptr [u64Old + 4]
3519 lock cmpxchg8b [edi]
3520 mov ebx, [pu64Old]
3521 mov [ebx], eax
3522 setz al
3523 movzx eax, al
3524 add ebx, 4
3525 mov [ebx], edx
3526 mov dword ptr [u32Ret], eax
3527 }
3528 return !!u32Ret;
3529# endif
3530# endif /* !RT_ARCH_AMD64 */
3531}
3532#endif
3533
3534
3535/**
3536 * Atomically Compare and exchange a signed 64-bit value, additionally
3537 * passing back old value, ordered.
3538 *
3539 * @returns true if xchg was done.
3540 * @returns false if xchg wasn't done.
3541 *
3542 * @param pi64 Pointer to the 64-bit variable to update.
3543 * @param i64 The 64-bit value to assign to *pi64.
3544 * @param i64Old The value to compare with.
3545 * @param pi64Old Pointer to store the old value at.
3546 */
3547DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3548{
3549 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3550}
3551
3552/** @def ASMAtomicCmpXchgExHandle
3553 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3554 *
3555 * @param ph Pointer to the value to update.
3556 * @param hNew The new value to assign to *ph.
3557 * @param hOld The old value to compare *ph with.
3558 * @param fRc Where to store the result.
3559 * @param phOldVal Pointer to where to store the old value.
3560 *
3561 * @remarks This doesn't currently work for all handles (like RTFILE).
3562 */
3563#if HC_ARCH_BITS == 32
3564# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3565 do { \
3566 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3567 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3568 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3569 } while (0)
3570#elif HC_ARCH_BITS == 64
3571# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3572 do { \
3573 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3574 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3575 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3576 } while (0)
3577#else
3578# error HC_ARCH_BITS
3579#endif
3580
3581
3582/** @def ASMAtomicCmpXchgExSize
3583 * Atomically Compare and Exchange a value whose size might differ
3584 * between platforms or compilers. Additionally passes back old value.
3585 *
3586 * @param pu Pointer to the value to update.
3587 * @param uNew The new value to assign to *pu.
3588 * @param uOld The old value to compare *pu with.
3589 * @param fRc Where to store the result.
3590 * @param puOldVal Pointer to where to store the old value.
3591 */
3592#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3593 do { \
3594 switch (sizeof(*(pu))) { \
3595 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3596 break; \
3597 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3598 break; \
3599 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3600 (fRc) = false; \
3601 *(puOldVal) = 0; \
3602 break; \
3603 } \
3604 } while (0)
3605
3606
3607/**
3608 * Atomically Compare and Exchange a pointer value, additionally
3609 * passing back old value, ordered.
3610 *
3611 * @returns true if xchg was done.
3612 * @returns false if xchg wasn't done.
3613 *
3614 * @param ppv Pointer to the value to update.
3615 * @param pvNew The new value to assign to *ppv.
3616 * @param pvOld The old value to compare *ppv with.
3617 * @param ppvOld Pointer to store the old value at.
3618 */
3619DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3620{
3621#if ARCH_BITS == 32
3622 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3623#elif ARCH_BITS == 64
3624 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3625#else
3626# error "ARCH_BITS is bogus"
3627#endif
3628}
3629
3630
3631/**
3632 * Atomically exchanges and adds to a 32-bit value, ordered.
3633 *
3634 * @returns The old value.
3635 * @param pu32 Pointer to the value.
3636 * @param u32 Number to add.
3637 */
3638#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3639DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3640#else
3641DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3642{
3643# if RT_INLINE_ASM_USES_INTRIN
3644 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3645 return u32;
3646
3647# elif RT_INLINE_ASM_GNU_STYLE
3648 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3649 : "=r" (u32),
3650 "=m" (*pu32)
3651 : "0" (u32),
3652 "m" (*pu32)
3653 : "memory");
3654 return u32;
3655# else
3656 __asm
3657 {
3658 mov eax, [u32]
3659# ifdef RT_ARCH_AMD64
3660 mov rdx, [pu32]
3661 lock xadd [rdx], eax
3662# else
3663 mov edx, [pu32]
3664 lock xadd [edx], eax
3665# endif
3666 mov [u32], eax
3667 }
3668 return u32;
3669# endif
3670}
3671#endif
3672
3673
3674/**
3675 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3676 *
3677 * @returns The old value.
3678 * @param pi32 Pointer to the value.
3679 * @param i32 Number to add.
3680 */
3681DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3682{
3683 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3684}
3685
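/**
 * Usage sketch (illustrative only): ASMAtomicAddU32 returns the pre-add value,
 * which is exactly what a ticket allocator wants. g_uNextTicket is made up.
 *
 * @code
 * static volatile uint32_t g_uNextTicket = 0;
 *
 * uint32_t myTakeTicket(void)
 * {
 *     // Every caller receives a distinct, monotonically increasing number.
 *     return ASMAtomicAddU32(&g_uNextTicket, 1);
 * }
 * @endcode
 */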
3686
3687/**
3688 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3689 *
3690 * @returns The old value.
3691 * @param pi32 Pointer to the value.
3692 * @param u32 Number to subtract.
3693 */
3694DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3695{
3696 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3697}
3698
3699
3700/**
3701 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3702 *
3703 * @returns The old value.
3704 * @param pi32 Pointer to the value.
3705 * @param i32 Number to subtract.
3706 */
3707DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3708{
3709 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3710}
3711
3712
3713/**
3714 * Atomically increment a 32-bit value, ordered.
3715 *
3716 * @returns The new value.
3717 * @param pu32 Pointer to the value to increment.
3718 */
3719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3720DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3721#else
3722DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3723{
3724 uint32_t u32;
3725# if RT_INLINE_ASM_USES_INTRIN
3726 u32 = _InterlockedIncrement((long *)pu32);
3727 return u32;
3728
3729# elif RT_INLINE_ASM_GNU_STYLE
3730 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3731 : "=r" (u32),
3732 "=m" (*pu32)
3733 : "0" (1),
3734 "m" (*pu32)
3735 : "memory");
3736 return u32+1;
3737# else
3738 __asm
3739 {
3740 mov eax, 1
3741# ifdef RT_ARCH_AMD64
3742 mov rdx, [pu32]
3743 lock xadd [rdx], eax
3744# else
3745 mov edx, [pu32]
3746 lock xadd [edx], eax
3747# endif
3748 mov u32, eax
3749 }
3750 return u32+1;
3751# endif
3752}
3753#endif
3754
3755
3756/**
3757 * Atomically increment a signed 32-bit value, ordered.
3758 *
3759 * @returns The new value.
3760 * @param pi32 Pointer to the value to increment.
3761 */
3762DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3763{
3764 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3765}
3766
3767
3768/**
3769 * Atomically decrement an unsigned 32-bit value, ordered.
3770 *
3771 * @returns The new value.
3772 * @param pu32 Pointer to the value to decrement.
3773 */
3774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3775DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3776#else
3777DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3778{
3779 uint32_t u32;
3780# if RT_INLINE_ASM_USES_INTRIN
3781 u32 = _InterlockedDecrement((long *)pu32);
3782 return u32;
3783
3784# elif RT_INLINE_ASM_GNU_STYLE
3785 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3786 : "=r" (u32),
3787 "=m" (*pu32)
3788 : "0" (-1),
3789 "m" (*pu32)
3790 : "memory");
3791 return u32-1;
3792# else
3793 __asm
3794 {
3795 mov eax, -1
3796# ifdef RT_ARCH_AMD64
3797 mov rdx, [pu32]
3798 lock xadd [rdx], eax
3799# else
3800 mov edx, [pu32]
3801 lock xadd [edx], eax
3802# endif
3803 mov u32, eax
3804 }
3805 return u32-1;
3806# endif
3807}
3808#endif
3809
3810
3811/**
3812 * Atomically decrement a signed 32-bit value, ordered.
3813 *
3814 * @returns The new value.
3815 * @param pi32 Pointer to the value to decrement.
3816 */
3817DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3818{
3819 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3820}
3821
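/**
 * Usage sketch (illustrative only): ASMAtomicIncU32/ASMAtomicDecU32 return the
 * NEW value, so the release path can detect the last reference going away.
 * MYOBJ and myDestroy are hypothetical.
 *
 * @code
 * typedef struct MYOBJ { volatile uint32_t cRefs; } MYOBJ;
 *
 * void myRetain(MYOBJ *pObj)
 * {
 *     ASMAtomicIncU32(&pObj->cRefs);
 * }
 *
 * void myRelease(MYOBJ *pObj)
 * {
 *     if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *         myDestroy(pObj); // hypothetical destructor
 * }
 * @endcode
 */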
3822
3823/**
3824 * Atomically Or an unsigned 32-bit value, ordered.
3825 *
3826 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3827 * @param u32 The value to OR *pu32 with.
3828 */
3829#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3830DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3831#else
3832DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3833{
3834# if RT_INLINE_ASM_USES_INTRIN
3835 _InterlockedOr((long volatile *)pu32, (long)u32);
3836
3837# elif RT_INLINE_ASM_GNU_STYLE
3838 __asm__ __volatile__("lock; orl %1, %0\n\t"
3839 : "=m" (*pu32)
3840 : "ir" (u32),
3841 "m" (*pu32));
3842# else
3843 __asm
3844 {
3845 mov eax, [u32]
3846# ifdef RT_ARCH_AMD64
3847 mov rdx, [pu32]
3848 lock or [rdx], eax
3849# else
3850 mov edx, [pu32]
3851 lock or [edx], eax
3852# endif
3853 }
3854# endif
3855}
3856#endif
3857
3858
3859/**
3860 * Atomically Or a signed 32-bit value, ordered.
3861 *
3862 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3863 * @param i32 The value to OR *pi32 with.
3864 */
3865DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3866{
3867 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3868}
3869
3870
3871/**
3872 * Atomically And an unsigned 32-bit value, ordered.
3873 *
3874 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3875 * @param u32 The value to AND *pu32 with.
3876 */
3877#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3878DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3879#else
3880DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3881{
3882# if RT_INLINE_ASM_USES_INTRIN
3883 _InterlockedAnd((long volatile *)pu32, u32);
3884
3885# elif RT_INLINE_ASM_GNU_STYLE
3886 __asm__ __volatile__("lock; andl %1, %0\n\t"
3887 : "=m" (*pu32)
3888 : "ir" (u32),
3889 "m" (*pu32));
3890# else
3891 __asm
3892 {
3893 mov eax, [u32]
3894# ifdef RT_ARCH_AMD64
3895 mov rdx, [pu32]
3896 lock and [rdx], eax
3897# else
3898 mov edx, [pu32]
3899 lock and [edx], eax
3900# endif
3901 }
3902# endif
3903}
3904#endif
3905
3906
3907/**
3908 * Atomically And a signed 32-bit value, ordered.
3909 *
3910 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3911 * @param i32 The value to AND *pi32 with.
3912 */
3913DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3914{
3915 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3916}
3917
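/**
 * Usage sketch (illustrative only): OR sets flag bits and AND with the
 * complement clears them; both are locked read-modify-write operations.
 * The flag value below is made up.
 *
 * @code
 * #define MYF_BUSY UINT32_C(0x00000001)
 *
 * void mySetBusy(volatile uint32_t *pfFlags)
 * {
 *     ASMAtomicOrU32(pfFlags, MYF_BUSY);
 * }
 *
 * void myClearBusy(volatile uint32_t *pfFlags)
 * {
 *     ASMAtomicAndU32(pfFlags, ~MYF_BUSY);
 * }
 * @endcode
 */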
3918
3919/**
3920 * Serialize Instruction.
3921 */
3922#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3923DECLASM(void) ASMSerializeInstruction(void);
3924#else
3925DECLINLINE(void) ASMSerializeInstruction(void)
3926{
3927# if RT_INLINE_ASM_GNU_STYLE
3928 RTCCUINTREG xAX = 0;
3929# ifdef RT_ARCH_AMD64
3930 __asm__ ("cpuid"
3931 : "=a" (xAX)
3932 : "0" (xAX)
3933 : "rbx", "rcx", "rdx");
3934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3935 __asm__ ("push %%ebx\n\t"
3936 "cpuid\n\t"
3937 "pop %%ebx\n\t"
3938 : "=a" (xAX)
3939 : "0" (xAX)
3940 : "ecx", "edx");
3941# else
3942 __asm__ ("cpuid"
3943 : "=a" (xAX)
3944 : "0" (xAX)
3945 : "ebx", "ecx", "edx");
3946# endif
3947
3948# elif RT_INLINE_ASM_USES_INTRIN
3949 int aInfo[4];
3950 __cpuid(aInfo, 0);
3951
3952# else
3953 __asm
3954 {
3955 push ebx
3956 xor eax, eax
3957 cpuid
3958 pop ebx
3959 }
3960# endif
3961}
3962#endif
3963
3964
3965/**
3966 * Memory load/store fence, waits for any pending writes and reads to complete.
3967 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3968 */
3969DECLINLINE(void) ASMMemoryFenceSSE2(void)
3970{
3971#if RT_INLINE_ASM_GNU_STYLE
3972 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3973#elif RT_INLINE_ASM_USES_INTRIN
3974 _mm_mfence();
3975#else
3976 __asm
3977 {
3978 _emit 0x0f
3979 _emit 0xae
3980 _emit 0xf0
3981 }
3982#endif
3983}
3984
3985
3986/**
3987 * Memory store fence, waits for any writes to complete.
3988 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
3989 */
3990DECLINLINE(void) ASMWriteFenceSSE(void)
3991{
3992#if RT_INLINE_ASM_GNU_STYLE
3993 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
3994#elif RT_INLINE_ASM_USES_INTRIN
3995 _mm_sfence();
3996#else
3997 __asm
3998 {
3999 _emit 0x0f
4000 _emit 0xae
4001 _emit 0xf8
4002 }
4003#endif
4004}
4005
4006
4007/**
4008 * Memory load fence, waits for any pending reads to complete.
4009 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
4010 */
4011DECLINLINE(void) ASMReadFenceSSE2(void)
4012{
4013#if RT_INLINE_ASM_GNU_STYLE
4014 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
4015#elif RT_INLINE_ASM_USES_INTRIN
4016 _mm_lfence();
4017#else
4018 __asm
4019 {
4020 _emit 0x0f
4021 _emit 0xae
4022 _emit 0xe8
4023 }
4024#endif
4025}
4026
4027
4028/**
4029 * Memory fence, waits for any pending writes and reads to complete.
4030 */
4031DECLINLINE(void) ASMMemoryFence(void)
4032{
4033 /** @todo use mfence? check if all cpus we care for support it. */
4034 uint32_t volatile u32;
4035 ASMAtomicXchgU32(&u32, 0);
4036}
4037
4038
4039/**
4040 * Write fence, waits for any pending writes to complete.
4041 */
4042DECLINLINE(void) ASMWriteFence(void)
4043{
4044 /** @todo use sfence? check if all cpus we care for support it. */
4045 ASMMemoryFence();
4046}
4047
4048
4049/**
4050 * Read fence, waits for any pending reads to complete.
4051 */
4052DECLINLINE(void) ASMReadFence(void)
4053{
4054 /** @todo use lfence? check if all cpus we care for support it. */
4055 ASMMemoryFence();
4056}
4057
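/**
 * Usage sketch (illustrative only): a publish/consume pairing. The producer
 * fills the payload and fences before setting the flag; the consumer checks
 * the flag and fences before touching the payload. Like the header itself,
 * this assumes x86/AMD64 semantics; the names are made up.
 *
 * @code
 * static int           g_iPayload;
 * static volatile bool g_fReady = false;
 *
 * void myPublish(int iValue)
 * {
 *     g_iPayload = iValue;
 *     ASMWriteFence();      // make sure the payload is visible first...
 *     g_fReady = true;      // ...then raise the flag
 * }
 *
 * bool myTryConsume(int *piValue)
 * {
 *     if (!ASMAtomicUoReadBool(&g_fReady))
 *         return false;
 *     ASMReadFence();       // don't read the payload ahead of the flag
 *     *piValue = g_iPayload;
 *     return true;
 * }
 * @endcode
 */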
4058
4059/**
4060 * Atomically reads an unsigned 8-bit value, ordered.
4061 *
4062 * @returns Current *pu8 value
4063 * @param pu8 Pointer to the 8-bit variable to read.
4064 */
4065DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
4066{
4067 ASMMemoryFence();
4068 return *pu8; /* byte reads are atomic on x86 */
4069}
4070
4071
4072/**
4073 * Atomically reads an unsigned 8-bit value, unordered.
4074 *
4075 * @returns Current *pu8 value
4076 * @param pu8 Pointer to the 8-bit variable to read.
4077 */
4078DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
4079{
4080 return *pu8; /* byte reads are atomic on x86 */
4081}
4082
4083
4084/**
4085 * Atomically reads a signed 8-bit value, ordered.
4086 *
4087 * @returns Current *pi8 value
4088 * @param pi8 Pointer to the 8-bit variable to read.
4089 */
4090DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
4091{
4092 ASMMemoryFence();
4093 return *pi8; /* byte reads are atomic on x86 */
4094}
4095
4096
4097/**
4098 * Atomically reads a signed 8-bit value, unordered.
4099 *
4100 * @returns Current *pi8 value
4101 * @param pi8 Pointer to the 8-bit variable to read.
4102 */
4103DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4104{
4105 return *pi8; /* byte reads are atomic on x86 */
4106}
4107
4108
4109/**
4110 * Atomically reads an unsigned 16-bit value, ordered.
4111 *
4112 * @returns Current *pu16 value
4113 * @param pu16 Pointer to the 16-bit variable to read.
4114 */
4115DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4116{
4117 ASMMemoryFence();
4118 Assert(!((uintptr_t)pu16 & 1));
4119 return *pu16;
4120}
4121
4122
4123/**
4124 * Atomically reads an unsigned 16-bit value, unordered.
4125 *
4126 * @returns Current *pu16 value
4127 * @param pu16 Pointer to the 16-bit variable to read.
4128 */
4129DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4130{
4131 Assert(!((uintptr_t)pu16 & 1));
4132 return *pu16;
4133}
4134
4135
4136/**
4137 * Atomically reads a signed 16-bit value, ordered.
4138 *
4139 * @returns Current *pi16 value
4140 * @param pi16 Pointer to the 16-bit variable to read.
4141 */
4142DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4143{
4144 ASMMemoryFence();
4145 Assert(!((uintptr_t)pi16 & 1));
4146 return *pi16;
4147}
4148
4149
4150/**
4151 * Atomically reads a signed 16-bit value, unordered.
4152 *
4153 * @returns Current *pi16 value
4154 * @param pi16 Pointer to the 16-bit variable to read.
4155 */
4156DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4157{
4158 Assert(!((uintptr_t)pi16 & 1));
4159 return *pi16;
4160}
4161
4162
4163/**
4164 * Atomically reads an unsigned 32-bit value, ordered.
4165 *
4166 * @returns Current *pu32 value
4167 * @param pu32 Pointer to the 32-bit variable to read.
4168 */
4169DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4170{
4171 ASMMemoryFence();
4172 Assert(!((uintptr_t)pu32 & 3));
4173 return *pu32;
4174}
4175
4176
4177/**
4178 * Atomically reads an unsigned 32-bit value, unordered.
4179 *
4180 * @returns Current *pu32 value
4181 * @param pu32 Pointer to the 32-bit variable to read.
4182 */
4183DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4184{
4185 Assert(!((uintptr_t)pu32 & 3));
4186 return *pu32;
4187}
4188
4189
4190/**
4191 * Atomically reads a signed 32-bit value, ordered.
4192 *
4193 * @returns Current *pi32 value
4194 * @param pi32 Pointer to the 32-bit variable to read.
4195 */
4196DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4197{
4198 ASMMemoryFence();
4199 Assert(!((uintptr_t)pi32 & 3));
4200 return *pi32;
4201}
4202
4203
4204/**
4205 * Atomically reads a signed 32-bit value, unordered.
4206 *
4207 * @returns Current *pi32 value
4208 * @param pi32 Pointer to the 32-bit variable to read.
4209 */
4210DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4211{
4212 Assert(!((uintptr_t)pi32 & 3));
4213 return *pi32;
4214}
4215
4216
4217/**
4218 * Atomically reads an unsigned 64-bit value, ordered.
4219 *
4220 * @returns Current *pu64 value
4221 * @param pu64 Pointer to the 64-bit variable to read.
4222 * The memory pointed to must be writable.
4223 * @remark This will fault if the memory is read-only!
4224 */
4225#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4226 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4227DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4228#else
4229DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4230{
4231 uint64_t u64;
4232# ifdef RT_ARCH_AMD64
4233 Assert(!((uintptr_t)pu64 & 7));
4234/*# if RT_INLINE_ASM_GNU_STYLE
4235 __asm__ __volatile__( "mfence\n\t"
4236 "movq %1, %0\n\t"
4237 : "=r" (u64)
4238 : "m" (*pu64));
4239# else
4240 __asm
4241 {
4242 mfence
4243 mov rdx, [pu64]
4244 mov rax, [rdx]
4245 mov [u64], rax
4246 }
4247# endif*/
4248 ASMMemoryFence();
4249 u64 = *pu64;
4250# else /* !RT_ARCH_AMD64 */
4251# if RT_INLINE_ASM_GNU_STYLE
4252# if defined(PIC) || defined(__PIC__)
4253 uint32_t u32EBX = 0;
4254 Assert(!((uintptr_t)pu64 & 7));
4255 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4256 "lock; cmpxchg8b (%5)\n\t"
4257 "movl %3, %%ebx\n\t"
4258 : "=A" (u64),
4259# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4260 "+m" (*pu64)
4261# else
4262 "=m" (*pu64)
4263# endif
4264 : "0" (0),
4265 "m" (u32EBX),
4266 "c" (0),
4267 "S" (pu64));
4268# else /* !PIC */
4269 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4270 : "=A" (u64),
4271 "+m" (*pu64)
4272 : "0" (0),
4273 "b" (0),
4274 "c" (0));
4275# endif
4276# else
4277 Assert(!((uintptr_t)pu64 & 7));
4278 __asm
4279 {
4280 xor eax, eax
4281 xor edx, edx
4282 mov edi, pu64
4283 xor ecx, ecx
4284 xor ebx, ebx
4285 lock cmpxchg8b [edi]
4286 mov dword ptr [u64], eax
4287 mov dword ptr [u64 + 4], edx
4288 }
4289# endif
4290# endif /* !RT_ARCH_AMD64 */
4291 return u64;
4292}
4293#endif
4294
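/**
 * Usage sketch (illustrative only): on 32-bit hosts a plain 64-bit load may
 * tear, so shared 64-bit counters should be sampled with ASMAtomicReadU64.
 * As noted above, the variable must live in writable memory because the
 * 32-bit implementation is based on cmpxchg8b. g_cTotalBytes is made up.
 *
 * @code
 * static volatile uint64_t g_cTotalBytes = 0;
 *
 * uint64_t mySampleTotalBytes(void)
 * {
 *     return ASMAtomicReadU64(&g_cTotalBytes);
 * }
 * @endcode
 */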
4295
4296/**
4297 * Atomically reads an unsigned 64-bit value, unordered.
4298 *
4299 * @returns Current *pu64 value
4300 * @param pu64 Pointer to the 64-bit variable to read.
4301 * The memory pointed to must be writable.
4302 * @remark This will fault if the memory is read-only!
4303 */
4304#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4305DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4306#else
4307DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4308{
4309 uint64_t u64;
4310# ifdef RT_ARCH_AMD64
4311 Assert(!((uintptr_t)pu64 & 7));
4312/*# if RT_INLINE_ASM_GNU_STYLE
4313 Assert(!((uintptr_t)pu64 & 7));
4314 __asm__ __volatile__("movq %1, %0\n\t"
4315 : "=r" (u64)
4316 : "m" (*pu64));
4317# else
4318 __asm
4319 {
4320 mov rdx, [pu64]
4321 mov rax, [rdx]
4322 mov [u64], rax
4323 }
4324# endif */
4325 u64 = *pu64;
4326# else /* !RT_ARCH_AMD64 */
4327# if RT_INLINE_ASM_GNU_STYLE
4328# if defined(PIC) || defined(__PIC__)
4329 uint32_t u32EBX = 0;
4330 uint32_t u32Spill;
4331 Assert(!((uintptr_t)pu64 & 7));
4332 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4333 "xor %%ecx,%%ecx\n\t"
4334 "xor %%edx,%%edx\n\t"
4335 "xchgl %%ebx, %3\n\t"
4336 "lock; cmpxchg8b (%4)\n\t"
4337 "movl %3, %%ebx\n\t"
4338 : "=A" (u64),
4339# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4340 "+m" (*pu64),
4341# else
4342 "=m" (*pu64),
4343# endif
4344 "=c" (u32Spill)
4345 : "m" (u32EBX),
4346 "S" (pu64));
4347# else /* !PIC */
4348 __asm__ __volatile__("cmpxchg8b %1\n\t"
4349 : "=A" (u64),
4350 "+m" (*pu64)
4351 : "0" (0),
4352 "b" (0),
4353 "c" (0));
4354# endif
4355# else
4356 Assert(!((uintptr_t)pu64 & 7));
4357 __asm
4358 {
4359 xor eax, eax
4360 xor edx, edx
4361 mov edi, pu64
4362 xor ecx, ecx
4363 xor ebx, ebx
4364 lock cmpxchg8b [edi]
4365 mov dword ptr [u64], eax
4366 mov dword ptr [u64 + 4], edx
4367 }
4368# endif
4369# endif /* !RT_ARCH_AMD64 */
4370 return u64;
4371}
4372#endif
4373
4374
4375/**
4376 * Atomically reads a signed 64-bit value, ordered.
4377 *
4378 * @returns Current *pi64 value
4379 * @param pi64 Pointer to the 64-bit variable to read.
4380 * The memory pointed to must be writable.
4381 * @remark This will fault if the memory is read-only!
4382 */
4383DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4384{
4385 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4386}
4387
4388
4389/**
4390 * Atomically reads a signed 64-bit value, unordered.
4391 *
4392 * @returns Current *pi64 value
4393 * @param pi64 Pointer to the 64-bit variable to read.
4394 * The memory pointed to must be writable.
4395 * @remark This will fault if the memory is read-only!
4396 */
4397DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4398{
4399 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4400}
4401
4402
4403/**
4404 * Atomically reads a pointer value, ordered.
4405 *
4406 * @returns Current *pv value
4407 * @param ppv Pointer to the pointer variable to read.
4408 */
4409DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4410{
4411#if ARCH_BITS == 32
4412 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4413#elif ARCH_BITS == 64
4414 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4415#else
4416# error "ARCH_BITS is bogus"
4417#endif
4418}
4419
4420
4421/**
4422 * Atomically reads a pointer value, unordered.
4423 *
4424 * @returns Current *pv value
4425 * @param ppv Pointer to the pointer variable to read.
4426 */
4427DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4428{
4429#if ARCH_BITS == 32
4430 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4431#elif ARCH_BITS == 64
4432 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4433#else
4434# error "ARCH_BITS is bogus"
4435#endif
4436}
4437
4438
4439/**
4440 * Atomically reads a boolean value, ordered.
4441 *
4442 * @returns Current *pf value
4443 * @param pf Pointer to the boolean variable to read.
4444 */
4445DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4446{
4447 ASMMemoryFence();
4448 return *pf; /* byte reads are atomic on x86 */
4449}
4450
4451
4452/**
4453 * Atomically reads a boolean value, unordered.
4454 *
4455 * @returns Current *pf value
4456 * @param pf Pointer to the boolean variable to read.
4457 */
4458DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4459{
4460 return *pf; /* byte reads are atomic on x86 */
4461}
4462
4463
4464/**
4465 * Atomically read a typical IPRT handle value, ordered.
4466 *
4467 * @param ph Pointer to the handle variable to read.
4468 * @param phRes Where to store the result.
4469 *
4470 * @remarks This doesn't currently work for all handles (like RTFILE).
4471 */
4472#if HC_ARCH_BITS == 32
4473# define ASMAtomicReadHandle(ph, phRes) \
4474 do { \
4475 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4476 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4477 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4478 } while (0)
4479#elif HC_ARCH_BITS == 64
4480# define ASMAtomicReadHandle(ph, phRes) \
4481 do { \
4482 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4483 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4484 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4485 } while (0)
4486#else
4487# error HC_ARCH_BITS
4488#endif
4489
4490
4491/**
4492 * Atomically read a typical IPRT handle value, unordered.
4493 *
4494 * @param ph Pointer to the handle variable to read.
4495 * @param phRes Where to store the result.
4496 *
4497 * @remarks This doesn't currently work for all handles (like RTFILE).
4498 */
4499#if HC_ARCH_BITS == 32
4500# define ASMAtomicUoReadHandle(ph, phRes) \
4501 do { \
4502 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4503 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4504 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4505 } while (0)
4506#elif HC_ARCH_BITS == 64
4507# define ASMAtomicUoReadHandle(ph, phRes) \
4508 do { \
4509 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4510 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4511 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4512 } while (0)
4513#else
4514# error HC_ARCH_BITS
4515#endif
4516
4517
4518/**
4519 * Atomically read a value whose size might differ
4520 * between platforms or compilers, ordered.
4521 *
4522 * @param pu Pointer to the variable to read.
4523 * @param puRes Where to store the result.
4524 */
4525#define ASMAtomicReadSize(pu, puRes) \
4526 do { \
4527 switch (sizeof(*(pu))) { \
4528 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4529 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4530 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4531 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4532 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4533 } \
4534 } while (0)
4535
4536
4537/**
4538 * Atomically read a value whose size might differ
4539 * between platforms or compilers, unordered.
4540 *
4541 * @param pu Pointer to the variable to read.
4542 * @param puRes Where to store the result.
4543 */
4544#define ASMAtomicUoReadSize(pu, puRes) \
4545 do { \
4546 switch (sizeof(*(pu))) { \
4547 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4548 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4549 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4550 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4551 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4552 } \
4553 } while (0)
4554
4555
4556/**
4557 * Atomically writes an unsigned 8-bit value, ordered.
4558 *
4559 * @param pu8 Pointer to the 8-bit variable.
4560 * @param u8 The 8-bit value to assign to *pu8.
4561 */
4562DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4563{
4564 ASMAtomicXchgU8(pu8, u8);
4565}
4566
4567
4568/**
4569 * Atomically writes an unsigned 8-bit value, unordered.
4570 *
4571 * @param pu8 Pointer to the 8-bit variable.
4572 * @param u8 The 8-bit value to assign to *pu8.
4573 */
4574DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4575{
4576 *pu8 = u8; /* byte writes are atomic on x86 */
4577}
4578
4579
4580/**
4581 * Atomically writes a signed 8-bit value, ordered.
4582 *
4583 * @param pi8 Pointer to the 8-bit variable.
4584 * @param i8 The 8-bit value to assign to *pi8.
4585 */
4586DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4587{
4588 ASMAtomicXchgS8(pi8, i8);
4589}
4590
4591
4592/**
4593 * Atomically writes a signed 8-bit value, unordered.
4594 *
4595 * @param pi8 Pointer to the 8-bit variable.
4596 * @param i8 The 8-bit value to assign to *pi8.
4597 */
4598DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4599{
4600 *pi8 = i8; /* byte writes are atomic on x86 */
4601}
4602
4603
4604/**
4605 * Atomically writes an unsigned 16-bit value, ordered.
4606 *
4607 * @param pu16 Pointer to the 16-bit variable.
4608 * @param u16 The 16-bit value to assign to *pu16.
4609 */
4610DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4611{
4612 ASMAtomicXchgU16(pu16, u16);
4613}
4614
4615
4616/**
4617 * Atomically writes an unsigned 16-bit value, unordered.
4618 *
4619 * @param pu16 Pointer to the 16-bit variable.
4620 * @param u16 The 16-bit value to assign to *pu16.
4621 */
4622DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4623{
4624 Assert(!((uintptr_t)pu16 & 1));
4625 *pu16 = u16;
4626}
4627
4628
4629/**
4630 * Atomically writes a signed 16-bit value, ordered.
4631 *
4632 * @param pi16 Pointer to the 16-bit variable.
4633 * @param i16 The 16-bit value to assign to *pi16.
4634 */
4635DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4636{
4637 ASMAtomicXchgS16(pi16, i16);
4638}
4639
4640
4641/**
4642 * Atomically writes a signed 16-bit value, unordered.
4643 *
4644 * @param pi16 Pointer to the 16-bit variable.
4645 * @param i16 The 16-bit value to assign to *pi16.
4646 */
4647DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4648{
4649 Assert(!((uintptr_t)pi16 & 1));
4650 *pi16 = i16;
4651}
4652
4653
4654/**
4655 * Atomically writes an unsigned 32-bit value, ordered.
4656 *
4657 * @param pu32 Pointer to the 32-bit variable.
4658 * @param u32 The 32-bit value to assign to *pu32.
4659 */
4660DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4661{
4662 ASMAtomicXchgU32(pu32, u32);
4663}
4664
4665
4666/**
4667 * Atomically writes an unsigned 32-bit value, unordered.
4668 *
4669 * @param pu32 Pointer to the 32-bit variable.
4670 * @param u32 The 32-bit value to assign to *pu32.
4671 */
4672DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4673{
4674 Assert(!((uintptr_t)pu32 & 3));
4675 *pu32 = u32;
4676}
4677
4678
4679/**
4680 * Atomically writes a signed 32-bit value, ordered.
4681 *
4682 * @param pi32 Pointer to the 32-bit variable.
4683 * @param i32 The 32-bit value to assign to *pi32.
4684 */
4685DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4686{
4687 ASMAtomicXchgS32(pi32, i32);
4688}
4689
4690
4691/**
4692 * Atomically writes a signed 32-bit value, unordered.
4693 *
4694 * @param pi32 Pointer to the 32-bit variable.
4695 * @param i32 The 32-bit value to assign to *pi32.
4696 */
4697DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4698{
4699 Assert(!((uintptr_t)pi32 & 3));
4700 *pi32 = i32;
4701}
4702
4703
4704/**
4705 * Atomically writes an unsigned 64-bit value, ordered.
4706 *
4707 * @param pu64 Pointer to the 64-bit variable.
4708 * @param u64 The 64-bit value to assign to *pu64.
4709 */
4710DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4711{
4712 ASMAtomicXchgU64(pu64, u64);
4713}
4714
4715
4716/**
4717 * Atomically writes an unsigned 64-bit value, unordered.
4718 *
4719 * @param pu64 Pointer to the 64-bit variable.
4720 * @param u64 The 64-bit value to assign to *pu64.
4721 */
4722DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4723{
4724 Assert(!((uintptr_t)pu64 & 7));
4725#if ARCH_BITS == 64
4726 *pu64 = u64;
4727#else
4728 ASMAtomicXchgU64(pu64, u64);
4729#endif
4730}
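
/*
 * A minimal usage sketch contrasting the ordered and unordered 64-bit
 * writers. The globals below are illustrative, not part of the IPRT API:
 * the ordered store (XCHG based) doubles as a full barrier, while the
 * unordered one is enough for purely informational values.
 */
#if 0 /* usage sketch only */
static volatile uint64_t g_cbLastChunk;   /* informational, nobody orders on it */
static volatile uint64_t g_u64LastSeen;   /* consumed by another thread */

static void ExampleRecord(uint64_t cbLast, uint64_t u64Timestamp)
{
    /* Unordered: a plain store on 64-bit hosts, XCHG on 32-bit ones. */
    ASMAtomicUoWriteU64(&g_cbLastChunk, cbLast);
    /* Ordered: earlier stores are globally visible before the timestamp. */
    ASMAtomicWriteU64(&g_u64LastSeen, u64Timestamp);
}
#endif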
4731
4732
4733/**
4734 * Atomically writes a signed 64-bit value, ordered.
4735 *
4736 * @param pi64 Pointer to the 64-bit variable.
4737 * @param i64 The 64-bit value to assign to *pi64.
4738 */
4739DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4740{
4741 ASMAtomicXchgS64(pi64, i64);
4742}
4743
4744
4745/**
4746 * Atomically writes a signed 64-bit value, unordered.
4747 *
4748 * @param pi64 Pointer to the 64-bit variable.
4749 * @param i64 The 64-bit value to assign to *pi64.
4750 */
4751DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4752{
4753 Assert(!((uintptr_t)pi64 & 7));
4754#if ARCH_BITS == 64
4755 *pi64 = i64;
4756#else
4757 ASMAtomicXchgS64(pi64, i64);
4758#endif
4759}
4760
4761
4762/**
4763 * Atomically writes a boolean value, ordered.
4764 *
4765 * @param pf Pointer to the boolean variable.
4766 * @param f The boolean value to assign to *pf.
4767 */
4768DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4769{
4770 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4771}
4772
4773
4774/**
4775 * Atomically writes a boolean value, unordered.
4776 *
4777 * @param pf Pointer to the boolean variable.
4778 * @param f The boolean value to assign to *pf.
4779 */
4780DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4781{
4782 *pf = f; /* byte writes are atomic on x86 */
4783}
4784
4785
4786/**
4787 * Atomically writes a pointer value, ordered.
4788 *
4790 * @param ppv Pointer to the pointer variable.
4791 * @param pv The pointer value to assign to *ppv.
4792 */
4793DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4794{
4795#if ARCH_BITS == 32
4796 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4797#elif ARCH_BITS == 64
4798 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4799#else
4800# error "ARCH_BITS is bogus"
4801#endif
4802}
4803
4804
4805/**
4806 * Atomically writes a pointer value, unordered.
4807 *
4809 * @param ppv Pointer to the pointer variable.
4810 * @param pv The pointer value to assign to *ppv.
4811 */
4812DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4813{
4814#if ARCH_BITS == 32
4815 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4816#elif ARCH_BITS == 64
4817 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4818#else
4819# error "ARCH_BITS is bogus"
4820#endif
4821}
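
/*
 * A minimal usage sketch of lock-free pointer publication. EXAMPLECFG and
 * g_pvExampleCfg are illustrative, not part of the IPRT API: the ordered
 * write makes a fully initialized block visible before the pointer itself,
 * and readers then see either NULL or a complete block.
 */
#if 0 /* usage sketch only */
typedef struct EXAMPLECFG { uint32_t cItems; } EXAMPLECFG;
static void * volatile g_pvExampleCfg;

static void ExamplePublish(EXAMPLECFG *pCfg)
{
    ASMAtomicWritePtr(&g_pvExampleCfg, pCfg);
}

static EXAMPLECFG *ExampleRetrieve(void)
{
    return (EXAMPLECFG *)ASMAtomicReadPtr(&g_pvExampleCfg);
}
#endif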
4822
4823
4824/**
4825 * Atomically write a typical IPRT handle value, ordered.
4826 *
4827 * @param ph Pointer to the variable to update.
4828 * @param hNew The value to assign to *ph.
4829 *
4830 * @remarks This doesn't currently work for all handles (like RTFILE).
4831 */
4832#if HC_ARCH_BITS == 32
4833# define ASMAtomicWriteHandle(ph, hNew) \
4834 do { \
4835 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4836 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4837 } while (0)
4838#elif HC_ARCH_BITS == 64
4839# define ASMAtomicWriteHandle(ph, hNew) \
4840 do { \
4841 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4842 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4843 } while (0)
4844#else
4845# error HC_ARCH_BITS
4846#endif
4847
4848
4849/**
4850 * Atomically write a typical IPRT handle value, unordered.
4851 *
4852 * @param ph Pointer to the variable to update.
4853 * @param hNew The value to assign to *ph.
4854 *
4855 * @remarks This doesn't currently work for all handles (like RTFILE).
4856 */
4857#if HC_ARCH_BITS == 32
4858# define ASMAtomicUoWriteHandle(ph, hNew) \
4859 do { \
4860 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4861 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4862 } while (0)
4863#elif HC_ARCH_BITS == 64
4864# define ASMAtomicUoWriteHandle(ph, hNew) \
4865 do { \
4866 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4867 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4868 } while (0)
4869#else
4870# error HC_ARCH_BITS
4871#endif
4872
4873
4874/**
4875 * Atomically write a value whose size might differ
4876 * between platforms or compilers, ordered.
4877 *
4878 * @param pu Pointer to the variable to update.
4879 * @param uNew The value to assign to *pu.
4880 */
4881#define ASMAtomicWriteSize(pu, uNew) \
4882 do { \
4883 switch (sizeof(*(pu))) { \
4884 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4885 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4886 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4887 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4888 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4889 } \
4890 } while (0)
4891
4892/**
4893 * Atomically write a value whose size might differ
4894 * between platforms or compilers, unordered.
4895 *
4896 * @param pu Pointer to the variable to update.
4897 * @param uNew The value to assign to *pu.
4898 */
4899#define ASMAtomicUoWriteSize(pu, uNew) \
4900 do { \
4901 switch (sizeof(*(pu))) { \
4902 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4903 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4904 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4905 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4906 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4907 } \
4908 } while (0)
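
/*
 * A minimal usage sketch of the *Size macros, which dispatch on sizeof() at
 * compile time and thus work for fields whose width differs between hosts.
 * EXAMPLESTATE is illustrative, not part of the IPRT API; note that the read
 * macro returns its result through an out parameter.
 */
#if 0 /* usage sketch only */
typedef struct EXAMPLESTATE { size_t volatile uStatus; } EXAMPLESTATE;

static void ExampleSetStatus(EXAMPLESTATE *pState, size_t uNew)
{
    ASMAtomicWriteSize(&pState->uStatus, uNew);   /* picks the 32/64-bit writer */
}

static size_t ExampleGetStatus(EXAMPLESTATE *pState)
{
    size_t uRes;
    ASMAtomicReadSize(&pState->uStatus, &uRes);
    return uRes;
}
#endif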
4909
4910
4911
4912
4913/**
4914 * Invalidate page.
4915 *
4916 * @param pv Address of the page to invalidate.
4917 */
4918#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4919DECLASM(void) ASMInvalidatePage(void *pv);
4920#else
4921DECLINLINE(void) ASMInvalidatePage(void *pv)
4922{
4923# if RT_INLINE_ASM_USES_INTRIN
4924 __invlpg(pv);
4925
4926# elif RT_INLINE_ASM_GNU_STYLE
4927 __asm__ __volatile__("invlpg %0\n\t"
4928 : : "m" (*(uint8_t *)pv));
4929# else
4930 __asm
4931 {
4932# ifdef RT_ARCH_AMD64
4933 mov rax, [pv]
4934 invlpg [rax]
4935# else
4936 mov eax, [pv]
4937 invlpg [eax]
4938# endif
4939 }
4940# endif
4941}
4942#endif
4943
4944
4945/**
4946 * Write back the internal caches and invalidate them.
4947 */
4948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4949DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4950#else
4951DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4952{
4953# if RT_INLINE_ASM_USES_INTRIN
4954 __wbinvd();
4955
4956# elif RT_INLINE_ASM_GNU_STYLE
4957 __asm__ __volatile__("wbinvd");
4958# else
4959 __asm
4960 {
4961 wbinvd
4962 }
4963# endif
4964}
4965#endif
4966
4967
4968/**
4969 * Invalidate internal and (perhaps) external caches without first
4970 * flushing dirty cache lines. Use with extreme care.
4971 */
4972#if RT_INLINE_ASM_EXTERNAL
4973DECLASM(void) ASMInvalidateInternalCaches(void);
4974#else
4975DECLINLINE(void) ASMInvalidateInternalCaches(void)
4976{
4977# if RT_INLINE_ASM_GNU_STYLE
4978 __asm__ __volatile__("invd");
4979# else
4980 __asm
4981 {
4982 invd
4983 }
4984# endif
4985}
4986#endif
4987
4988
4989#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4990# if PAGE_SIZE != 0x1000
4991# error "PAGE_SIZE is not 0x1000!"
4992# endif
4993#endif
4994
4995/**
4996 * Zeros a 4K memory page.
4997 *
4998 * @param pv Pointer to the memory block. This must be page aligned.
4999 */
5000#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5001DECLASM(void) ASMMemZeroPage(volatile void *pv);
5002# else
5003DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
5004{
5005# if RT_INLINE_ASM_USES_INTRIN
5006# ifdef RT_ARCH_AMD64
5007 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
5008# else
5009 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
5010# endif
5011
5012# elif RT_INLINE_ASM_GNU_STYLE
5013 RTCCUINTREG uDummy;
5014# ifdef RT_ARCH_AMD64
5015 __asm__ __volatile__("rep stosq"
5016 : "=D" (pv),
5017 "=c" (uDummy)
5018 : "0" (pv),
5019 "c" (0x1000 >> 3),
5020 "a" (0)
5021 : "memory");
5022# else
5023 __asm__ __volatile__("rep stosl"
5024 : "=D" (pv),
5025 "=c" (uDummy)
5026 : "0" (pv),
5027 "c" (0x1000 >> 2),
5028 "a" (0)
5029 : "memory");
5030# endif
5031# else
5032 __asm
5033 {
5034# ifdef RT_ARCH_AMD64
5035 xor rax, rax
5036 mov ecx, 0200h
5037 mov rdi, [pv]
5038 rep stosq
5039# else
5040 xor eax, eax
5041 mov ecx, 0400h
5042 mov edi, [pv]
5043 rep stosd
5044# endif
5045 }
5046# endif
5047}
5048# endif
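
/*
 * A minimal usage sketch for ASMMemZeroPage. The scratch page below is
 * illustrative (GCC alignment syntax shown); the only requirement the
 * function imposes is a page aligned, 4K sized buffer.
 */
#if 0 /* usage sketch only */
static uint8_t g_abScratchPage[0x1000] __attribute__((aligned(0x1000)));

static void ExampleResetScratchPage(void)
{
    ASMMemZeroPage(g_abScratchPage);   /* REP STOS over the whole 4K page */
}
#endif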
5049
5050
5051/**
5052 * Zeros a memory block with a 32-bit aligned size.
5053 *
5054 * @param pv Pointer to the memory block.
5055 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5056 */
5057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5058DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
5059#else
5060DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
5061{
5062# if RT_INLINE_ASM_USES_INTRIN
5063# ifdef RT_ARCH_AMD64
5064 if (!(cb & 7))
5065 __stosq((unsigned __int64 *)pv, 0, cb / 8);
5066 else
5067# endif
5068 __stosd((unsigned long *)pv, 0, cb / 4);
5069
5070# elif RT_INLINE_ASM_GNU_STYLE
5071 __asm__ __volatile__("rep stosl"
5072 : "=D" (pv),
5073 "=c" (cb)
5074 : "0" (pv),
5075 "1" (cb >> 2),
5076 "a" (0)
5077 : "memory");
5078# else
5079 __asm
5080 {
5081 xor eax, eax
5082# ifdef RT_ARCH_AMD64
5083 mov rcx, [cb]
5084 shr rcx, 2
5085 mov rdi, [pv]
5086# else
5087 mov ecx, [cb]
5088 shr ecx, 2
5089 mov edi, [pv]
5090# endif
5091 rep stosd
5092 }
5093# endif
5094}
5095#endif
5096
5097
5098/**
5099 * Fills a memory block with a 32-bit aligned size.
5100 *
5101 * @param pv Pointer to the memory block.
5102 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5103 * @param u32 The value to fill with.
5104 */
5105#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5106DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5107#else
5108DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5109{
5110# if RT_INLINE_ASM_USES_INTRIN
5111# ifdef RT_ARCH_AMD64
5112 if (!(cb & 7))
5113 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5114 else
5115# endif
5116 __stosd((unsigned long *)pv, u32, cb / 4);
5117
5118# elif RT_INLINE_ASM_GNU_STYLE
5119 __asm__ __volatile__("rep stosl"
5120 : "=D" (pv),
5121 "=c" (cb)
5122 : "0" (pv),
5123 "1" (cb >> 2),
5124 "a" (u32)
5125 : "memory");
5126# else
5127 __asm
5128 {
5129# ifdef RT_ARCH_AMD64
5130 mov rcx, [cb]
5131 shr rcx, 2
5132 mov rdi, [pv]
5133# else
5134 mov ecx, [cb]
5135 shr ecx, 2
5136 mov edi, [pv]
5137# endif
5138 mov eax, [u32]
5139 rep stosd
5140 }
5141# endif
5142}
5143#endif
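
/*
 * A minimal usage sketch for ASMMemFill32/ASMMemZero32. The table and the
 * poison pattern are illustrative; in both calls cb must be a multiple of
 * four bytes.
 */
#if 0 /* usage sketch only */
static uint32_t g_au32Table[64];

static void ExamplePoisonTable(void)
{
    ASMMemFill32(g_au32Table, sizeof(g_au32Table), UINT32_C(0xdeadbeef));
}

static void ExampleClearTable(void)
{
    ASMMemZero32(g_au32Table, sizeof(g_au32Table));
}
#endif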
5144
5145
5146/**
5147 * Checks if a memory page is all zeros.
5148 *
5149 * @returns true / false.
5150 *
5151 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5152 * boundary.
5153 */
5154DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
5155{
5156# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5157 union { RTCCUINTREG r; bool f; } uAX;
5158 RTCCUINTREG xCX, xDI;
5159 Assert(!((uintptr_t)pvPage & 15));
5160 __asm__ __volatile__("repe; "
5161# ifdef RT_ARCH_AMD64
5162 "scasq\n\t"
5163# else
5164 "scasl\n\t"
5165# endif
5166 "setnc %%al\n\t"
5167 : "=&c" (xCX),
5168 "=&D" (xDI),
5169 "=&a" (uAX.r)
5170 : "mr" (pvPage),
5171# ifdef RT_ARCH_AMD64
5172 "0" (0x1000/8),
5173# else
5174 "0" (0x1000/4),
5175# endif
5176 "1" (pvPage),
5177 "2" (0));
5178 return uAX.f;
5179# else
5180 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
5181 int cLeft = 0x1000 / sizeof(uintptr_t) / 8;
5182 Assert(!((uintptr_t)pvPage & 15));
5183 for (;;)
5184 {
5185 if (puPtr[0]) return false;
5186 if (puPtr[4]) return false;
5187
5188 if (puPtr[2]) return false;
5189 if (puPtr[6]) return false;
5190
5191 if (puPtr[1]) return false;
5192 if (puPtr[5]) return false;
5193
5194 if (puPtr[3]) return false;
5195 if (puPtr[7]) return false;
5196
5197 if (!--cLeft)
5198 return true;
5199 puPtr += 8;
5200 }
5201 return true;
5202# endif
5203}
5204
5205
5206/**
5207 * Checks if a memory block is filled with the specified byte.
5208 *
5209 * This is a sort of inverted memchr.
5210 *
5211 * @returns Pointer to the byte which doesn't equal u8.
5212 * @returns NULL if all equal to u8.
5213 *
5214 * @param pv Pointer to the memory block.
5215 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5216 * @param u8 The value it's supposed to be filled with.
5217 *
5218 * @todo Fix name, it is a predicate function but it's not returning boolean!
5219 */
5220#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5221DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5222#else
5223DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5224{
5225/** @todo rewrite this in inline assembly? */
5226 uint8_t const *pb = (uint8_t const *)pv;
5227 for (; cb; cb--, pb++)
5228 if (RT_UNLIKELY(*pb != u8))
5229 return (void *)pb;
5230 return NULL;
5231}
5232#endif
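
/*
 * A minimal usage sketch for ASMMemIsAll8, wrapping the inverted return
 * convention (NULL means "all bytes match") into a boolean. The poison
 * value and helper name are illustrative.
 */
#if 0 /* usage sketch only */
static bool ExampleIsStillPoisoned(const uint8_t *pbBlock, size_t cbBlock)
{
    return ASMMemIsAll8(pbBlock, cbBlock, 0xcc) == NULL;
}
#endif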
5233
5234
5235/**
5236 * Checks if a memory block is filled with the specified 32-bit value.
5237 *
5238 * This is a sort of inverted memchr.
5239 *
5240 * @returns Pointer to the first value which doesn't equal u32.
5241 * @returns NULL if all equal to u32.
5242 *
5243 * @param pv Pointer to the memory block.
5244 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5245 * @param u32 The value it's supposed to be filled with.
5246 *
5247 * @todo Fix name, it is a predicate function but it's not returning boolean!
5248 */
5249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5250DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5251#else
5252DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5253{
5254/** @todo rewrite this in inline assembly? */
5255 uint32_t const *pu32 = (uint32_t const *)pv;
5256 for (; cb; cb -= 4, pu32++)
5257 if (RT_UNLIKELY(*pu32 != u32))
5258 return (uint32_t *)pu32;
5259 return NULL;
5260}
5261#endif
5262
5263
5264/**
5265 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5266 *
5267 * @returns u32F1 * u32F2.
5268 */
5269#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5270DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5271#else
5272DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5273{
5274# ifdef RT_ARCH_AMD64
5275 return (uint64_t)u32F1 * u32F2;
5276# else /* !RT_ARCH_AMD64 */
5277 uint64_t u64;
5278# if RT_INLINE_ASM_GNU_STYLE
5279 __asm__ __volatile__("mull %%edx"
5280 : "=A" (u64)
5281 : "a" (u32F2), "d" (u32F1));
5282# else
5283 __asm
5284 {
5285 mov edx, [u32F1]
5286 mov eax, [u32F2]
5287 mul edx
5288 mov dword ptr [u64], eax
5289 mov dword ptr [u64 + 4], edx
5290 }
5291# endif
5292 return u64;
5293# endif /* !RT_ARCH_AMD64 */
5294}
5295#endif
5296
5297
5298/**
5299 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5300 *
5301 * @returns i32F1 * i32F2.
5302 */
5303#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5304DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5305#else
5306DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5307{
5308# ifdef RT_ARCH_AMD64
5309 return (int64_t)i32F1 * i32F2;
5310# else /* !RT_ARCH_AMD64 */
5311 int64_t i64;
5312# if RT_INLINE_ASM_GNU_STYLE
5313 __asm__ __volatile__("imull %%edx"
5314 : "=A" (i64)
5315 : "a" (i32F2), "d" (i32F1));
5316# else
5317 __asm
5318 {
5319 mov edx, [i32F1]
5320 mov eax, [i32F2]
5321 imul edx
5322 mov dword ptr [i64], eax
5323 mov dword ptr [i64 + 4], edx
5324 }
5325# endif
5326 return i64;
5327# endif /* !RT_ARCH_AMD64 */
5328}
5329#endif
5330
5331
5332/**
5333 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5334 *
5335 * @returns u64 / u32.
5336 */
5337#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5338DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5339#else
5340DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5341{
5342# ifdef RT_ARCH_AMD64
5343 return (uint32_t)(u64 / u32);
5344# else /* !RT_ARCH_AMD64 */
5345# if RT_INLINE_ASM_GNU_STYLE
5346 RTCCUINTREG uDummy;
5347 __asm__ __volatile__("divl %3"
5348 : "=a" (u32), "=d"(uDummy)
5349 : "A" (u64), "r" (u32));
5350# else
5351 __asm
5352 {
5353 mov eax, dword ptr [u64]
5354 mov edx, dword ptr [u64 + 4]
5355 mov ecx, [u32]
5356 div ecx
5357 mov [u32], eax
5358 }
5359# endif
5360 return u32;
5361# endif /* !RT_ARCH_AMD64 */
5362}
5363#endif
5364
5365
5366/**
5367 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5368 *
5369 * @returns i64 / i32.
5370 */
5371#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5372DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5373#else
5374DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5375{
5376# ifdef RT_ARCH_AMD64
5377 return (int32_t)(i64 / i32);
5378# else /* !RT_ARCH_AMD64 */
5379# if RT_INLINE_ASM_GNU_STYLE
5380 RTCCUINTREG iDummy;
5381 __asm__ __volatile__("idivl %3"
5382 : "=a" (i32), "=d"(iDummy)
5383 : "A" (i64), "r" (i32));
5384# else
5385 __asm
5386 {
5387 mov eax, dword ptr [i64]
5388 mov edx, dword ptr [i64 + 4]
5389 mov ecx, [i32]
5390 idiv ecx
5391 mov [i32], eax
5392 }
5393# endif
5394 return i32;
5395# endif /* !RT_ARCH_AMD64 */
5396}
5397#endif
5398
5399
5400/**
5401 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5402 * returning the remainder.
5403 *
5404 * @returns u64 % u32.
5405 *
5406 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5407 */
5408#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5409DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5410#else
5411DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5412{
5413# ifdef RT_ARCH_AMD64
5414 return (uint32_t)(u64 % u32);
5415# else /* !RT_ARCH_AMD64 */
5416# if RT_INLINE_ASM_GNU_STYLE
5417 RTCCUINTREG uDummy;
5418 __asm__ __volatile__("divl %3"
5419 : "=a" (uDummy), "=d"(u32)
5420 : "A" (u64), "r" (u32));
5421# else
5422 __asm
5423 {
5424 mov eax, dword ptr [u64]
5425 mov edx, dword ptr [u64 + 4]
5426 mov ecx, [u32]
5427 div ecx
5428 mov [u32], edx
5429 }
5430# endif
5431 return u32;
5432# endif /* !RT_ARCH_AMD64 */
5433}
5434#endif
5435
5436
5437/**
5438 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5439 * returning the remainder.
5440 *
5441 * @returns i64 % i32.
5442 *
5443 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5444 */
5445#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5446DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5447#else
5448DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5449{
5450# ifdef RT_ARCH_AMD64
5451 return (int32_t)(i64 % i32);
5452# else /* !RT_ARCH_AMD64 */
5453# if RT_INLINE_ASM_GNU_STYLE
5454 RTCCUINTREG iDummy;
5455 __asm__ __volatile__("idivl %3"
5456 : "=a" (iDummy), "=d"(i32)
5457 : "A" (i64), "r" (i32));
5458# else
5459 __asm
5460 {
5461 mov eax, dword ptr [i64]
5462 mov edx, dword ptr [i64 + 4]
5463 mov ecx, [i32]
5464 idiv ecx
5465 mov [i32], edx
5466 }
5467# endif
5468 return i32;
5469# endif /* !RT_ARCH_AMD64 */
5470}
5471#endif
5472
5473
5474/**
5475 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
5476 * using a 96-bit intermediate result.
5477 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5478 * __udivdi3 and __umoddi3 even if this inline function is not used.
5479 *
5480 * @returns (u64A * u32B) / u32C.
5481 * @param u64A The 64-bit value.
5482 * @param u32B The 32-bit value to multiply A by.
5483 * @param u32C The 32-bit value to divide A*B by.
5484 */
5485#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5486DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5487#else
5488DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5489{
5490# if RT_INLINE_ASM_GNU_STYLE
5491# ifdef RT_ARCH_AMD64
5492 uint64_t u64Result, u64Spill;
5493 __asm__ __volatile__("mulq %2\n\t"
5494 "divq %3\n\t"
5495 : "=a" (u64Result),
5496 "=d" (u64Spill)
5497 : "r" ((uint64_t)u32B),
5498 "r" ((uint64_t)u32C),
5499 "0" (u64A),
5500 "1" (0));
5501 return u64Result;
5502# else
5503 uint32_t u32Dummy;
5504 uint64_t u64Result;
5505 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5506 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5507 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5508 eax = u64A.hi */
5509 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5510 edx = u32C */
5511 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5512 edx = u32B */
5513 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5514 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5515 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5516 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5517 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5518 edx = u64Hi % u32C */
5519 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5520 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5521 "divl %%ecx \n\t" /* u64Result.lo */
5522 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5523 : "=A"(u64Result), "=c"(u32Dummy),
5524 "=S"(u32Dummy), "=D"(u32Dummy)
5525 : "a"((uint32_t)u64A),
5526 "S"((uint32_t)(u64A >> 32)),
5527 "c"(u32B),
5528 "D"(u32C));
5529 return u64Result;
5530# endif
5531# else
5532 RTUINT64U u;
5533 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5534 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5535 u64Hi += (u64Lo >> 32);
5536 u.s.Hi = (uint32_t)(u64Hi / u32C);
5537 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5538 return u.u;
5539# endif
5540}
5541#endif
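
/*
 * A minimal usage sketch: converting a tick count to nanoseconds is the
 * classic case for the 96-bit intermediate, since cTicks * 10^9 would
 * overflow 64 bits long before the division. The helper name is
 * illustrative.
 */
#if 0 /* usage sketch only */
static uint64_t ExampleTicksToNano(uint64_t cTicks, uint32_t uHzTickFreq)
{
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHzTickFreq);
}
#endif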
5542
5543
5544/**
5545 * Probes a byte pointer for read access.
5546 *
5547 * While the function will fault if the byte is not read accessible,
5548 * the idea is to do this in a safe place like before acquiring locks
5549 * and such like.
5550 *
5551 * Also, this function guarantees that an eager compiler is not going
5552 * to optimize the probing away.
5553 *
5554 * @param pvByte Pointer to the byte.
5555 */
5556#if RT_INLINE_ASM_EXTERNAL
5557DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5558#else
5559DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5560{
5561 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5562 uint8_t u8;
5563# if RT_INLINE_ASM_GNU_STYLE
5564 __asm__ __volatile__("movb (%1), %0\n\t"
5565 : "=r" (u8)
5566 : "r" (pvByte));
5567# else
5568 __asm
5569 {
5570# ifdef RT_ARCH_AMD64
5571 mov rax, [pvByte]
5572 mov al, [rax]
5573# else
5574 mov eax, [pvByte]
5575 mov al, [eax]
5576# endif
5577 mov [u8], al
5578 }
5579# endif
5580 return u8;
5581}
5582#endif
5583
5584/**
5585 * Probes a buffer for read access page by page.
5586 *
5587 * While the function will fault if the buffer is not fully read
5588 * accessible, the idea is to do this in a safe place like before
5589 * acquiring locks and such like.
5590 *
5591 * Also, this function guarantees that an eager compiler is not going
5592 * to optimize the probing away.
5593 *
5594 * @param pvBuf Pointer to the buffer.
5595 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5596 */
5597DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5598{
5599 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5600 /* the first byte */
5601 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5602 ASMProbeReadByte(pu8);
5603
5604 /* the pages in between. */
5605 while (cbBuf > /*PAGE_SIZE*/0x1000)
5606 {
5607 ASMProbeReadByte(pu8);
5608 cbBuf -= /*PAGE_SIZE*/0x1000;
5609 pu8 += /*PAGE_SIZE*/0x1000;
5610 }
5611
5612 /* the last byte */
5613 ASMProbeReadByte(pu8 + cbBuf - 1);
5614}
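
/*
 * A minimal usage sketch for ASMProbeReadBuffer: touch a caller supplied
 * buffer before taking a spinlock so any page fault is raised in a context
 * where it can be handled. The helper name is illustrative.
 */
#if 0 /* usage sketch only */
static void ExamplePrepareRequest(const void *pvReq, size_t cbReq)
{
    ASMProbeReadBuffer(pvReq, cbReq);   /* fault here, not inside the lock */
    /* ... acquire the lock and process the request ... */
}
#endif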
5615
5616
5617/** @def ASMBreakpoint
5618 * Debugger Breakpoint.
5619 * @remark In the gnu world we add a nop instruction after the int3 to
5620 * force gdb to remain at the int3 source line.
5621 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5622 * @internal
5623 */
5624#if RT_INLINE_ASM_GNU_STYLE
5625# ifndef __L4ENV__
5626# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5627# else
5628# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5629# endif
5630#else
5631# define ASMBreakpoint() __debugbreak()
5632#endif
5633
5634
5635
5636/** @defgroup grp_inline_bits Bit Operations
5637 * @{
5638 */
5639
5640
5641/**
5642 * Sets a bit in a bitmap.
5643 *
5644 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5645 * @param iBit The bit to set.
5646 *
5647 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5648 * However, doing so will yield better performance as well as avoiding
5649 * traps accessing the last bits in the bitmap.
5650 */
5651#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5652DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5653#else
5654DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5655{
5656# if RT_INLINE_ASM_USES_INTRIN
5657 _bittestandset((long *)pvBitmap, iBit);
5658
5659# elif RT_INLINE_ASM_GNU_STYLE
5660 __asm__ __volatile__("btsl %1, %0"
5661 : "=m" (*(volatile long *)pvBitmap)
5662 : "Ir" (iBit),
5663 "m" (*(volatile long *)pvBitmap)
5664 : "memory");
5665# else
5666 __asm
5667 {
5668# ifdef RT_ARCH_AMD64
5669 mov rax, [pvBitmap]
5670 mov edx, [iBit]
5671 bts [rax], edx
5672# else
5673 mov eax, [pvBitmap]
5674 mov edx, [iBit]
5675 bts [eax], edx
5676# endif
5677 }
5678# endif
5679}
5680#endif
5681
5682
5683/**
5684 * Atomically sets a bit in a bitmap, ordered.
5685 *
5686 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5687 * the memory access isn't atomic!
5688 * @param iBit The bit to set.
5689 */
5690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5691DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5692#else
5693DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5694{
5695 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5696# if RT_INLINE_ASM_USES_INTRIN
5697 _interlockedbittestandset((long *)pvBitmap, iBit);
5698# elif RT_INLINE_ASM_GNU_STYLE
5699 __asm__ __volatile__("lock; btsl %1, %0"
5700 : "=m" (*(volatile long *)pvBitmap)
5701 : "Ir" (iBit),
5702 "m" (*(volatile long *)pvBitmap)
5703 : "memory");
5704# else
5705 __asm
5706 {
5707# ifdef RT_ARCH_AMD64
5708 mov rax, [pvBitmap]
5709 mov edx, [iBit]
5710 lock bts [rax], edx
5711# else
5712 mov eax, [pvBitmap]
5713 mov edx, [iBit]
5714 lock bts [eax], edx
5715# endif
5716 }
5717# endif
5718}
5719#endif
5720
5721
5722/**
5723 * Clears a bit in a bitmap.
5724 *
5725 * @param pvBitmap Pointer to the bitmap.
5726 * @param iBit The bit to clear.
5727 *
5728 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5729 * However, doing so will yield better performance as well as avoiding
5730 * traps accessing the last bits in the bitmap.
5731 */
5732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5733DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5734#else
5735DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5736{
5737# if RT_INLINE_ASM_USES_INTRIN
5738 _bittestandreset((long *)pvBitmap, iBit);
5739
5740# elif RT_INLINE_ASM_GNU_STYLE
5741 __asm__ __volatile__("btrl %1, %0"
5742 : "=m" (*(volatile long *)pvBitmap)
5743 : "Ir" (iBit),
5744 "m" (*(volatile long *)pvBitmap)
5745 : "memory");
5746# else
5747 __asm
5748 {
5749# ifdef RT_ARCH_AMD64
5750 mov rax, [pvBitmap]
5751 mov edx, [iBit]
5752 btr [rax], edx
5753# else
5754 mov eax, [pvBitmap]
5755 mov edx, [iBit]
5756 btr [eax], edx
5757# endif
5758 }
5759# endif
5760}
5761#endif
5762
5763
5764/**
5765 * Atomically clears a bit in a bitmap, ordered.
5766 *
5767 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5768 * the memory access isn't atomic!
5769 * @param iBit The bit to clear.
5770 * @remarks No memory barrier, take care on SMP.
5771 */
5772#if RT_INLINE_ASM_EXTERNAL
5773DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5774#else
5775DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5776{
5777 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5778# if RT_INLINE_ASM_GNU_STYLE
5779 __asm__ __volatile__("lock; btrl %1, %0"
5780 : "=m" (*(volatile long *)pvBitmap)
5781 : "Ir" (iBit),
5782 "m" (*(volatile long *)pvBitmap)
5783 : "memory");
5784# else
5785 __asm
5786 {
5787# ifdef RT_ARCH_AMD64
5788 mov rax, [pvBitmap]
5789 mov edx, [iBit]
5790 lock btr [rax], edx
5791# else
5792 mov eax, [pvBitmap]
5793 mov edx, [iBit]
5794 lock btr [eax], edx
5795# endif
5796 }
5797# endif
5798}
5799#endif
5800
5801
5802/**
5803 * Toggles a bit in a bitmap.
5804 *
5805 * @param pvBitmap Pointer to the bitmap.
5806 * @param iBit The bit to toggle.
5807 *
5808 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5809 * However, doing so will yield better performance as well as avoiding
5810 * traps accessing the last bits in the bitmap.
5811 */
5812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5813DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5814#else
5815DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5816{
5817# if RT_INLINE_ASM_USES_INTRIN
5818 _bittestandcomplement((long *)pvBitmap, iBit);
5819# elif RT_INLINE_ASM_GNU_STYLE
5820 __asm__ __volatile__("btcl %1, %0"
5821 : "=m" (*(volatile long *)pvBitmap)
5822 : "Ir" (iBit),
5823 "m" (*(volatile long *)pvBitmap)
5824 : "memory");
5825# else
5826 __asm
5827 {
5828# ifdef RT_ARCH_AMD64
5829 mov rax, [pvBitmap]
5830 mov edx, [iBit]
5831 btc [rax], edx
5832# else
5833 mov eax, [pvBitmap]
5834 mov edx, [iBit]
5835 btc [eax], edx
5836# endif
5837 }
5838# endif
5839}
5840#endif
5841
5842
5843/**
5844 * Atomically toggles a bit in a bitmap, ordered.
5845 *
5846 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5847 * the memory access isn't atomic!
5848 * @param iBit The bit to toggle.
5849 */
5850#if RT_INLINE_ASM_EXTERNAL
5851DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5852#else
5853DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5854{
5855 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5856# if RT_INLINE_ASM_GNU_STYLE
5857 __asm__ __volatile__("lock; btcl %1, %0"
5858 : "=m" (*(volatile long *)pvBitmap)
5859 : "Ir" (iBit),
5860 "m" (*(volatile long *)pvBitmap)
5861 : "memory");
5862# else
5863 __asm
5864 {
5865# ifdef RT_ARCH_AMD64
5866 mov rax, [pvBitmap]
5867 mov edx, [iBit]
5868 lock btc [rax], edx
5869# else
5870 mov eax, [pvBitmap]
5871 mov edx, [iBit]
5872 lock btc [eax], edx
5873# endif
5874 }
5875# endif
5876}
5877#endif
5878
5879
5880/**
5881 * Tests and sets a bit in a bitmap.
5882 *
5883 * @returns true if the bit was set.
5884 * @returns false if the bit was clear.
5885 *
5886 * @param pvBitmap Pointer to the bitmap.
5887 * @param iBit The bit to test and set.
5888 *
5889 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5890 * However, doing so will yield better performance as well as avoiding
5891 * traps accessing the last bits in the bitmap.
5892 */
5893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5894DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5895#else
5896DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5897{
5898 union { bool f; uint32_t u32; uint8_t u8; } rc;
5899# if RT_INLINE_ASM_USES_INTRIN
5900 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5901
5902# elif RT_INLINE_ASM_GNU_STYLE
5903 __asm__ __volatile__("btsl %2, %1\n\t"
5904 "setc %b0\n\t"
5905 "andl $1, %0\n\t"
5906 : "=q" (rc.u32),
5907 "=m" (*(volatile long *)pvBitmap)
5908 : "Ir" (iBit),
5909 "m" (*(volatile long *)pvBitmap)
5910 : "memory");
5911# else
5912 __asm
5913 {
5914 mov edx, [iBit]
5915# ifdef RT_ARCH_AMD64
5916 mov rax, [pvBitmap]
5917 bts [rax], edx
5918# else
5919 mov eax, [pvBitmap]
5920 bts [eax], edx
5921# endif
5922 setc al
5923 and eax, 1
5924 mov [rc.u32], eax
5925 }
5926# endif
5927 return rc.f;
5928}
5929#endif
5930
5931
5932/**
5933 * Atomically tests and sets a bit in a bitmap, ordered.
5934 *
5935 * @returns true if the bit was set.
5936 * @returns false if the bit was clear.
5937 *
5938 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5939 * the memory access isn't atomic!
5940 * @param iBit The bit to set.
5941 */
5942#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5943DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5944#else
5945DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5946{
5947 union { bool f; uint32_t u32; uint8_t u8; } rc;
5948 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5949# if RT_INLINE_ASM_USES_INTRIN
5950 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5951# elif RT_INLINE_ASM_GNU_STYLE
5952 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5953 "setc %b0\n\t"
5954 "andl $1, %0\n\t"
5955 : "=q" (rc.u32),
5956 "=m" (*(volatile long *)pvBitmap)
5957 : "Ir" (iBit),
5958 "m" (*(volatile long *)pvBitmap)
5959 : "memory");
5960# else
5961 __asm
5962 {
5963 mov edx, [iBit]
5964# ifdef RT_ARCH_AMD64
5965 mov rax, [pvBitmap]
5966 lock bts [rax], edx
5967# else
5968 mov eax, [pvBitmap]
5969 lock bts [eax], edx
5970# endif
5971 setc al
5972 and eax, 1
5973 mov [rc.u32], eax
5974 }
5975# endif
5976 return rc.f;
5977}
5978#endif
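
/*
 * A minimal usage sketch: claiming a slot from a bitmap with
 * ASMAtomicBitTestAndSet. A false return means the bit was clear and this
 * caller now owns the slot. The bitmap and slot count are illustrative.
 */
#if 0 /* usage sketch only */
static uint32_t volatile g_au32SlotBitmap[2];   /* 64 slots, 32-bit aligned */

static int32_t ExampleClaimSlot(void)
{
    int32_t iBit;
    for (iBit = 0; iBit < 64; iBit++)
        if (!ASMAtomicBitTestAndSet(g_au32SlotBitmap, iBit))
            return iBit;                        /* was clear, now ours */
    return -1;                                  /* all slots taken */
}

static void ExampleReleaseSlot(int32_t iBit)
{
    ASMAtomicBitClear(g_au32SlotBitmap, iBit);
}
#endif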
5979
5980
5981/**
5982 * Tests and clears a bit in a bitmap.
5983 *
5984 * @returns true if the bit was set.
5985 * @returns false if the bit was clear.
5986 *
5987 * @param pvBitmap Pointer to the bitmap.
5988 * @param iBit The bit to test and clear.
5989 *
5990 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5991 * However, doing so will yield better performance as well as avoiding
5992 * traps accessing the last bits in the bitmap.
5993 */
5994#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5995DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5996#else
5997DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5998{
5999 union { bool f; uint32_t u32; uint8_t u8; } rc;
6000# if RT_INLINE_ASM_USES_INTRIN
6001 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
6002
6003# elif RT_INLINE_ASM_GNU_STYLE
6004 __asm__ __volatile__("btrl %2, %1\n\t"
6005 "setc %b0\n\t"
6006 "andl $1, %0\n\t"
6007 : "=q" (rc.u32),
6008 "=m" (*(volatile long *)pvBitmap)
6009 : "Ir" (iBit),
6010 "m" (*(volatile long *)pvBitmap)
6011 : "memory");
6012# else
6013 __asm
6014 {
6015 mov edx, [iBit]
6016# ifdef RT_ARCH_AMD64
6017 mov rax, [pvBitmap]
6018 btr [rax], edx
6019# else
6020 mov eax, [pvBitmap]
6021 btr [eax], edx
6022# endif
6023 setc al
6024 and eax, 1
6025 mov [rc.u32], eax
6026 }
6027# endif
6028 return rc.f;
6029}
6030#endif
6031
6032
6033/**
6034 * Atomically tests and clears a bit in a bitmap, ordered.
6035 *
6036 * @returns true if the bit was set.
6037 * @returns false if the bit was clear.
6038 *
6039 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6040 * the memory access isn't atomic!
6041 * @param iBit The bit to test and clear.
6042 *
6043 * @remarks No memory barrier, take care on smp.
6044 */
6045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6046DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
6047#else
6048DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
6049{
6050 union { bool f; uint32_t u32; uint8_t u8; } rc;
6051 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6052# if RT_INLINE_ASM_USES_INTRIN
6053 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
6054
6055# elif RT_INLINE_ASM_GNU_STYLE
6056 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6057 "setc %b0\n\t"
6058 "andl $1, %0\n\t"
6059 : "=q" (rc.u32),
6060 "=m" (*(volatile long *)pvBitmap)
6061 : "Ir" (iBit),
6062 "m" (*(volatile long *)pvBitmap)
6063 : "memory");
6064# else
6065 __asm
6066 {
6067 mov edx, [iBit]
6068# ifdef RT_ARCH_AMD64
6069 mov rax, [pvBitmap]
6070 lock btr [rax], edx
6071# else
6072 mov eax, [pvBitmap]
6073 lock btr [eax], edx
6074# endif
6075 setc al
6076 and eax, 1
6077 mov [rc.u32], eax
6078 }
6079# endif
6080 return rc.f;
6081}
6082#endif
6083
6084
6085/**
6086 * Tests and toggles a bit in a bitmap.
6087 *
6088 * @returns true if the bit was set.
6089 * @returns false if the bit was clear.
6090 *
6091 * @param pvBitmap Pointer to the bitmap.
6092 * @param iBit The bit to test and toggle.
6093 *
6094 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6095 * However, doing so will yield better performance as well as avoiding
6096 * traps accessing the last bits in the bitmap.
6097 */
6098#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6099DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6100#else
6101DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6102{
6103 union { bool f; uint32_t u32; uint8_t u8; } rc;
6104# if RT_INLINE_ASM_USES_INTRIN
6105 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
6106
6107# elif RT_INLINE_ASM_GNU_STYLE
6108 __asm__ __volatile__("btcl %2, %1\n\t"
6109 "setc %b0\n\t"
6110 "andl $1, %0\n\t"
6111 : "=q" (rc.u32),
6112 "=m" (*(volatile long *)pvBitmap)
6113 : "Ir" (iBit),
6114 "m" (*(volatile long *)pvBitmap)
6115 : "memory");
6116# else
6117 __asm
6118 {
6119 mov edx, [iBit]
6120# ifdef RT_ARCH_AMD64
6121 mov rax, [pvBitmap]
6122 btc [rax], edx
6123# else
6124 mov eax, [pvBitmap]
6125 btc [eax], edx
6126# endif
6127 setc al
6128 and eax, 1
6129 mov [rc.u32], eax
6130 }
6131# endif
6132 return rc.f;
6133}
6134#endif
6135
6136
6137/**
6138 * Atomically tests and toggles a bit in a bitmap, ordered.
6139 *
6140 * @returns true if the bit was set.
6141 * @returns false if the bit was clear.
6142 *
6143 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
6144 * the memory access isn't atomic!
6145 * @param iBit The bit to test and toggle.
6146 */
6147#if RT_INLINE_ASM_EXTERNAL
6148DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
6149#else
6150DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
6151{
6152 union { bool f; uint32_t u32; uint8_t u8; } rc;
6153 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6154# if RT_INLINE_ASM_GNU_STYLE
6155 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6156 "setc %b0\n\t"
6157 "andl $1, %0\n\t"
6158 : "=q" (rc.u32),
6159 "=m" (*(volatile long *)pvBitmap)
6160 : "Ir" (iBit),
6161 "m" (*(volatile long *)pvBitmap)
6162 : "memory");
6163# else
6164 __asm
6165 {
6166 mov edx, [iBit]
6167# ifdef RT_ARCH_AMD64
6168 mov rax, [pvBitmap]
6169 lock btc [rax], edx
6170# else
6171 mov eax, [pvBitmap]
6172 lock btc [eax], edx
6173# endif
6174 setc al
6175 and eax, 1
6176 mov [rc.u32], eax
6177 }
6178# endif
6179 return rc.f;
6180}
6181#endif
6182
6183
6184/**
6185 * Tests if a bit in a bitmap is set.
6186 *
6187 * @returns true if the bit is set.
6188 * @returns false if the bit is clear.
6189 *
6190 * @param pvBitmap Pointer to the bitmap.
6191 * @param iBit The bit to test.
6192 *
6193 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6194 * However, doing so will yield better performance as well as avoiding
6195 * traps accessing the last bits in the bitmap.
6196 */
6197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6198DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6199#else
6200DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6201{
6202 union { bool f; uint32_t u32; uint8_t u8; } rc;
6203# if RT_INLINE_ASM_USES_INTRIN
6204 rc.u32 = _bittest((long *)pvBitmap, iBit);
6205# elif RT_INLINE_ASM_GNU_STYLE
6206
6207 __asm__ __volatile__("btl %2, %1\n\t"
6208 "setc %b0\n\t"
6209 "andl $1, %0\n\t"
6210 : "=q" (rc.u32)
6211 : "m" (*(const volatile long *)pvBitmap),
6212 "Ir" (iBit)
6213 : "memory");
6214# else
6215 __asm
6216 {
6217 mov edx, [iBit]
6218# ifdef RT_ARCH_AMD64
6219 mov rax, [pvBitmap]
6220 bt [rax], edx
6221# else
6222 mov eax, [pvBitmap]
6223 bt [eax], edx
6224# endif
6225 setc al
6226 and eax, 1
6227 mov [rc.u32], eax
6228 }
6229# endif
6230 return rc.f;
6231}
6232#endif
6233
6234
6235/**
6236 * Clears a bit range within a bitmap.
6237 *
6238 * @param pvBitmap Pointer to the bitmap.
6239 * @param iBitStart The first bit to clear.
6240 * @param iBitEnd The first bit not to clear.
6241 */
6242DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6243{
6244 if (iBitStart < iBitEnd)
6245 {
6246 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6247 int iStart = iBitStart & ~31;
6248 int iEnd = iBitEnd & ~31;
6249 if (iStart == iEnd)
6250 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6251 else
6252 {
6253 /* bits in first dword. */
6254 if (iBitStart & 31)
6255 {
6256 *pu32 &= (1 << (iBitStart & 31)) - 1;
6257 pu32++;
6258 iBitStart = iStart + 32;
6259 }
6260
6261 /* whole dword. */
6262 if (iBitStart != iEnd)
6263 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6264
6265 /* bits in last dword. */
6266 if (iBitEnd & 31)
6267 {
6268 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6269 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6270 }
6271 }
6272 }
6273}
6274
6275
6276/**
6277 * Sets a bit range within a bitmap.
6278 *
6279 * @param pvBitmap Pointer to the bitmap.
6280 * @param iBitStart The first bit to set.
6281 * @param iBitEnd The first bit not to set.
6282 */
6283DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6284{
6285 if (iBitStart < iBitEnd)
6286 {
6287 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6288 int iStart = iBitStart & ~31;
6289 int iEnd = iBitEnd & ~31;
6290 if (iStart == iEnd)
6291 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
6292 else
6293 {
6294 /* bits in first dword. */
6295 if (iBitStart & 31)
6296 {
6297 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6298 pu32++;
6299 iBitStart = iStart + 32;
6300 }
6301
6302 /* whole dword. */
6303 if (iBitStart != iEnd)
6304 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6305
6306 /* bits in last dword. */
6307 if (iBitEnd & 31)
6308 {
6309 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6310 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6311 }
6312 }
6313 }
6314}
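
/*
 * A minimal usage sketch for the range helpers. Remember that iBitEnd is
 * exclusive ("the first bit not to set/clear"). The page bitmap below is
 * illustrative.
 */
#if 0 /* usage sketch only */
static uint32_t g_au32PageBitmap[8];            /* 256 page bits */

static void ExampleMarkPages(int32_t iFirst, int32_t cPages, bool fAllocated)
{
    if (fAllocated)
        ASMBitSetRange(g_au32PageBitmap, iFirst, iFirst + cPages);
    else
        ASMBitClearRange(g_au32PageBitmap, iFirst, iFirst + cPages);
}
#endif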
6315
6316
6317/**
6318 * Finds the first clear bit in a bitmap.
6319 *
6320 * @returns Index of the first zero bit.
6321 * @returns -1 if no clear bit was found.
6322 * @param pvBitmap Pointer to the bitmap.
6323 * @param cBits The number of bits in the bitmap. Multiple of 32.
6324 */
6325#if RT_INLINE_ASM_EXTERNAL
6326DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6327#else
6328DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6329{
6330 if (cBits)
6331 {
6332 int32_t iBit;
6333# if RT_INLINE_ASM_GNU_STYLE
6334 RTCCUINTREG uEAX, uECX, uEDI;
6335 cBits = RT_ALIGN_32(cBits, 32);
6336 __asm__ __volatile__("repe; scasl\n\t"
6337 "je 1f\n\t"
6338# ifdef RT_ARCH_AMD64
6339 "lea -4(%%rdi), %%rdi\n\t"
6340 "xorl (%%rdi), %%eax\n\t"
6341 "subq %5, %%rdi\n\t"
6342# else
6343 "lea -4(%%edi), %%edi\n\t"
6344 "xorl (%%edi), %%eax\n\t"
6345 "subl %5, %%edi\n\t"
6346# endif
6347 "shll $3, %%edi\n\t"
6348 "bsfl %%eax, %%edx\n\t"
6349 "addl %%edi, %%edx\n\t"
6350 "1:\t\n"
6351 : "=d" (iBit),
6352 "=&c" (uECX),
6353 "=&D" (uEDI),
6354 "=&a" (uEAX)
6355 : "0" (0xffffffff),
6356 "mr" (pvBitmap),
6357 "1" (cBits >> 5),
6358 "2" (pvBitmap),
6359 "3" (0xffffffff));
6360# else
6361 cBits = RT_ALIGN_32(cBits, 32);
6362 __asm
6363 {
6364# ifdef RT_ARCH_AMD64
6365 mov rdi, [pvBitmap]
6366 mov rbx, rdi
6367# else
6368 mov edi, [pvBitmap]
6369 mov ebx, edi
6370# endif
6371 mov edx, 0ffffffffh
6372 mov eax, edx
6373 mov ecx, [cBits]
6374 shr ecx, 5
6375 repe scasd
6376 je done
6377
6378# ifdef RT_ARCH_AMD64
6379 lea rdi, [rdi - 4]
6380 xor eax, [rdi]
6381 sub rdi, rbx
6382# else
6383 lea edi, [edi - 4]
6384 xor eax, [edi]
6385 sub edi, ebx
6386# endif
6387 shl edi, 3
6388 bsf edx, eax
6389 add edx, edi
6390 done:
6391 mov [iBit], edx
6392 }
6393# endif
6394 return iBit;
6395 }
6396 return -1;
6397}
6398#endif
6399
6400
6401/**
6402 * Finds the next clear bit in a bitmap.
6403 *
6404 * @returns Index of the next clear bit.
6405 * @returns -1 if no clear bit was found.
6406 * @param pvBitmap Pointer to the bitmap.
6407 * @param cBits The number of bits in the bitmap. Multiple of 32.
6408 * @param iBitPrev The bit returned from the last search.
6409 * The search will start at iBitPrev + 1.
6410 */
6411#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6412DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6413#else
6414DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6415{
6416 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6417 int iBit = ++iBitPrev & 31;
6418 if (iBit)
6419 {
6420 /*
6421 * Inspect the 32-bit word containing the unaligned bit.
6422 */
6423 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6424
6425# if RT_INLINE_ASM_USES_INTRIN
6426 unsigned long ulBit = 0;
6427 if (_BitScanForward(&ulBit, u32))
6428 return ulBit + iBitPrev;
6429# else
6430# if RT_INLINE_ASM_GNU_STYLE
6431 __asm__ __volatile__("bsf %1, %0\n\t"
6432 "jnz 1f\n\t"
6433 "movl $-1, %0\n\t"
6434 "1:\n\t"
6435 : "=r" (iBit)
6436 : "r" (u32));
6437# else
6438 __asm
6439 {
6440 mov edx, [u32]
6441 bsf eax, edx
6442 jnz done
6443 mov eax, 0ffffffffh
6444 done:
6445 mov [iBit], eax
6446 }
6447# endif
6448 if (iBit >= 0)
6449 return iBit + iBitPrev;
6450# endif
6451
6452 /*
6453 * Skip ahead and see if there is anything left to search.
6454 */
6455 iBitPrev |= 31;
6456 iBitPrev++;
6457 if (cBits <= (uint32_t)iBitPrev)
6458 return -1;
6459 }
6460
6461 /*
6462 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6463 */
6464 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6465 if (iBit >= 0)
6466 iBit += iBitPrev;
6467 return iBit;
6468}
6469#endif
6470
6471
6472/**
6473 * Finds the first set bit in a bitmap.
6474 *
6475 * @returns Index of the first set bit.
6476 * @returns -1 if no set bit was found.
6477 * @param pvBitmap Pointer to the bitmap.
6478 * @param cBits The number of bits in the bitmap. Multiple of 32.
6479 */
6480#if RT_INLINE_ASM_EXTERNAL
6481DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6482#else
6483DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6484{
6485 if (cBits)
6486 {
6487 int32_t iBit;
6488# if RT_INLINE_ASM_GNU_STYLE
6489 RTCCUINTREG uEAX, uECX, uEDI;
6490 cBits = RT_ALIGN_32(cBits, 32);
6491 __asm__ __volatile__("repe; scasl\n\t"
6492 "je 1f\n\t"
6493# ifdef RT_ARCH_AMD64
6494 "lea -4(%%rdi), %%rdi\n\t"
6495 "movl (%%rdi), %%eax\n\t"
6496 "subq %5, %%rdi\n\t"
6497# else
6498 "lea -4(%%edi), %%edi\n\t"
6499 "movl (%%edi), %%eax\n\t"
6500 "subl %5, %%edi\n\t"
6501# endif
6502 "shll $3, %%edi\n\t"
6503 "bsfl %%eax, %%edx\n\t"
6504 "addl %%edi, %%edx\n\t"
6505 "1:\t\n"
6506 : "=d" (iBit),
6507 "=&c" (uECX),
6508 "=&D" (uEDI),
6509 "=&a" (uEAX)
6510 : "0" (0xffffffff),
6511 "mr" (pvBitmap),
6512 "1" (cBits >> 5),
6513 "2" (pvBitmap),
6514 "3" (0));
6515# else
6516 cBits = RT_ALIGN_32(cBits, 32);
6517 __asm
6518 {
6519# ifdef RT_ARCH_AMD64
6520 mov rdi, [pvBitmap]
6521 mov rbx, rdi
6522# else
6523 mov edi, [pvBitmap]
6524 mov ebx, edi
6525# endif
6526 mov edx, 0ffffffffh
6527 xor eax, eax
6528 mov ecx, [cBits]
6529 shr ecx, 5
6530 repe scasd
6531 je done
6532# ifdef RT_ARCH_AMD64
6533 lea rdi, [rdi - 4]
6534 mov eax, [rdi]
6535 sub rdi, rbx
6536# else
6537 lea edi, [edi - 4]
6538 mov eax, [edi]
6539 sub edi, ebx
6540# endif
6541 shl edi, 3
6542 bsf edx, eax
6543 add edx, edi
6544 done:
6545 mov [iBit], edx
6546 }
6547# endif
6548 return iBit;
6549 }
6550 return -1;
6551}
6552#endif
6553
6554
6555/**
6556 * Finds the next set bit in a bitmap.
6557 *
6558 * @returns Index of the next set bit.
6559 * @returns -1 if no set bit was found.
6560 * @param pvBitmap Pointer to the bitmap.
6561 * @param cBits The number of bits in the bitmap. Multiple of 32.
6562 * @param iBitPrev The bit returned from the last search.
6563 * The search will start at iBitPrev + 1.
6564 */
6565#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6566DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6567#else
6568DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6569{
6570 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6571 int iBit = ++iBitPrev & 31;
6572 if (iBit)
6573 {
6574 /*
6575 * Inspect the 32-bit word containing the unaligned bit.
6576 */
6577 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6578
6579# if RT_INLINE_ASM_USES_INTRIN
6580 unsigned long ulBit = 0;
6581 if (_BitScanForward(&ulBit, u32))
6582 return ulBit + iBitPrev;
6583# else
6584# if RT_INLINE_ASM_GNU_STYLE
6585 __asm__ __volatile__("bsf %1, %0\n\t"
6586 "jnz 1f\n\t"
6587 "movl $-1, %0\n\t"
6588 "1:\n\t"
6589 : "=r" (iBit)
6590 : "r" (u32));
6591# else
6592 __asm
6593 {
6594 mov edx, [u32]
6595 bsf eax, edx
6596 jnz done
6597 mov eax, 0ffffffffh
6598 done:
6599 mov [iBit], eax
6600 }
6601# endif
6602 if (iBit >= 0)
6603 return iBit + iBitPrev;
6604# endif
6605
6606 /*
6607 * Skip ahead and see if there is anything left to search.
6608 */
6609 iBitPrev |= 31;
6610 iBitPrev++;
6611 if (cBits <= (uint32_t)iBitPrev)
6612 return -1;
6613 }
6614
6615 /*
6616 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6617 */
6618 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6619 if (iBit >= 0)
6620 iBit += iBitPrev;
6621 return iBit;
6622}
6623#endif
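
/* Usage sketch (added illustration, not from the original sources): enumerate
 * every set bit in a bitmap by combining ASMBitFirstSet and ASMBitNextSet.
 * The helper name and the sample data are hypothetical. */
#if 0 /* illustration only */
static unsigned exampleCountSetBits(void)
{
    uint32_t au32Bitmap[2] = { 0x00000009, 0x80000000 }; /* bits 0, 3 and 63 set */
    unsigned  cSet = 0;
    int       iBit = ASMBitFirstSet(&au32Bitmap[0], 64);
    while (iBit >= 0)
    {
        cSet++;
        iBit = ASMBitNextSet(&au32Bitmap[0], 64, iBit);
    }
    return cSet; /* 3 */
}
#endif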
6624
6625
6626/**
6627 * Finds the first bit which is set in the given 32-bit integer.
6628 * Bits are numbered from 1 (least significant) to 32.
6629 *
6630 * @returns index [1..32] of the first set bit.
6631 * @returns 0 if all bits are cleared.
6632 * @param u32 Integer to search for set bits.
6633 * @remark Similar to ffs() in BSD.
6634 */
6635DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6636{
6637# if RT_INLINE_ASM_USES_INTRIN
6638 unsigned long iBit;
6639 if (_BitScanForward(&iBit, u32))
6640 iBit++;
6641 else
6642 iBit = 0;
6643# elif RT_INLINE_ASM_GNU_STYLE
6644 uint32_t iBit;
6645 __asm__ __volatile__("bsf %1, %0\n\t"
6646 "jnz 1f\n\t"
6647 "xorl %0, %0\n\t"
6648 "jmp 2f\n"
6649 "1:\n\t"
6650 "incl %0\n"
6651 "2:\n\t"
6652 : "=r" (iBit)
6653 : "rm" (u32));
6654# else
6655 uint32_t iBit;
6656 _asm
6657 {
6658 bsf eax, [u32]
6659 jnz found
6660 xor eax, eax
6661 jmp done
6662 found:
6663 inc eax
6664 done:
6665 mov [iBit], eax
6666 }
6667# endif
6668 return iBit;
6669}
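
/* Usage sketch (added illustration): the index is 1-based, so bit 4 of 0x10
 * reports as 5; a zero input yields 0.  The helper name is hypothetical. */
#if 0 /* illustration only */
static unsigned exampleBitFirstSetU32(void)
{
    return ASMBitFirstSetU32(0x10); /* 5; ASMBitFirstSetU32(0) would return 0 */
}
#endif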
6670
6671
6672/**
6673 * Finds the first bit which is set in the given 32-bit integer.
6674 * Bits are numbered from 1 (least significant) to 32.
6675 *
6676 * @returns index [1..32] of the first set bit.
6677 * @returns 0 if all bits are cleared.
6678 * @param i32 Integer to search for set bits.
6679 * @remark Similar to ffs() in BSD.
6680 */
6681DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6682{
6683 return ASMBitFirstSetU32((uint32_t)i32);
6684}
6685
6686
6687/**
6688 * Finds the last bit which is set in the given 32-bit integer.
6689 * Bits are numbered from 1 (least significant) to 32.
6690 *
6691 * @returns index [1..32] of the last set bit.
6692 * @returns 0 if all bits are cleared.
6693 * @param u32 Integer to search for set bits.
6694 * @remark Similar to fls() in BSD.
6695 */
6696DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6697{
6698# if RT_INLINE_ASM_USES_INTRIN
6699 unsigned long iBit;
6700 if (_BitScanReverse(&iBit, u32))
6701 iBit++;
6702 else
6703 iBit = 0;
6704# elif RT_INLINE_ASM_GNU_STYLE
6705 uint32_t iBit;
6706 __asm__ __volatile__("bsrl %1, %0\n\t"
6707 "jnz 1f\n\t"
6708 "xorl %0, %0\n\t"
6709 "jmp 2f\n"
6710 "1:\n\t"
6711 "incl %0\n"
6712 "2:\n\t"
6713 : "=r" (iBit)
6714 : "rm" (u32));
6715# else
6716 uint32_t iBit;
6717 _asm
6718 {
6719 bsr eax, [u32]
6720 jnz found
6721 xor eax, eax
6722 jmp done
6723 found:
6724 inc eax
6725 done:
6726 mov [iBit], eax
6727 }
6728# endif
6729 return iBit;
6730}
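
/* Usage sketch (added illustration): the most significant set bit is also
 * reported 1-based, so 0x80000001 reports as 32.  The helper name is
 * hypothetical. */
#if 0 /* illustration only */
static unsigned exampleBitLastSetU32(void)
{
    return ASMBitLastSetU32(0x80000001U); /* 32; ASMBitLastSetU32(0) would return 0 */
}
#endif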
6731
6732
6733/**
6734 * Finds the last bit which is set in the given 32-bit integer.
6735 * Bits are numbered from 1 (least significant) to 32.
6736 *
6737 * @returns index [1..32] of the last set bit.
6738 * @returns 0 if all bits are cleared.
6739 * @param i32 Integer to search for set bits.
6740 * @remark Similar to fls() in BSD.
6741 */
6742DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6743{
6744 return ASMBitLastSetU32((uint32_t)i32);
6745}
6746
6747/**
6748 * Reverse the byte order of the given 16-bit integer.
6749 *
6750 * @returns The byte swapped 16-bit value.
6751 * @param u16 16-bit integer value.
6752 */
6753DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6754{
6755#if RT_INLINE_ASM_USES_INTRIN
6756 u16 = _byteswap_ushort(u16);
6757#elif RT_INLINE_ASM_GNU_STYLE
6758 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6759#else
6760 _asm
6761 {
6762 mov ax, [u16]
6763 ror ax, 8
6764 mov [u16], ax
6765 }
6766#endif
6767 return u16;
6768}
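
/* Usage sketch (added illustration): swapping the two bytes of a 16-bit
 * value, e.g. when converting between big and little endian.  The helper
 * name is hypothetical. */
#if 0 /* illustration only */
static uint16_t exampleByteSwapU16(void)
{
    return ASMByteSwapU16(0x1234); /* 0x3412 */
}
#endif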
6769
6770/**
6771 * Reverse the byte order of the given 32-bit integer.
6772 *
6773 * @returns The byte swapped 32-bit value.
6774 * @param u32 32-bit integer value.
6775 */
6776DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6777{
6778#if RT_INLINE_ASM_USES_INTRIN
6779 u32 = _byteswap_ulong(u32);
6780#elif RT_INLINE_ASM_GNU_STYLE
6781 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6782#else
6783 _asm
6784 {
6785 mov eax, [u32]
6786 bswap eax
6787 mov [u32], eax
6788 }
6789#endif
6790 return u32;
6791}
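
/* Usage sketch (added illustration): a full 32-bit byte reversal.  The
 * helper name is hypothetical. */
#if 0 /* illustration only */
static uint32_t exampleByteSwapU32(void)
{
    return ASMByteSwapU32(0x12345678U); /* 0x78563412 */
}
#endif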
6792
6793
6794/**
6795 * Reverse the byte order of the given 64-bit integer.
6796 *
6797 * @returns The byte swapped 64-bit value.
6798 * @param u64 64-bit integer value.
6799 */
6800DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6801{
6802#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6803 u64 = _byteswap_uint64(u64);
6804#else
6805 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6806 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6807#endif
6808 return u64;
6809}
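
/* Usage sketch (added illustration): on targets without the 64-bit intrinsic
 * the swap above is composed of two 32-bit swaps with the halves exchanged.
 * The helper name is hypothetical. */
#if 0 /* illustration only */
static uint64_t exampleByteSwapU64(void)
{
    return ASMByteSwapU64(UINT64_C(0x0123456789abcdef)); /* 0xefcdab8967452301 */
}
#endif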
6810
6811
6812/** @} */
6813
6814
6815/** @} */
6816#endif
6817