VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 19531

Last change on this file since 19531 was 19524, checked in by vboxsync, 16 years ago

iprt/asm.h: try work around gcc 4.3.3/x86 issues in ASMAtomicCmpXchgU64. (untested)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 164.0 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(__stosq)
105# pragma intrinsic(__readcr8)
106# pragma intrinsic(__writecr8)
107# pragma intrinsic(_byteswap_uint64)
108# pragma intrinsic(_InterlockedExchange64)
109# endif
110# endif
111#endif
112#ifndef RT_INLINE_ASM_USES_INTRIN
113# define RT_INLINE_ASM_USES_INTRIN 0
114#endif
115
116/** @def RT_INLINE_ASM_GCC_4_3_3_X86
117 * Used to work around some 4.3.3 register allocation issues in this version
118 * of the compiler. */
119#ifdef __GNUC__
120# define RT_INLINE_ASM_GCC_4_3_3_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && __GNUC_PATCHLEVEL__ == 3 && defined(__i386__))
121#endif
122#ifndef RT_INLINE_ASM_GCC_4_3_3_X86
123# define RT_INLINE_ASM_GCC_4_3_3_X86 0
124#endif
125
126
127
128/** @defgroup grp_asm ASM - Assembly Routines
129 * @ingroup grp_rt
130 *
131 * @remarks The difference between ordered and unordered atomic operations is that
132 * the former will complete outstanding reads and writes before continuing,
133 * while the latter make no promises about the order. Ordered
134 * operations do not, it seems, make any 100% promise as to whether
135 * the operation will complete before any subsequent memory access.
136 * (please, correct if wrong.)
137 *
138 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
139 * are unordered (note the Uo).
140 *
141 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
142 * or even optimize assembler instructions away. For instance, in the following code
143 * the second rdmsr instruction is optimized away because gcc treats that instruction
144 * as deterministic:
145 *
146 * @code
147 * static inline uint32_t rdmsr_low(int idx)
148 * {
149 * uint32_t low;
150 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
151 * }
152 * ...
153 * uint32_t msr1 = rdmsr_low(1);
154 * foo(msr1);
155 * msr1 = rdmsr_low(1);
156 * bar(msr1);
157 * @endcode
158 *
159 * The input parameter of rdmsr_low is the same for both calls, so gcc will reuse
160 * the result of the first call for the bar() call as well. For rdmsr this
161 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
162 * machine status information in general.
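 *
 * A minimal illustrative fix (not part of the original remark): adding
 * __volatile__ tells gcc the asm has side effects it cannot see, so both
 * reads are kept. rdmsr_low_volatile is just a name made up for this sketch:
 *
 * @code
 * static inline uint32_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
 * @endcode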
163 *
164 * @{
165 */
166
167/** @def RT_INLINE_ASM_EXTERNAL
168 * Defined as 1 if the compiler does not support inline assembly.
169 * The ASM* functions will then be implemented in an external .asm file.
170 *
171 * @remark The Microsoft AMD64 compiler does not support inline assembly, which is
172 * why external implementations are used there.
173 */
174#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
175# define RT_INLINE_ASM_EXTERNAL 1
176#else
177# define RT_INLINE_ASM_EXTERNAL 0
178#endif
179
180/** @def RT_INLINE_ASM_GNU_STYLE
181 * Defined as 1 if the compiler understands GNU style inline assembly.
182 */
183#if defined(_MSC_VER)
184# define RT_INLINE_ASM_GNU_STYLE 0
185#else
186# define RT_INLINE_ASM_GNU_STYLE 1
187#endif
188
189
190/** @todo find a more proper place for these structures? */
191#pragma pack(1)
192/** IDTR */
193typedef struct RTIDTR
194{
195 /** Size of the IDT. */
196 uint16_t cbIdt;
197 /** Address of the IDT. */
198 uintptr_t pIdt;
199} RTIDTR, *PRTIDTR;
200#pragma pack()
201
202#pragma pack(1)
203/** GDTR */
204typedef struct RTGDTR
205{
206 /** Size of the GDT. */
207 uint16_t cbGdt;
208 /** Address of the GDT. */
209 uintptr_t pGdt;
210} RTGDTR, *PRTGDTR;
211#pragma pack()
212
213
214/** @def ASMReturnAddress
215 * Gets the return address of the current (or calling if you like) function or method.
216 */
217#ifdef _MSC_VER
218# ifdef __cplusplus
219extern "C"
220# endif
221void * _ReturnAddress(void);
222# pragma intrinsic(_ReturnAddress)
223# define ASMReturnAddress() _ReturnAddress()
224#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
225# define ASMReturnAddress() __builtin_return_address(0)
226#else
227# error "Unsupported compiler."
228#endif
229
230
231/**
232 * Gets the content of the IDTR CPU register.
233 * @param pIdtr Where to store the IDTR contents.
234 */
235#if RT_INLINE_ASM_EXTERNAL
236DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
237#else
238DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
239{
240# if RT_INLINE_ASM_GNU_STYLE
241 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
242# else
243 __asm
244 {
245# ifdef RT_ARCH_AMD64
246 mov rax, [pIdtr]
247 sidt [rax]
248# else
249 mov eax, [pIdtr]
250 sidt [eax]
251# endif
252 }
253# endif
254}
255#endif
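
/* Illustrative usage sketch (not part of the original header): capturing the
 * current IDT base and limit with ASMGetIDTR() and the RTIDTR structure
 * defined above.
 * @code
 * RTIDTR Idtr;
 * ASMGetIDTR(&Idtr);
 * // Idtr.cbIdt now holds the IDT limit, Idtr.pIdt its linear base address.
 * @endcode
 */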
256
257
258/**
259 * Sets the content of the IDTR CPU register.
260 * @param pIdtr Where to load the IDTR contents from.
261 */
262#if RT_INLINE_ASM_EXTERNAL
263DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
264#else
265DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
266{
267# if RT_INLINE_ASM_GNU_STYLE
268 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
269# else
270 __asm
271 {
272# ifdef RT_ARCH_AMD64
273 mov rax, [pIdtr]
274 lidt [rax]
275# else
276 mov eax, [pIdtr]
277 lidt [eax]
278# endif
279 }
280# endif
281}
282#endif
283
284
285/**
286 * Gets the content of the GDTR CPU register.
287 * @param pGdtr Where to store the GDTR contents.
288 */
289#if RT_INLINE_ASM_EXTERNAL
290DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
291#else
292DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
293{
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
296# else
297 __asm
298 {
299# ifdef RT_ARCH_AMD64
300 mov rax, [pGdtr]
301 sgdt [rax]
302# else
303 mov eax, [pGdtr]
304 sgdt [eax]
305# endif
306 }
307# endif
308}
309#endif
310
311/**
312 * Get the CS register.
313 * @returns CS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetCS(void);
317#else
318DECLINLINE(RTSEL) ASMGetCS(void)
319{
320 RTSEL SelCS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
323# else
324 __asm
325 {
326 mov ax, cs
327 mov [SelCS], ax
328 }
329# endif
330 return SelCS;
331}
332#endif
333
334
335/**
336 * Get the DS register.
337 * @returns DS.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetDS(void);
341#else
342DECLINLINE(RTSEL) ASMGetDS(void)
343{
344 RTSEL SelDS;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
347# else
348 __asm
349 {
350 mov ax, ds
351 mov [SelDS], ax
352 }
353# endif
354 return SelDS;
355}
356#endif
357
358
359/**
360 * Get the ES register.
361 * @returns ES.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetES(void);
365#else
366DECLINLINE(RTSEL) ASMGetES(void)
367{
368 RTSEL SelES;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
371# else
372 __asm
373 {
374 mov ax, es
375 mov [SelES], ax
376 }
377# endif
378 return SelES;
379}
380#endif
381
382
383/**
384 * Get the FS register.
385 * @returns FS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetFS(void);
389#else
390DECLINLINE(RTSEL) ASMGetFS(void)
391{
392 RTSEL SelFS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
395# else
396 __asm
397 {
398 mov ax, fs
399 mov [SelFS], ax
400 }
401# endif
402 return SelFS;
403}
404#endif
405
406
407/**
408 * Get the GS register.
409 * @returns GS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetGS(void);
413#else
414DECLINLINE(RTSEL) ASMGetGS(void)
415{
416 RTSEL SelGS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
419# else
420 __asm
421 {
422 mov ax, gs
423 mov [SelGS], ax
424 }
425# endif
426 return SelGS;
427}
428#endif
429
430
431/**
432 * Get the SS register.
433 * @returns SS.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetSS(void);
437#else
438DECLINLINE(RTSEL) ASMGetSS(void)
439{
440 RTSEL SelSS;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
443# else
444 __asm
445 {
446 mov ax, ss
447 mov [SelSS], ax
448 }
449# endif
450 return SelSS;
451}
452#endif
453
454
455/**
456 * Get the TR register.
457 * @returns TR.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTSEL) ASMGetTR(void);
461#else
462DECLINLINE(RTSEL) ASMGetTR(void)
463{
464 RTSEL SelTR;
465# if RT_INLINE_ASM_GNU_STYLE
466 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
467# else
468 __asm
469 {
470 str ax
471 mov [SelTR], ax
472 }
473# endif
474 return SelTR;
475}
476#endif
477
478
479/**
480 * Get the [RE]FLAGS register.
481 * @returns [RE]FLAGS.
482 */
483#if RT_INLINE_ASM_EXTERNAL
484DECLASM(RTCCUINTREG) ASMGetFlags(void);
485#else
486DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
487{
488 RTCCUINTREG uFlags;
489# if RT_INLINE_ASM_GNU_STYLE
490# ifdef RT_ARCH_AMD64
491 __asm__ __volatile__("pushfq\n\t"
492 "popq %0\n\t"
493 : "=g" (uFlags));
494# else
495 __asm__ __volatile__("pushfl\n\t"
496 "popl %0\n\t"
497 : "=g" (uFlags));
498# endif
499# else
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 pushfq
504 pop [uFlags]
505# else
506 pushfd
507 pop [uFlags]
508# endif
509 }
510# endif
511 return uFlags;
512}
513#endif
514
515
516/**
517 * Set the [RE]FLAGS register.
518 * @param uFlags The new [RE]FLAGS value.
519 */
520#if RT_INLINE_ASM_EXTERNAL
521DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
522#else
523DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
524{
525# if RT_INLINE_ASM_GNU_STYLE
526# ifdef RT_ARCH_AMD64
527 __asm__ __volatile__("pushq %0\n\t"
528 "popfq\n\t"
529 : : "g" (uFlags));
530# else
531 __asm__ __volatile__("pushl %0\n\t"
532 "popfl\n\t"
533 : : "g" (uFlags));
534# endif
535# else
536 __asm
537 {
538# ifdef RT_ARCH_AMD64
539 push [uFlags]
540 popfq
541# else
542 push [uFlags]
543 popfd
544# endif
545 }
546# endif
547}
548#endif
549
550
551/**
552 * Gets the content of the CPU timestamp counter register.
553 *
554 * @returns TSC.
555 */
556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
557DECLASM(uint64_t) ASMReadTSC(void);
558#else
559DECLINLINE(uint64_t) ASMReadTSC(void)
560{
561 RTUINT64U u;
562# if RT_INLINE_ASM_GNU_STYLE
563 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
564# else
565# if RT_INLINE_ASM_USES_INTRIN
566 u.u = __rdtsc();
567# else
568 __asm
569 {
570 rdtsc
571 mov [u.s.Lo], eax
572 mov [u.s.Hi], edx
573 }
574# endif
575# endif
576 return u.u;
577}
578#endif
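
/* Illustrative usage sketch (not part of the original header): measuring an
 * elapsed cycle count with ASMReadTSC(). DoSomeWork() is a placeholder name.
 * @code
 * uint64_t const u64Start   = ASMReadTSC();
 * DoSomeWork();
 * uint64_t const u64Elapsed = ASMReadTSC() - u64Start;
 * @endcode
 * Keep in mind that rdtsc is not a serializing instruction, so out-of-order
 * execution can skew very short measurements. */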
579
580
581/**
582 * Performs the cpuid instruction returning all registers.
583 *
584 * @param uOperator CPUID operation (eax).
585 * @param pvEAX Where to store eax.
586 * @param pvEBX Where to store ebx.
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
595{
596# if RT_INLINE_ASM_GNU_STYLE
597# ifdef RT_ARCH_AMD64
598 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
599 __asm__ ("cpuid\n\t"
600 : "=a" (uRAX),
601 "=b" (uRBX),
602 "=c" (uRCX),
603 "=d" (uRDX)
604 : "0" (uOperator));
605 *(uint32_t *)pvEAX = (uint32_t)uRAX;
606 *(uint32_t *)pvEBX = (uint32_t)uRBX;
607 *(uint32_t *)pvECX = (uint32_t)uRCX;
608 *(uint32_t *)pvEDX = (uint32_t)uRDX;
609# else
610 __asm__ ("xchgl %%ebx, %1\n\t"
611 "cpuid\n\t"
612 "xchgl %%ebx, %1\n\t"
613 : "=a" (*(uint32_t *)pvEAX),
614 "=r" (*(uint32_t *)pvEBX),
615 "=c" (*(uint32_t *)pvECX),
616 "=d" (*(uint32_t *)pvEDX)
617 : "0" (uOperator));
618# endif
619
620# elif RT_INLINE_ASM_USES_INTRIN
621 int aInfo[4];
622 __cpuid(aInfo, uOperator);
623 *(uint32_t *)pvEAX = aInfo[0];
624 *(uint32_t *)pvEBX = aInfo[1];
625 *(uint32_t *)pvECX = aInfo[2];
626 *(uint32_t *)pvEDX = aInfo[3];
627
628# else
629 uint32_t uEAX;
630 uint32_t uEBX;
631 uint32_t uECX;
632 uint32_t uEDX;
633 __asm
634 {
635 push ebx
636 mov eax, [uOperator]
637 cpuid
638 mov [uEAX], eax
639 mov [uEBX], ebx
640 mov [uECX], ecx
641 mov [uEDX], edx
642 pop ebx
643 }
644 *(uint32_t *)pvEAX = uEAX;
645 *(uint32_t *)pvEBX = uEBX;
646 *(uint32_t *)pvECX = uECX;
647 *(uint32_t *)pvEDX = uEDX;
648# endif
649}
650#endif
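
/* Illustrative usage sketch (not part of the original header): the void
 * pointer parameters make it easy to drop the register values straight into
 * a buffer, e.g. assembling the 12-character vendor string which CPUID leaf 0
 * returns in EBX, EDX, ECX (in that order).
 * @code
 * char     szVendor[13];
 * uint32_t uMaxLeaf;
 * ASMCpuId(0, &uMaxLeaf, &szVendor[0], &szVendor[8], &szVendor[4]);
 * szVendor[12] = '\0';   // e.g. "GenuineIntel" or "AuthenticAMD"
 * @endcode
 */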
651
652
653/**
654 * Performs the cpuid instruction returning all registers.
655 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
656 *
657 * @param uOperator CPUID operation (eax).
658 * @param uIdxECX ecx index
659 * @param pvEAX Where to store eax.
660 * @param pvEBX Where to store ebx.
661 * @param pvECX Where to store ecx.
662 * @param pvEDX Where to store edx.
663 * @remark We're using void pointers to ease the use of special bitfield structures and such.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
667#else
668DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
669{
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef RT_ARCH_AMD64
672 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
673 __asm__ ("cpuid\n\t"
674 : "=a" (uRAX),
675 "=b" (uRBX),
676 "=c" (uRCX),
677 "=d" (uRDX)
678 : "0" (uOperator),
679 "2" (uIdxECX));
680 *(uint32_t *)pvEAX = (uint32_t)uRAX;
681 *(uint32_t *)pvEBX = (uint32_t)uRBX;
682 *(uint32_t *)pvECX = (uint32_t)uRCX;
683 *(uint32_t *)pvEDX = (uint32_t)uRDX;
684# else
685 __asm__ ("xchgl %%ebx, %1\n\t"
686 "cpuid\n\t"
687 "xchgl %%ebx, %1\n\t"
688 : "=a" (*(uint32_t *)pvEAX),
689 "=r" (*(uint32_t *)pvEBX),
690 "=c" (*(uint32_t *)pvECX),
691 "=d" (*(uint32_t *)pvEDX)
692 : "0" (uOperator),
693 "2" (uIdxECX));
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 /** @todo Use an intrinsic that takes the ECX index; __cpuid() does not, so uIdxECX is ignored here. */
699 __cpuid(aInfo, uOperator);
700 *(uint32_t *)pvEAX = aInfo[0];
701 *(uint32_t *)pvEBX = aInfo[1];
702 *(uint32_t *)pvECX = aInfo[2];
703 *(uint32_t *)pvEDX = aInfo[3];
704
705# else
706 uint32_t uEAX;
707 uint32_t uEBX;
708 uint32_t uECX;
709 uint32_t uEDX;
710 __asm
711 {
712 push ebx
713 mov eax, [uOperator]
714 mov ecx, [uIdxECX]
715 cpuid
716 mov [uEAX], eax
717 mov [uEBX], ebx
718 mov [uECX], ecx
719 mov [uEDX], edx
720 pop ebx
721 }
722 *(uint32_t *)pvEAX = uEAX;
723 *(uint32_t *)pvEBX = uEBX;
724 *(uint32_t *)pvECX = uECX;
725 *(uint32_t *)pvEDX = uEDX;
726# endif
727}
728#endif
729
730
731/**
732 * Performs the cpuid instruction returning ecx and edx.
733 *
734 * @param uOperator CPUID operation (eax).
735 * @param pvECX Where to store ecx.
736 * @param pvEDX Where to store edx.
737 * @remark We're using void pointers to ease the use of special bitfield structures and such.
738 */
739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
740DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
741#else
742DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
743{
744 uint32_t uEBX;
745 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
746}
747#endif
748
749
750/**
751 * Performs the cpuid instruction returning edx.
752 *
753 * @param uOperator CPUID operation (eax).
754 * @returns EDX after cpuid operation.
755 */
756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
757DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
758#else
759DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
760{
761 RTCCUINTREG xDX;
762# if RT_INLINE_ASM_GNU_STYLE
763# ifdef RT_ARCH_AMD64
764 RTCCUINTREG uSpill;
765 __asm__ ("cpuid"
766 : "=a" (uSpill),
767 "=d" (xDX)
768 : "0" (uOperator)
769 : "rbx", "rcx");
770# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
771 __asm__ ("push %%ebx\n\t"
772 "cpuid\n\t"
773 "pop %%ebx\n\t"
774 : "=a" (uOperator),
775 "=d" (xDX)
776 : "0" (uOperator)
777 : "ecx");
778# else
779 __asm__ ("cpuid"
780 : "=a" (uOperator),
781 "=d" (xDX)
782 : "0" (uOperator)
783 : "ebx", "ecx");
784# endif
785
786# elif RT_INLINE_ASM_USES_INTRIN
787 int aInfo[4];
788 __cpuid(aInfo, uOperator);
789 xDX = aInfo[3];
790
791# else
792 __asm
793 {
794 push ebx
795 mov eax, [uOperator]
796 cpuid
797 mov [xDX], edx
798 pop ebx
799 }
800# endif
801 return (uint32_t)xDX;
802}
803#endif
804
805
806/**
807 * Performs the cpuid instruction returning ecx.
808 *
809 * @param uOperator CPUID operation (eax).
810 * @returns ECX after cpuid operation.
811 */
812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
813DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
814#else
815DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
816{
817 RTCCUINTREG xCX;
818# if RT_INLINE_ASM_GNU_STYLE
819# ifdef RT_ARCH_AMD64
820 RTCCUINTREG uSpill;
821 __asm__ ("cpuid"
822 : "=a" (uSpill),
823 "=c" (xCX)
824 : "0" (uOperator)
825 : "rbx", "rdx");
826# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
827 __asm__ ("push %%ebx\n\t"
828 "cpuid\n\t"
829 "pop %%ebx\n\t"
830 : "=a" (uOperator),
831 "=c" (xCX)
832 : "0" (uOperator)
833 : "edx");
834# else
835 __asm__ ("cpuid"
836 : "=a" (uOperator),
837 "=c" (xCX)
838 : "0" (uOperator)
839 : "ebx", "edx");
840
841# endif
842
843# elif RT_INLINE_ASM_USES_INTRIN
844 int aInfo[4];
845 __cpuid(aInfo, uOperator);
846 xCX = aInfo[2];
847
848# else
849 __asm
850 {
851 push ebx
852 mov eax, [uOperator]
853 cpuid
854 mov [xCX], ecx
855 pop ebx
856 }
857# endif
858 return (uint32_t)xCX;
859}
860#endif
861
862
863/**
864 * Checks if the current CPU supports CPUID.
865 *
866 * @returns true if CPUID is supported.
867 */
868DECLINLINE(bool) ASMHasCpuId(void)
869{
870#ifdef RT_ARCH_AMD64
871 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
872#else /* !RT_ARCH_AMD64 */
873 bool fRet = false;
874# if RT_INLINE_ASM_GNU_STYLE
875 uint32_t u1;
876 uint32_t u2;
877 __asm__ ("pushf\n\t"
878 "pop %1\n\t"
879 "mov %1, %2\n\t"
880 "xorl $0x200000, %1\n\t"
881 "push %1\n\t"
882 "popf\n\t"
883 "pushf\n\t"
884 "pop %1\n\t"
885 "cmpl %1, %2\n\t"
886 "setne %0\n\t"
887 "push %2\n\t"
888 "popf\n\t"
889 : "=m" (fRet), "=r" (u1), "=r" (u2));
890# else
891 __asm
892 {
893 pushfd
894 pop eax
895 mov ebx, eax
896 xor eax, 0200000h
897 push eax
898 popfd
899 pushfd
900 pop eax
901 cmp eax, ebx
902 setne fRet
903 push ebx
904 popfd
905 }
906# endif
907 return fRet;
908#endif /* !RT_ARCH_AMD64 */
909}
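
/* Illustrative usage sketch (not part of the original header): on 32-bit
 * hosts it is prudent to check ASMHasCpuId() before issuing cpuid, since
 * i386 and early i486 parts lack the instruction. uVerStamp and the uDummy*
 * names are made up for the example.
 * @code
 * uint32_t uVerStamp = 0;
 * if (ASMHasCpuId())
 * {
 *     uint32_t uDummyB, uDummyC, uDummyD;
 *     ASMCpuId(1, &uVerStamp, &uDummyB, &uDummyC, &uDummyD);
 * }
 * @endcode
 */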
910
911
912/**
913 * Gets the APIC ID of the current CPU.
914 *
915 * @returns the APIC ID.
916 */
917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
918DECLASM(uint8_t) ASMGetApicId(void);
919#else
920DECLINLINE(uint8_t) ASMGetApicId(void)
921{
922 RTCCUINTREG xBX;
923# if RT_INLINE_ASM_GNU_STYLE
924# ifdef RT_ARCH_AMD64
925 RTCCUINTREG uSpill;
926 __asm__ ("cpuid"
927 : "=a" (uSpill),
928 "=b" (xBX)
929 : "0" (1)
930 : "rcx", "rdx");
931# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
932 RTCCUINTREG uSpill;
933 __asm__ ("mov %%ebx,%1\n\t"
934 "cpuid\n\t"
935 "xchgl %%ebx,%1\n\t"
936 : "=a" (uSpill),
937 "=r" (xBX)
938 : "0" (1)
939 : "ecx", "edx");
940# else
941 RTCCUINTREG uSpill;
942 __asm__ ("cpuid"
943 : "=a" (uSpill),
944 "=b" (xBX)
945 : "0" (1)
946 : "ecx", "edx");
947# endif
948
949# elif RT_INLINE_ASM_USES_INTRIN
950 int aInfo[4];
951 __cpuid(aInfo, 1);
952 xBX = aInfo[1];
953
954# else
955 __asm
956 {
957 push ebx
958 mov eax, 1
959 cpuid
960 mov [xBX], ebx
961 pop ebx
962 }
963# endif
964 return (uint8_t)(xBX >> 24);
965}
966#endif
967
968
969/**
970 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
971 *
972 * @returns true/false.
973 * @param uEBX EBX return from ASMCpuId(0)
974 * @param uECX ECX return from ASMCpuId(0)
975 * @param uEDX EDX return from ASMCpuId(0)
976 */
977DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
978{
979 return uEBX == 0x756e6547
980 && uECX == 0x6c65746e
981 && uEDX == 0x49656e69;
982}
983
984
985/**
986 * Tests if this is a genuine Intel CPU.
987 *
988 * @returns true/false.
989 */
990DECLINLINE(bool) ASMIsIntelCpu(void)
991{
992 uint32_t uEAX, uEBX, uECX, uEDX;
993 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
994 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
995}
996
997
998/**
999 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1000 *
1001 * @returns Family.
1002 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1003 */
1004DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1005{
1006 return ((uEAX >> 8) & 0xf) == 0xf
1007 ? ((uEAX >> 20) & 0x7f) + 0xf
1008 : ((uEAX >> 8) & 0xf);
1009}
1010
1011
1012/**
1013 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1014 *
1015 * @returns Model.
1016 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1018 */
1019DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1020{
1021 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1022 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1023 : ((uEAX >> 4) & 0xf);
1024}
1025
1026
1027/**
1028 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1029 *
1030 * @returns Model.
1031 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1033 */
1034DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1035{
1036 return ((uEAX >> 8) & 0xf) == 0xf
1037 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1038 : ((uEAX >> 4) & 0xf);
1039}
1040
1041
1042/**
1043 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1044 *
1045 * @returns Model.
1046 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1047 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1048 */
1049DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1050{
1051 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1052 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1053 : ((uEAX >> 4) & 0xf);
1054}
1055
1056
1057/**
1058 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1059 *
1060 * @returns Stepping.
1061 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1062 */
1063DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1064{
1065 return uEAX & 0xf;
1066}
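
/* Illustrative usage sketch (not part of the original header): combining the
 * extraction helpers above on the CPUID leaf 1 EAX value.
 * @code
 * uint32_t uEAX, uEBX, uECX, uEDX;
 * ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 * bool const     fIntel    = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 * ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 * uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 * uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 * uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */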
1067
1068
1069/**
1070 * Get cr0.
1071 * @returns cr0.
1072 */
1073#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1074DECLASM(RTCCUINTREG) ASMGetCR0(void);
1075#else
1076DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1077{
1078 RTCCUINTREG uCR0;
1079# if RT_INLINE_ASM_USES_INTRIN
1080 uCR0 = __readcr0();
1081
1082# elif RT_INLINE_ASM_GNU_STYLE
1083# ifdef RT_ARCH_AMD64
1084 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1085# else
1086 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1087# endif
1088# else
1089 __asm
1090 {
1091# ifdef RT_ARCH_AMD64
1092 mov rax, cr0
1093 mov [uCR0], rax
1094# else
1095 mov eax, cr0
1096 mov [uCR0], eax
1097# endif
1098 }
1099# endif
1100 return uCR0;
1101}
1102#endif
1103
1104
1105/**
1106 * Sets the CR0 register.
1107 * @param uCR0 The new CR0 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1111#else
1112DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr0(uCR0);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1120# else
1121 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR0]
1128 mov cr0, rax
1129# else
1130 mov eax, [uCR0]
1131 mov cr0, eax
1132# endif
1133 }
1134# endif
1135}
1136#endif
1137
1138
1139/**
1140 * Get cr2.
1141 * @returns cr2.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(RTCCUINTREG) ASMGetCR2(void);
1145#else
1146DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1147{
1148 RTCCUINTREG uCR2;
1149# if RT_INLINE_ASM_USES_INTRIN
1150 uCR2 = __readcr2();
1151
1152# elif RT_INLINE_ASM_GNU_STYLE
1153# ifdef RT_ARCH_AMD64
1154 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1155# else
1156 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1157# endif
1158# else
1159 __asm
1160 {
1161# ifdef RT_ARCH_AMD64
1162 mov rax, cr2
1163 mov [uCR2], rax
1164# else
1165 mov eax, cr2
1166 mov [uCR2], eax
1167# endif
1168 }
1169# endif
1170 return uCR2;
1171}
1172#endif
1173
1174
1175/**
1176 * Sets the CR2 register.
1177 * @param uCR2 The new CR2 value.
1178 */
1179#if RT_INLINE_ASM_EXTERNAL
1180DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1181#else
1182DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185# ifdef RT_ARCH_AMD64
1186 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1187# else
1188 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1189# endif
1190# else
1191 __asm
1192 {
1193# ifdef RT_ARCH_AMD64
1194 mov rax, [uCR2]
1195 mov cr2, rax
1196# else
1197 mov eax, [uCR2]
1198 mov cr2, eax
1199# endif
1200 }
1201# endif
1202}
1203#endif
1204
1205
1206/**
1207 * Get cr3.
1208 * @returns cr3.
1209 */
1210#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1211DECLASM(RTCCUINTREG) ASMGetCR3(void);
1212#else
1213DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1214{
1215 RTCCUINTREG uCR3;
1216# if RT_INLINE_ASM_USES_INTRIN
1217 uCR3 = __readcr3();
1218
1219# elif RT_INLINE_ASM_GNU_STYLE
1220# ifdef RT_ARCH_AMD64
1221 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1222# else
1223 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1224# endif
1225# else
1226 __asm
1227 {
1228# ifdef RT_ARCH_AMD64
1229 mov rax, cr3
1230 mov [uCR3], rax
1231# else
1232 mov eax, cr3
1233 mov [uCR3], eax
1234# endif
1235 }
1236# endif
1237 return uCR3;
1238}
1239#endif
1240
1241
1242/**
1243 * Sets the CR3 register.
1244 *
1245 * @param uCR3 New CR3 value.
1246 */
1247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1248DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1249#else
1250DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1251{
1252# if RT_INLINE_ASM_USES_INTRIN
1253 __writecr3(uCR3);
1254
1255# elif RT_INLINE_ASM_GNU_STYLE
1256# ifdef RT_ARCH_AMD64
1257 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1258# else
1259 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1260# endif
1261# else
1262 __asm
1263 {
1264# ifdef RT_ARCH_AMD64
1265 mov rax, [uCR3]
1266 mov cr3, rax
1267# else
1268 mov eax, [uCR3]
1269 mov cr3, eax
1270# endif
1271 }
1272# endif
1273}
1274#endif
1275
1276
1277/**
1278 * Reloads the CR3 register.
1279 */
1280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1281DECLASM(void) ASMReloadCR3(void);
1282#else
1283DECLINLINE(void) ASMReloadCR3(void)
1284{
1285# if RT_INLINE_ASM_USES_INTRIN
1286 __writecr3(__readcr3());
1287
1288# elif RT_INLINE_ASM_GNU_STYLE
1289 RTCCUINTREG u;
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %%cr3, %0\n\t"
1292 "movq %0, %%cr3\n\t"
1293 : "=r" (u));
1294# else
1295 __asm__ __volatile__("movl %%cr3, %0\n\t"
1296 "movl %0, %%cr3\n\t"
1297 : "=r" (u));
1298# endif
1299# else
1300 __asm
1301 {
1302# ifdef RT_ARCH_AMD64
1303 mov rax, cr3
1304 mov cr3, rax
1305# else
1306 mov eax, cr3
1307 mov cr3, eax
1308# endif
1309 }
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Get cr4.
1317 * @returns cr4.
1318 */
1319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1320DECLASM(RTCCUINTREG) ASMGetCR4(void);
1321#else
1322DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1323{
1324 RTCCUINTREG uCR4;
1325# if RT_INLINE_ASM_USES_INTRIN
1326 uCR4 = __readcr4();
1327
1328# elif RT_INLINE_ASM_GNU_STYLE
1329# ifdef RT_ARCH_AMD64
1330 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1331# else
1332 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1333# endif
1334# else
1335 __asm
1336 {
1337# ifdef RT_ARCH_AMD64
1338 mov rax, cr4
1339 mov [uCR4], rax
1340# else
1341 push eax /* just in case */
1342 /*mov eax, cr4*/
1343 _emit 0x0f
1344 _emit 0x20
1345 _emit 0xe0
1346 mov [uCR4], eax
1347 pop eax
1348# endif
1349 }
1350# endif
1351 return uCR4;
1352}
1353#endif
1354
1355
1356/**
1357 * Sets the CR4 register.
1358 *
1359 * @param uCR4 New CR4 value.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1363#else
1364DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1365{
1366# if RT_INLINE_ASM_USES_INTRIN
1367 __writecr4(uCR4);
1368
1369# elif RT_INLINE_ASM_GNU_STYLE
1370# ifdef RT_ARCH_AMD64
1371 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1372# else
1373 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1374# endif
1375# else
1376 __asm
1377 {
1378# ifdef RT_ARCH_AMD64
1379 mov rax, [uCR4]
1380 mov cr4, rax
1381# else
1382 mov eax, [uCR4]
1383 _emit 0x0F
1384 _emit 0x22
1385 _emit 0xE0 /* mov cr4, eax */
1386# endif
1387 }
1388# endif
1389}
1390#endif
1391
1392
1393/**
1394 * Get cr8.
1395 * @returns cr8.
1396 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1399DECLASM(RTCCUINTREG) ASMGetCR8(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1402{
1403# ifdef RT_ARCH_AMD64
1404 RTCCUINTREG uCR8;
1405# if RT_INLINE_ASM_USES_INTRIN
1406 uCR8 = __readcr8();
1407
1408# elif RT_INLINE_ASM_GNU_STYLE
1409 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1410# else
1411 __asm
1412 {
1413 mov rax, cr8
1414 mov [uCR8], rax
1415 }
1416# endif
1417 return uCR8;
1418# else /* !RT_ARCH_AMD64 */
1419 return 0;
1420# endif /* !RT_ARCH_AMD64 */
1421}
1422#endif
1423
1424
1425/**
1426 * Enables interrupts (EFLAGS.IF).
1427 */
1428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1429DECLASM(void) ASMIntEnable(void);
1430#else
1431DECLINLINE(void) ASMIntEnable(void)
1432{
1433# if RT_INLINE_ASM_GNU_STYLE
1434 __asm("sti\n");
1435# elif RT_INLINE_ASM_USES_INTRIN
1436 _enable();
1437# else
1438 __asm sti
1439# endif
1440}
1441#endif
1442
1443
1444/**
1445 * Disables interrupts (!EFLAGS.IF).
1446 */
1447#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1448DECLASM(void) ASMIntDisable(void);
1449#else
1450DECLINLINE(void) ASMIntDisable(void)
1451{
1452# if RT_INLINE_ASM_GNU_STYLE
1453 __asm("cli\n");
1454# elif RT_INLINE_ASM_USES_INTRIN
1455 _disable();
1456# else
1457 __asm cli
1458# endif
1459}
1460#endif
1461
1462
1463/**
1464 * Disables interrupts and returns previous xFLAGS.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1467DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1470{
1471 RTCCUINTREG xFlags;
1472# if RT_INLINE_ASM_GNU_STYLE
1473# ifdef RT_ARCH_AMD64
1474 __asm__ __volatile__("pushfq\n\t"
1475 "cli\n\t"
1476 "popq %0\n\t"
1477 : "=rm" (xFlags));
1478# else
1479 __asm__ __volatile__("pushfl\n\t"
1480 "cli\n\t"
1481 "popl %0\n\t"
1482 : "=rm" (xFlags));
1483# endif
1484# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1485 xFlags = ASMGetFlags();
1486 _disable();
1487# else
1488 __asm {
1489 pushfd
1490 cli
1491 pop [xFlags]
1492 }
1493# endif
1494 return xFlags;
1495}
1496#endif
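
/* Illustrative usage sketch (not part of the original header): the usual
 * save/disable/restore pattern for a short interrupt-free section, built from
 * ASMIntDisableFlags() and ASMSetFlags(). DoCriticalWork() is a placeholder.
 * @code
 * RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 * DoCriticalWork();                  // runs with interrupts disabled
 * ASMSetFlags(fSavedFlags);          // restores the previous IF state
 * @endcode
 */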
1497
1498
1499/**
1500 * Halts the CPU until interrupted.
1501 */
1502#if RT_INLINE_ASM_EXTERNAL
1503DECLASM(void) ASMHalt(void);
1504#else
1505DECLINLINE(void) ASMHalt(void)
1506{
1507# if RT_INLINE_ASM_GNU_STYLE
1508 __asm__ __volatile__("hlt\n\t");
1509# else
1510 __asm {
1511 hlt
1512 }
1513# endif
1514}
1515#endif
1516
1517
1518/**
1519 * Reads a machine specific register.
1520 *
1521 * @returns Register content.
1522 * @param uRegister Register to read.
1523 */
1524#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1525DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1526#else
1527DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1528{
1529 RTUINT64U u;
1530# if RT_INLINE_ASM_GNU_STYLE
1531 __asm__ __volatile__("rdmsr\n\t"
1532 : "=a" (u.s.Lo),
1533 "=d" (u.s.Hi)
1534 : "c" (uRegister));
1535
1536# elif RT_INLINE_ASM_USES_INTRIN
1537 u.u = __readmsr(uRegister);
1538
1539# else
1540 __asm
1541 {
1542 mov ecx, [uRegister]
1543 rdmsr
1544 mov [u.s.Lo], eax
1545 mov [u.s.Hi], edx
1546 }
1547# endif
1548
1549 return u.u;
1550}
1551#endif
1552
1553
1554/**
1555 * Writes a machine specific register.
1556 *
1558 * @param uRegister Register to write to.
1559 * @param u64Val Value to write.
1560 */
1561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1562DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1563#else
1564DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1565{
1566 RTUINT64U u;
1567
1568 u.u = u64Val;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 __asm__ __volatile__("wrmsr\n\t"
1571 ::"a" (u.s.Lo),
1572 "d" (u.s.Hi),
1573 "c" (uRegister));
1574
1575# elif RT_INLINE_ASM_USES_INTRIN
1576 __writemsr(uRegister, u.u);
1577
1578# else
1579 __asm
1580 {
1581 mov ecx, [uRegister]
1582 mov edx, [u.s.Hi]
1583 mov eax, [u.s.Lo]
1584 wrmsr
1585 }
1586# endif
1587}
1588#endif
1589
1590
1591/**
1592 * Reads low part of a machine specific register.
1593 *
1594 * @returns Register content.
1595 * @param uRegister Register to read.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1599#else
1600DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1601{
1602 uint32_t u32;
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__("rdmsr\n\t"
1605 : "=a" (u32)
1606 : "c" (uRegister)
1607 : "edx");
1608
1609# elif RT_INLINE_ASM_USES_INTRIN
1610 u32 = (uint32_t)__readmsr(uRegister);
1611
1612#else
1613 __asm
1614 {
1615 mov ecx, [uRegister]
1616 rdmsr
1617 mov [u32], eax
1618 }
1619# endif
1620
1621 return u32;
1622}
1623#endif
1624
1625
1626/**
1627 * Reads high part of a machine specific register.
1628 *
1629 * @returns Register content.
1630 * @param uRegister Register to read.
1631 */
1632#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1633DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1634#else
1635DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1636{
1637 uint32_t u32;
1638# if RT_INLINE_ASM_GNU_STYLE
1639 __asm__ __volatile__("rdmsr\n\t"
1640 : "=d" (u32)
1641 : "c" (uRegister)
1642 : "eax");
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1646
1647# else
1648 __asm
1649 {
1650 mov ecx, [uRegister]
1651 rdmsr
1652 mov [u32], edx
1653 }
1654# endif
1655
1656 return u32;
1657}
1658#endif
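
/* Illustrative sketch (not part of the original header): ASMRdMsr_Low() and
 * ASMRdMsr_High() return the EAX and EDX halves of rdmsr, so composing them
 * gives the same value as ASMRdMsr() unless the MSR changes between the two
 * reads (e.g. a running counter). ReadMsrComposed is a made-up name.
 * @code
 * DECLINLINE(uint64_t) ReadMsrComposed(uint32_t uRegister)
 * {
 *     return ((uint64_t)ASMRdMsr_High(uRegister) << 32) | ASMRdMsr_Low(uRegister);
 * }
 * @endcode
 */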
1659
1660
1661/**
1662 * Gets dr0.
1663 *
1664 * @returns dr0.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(RTCCUINTREG) ASMGetDR0(void);
1668#else
1669DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1670{
1671 RTCCUINTREG uDR0;
1672# if RT_INLINE_ASM_USES_INTRIN
1673 uDR0 = __readdr(0);
1674# elif RT_INLINE_ASM_GNU_STYLE
1675# ifdef RT_ARCH_AMD64
1676 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1677# else
1678 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1679# endif
1680# else
1681 __asm
1682 {
1683# ifdef RT_ARCH_AMD64
1684 mov rax, dr0
1685 mov [uDR0], rax
1686# else
1687 mov eax, dr0
1688 mov [uDR0], eax
1689# endif
1690 }
1691# endif
1692 return uDR0;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr1.
1699 *
1700 * @returns dr1.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR1(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1706{
1707 RTCCUINTREG uDR1;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR1 = __readdr(1);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1713# else
1714 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr1
1721 mov [uDR1], rax
1722# else
1723 mov eax, dr1
1724 mov [uDR1], eax
1725# endif
1726 }
1727# endif
1728 return uDR1;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr2.
1735 *
1736 * @returns dr2.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR2(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1742{
1743 RTCCUINTREG uDR2;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR2 = __readdr(2);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1749# else
1750 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr2
1757 mov [uDR2], rax
1758# else
1759 mov eax, dr2
1760 mov [uDR2], eax
1761# endif
1762 }
1763# endif
1764 return uDR2;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr3.
1771 *
1772 * @returns dr3.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR3(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1778{
1779 RTCCUINTREG uDR3;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR3 = __readdr(3);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1785# else
1786 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr3
1793 mov [uDR3], rax
1794# else
1795 mov eax, dr3
1796 mov [uDR3], eax
1797# endif
1798 }
1799# endif
1800 return uDR3;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr6.
1807 *
1808 * @returns dr6.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR6(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1814{
1815 RTCCUINTREG uDR6;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR6 = __readdr(6);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr6
1829 mov [uDR6], rax
1830# else
1831 mov eax, dr6
1832 mov [uDR6], eax
1833# endif
1834 }
1835# endif
1836 return uDR6;
1837}
1838#endif
1839
1840
1841/**
1842 * Reads and clears DR6.
1843 *
1844 * @returns DR6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854 __writedr(6, 0xffff0ff0U); /* Bits 31-16 and 11-4 are 1's, bits 15-12, 3-0 and 63-32 are zero. */
1855# elif RT_INLINE_ASM_GNU_STYLE
1856 RTCCUINTREG uNewValue = 0xffff0ff0U; /* Bits 31-16 and 11-4 are 1's, bits 15-12, 3-0 and 63-32 are zero. */
1857# ifdef RT_ARCH_AMD64
1858 __asm__ __volatile__("movq %%dr6, %0\n\t"
1859 "movq %1, %%dr6\n\t"
1860 : "=r" (uDR6)
1861 : "r" (uNewValue));
1862# else
1863 __asm__ __volatile__("movl %%dr6, %0\n\t"
1864 "movl %1, %%dr6\n\t"
1865 : "=r" (uDR6)
1866 : "r" (uNewValue));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr6
1873 mov [uDR6], rax
1874 mov rcx, rax
1875 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are 1's, bits 15-12, 3-0 and 63-32 are zero. */
1876 mov dr6, rcx
1877# else
1878 mov eax, dr6
1879 mov [uDR6], eax
1880 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are 1's, bits 15-12 and 3-0 are zero. */
1881 mov dr6, ecx
1882# endif
1883 }
1884# endif
1885 return uDR6;
1886}
1887#endif
1888
1889
1890/**
1891 * Gets dr7.
1892 *
1893 * @returns dr7.
1894 */
1895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1896DECLASM(RTCCUINTREG) ASMGetDR7(void);
1897#else
1898DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1899{
1900 RTCCUINTREG uDR7;
1901# if RT_INLINE_ASM_USES_INTRIN
1902 uDR7 = __readdr(7);
1903# elif RT_INLINE_ASM_GNU_STYLE
1904# ifdef RT_ARCH_AMD64
1905 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1906# else
1907 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1908# endif
1909# else
1910 __asm
1911 {
1912# ifdef RT_ARCH_AMD64
1913 mov rax, dr7
1914 mov [uDR7], rax
1915# else
1916 mov eax, dr7
1917 mov [uDR7], eax
1918# endif
1919 }
1920# endif
1921 return uDR7;
1922}
1923#endif
1924
1925
1926/**
1927 * Sets dr0.
1928 *
1929 * @param uDRVal Debug register value to write
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1933#else
1934DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1935{
1936# if RT_INLINE_ASM_USES_INTRIN
1937 __writedr(0, uDRVal);
1938# elif RT_INLINE_ASM_GNU_STYLE
1939# ifdef RT_ARCH_AMD64
1940 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1941# else
1942 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1943# endif
1944# else
1945 __asm
1946 {
1947# ifdef RT_ARCH_AMD64
1948 mov rax, [uDRVal]
1949 mov dr0, rax
1950# else
1951 mov eax, [uDRVal]
1952 mov dr0, eax
1953# endif
1954 }
1955# endif
1956}
1957#endif
1958
1959
1960/**
1961 * Sets dr1.
1962 *
1963 * @param uDRVal Debug register value to write
1964 */
1965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1966DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1967#else
1968DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1969{
1970# if RT_INLINE_ASM_USES_INTRIN
1971 __writedr(1, uDRVal);
1972# elif RT_INLINE_ASM_GNU_STYLE
1973# ifdef RT_ARCH_AMD64
1974 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1975# else
1976 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1977# endif
1978# else
1979 __asm
1980 {
1981# ifdef RT_ARCH_AMD64
1982 mov rax, [uDRVal]
1983 mov dr1, rax
1984# else
1985 mov eax, [uDRVal]
1986 mov dr1, eax
1987# endif
1988 }
1989# endif
1990}
1991#endif
1992
1993
1994/**
1995 * Sets dr2.
1996 *
1997 * @param uDRVal Debug register value to write
1998 */
1999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2000DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2001#else
2002DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2003{
2004# if RT_INLINE_ASM_USES_INTRIN
2005 __writedr(2, uDRVal);
2006# elif RT_INLINE_ASM_GNU_STYLE
2007# ifdef RT_ARCH_AMD64
2008 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2009# else
2010 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2011# endif
2012# else
2013 __asm
2014 {
2015# ifdef RT_ARCH_AMD64
2016 mov rax, [uDRVal]
2017 mov dr2, rax
2018# else
2019 mov eax, [uDRVal]
2020 mov dr2, eax
2021# endif
2022 }
2023# endif
2024}
2025#endif
2026
2027
2028/**
2029 * Sets dr3.
2030 *
2031 * @param uDRVal Debug register value to write
2032 */
2033#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2034DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2035#else
2036DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2037{
2038# if RT_INLINE_ASM_USES_INTRIN
2039 __writedr(3, uDRVal);
2040# elif RT_INLINE_ASM_GNU_STYLE
2041# ifdef RT_ARCH_AMD64
2042 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2043# else
2044 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2045# endif
2046# else
2047 __asm
2048 {
2049# ifdef RT_ARCH_AMD64
2050 mov rax, [uDRVal]
2051 mov dr3, rax
2052# else
2053 mov eax, [uDRVal]
2054 mov dr3, eax
2055# endif
2056 }
2057# endif
2058}
2059#endif
2060
2061
2062/**
2063 * Sets dr6.
2064 *
2065 * @param uDRVal Debug register value to write
2066 */
2067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2068DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2069#else
2070DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2071{
2072# if RT_INLINE_ASM_USES_INTRIN
2073 __writedr(6, uDRVal);
2074# elif RT_INLINE_ASM_GNU_STYLE
2075# ifdef RT_ARCH_AMD64
2076 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2077# else
2078 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2079# endif
2080# else
2081 __asm
2082 {
2083# ifdef RT_ARCH_AMD64
2084 mov rax, [uDRVal]
2085 mov dr6, rax
2086# else
2087 mov eax, [uDRVal]
2088 mov dr6, eax
2089# endif
2090 }
2091# endif
2092}
2093#endif
2094
2095
2096/**
2097 * Sets dr7.
2098 *
2099 * @param uDRVal Debug register value to write
2100 */
2101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2102DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2103#else
2104DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2105{
2106# if RT_INLINE_ASM_USES_INTRIN
2107 __writedr(7, uDRVal);
2108# elif RT_INLINE_ASM_GNU_STYLE
2109# ifdef RT_ARCH_AMD64
2110 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2111# else
2112 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2113# endif
2114# else
2115 __asm
2116 {
2117# ifdef RT_ARCH_AMD64
2118 mov rax, [uDRVal]
2119 mov dr7, rax
2120# else
2121 mov eax, [uDRVal]
2122 mov dr7, eax
2123# endif
2124 }
2125# endif
2126}
2127#endif
2128
2129
2130/**
2131 * Compiler memory barrier.
2132 *
2133 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2134 * values or any outstanding writes when returning from this function.
2135 *
2136 * This function must be used if non-volatile data is modified by a
2137 * device or the VMM. Typical cases are port access, MMIO access,
2138 * trapping instructions, etc.
2139 */
2140#if RT_INLINE_ASM_GNU_STYLE
2141# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2142#elif RT_INLINE_ASM_USES_INTRIN
2143# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2144#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2145DECLINLINE(void) ASMCompilerBarrier(void)
2146{
2147 __asm
2148 {
2149 }
2150}
2151#endif
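
/* Illustrative sketch (not part of the original header): a compiler barrier
 * keeps the compiler from caching a plain (non-volatile) variable across a
 * point where it may change behind its back, e.g. via a device or the VMM.
 * g_fDeviceDone is a made-up name for the example.
 * @code
 * extern uint32_t g_fDeviceDone;   // updated by a device / another context
 * while (!g_fDeviceDone)
 *     ASMCompilerBarrier();        // forces a fresh load on every iteration
 * @endcode
 * Note that this only constrains the compiler; it is not a CPU memory barrier. */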
2152
2153
2154/**
2155 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2156 *
2157 * @param Port I/O port to write to.
2158 * @param u8 8-bit integer to write.
2159 */
2160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2161DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2162#else
2163DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2164{
2165# if RT_INLINE_ASM_GNU_STYLE
2166 __asm__ __volatile__("outb %b1, %w0\n\t"
2167 :: "Nd" (Port),
2168 "a" (u8));
2169
2170# elif RT_INLINE_ASM_USES_INTRIN
2171 __outbyte(Port, u8);
2172
2173# else
2174 __asm
2175 {
2176 mov dx, [Port]
2177 mov al, [u8]
2178 out dx, al
2179 }
2180# endif
2181}
2182#endif
2183
2184
2185/**
2186 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2187 *
2188 * @returns 8-bit integer.
2189 * @param Port I/O port to read from.
2190 */
2191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2192DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2193#else
2194DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2195{
2196 uint8_t u8;
2197# if RT_INLINE_ASM_GNU_STYLE
2198 __asm__ __volatile__("inb %w1, %b0\n\t"
2199 : "=a" (u8)
2200 : "Nd" (Port));
2201
2202# elif RT_INLINE_ASM_USES_INTRIN
2203 u8 = __inbyte(Port);
2204
2205# else
2206 __asm
2207 {
2208 mov dx, [Port]
2209 in al, dx
2210 mov [u8], al
2211 }
2212# endif
2213 return u8;
2214}
2215#endif
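
/* Illustrative sketch (not part of the original header): a read-modify-write
 * of an 8-bit device register behind an I/O port using the accessors above.
 * MY_DEV_PORT and MY_DEV_ENABLE_BIT are hypothetical values for the example.
 * @code
 * #define MY_DEV_PORT       0x1234
 * #define MY_DEV_ENABLE_BIT 0x01
 * uint8_t bReg = ASMInU8(MY_DEV_PORT);
 * ASMOutU8(MY_DEV_PORT, (uint8_t)(bReg | MY_DEV_ENABLE_BIT));
 * @endcode
 */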
2216
2217
2218/**
2219 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2220 *
2221 * @param Port I/O port to write to.
2222 * @param u16 16-bit integer to write.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2226#else
2227DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2228{
2229# if RT_INLINE_ASM_GNU_STYLE
2230 __asm__ __volatile__("outw %w1, %w0\n\t"
2231 :: "Nd" (Port),
2232 "a" (u16));
2233
2234# elif RT_INLINE_ASM_USES_INTRIN
2235 __outword(Port, u16);
2236
2237# else
2238 __asm
2239 {
2240 mov dx, [Port]
2241 mov ax, [u16]
2242 out dx, ax
2243 }
2244# endif
2245}
2246#endif
2247
2248
2249/**
2250 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2251 *
2252 * @returns 16-bit integer.
2253 * @param Port I/O port to read from.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2257#else
2258DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2259{
2260 uint16_t u16;
2261# if RT_INLINE_ASM_GNU_STYLE
2262 __asm__ __volatile__("inw %w1, %w0\n\t"
2263 : "=a" (u16)
2264 : "Nd" (Port));
2265
2266# elif RT_INLINE_ASM_USES_INTRIN
2267 u16 = __inword(Port);
2268
2269# else
2270 __asm
2271 {
2272 mov dx, [Port]
2273 in ax, dx
2274 mov [u16], ax
2275 }
2276# endif
2277 return u16;
2278}
2279#endif
2280
2281
2282/**
2283 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2284 *
2285 * @param Port I/O port to write to.
2286 * @param u32 32-bit integer to write.
2287 */
2288#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2289DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2290#else
2291DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2292{
2293# if RT_INLINE_ASM_GNU_STYLE
2294 __asm__ __volatile__("outl %1, %w0\n\t"
2295 :: "Nd" (Port),
2296 "a" (u32));
2297
2298# elif RT_INLINE_ASM_USES_INTRIN
2299 __outdword(Port, u32);
2300
2301# else
2302 __asm
2303 {
2304 mov dx, [Port]
2305 mov eax, [u32]
2306 out dx, eax
2307 }
2308# endif
2309}
2310#endif
2311
2312
2313/**
2314 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2315 *
2316 * @returns 32-bit integer.
2317 * @param Port I/O port to read from.
2318 */
2319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2320DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2321#else
2322DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2323{
2324 uint32_t u32;
2325# if RT_INLINE_ASM_GNU_STYLE
2326 __asm__ __volatile__("inl %w1, %0\n\t"
2327 : "=a" (u32)
2328 : "Nd" (Port));
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 u32 = __indword(Port);
2332
2333# else
2334 __asm
2335 {
2336 mov dx, [Port]
2337 in eax, dx
2338 mov [u32], eax
2339 }
2340# endif
2341 return u32;
2342}
2343#endif
2344
2345
2346/**
2347 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2348 *
2349 * @param Port I/O port to write to.
2350 * @param pau8 Pointer to the string buffer.
2351 * @param c The number of items to write.
2352 */
2353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2354DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2355#else
2356DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2357{
2358# if RT_INLINE_ASM_GNU_STYLE
2359 __asm__ __volatile__("rep; outsb\n\t"
2360 : "+S" (pau8),
2361 "+c" (c)
2362 : "d" (Port));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov ecx, [c]
2372 mov eax, [pau8]
2373 xchg esi, eax
2374 rep outsb
2375 xchg esi, eax
2376 }
2377# endif
2378}
2379#endif
2380
2381
2382/**
2383 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2384 *
2385 * @param Port I/O port to read from.
2386 * @param pau8 Pointer to the string buffer (output).
2387 * @param c The number of items to read.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; insb\n\t"
2396 : "+D" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __inbytestring(Port, pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg edi, eax
2410 rep insb
2411 xchg edi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2420 *
2421 * @param Port I/O port to write to.
2422 * @param pau16 Pointer to the string buffer.
2423 * @param c The number of items to write.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2427#else
2428DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; outsw\n\t"
2432 : "+S" (pau16),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau16]
2445 xchg esi, eax
2446 rep outsw
2447 xchg esi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2456 *
2457 * @param Port I/O port to read from.
2458 * @param pau16 Pointer to the string buffer (output).
2459 * @param c The number of items to read.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; insw\n\t"
2468 : "+D" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __inwordstring(Port, pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg edi, eax
2482 rep insw
2483 xchg edi, eax
2484 }
2485# endif
2486}
2487#endif
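
/**
 * Usage sketch (illustrative only): reading one 512-byte sector worth of data
 * with ASMInStrU16.  The port number 0x1f0 (legacy primary ATA data port) and
 * the assumption of a context that is allowed to do port I/O are this
 * example's, not something the header provides or guarantees.
 * @code
 *      uint16_t au16Sector[256];                                // example-only buffer
 *      ASMInStrU16(0x1f0, &au16Sector[0], RT_ELEMENTS(au16Sector));
 * @endcode
 */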
2488
2489
2490/**
2491 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2492 *
2493 * @param Port I/O port to write to.
2494 * @param pau32 Pointer to the string buffer.
2495 * @param c The number of items to write.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2499#else
2500DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; outsl\n\t"
2504 : "+S" (pau32),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau32]
2517 xchg esi, eax
2518 rep outsd
2519 xchg esi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2528 *
2529 * @param Port I/O port to read from.
2530 * @param pau32 Pointer to the string buffer (output).
2531 * @param c The number of items to read.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; insl\n\t"
2540 : "+D" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg edi, eax
2554 rep insd
2555 xchg edi, eax
2556 }
2557# endif
2558}
2559#endif
2560
2561
2562/**
2563 * Atomically Exchange an unsigned 8-bit value, ordered.
2564 *
2565 * @returns Current *pu8 value
2566 * @param pu8 Pointer to the 8-bit variable to update.
2567 * @param u8 The 8-bit value to assign to *pu8.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL
2570DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2571#else
2572DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("xchgb %0, %1\n\t"
2576 : "=m" (*pu8),
2577 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2578 : "1" (u8),
2579 "m" (*pu8));
2580# else
2581 __asm
2582 {
2583# ifdef RT_ARCH_AMD64
2584 mov rdx, [pu8]
2585 mov al, [u8]
2586 xchg [rdx], al
2587 mov [u8], al
2588# else
2589 mov edx, [pu8]
2590 mov al, [u8]
2591 xchg [edx], al
2592 mov [u8], al
2593# endif
2594 }
2595# endif
2596 return u8;
2597}
2598#endif
2599
2600
2601/**
2602 * Atomically Exchange a signed 8-bit value, ordered.
2603 *
2604 * @returns Current *pi8 value
2605 * @param pi8 Pointer to the 8-bit variable to update.
2606 * @param i8 The 8-bit value to assign to *pi8.
2607 */
2608DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2609{
2610 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2611}
2612
2613
2614/**
2615 * Atomically Exchange a bool value, ordered.
2616 *
2617 * @returns Current *pf value
2618 * @param pf Pointer to the boolean variable to update.
2619 * @param f The boolean value to assign to *pf.
2620 */
2621DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2622{
2623#ifdef _MSC_VER
2624 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2625#else
2626 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2627#endif
2628}
2629
2630
2631/**
2632 * Atomically Exchange an unsigned 16-bit value, ordered.
2633 *
2634 * @returns Current *pu16 value
2635 * @param pu16 Pointer to the 16-bit variable to update.
2636 * @param u16 The 16-bit value to assign to *pu16.
2637 */
2638#if RT_INLINE_ASM_EXTERNAL
2639DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2640#else
2641DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2642{
2643# if RT_INLINE_ASM_GNU_STYLE
2644 __asm__ __volatile__("xchgw %0, %1\n\t"
2645 : "=m" (*pu16),
2646 "=r" (u16)
2647 : "1" (u16),
2648 "m" (*pu16));
2649# else
2650 __asm
2651 {
2652# ifdef RT_ARCH_AMD64
2653 mov rdx, [pu16]
2654 mov ax, [u16]
2655 xchg [rdx], ax
2656 mov [u16], ax
2657# else
2658 mov edx, [pu16]
2659 mov ax, [u16]
2660 xchg [edx], ax
2661 mov [u16], ax
2662# endif
2663 }
2664# endif
2665 return u16;
2666}
2667#endif
2668
2669
2670/**
2671 * Atomically Exchange a signed 16-bit value, ordered.
2672 *
2673 * @returns Current *pi16 value
2674 * @param pi16 Pointer to the 16-bit variable to update.
2675 * @param i16 The 16-bit value to assign to *pi16.
2676 */
2677DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2678{
2679 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2680}
2681
2682
2683/**
2684 * Atomically Exchange an unsigned 32-bit value, ordered.
2685 *
2686 * @returns Current *pu32 value
2687 * @param pu32 Pointer to the 32-bit variable to update.
2688 * @param u32 The 32-bit value to assign to *pu32.
2689 */
2690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2691DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2692#else
2693DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2694{
2695# if RT_INLINE_ASM_GNU_STYLE
2696 __asm__ __volatile__("xchgl %0, %1\n\t"
2697 : "=m" (*pu32),
2698 "=r" (u32)
2699 : "1" (u32),
2700 "m" (*pu32));
2701
2702# elif RT_INLINE_ASM_USES_INTRIN
2703 u32 = _InterlockedExchange((long *)pu32, u32);
2704
2705# else
2706 __asm
2707 {
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 mov eax, u32
2711 xchg [rdx], eax
2712 mov [u32], eax
2713# else
2714 mov edx, [pu32]
2715 mov eax, u32
2716 xchg [edx], eax
2717 mov [u32], eax
2718# endif
2719 }
2720# endif
2721 return u32;
2722}
2723#endif
2724
2725
2726/**
2727 * Atomically Exchange a signed 32-bit value, ordered.
2728 *
2729 * @returns Current *pi32 value
2730 * @param pi32 Pointer to the 32-bit variable to update.
2731 * @param i32 The 32-bit value to assign to *pi32.
2732 */
2733DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2734{
2735 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2736}
2737
2738
2739/**
2740 * Atomically Exchange an unsigned 64-bit value, ordered.
2741 *
2742 * @returns Current *pu64 value
2743 * @param pu64 Pointer to the 64-bit variable to update.
2744 * @param u64 The 64-bit value to assign to *pu64.
2745 */
2746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2747DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2748#else
2749DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2750{
2751# if defined(RT_ARCH_AMD64)
2752# if RT_INLINE_ASM_USES_INTRIN
2753 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2754
2755# elif RT_INLINE_ASM_GNU_STYLE
2756 __asm__ __volatile__("xchgq %0, %1\n\t"
2757 : "=m" (*pu64),
2758 "=r" (u64)
2759 : "1" (u64),
2760 "m" (*pu64));
2761# else
2762 __asm
2763 {
2764 mov rdx, [pu64]
2765 mov rax, [u64]
2766 xchg [rdx], rax
2767 mov [u64], rax
2768 }
2769# endif
2770# else /* !RT_ARCH_AMD64 */
2771# if RT_INLINE_ASM_GNU_STYLE
2772# if defined(PIC) || defined(__PIC__)
2773 uint32_t u32EBX = (uint32_t)u64;
2774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2775 "xchgl %%ebx, %3\n\t"
2776 "1:\n\t"
2777 "lock; cmpxchg8b (%5)\n\t"
2778 "jnz 1b\n\t"
2779 "movl %3, %%ebx\n\t"
2780 /*"xchgl %%esi, %5\n\t"*/
2781 : "=A" (u64),
2782 "=m" (*pu64)
2783 : "0" (*pu64),
2784 "m" ( u32EBX ),
2785 "c" ( (uint32_t)(u64 >> 32) ),
2786 "S" (pu64));
2787# else /* !PIC */
2788 __asm__ __volatile__("1:\n\t"
2789 "lock; cmpxchg8b %1\n\t"
2790 "jnz 1b\n\t"
2791 : "=A" (u64),
2792 "=m" (*pu64)
2793 : "0" (*pu64),
2794 "b" ( (uint32_t)u64 ),
2795 "c" ( (uint32_t)(u64 >> 32) ));
2796# endif
2797# else
2798 __asm
2799 {
2800 mov ebx, dword ptr [u64]
2801 mov ecx, dword ptr [u64 + 4]
2802 mov edi, pu64
2803 mov eax, dword ptr [edi]
2804 mov edx, dword ptr [edi + 4]
2805 retry:
2806 lock cmpxchg8b [edi]
2807 jnz retry
2808 mov dword ptr [u64], eax
2809 mov dword ptr [u64 + 4], edx
2810 }
2811# endif
2812# endif /* !RT_ARCH_AMD64 */
2813 return u64;
2814}
2815#endif
2816
2817
2818/**
2819 * Atomically Exchange a signed 64-bit value, ordered.
2820 *
2821 * @returns Current *pi64 value
2822 * @param pi64 Pointer to the 64-bit variable to update.
2823 * @param i64 The 64-bit value to assign to *pi64.
2824 */
2825DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2826{
2827 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2828}
2829
2830
2831#ifdef RT_ARCH_AMD64
2832/**
2833 * Atomically Exchange an unsigned 128-bit value, ordered.
2834 *
2835 * @returns Current *pu128.
2836 * @param pu128 Pointer to the 128-bit variable to update.
2837 * @param u128 The 128-bit value to assign to *pu128.
2838 *
2839 * @remark We cannot really assume that any hardware supports this. Nor do I have
2840 * GAS support for it. So, for the time being we'll BREAK the atomic
2841 * bit of this function and use two 64-bit exchanges instead.
2842 */
2843# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2844DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2845# else
2846DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2847{
2848 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2849 {
2850 /** @todo this is clumsy code */
2851 RTUINT128U u128Ret;
2852 u128Ret.u = u128;
2853 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2854 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2855 return u128Ret.u;
2856 }
2857#if 0 /* later? */
2858 else
2859 {
2860# if RT_INLINE_ASM_GNU_STYLE
2861 __asm__ __volatile__("1:\n\t"
2862 "lock; cmpxchg8b %1\n\t"
2863 "jnz 1b\n\t"
2864 : "=A" (u128),
2865 "=m" (*pu128)
2866 : "0" (*pu128),
2867 "b" ( (uint64_t)u128 ),
2868 "c" ( (uint64_t)(u128 >> 64) ));
2869# else
2870 __asm
2871 {
2872 mov rbx, dword ptr [u128]
2873 mov rcx, dword ptr [u128 + 8]
2874 mov rdi, pu128
2875 mov rax, dword ptr [rdi]
2876 mov rdx, dword ptr [rdi + 8]
2877 retry:
2878 lock cmpxchg16b [rdi]
2879 jnz retry
2880 mov dword ptr [u128], rax
2881 mov dword ptr [u128 + 8], rdx
2882 }
2883# endif
2884 }
2885 return u128;
2886#endif
2887}
2888# endif
2889#endif /* RT_ARCH_AMD64 */
2890
2891
2892/**
2893 * Atomically Exchange a pointer value, ordered.
2894 *
2895 * @returns Current *ppv value
2896 * @param ppv Pointer to the pointer variable to update.
2897 * @param pv The pointer value to assign to *ppv.
2898 */
2899DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2900{
2901#if ARCH_BITS == 32
2902 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2903#elif ARCH_BITS == 64
2904 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2905#else
2906# error "ARCH_BITS is bogus"
2907#endif
2908}
2909
2910
2911/**
2912 * Atomically Exchange a raw-mode context pointer value, ordered.
2913 *
2914 * @returns Current *ppvRC value
2915 * @param ppvRC Pointer to the pointer variable to update.
2916 * @param pvRC The pointer value to assign to *ppvRC.
2917 */
2918DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2919{
2920 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2921}
2922
2923
2924/**
2925 * Atomically Exchange a ring-0 pointer value, ordered.
2926 *
2927 * @returns Current *ppvR0 value
2928 * @param ppvR0 Pointer to the pointer variable to update.
2929 * @param pvR0 The pointer value to assign to *ppvR0.
2930 */
2931DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2932{
2933#if R0_ARCH_BITS == 32
2934 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2935#elif R0_ARCH_BITS == 64
2936 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2937#else
2938# error "R0_ARCH_BITS is bogus"
2939#endif
2940}
2941
2942
2943/**
2944 * Atomically Exchange a ring-3 pointer value, ordered.
2945 *
2946 * @returns Current *ppvR3 value
2947 * @param ppvR3 Pointer to the pointer variable to update.
2948 * @param pvR3 The pointer value to assign to *ppvR3.
2949 */
2950DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2951{
2952#if R3_ARCH_BITS == 32
2953 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2954#elif R3_ARCH_BITS == 64
2955 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2956#else
2957# error "R3_ARCH_BITS is bogus"
2958#endif
2959}
2960
2961
2962/** @def ASMAtomicXchgHandle
2963 * Atomically Exchange a typical IPRT handle value, ordered.
2964 *
2965 * @param ph Pointer to the value to update.
2966 * @param hNew The new value to assign to *ph.
2967 * @param phRes Where to store the current *ph value.
2968 *
2969 * @remarks This doesn't currently work for all handles (like RTFILE).
2970 */
2971#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2972 do { \
2973 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2974 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2975 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2976 } while (0)
2977
2978
2979/**
2980 * Atomically Exchange a value whose size might differ
2981 * between platforms or compilers, ordered.
2982 *
2983 * @param pu Pointer to the variable to update.
2984 * @param uNew The value to assign to *pu.
2985 * @todo This is busted as it's missing the result argument.
2986 */
2987#define ASMAtomicXchgSize(pu, uNew) \
2988 do { \
2989 switch (sizeof(*(pu))) { \
2990 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2991 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2992 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2993 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2994 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2995 } \
2996 } while (0)
2997
2998/**
2999 * Atomically Exchange a value whose size might differ
3000 * between platforms or compilers, ordered.
3001 *
3002 * @param pu Pointer to the variable to update.
3003 * @param uNew The value to assign to *pu.
3004 * @param puRes Where to store the current *pu value.
3005 */
3006#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
3007 do { \
3008 switch (sizeof(*(pu))) { \
3009 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3010 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3011 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3012 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3013 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3014 } \
3015 } while (0)
3016
3017
3018/**
3019 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3020 *
3021 * @returns true if xchg was done.
3022 * @returns false if xchg wasn't done.
3023 *
3024 * @param pu32 Pointer to the value to update.
3025 * @param u32New The new value to assign to *pu32.
3026 * @param u32Old The old value to compare *pu32 with.
3027 */
3028#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3029DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3030#else
3031DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3032{
3033# if RT_INLINE_ASM_GNU_STYLE
3034 uint8_t u8Ret;
3035 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3036 "setz %1\n\t"
3037 : "=m" (*pu32),
3038 "=qm" (u8Ret),
3039 "=a" (u32Old)
3040 : "r" (u32New),
3041 "2" (u32Old),
3042 "m" (*pu32));
3043 return (bool)u8Ret;
3044
3045# elif RT_INLINE_ASM_USES_INTRIN
3046 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3047
3048# else
3049 uint32_t u32Ret;
3050 __asm
3051 {
3052# ifdef RT_ARCH_AMD64
3053 mov rdx, [pu32]
3054# else
3055 mov edx, [pu32]
3056# endif
3057 mov eax, [u32Old]
3058 mov ecx, [u32New]
3059# ifdef RT_ARCH_AMD64
3060 lock cmpxchg [rdx], ecx
3061# else
3062 lock cmpxchg [edx], ecx
3063# endif
3064 setz al
3065 movzx eax, al
3066 mov [u32Ret], eax
3067 }
3068 return !!u32Ret;
3069# endif
3070}
3071#endif
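
/**
 * Usage sketch (illustrative only): the typical compare-and-exchange retry
 * loop, here keeping track of a 32-bit maximum.  The global g_u32ExampleMax
 * and the candidate value u32New are this example's assumptions.
 * @code
 *      static volatile uint32_t g_u32ExampleMax = 0;            // example-only global
 *
 *      uint32_t u32Old;
 *      do
 *          u32Old = ASMAtomicReadU32(&g_u32ExampleMax);
 *      while (   u32New > u32Old
 *             && !ASMAtomicCmpXchgU32(&g_u32ExampleMax, u32New, u32Old));
 * @endcode
 */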
3072
3073
3074/**
3075 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3076 *
3077 * @returns true if xchg was done.
3078 * @returns false if xchg wasn't done.
3079 *
3080 * @param pi32 Pointer to the value to update.
3081 * @param i32New The new value to assign to *pi32.
3082 * @param i32Old The old value to compare *pi32 with.
3083 */
3084DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3085{
3086 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3087}
3088
3089
3090/**
3091 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3092 *
3093 * @returns true if xchg was done.
3094 * @returns false if xchg wasn't done.
3095 *
3096 * @param pu64 Pointer to the 64-bit variable to update.
3097 * @param u64New The 64-bit value to assign to *pu64.
3098 * @param u64Old The value to compare with.
3099 */
3100#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3101 || (RT_INLINE_ASM_GCC_4_3_3_X86 && defined(IN_RING3) && defined(__PIC__))
3102DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3103#else
3104DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3105{
3106# if RT_INLINE_ASM_USES_INTRIN
3107 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3108
3109# elif defined(RT_ARCH_AMD64)
3110# if RT_INLINE_ASM_GNU_STYLE
3111 uint8_t u8Ret;
3112 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3113 "setz %1\n\t"
3114 : "=m" (*pu64),
3115 "=qm" (u8Ret),
3116 "=a" (u64Old)
3117 : "r" (u64New),
3118 "2" (u64Old),
3119 "m" (*pu64));
3120 return (bool)u8Ret;
3121# else
3122 bool fRet;
3123 __asm
3124 {
3125 mov rdx, [pu64]
3126 mov rax, [u64Old]
3127 mov rcx, [u64New]
3128 lock cmpxchg [rdx], rcx
3129 setz al
3130 mov [fRet], al
3131 }
3132 return fRet;
3133# endif
3134# else /* !RT_ARCH_AMD64 */
3135 uint32_t u32Ret;
3136# if RT_INLINE_ASM_GNU_STYLE
3137# if defined(PIC) || defined(__PIC__)
3138 uint32_t u32EBX = (uint32_t)u64New;
3139 uint32_t u32Spill;
3140 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3141 "lock; cmpxchg8b (%6)\n\t"
3142 "setz %%al\n\t"
3143 "movl %4, %%ebx\n\t"
3144 "movzbl %%al, %%eax\n\t"
3145 : "=a" (u32Ret),
3146 "=d" (u32Spill),
3147# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3148 "+m" (*pu64)
3149# else
3150 "=m" (*pu64)
3151# endif
3152 : "A" (u64Old),
3153 "m" ( u32EBX ),
3154 "c" ( (uint32_t)(u64New >> 32) ),
3155 "S" (pu64));
3156# else /* !PIC */
3157 uint32_t u32Spill;
3158 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3159 "setz %%al\n\t"
3160 "movzbl %%al, %%eax\n\t"
3161 : "=a" (u32Ret),
3162 "=d" (u32Spill),
3163 "+m" (*pu64)
3164 : "A" (u64Old),
3165 "b" ( (uint32_t)u64New ),
3166 "c" ( (uint32_t)(u64New >> 32) ));
3167# endif
3168 return (bool)u32Ret;
3169# else
3170 __asm
3171 {
3172 mov ebx, dword ptr [u64New]
3173 mov ecx, dword ptr [u64New + 4]
3174 mov edi, [pu64]
3175 mov eax, dword ptr [u64Old]
3176 mov edx, dword ptr [u64Old + 4]
3177 lock cmpxchg8b [edi]
3178 setz al
3179 movzx eax, al
3180 mov dword ptr [u32Ret], eax
3181 }
3182 return !!u32Ret;
3183# endif
3184# endif /* !RT_ARCH_AMD64 */
3185}
3186#endif
3187
3188
3189/**
3190 * Atomically Compare and exchange a signed 64-bit value, ordered.
3191 *
3192 * @returns true if xchg was done.
3193 * @returns false if xchg wasn't done.
3194 *
3195 * @param pi64 Pointer to the 64-bit variable to update.
3196 * @param i64 The 64-bit value to assign to *pi64.
3197 * @param i64Old The value to compare with.
3198 */
3199DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3200{
3201 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3202}
3203
3204
3205/**
3206 * Atomically Compare and Exchange a pointer value, ordered.
3207 *
3208 * @returns true if xchg was done.
3209 * @returns false if xchg wasn't done.
3210 *
3211 * @param ppv Pointer to the value to update.
3212 * @param pvNew The new value to assign to *ppv.
3213 * @param pvOld The old value to compare *ppv with.
3214 */
3215DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3216{
3217#if ARCH_BITS == 32
3218 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3219#elif ARCH_BITS == 64
3220 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3221#else
3222# error "ARCH_BITS is bogus"
3223#endif
3224}
3225
3226
3227/** @def ASMAtomicCmpXchgHandle
3228 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3229 *
3230 * @param ph Pointer to the value to update.
3231 * @param hNew The new value to assign to *ph.
3232 * @param hOld The old value to compare *ph with.
3233 * @param fRc Where to store the result.
3234 *
3235 * @remarks This doesn't currently work for all handles (like RTFILE).
3236 */
3237#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3238 do { \
3239 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
3240 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3241 } while (0)
3242
3243
3244/** @def ASMAtomicCmpXchgSize
3245 * Atomically Compare and Exchange a value whose size might differ
3246 * between platforms or compilers, ordered.
3247 *
3248 * @param pu Pointer to the value to update.
3249 * @param uNew The new value to assign to *pu.
3250 * @param uOld The old value to compare *pu with.
3251 * @param fRc Where to store the result.
3252 */
3253#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3254 do { \
3255 switch (sizeof(*(pu))) { \
3256 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3257 break; \
3258 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3259 break; \
3260 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3261 (fRc) = false; \
3262 break; \
3263 } \
3264 } while (0)
3265
3266
3267/**
3268 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3269 * passes back old value, ordered.
3270 *
3271 * @returns true if xchg was done.
3272 * @returns false if xchg wasn't done.
3273 *
3274 * @param pu32 Pointer to the value to update.
3275 * @param u32New The new value to assign to *pu32.
3276 * @param u32Old The old value to compare *pu32 with.
3277 * @param pu32Old Pointer to where to store the old value.
3278 */
3279#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3280DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3281#else
3282DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3283{
3284# if RT_INLINE_ASM_GNU_STYLE
3285 uint8_t u8Ret;
3286 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3287 "setz %1\n\t"
3288 : "=m" (*pu32),
3289 "=qm" (u8Ret),
3290 "=a" (*pu32Old)
3291 : "r" (u32New),
3292 "a" (u32Old),
3293 "m" (*pu32));
3294 return (bool)u8Ret;
3295
3296# elif RT_INLINE_ASM_USES_INTRIN
3297 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3298
3299# else
3300 uint32_t u32Ret;
3301 __asm
3302 {
3303# ifdef RT_ARCH_AMD64
3304 mov rdx, [pu32]
3305# else
3306 mov edx, [pu32]
3307# endif
3308 mov eax, [u32Old]
3309 mov ecx, [u32New]
3310# ifdef RT_ARCH_AMD64
3311 lock cmpxchg [rdx], ecx
3312 mov rdx, [pu32Old]
3313 mov [rdx], eax
3314# else
3315 lock cmpxchg [edx], ecx
3316 mov edx, [pu32Old]
3317 mov [edx], eax
3318# endif
3319 setz al
3320 movzx eax, al
3321 mov [u32Ret], eax
3322 }
3323 return !!u32Ret;
3324# endif
3325}
3326#endif
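
/**
 * Usage sketch (illustrative only): a retry loop using the Ex variant so that
 * the value passed back on failure seeds the next attempt, avoiding a separate
 * re-read.  Here bit 7 is set atomically in g_fExampleFlags, which is this
 * example's global.
 * @code
 *      static volatile uint32_t g_fExampleFlags = 0;            // example-only global
 *
 *      uint32_t u32Old = ASMAtomicUoReadU32(&g_fExampleFlags);
 *      uint32_t u32New;
 *      do
 *          u32New = u32Old | RT_BIT(7);
 *      while (!ASMAtomicCmpXchgExU32(&g_fExampleFlags, u32New, u32Old, &u32Old));
 * @endcode
 */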
3327
3328
3329/**
3330 * Atomically Compare and Exchange a signed 32-bit value, additionally
3331 * passes back old value, ordered.
3332 *
3333 * @returns true if xchg was done.
3334 * @returns false if xchg wasn't done.
3335 *
3336 * @param pi32 Pointer to the value to update.
3337 * @param i32New The new value to assign to *pi32.
3338 * @param i32Old The old value to compare *pi32 with.
3339 * @param pi32Old Pointer to where to store the old value.
3340 */
3341DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3342{
3343 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3344}
3345
3346
3347/**
3348 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3349 * passing back old value, ordered.
3350 *
3351 * @returns true if xchg was done.
3352 * @returns false if xchg wasn't done.
3353 *
3354 * @param pu64 Pointer to the 64-bit variable to update.
3355 * @param u64New The 64-bit value to assign to *pu64.
3356 * @param u64Old The value to compare with.
3357 * @param pu64Old Pointer to where to store the old value.
3358 */
3359#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3360DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3361#else
3362DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3363{
3364# if RT_INLINE_ASM_USES_INTRIN
3365 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3366
3367# elif defined(RT_ARCH_AMD64)
3368# if RT_INLINE_ASM_GNU_STYLE
3369 uint8_t u8Ret;
3370 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3371 "setz %1\n\t"
3372 : "=m" (*pu64),
3373 "=qm" (u8Ret),
3374 "=a" (*pu64Old)
3375 : "r" (u64New),
3376 "a" (u64Old),
3377 "m" (*pu64));
3378 return (bool)u8Ret;
3379# else
3380 bool fRet;
3381 __asm
3382 {
3383 mov rdx, [pu64]
3384 mov rax, [u64Old]
3385 mov rcx, [u64New]
3386 lock cmpxchg [rdx], rcx
3387 mov rdx, [pu64Old]
3388 mov [rdx], rax
3389 setz al
3390 mov [fRet], al
3391 }
3392 return fRet;
3393# endif
3394# else /* !RT_ARCH_AMD64 */
3395# if RT_INLINE_ASM_GNU_STYLE
3396 uint64_t u64Ret;
3397# if defined(PIC) || defined(__PIC__)
3398 /* NB: this code uses a memory clobber description, because the clean
3399 * solution with an output value for *pu64 makes gcc run out of registers.
3400 * This will cause suboptimal code, and anyone with a better solution is
3401 * welcome to improve this. */
3402 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3403 "lock; cmpxchg8b %3\n\t"
3404 "xchgl %%ebx, %1\n\t"
3405 : "=A" (u64Ret)
3406 : "DS" ((uint32_t)u64New),
3407 "c" ((uint32_t)(u64New >> 32)),
3408 "m" (*pu64),
3409 "0" (u64Old)
3410 : "memory" );
3411# else /* !PIC */
3412 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3413 : "=A" (u64Ret),
3414 "=m" (*pu64)
3415 : "b" ((uint32_t)u64New),
3416 "c" ((uint32_t)(u64New >> 32)),
3417 "m" (*pu64),
3418 "0" (u64Old));
3419# endif
3420 *pu64Old = u64Ret;
3421 return u64Ret == u64Old;
3422# else
3423 uint32_t u32Ret;
3424 __asm
3425 {
3426 mov ebx, dword ptr [u64New]
3427 mov ecx, dword ptr [u64New + 4]
3428 mov edi, [pu64]
3429 mov eax, dword ptr [u64Old]
3430 mov edx, dword ptr [u64Old + 4]
3431 lock cmpxchg8b [edi]
3432 mov ebx, [pu64Old]
3433 mov [ebx], eax
3434 setz al
3435 movzx eax, al
3436 add ebx, 4
3437 mov [ebx], edx
3438 mov dword ptr [u32Ret], eax
3439 }
3440 return !!u32Ret;
3441# endif
3442# endif /* !RT_ARCH_AMD64 */
3443}
3444#endif
3445
3446
3447/**
3448 * Atomically Compare and exchange a signed 64-bit value, additionally
3449 * passing back old value, ordered.
3450 *
3451 * @returns true if xchg was done.
3452 * @returns false if xchg wasn't done.
3453 *
3454 * @param pi64 Pointer to the 64-bit variable to update.
3455 * @param i64 The 64-bit value to assign to *pi64.
3456 * @param i64Old The value to compare with.
3457 * @param pi64Old Pointer to where to store the old value.
3458 */
3459DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3460{
3461 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3462}
3463
3464/** @def ASMAtomicCmpXchgExHandle
3465 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3466 *
3467 * @param ph Pointer to the value to update.
3468 * @param hNew The new value to assign to *ph.
3469 * @param hOld The old value to compare *ph with.
3470 * @param fRc Where to store the result.
3471 * @param phOldVal Pointer to where to store the old value.
3472 *
3473 * @remarks This doesn't currently work for all handles (like RTFILE).
3474 */
3475#if ARCH_BITS == 32
3476# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3477 do { \
3478 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3479 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3480 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3481 } while (0)
3482#elif ARCH_BITS == 64
3483# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3484 do { \
3485 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3486 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3487 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3488 } while (0)
3489#endif
3490
3491
3492/** @def ASMAtomicCmpXchgExSize
3493 * Atomically Compare and Exchange a value whose size might differ
3494 * between platforms or compilers. Additionally passes back old value.
3495 *
3496 * @param pu Pointer to the value to update.
3497 * @param uNew The new value to assign to *pu.
3498 * @param uOld The old value to compare *pu with.
3499 * @param fRc Where to store the result.
3500 * @param puOldVal Pointer to where to store the old value.
3501 */
3502#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3503 do { \
3504 switch (sizeof(*(pu))) { \
3505 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3506 break; \
3507 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3508 break; \
3509 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3510 (fRc) = false; \
3511 *(puOldVal) = 0; \
3512 break; \
3513 } \
3514 } while (0)
3515
3516
3517/**
3518 * Atomically Compare and Exchange a pointer value, additionally
3519 * passing back old value, ordered.
3520 *
3521 * @returns true if xchg was done.
3522 * @returns false if xchg wasn't done.
3523 *
3524 * @param ppv Pointer to the value to update.
3525 * @param pvNew The new value to assign to *ppv.
3526 * @param pvOld The old value to compare *ppv with.
3527 * @param ppvOld Pointer to where to store the old value.
3528 */
3529DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3530{
3531#if ARCH_BITS == 32
3532 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3533#elif ARCH_BITS == 64
3534 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3535#else
3536# error "ARCH_BITS is bogus"
3537#endif
3538}
3539
3540
3541/**
3542 * Atomically exchanges and adds to a 32-bit value, ordered.
3543 *
3544 * @returns The old value.
3545 * @param pu32 Pointer to the value.
3546 * @param u32 Number to add.
3547 */
3548#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3549DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3550#else
3551DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3552{
3553# if RT_INLINE_ASM_USES_INTRIN
3554 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3555 return u32;
3556
3557# elif RT_INLINE_ASM_GNU_STYLE
3558 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3559 : "=r" (u32),
3560 "=m" (*pu32)
3561 : "0" (u32),
3562 "m" (*pu32)
3563 : "memory");
3564 return u32;
3565# else
3566 __asm
3567 {
3568 mov eax, [u32]
3569# ifdef RT_ARCH_AMD64
3570 mov rdx, [pu32]
3571 lock xadd [rdx], eax
3572# else
3573 mov edx, [pu32]
3574 lock xadd [edx], eax
3575# endif
3576 mov [u32], eax
3577 }
3578 return u32;
3579# endif
3580}
3581#endif
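
/**
 * Usage sketch (illustrative only): using the old value returned by
 * ASMAtomicAddU32 to hand out consecutive indexes from a shared counter.
 * The global g_idxExampleNext is this example's assumption.
 * @code
 *      static volatile uint32_t g_idxExampleNext = 0;           // example-only global
 *
 *      uint32_t idxMine = ASMAtomicAddU32(&g_idxExampleNext, 1); // old value == my index
 * @endcode
 */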
3582
3583
3584/**
3585 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3586 *
3587 * @returns The old value.
3588 * @param pi32 Pointer to the value.
3589 * @param i32 Number to add.
3590 */
3591DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3592{
3593 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3594}
3595
3596
3597/**
3598 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3599 *
3600 * @returns The old value.
3601 * @param pi32 Pointer to the value.
3602 * @param u32 Number to subtract.
3603 */
3604DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3605{
3606 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3607}
3608
3609
3610/**
3611 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3612 *
3613 * @returns The old value.
3614 * @param pi32 Pointer to the value.
3615 * @param i32 Number to subtract.
3616 */
3617DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3618{
3619 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3620}
3621
3622
3623/**
3624 * Atomically increment a 32-bit value, ordered.
3625 *
3626 * @returns The new value.
3627 * @param pu32 Pointer to the value to increment.
3628 */
3629#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3630DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3631#else
3632DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3633{
3634 uint32_t u32;
3635# if RT_INLINE_ASM_USES_INTRIN
3636 u32 = _InterlockedIncrement((long *)pu32);
3637 return u32;
3638
3639# elif RT_INLINE_ASM_GNU_STYLE
3640 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3641 : "=r" (u32),
3642 "=m" (*pu32)
3643 : "0" (1),
3644 "m" (*pu32)
3645 : "memory");
3646 return u32+1;
3647# else
3648 __asm
3649 {
3650 mov eax, 1
3651# ifdef RT_ARCH_AMD64
3652 mov rdx, [pu32]
3653 lock xadd [rdx], eax
3654# else
3655 mov edx, [pu32]
3656 lock xadd [edx], eax
3657# endif
3658 mov u32, eax
3659 }
3660 return u32+1;
3661# endif
3662}
3663#endif
3664
3665
3666/**
3667 * Atomically increment a signed 32-bit value, ordered.
3668 *
3669 * @returns The new value.
3670 * @param pi32 Pointer to the value to increment.
3671 */
3672DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3673{
3674 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3675}
3676
3677
3678/**
3679 * Atomically decrement an unsigned 32-bit value, ordered.
3680 *
3681 * @returns The new value.
3682 * @param pu32 Pointer to the value to decrement.
3683 */
3684#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3685DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3686#else
3687DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3688{
3689 uint32_t u32;
3690# if RT_INLINE_ASM_USES_INTRIN
3691 u32 = _InterlockedDecrement((long *)pu32);
3692 return u32;
3693
3694# elif RT_INLINE_ASM_GNU_STYLE
3695 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3696 : "=r" (u32),
3697 "=m" (*pu32)
3698 : "0" (-1),
3699 "m" (*pu32)
3700 : "memory");
3701 return u32-1;
3702# else
3703 __asm
3704 {
3705 mov eax, -1
3706# ifdef RT_ARCH_AMD64
3707 mov rdx, [pu32]
3708 lock xadd [rdx], eax
3709# else
3710 mov edx, [pu32]
3711 lock xadd [edx], eax
3712# endif
3713 mov u32, eax
3714 }
3715 return u32-1;
3716# endif
3717}
3718#endif
3719
3720
3721/**
3722 * Atomically decrement a signed 32-bit value, ordered.
3723 *
3724 * @returns The new value.
3725 * @param pi32 Pointer to the value to decrement.
3726 */
3727DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3728{
3729 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3730}
3731
3732
3733/**
3734 * Atomically Or an unsigned 32-bit value, ordered.
3735 *
3736 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3737 * @param u32 The value to OR *pu32 with.
3738 */
3739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3740DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3741#else
3742DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3743{
3744# if RT_INLINE_ASM_USES_INTRIN
3745 _InterlockedOr((long volatile *)pu32, (long)u32);
3746
3747# elif RT_INLINE_ASM_GNU_STYLE
3748 __asm__ __volatile__("lock; orl %1, %0\n\t"
3749 : "=m" (*pu32)
3750 : "ir" (u32),
3751 "m" (*pu32));
3752# else
3753 __asm
3754 {
3755 mov eax, [u32]
3756# ifdef RT_ARCH_AMD64
3757 mov rdx, [pu32]
3758 lock or [rdx], eax
3759# else
3760 mov edx, [pu32]
3761 lock or [edx], eax
3762# endif
3763 }
3764# endif
3765}
3766#endif
3767
3768
3769/**
3770 * Atomically Or a signed 32-bit value, ordered.
3771 *
3772 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3773 * @param i32 The value to OR *pi32 with.
3774 */
3775DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3776{
3777 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3778}
3779
3780
3781/**
3782 * Atomically And an unsigned 32-bit value, ordered.
3783 *
3784 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3785 * @param u32 The value to AND *pu32 with.
3786 */
3787#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3788DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3789#else
3790DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3791{
3792# if RT_INLINE_ASM_USES_INTRIN
3793 _InterlockedAnd((long volatile *)pu32, u32);
3794
3795# elif RT_INLINE_ASM_GNU_STYLE
3796 __asm__ __volatile__("lock; andl %1, %0\n\t"
3797 : "=m" (*pu32)
3798 : "ir" (u32),
3799 "m" (*pu32));
3800# else
3801 __asm
3802 {
3803 mov eax, [u32]
3804# ifdef RT_ARCH_AMD64
3805 mov rdx, [pu32]
3806 lock and [rdx], eax
3807# else
3808 mov edx, [pu32]
3809 lock and [edx], eax
3810# endif
3811 }
3812# endif
3813}
3814#endif
3815
3816
3817/**
3818 * Atomically And a signed 32-bit value, ordered.
3819 *
3820 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3821 * @param i32 The value to AND *pi32 with.
3822 */
3823DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3824{
3825 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3826}
3827
3828
3829/**
3830 * Memory fence, waits for any pending writes and reads to complete.
3831 */
3832DECLINLINE(void) ASMMemoryFence(void)
3833{
3834 /** @todo use mfence? check if all cpus we care for support it. */
3835 uint32_t volatile u32;
3836 ASMAtomicXchgU32(&u32, 0);
3837}
3838
3839
3840/**
3841 * Write fence, waits for any pending writes to complete.
3842 */
3843DECLINLINE(void) ASMWriteFence(void)
3844{
3845 /** @todo use sfence? check if all cpus we care for support it. */
3846 ASMMemoryFence();
3847}
3848
3849
3850/**
3851 * Read fence, waits for any pending reads to complete.
3852 */
3853DECLINLINE(void) ASMReadFence(void)
3854{
3855 /** @todo use lfence? check if all cpus we care for support it. */
3856 ASMMemoryFence();
3857}
3858
3859
3860/**
3861 * Atomically reads an unsigned 8-bit value, ordered.
3862 *
3863 * @returns Current *pu8 value
3864 * @param pu8 Pointer to the 8-bit variable to read.
3865 */
3866DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3867{
3868 ASMMemoryFence();
3869 return *pu8; /* byte reads are atomic on x86 */
3870}
3871
3872
3873/**
3874 * Atomically reads an unsigned 8-bit value, unordered.
3875 *
3876 * @returns Current *pu8 value
3877 * @param pu8 Pointer to the 8-bit variable to read.
3878 */
3879DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3880{
3881 return *pu8; /* byte reads are atomic on x86 */
3882}
3883
3884
3885/**
3886 * Atomically reads a signed 8-bit value, ordered.
3887 *
3888 * @returns Current *pi8 value
3889 * @param pi8 Pointer to the 8-bit variable to read.
3890 */
3891DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3892{
3893 ASMMemoryFence();
3894 return *pi8; /* byte reads are atomic on x86 */
3895}
3896
3897
3898/**
3899 * Atomically reads a signed 8-bit value, unordered.
3900 *
3901 * @returns Current *pi8 value
3902 * @param pi8 Pointer to the 8-bit variable to read.
3903 */
3904DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3905{
3906 return *pi8; /* byte reads are atomic on x86 */
3907}
3908
3909
3910/**
3911 * Atomically reads an unsigned 16-bit value, ordered.
3912 *
3913 * @returns Current *pu16 value
3914 * @param pu16 Pointer to the 16-bit variable to read.
3915 */
3916DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3917{
3918 ASMMemoryFence();
3919 Assert(!((uintptr_t)pu16 & 1));
3920 return *pu16;
3921}
3922
3923
3924/**
3925 * Atomically reads an unsigned 16-bit value, unordered.
3926 *
3927 * @returns Current *pu16 value
3928 * @param pu16 Pointer to the 16-bit variable to read.
3929 */
3930DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3931{
3932 Assert(!((uintptr_t)pu16 & 1));
3933 return *pu16;
3934}
3935
3936
3937/**
3938 * Atomically reads a signed 16-bit value, ordered.
3939 *
3940 * @returns Current *pi16 value
3941 * @param pi16 Pointer to the 16-bit variable to read.
3942 */
3943DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3944{
3945 ASMMemoryFence();
3946 Assert(!((uintptr_t)pi16 & 1));
3947 return *pi16;
3948}
3949
3950
3951/**
3952 * Atomically reads a signed 16-bit value, unordered.
3953 *
3954 * @returns Current *pi16 value
3955 * @param pi16 Pointer to the 16-bit variable to read.
3956 */
3957DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3958{
3959 Assert(!((uintptr_t)pi16 & 1));
3960 return *pi16;
3961}
3962
3963
3964/**
3965 * Atomically reads an unsigned 32-bit value, ordered.
3966 *
3967 * @returns Current *pu32 value
3968 * @param pu32 Pointer to the 32-bit variable to read.
3969 */
3970DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3971{
3972 ASMMemoryFence();
3973 Assert(!((uintptr_t)pu32 & 3));
3974 return *pu32;
3975}
3976
3977
3978/**
3979 * Atomically reads an unsigned 32-bit value, unordered.
3980 *
3981 * @returns Current *pu32 value
3982 * @param pu32 Pointer to the 32-bit variable to read.
3983 */
3984DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3985{
3986 Assert(!((uintptr_t)pu32 & 3));
3987 return *pu32;
3988}
3989
3990
3991/**
3992 * Atomically reads a signed 32-bit value, ordered.
3993 *
3994 * @returns Current *pi32 value
3995 * @param pi32 Pointer to the 32-bit variable to read.
3996 */
3997DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3998{
3999 ASMMemoryFence();
4000 Assert(!((uintptr_t)pi32 & 3));
4001 return *pi32;
4002}
4003
4004
4005/**
4006 * Atomically reads a signed 32-bit value, unordered.
4007 *
4008 * @returns Current *pi32 value
4009 * @param pi32 Pointer to the 32-bit variable to read.
4010 */
4011DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4012{
4013 Assert(!((uintptr_t)pi32 & 3));
4014 return *pi32;
4015}
4016
4017
4018/**
4019 * Atomically reads an unsigned 64-bit value, ordered.
4020 *
4021 * @returns Current *pu64 value
4022 * @param pu64 Pointer to the 64-bit variable to read.
4023 * The memory pointed to must be writable.
4024 * @remark This will fault if the memory is read-only!
4025 */
4026#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4027DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4028#else
4029DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4030{
4031 uint64_t u64;
4032# ifdef RT_ARCH_AMD64
4033 Assert(!((uintptr_t)pu64 & 7));
4034/*# if RT_INLINE_ASM_GNU_STYLE
4035 __asm__ __volatile__( "mfence\n\t"
4036 "movq %1, %0\n\t"
4037 : "=r" (u64)
4038 : "m" (*pu64));
4039# else
4040 __asm
4041 {
4042 mfence
4043 mov rdx, [pu64]
4044 mov rax, [rdx]
4045 mov [u64], rax
4046 }
4047# endif*/
4048 ASMMemoryFence();
4049 u64 = *pu64;
4050# else /* !RT_ARCH_AMD64 */
4051# if RT_INLINE_ASM_GNU_STYLE
4052# if defined(PIC) || defined(__PIC__)
4053 uint32_t u32EBX = 0;
4054 Assert(!((uintptr_t)pu64 & 7));
4055 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4056 "lock; cmpxchg8b (%5)\n\t"
4057 "movl %3, %%ebx\n\t"
4058 : "=A" (u64),
4059# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4060 "+m" (*pu64)
4061# else
4062 "=m" (*pu64)
4063# endif
4064 : "0" (0),
4065 "m" (u32EBX),
4066 "c" (0),
4067 "S" (pu64));
4068# else /* !PIC */
4069 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4070 : "=A" (u64),
4071 "+m" (*pu64)
4072 : "0" (0),
4073 "b" (0),
4074 "c" (0));
4075# endif
4076# else
4077 Assert(!((uintptr_t)pu64 & 7));
4078 __asm
4079 {
4080 xor eax, eax
4081 xor edx, edx
4082 mov edi, pu64
4083 xor ecx, ecx
4084 xor ebx, ebx
4085 lock cmpxchg8b [edi]
4086 mov dword ptr [u64], eax
4087 mov dword ptr [u64 + 4], edx
4088 }
4089# endif
4090# endif /* !RT_ARCH_AMD64 */
4091 return u64;
4092}
4093#endif
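
/**
 * Usage sketch (illustrative only): sampling a 64-bit counter that another
 * thread keeps updating.  On 32-bit hosts a plain 64-bit load may tear, so
 * ASMAtomicReadU64 is used instead; because of the cmpxchg8b based
 * implementation the variable must live in writable memory.  The global
 * g_cbExampleTotal is this example's assumption.
 * @code
 *      static volatile uint64_t g_cbExampleTotal;               // example-only global
 *
 *      uint64_t cbNow = ASMAtomicReadU64(&g_cbExampleTotal);
 * @endcode
 */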
4094
4095
4096/**
4097 * Atomically reads an unsigned 64-bit value, unordered.
4098 *
4099 * @returns Current *pu64 value
4100 * @param pu64 Pointer to the 64-bit variable to read.
4101 * The memory pointed to must be writable.
4102 * @remark This will fault if the memory is read-only!
4103 */
4104#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4105DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4106#else
4107DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4108{
4109 uint64_t u64;
4110# ifdef RT_ARCH_AMD64
4111 Assert(!((uintptr_t)pu64 & 7));
4112/*# if RT_INLINE_ASM_GNU_STYLE
4113 Assert(!((uintptr_t)pu64 & 7));
4114 __asm__ __volatile__("movq %1, %0\n\t"
4115 : "=r" (u64)
4116 : "m" (*pu64));
4117# else
4118 __asm
4119 {
4120 mov rdx, [pu64]
4121 mov rax, [rdx]
4122 mov [u64], rax
4123 }
4124# endif */
4125 u64 = *pu64;
4126# else /* !RT_ARCH_AMD64 */
4127# if RT_INLINE_ASM_GNU_STYLE
4128# if defined(PIC) || defined(__PIC__)
4129 uint32_t u32EBX = 0;
4130 uint32_t u32Spill;
4131 Assert(!((uintptr_t)pu64 & 7));
4132 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4133 "xor %%ecx,%%ecx\n\t"
4134 "xor %%edx,%%edx\n\t"
4135 "xchgl %%ebx, %3\n\t"
4136 "lock; cmpxchg8b (%4)\n\t"
4137 "movl %3, %%ebx\n\t"
4138 : "=A" (u64),
4139# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4140 "+m" (*pu64),
4141# else
4142 "=m" (*pu64),
4143# endif
4144 "=c" (u32Spill)
4145 : "m" (u32EBX),
4146 "S" (pu64));
4147# else /* !PIC */
4148 __asm__ __volatile__("cmpxchg8b %1\n\t"
4149 : "=A" (u64),
4150 "+m" (*pu64)
4151 : "0" (0),
4152 "b" (0),
4153 "c" (0));
4154# endif
4155# else
4156 Assert(!((uintptr_t)pu64 & 7));
4157 __asm
4158 {
4159 xor eax, eax
4160 xor edx, edx
4161 mov edi, pu64
4162 xor ecx, ecx
4163 xor ebx, ebx
4164 lock cmpxchg8b [edi]
4165 mov dword ptr [u64], eax
4166 mov dword ptr [u64 + 4], edx
4167 }
4168# endif
4169# endif /* !RT_ARCH_AMD64 */
4170 return u64;
4171}
4172#endif
4173
4174
4175/**
4176 * Atomically reads a signed 64-bit value, ordered.
4177 *
4178 * @returns Current *pi64 value
4179 * @param pi64 Pointer to the 64-bit variable to read.
4180 * The memory pointed to must be writable.
4181 * @remark This will fault if the memory is read-only!
4182 */
4183DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4184{
4185 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4186}
4187
4188
4189/**
4190 * Atomically reads a signed 64-bit value, unordered.
4191 *
4192 * @returns Current *pi64 value
4193 * @param pi64 Pointer to the 64-bit variable to read.
4194 * The memory pointed to must be writable.
4195 * @remark This will fault if the memory is read-only!
4196 */
4197DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4198{
4199 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4200}
4201
4202
4203/**
4204 * Atomically reads a pointer value, ordered.
4205 *
4206 * @returns Current *ppv value
4207 * @param ppv Pointer to the pointer variable to read.
4208 */
4209DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4210{
4211#if ARCH_BITS == 32
4212 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4213#elif ARCH_BITS == 64
4214 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4215#else
4216# error "ARCH_BITS is bogus"
4217#endif
4218}
4219
4220
4221/**
4222 * Atomically reads a pointer value, unordered.
4223 *
4224 * @returns Current *ppv value
4225 * @param ppv Pointer to the pointer variable to read.
4226 */
4227DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4228{
4229#if ARCH_BITS == 32
4230 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4231#elif ARCH_BITS == 64
4232 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4233#else
4234# error "ARCH_BITS is bogus"
4235#endif
4236}
4237
4238
4239/**
4240 * Atomically reads a boolean value, ordered.
4241 *
4242 * @returns Current *pf value
4243 * @param pf Pointer to the boolean variable to read.
4244 */
4245DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4246{
4247 ASMMemoryFence();
4248 return *pf; /* byte reads are atomic on x86 */
4249}
4250
4251
4252/**
4253 * Atomically reads a boolean value, unordered.
4254 *
4255 * @returns Current *pf value
4256 * @param pf Pointer to the boolean variable to read.
4257 */
4258DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4259{
4260 return *pf; /* byte reads are atomic on x86 */
4261}
4262
4263
4264/**
4265 * Atomically read a typical IPRT handle value, ordered.
4266 *
4267 * @param ph Pointer to the handle variable to read.
4268 * @param phRes Where to store the result.
4269 *
4270 * @remarks This doesn't currently work for all handles (like RTFILE).
4271 */
4272#define ASMAtomicReadHandle(ph, phRes) \
4273 do { \
4274 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
4275 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4276 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4277 } while (0)
4278
4279
4280/**
4281 * Atomically read a typical IPRT handle value, unordered.
4282 *
4283 * @param ph Pointer to the handle variable to read.
4284 * @param phRes Where to store the result.
4285 *
4286 * @remarks This doesn't currently work for all handles (like RTFILE).
4287 */
4288#define ASMAtomicUoReadHandle(ph, phRes) \
4289 do { \
4290 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4291 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4292 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4293 } while (0)
4294
4295
4296/**
4297 * Atomically read a value whose size might differ
4298 * between platforms or compilers, ordered.
4299 *
4300 * @param pu Pointer to the variable to update.
4301 * @param puRes Where to store the result.
4302 */
4303#define ASMAtomicReadSize(pu, puRes) \
4304 do { \
4305 switch (sizeof(*(pu))) { \
4306 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4307 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4308 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4309 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4310 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4311 } \
4312 } while (0)
4313
4314
4315/**
4316 * Atomically read a value whose size might differ
4317 * between platforms or compilers, unordered.
4318 *
4319 * @param pu Pointer to the variable to update.
4320 * @param puRes Where to store the result.
4321 */
4322#define ASMAtomicUoReadSize(pu, puRes) \
4323 do { \
4324 switch (sizeof(*(pu))) { \
4325 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4326 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4327 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4328 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4329 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4330 } \
4331 } while (0)
4332
4333
4334/**
4335 * Atomically writes an unsigned 8-bit value, ordered.
4336 *
4337 * @param pu8 Pointer to the 8-bit variable.
4338 * @param u8 The 8-bit value to assign to *pu8.
4339 */
4340DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4341{
4342 ASMAtomicXchgU8(pu8, u8);
4343}
4344
4345
4346/**
4347 * Atomically writes an unsigned 8-bit value, unordered.
4348 *
4349 * @param pu8 Pointer to the 8-bit variable.
4350 * @param u8 The 8-bit value to assign to *pu8.
4351 */
4352DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4353{
4354 *pu8 = u8; /* byte writes are atomic on x86 */
4355}
4356
4357
4358/**
4359 * Atomically writes a signed 8-bit value, ordered.
4360 *
4361 * @param pi8 Pointer to the 8-bit variable.
4362 * @param i8 The 8-bit value to assign to *pi8.
4363 */
4364DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4365{
4366 ASMAtomicXchgS8(pi8, i8);
4367}
4368
4369
4370/**
4371 * Atomically writes a signed 8-bit value, unordered.
4372 *
4373 * @param pi8 Pointer to the 8-bit variable.
4374 * @param i8 The 8-bit value to assign to *pi8.
4375 */
4376DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4377{
4378 *pi8 = i8; /* byte writes are atomic on x86 */
4379}
4380
4381
4382/**
4383 * Atomically writes an unsigned 16-bit value, ordered.
4384 *
4385 * @param pu16 Pointer to the 16-bit variable.
4386 * @param u16 The 16-bit value to assign to *pu16.
4387 */
4388DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4389{
4390 ASMAtomicXchgU16(pu16, u16);
4391}
4392
4393
4394/**
4395 * Atomically writes an unsigned 16-bit value, unordered.
4396 *
4397 * @param pu16 Pointer to the 16-bit variable.
4398 * @param u16 The 16-bit value to assign to *pu16.
4399 */
4400DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4401{
4402 Assert(!((uintptr_t)pu16 & 1));
4403 *pu16 = u16;
4404}
4405
4406
4407/**
4408 * Atomically writes a signed 16-bit value, ordered.
4409 *
4410 * @param pi16 Pointer to the 16-bit variable.
4411 * @param i16 The 16-bit value to assign to *pi16.
4412 */
4413DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4414{
4415 ASMAtomicXchgS16(pi16, i16);
4416}
4417
4418
4419/**
4420 * Atomically writes a signed 16-bit value, unordered.
4421 *
4422 * @param pi16 Pointer to the 16-bit variable.
4423 * @param i16 The 16-bit value to assign to *pi16.
4424 */
4425DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4426{
4427 Assert(!((uintptr_t)pi16 & 1));
4428 *pi16 = i16;
4429}
4430
4431
4432/**
4433 * Atomically writes an unsigned 32-bit value, ordered.
4434 *
4435 * @param pu32 Pointer to the 32-bit variable.
4436 * @param u32 The 32-bit value to assign to *pu32.
4437 */
4438DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4439{
4440 ASMAtomicXchgU32(pu32, u32);
4441}
4442
4443
4444/**
4445 * Atomically writes an unsigned 32-bit value, unordered.
4446 *
4447 * @param pu32 Pointer to the 32-bit variable.
4448 * @param u32 The 32-bit value to assign to *pu32.
4449 */
4450DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4451{
4452 Assert(!((uintptr_t)pu32 & 3));
4453 *pu32 = u32;
4454}
4455
4456
4457/**
4458 * Atomically writes a signed 32-bit value, ordered.
4459 *
4460 * @param pi32 Pointer to the 32-bit variable.
4461 * @param i32 The 32-bit value to assign to *pi32.
4462 */
4463DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4464{
4465 ASMAtomicXchgS32(pi32, i32);
4466}
4467
4468
4469/**
4470 * Atomically writes a signed 32-bit value, unordered.
4471 *
4472 * @param pi32 Pointer to the 32-bit variable.
4473 * @param i32 The 32-bit value to assign to *pi32.
4474 */
4475DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4476{
4477 Assert(!((uintptr_t)pi32 & 3));
4478 *pi32 = i32;
4479}
4480
4481
4482/**
4483 * Atomically writes an unsigned 64-bit value, ordered.
4484 *
4485 * @param pu64 Pointer to the 64-bit variable.
4486 * @param u64 The 64-bit value to assign to *pu64.
4487 */
4488DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4489{
4490 ASMAtomicXchgU64(pu64, u64);
4491}
4492
4493
4494/**
4495 * Atomically writes an unsigned 64-bit value, unordered.
4496 *
4497 * @param pu64 Pointer to the 64-bit variable.
4498 * @param u64 The 64-bit value to assign to *pu64.
4499 */
4500DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4501{
4502 Assert(!((uintptr_t)pu64 & 7));
4503#if ARCH_BITS == 64
4504 *pu64 = u64;
4505#else
4506 ASMAtomicXchgU64(pu64, u64);
4507#endif
4508}
4509
4510
4511/**
4512 * Atomically writes a signed 64-bit value, ordered.
4513 *
4514 * @param pi64 Pointer to the 64-bit variable.
4515 * @param i64 The 64-bit value to assign to *pi64.
4516 */
4517DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4518{
4519 ASMAtomicXchgS64(pi64, i64);
4520}
4521
4522
4523/**
4524 * Atomically writes a signed 64-bit value, unordered.
4525 *
4526 * @param pi64 Pointer to the 64-bit variable.
4527 * @param i64 The 64-bit value to assign to *pi64.
4528 */
4529DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4530{
4531 Assert(!((uintptr_t)pi64 & 7));
4532#if ARCH_BITS == 64
4533 *pi64 = i64;
4534#else
4535 ASMAtomicXchgS64(pi64, i64);
4536#endif
4537}
4538
4539
4540/**
4541 * Atomically writes a boolean value, ordered.
4542 *
4543 * @param pf Pointer to the boolean variable.
4544 * @param f The boolean value to assign to *pf.
4545 */
4546DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4547{
4548 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4549}
4550
4551
4552/**
4553 * Atomically writes a boolean value, unordered.
4554 *
4555 * @param pf Pointer to the boolean variable.
4556 * @param f The boolean value to assign to *pf.
4557 */
4558DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4559{
4560 *pf = f; /* byte writes are atomic on x86 */
4561}
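
/* Editor's example (not part of the original header): a minimal sketch of
 * publishing a value to another thread.  The payload is stored with an
 * unordered write and the ready flag with an ordered write, so the flag
 * cannot become observable before the payload.  The two globals and the
 * function name are made up for illustration.
 */
#include <iprt/asm.h>
#include <iprt/types.h>

static volatile uint32_t g_uExamplePayload;
static volatile bool     g_fExampleReady;

static void rtExamplePublishU32(uint32_t uValue)
{
    ASMAtomicUoWriteU32(&g_uExamplePayload, uValue); /* plain store of the data */
    ASMAtomicWriteBool(&g_fExampleReady, true);      /* ordered store publishes it */
}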
4562
4563
4564/**
4565 * Atomically writes a pointer value, ordered.
4566 *
4568 * @param ppv Pointer to the pointer variable.
4569 * @param pv The pointer value to assign to *ppv.
4570 */
4571DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4572{
4573#if ARCH_BITS == 32
4574 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4575#elif ARCH_BITS == 64
4576 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4577#else
4578# error "ARCH_BITS is bogus"
4579#endif
4580}
4581
4582
4583/**
4584 * Atomically writes a pointer value, unordered.
4585 *
4587 * @param ppv Pointer to the pointer variable.
4588 * @param pv The pointer value to assign to *ppv.
4589 */
4590DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4591{
4592#if ARCH_BITS == 32
4593 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4594#elif ARCH_BITS == 64
4595 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4596#else
4597# error "ARCH_BITS is bogus"
4598#endif
4599}
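
/* Editor's example (not part of the original header): atomically swapping in
 * a newly initialized structure so that concurrent readers see either the old
 * pointer or a fully initialized new object, never a half-built one.  The
 * RTEXAMPLECFG type and g_pExampleCfg are hypothetical.
 */
#include <iprt/asm.h>

typedef struct RTEXAMPLECFG { uint32_t cItems; } RTEXAMPLECFG;
static RTEXAMPLECFG * volatile g_pExampleCfg;

static void rtExampleSetConfig(RTEXAMPLECFG *pNewCfg)
{
    /* pNewCfg must be fully initialized before this ordered pointer write. */
    ASMAtomicWritePtr((void * volatile *)&g_pExampleCfg, pNewCfg);
}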
4600
4601
4602/**
4603 * Atomically write a typical IPRT handle value, ordered.
4604 *
4605 * @param ph Pointer to the variable to update.
4606 * @param hNew The value to assign to *ph.
4607 *
4608 * @remarks This doesn't currently work for all handles (like RTFILE).
4609 */
4610#define ASMAtomicWriteHandle(ph, hNew) \
4611 do { \
4612 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)hNew); \
4613 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4614 } while (0)
4615
4616
4617/**
4618 * Atomically write a typical IPRT handle value, unordered.
4619 *
4620 * @param ph Pointer to the variable to update.
4621 * @param hNew The value to assign to *ph.
4622 *
4623 * @remarks This doesn't currently work for all handles (like RTFILE).
4624 */
4625#define ASMAtomicUoWriteHandle(ph, hNew) \
4626 do { \
4627 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)hNew); \
4628 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4629 } while (0)
4630
4631
4632/**
4633 * Atomically writes a value whose size might differ
4634 * between platforms or compilers, ordered.
4635 *
4636 * @param pu Pointer to the variable to update.
4637 * @param uNew The value to assign to *pu.
4638 */
4639#define ASMAtomicWriteSize(pu, uNew) \
4640 do { \
4641 switch (sizeof(*(pu))) { \
4642 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4643 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4644 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4645 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4646 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4647 } \
4648 } while (0)
4649
4650/**
4651 * Atomically writes a value whose size might differ
4652 * between platforms or compilers, unordered.
4653 *
4654 * @param pu Pointer to the variable to update.
4655 * @param uNew The value to assign to *pu.
4656 */
4657#define ASMAtomicUoWriteSize(pu, uNew) \
4658 do { \
4659 switch (sizeof(*(pu))) { \
4660 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4661 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4662 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4663 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4664 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4665 } \
4666 } while (0)
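
/* Editor's example (not part of the original header): ASMAtomicWriteSize picks
 * the right fixed-size helper at compile time, which is convenient for types
 * such as size_t whose width depends on the target.  g_cbExampleUsed is made
 * up for illustration.
 */
#include <iprt/asm.h>

static volatile size_t g_cbExampleUsed;

static void rtExampleSetUsed(size_t cbUsed)
{
    ASMAtomicWriteSize(&g_cbExampleUsed, cbUsed); /* expands to the 32-bit or 64-bit write */
}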
4667
4668
4669
4670
4671/**
4672 * Invalidate page.
4673 *
4674 * @param pv Address of the page to invalidate.
4675 */
4676#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4677DECLASM(void) ASMInvalidatePage(void *pv);
4678#else
4679DECLINLINE(void) ASMInvalidatePage(void *pv)
4680{
4681# if RT_INLINE_ASM_USES_INTRIN
4682 __invlpg(pv);
4683
4684# elif RT_INLINE_ASM_GNU_STYLE
4685 __asm__ __volatile__("invlpg %0\n\t"
4686 : : "m" (*(uint8_t *)pv));
4687# else
4688 __asm
4689 {
4690# ifdef RT_ARCH_AMD64
4691 mov rax, [pv]
4692 invlpg [rax]
4693# else
4694 mov eax, [pv]
4695 invlpg [eax]
4696# endif
4697 }
4698# endif
4699}
4700#endif
4701
4702
4703/**
4704 * Write back the internal caches and invalidate them.
4705 */
4706#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4707DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4708#else
4709DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4710{
4711# if RT_INLINE_ASM_USES_INTRIN
4712 __wbinvd();
4713
4714# elif RT_INLINE_ASM_GNU_STYLE
4715 __asm__ __volatile__("wbinvd");
4716# else
4717 __asm
4718 {
4719 wbinvd
4720 }
4721# endif
4722}
4723#endif
4724
4725
4726/**
4727 * Invalidate internal and (perhaps) external caches without first
4728 * flushing dirty cache lines. Use with extreme care.
4729 */
4730#if RT_INLINE_ASM_EXTERNAL
4731DECLASM(void) ASMInvalidateInternalCaches(void);
4732#else
4733DECLINLINE(void) ASMInvalidateInternalCaches(void)
4734{
4735# if RT_INLINE_ASM_GNU_STYLE
4736 __asm__ __volatile__("invd");
4737# else
4738 __asm
4739 {
4740 invd
4741 }
4742# endif
4743}
4744#endif
4745
4746
4747#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4748# if PAGE_SIZE != 0x1000
4749# error "PAGE_SIZE is not 0x1000!"
4750# endif
4751#endif
4752
4753/**
4754 * Zeros a 4K memory page.
4755 *
4756 * @param pv Pointer to the memory block. This must be page aligned.
4757 */
4758#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4759DECLASM(void) ASMMemZeroPage(volatile void *pv);
4760# else
4761DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4762{
4763# if RT_INLINE_ASM_USES_INTRIN
4764# ifdef RT_ARCH_AMD64
4765 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4766# else
4767 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4768# endif
4769
4770# elif RT_INLINE_ASM_GNU_STYLE
4771 RTCCUINTREG uDummy;
4772# ifdef RT_ARCH_AMD64
4773 __asm__ __volatile__("rep stosq"
4774 : "=D" (pv),
4775 "=c" (uDummy)
4776 : "0" (pv),
4777 "c" (0x1000 >> 3),
4778 "a" (0)
4779 : "memory");
4780# else
4781 __asm__ __volatile__("rep stosl"
4782 : "=D" (pv),
4783 "=c" (uDummy)
4784 : "0" (pv),
4785 "c" (0x1000 >> 2),
4786 "a" (0)
4787 : "memory");
4788# endif
4789# else
4790 __asm
4791 {
4792# ifdef RT_ARCH_AMD64
4793 xor rax, rax
4794 mov ecx, 0200h
4795 mov rdi, [pv]
4796 rep stosq
4797# else
4798 xor eax, eax
4799 mov ecx, 0400h
4800 mov edi, [pv]
4801 rep stosd
4802# endif
4803 }
4804# endif
4805}
4806# endif
4807
4808
4809/**
4810 * Zeros a memory block with a 32-bit aligned size.
4811 *
4812 * @param pv Pointer to the memory block.
4813 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4814 */
4815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4816DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4817#else
4818DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4819{
4820# if RT_INLINE_ASM_USES_INTRIN
4821# ifdef RT_ARCH_AMD64
4822 if (!(cb & 7))
4823 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4824 else
4825# endif
4826 __stosd((unsigned long *)pv, 0, cb / 4);
4827
4828# elif RT_INLINE_ASM_GNU_STYLE
4829 __asm__ __volatile__("rep stosl"
4830 : "=D" (pv),
4831 "=c" (cb)
4832 : "0" (pv),
4833 "1" (cb >> 2),
4834 "a" (0)
4835 : "memory");
4836# else
4837 __asm
4838 {
4839 xor eax, eax
4840# ifdef RT_ARCH_AMD64
4841 mov rcx, [cb]
4842 shr rcx, 2
4843 mov rdi, [pv]
4844# else
4845 mov ecx, [cb]
4846 shr ecx, 2
4847 mov edi, [pv]
4848# endif
4849 rep stosd
4850 }
4851# endif
4852}
4853#endif
4854
4855
4856/**
4857 * Fills a memory block with a 32-bit aligned size.
4858 *
4859 * @param pv Pointer to the memory block.
4860 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4861 * @param u32 The value to fill with.
4862 */
4863#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4864DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4865#else
4866DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4867{
4868# if RT_INLINE_ASM_USES_INTRIN
4869# ifdef RT_ARCH_AMD64
4870 if (!(cb & 7))
4871 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4872 else
4873# endif
4874 __stosd((unsigned long *)pv, u32, cb / 4);
4875
4876# elif RT_INLINE_ASM_GNU_STYLE
4877 __asm__ __volatile__("rep stosl"
4878 : "=D" (pv),
4879 "=c" (cb)
4880 : "0" (pv),
4881 "1" (cb >> 2),
4882 "a" (u32)
4883 : "memory");
4884# else
4885 __asm
4886 {
4887# ifdef RT_ARCH_AMD64
4888 mov rcx, [cb]
4889 shr rcx, 2
4890 mov rdi, [pv]
4891# else
4892 mov ecx, [cb]
4893 shr ecx, 2
4894 mov edi, [pv]
4895# endif
4896 mov eax, [u32]
4897 rep stosd
4898 }
4899# endif
4900}
4901#endif
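
/* Editor's example (not part of the original header): clearing and then
 * pattern-filling a 32-bit aligned table.  Both helpers take a byte count
 * that must be a multiple of four; the function name is made up.
 */
#include <iprt/asm.h>

static void rtExampleInitTable(uint32_t *pau32Table, size_t cEntries)
{
    ASMMemZero32(pau32Table, cEntries * sizeof(uint32_t));                       /* all zero bits */
    ASMMemFill32(pau32Table, cEntries * sizeof(uint32_t), UINT32_C(0xffffffff)); /* then all ones */
}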
4902
4903
4904/**
4905 * Checks if a memory block is filled with the specified byte.
4906 *
4907 * This is a sort of inverted memchr.
4908 *
4909 * @returns Pointer to the byte which doesn't equal u8.
4910 * @returns NULL if all equal to u8.
4911 *
4912 * @param pv Pointer to the memory block.
4913 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4914 * @param u8 The value it's supposed to be filled with.
4915 */
4916#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4917DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4918#else
4919DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4920{
4921/** @todo rewrite this in inline assembly? */
4922 uint8_t const *pb = (uint8_t const *)pv;
4923 for (; cb; cb--, pb++)
4924 if (RT_UNLIKELY(*pb != u8))
4925 return (void *)pb;
4926 return NULL;
4927}
4928#endif
4929
4930
4931/**
4932 * Checks if a memory block is filled with the specified 32-bit value.
4933 *
4934 * This is a sort of inverted memchr.
4935 *
4936 * @returns Pointer to the first value which doesn't equal u32.
4937 * @returns NULL if all equal to u32.
4938 *
4939 * @param pv Pointer to the memory block.
4940 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4941 * @param u32 The value it's supposed to be filled with.
4942 */
4943#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4944DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4945#else
4946DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4947{
4948/** @todo rewrite this in inline assembly? */
4949 uint32_t const *pu32 = (uint32_t const *)pv;
4950 for (; cb; cb -= 4, pu32++)
4951 if (RT_UNLIKELY(*pu32 != u32))
4952 return (uint32_t *)pu32;
4953 return NULL;
4954}
4955#endif
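
/* Editor's example (not part of the original header): verifying that a buffer
 * is still zero filled and reporting the byte offset of the first deviation.
 * The byte count must be a multiple of four; the function name is made up.
 */
#include <iprt/asm.h>

static bool rtExampleIsZeroed(const uint32_t *pau32, size_t cb, size_t *poffBad)
{
    uint32_t *pu32Bad = ASMMemIsAllU32(pau32, cb, 0);
    if (!pu32Bad)
        return true;
    *poffBad = (size_t)((uintptr_t)pu32Bad - (uintptr_t)pau32);
    return false;
}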
4956
4957
4958/**
4959 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4960 *
4961 * @returns u32F1 * u32F2.
4962 */
4963#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4964DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4965#else
4966DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4967{
4968# ifdef RT_ARCH_AMD64
4969 return (uint64_t)u32F1 * u32F2;
4970# else /* !RT_ARCH_AMD64 */
4971 uint64_t u64;
4972# if RT_INLINE_ASM_GNU_STYLE
4973 __asm__ __volatile__("mull %%edx"
4974 : "=A" (u64)
4975 : "a" (u32F2), "d" (u32F1));
4976# else
4977 __asm
4978 {
4979 mov edx, [u32F1]
4980 mov eax, [u32F2]
4981 mul edx
4982 mov dword ptr [u64], eax
4983 mov dword ptr [u64 + 4], edx
4984 }
4985# endif
4986 return u64;
4987# endif /* !RT_ARCH_AMD64 */
4988}
4989#endif
4990
4991
4992/**
4993 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4994 *
4995 * @returns i32F1 * i32F2.
4996 */
4997#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4998DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4999#else
5000DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5001{
5002# ifdef RT_ARCH_AMD64
5003 return (int64_t)i32F1 * i32F2;
5004# else /* !RT_ARCH_AMD64 */
5005 int64_t i64;
5006# if RT_INLINE_ASM_GNU_STYLE
5007 __asm__ __volatile__("imull %%edx"
5008 : "=A" (i64)
5009 : "a" (i32F2), "d" (i32F1));
5010# else
5011 __asm
5012 {
5013 mov edx, [i32F1]
5014 mov eax, [i32F2]
5015 imul edx
5016 mov dword ptr [i64], eax
5017 mov dword ptr [i64 + 4], edx
5018 }
5019# endif
5020 return i64;
5021# endif /* !RT_ARCH_AMD64 */
5022}
5023#endif
5024
5025
5026/**
5027 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5028 *
5029 * @returns u64 / u32.
5030 */
5031#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5032DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5033#else
5034DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5035{
5036# ifdef RT_ARCH_AMD64
5037 return (uint32_t)(u64 / u32);
5038# else /* !RT_ARCH_AMD64 */
5039# if RT_INLINE_ASM_GNU_STYLE
5040 RTCCUINTREG uDummy;
5041 __asm__ __volatile__("divl %3"
5042 : "=a" (u32), "=d"(uDummy)
5043 : "A" (u64), "r" (u32));
5044# else
5045 __asm
5046 {
5047 mov eax, dword ptr [u64]
5048 mov edx, dword ptr [u64 + 4]
5049 mov ecx, [u32]
5050 div ecx
5051 mov [u32], eax
5052 }
5053# endif
5054 return u32;
5055# endif /* !RT_ARCH_AMD64 */
5056}
5057#endif
5058
5059
5060/**
5061 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5062 *
5063 * @returns i64 / i32.
5064 */
5065#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5066DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5067#else
5068DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5069{
5070# ifdef RT_ARCH_AMD64
5071 return (int32_t)(i64 / i32);
5072# else /* !RT_ARCH_AMD64 */
5073# if RT_INLINE_ASM_GNU_STYLE
5074 RTCCUINTREG iDummy;
5075 __asm__ __volatile__("idivl %3"
5076 : "=a" (i32), "=d"(iDummy)
5077 : "A" (i64), "r" (i32));
5078# else
5079 __asm
5080 {
5081 mov eax, dword ptr [i64]
5082 mov edx, dword ptr [i64 + 4]
5083 mov ecx, [i32]
5084 idiv ecx
5085 mov [i32], eax
5086 }
5087# endif
5088 return i32;
5089# endif /* !RT_ARCH_AMD64 */
5090}
5091#endif
5092
5093
5094/**
5095 * Performs a 64-bit unsigned by 32-bit unsigned division,
5096 * returning the 32-bit unsigned remainder.
5097 *
5098 * @returns u64 % u32.
5099 *
5100 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits or we'll overflow and crash.
5101 */
5102#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5103DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5104#else
5105DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5106{
5107# ifdef RT_ARCH_AMD64
5108 return (uint32_t)(u64 % u32);
5109# else /* !RT_ARCH_AMD64 */
5110# if RT_INLINE_ASM_GNU_STYLE
5111 RTCCUINTREG uDummy;
5112 __asm__ __volatile__("divl %3"
5113 : "=a" (uDummy), "=d"(u32)
5114 : "A" (u64), "r" (u32));
5115# else
5116 __asm
5117 {
5118 mov eax, dword ptr [u64]
5119 mov edx, dword ptr [u64 + 4]
5120 mov ecx, [u32]
5121 div ecx
5122 mov [u32], edx
5123 }
5124# endif
5125 return u32;
5126# endif /* !RT_ARCH_AMD64 */
5127}
5128#endif
5129
5130
5131/**
5132 * Performs a 64-bit signed by 32-bit signed division,
5133 * returning the 32-bit signed remainder.
5134 *
5135 * @returns i64 % i32.
5136 *
5137 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits or we'll overflow and crash.
5138 */
5139#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5140DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5141#else
5142DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5143{
5144# ifdef RT_ARCH_AMD64
5145 return (int32_t)(i64 % i32);
5146# else /* !RT_ARCH_AMD64 */
5147# if RT_INLINE_ASM_GNU_STYLE
5148 RTCCUINTREG iDummy;
5149 __asm__ __volatile__("idivl %3"
5150 : "=a" (iDummy), "=d"(i32)
5151 : "A" (i64), "r" (i32));
5152# else
5153 __asm
5154 {
5155 mov eax, dword ptr [i64]
5156 mov edx, dword ptr [i64 + 4]
5157 mov ecx, [i32]
5158 idiv ecx
5159 mov [i32], edx
5160 }
5161# endif
5162 return i32;
5163# endif /* !RT_ARCH_AMD64 */
5164}
5165#endif
5166
5167
5168/**
5169 * Multiplies a 64-bit integer by a 32-bit integer and divides the result by a 32-bit integer,
5170 * using a 96-bit intermediate result.
5171 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5172 * __udivdi3 and __umoddi3 even if this inline function is not used.
5173 *
5174 * @returns (u64A * u32B) / u32C.
5175 * @param u64A The 64-bit value.
5176 * @param u32B The 32-bit value to multiply A by.
5177 * @param u32C The 32-bit value to divide A*B by.
5178 */
5179#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5180DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5181#else
5182DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5183{
5184# if RT_INLINE_ASM_GNU_STYLE
5185# ifdef RT_ARCH_AMD64
5186 uint64_t u64Result, u64Spill;
5187 __asm__ __volatile__("mulq %2\n\t"
5188 "divq %3\n\t"
5189 : "=a" (u64Result),
5190 "=d" (u64Spill)
5191 : "r" ((uint64_t)u32B),
5192 "r" ((uint64_t)u32C),
5193 "0" (u64A),
5194 "1" (0));
5195 return u64Result;
5196# else
5197 uint32_t u32Dummy;
5198 uint64_t u64Result;
5199 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5200 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5201 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5202 eax = u64A.hi */
5203 "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
5204 edx = u32C */
5205 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5206 edx = u32B */
5207 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5208 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5209 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5210 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5211 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5212 edx = u64Hi % u32C */
5213 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5214 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5215 "divl %%ecx \n\t" /* u64Result.lo */
5216 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5217 : "=A"(u64Result), "=c"(u32Dummy),
5218 "=S"(u32Dummy), "=D"(u32Dummy)
5219 : "a"((uint32_t)u64A),
5220 "S"((uint32_t)(u64A >> 32)),
5221 "c"(u32B),
5222 "D"(u32C));
5223 return u64Result;
5224# endif
5225# else
5226 RTUINT64U u;
5227 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5228 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5229 u64Hi += (u64Lo >> 32);
5230 u.s.Hi = (uint32_t)(u64Hi / u32C);
5231 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5232 return u.u;
5233# endif
5234}
5235#endif
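
/* Editor's example (not part of the original header): converting a tick count
 * to nanoseconds as cTicks * 10^9 / uHz without losing the high bits of the
 * intermediate product.  The result must still fit in 64 bits and uHz must be
 * non-zero; the function name is made up.
 */
#include <iprt/asm.h>

static uint64_t rtExampleTicksToNano(uint64_t cTicks, uint32_t uHz)
{
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHz);
}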
5236
5237
5238/**
5239 * Probes a byte pointer for read access.
5240 *
5241 * While the function will fault if the byte is not read accessible,
5242 * the idea is to do this in a safe place like before acquiring locks
5243 * and such like.
5244 *
5245 * Also, this function guarantees that an eager compiler is not going
5246 * to optimize the probing away.
5247 *
5248 * @param pvByte Pointer to the byte.
5249 */
5250#if RT_INLINE_ASM_EXTERNAL
5251DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5252#else
5253DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5254{
5255 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5256 uint8_t u8;
5257# if RT_INLINE_ASM_GNU_STYLE
5258 __asm__ __volatile__("movb (%1), %0\n\t"
5259 : "=r" (u8)
5260 : "r" (pvByte));
5261# else
5262 __asm
5263 {
5264# ifdef RT_ARCH_AMD64
5265 mov rax, [pvByte]
5266 mov al, [rax]
5267# else
5268 mov eax, [pvByte]
5269 mov al, [eax]
5270# endif
5271 mov [u8], al
5272 }
5273# endif
5274 return u8;
5275}
5276#endif
5277
5278/**
5279 * Probes a buffer for read access page by page.
5280 *
5281 * While the function will fault if the buffer is not fully read
5282 * accessible, the idea is to do this in a safe place like before
5283 * acquiring locks and such like.
5284 *
5285 * Also, this function guarantees that an eager compiler is not going
5286 * to optimize the probing away.
5287 *
5288 * @param pvBuf Pointer to the buffer.
5289 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5290 */
5291DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5292{
5293 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5294 /* the first byte */
5295 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5296 ASMProbeReadByte(pu8);
5297
5298 /* the pages in between. */
5299 while (cbBuf > /*PAGE_SIZE*/0x1000)
5300 {
5301 ASMProbeReadByte(pu8);
5302 cbBuf -= /*PAGE_SIZE*/0x1000;
5303 pu8 += /*PAGE_SIZE*/0x1000;
5304 }
5305
5306 /* the last byte */
5307 ASMProbeReadByte(pu8 + cbBuf - 1);
5308}
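
/* Editor's example (not part of the original header): touching a caller
 * supplied buffer page by page before entering a critical section, so that
 * any page fault is taken here rather than while a lock is held.  The lock
 * helpers are only hinted at in comments because they are hypothetical.
 */
#include <iprt/asm.h>

static void rtExamplePrepareRequest(const void *pvReq, size_t cbReq)
{
    if (cbReq)                              /* ASMProbeReadBuffer requires cbBuf >= 1 */
        ASMProbeReadBuffer(pvReq, cbReq);   /* fault now, not under the lock */
    /* rtExampleLock(); ... copy the request ... rtExampleUnlock(); */
}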
5309
5310
5311/** @def ASMBreakpoint
5312 * Debugger Breakpoint.
5313 * @remark In the gnu world we add a nop instruction after the int3 to
5314 * force gdb to remain at the int3 source line.
5315 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5316 * @internal
5317 */
5318#if RT_INLINE_ASM_GNU_STYLE
5319# ifndef __L4ENV__
5320# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5321# else
5322# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5323# endif
5324#else
5325# define ASMBreakpoint() __debugbreak()
5326#endif
5327
5328
5329
5330/** @defgroup grp_inline_bits Bit Operations
5331 * @{
5332 */
5333
5334
5335/**
5336 * Sets a bit in a bitmap.
5337 *
5338 * @param pvBitmap Pointer to the bitmap.
5339 * @param iBit The bit to set.
5340 */
5341#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5342DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5343#else
5344DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5345{
5346# if RT_INLINE_ASM_USES_INTRIN
5347 _bittestandset((long *)pvBitmap, iBit);
5348
5349# elif RT_INLINE_ASM_GNU_STYLE
5350 __asm__ __volatile__("btsl %1, %0"
5351 : "=m" (*(volatile long *)pvBitmap)
5352 : "Ir" (iBit),
5353 "m" (*(volatile long *)pvBitmap)
5354 : "memory");
5355# else
5356 __asm
5357 {
5358# ifdef RT_ARCH_AMD64
5359 mov rax, [pvBitmap]
5360 mov edx, [iBit]
5361 bts [rax], edx
5362# else
5363 mov eax, [pvBitmap]
5364 mov edx, [iBit]
5365 bts [eax], edx
5366# endif
5367 }
5368# endif
5369}
5370#endif
5371
5372
5373/**
5374 * Atomically sets a bit in a bitmap, ordered.
5375 *
5376 * @param pvBitmap Pointer to the bitmap.
5377 * @param iBit The bit to set.
5378 */
5379#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5380DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5381#else
5382DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5383{
5384# if RT_INLINE_ASM_USES_INTRIN
5385 _interlockedbittestandset((long *)pvBitmap, iBit);
5386# elif RT_INLINE_ASM_GNU_STYLE
5387 __asm__ __volatile__("lock; btsl %1, %0"
5388 : "=m" (*(volatile long *)pvBitmap)
5389 : "Ir" (iBit),
5390 "m" (*(volatile long *)pvBitmap)
5391 : "memory");
5392# else
5393 __asm
5394 {
5395# ifdef RT_ARCH_AMD64
5396 mov rax, [pvBitmap]
5397 mov edx, [iBit]
5398 lock bts [rax], edx
5399# else
5400 mov eax, [pvBitmap]
5401 mov edx, [iBit]
5402 lock bts [eax], edx
5403# endif
5404 }
5405# endif
5406}
5407#endif
5408
5409
5410/**
5411 * Clears a bit in a bitmap.
5412 *
5413 * @param pvBitmap Pointer to the bitmap.
5414 * @param iBit The bit to clear.
5415 */
5416#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5417DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5418#else
5419DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5420{
5421# if RT_INLINE_ASM_USES_INTRIN
5422 _bittestandreset((long *)pvBitmap, iBit);
5423
5424# elif RT_INLINE_ASM_GNU_STYLE
5425 __asm__ __volatile__("btrl %1, %0"
5426 : "=m" (*(volatile long *)pvBitmap)
5427 : "Ir" (iBit),
5428 "m" (*(volatile long *)pvBitmap)
5429 : "memory");
5430# else
5431 __asm
5432 {
5433# ifdef RT_ARCH_AMD64
5434 mov rax, [pvBitmap]
5435 mov edx, [iBit]
5436 btr [rax], edx
5437# else
5438 mov eax, [pvBitmap]
5439 mov edx, [iBit]
5440 btr [eax], edx
5441# endif
5442 }
5443# endif
5444}
5445#endif
5446
5447
5448/**
5449 * Atomically clears a bit in a bitmap, ordered.
5450 *
5451 * @param pvBitmap Pointer to the bitmap.
5452 * @param iBit The bit to clear.
5453 * @remark No memory barrier, take care on smp.
5454 */
5455#if RT_INLINE_ASM_EXTERNAL
5456DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5457#else
5458DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5459{
5460# if RT_INLINE_ASM_GNU_STYLE
5461 __asm__ __volatile__("lock; btrl %1, %0"
5462 : "=m" (*(volatile long *)pvBitmap)
5463 : "Ir" (iBit),
5464 "m" (*(volatile long *)pvBitmap)
5465 : "memory");
5466# else
5467 __asm
5468 {
5469# ifdef RT_ARCH_AMD64
5470 mov rax, [pvBitmap]
5471 mov edx, [iBit]
5472 lock btr [rax], edx
5473# else
5474 mov eax, [pvBitmap]
5475 mov edx, [iBit]
5476 lock btr [eax], edx
5477# endif
5478 }
5479# endif
5480}
5481#endif
5482
5483
5484/**
5485 * Toggles a bit in a bitmap.
5486 *
5487 * @param pvBitmap Pointer to the bitmap.
5488 * @param iBit The bit to toggle.
5489 */
5490#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5491DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5492#else
5493DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5494{
5495# if RT_INLINE_ASM_USES_INTRIN
5496 _bittestandcomplement((long *)pvBitmap, iBit);
5497# elif RT_INLINE_ASM_GNU_STYLE
5498 __asm__ __volatile__("btcl %1, %0"
5499 : "=m" (*(volatile long *)pvBitmap)
5500 : "Ir" (iBit),
5501 "m" (*(volatile long *)pvBitmap)
5502 : "memory");
5503# else
5504 __asm
5505 {
5506# ifdef RT_ARCH_AMD64
5507 mov rax, [pvBitmap]
5508 mov edx, [iBit]
5509 btc [rax], edx
5510# else
5511 mov eax, [pvBitmap]
5512 mov edx, [iBit]
5513 btc [eax], edx
5514# endif
5515 }
5516# endif
5517}
5518#endif
5519
5520
5521/**
5522 * Atomically toggles a bit in a bitmap, ordered.
5523 *
5524 * @param pvBitmap Pointer to the bitmap.
5525 * @param iBit The bit to toggle.
5526 */
5527#if RT_INLINE_ASM_EXTERNAL
5528DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5529#else
5530DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5531{
5532# if RT_INLINE_ASM_GNU_STYLE
5533 __asm__ __volatile__("lock; btcl %1, %0"
5534 : "=m" (*(volatile long *)pvBitmap)
5535 : "Ir" (iBit),
5536 "m" (*(volatile long *)pvBitmap)
5537 : "memory");
5538# else
5539 __asm
5540 {
5541# ifdef RT_ARCH_AMD64
5542 mov rax, [pvBitmap]
5543 mov edx, [iBit]
5544 lock btc [rax], edx
5545# else
5546 mov eax, [pvBitmap]
5547 mov edx, [iBit]
5548 lock btc [eax], edx
5549# endif
5550 }
5551# endif
5552}
5553#endif
5554
5555
5556/**
5557 * Tests and sets a bit in a bitmap.
5558 *
5559 * @returns true if the bit was set.
5560 * @returns false if the bit was clear.
5561 * @param pvBitmap Pointer to the bitmap.
5562 * @param iBit The bit to test and set.
5563 */
5564#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5565DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5566#else
5567DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5568{
5569 union { bool f; uint32_t u32; uint8_t u8; } rc;
5570# if RT_INLINE_ASM_USES_INTRIN
5571 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5572
5573# elif RT_INLINE_ASM_GNU_STYLE
5574 __asm__ __volatile__("btsl %2, %1\n\t"
5575 "setc %b0\n\t"
5576 "andl $1, %0\n\t"
5577 : "=q" (rc.u32),
5578 "=m" (*(volatile long *)pvBitmap)
5579 : "Ir" (iBit),
5580 "m" (*(volatile long *)pvBitmap)
5581 : "memory");
5582# else
5583 __asm
5584 {
5585 mov edx, [iBit]
5586# ifdef RT_ARCH_AMD64
5587 mov rax, [pvBitmap]
5588 bts [rax], edx
5589# else
5590 mov eax, [pvBitmap]
5591 bts [eax], edx
5592# endif
5593 setc al
5594 and eax, 1
5595 mov [rc.u32], eax
5596 }
5597# endif
5598 return rc.f;
5599}
5600#endif
5601
5602
5603/**
5604 * Atomically tests and sets a bit in a bitmap, ordered.
5605 *
5606 * @returns true if the bit was set.
5607 * @returns false if the bit was clear.
5608 * @param pvBitmap Pointer to the bitmap.
5609 * @param iBit The bit to test and set.
5610 */
5611#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5612DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5613#else
5614DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5615{
5616 union { bool f; uint32_t u32; uint8_t u8; } rc;
5617# if RT_INLINE_ASM_USES_INTRIN
5618 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5619# elif RT_INLINE_ASM_GNU_STYLE
5620 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5621 "setc %b0\n\t"
5622 "andl $1, %0\n\t"
5623 : "=q" (rc.u32),
5624 "=m" (*(volatile long *)pvBitmap)
5625 : "Ir" (iBit),
5626 "m" (*(volatile long *)pvBitmap)
5627 : "memory");
5628# else
5629 __asm
5630 {
5631 mov edx, [iBit]
5632# ifdef RT_ARCH_AMD64
5633 mov rax, [pvBitmap]
5634 lock bts [rax], edx
5635# else
5636 mov eax, [pvBitmap]
5637 lock bts [eax], edx
5638# endif
5639 setc al
5640 and eax, 1
5641 mov [rc.u32], eax
5642 }
5643# endif
5644 return rc.f;
5645}
5646#endif
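
/* Editor's example (not part of the original header): claiming entries in a
 * shared allocation bitmap.  ASMAtomicBitTestAndSet returns the previous bit
 * state, so a false return means this caller won the slot; release it again
 * with ASMAtomicBitClear.  The bitmap and function name are made up.
 */
#include <iprt/asm.h>

static uint32_t g_bmExampleSlots[64 / 32];  /* 64 slots, 32 bits per word */

static int32_t rtExampleClaimSlot(void)
{
    int32_t iSlot;
    for (iSlot = 0; iSlot < 64; iSlot++)
        if (!ASMAtomicBitTestAndSet(g_bmExampleSlots, iSlot))
            return iSlot;                   /* bit was clear, now set: slot is ours */
    return -1;                              /* all slots taken */
}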
5647
5648
5649/**
5650 * Tests and clears a bit in a bitmap.
5651 *
5652 * @returns true if the bit was set.
5653 * @returns false if the bit was clear.
5654 * @param pvBitmap Pointer to the bitmap.
5655 * @param iBit The bit to test and clear.
5656 */
5657#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5658DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5659#else
5660DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5661{
5662 union { bool f; uint32_t u32; uint8_t u8; } rc;
5663# if RT_INLINE_ASM_USES_INTRIN
5664 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5665
5666# elif RT_INLINE_ASM_GNU_STYLE
5667 __asm__ __volatile__("btrl %2, %1\n\t"
5668 "setc %b0\n\t"
5669 "andl $1, %0\n\t"
5670 : "=q" (rc.u32),
5671 "=m" (*(volatile long *)pvBitmap)
5672 : "Ir" (iBit),
5673 "m" (*(volatile long *)pvBitmap)
5674 : "memory");
5675# else
5676 __asm
5677 {
5678 mov edx, [iBit]
5679# ifdef RT_ARCH_AMD64
5680 mov rax, [pvBitmap]
5681 btr [rax], edx
5682# else
5683 mov eax, [pvBitmap]
5684 btr [eax], edx
5685# endif
5686 setc al
5687 and eax, 1
5688 mov [rc.u32], eax
5689 }
5690# endif
5691 return rc.f;
5692}
5693#endif
5694
5695
5696/**
5697 * Atomically tests and clears a bit in a bitmap, ordered.
5698 *
5699 * @returns true if the bit was set.
5700 * @returns false if the bit was clear.
5701 * @param pvBitmap Pointer to the bitmap.
5702 * @param iBit The bit to test and clear.
5703 * @remark No memory barrier, take care on smp.
5704 */
5705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5706DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5707#else
5708DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5709{
5710 union { bool f; uint32_t u32; uint8_t u8; } rc;
5711# if RT_INLINE_ASM_USES_INTRIN
5712 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5713
5714# elif RT_INLINE_ASM_GNU_STYLE
5715 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5716 "setc %b0\n\t"
5717 "andl $1, %0\n\t"
5718 : "=q" (rc.u32),
5719 "=m" (*(volatile long *)pvBitmap)
5720 : "Ir" (iBit),
5721 "m" (*(volatile long *)pvBitmap)
5722 : "memory");
5723# else
5724 __asm
5725 {
5726 mov edx, [iBit]
5727# ifdef RT_ARCH_AMD64
5728 mov rax, [pvBitmap]
5729 lock btr [rax], edx
5730# else
5731 mov eax, [pvBitmap]
5732 lock btr [eax], edx
5733# endif
5734 setc al
5735 and eax, 1
5736 mov [rc.u32], eax
5737 }
5738# endif
5739 return rc.f;
5740}
5741#endif
5742
5743
5744/**
5745 * Tests and toggles a bit in a bitmap.
5746 *
5747 * @returns true if the bit was set.
5748 * @returns false if the bit was clear.
5749 * @param pvBitmap Pointer to the bitmap.
5750 * @param iBit The bit to test and toggle.
5751 */
5752#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5753DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5754#else
5755DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5756{
5757 union { bool f; uint32_t u32; uint8_t u8; } rc;
5758# if RT_INLINE_ASM_USES_INTRIN
5759 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5760
5761# elif RT_INLINE_ASM_GNU_STYLE
5762 __asm__ __volatile__("btcl %2, %1\n\t"
5763 "setc %b0\n\t"
5764 "andl $1, %0\n\t"
5765 : "=q" (rc.u32),
5766 "=m" (*(volatile long *)pvBitmap)
5767 : "Ir" (iBit),
5768 "m" (*(volatile long *)pvBitmap)
5769 : "memory");
5770# else
5771 __asm
5772 {
5773 mov edx, [iBit]
5774# ifdef RT_ARCH_AMD64
5775 mov rax, [pvBitmap]
5776 btc [rax], edx
5777# else
5778 mov eax, [pvBitmap]
5779 btc [eax], edx
5780# endif
5781 setc al
5782 and eax, 1
5783 mov [rc.u32], eax
5784 }
5785# endif
5786 return rc.f;
5787}
5788#endif
5789
5790
5791/**
5792 * Atomically tests and toggles a bit in a bitmap, ordered.
5793 *
5794 * @returns true if the bit was set.
5795 * @returns false if the bit was clear.
5796 * @param pvBitmap Pointer to the bitmap.
5797 * @param iBit The bit to test and toggle.
5798 */
5799#if RT_INLINE_ASM_EXTERNAL
5800DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5801#else
5802DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5803{
5804 union { bool f; uint32_t u32; uint8_t u8; } rc;
5805# if RT_INLINE_ASM_GNU_STYLE
5806 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5807 "setc %b0\n\t"
5808 "andl $1, %0\n\t"
5809 : "=q" (rc.u32),
5810 "=m" (*(volatile long *)pvBitmap)
5811 : "Ir" (iBit),
5812 "m" (*(volatile long *)pvBitmap)
5813 : "memory");
5814# else
5815 __asm
5816 {
5817 mov edx, [iBit]
5818# ifdef RT_ARCH_AMD64
5819 mov rax, [pvBitmap]
5820 lock btc [rax], edx
5821# else
5822 mov eax, [pvBitmap]
5823 lock btc [eax], edx
5824# endif
5825 setc al
5826 and eax, 1
5827 mov [rc.u32], eax
5828 }
5829# endif
5830 return rc.f;
5831}
5832#endif
5833
5834
5835/**
5836 * Tests if a bit in a bitmap is set.
5837 *
5838 * @returns true if the bit is set.
5839 * @returns false if the bit is clear.
5840 * @param pvBitmap Pointer to the bitmap.
5841 * @param iBit The bit to test.
5842 */
5843#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5844DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5845#else
5846DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5847{
5848 union { bool f; uint32_t u32; uint8_t u8; } rc;
5849# if RT_INLINE_ASM_USES_INTRIN
5850 rc.u32 = _bittest((long *)pvBitmap, iBit);
5851# elif RT_INLINE_ASM_GNU_STYLE
5852
5853 __asm__ __volatile__("btl %2, %1\n\t"
5854 "setc %b0\n\t"
5855 "andl $1, %0\n\t"
5856 : "=q" (rc.u32)
5857 : "m" (*(const volatile long *)pvBitmap),
5858 "Ir" (iBit)
5859 : "memory");
5860# else
5861 __asm
5862 {
5863 mov edx, [iBit]
5864# ifdef RT_ARCH_AMD64
5865 mov rax, [pvBitmap]
5866 bt [rax], edx
5867# else
5868 mov eax, [pvBitmap]
5869 bt [eax], edx
5870# endif
5871 setc al
5872 and eax, 1
5873 mov [rc.u32], eax
5874 }
5875# endif
5876 return rc.f;
5877}
5878#endif
5879
5880
5881/**
5882 * Clears a bit range within a bitmap.
5883 *
5884 * @param pvBitmap Pointer to the bitmap.
5885 * @param iBitStart The First bit to clear.
5886 * @param iBitStart The first bit to clear.
5887 */
5888DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5889{
5890 if (iBitStart < iBitEnd)
5891 {
5892 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5893 int iStart = iBitStart & ~31;
5894 int iEnd = iBitEnd & ~31;
5895 if (iStart == iEnd)
5896 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5897 else
5898 {
5899 /* bits in first dword. */
5900 if (iBitStart & 31)
5901 {
5902 *pu32 &= (1 << (iBitStart & 31)) - 1;
5903 pu32++;
5904 iBitStart = iStart + 32;
5905 }
5906
5907 /* whole dword. */
5908 if (iBitStart != iEnd)
5909 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5910
5911 /* bits in last dword. */
5912 if (iBitEnd & 31)
5913 {
5914 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5915 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5916 }
5917 }
5918 }
5919}
5920
5921
5922/**
5923 * Sets a bit range within a bitmap.
5924 *
5925 * @param pvBitmap Pointer to the bitmap.
5926 * @param iBitStart The first bit to set.
5927 * @param iBitEnd The first bit not to set.
5928 */
5929DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5930{
5931 if (iBitStart < iBitEnd)
5932 {
5933 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5934 int iStart = iBitStart & ~31;
5935 int iEnd = iBitEnd & ~31;
5936 if (iStart == iEnd)
5937 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5938 else
5939 {
5940 /* bits in first dword. */
5941 if (iBitStart & 31)
5942 {
5943 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5944 pu32++;
5945 iBitStart = iStart + 32;
5946 }
5947
5948 /* whole dword. */
5949 if (iBitStart != iEnd)
5950 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5951
5952 /* bits in last dword. */
5953 if (iBitEnd & 31)
5954 {
5955 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5956 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5957 }
5958 }
5959 }
5960}
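
/* Editor's example (not part of the original header): marking a run of pages
 * as allocated or free in a bitmap.  These range helpers are not atomic, so
 * the caller is expected to hold whatever lock protects the bitmap.  The
 * bitmap and function name are made up.
 */
#include <iprt/asm.h>

static uint32_t g_bmExamplePages[1024 / 32];

static void rtExampleMarkPages(int32_t iFirst, int32_t cPages, bool fAllocated)
{
    if (fAllocated)
        ASMBitSetRange(g_bmExamplePages, iFirst, iFirst + cPages);
    else
        ASMBitClearRange(g_bmExamplePages, iFirst, iFirst + cPages);
}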
5961
5962
5963/**
5964 * Finds the first clear bit in a bitmap.
5965 *
5966 * @returns Index of the first zero bit.
5967 * @returns -1 if no clear bit was found.
5968 * @param pvBitmap Pointer to the bitmap.
5969 * @param cBits The number of bits in the bitmap. Multiple of 32.
5970 */
5971#if RT_INLINE_ASM_EXTERNAL
5972DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5973#else
5974DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5975{
5976 if (cBits)
5977 {
5978 int32_t iBit;
5979# if RT_INLINE_ASM_GNU_STYLE
5980 RTCCUINTREG uEAX, uECX, uEDI;
5981 cBits = RT_ALIGN_32(cBits, 32);
5982 __asm__ __volatile__("repe; scasl\n\t"
5983 "je 1f\n\t"
5984# ifdef RT_ARCH_AMD64
5985 "lea -4(%%rdi), %%rdi\n\t"
5986 "xorl (%%rdi), %%eax\n\t"
5987 "subq %5, %%rdi\n\t"
5988# else
5989 "lea -4(%%edi), %%edi\n\t"
5990 "xorl (%%edi), %%eax\n\t"
5991 "subl %5, %%edi\n\t"
5992# endif
5993 "shll $3, %%edi\n\t"
5994 "bsfl %%eax, %%edx\n\t"
5995 "addl %%edi, %%edx\n\t"
5996 "1:\t\n"
5997 : "=d" (iBit),
5998 "=&c" (uECX),
5999 "=&D" (uEDI),
6000 "=&a" (uEAX)
6001 : "0" (0xffffffff),
6002 "mr" (pvBitmap),
6003 "1" (cBits >> 5),
6004 "2" (pvBitmap),
6005 "3" (0xffffffff));
6006# else
6007 cBits = RT_ALIGN_32(cBits, 32);
6008 __asm
6009 {
6010# ifdef RT_ARCH_AMD64
6011 mov rdi, [pvBitmap]
6012 mov rbx, rdi
6013# else
6014 mov edi, [pvBitmap]
6015 mov ebx, edi
6016# endif
6017 mov edx, 0ffffffffh
6018 mov eax, edx
6019 mov ecx, [cBits]
6020 shr ecx, 5
6021 repe scasd
6022 je done
6023
6024# ifdef RT_ARCH_AMD64
6025 lea rdi, [rdi - 4]
6026 xor eax, [rdi]
6027 sub rdi, rbx
6028# else
6029 lea edi, [edi - 4]
6030 xor eax, [edi]
6031 sub edi, ebx
6032# endif
6033 shl edi, 3
6034 bsf edx, eax
6035 add edx, edi
6036 done:
6037 mov [iBit], edx
6038 }
6039# endif
6040 return iBit;
6041 }
6042 return -1;
6043}
6044#endif
6045
6046
6047/**
6048 * Finds the next clear bit in a bitmap.
6049 *
6050 * @returns Index of the next clear bit.
6051 * @returns -1 if no clear bit was found.
6052 * @param pvBitmap Pointer to the bitmap.
6053 * @param cBits The number of bits in the bitmap. Multiple of 32.
6054 * @param iBitPrev The bit returned from the last search.
6055 * The search will start at iBitPrev + 1.
6056 */
6057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6058DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6059#else
6060DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6061{
6062 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6063 int iBit = ++iBitPrev & 31;
6064 if (iBit)
6065 {
6066 /*
6067 * Inspect the 32-bit word containing the unaligned bit.
6068 */
6069 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6070
6071# if RT_INLINE_ASM_USES_INTRIN
6072 unsigned long ulBit = 0;
6073 if (_BitScanForward(&ulBit, u32))
6074 return ulBit + iBitPrev;
6075# else
6076# if RT_INLINE_ASM_GNU_STYLE
6077 __asm__ __volatile__("bsf %1, %0\n\t"
6078 "jnz 1f\n\t"
6079 "movl $-1, %0\n\t"
6080 "1:\n\t"
6081 : "=r" (iBit)
6082 : "r" (u32));
6083# else
6084 __asm
6085 {
6086 mov edx, [u32]
6087 bsf eax, edx
6088 jnz done
6089 mov eax, 0ffffffffh
6090 done:
6091 mov [iBit], eax
6092 }
6093# endif
6094 if (iBit >= 0)
6095 return iBit + iBitPrev;
6096# endif
6097
6098 /*
6099 * Skip ahead and see if there is anything left to search.
6100 */
6101 iBitPrev |= 31;
6102 iBitPrev++;
6103 if (cBits <= (uint32_t)iBitPrev)
6104 return -1;
6105 }
6106
6107 /*
6108 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6109 */
6110 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6111 if (iBit >= 0)
6112 iBit += iBitPrev;
6113 return iBit;
6114}
6115#endif
6116
6117
6118/**
6119 * Finds the first set bit in a bitmap.
6120 *
6121 * @returns Index of the first set bit.
6122 * @returns -1 if no set bit was found.
6123 * @param pvBitmap Pointer to the bitmap.
6124 * @param cBits The number of bits in the bitmap. Multiple of 32.
6125 */
6126#if RT_INLINE_ASM_EXTERNAL
6127DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6128#else
6129DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6130{
6131 if (cBits)
6132 {
6133 int32_t iBit;
6134# if RT_INLINE_ASM_GNU_STYLE
6135 RTCCUINTREG uEAX, uECX, uEDI;
6136 cBits = RT_ALIGN_32(cBits, 32);
6137 __asm__ __volatile__("repe; scasl\n\t"
6138 "je 1f\n\t"
6139# ifdef RT_ARCH_AMD64
6140 "lea -4(%%rdi), %%rdi\n\t"
6141 "movl (%%rdi), %%eax\n\t"
6142 "subq %5, %%rdi\n\t"
6143# else
6144 "lea -4(%%edi), %%edi\n\t"
6145 "movl (%%edi), %%eax\n\t"
6146 "subl %5, %%edi\n\t"
6147# endif
6148 "shll $3, %%edi\n\t"
6149 "bsfl %%eax, %%edx\n\t"
6150 "addl %%edi, %%edx\n\t"
6151 "1:\t\n"
6152 : "=d" (iBit),
6153 "=&c" (uECX),
6154 "=&D" (uEDI),
6155 "=&a" (uEAX)
6156 : "0" (0xffffffff),
6157 "mr" (pvBitmap),
6158 "1" (cBits >> 5),
6159 "2" (pvBitmap),
6160 "3" (0));
6161# else
6162 cBits = RT_ALIGN_32(cBits, 32);
6163 __asm
6164 {
6165# ifdef RT_ARCH_AMD64
6166 mov rdi, [pvBitmap]
6167 mov rbx, rdi
6168# else
6169 mov edi, [pvBitmap]
6170 mov ebx, edi
6171# endif
6172 mov edx, 0ffffffffh
6173 xor eax, eax
6174 mov ecx, [cBits]
6175 shr ecx, 5
6176 repe scasd
6177 je done
6178# ifdef RT_ARCH_AMD64
6179 lea rdi, [rdi - 4]
6180 mov eax, [rdi]
6181 sub rdi, rbx
6182# else
6183 lea edi, [edi - 4]
6184 mov eax, [edi]
6185 sub edi, ebx
6186# endif
6187 shl edi, 3
6188 bsf edx, eax
6189 add edx, edi
6190 done:
6191 mov [iBit], edx
6192 }
6193# endif
6194 return iBit;
6195 }
6196 return -1;
6197}
6198#endif
6199
6200
6201/**
6202 * Finds the next set bit in a bitmap.
6203 *
6204 * @returns Index of the next set bit.
6205 * @returns -1 if no set bit was found.
6206 * @param pvBitmap Pointer to the bitmap.
6207 * @param cBits The number of bits in the bitmap. Multiple of 32.
6208 * @param iBitPrev The bit returned from the last search.
6209 * The search will start at iBitPrev + 1.
6210 */
6211#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6212DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6213#else
6214DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6215{
6216 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6217 int iBit = ++iBitPrev & 31;
6218 if (iBit)
6219 {
6220 /*
6221 * Inspect the 32-bit word containing the unaligned bit.
6222 */
6223 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6224
6225# if RT_INLINE_ASM_USES_INTRIN
6226 unsigned long ulBit = 0;
6227 if (_BitScanForward(&ulBit, u32))
6228 return ulBit + iBitPrev;
6229# else
6230# if RT_INLINE_ASM_GNU_STYLE
6231 __asm__ __volatile__("bsf %1, %0\n\t"
6232 "jnz 1f\n\t"
6233 "movl $-1, %0\n\t"
6234 "1:\n\t"
6235 : "=r" (iBit)
6236 : "r" (u32));
6237# else
6238 __asm
6239 {
6240 mov edx, [u32]
6241 bsf eax, edx
6242 jnz done
6243 mov eax, 0ffffffffh
6244 done:
6245 mov [iBit], eax
6246 }
6247# endif
6248 if (iBit >= 0)
6249 return iBit + iBitPrev;
6250# endif
6251
6252 /*
6253 * Skip ahead and see if there is anything left to search.
6254 */
6255 iBitPrev |= 31;
6256 iBitPrev++;
6257 if (cBits <= (uint32_t)iBitPrev)
6258 return -1;
6259 }
6260
6261 /*
6262 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6263 */
6264 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6265 if (iBit >= 0)
6266 iBit += iBitPrev;
6267 return iBit;
6268}
6269#endif
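
/* Editor's example (not part of the original header): visiting every set bit
 * in a bitmap by chaining ASMBitFirstSet and ASMBitNextSet.  The callback
 * type and function name are made up; cBits must be a multiple of 32.
 */
#include <iprt/asm.h>

static void rtExampleForEachSetBit(const uint32_t *pau32, uint32_t cBits,
                                   void (*pfnCallback)(int32_t iBit))
{
    int iBit = ASMBitFirstSet(pau32, cBits);
    while (iBit >= 0)
    {
        pfnCallback(iBit);
        iBit = ASMBitNextSet(pau32, cBits, (uint32_t)iBit);
    }
}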
6270
6271
6272/**
6273 * Finds the first bit which is set in the given 32-bit integer.
6274 * Bits are numbered from 1 (least significant) to 32.
6275 *
6276 * @returns index [1..32] of the first set bit.
6277 * @returns 0 if all bits are cleared.
6278 * @param u32 Integer to search for set bits.
6279 * @remark Similar to ffs() in BSD.
6280 */
6281DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6282{
6283# if RT_INLINE_ASM_USES_INTRIN
6284 unsigned long iBit;
6285 if (_BitScanForward(&iBit, u32))
6286 iBit++;
6287 else
6288 iBit = 0;
6289# elif RT_INLINE_ASM_GNU_STYLE
6290 uint32_t iBit;
6291 __asm__ __volatile__("bsf %1, %0\n\t"
6292 "jnz 1f\n\t"
6293 "xorl %0, %0\n\t"
6294 "jmp 2f\n"
6295 "1:\n\t"
6296 "incl %0\n"
6297 "2:\n\t"
6298 : "=r" (iBit)
6299 : "rm" (u32));
6300# else
6301 uint32_t iBit;
6302 _asm
6303 {
6304 bsf eax, [u32]
6305 jnz found
6306 xor eax, eax
6307 jmp done
6308 found:
6309 inc eax
6310 done:
6311 mov [iBit], eax
6312 }
6313# endif
6314 return iBit;
6315}
6316
6317
6318/**
6319 * Finds the first bit which is set in the given 32-bit integer.
6320 * Bits are numbered from 1 (least significant) to 32.
6321 *
6322 * @returns index [1..32] of the first set bit.
6323 * @returns 0 if all bits are cleared.
6324 * @param i32 Integer to search for set bits.
6325 * @remark Similar to ffs() in BSD.
6326 */
6327DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6328{
6329 return ASMBitFirstSetU32((uint32_t)i32);
6330}
6331
6332
6333/**
6334 * Finds the last bit which is set in the given 32-bit integer.
6335 * Bits are numbered from 1 (least significant) to 32.
6336 *
6337 * @returns index [1..32] of the last set bit.
6338 * @returns 0 if all bits are cleared.
6339 * @param u32 Integer to search for set bits.
6340 * @remark Similar to fls() in BSD.
6341 */
6342DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6343{
6344# if RT_INLINE_ASM_USES_INTRIN
6345 unsigned long iBit;
6346 if (_BitScanReverse(&iBit, u32))
6347 iBit++;
6348 else
6349 iBit = 0;
6350# elif RT_INLINE_ASM_GNU_STYLE
6351 uint32_t iBit;
6352 __asm__ __volatile__("bsrl %1, %0\n\t"
6353 "jnz 1f\n\t"
6354 "xorl %0, %0\n\t"
6355 "jmp 2f\n"
6356 "1:\n\t"
6357 "incl %0\n"
6358 "2:\n\t"
6359 : "=r" (iBit)
6360 : "rm" (u32));
6361# else
6362 uint32_t iBit;
6363 _asm
6364 {
6365 bsr eax, [u32]
6366 jnz found
6367 xor eax, eax
6368 jmp done
6369 found:
6370 inc eax
6371 done:
6372 mov [iBit], eax
6373 }
6374# endif
6375 return iBit;
6376}
6377
6378
6379/**
6380 * Finds the last bit which is set in the given 32-bit integer.
6381 * Bits are numbered from 1 (least significant) to 32.
6382 *
6383 * @returns index [1..32] of the last set bit.
6384 * @returns 0 if all bits are cleared.
6385 * @param i32 Integer to search for set bits.
6386 * @remark Similar to fls() in BSD.
6387 */
6388DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6389{
6390 return ASMBitLastSetU32((uint32_t)i32);
6391}
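
/* Editor's example (not part of the original header): ASMBitLastSetU32 returns
 * the 1-based index of the most significant set bit, which makes a floor(log2)
 * helper a one-liner.  The function name is made up; log2(0) is mapped to 0
 * here for simplicity.
 */
#include <iprt/asm.h>

static unsigned rtExampleFloorLog2(uint32_t u32)
{
    unsigned iBit = ASMBitLastSetU32(u32);  /* 1..32, or 0 if u32 is zero */
    return iBit ? iBit - 1 : 0;
}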
6392
6393/**
6394 * Reverse the byte order of the given 16-bit integer.
6395 *
6396 * @returns The byte-swapped value.
6397 * @param u16 16-bit integer value.
6398 */
6399DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6400{
6401#if RT_INLINE_ASM_USES_INTRIN
6402 u16 = _byteswap_ushort(u16);
6403#elif RT_INLINE_ASM_GNU_STYLE
6404 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6405#else
6406 _asm
6407 {
6408 mov ax, [u16]
6409 ror ax, 8
6410 mov [u16], ax
6411 }
6412#endif
6413 return u16;
6414}
6415
6416/**
6417 * Reverse the byte order of the given 32-bit integer.
6418 *
6419 * @returns The byte-swapped value.
6420 * @param u32 32-bit integer value.
6421 */
6422DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6423{
6424#if RT_INLINE_ASM_USES_INTRIN
6425 u32 = _byteswap_ulong(u32);
6426#elif RT_INLINE_ASM_GNU_STYLE
6427 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6428#else
6429 _asm
6430 {
6431 mov eax, [u32]
6432 bswap eax
6433 mov [u32], eax
6434 }
6435#endif
6436 return u32;
6437}
6438
6439
6440/**
6441 * Reverse the byte order of the given 64-bit integer.
6442 *
6443 * @returns The byte-swapped value.
6444 * @param u64 64-bit integer value.
6445 */
6446DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6447{
6448#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6449 u64 = _byteswap_uint64(u64);
6450#else
6451 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6452 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6453#endif
6454 return u64;
6455}
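
/* Editor's example (not part of the original header): loading a big-endian
 * 32-bit field from a wire or on-disk buffer on a little-endian host.  On a
 * big-endian host the swap would be skipped (e.g. keyed off RT_BIG_ENDIAN),
 * but the unconditional little-endian case is shown here; the function name
 * is made up.
 */
#include <iprt/asm.h>
#include <string.h>

static uint32_t rtExampleLoadU32BE(const uint8_t *pb)
{
    uint32_t u32;
    memcpy(&u32, pb, sizeof(u32));          /* unaligned-safe load */
    return ASMByteSwapU32(u32);             /* big endian -> host (little endian) */
}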
6456
6457
6458/** @} */
6459
6460
6461/** @} */
6462#endif
6463