VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 2245

Last change on this file since 2245 was 2245, checked in by vboxsync, 18 years ago

ASMAtomicXchgBool.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.9 KB
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using the _MSC_VER >= 1400 compiler intrinsics.
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_byteswap_ushort)
67# pragma intrinsic(_byteswap_ulong)
68# pragma intrinsic(_interlockedbittestandset)
69# pragma intrinsic(_interlockedbittestandreset)
70# pragma intrinsic(_InterlockedAnd)
71# pragma intrinsic(_InterlockedOr)
72# pragma intrinsic(_InterlockedIncrement)
73# pragma intrinsic(_InterlockedDecrement)
74# pragma intrinsic(_InterlockedExchange)
75# pragma intrinsic(_InterlockedCompareExchange)
76# pragma intrinsic(_InterlockedCompareExchange64)
77# ifdef __AMD64__
78# pragma intrinsic(__stosq)
79# pragma intrinsic(__readcr8)
80# pragma intrinsic(__writecr8)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# endif
84# endif
85#endif
86#ifndef RT_INLINE_ASM_USES_INTRIN
87# define RT_INLINE_ASM_USES_INTRIN 0
88#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
97/** @def RT_INLINE_ASM_EXTERNAL
98 * Defined as 1 if the compiler does not support inline assembly.
99 * The ASM* functions will then be implemented in an external .asm file.
100 *
101 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
102 * inline assembly in their AMD64 compiler.
103 */
104#if defined(_MSC_VER) && defined(__AMD64__)
105# define RT_INLINE_ASM_EXTERNAL 1
106#else
107# define RT_INLINE_ASM_EXTERNAL 0
108#endif
109
110/** @def RT_INLINE_ASM_GNU_STYLE
111 * Defined as 1 if the compiler understands GNU style inline assembly.
112 */
113#if defined(_MSC_VER)
114# define RT_INLINE_ASM_GNU_STYLE 0
115#else
116# define RT_INLINE_ASM_GNU_STYLE 1
117#endif
118
119
120/** @todo find a more proper place for this structure? */
121#pragma pack(1)
122/** IDTR */
123typedef struct RTIDTR
124{
125 /** Size of the IDT. */
126 uint16_t cbIdt;
127 /** Address of the IDT. */
128 uintptr_t pIdt;
129} RTIDTR, *PRTIDTR;
130#pragma pack()
131
132#pragma pack(1)
133/** GDTR */
134typedef struct RTGDTR
135{
136 /** Size of the GDT. */
137 uint16_t cbGdt;
138 /** Address of the GDT. */
139 uintptr_t pGdt;
140} RTGDTR, *PRTGDTR;
141#pragma pack()
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(__DOXYGEN__)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
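/* Illustrative usage sketch (not part of this header): recording who called a
   helper for diagnostics.  The rtExample* name below is hypothetical. */
#if 0 /* example only */
DECLINLINE(void *) rtExampleWhoCalledMe(void)
{
    return ASMReturnAddress();  /* address this function will return to, i.e. its caller */
}
#endif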
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
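/* Illustrative usage sketch: reading the descriptor-table registers into the
   packed RTIDTR/RTGDTR structures declared above.  The rtExample* name is
   hypothetical. */
#if 0 /* example only */
DECLINLINE(void) rtExampleCaptureTables(PRTIDTR pIdtr, PRTGDTR pGdtr)
{
    ASMGetIDTR(pIdtr);      /* pIdtr->cbIdt / pIdtr->pIdt now hold the IDT limit and base */
    ASMGetGDTR(pGdtr);      /* likewise for the GDT */
}
#endif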
240
241/**
242 * Get the cs register.
243 * @returns cs.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334#endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
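/* Illustrative usage sketch: measuring a code sequence in raw TSC ticks.
   Converting ticks to time requires the TSC frequency, which is not provided
   here.  The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(uint64_t) rtExampleMeasureTicks(void (*pfnWork)(void))
{
    uint64_t const uStart = ASMReadTSC();
    pfnWork();                          /* the operation being measured */
    return ASMReadTSC() - uStart;       /* elapsed timestamp-counter ticks */
}
#endif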
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
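/* Illustrative usage sketch: CPUID leaf 0 returns the highest standard leaf in
   EAX and the 12-character vendor string in EBX, EDX and ECX (in that order).
   The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(void) rtExampleQueryCpuVendor(char szVendor[13])
{
    uint32_t uMaxLeaf;
    ASMCpuId(0, &uMaxLeaf, &szVendor[0], &szVendor[8], &szVendor[4]);
    szVendor[12] = '\0';                /* e.g. "GenuineIntel" or "AuthenticAMD" */
}
#endif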
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
762
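/* Illustrative usage sketch: on 32-bit hosts CPUID availability should be
   checked first; bit 4 of leaf 1 EDX is the architectural TSC feature flag.
   The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(bool) rtExampleHasTsc(void)
{
    if (!ASMHasCpuId())
        return false;
    return (ASMCpuId_EDX(1) & (1U << 4)) != 0;
}
#endif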
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR2 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
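/* Illustrative usage sketch: the usual pattern for a short section that must
   run with interrupts disabled, restoring the previous interrupt state via
   ASMSetFlags().  The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(void) rtExampleDoWithIrqsOff(void (*pfnWork)(void))
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
    pfnWork();                  /* touch state that must not be interrupted */
    ASMSetFlags(fSavedFlags);   /* restores EFLAGS.IF to its previous value */
}
#endif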
1248
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1289 * @returns Register content.
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
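/* Illustrative usage sketch: a read-modify-write of a machine specific
   register.  The register number and mask are caller supplied placeholders,
   not constants defined by this file.  The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(void) rtExampleSetMsrBits(uint32_t uMsr, uint64_t fMask)
{
    uint64_t const u64 = ASMRdMsr(uMsr);
    if ((u64 & fMask) != fMask)
        ASMWrMsr(uMsr, u64 | fMask);
}
#endif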
1321
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344#else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
1507/**
1508 * Ensure that gcc does not use any register value before this instruction. This function is used
1509 * for assembler instructions with side-effects, e.g. port writes to magical guest ports causing
1510 * guest memory changes by the host.
1511 */
1512#if RT_INLINE_ASM_GNU_STYLE
1513DECLINLINE(void) ASMMemoryClobber(void)
1514{
1515 __asm__ __volatile__ ("" : : : "memory");
1516}
1517#else
1518DECLINLINE(void) ASMMemoryClobber(void)
1519{
1520}
1521#endif
1522
1523/**
1524 * Writes an 8-bit unsigned integer to an I/O port.
1525 *
1526 * @param Port I/O port to write to.
1527 * @param u8 8-bit integer to write.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1530DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1531#else
1532DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1533{
1534# if RT_INLINE_ASM_GNU_STYLE
1535 __asm__ __volatile__("outb %b1, %w0\n\t"
1536 :: "Nd" (Port),
1537 "a" (u8));
1538
1539# elif RT_INLINE_ASM_USES_INTRIN
1540 __outbyte(Port, u8);
1541
1542# else
1543 __asm
1544 {
1545 mov dx, [Port]
1546 mov al, [u8]
1547 out dx, al
1548 }
1549# endif
1550}
1551#endif
1552
1553
1554/**
1555 * Gets an 8-bit unsigned integer from an I/O port.
1556 *
1557 * @returns 8-bit integer.
1558 * @param Port I/O port to read from.
1559 */
1560#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1561DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1562#else
1563DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1564{
1565 uint8_t u8;
1566# if RT_INLINE_ASM_GNU_STYLE
1567 __asm__ __volatile__("inb %w1, %b0\n\t"
1568 : "=a" (u8)
1569 : "Nd" (Port));
1570
1571# elif RT_INLINE_ASM_USES_INTRIN
1572 u8 = __inbyte(Port);
1573
1574# else
1575 __asm
1576 {
1577 mov dx, [Port]
1578 in al, dx
1579 mov [u8], al
1580 }
1581# endif
1582 return u8;
1583}
1584#endif
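/* Illustrative usage sketch: the common index/data port access pattern built
   on ASMOutU8 and ASMInU8.  The port numbers are caller supplied placeholders.
   The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(uint8_t) rtExampleReadIndexedReg(RTIOPORT PortIndex, RTIOPORT PortData, uint8_t bReg)
{
    ASMOutU8(PortIndex, bReg);      /* select the register via the index port */
    return ASMInU8(PortData);       /* fetch its value through the data port */
}
#endif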
1585
1586
1587/**
1588 * Writes a 16-bit unsigned integer to an I/O port.
1589 *
1590 * @param Port I/O port to write to.
1591 * @param u16 16-bit integer to write.
1592 */
1593#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1594DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1595#else
1596DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1597{
1598# if RT_INLINE_ASM_GNU_STYLE
1599 __asm__ __volatile__("outw %w1, %w0\n\t"
1600 :: "Nd" (Port),
1601 "a" (u16));
1602
1603# elif RT_INLINE_ASM_USES_INTRIN
1604 __outword(Port, u16);
1605
1606# else
1607 __asm
1608 {
1609 mov dx, [Port]
1610 mov ax, [u16]
1611 out dx, ax
1612 }
1613# endif
1614}
1615#endif
1616
1617
1618/**
1619 * Gets a 16-bit unsigned integer from an I/O port.
1620 *
1621 * @returns 16-bit integer.
1622 * @param Port I/O port to read from.
1623 */
1624#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1625DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1626#else
1627DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1628{
1629 uint16_t u16;
1630# if RT_INLINE_ASM_GNU_STYLE
1631 __asm__ __volatile__("inw %w1, %w0\n\t"
1632 : "=a" (u16)
1633 : "Nd" (Port));
1634
1635# elif RT_INLINE_ASM_USES_INTRIN
1636 u16 = __inword(Port);
1637
1638# else
1639 __asm
1640 {
1641 mov dx, [Port]
1642 in ax, dx
1643 mov [u16], ax
1644 }
1645# endif
1646 return u16;
1647}
1648#endif
1649
1650
1651/**
1652 * Writes a 32-bit unsigned integer to an I/O port.
1653 *
1654 * @param Port I/O port to write to.
1655 * @param u32 32-bit integer to write.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1658DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1659#else
1660DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1661{
1662# if RT_INLINE_ASM_GNU_STYLE
1663 __asm__ __volatile__("outl %1, %w0\n\t"
1664 :: "Nd" (Port),
1665 "a" (u32));
1666
1667# elif RT_INLINE_ASM_USES_INTRIN
1668 __outdword(Port, u32);
1669
1670# else
1671 __asm
1672 {
1673 mov dx, [Port]
1674 mov eax, [u32]
1675 out dx, eax
1676 }
1677# endif
1678}
1679#endif
1680
1681
1682/**
1683 * Gets a 32-bit unsigned integer from an I/O port.
1684 *
1685 * @returns 32-bit integer.
1686 * @param Port I/O port to read from.
1687 */
1688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1689DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1690#else
1691DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1692{
1693 uint32_t u32;
1694# if RT_INLINE_ASM_GNU_STYLE
1695 __asm__ __volatile__("inl %w1, %0\n\t"
1696 : "=a" (u32)
1697 : "Nd" (Port));
1698
1699# elif RT_INLINE_ASM_USES_INTRIN
1700 u32 = __indword(Port);
1701
1702# else
1703 __asm
1704 {
1705 mov dx, [Port]
1706 in eax, dx
1707 mov [u32], eax
1708 }
1709# endif
1710 return u32;
1711}
1712#endif
1713
1714
1715/**
1716 * Atomically Exchange an unsigned 8-bit value.
1717 *
1718 * @returns Current *pu8 value
1719 * @param pu8 Pointer to the 8-bit variable to update.
1720 * @param u8 The 8-bit value to assign to *pu8.
1721 */
1722#if RT_INLINE_ASM_EXTERNAL
1723DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1724#else
1725DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1726{
1727# if RT_INLINE_ASM_GNU_STYLE
1728 __asm__ __volatile__("xchgb %0, %1\n\t"
1729 : "=m" (*pu8),
1730 "=r" (u8)
1731 : "1" (u8));
1732# else
1733 __asm
1734 {
1735# ifdef __AMD64__
1736 mov rdx, [pu8]
1737 mov al, [u8]
1738 xchg [rdx], al
1739 mov [u8], al
1740# else
1741 mov edx, [pu8]
1742 mov al, [u8]
1743 xchg [edx], al
1744 mov [u8], al
1745# endif
1746 }
1747# endif
1748 return u8;
1749}
1750#endif
1751
1752
1753/**
1754 * Atomically Exchange a signed 8-bit value.
1755 *
1756 * @returns Current *pi8 value
1757 * @param pi8 Pointer to the 8-bit variable to update.
1758 * @param i8 The 8-bit value to assign to *pi8.
1759 */
1760DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1761{
1762 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1763}
1764
1765
1766/**
1767 * Atomically Exchange a bool value.
1768 *
1769 * @returns Current *pf value
1770 * @param pf Pointer to the boolean variable to update.
1771 * @param f The boolean value to assign to *pf.
1772 */
1773DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1774{
1775 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1776}
1777
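/* Illustrative usage sketch: a minimal test-and-set spin lock on top of the
   byte exchange above; it only demonstrates the calls, real code would want
   back-off and assertions.  The rtExample* names are hypothetical. */
#if 0 /* example only */
DECLINLINE(void) rtExampleSpinAcquire(volatile bool *pfLock)
{
    while (ASMAtomicXchgBool(pfLock, true))
        /* spin until the previous value was false, i.e. the lock was free */;
}

DECLINLINE(void) rtExampleSpinRelease(volatile bool *pfLock)
{
    ASMAtomicXchgBool(pfLock, false);
}
#endif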
1778
1779/**
1780 * Atomically Exchange an unsigned 16-bit value.
1781 *
1782 * @returns Current *pu16 value
1783 * @param pu16 Pointer to the 16-bit variable to update.
1784 * @param u16 The 16-bit value to assign to *pu16.
1785 */
1786#if RT_INLINE_ASM_EXTERNAL
1787DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1788#else
1789DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1790{
1791# if RT_INLINE_ASM_GNU_STYLE
1792 __asm__ __volatile__("xchgw %0, %1\n\t"
1793 : "=m" (*pu16),
1794 "=r" (u16)
1795 : "1" (u16));
1796# else
1797 __asm
1798 {
1799# ifdef __AMD64__
1800 mov rdx, [pu16]
1801 mov ax, [u16]
1802 xchg [rdx], ax
1803 mov [u16], ax
1804# else
1805 mov edx, [pu16]
1806 mov ax, [u16]
1807 xchg [edx], ax
1808 mov [u16], ax
1809# endif
1810 }
1811# endif
1812 return u16;
1813}
1814#endif
1815
1816
1817/**
1818 * Atomically Exchange a signed 16-bit value.
1819 *
1820 * @returns Current *pi16 value
1821 * @param pi16 Pointer to the 16-bit variable to update.
1822 * @param i16 The 16-bit value to assign to *pi16.
1823 */
1824DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1825{
1826 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1827}
1828
1829
1830/**
1831 * Atomically Exchange an unsigned 32-bit value.
1832 *
1833 * @returns Current *pu32 value
1834 * @param pu32 Pointer to the 32-bit variable to update.
1835 * @param u32 The 32-bit value to assign to *pu32.
1836 */
1837#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1838DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1839#else
1840DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1841{
1842# if RT_INLINE_ASM_GNU_STYLE
1843 __asm__ __volatile__("xchgl %0, %1\n\t"
1844 : "=m" (*pu32),
1845 "=r" (u32)
1846 : "1" (u32));
1847
1848# elif RT_INLINE_ASM_USES_INTRIN
1849 u32 = _InterlockedExchange((long *)pu32, u32);
1850
1851# else
1852 __asm
1853 {
1854# ifdef __AMD64__
1855 mov rdx, [pu32]
1856 mov eax, u32
1857 xchg [rdx], eax
1858 mov [u32], eax
1859# else
1860 mov edx, [pu32]
1861 mov eax, u32
1862 xchg [edx], eax
1863 mov [u32], eax
1864# endif
1865 }
1866# endif
1867 return u32;
1868}
1869#endif
1870
1871
1872/**
1873 * Atomically Exchange a signed 32-bit value.
1874 *
1875 * @returns Current *pi32 value
1876 * @param pi32 Pointer to the 32-bit variable to update.
1877 * @param i32 The 32-bit value to assign to *pi32.
1878 */
1879DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1880{
1881 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1882}
1883
1884
1885/**
1886 * Atomically Exchange an unsigned 64-bit value.
1887 *
1888 * @returns Current *pu64 value
1889 * @param pu64 Pointer to the 64-bit variable to update.
1890 * @param u64 The 64-bit value to assign to *pu64.
1891 */
1892#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1893DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1894#else
1895DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1896{
1897# if defined(__AMD64__)
1898# if RT_INLINE_ASM_USES_INTRIN
1899 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1900
1901# elif RT_INLINE_ASM_GNU_STYLE
1902 __asm__ __volatile__("xchgq %0, %1\n\t"
1903 : "=m" (*pu64),
1904 "=r" (u64)
1905 : "1" (u64));
1906# else
1907 __asm
1908 {
1909 mov rdx, [pu64]
1910 mov rax, [u64]
1911 xchg [rdx], rax
1912 mov [u64], rax
1913 }
1914# endif
1915# else /* !__AMD64__ */
1916# if RT_INLINE_ASM_GNU_STYLE
1917# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1918 uint32_t u32 = (uint32_t)u64;
1919 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1920 "xchgl %%ebx, %3\n\t"
1921 "1:\n\t"
1922 "lock; cmpxchg8b (%5)\n\t"
1923 "jnz 1b\n\t"
1924 "xchgl %%ebx, %3\n\t"
1925 /*"xchgl %%esi, %5\n\t"*/
1926 : "=A" (u64),
1927 "=m" (*pu64)
1928 : "0" (*pu64),
1929 "m" ( u32 ),
1930 "c" ( (uint32_t)(u64 >> 32) ),
1931 "S" (pu64) );
1932# else /* !PIC */
1933 __asm__ __volatile__("1:\n\t"
1934 "lock; cmpxchg8b %1\n\t"
1935 "jnz 1b\n\t"
1936 : "=A" (u64),
1937 "=m" (*pu64)
1938 : "0" (*pu64),
1939 "b" ( (uint32_t)u64 ),
1940 "c" ( (uint32_t)(u64 >> 32) ));
1941# endif
1942# else
1943 __asm
1944 {
1945 mov ebx, dword ptr [u64]
1946 mov ecx, dword ptr [u64 + 4]
1947 mov edi, pu64
1948 mov eax, dword ptr [edi]
1949 mov edx, dword ptr [edi + 4]
1950 retry:
1951 lock cmpxchg8b [edi]
1952 jnz retry
1953 mov dword ptr [u64], eax
1954 mov dword ptr [u64 + 4], edx
1955 }
1956# endif
1957# endif /* !__AMD64__ */
1958 return u64;
1959}
1960#endif
1961
1962
1963/**
1964 * Atomically Exchange a signed 64-bit value.
1965 *
1966 * @returns Current *pi64 value
1967 * @param pi64 Pointer to the 64-bit variable to update.
1968 * @param i64 The 64-bit value to assign to *pi64.
1969 */
1970DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1971{
1972 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1973}
1974
1975
1976#ifdef __AMD64__
1977/**
1978 * Atomically Exchange an unsigned 128-bit value.
1979 *
1980 * @returns Current *pu128.
1981 * @param pu128 Pointer to the 128-bit variable to update.
1982 * @param u128 The 128-bit value to assign to *pu128.
1983 *
1984 * @remark We cannot really assume that any hardware supports this. Nor do I have
1985 * GAS support for it. So, for the time being we'll BREAK the atomic
1986 * bit of this function and use two 64-bit exchanges instead.
1987 */
1988# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1989DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1990# else
1991DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
1992{
1993 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
1994 {
1995 /** @todo this is clumsy code */
1996 RTUINT128U u128Ret;
1997 u128Ret.u = u128;
1998 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
1999 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2000 return u128Ret.u;
2001 }
2002#if 0 /* later? */
2003 else
2004 {
2005# if RT_INLINE_ASM_GNU_STYLE
2006 __asm__ __volatile__("1:\n\t"
2007 "lock; cmpxchg8b %1\n\t"
2008 "jnz 1b\n\t"
2009 : "=A" (u128),
2010 "=m" (*pu128)
2011 : "0" (*pu128),
2012 "b" ( (uint64_t)u128 ),
2013 "c" ( (uint64_t)(u128 >> 64) ));
2014# else
2015 __asm
2016 {
2017 mov rbx, dword ptr [u128]
2018 mov rcx, dword ptr [u128 + 4]
2019 mov rdi, pu128
2020 mov rax, dword ptr [rdi]
2021 mov rdx, dword ptr [rdi + 4]
2022 retry:
2023 lock cmpxchg16b [rdi]
2024 jnz retry
2025 mov dword ptr [u128], rax
2026 mov dword ptr [u128 + 4], rdx
2027 }
2028# endif
2029 }
2030 return u128;
2031#endif
2032}
2033# endif
2034#endif /* __AMD64__ */
2035
2036
2037/**
2038 * Atomically Reads an unsigned 64-bit value.
2039 *
2040 * @returns Current *pu64 value
2041 * @param pu64 Pointer to the 64-bit variable to read.
2042 * The memory pointed to must be writable.
2043 * @remark This will fault if the memory is read-only!
2044 */
2045#if RT_INLINE_ASM_EXTERNAL
2046DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2047#else
2048DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2049{
2050 uint64_t u64;
2051# ifdef __AMD64__
2052# if RT_INLINE_ASM_GNU_STYLE
2053 __asm__ __volatile__("movq %1, %0\n\t"
2054 : "=r" (u64)
2055 : "m" (*pu64));
2056# else
2057 __asm
2058 {
2059 mov rdx, [pu64]
2060 mov rax, [rdx]
2061 mov [u64], rax
2062 }
2063# endif
2064# else /* !__AMD64__ */
2065# if RT_INLINE_ASM_GNU_STYLE
2066# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2067 uint32_t u32EBX = 0;
2068 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2069 "lock; cmpxchg8b (%5)\n\t"
2070 "xchgl %%ebx, %3\n\t"
2071 : "=A" (u64),
2072 "=m" (*pu64)
2073 : "0" (0),
2074 "m" (u32EBX),
2075 "c" (0),
2076 "S" (pu64));
2077# else /* !PIC */
2078 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2079 : "=A" (u64),
2080 "=m" (*pu64)
2081 : "0" (0),
2082 "b" (0),
2083 "c" (0));
2084# endif
2085# else
2086 __asm
2087 {
2088 xor eax, eax
2089 xor edx, edx
2090 mov edi, pu64
2091 xor ecx, ecx
2092 xor ebx, ebx
2093 lock cmpxchg8b [edi]
2094 mov dword ptr [u64], eax
2095 mov dword ptr [u64 + 4], edx
2096 }
2097# endif
2098# endif /* !__AMD64__ */
2099 return u64;
2100}
2101#endif
2102
2103
2104/**
2105 * Atomically Reads a signed 64-bit value.
2106 *
2107 * @returns Current *pi64 value
2108 * @param pi64 Pointer to the 64-bit variable to read.
2109 * The memory pointed to must be writable.
2110 * @remark This will fault if the memory is read-only!
2111 */
2112DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2113{
2114 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2115}
2116
2117
2118/**
2119 * Atomically Exchange a value whose size might differ
2120 * between platforms or compilers.
2121 *
2122 * @param pu Pointer to the variable to update.
2123 * @param uNew The value to assign to *pu.
2124 */
2125#define ASMAtomicXchgSize(pu, uNew) \
2126 do { \
2127 switch (sizeof(*(pu))) { \
2128 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2129 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2130 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2131 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2132 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2133 } \
2134 } while (0)
2135
2136
2137/**
2138 * Atomically Exchange a pointer value.
2139 *
2140 * @returns Current *ppv value
2141 * @param ppv Pointer to the pointer variable to update.
2142 * @param pv The pointer value to assign to *ppv.
2143 */
2144DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2145{
2146#if ARCH_BITS == 32
2147 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2148#elif ARCH_BITS == 64
2149 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2150#else
2151# error "ARCH_BITS is bogus"
2152#endif
2153}
2154
2155
2156/**
2157 * Atomically Compare and Exchange an unsigned 32-bit value.
2158 *
2159 * @returns true if xchg was done.
2160 * @returns false if xchg wasn't done.
2161 *
2162 * @param pu32 Pointer to the value to update.
2163 * @param u32New The new value to assign to *pu32.
2164 * @param u32Old The old value to compare *pu32 with.
2165 */
2166#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2167DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2168#else
2169DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2170{
2171# if RT_INLINE_ASM_GNU_STYLE
2172 uint32_t u32Ret;
2173 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2174 "setz %%al\n\t"
2175 "movzx %%al, %%eax\n\t"
2176 : "=m" (*pu32),
2177 "=a" (u32Ret)
2178 : "r" (u32New),
2179 "1" (u32Old));
2180 return (bool)u32Ret;
2181
2182# elif RT_INLINE_ASM_USES_INTRIN
2183 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2184
2185# else
2186 uint32_t u32Ret;
2187 __asm
2188 {
2189# ifdef __AMD64__
2190 mov rdx, [pu32]
2191# else
2192 mov edx, [pu32]
2193# endif
2194 mov eax, [u32Old]
2195 mov ecx, [u32New]
2196# ifdef __AMD64__
2197 lock cmpxchg [rdx], ecx
2198# else
2199 lock cmpxchg [edx], ecx
2200# endif
2201 setz al
2202 movzx eax, al
2203 mov [u32Ret], eax
2204 }
2205 return !!u32Ret;
2206# endif
2207}
2208#endif
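/* Illustrative usage sketch: the typical compare-and-exchange retry loop,
   here implementing an atomic add without a dedicated primitive.  The
   rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(uint32_t) rtExampleAtomicAddU32(volatile uint32_t *pu32, uint32_t uAddend)
{
    uint32_t u32Old;
    do
        u32Old = *pu32;
    while (!ASMAtomicCmpXchgU32(pu32, u32Old + uAddend, u32Old));
    return u32Old + uAddend;    /* the value we installed */
}
#endif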
2209
2210
2211/**
2212 * Atomically Compare and Exchange a signed 32-bit value.
2213 *
2214 * @returns true if xchg was done.
2215 * @returns false if xchg wasn't done.
2216 *
2217 * @param pi32 Pointer to the value to update.
2218 * @param i32New The new value to assign to *pi32.
2219 * @param i32Old The old value to compare *pi32 with.
2220 */
2221DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2222{
2223 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2224}
2225
2226
2227/**
2228 * Atomically Compare and exchange an unsigned 64-bit value.
2229 *
2230 * @returns true if xchg was done.
2231 * @returns false if xchg wasn't done.
2232 *
2233 * @param pu64 Pointer to the 64-bit variable to update.
2234 * @param u64New The 64-bit value to assign to *pu64.
2235 * @param u64Old The value to compare with.
2236 */
2237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2238DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2239#else
2240DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2241{
2242# if RT_INLINE_ASM_USES_INTRIN
2243 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2244
2245# elif defined(__AMD64__)
2246# if RT_INLINE_ASM_GNU_STYLE
2247 uint64_t u64Ret;
2248 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2249 "setz %%al\n\t"
2250 "movzx %%al, %%eax\n\t"
2251 : "=m" (*pu64),
2252 "=a" (u64Ret)
2253 : "r" (u64New),
2254 "1" (u64Old));
2255 return (bool)u64Ret;
2256# else
2257 bool fRet;
2258 __asm
2259 {
2260 mov rdx, [pu64]
2261 mov rax, [u64Old]
2262 mov rcx, [u64New]
2263 lock cmpxchg [rdx], rcx
2264 setz al
2265 mov [fRet], al
2266 }
2267 return fRet;
2268# endif
2269# else /* !__AMD64__ */
2270 uint32_t u32Ret;
2271# if RT_INLINE_ASM_GNU_STYLE
2272# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2273 uint32_t u32 = (uint32_t)u64New;
2274 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2275 "lock; cmpxchg8b (%5)\n\t"
2276 "setz %%al\n\t"
2277 "xchgl %%ebx, %3\n\t"
2278 "movzx %%al, %%eax\n\t"
2279 : "=a" (u32Ret),
2280 "=m" (*pu64)
2281 : "A" (u64Old),
2282 "m" ( u32 ),
2283 "c" ( (uint32_t)(u64New >> 32) ),
2284 "S" (pu64) );
2285# else /* !PIC */
2286 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2287 "setz %%al\n\t"
2288 "movzx %%al, %%eax\n\t"
2289 : "=a" (u32Ret),
2290 "=m" (*pu64)
2291 : "A" (u64Old),
2292 "b" ( (uint32_t)u64New ),
2293 "c" ( (uint32_t)(u64New >> 32) ));
2294# endif
2295 return (bool)u32Ret;
2296# else
2297 __asm
2298 {
2299 mov ebx, dword ptr [u64New]
2300 mov ecx, dword ptr [u64New + 4]
2301 mov edi, [pu64]
2302 mov eax, dword ptr [u64Old]
2303 mov edx, dword ptr [u64Old + 4]
2304 lock cmpxchg8b [edi]
2305 setz al
2306 movzx eax, al
2307 mov dword ptr [u32Ret], eax
2308 }
2309 return !!u32Ret;
2310# endif
2311# endif /* !__AMD64__ */
2312}
2313#endif
2314
2315
2316/**
2317 * Atomically Compare and exchange a signed 64-bit value.
2318 *
2319 * @returns true if xchg was done.
2320 * @returns false if xchg wasn't done.
2321 *
2322 * @param pi64 Pointer to the 64-bit variable to update.
2323 * @param i64 The 64-bit value to assign to *pi64.
2324 * @param i64Old The value to compare with.
2325 */
2326DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2327{
2328 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2329}
2330
2331
2332
2333/** @def ASMAtomicCmpXchgSize
2334 * Atomically Compare and Exchange a value whose size might differ
2335 * between platforms or compilers.
2336 *
2337 * @param pu Pointer to the value to update.
2338 * @param uNew The new value to assign to *pu.
2339 * @param uOld The old value to compare *pu with.
2340 * @param fRc Where to store the result.
2341 */
2342#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2343 do { \
2344 switch (sizeof(*(pu))) { \
2345 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2346 break; \
2347 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2348 break; \
2349 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2350 (fRc) = false; \
2351 break; \
2352 } \
2353 } while (0)
2354
2355
2356/**
2357 * Atomically Compare and Exchange a pointer value.
2358 *
2359 * @returns true if xchg was done.
2360 * @returns false if xchg wasn't done.
2361 *
2362 * @param ppv Pointer to the value to update.
2363 * @param pvNew The new value to assign to *ppv.
2364 * @param pvOld The old value to compare *ppv with.
2365 */
2366DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2367{
2368#if ARCH_BITS == 32
2369 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2370#elif ARCH_BITS == 64
2371 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2372#else
2373# error "ARCH_BITS is bogus"
2374#endif
2375}
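/* Illustrative usage sketch: publishing a lazily created object exactly once;
   the caller that loses the race keeps ownership of its own instance and
   should dispose of it.  The rtExample* name is hypothetical. */
#if 0 /* example only */
DECLINLINE(void *) rtExamplePublishOnce(void * volatile *ppv, void *pvNew)
{
    if (ASMAtomicCmpXchgPtr(ppv, pvNew, NULL))
        return pvNew;           /* we won the race; pvNew is now published */
    return *ppv;                /* somebody else published first */
}
#endif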
2376
2377
2378/**
2379 * Atomically increment a 32-bit value.
2380 *
2381 * @returns The new value.
2382 * @param pu32 Pointer to the value to increment.
2383 */
2384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2385DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2386#else
2387DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2388{
2389 uint32_t u32;
2390# if RT_INLINE_ASM_USES_INTRIN
2391 u32 = _InterlockedIncrement((long *)pu32);
2392
2393# elif RT_INLINE_ASM_GNU_STYLE
2394 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2395 "incl %0\n\t"
2396 : "=r" (u32),
2397 "=m" (*pu32)
2398 : "0" (1)
2399 : "memory");
2400# else
2401 __asm
2402 {
2403 mov eax, 1
2404# ifdef __AMD64__
2405 mov rdx, [pu32]
2406 lock xadd [rdx], eax
2407# else
2408 mov edx, [pu32]
2409 lock xadd [edx], eax
2410# endif
2411 inc eax
2412 mov u32, eax
2413 }
2414# endif
2415 return u32;
2416}
2417#endif
2418
2419
2420/**
2421 * Atomically increment a signed 32-bit value.
2422 *
2423 * @returns The new value.
2424 * @param pi32 Pointer to the value to increment.
2425 */
2426DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2427{
2428 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2429}
2430
2431
2432/**
2433 * Atomically decrement an unsigned 32-bit value.
2434 *
2435 * @returns The new value.
2436 * @param pu32 Pointer to the value to decrement.
2437 */
2438#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2439DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2440#else
2441DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2442{
2443 uint32_t u32;
2444# if RT_INLINE_ASM_USES_INTRIN
2445 u32 = _InterlockedDecrement((long *)pu32);
2446
2447# elif RT_INLINE_ASM_GNU_STYLE
2448 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2449 "decl %0\n\t"
2450 : "=r" (u32),
2451 "=m" (*pu32)
2452 : "0" (-1)
2453 : "memory");
2454# else
2455 __asm
2456 {
2457 mov eax, -1
2458# ifdef __AMD64__
2459 mov rdx, [pu32]
2460 lock xadd [rdx], eax
2461# else
2462 mov edx, [pu32]
2463 lock xadd [edx], eax
2464# endif
2465 dec eax
2466 mov u32, eax
2467 }
2468# endif
2469 return u32;
2470}
2471#endif
2472
2473
2474/**
2475 * Atomically decrement a signed 32-bit value.
2476 *
2477 * @returns The new value.
2478 * @param pi32 Pointer to the value to decrement.
2479 */
2480DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2481{
2482 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2483}
2484
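/* Usage sketch (illustrative only, not part of this header): a minimal reference
 * counter built on the atomic increment / decrement above. Names are hypothetical. */
#if 0
DECLINLINE(void) SampleRetain(uint32_t volatile *pcRefs)
{
    ASMAtomicIncU32(pcRefs);
}

DECLINLINE(bool) SampleRelease(uint32_t volatile *pcRefs)
{
    /* Both helpers return the new value, so reaching 0 here means this caller
       dropped the last reference and should free the object. */
    return ASMAtomicDecU32(pcRefs) == 0;
}
#endif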
2485
2486/**
2487 * Atomically Or an unsigned 32-bit value.
2488 *
2489 * @param pu32 Pointer to the variable to OR u32 with.
2490 * @param u32 The value to OR *pu32 with.
2491 */
2492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2493DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2494#else
2495DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2496{
2497# if RT_INLINE_ASM_USES_INTRIN
2498 _InterlockedOr((long volatile *)pu32, (long)u32);
2499
2500# elif RT_INLINE_ASM_GNU_STYLE
2501 __asm__ __volatile__("lock; orl %1, %0\n\t"
2502 : "=m" (*pu32)
2503 : "r" (u32));
2504# else
2505 __asm
2506 {
2507 mov eax, [u32]
2508# ifdef __AMD64__
2509 mov rdx, [pu32]
2510 lock or [rdx], eax
2511# else
2512 mov edx, [pu32]
2513 lock or [edx], eax
2514# endif
2515 }
2516# endif
2517}
2518#endif
2519
2520
2521/**
2522 * Atomically Or a signed 32-bit value.
2523 *
2524 * @param pi32 Pointer to the variable to OR i32 with.
2525 * @param i32 The value to OR *pi32 with.
2526 */
2527DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2528{
2529 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2530}
2531
2532
2533/**
2534 * Atomically And an unsigned 32-bit value.
2535 *
2536 * @param pu32 Pointer to the variable to AND u32 with.
2537 * @param u32 The value to AND *pu32 with.
2538 */
2539#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2540DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2541#else
2542DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2543{
2544# if RT_INLINE_ASM_USES_INTRIN
2545 _InterlockedAnd((long volatile *)pu32, u32);
2546
2547# elif RT_INLINE_ASM_GNU_STYLE
2548 __asm__ __volatile__("lock; andl %1, %0\n\t"
2549 : "=m" (*pu32)
2550 : "r" (u32));
2551# else
2552 __asm
2553 {
2554 mov eax, [u32]
2555# ifdef __AMD64__
2556 mov rdx, [pu32]
2557 lock and [rdx], eax
2558# else
2559 mov edx, [pu32]
2560 lock and [edx], eax
2561# endif
2562 }
2563# endif
2564}
2565#endif
2566
2567
2568/**
2569 * Atomically And a signed 32-bit value.
2570 *
2571 * @param pi32 Pointer to the variable to AND i32 with.
2572 * @param i32 The value to AND *pi32 with.
2573 */
2574DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2575{
2576 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2577}
2578
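/* Usage sketch (illustrative only, not part of this header): maintaining a shared
 * flag word with the atomic OR / AND operations above. The flag value and function
 * names are made up. */
#if 0
#define SAMPLE_FLAG_BUSY 0x00000001 /* hypothetical flag */

DECLINLINE(void) SampleSetBusy(uint32_t volatile *pfFlags)
{
    ASMAtomicOrU32(pfFlags, SAMPLE_FLAG_BUSY);
}

DECLINLINE(void) SampleClearBusy(uint32_t volatile *pfFlags)
{
    /* ANDing with the complement clears just this flag and leaves the rest alone. */
    ASMAtomicAndU32(pfFlags, ~(uint32_t)SAMPLE_FLAG_BUSY);
}
#endif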
2579
2580/**
2581 * Invalidate page.
2582 *
2583 * @param pv Address of the page to invalidate.
2584 */
2585#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2586DECLASM(void) ASMInvalidatePage(void *pv);
2587#else
2588DECLINLINE(void) ASMInvalidatePage(void *pv)
2589{
2590# if RT_INLINE_ASM_USES_INTRIN
2591 __invlpg(pv);
2592
2593# elif RT_INLINE_ASM_GNU_STYLE
2594 __asm__ __volatile__("invlpg %0\n\t"
2595 : : "m" (*(uint8_t *)pv));
2596# else
2597 __asm
2598 {
2599# ifdef __AMD64__
2600 mov rax, [pv]
2601 invlpg [rax]
2602# else
2603 mov eax, [pv]
2604 invlpg [eax]
2605# endif
2606 }
2607# endif
2608}
2609#endif
2610
2611
2612#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2613# if PAGE_SIZE != 0x1000
2614# error "PAGE_SIZE is not 0x1000!"
2615# endif
2616#endif
2617
2618/**
2619 * Zeros a 4K memory page.
2620 *
2621 * @param pv Pointer to the memory block. This must be page aligned.
2622 */
2623#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2624DECLASM(void) ASMMemZeroPage(volatile void *pv);
2625# else
2626DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2627{
2628# if RT_INLINE_ASM_USES_INTRIN
2629# ifdef __AMD64__
2630 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2631# else
2632 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2633# endif
2634
2635# elif RT_INLINE_ASM_GNU_STYLE
2636 RTUINTREG uDummy;
2637# ifdef __AMD64__
2638 __asm__ __volatile__ ("rep stosq"
2639 : "=D" (pv),
2640 "=c" (uDummy)
2641 : "0" (pv),
2642 "c" (0x1000 >> 3),
2643 "a" (0)
2644 : "memory");
2645# else
2646 __asm__ __volatile__ ("rep stosl"
2647 : "=D" (pv),
2648 "=c" (uDummy)
2649 : "0" (pv),
2650 "c" (0x1000 >> 2),
2651 "a" (0)
2652 : "memory");
2653# endif
2654# else
2655 __asm
2656 {
2657# ifdef __AMD64__
2658 xor rax, rax
2659 mov ecx, 0200h
2660 mov rdi, [pv]
2661 rep stosq
2662# else
2663 xor eax, eax
2664 mov ecx, 0400h
2665 mov edi, [pv]
2666 rep stosd
2667# endif
2668 }
2669# endif
2670}
2671# endif
2672
2673
2674/**
2675 * Zeros a memory block with a 32-bit aligned size.
2676 *
2677 * @param pv Pointer to the memory block.
2678 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
2679 */
2680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2681DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2682#else
2683DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2684{
2685# if RT_INLINE_ASM_USES_INTRIN
2686 __stosd((unsigned long *)pv, 0, cb >> 2);
2687
2688# elif RT_INLINE_ASM_GNU_STYLE
2689 __asm__ __volatile__ ("rep stosl"
2690 : "=D" (pv),
2691 "=c" (cb)
2692 : "0" (pv),
2693 "1" (cb >> 2),
2694 "a" (0)
2695 : "memory");
2696# else
2697 __asm
2698 {
2699 xor eax, eax
2700# ifdef __AMD64__
2701 mov rcx, [cb]
2702 shr rcx, 2
2703 mov rdi, [pv]
2704# else
2705 mov ecx, [cb]
2706 shr ecx, 2
2707 mov edi, [pv]
2708# endif
2709 rep stosd
2710 }
2711# endif
2712}
2713#endif
2714
2715
2716/**
2717 * Fills a memory block with a 32-bit aligned size.
2718 *
2719 * @param pv Pointer to the memory block.
2720 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
2721 * @param u32 The value to fill with.
2722 */
2723#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2724DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2725#else
2726DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2727{
2728# if RT_INLINE_ASM_USES_INTRIN
2729    __stosd((unsigned long *)pv, u32, cb >> 2);
2730
2731# elif RT_INLINE_ASM_GNU_STYLE
2732 __asm__ __volatile__ ("rep stosl"
2733 : "=D" (pv),
2734 "=c" (cb)
2735 : "0" (pv),
2736 "1" (cb >> 2),
2737 "a" (u32)
2738 : "memory");
2739# else
2740 __asm
2741 {
2742# ifdef __AMD64__
2743 mov rcx, [cb]
2744 shr rcx, 2
2745 mov rdi, [pv]
2746# else
2747 mov ecx, [cb]
2748 shr ecx, 2
2749 mov edi, [pv]
2750# endif
2751 mov eax, [u32]
2752 rep stosd
2753 }
2754# endif
2755}
2756#endif
2757
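/* Usage sketch (illustrative only, not part of this header): initializing a
 * 32-bit aligned structure with the two helpers above. The structure, the fill
 * pattern and the function name are made up. */
#if 0
typedef struct SAMPLETABLE
{
    uint32_t cEntries;
    uint32_t au32Entries[255];
} SAMPLETABLE;

DECLINLINE(void) SampleInitTable(SAMPLETABLE *pTable)
{
    /* Both helpers require the byte count to be a multiple of 4. */
    ASMMemZero32(pTable, sizeof(*pTable));
    /* Poison the payload so stale entries are easy to spot in a debugger. */
    ASMMemFill32(pTable->au32Entries, sizeof(pTable->au32Entries), 0xdeadbeef);
}
#endif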
2758
2759
2760/**
2761 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2762 *
2763 * @returns u32F1 * u32F2.
2764 */
2765#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2766DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2767#else
2768DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2769{
2770# ifdef __AMD64__
2771 return (uint64_t)u32F1 * u32F2;
2772# else /* !__AMD64__ */
2773 uint64_t u64;
2774# if RT_INLINE_ASM_GNU_STYLE
2775 __asm__ __volatile__("mull %%edx"
2776 : "=A" (u64)
2777 : "a" (u32F2), "d" (u32F1));
2778# else
2779 __asm
2780 {
2781 mov edx, [u32F1]
2782 mov eax, [u32F2]
2783 mul edx
2784 mov dword ptr [u64], eax
2785 mov dword ptr [u64 + 4], edx
2786 }
2787# endif
2788 return u64;
2789# endif /* !__AMD64__ */
2790}
2791#endif
2792
2793
2794/**
2795 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2796 *
2797 * @returns i32F1 * i32F2.
2798 */
2799#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2800DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2801#else
2802DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2803{
2804# ifdef __AMD64__
2805 return (int64_t)i32F1 * i32F2;
2806# else /* !__AMD64__ */
2807 int64_t i64;
2808# if RT_INLINE_ASM_GNU_STYLE
2809 __asm__ __volatile__("imull %%edx"
2810 : "=A" (i64)
2811 : "a" (i32F2), "d" (i32F1));
2812# else
2813 __asm
2814 {
2815 mov edx, [i32F1]
2816 mov eax, [i32F2]
2817 imul edx
2818 mov dword ptr [i64], eax
2819 mov dword ptr [i64 + 4], edx
2820 }
2821# endif
2822 return i64;
2823# endif /* !__AMD64__ */
2824}
2825#endif
2826
2827
2828/**
2829 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2830 *
2831 * @returns u64 / u32.
2832 */
2833#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2834DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2835#else
2836DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2837{
2838# ifdef __AMD64__
2839 return (uint32_t)(u64 / u32);
2840# else /* !__AMD64__ */
2841# if RT_INLINE_ASM_GNU_STYLE
2842 RTUINTREG uDummy;
2843 __asm__ __volatile__("divl %3"
2844 : "=a" (u32), "=d"(uDummy)
2845 : "A" (u64), "r" (u32));
2846# else
2847 __asm
2848 {
2849 mov eax, dword ptr [u64]
2850 mov edx, dword ptr [u64 + 4]
2851 mov ecx, [u32]
2852 div ecx
2853 mov [u32], eax
2854 }
2855# endif
2856 return u32;
2857# endif /* !__AMD64__ */
2858}
2859#endif
2860
2861
2862/**
2863 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2864 *
2865 * @returns i64 / i32.
2866 */
2867#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2868DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2869#else
2870DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2871{
2872# ifdef __AMD64__
2873 return (int32_t)(i64 / i32);
2874# else /* !__AMD64__ */
2875# if RT_INLINE_ASM_GNU_STYLE
2876 RTUINTREG iDummy;
2877 __asm__ __volatile__("idivl %3"
2878 : "=a" (i32), "=d"(iDummy)
2879 : "A" (i64), "r" (i32));
2880# else
2881 __asm
2882 {
2883 mov eax, dword ptr [i64]
2884 mov edx, dword ptr [i64 + 4]
2885 mov ecx, [i32]
2886 idiv ecx
2887 mov [i32], eax
2888 }
2889# endif
2890 return i32;
2891# endif /* !__AMD64__ */
2892}
2893#endif
2894
2895
2896/**
2897 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
2898 * using a 96 bit intermediate result.
2899 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2900 * __udivdi3 and __umoddi3 even if this inline function is not used.
2901 *
2902 * @returns (u64A * u32B) / u32C.
2903 * @param u64A The 64-bit value.
2904 * @param u32B The 32-bit value to multiply A by.
2905 * @param u32C The 32-bit value to divide A*B by.
2906 */
2907#if RT_INLINE_ASM_EXTERNAL
2908DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2909#else
2910DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2911{
2912# if RT_INLINE_ASM_GNU_STYLE
2913# ifdef __AMD64__
2914 uint64_t u64Result, u64Spill;
2915 __asm__ __volatile__("mulq %2\n\t"
2916 "divq %3\n\t"
2917 : "=a" (u64Result),
2918 "=d" (u64Spill)
2919 : "r" ((uint64_t)u32B),
2920 "r" ((uint64_t)u32C),
2921 "0" (u64A),
2922 "1" (0));
2923 return u64Result;
2924# else
2925 uint32_t u32Dummy;
2926 uint64_t u64Result;
2927 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2928 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2929 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2930 eax = u64A.hi */
2931 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2932 edx = u32C */
2933 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2934 edx = u32B */
2935 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2936 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2937 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2938 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2939 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2940 edx = u64Hi % u32C */
2941 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2942 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2943 "divl %%ecx \n\t" /* u64Result.lo */
2944 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2945 : "=A"(u64Result),
2946 "=S"(u32Dummy), "=D"(u32Dummy)
2947 : "a"((uint32_t)u64A),
2948 "S"((uint32_t)(u64A >> 32)),
2949 "c"(u32B),
2950 "D"(u32C));
2951 return u64Result;
2952# endif
2953# else
2954 RTUINT64U u;
2955 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2956 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2957 u64Hi += (u64Lo >> 32);
2958 u.s.Hi = (uint32_t)(u64Hi / u32C);
2959 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2960 return u.u;
2961# endif
2962}
2963#endif
2964
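/* Usage sketch (illustrative only, not part of this header): scaling a tick count
 * to nanoseconds as (cTicks * 1e9) / uHz without overflowing a 64-bit intermediate,
 * assuming the frequency fits in 32 bits. The function name is hypothetical. */
#if 0
DECLINLINE(uint64_t) SampleTicksToNano(uint64_t cTicks, uint32_t uHz)
{
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000, uHz);
}
#endif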
2965
2966/**
2967 * Probes a byte pointer for read access.
2968 *
2969 * While the function will fault if the byte is not read accessible,
2970 * the idea is to do this in a safe place like before acquiring locks
2971 * and such like.
2972 *
2973 * Also, this function guarantees that an eager compiler is not going
2974 * to optimize the probing away.
2975 *
2976 * @param pvByte Pointer to the byte.
2977 */
2978#if RT_INLINE_ASM_EXTERNAL
2979DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2980#else
2981DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2982{
2983 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2984 uint8_t u8;
2985# if RT_INLINE_ASM_GNU_STYLE
2986 __asm__ __volatile__("movb (%1), %0\n\t"
2987 : "=r" (u8)
2988 : "r" (pvByte));
2989# else
2990 __asm
2991 {
2992# ifdef __AMD64__
2993 mov rax, [pvByte]
2994 mov al, [rax]
2995# else
2996 mov eax, [pvByte]
2997 mov al, [eax]
2998# endif
2999 mov [u8], al
3000 }
3001# endif
3002 return u8;
3003}
3004#endif
3005
3006/**
3007 * Probes a buffer for read access page by page.
3008 *
3009 * While the function will fault if the buffer is not fully read
3010 * accessible, the idea is to do this in a safe place like before
3011 * acquiring locks and such like.
3012 *
3013 * Also, this function guarantees that an eager compiler is not going
3014 * to optimize the probing away.
3015 *
3016 * @param pvBuf Pointer to the buffer.
3017 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3018 */
3019DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3020{
3021 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3022 /* the first byte */
3023 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3024 ASMProbeReadByte(pu8);
3025
3026    /* the whole pages in between. */
3027 while (cbBuf > /*PAGE_SIZE*/0x1000)
3028 {
3029 ASMProbeReadByte(pu8);
3030 cbBuf -= /*PAGE_SIZE*/0x1000;
3031 pu8 += /*PAGE_SIZE*/0x1000;
3032 }
3033
3034 /* the last byte */
3035 ASMProbeReadByte(pu8 + cbBuf - 1);
3036}
3037
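/* Usage sketch (illustrative only, not part of this header): touching a caller
 * supplied buffer before taking a lock, so that a potential page fault happens
 * here and not while the lock is held. The lock primitives are hypothetical. */
#if 0
void SampleSpinlockAcquire(void);   /* hypothetical */
void SampleSpinlockRelease(void);   /* hypothetical */

DECLINLINE(void) SampleCopyInUnderLock(uint8_t *pbDst, const uint8_t *pbSrc, size_t cb)
{
    size_t i;
    ASMProbeReadBuffer(pbSrc, cb);  /* may fault here, before the lock is taken (cb >= 1) */
    SampleSpinlockAcquire();
    for (i = 0; i < cb; i++)
        pbDst[i] = pbSrc[i];
    SampleSpinlockRelease();
}
#endif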
3038
3039/** @def ASMBreakpoint
3040 * Debugger Breakpoint.
3041 * @remark In the gnu world we add a nop instruction after the int3 to
3042 * force gdb to remain at the int3 source line.
3043 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3044 * @internal
3045 */
3046#if RT_INLINE_ASM_GNU_STYLE
3047# ifndef __L4ENV__
3048# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3049# else
3050# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3051# endif
3052#else
3053# define ASMBreakpoint() __debugbreak()
3054#endif
3055
3056
3057
3058/** @defgroup grp_inline_bits Bit Operations
3059 * @{
3060 */
3061
3062
3063/**
3064 * Sets a bit in a bitmap.
3065 *
3066 * @param pvBitmap Pointer to the bitmap.
3067 * @param iBit The bit to set.
3068 */
3069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3070DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3071#else
3072DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3073{
3074# if RT_INLINE_ASM_USES_INTRIN
3075 _bittestandset((long *)pvBitmap, iBit);
3076
3077# elif RT_INLINE_ASM_GNU_STYLE
3078 __asm__ __volatile__ ("btsl %1, %0"
3079 : "=m" (*(volatile long *)pvBitmap)
3080 : "Ir" (iBit)
3081 : "memory");
3082# else
3083 __asm
3084 {
3085# ifdef __AMD64__
3086 mov rax, [pvBitmap]
3087 mov edx, [iBit]
3088 bts [rax], edx
3089# else
3090 mov eax, [pvBitmap]
3091 mov edx, [iBit]
3092 bts [eax], edx
3093# endif
3094 }
3095# endif
3096}
3097#endif
3098
3099
3100/**
3101 * Atomically sets a bit in a bitmap.
3102 *
3103 * @param pvBitmap Pointer to the bitmap.
3104 * @param iBit The bit to set.
3105 */
3106#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3107DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3108#else
3109DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3110{
3111# if RT_INLINE_ASM_USES_INTRIN
3112 _interlockedbittestandset((long *)pvBitmap, iBit);
3113# elif RT_INLINE_ASM_GNU_STYLE
3114 __asm__ __volatile__ ("lock; btsl %1, %0"
3115 : "=m" (*(volatile long *)pvBitmap)
3116 : "Ir" (iBit)
3117 : "memory");
3118# else
3119 __asm
3120 {
3121# ifdef __AMD64__
3122 mov rax, [pvBitmap]
3123 mov edx, [iBit]
3124 lock bts [rax], edx
3125# else
3126 mov eax, [pvBitmap]
3127 mov edx, [iBit]
3128 lock bts [eax], edx
3129# endif
3130 }
3131# endif
3132}
3133#endif
3134
3135
3136/**
3137 * Clears a bit in a bitmap.
3138 *
3139 * @param pvBitmap Pointer to the bitmap.
3140 * @param iBit The bit to clear.
3141 */
3142#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3143DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3144#else
3145DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3146{
3147# if RT_INLINE_ASM_USES_INTRIN
3148 _bittestandreset((long *)pvBitmap, iBit);
3149
3150# elif RT_INLINE_ASM_GNU_STYLE
3151 __asm__ __volatile__ ("btrl %1, %0"
3152 : "=m" (*(volatile long *)pvBitmap)
3153 : "Ir" (iBit)
3154 : "memory");
3155# else
3156 __asm
3157 {
3158# ifdef __AMD64__
3159 mov rax, [pvBitmap]
3160 mov edx, [iBit]
3161 btr [rax], edx
3162# else
3163 mov eax, [pvBitmap]
3164 mov edx, [iBit]
3165 btr [eax], edx
3166# endif
3167 }
3168# endif
3169}
3170#endif
3171
3172
3173/**
3174 * Atomically clears a bit in a bitmap.
3175 *
3176 * @param pvBitmap Pointer to the bitmap.
3177 * @param iBit The bit to clear.
3178 * @remark No memory barrier, take care on smp.
3179 */
3180#if RT_INLINE_ASM_EXTERNAL
3181DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3182#else
3183DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3184{
3185# if RT_INLINE_ASM_GNU_STYLE
3186 __asm__ __volatile__ ("lock; btrl %1, %0"
3187 : "=m" (*(volatile long *)pvBitmap)
3188 : "Ir" (iBit)
3189 : "memory");
3190# else
3191 __asm
3192 {
3193# ifdef __AMD64__
3194 mov rax, [pvBitmap]
3195 mov edx, [iBit]
3196 lock btr [rax], edx
3197# else
3198 mov eax, [pvBitmap]
3199 mov edx, [iBit]
3200 lock btr [eax], edx
3201# endif
3202 }
3203# endif
3204}
3205#endif
3206
3207
3208/**
3209 * Toggles a bit in a bitmap.
3210 *
3211 * @param pvBitmap Pointer to the bitmap.
3212 * @param iBit The bit to toggle.
3213 */
3214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3215DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3216#else
3217DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3218{
3219# if RT_INLINE_ASM_USES_INTRIN
3220 _bittestandcomplement((long *)pvBitmap, iBit);
3221# elif RT_INLINE_ASM_GNU_STYLE
3222 __asm__ __volatile__ ("btcl %1, %0"
3223 : "=m" (*(volatile long *)pvBitmap)
3224 : "Ir" (iBit)
3225 : "memory");
3226# else
3227 __asm
3228 {
3229# ifdef __AMD64__
3230 mov rax, [pvBitmap]
3231 mov edx, [iBit]
3232 btc [rax], edx
3233# else
3234 mov eax, [pvBitmap]
3235 mov edx, [iBit]
3236 btc [eax], edx
3237# endif
3238 }
3239# endif
3240}
3241#endif
3242
3243
3244/**
3245 * Atomically toggles a bit in a bitmap.
3246 *
3247 * @param pvBitmap Pointer to the bitmap.
3248 * @param iBit The bit to toggle.
3249 */
3250#if RT_INLINE_ASM_EXTERNAL
3251DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3252#else
3253DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3254{
3255# if RT_INLINE_ASM_GNU_STYLE
3256 __asm__ __volatile__ ("lock; btcl %1, %0"
3257 : "=m" (*(volatile long *)pvBitmap)
3258 : "Ir" (iBit)
3259 : "memory");
3260# else
3261 __asm
3262 {
3263# ifdef __AMD64__
3264 mov rax, [pvBitmap]
3265 mov edx, [iBit]
3266 lock btc [rax], edx
3267# else
3268 mov eax, [pvBitmap]
3269 mov edx, [iBit]
3270 lock btc [eax], edx
3271# endif
3272 }
3273# endif
3274}
3275#endif
3276
3277
3278/**
3279 * Tests and sets a bit in a bitmap.
3280 *
3281 * @returns true if the bit was set.
3282 * @returns false if the bit was clear.
3283 * @param pvBitmap Pointer to the bitmap.
3284 * @param iBit The bit to test and set.
3285 */
3286#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3287DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3288#else
3289DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3290{
3291 union { bool f; uint32_t u32; uint8_t u8; } rc;
3292# if RT_INLINE_ASM_USES_INTRIN
3293 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3294
3295# elif RT_INLINE_ASM_GNU_STYLE
3296 __asm__ __volatile__ ("btsl %2, %1\n\t"
3297 "setc %b0\n\t"
3298 "andl $1, %0\n\t"
3299 : "=q" (rc.u32),
3300 "=m" (*(volatile long *)pvBitmap)
3301 : "Ir" (iBit)
3302 : "memory");
3303# else
3304 __asm
3305 {
3306 mov edx, [iBit]
3307# ifdef __AMD64__
3308 mov rax, [pvBitmap]
3309 bts [rax], edx
3310# else
3311 mov eax, [pvBitmap]
3312 bts [eax], edx
3313# endif
3314 setc al
3315 and eax, 1
3316 mov [rc.u32], eax
3317 }
3318# endif
3319 return rc.f;
3320}
3321#endif
3322
3323
3324/**
3325 * Atomically tests and sets a bit in a bitmap.
3326 *
3327 * @returns true if the bit was set.
3328 * @returns false if the bit was clear.
3329 * @param pvBitmap Pointer to the bitmap.
3330 * @param iBit The bit to test and set.
3331 */
3332#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3333DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3334#else
3335DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3336{
3337 union { bool f; uint32_t u32; uint8_t u8; } rc;
3338# if RT_INLINE_ASM_USES_INTRIN
3339 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3340# elif RT_INLINE_ASM_GNU_STYLE
3341 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3342 "setc %b0\n\t"
3343 "andl $1, %0\n\t"
3344 : "=q" (rc.u32),
3345 "=m" (*(volatile long *)pvBitmap)
3346 : "Ir" (iBit)
3347 : "memory");
3348# else
3349 __asm
3350 {
3351 mov edx, [iBit]
3352# ifdef __AMD64__
3353 mov rax, [pvBitmap]
3354 lock bts [rax], edx
3355# else
3356 mov eax, [pvBitmap]
3357 lock bts [eax], edx
3358# endif
3359 setc al
3360 and eax, 1
3361 mov [rc.u32], eax
3362 }
3363# endif
3364 return rc.f;
3365}
3366#endif
3367
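/* Usage sketch (illustrative only, not part of this header): atomically claiming
 * and releasing an "in use" bit in a shared status word. Names are hypothetical. */
#if 0
DECLINLINE(bool) SampleClaimDevice(uint32_t volatile *pfStatus, int32_t iDevice)
{
    /* True means the bit was clear before, i.e. this caller now owns the device. */
    return !ASMAtomicBitTestAndSet(pfStatus, iDevice);
}

DECLINLINE(void) SampleReleaseDevice(uint32_t volatile *pfStatus, int32_t iDevice)
{
    ASMAtomicBitClear(pfStatus, iDevice);
}
#endif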
3368
3369/**
3370 * Tests and clears a bit in a bitmap.
3371 *
3372 * @returns true if the bit was set.
3373 * @returns false if the bit was clear.
3374 * @param pvBitmap Pointer to the bitmap.
3375 * @param iBit The bit to test and clear.
3376 */
3377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3378DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3379#else
3380DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3381{
3382 union { bool f; uint32_t u32; uint8_t u8; } rc;
3383# if RT_INLINE_ASM_USES_INTRIN
3384 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3385
3386# elif RT_INLINE_ASM_GNU_STYLE
3387 __asm__ __volatile__ ("btrl %2, %1\n\t"
3388 "setc %b0\n\t"
3389 "andl $1, %0\n\t"
3390 : "=q" (rc.u32),
3391 "=m" (*(volatile long *)pvBitmap)
3392 : "Ir" (iBit)
3393 : "memory");
3394# else
3395 __asm
3396 {
3397 mov edx, [iBit]
3398# ifdef __AMD64__
3399 mov rax, [pvBitmap]
3400 btr [rax], edx
3401# else
3402 mov eax, [pvBitmap]
3403 btr [eax], edx
3404# endif
3405 setc al
3406 and eax, 1
3407 mov [rc.u32], eax
3408 }
3409# endif
3410 return rc.f;
3411}
3412#endif
3413
3414
3415/**
3416 * Atomically tests and clears a bit in a bitmap.
3417 *
3418 * @returns true if the bit was set.
3419 * @returns false if the bit was clear.
3420 * @param pvBitmap Pointer to the bitmap.
3421 * @param iBit The bit to test and clear.
3422 * @remark No memory barrier, take care on smp.
3423 */
3424#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3425DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3426#else
3427DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3428{
3429 union { bool f; uint32_t u32; uint8_t u8; } rc;
3430# if RT_INLINE_ASM_USES_INTRIN
3431 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3432
3433# elif RT_INLINE_ASM_GNU_STYLE
3434 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3435 "setc %b0\n\t"
3436 "andl $1, %0\n\t"
3437 : "=q" (rc.u32),
3438 "=m" (*(volatile long *)pvBitmap)
3439 : "Ir" (iBit)
3440 : "memory");
3441# else
3442 __asm
3443 {
3444 mov edx, [iBit]
3445# ifdef __AMD64__
3446 mov rax, [pvBitmap]
3447 lock btr [rax], edx
3448# else
3449 mov eax, [pvBitmap]
3450 lock btr [eax], edx
3451# endif
3452 setc al
3453 and eax, 1
3454 mov [rc.u32], eax
3455 }
3456# endif
3457 return rc.f;
3458}
3459#endif
3460
3461
3462/**
3463 * Tests and toggles a bit in a bitmap.
3464 *
3465 * @returns true if the bit was set.
3466 * @returns false if the bit was clear.
3467 * @param pvBitmap Pointer to the bitmap.
3468 * @param iBit The bit to test and toggle.
3469 */
3470#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3471DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3472#else
3473DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3474{
3475 union { bool f; uint32_t u32; uint8_t u8; } rc;
3476# if RT_INLINE_ASM_USES_INTRIN
3477 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3478
3479# elif RT_INLINE_ASM_GNU_STYLE
3480 __asm__ __volatile__ ("btcl %2, %1\n\t"
3481 "setc %b0\n\t"
3482 "andl $1, %0\n\t"
3483 : "=q" (rc.u32),
3484 "=m" (*(volatile long *)pvBitmap)
3485 : "Ir" (iBit)
3486 : "memory");
3487# else
3488 __asm
3489 {
3490 mov edx, [iBit]
3491# ifdef __AMD64__
3492 mov rax, [pvBitmap]
3493 btc [rax], edx
3494# else
3495 mov eax, [pvBitmap]
3496 btc [eax], edx
3497# endif
3498 setc al
3499 and eax, 1
3500 mov [rc.u32], eax
3501 }
3502# endif
3503 return rc.f;
3504}
3505#endif
3506
3507
3508/**
3509 * Atomically tests and toggles a bit in a bitmap.
3510 *
3511 * @returns true if the bit was set.
3512 * @returns false if the bit was clear.
3513 * @param pvBitmap Pointer to the bitmap.
3514 * @param iBit The bit to test and toggle.
3515 */
3516#if RT_INLINE_ASM_EXTERNAL
3517DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3518#else
3519DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3520{
3521 union { bool f; uint32_t u32; uint8_t u8; } rc;
3522# if RT_INLINE_ASM_GNU_STYLE
3523 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3524 "setc %b0\n\t"
3525 "andl $1, %0\n\t"
3526 : "=q" (rc.u32),
3527 "=m" (*(volatile long *)pvBitmap)
3528 : "Ir" (iBit)
3529 : "memory");
3530# else
3531 __asm
3532 {
3533 mov edx, [iBit]
3534# ifdef __AMD64__
3535 mov rax, [pvBitmap]
3536 lock btc [rax], edx
3537# else
3538 mov eax, [pvBitmap]
3539 lock btc [eax], edx
3540# endif
3541 setc al
3542 and eax, 1
3543 mov [rc.u32], eax
3544 }
3545# endif
3546 return rc.f;
3547}
3548#endif
3549
3550
3551/**
3552 * Tests if a bit in a bitmap is set.
3553 *
3554 * @returns true if the bit is set.
3555 * @returns false if the bit is clear.
3556 * @param pvBitmap Pointer to the bitmap.
3557 * @param iBit The bit to test.
3558 */
3559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3560DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3561#else
3562DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3563{
3564 union { bool f; uint32_t u32; uint8_t u8; } rc;
3565# if RT_INLINE_ASM_USES_INTRIN
3566 rc.u32 = _bittest((long *)pvBitmap, iBit);
3567# elif RT_INLINE_ASM_GNU_STYLE
3568
3569 __asm__ __volatile__ ("btl %2, %1\n\t"
3570 "setc %b0\n\t"
3571 "andl $1, %0\n\t"
3572 : "=q" (rc.u32),
3573 "=m" (*(volatile long *)pvBitmap)
3574 : "Ir" (iBit)
3575 : "memory");
3576# else
3577 __asm
3578 {
3579 mov edx, [iBit]
3580# ifdef __AMD64__
3581 mov rax, [pvBitmap]
3582 bt [rax], edx
3583# else
3584 mov eax, [pvBitmap]
3585 bt [eax], edx
3586# endif
3587 setc al
3588 and eax, 1
3589 mov [rc.u32], eax
3590 }
3591# endif
3592 return rc.f;
3593}
3594#endif
3595
3596
3597/**
3598 * Clears a bit range within a bitmap.
3599 *
3600 * @param pvBitmap Pointer to the bitmap.
3601 * @param iBitStart The first bit to clear.
3602 * @param iBitEnd The first bit not to clear.
3603 */
3604DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3605{
3606 if (iBitStart < iBitEnd)
3607 {
3608 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3609 int iStart = iBitStart & ~31;
3610 int iEnd = iBitEnd & ~31;
3611 if (iStart == iEnd)
3612 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3613 else
3614 {
3615 /* bits in first dword. */
3616 if (iBitStart & 31)
3617 {
3618 *pu32 &= (1 << (iBitStart & 31)) - 1;
3619 pu32++;
3620 iBitStart = iStart + 32;
3621 }
3622
3623 /* whole dword. */
3624 if (iBitStart != iEnd)
3625 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3626
3627 /* bits in last dword. */
3628 if (iBitEnd & 31)
3629 {
3630 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3631 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3632 }
3633 }
3634 }
3635}
3636
3637
3638/**
3639 * Finds the first clear bit in a bitmap.
3640 *
3641 * @returns Index of the first zero bit.
3642 * @returns -1 if no clear bit was found.
3643 * @param pvBitmap Pointer to the bitmap.
3644 * @param cBits The number of bits in the bitmap. Multiple of 32.
3645 */
3646#if RT_INLINE_ASM_EXTERNAL
3647DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3648#else
3649DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3650{
3651 if (cBits)
3652 {
3653 int32_t iBit;
3654# if RT_INLINE_ASM_GNU_STYLE
3655 RTCCUINTREG uEAX, uECX, uEDI;
3656 cBits = RT_ALIGN_32(cBits, 32);
3657 __asm__ __volatile__("repe; scasl\n\t"
3658 "je 1f\n\t"
3659# ifdef __AMD64__
3660 "lea -4(%%rdi), %%rdi\n\t"
3661 "xorl (%%rdi), %%eax\n\t"
3662 "subq %5, %%rdi\n\t"
3663# else
3664 "lea -4(%%edi), %%edi\n\t"
3665 "xorl (%%edi), %%eax\n\t"
3666 "subl %5, %%edi\n\t"
3667# endif
3668 "shll $3, %%edi\n\t"
3669 "bsfl %%eax, %%edx\n\t"
3670 "addl %%edi, %%edx\n\t"
3671 "1:\t\n"
3672 : "=d" (iBit),
3673 "=&c" (uECX),
3674 "=&D" (uEDI),
3675 "=&a" (uEAX)
3676 : "0" (0xffffffff),
3677 "mr" (pvBitmap),
3678 "1" (cBits >> 5),
3679 "2" (pvBitmap),
3680 "3" (0xffffffff));
3681# else
3682 cBits = RT_ALIGN_32(cBits, 32);
3683 __asm
3684 {
3685# ifdef __AMD64__
3686 mov rdi, [pvBitmap]
3687 mov rbx, rdi
3688# else
3689 mov edi, [pvBitmap]
3690 mov ebx, edi
3691# endif
3692 mov edx, 0ffffffffh
3693 mov eax, edx
3694 mov ecx, [cBits]
3695 shr ecx, 5
3696 repe scasd
3697 je done
3698
3699# ifdef __AMD64__
3700 lea rdi, [rdi - 4]
3701 xor eax, [rdi]
3702 sub rdi, rbx
3703# else
3704 lea edi, [edi - 4]
3705 xor eax, [edi]
3706 sub edi, ebx
3707# endif
3708 shl edi, 3
3709 bsf edx, eax
3710 add edx, edi
3711 done:
3712 mov [iBit], edx
3713 }
3714# endif
3715 return iBit;
3716 }
3717 return -1;
3718}
3719#endif
3720
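/* Usage sketch (illustrative only, not part of this header): allocating a free
 * slot from a bitmap by combining ASMBitFirstClear with the atomic test-and-set,
 * retrying when another thread grabs the slot first. Names are hypothetical and
 * cSlots must be a multiple of 32, as required by ASMBitFirstClear. */
#if 0
DECLINLINE(int) SampleAllocSlot(volatile void *pvBitmap, uint32_t cSlots)
{
    for (;;)
    {
        int iSlot = ASMBitFirstClear(pvBitmap, cSlots);
        if (iSlot < 0)
            return -1;                                  /* all slots taken */
        if (!ASMAtomicBitTestAndSet(pvBitmap, iSlot))
            return iSlot;                               /* we won the race */
        /* Somebody else set the bit in the meantime - scan again. */
    }
}
#endif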
3721
3722/**
3723 * Finds the next clear bit in a bitmap.
3724 *
3725 * @returns Index of the next clear bit.
3726 * @returns -1 if no clear bit was found.
3727 * @param pvBitmap Pointer to the bitmap.
3728 * @param cBits The number of bits in the bitmap. Multiple of 32.
3729 * @param iBitPrev The bit returned from the last search.
3730 * The search will start at iBitPrev + 1.
3731 */
3732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3733DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3734#else
3735DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3736{
3737 int iBit = ++iBitPrev & 31;
3738 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3739 cBits -= iBitPrev & ~31;
3740 if (iBit)
3741 {
3742 /* inspect the first dword. */
3743 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3744# if RT_INLINE_ASM_USES_INTRIN
3745 unsigned long ulBit = 0;
3746 if (_BitScanForward(&ulBit, u32))
3747 return ulBit + iBitPrev;
3748 iBit = -1;
3749# else
3750# if RT_INLINE_ASM_GNU_STYLE
3751 __asm__ __volatile__("bsf %1, %0\n\t"
3752 "jnz 1f\n\t"
3753 "movl $-1, %0\n\t"
3754 "1:\n\t"
3755 : "=r" (iBit)
3756 : "r" (u32));
3757# else
3758 __asm
3759 {
3760 mov edx, [u32]
3761 bsf eax, edx
3762 jnz done
3763 mov eax, 0ffffffffh
3764 done:
3765 mov [iBit], eax
3766 }
3767# endif
3768 if (iBit >= 0)
3769 return iBit + iBitPrev;
3770# endif
3771 /* Search the rest of the bitmap, if there is anything. */
3772 if (cBits > 32)
3773 {
3774 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3775 if (iBit >= 0)
3776 return iBit + (iBitPrev & ~31) + 32;
3777 }
3778 }
3779 else
3780 {
3781 /* Search the rest of the bitmap. */
3782 iBit = ASMBitFirstClear(pvBitmap, cBits);
3783 if (iBit >= 0)
3784 return iBit + (iBitPrev & ~31);
3785 }
3786 return iBit;
3787}
3788#endif
3789
3790
3791/**
3792 * Finds the first set bit in a bitmap.
3793 *
3794 * @returns Index of the first set bit.
3795 * @returns -1 if no set bit was found.
3796 * @param pvBitmap Pointer to the bitmap.
3797 * @param cBits The number of bits in the bitmap. Multiple of 32.
3798 */
3799#if RT_INLINE_ASM_EXTERNAL
3800DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3801#else
3802DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3803{
3804 if (cBits)
3805 {
3806 int32_t iBit;
3807# if RT_INLINE_ASM_GNU_STYLE
3808 RTCCUINTREG uEAX, uECX, uEDI;
3809 cBits = RT_ALIGN_32(cBits, 32);
3810 __asm__ __volatile__("repe; scasl\n\t"
3811 "je 1f\n\t"
3812# ifdef __AMD64__
3813 "lea -4(%%rdi), %%rdi\n\t"
3814 "movl (%%rdi), %%eax\n\t"
3815 "subq %5, %%rdi\n\t"
3816# else
3817 "lea -4(%%edi), %%edi\n\t"
3818 "movl (%%edi), %%eax\n\t"
3819 "subl %5, %%edi\n\t"
3820# endif
3821 "shll $3, %%edi\n\t"
3822 "bsfl %%eax, %%edx\n\t"
3823 "addl %%edi, %%edx\n\t"
3824 "1:\t\n"
3825 : "=d" (iBit),
3826 "=&c" (uECX),
3827 "=&D" (uEDI),
3828 "=&a" (uEAX)
3829 : "0" (0xffffffff),
3830 "mr" (pvBitmap),
3831 "1" (cBits >> 5),
3832 "2" (pvBitmap),
3833 "3" (0));
3834# else
3835 cBits = RT_ALIGN_32(cBits, 32);
3836 __asm
3837 {
3838# ifdef __AMD64__
3839 mov rdi, [pvBitmap]
3840 mov rbx, rdi
3841# else
3842 mov edi, [pvBitmap]
3843 mov ebx, edi
3844# endif
3845 mov edx, 0ffffffffh
3846 xor eax, eax
3847 mov ecx, [cBits]
3848 shr ecx, 5
3849 repe scasd
3850 je done
3851# ifdef __AMD64__
3852 lea rdi, [rdi - 4]
3853 mov eax, [rdi]
3854 sub rdi, rbx
3855# else
3856 lea edi, [edi - 4]
3857 mov eax, [edi]
3858 sub edi, ebx
3859# endif
3860 shl edi, 3
3861 bsf edx, eax
3862 add edx, edi
3863 done:
3864 mov [iBit], edx
3865 }
3866# endif
3867 return iBit;
3868 }
3869 return -1;
3870}
3871#endif
3872
3873
3874/**
3875 * Finds the next set bit in a bitmap.
3876 *
3877 * @returns Index of the next set bit.
3878 * @returns -1 if no set bit was found.
3879 * @param pvBitmap Pointer to the bitmap.
3880 * @param cBits The number of bits in the bitmap. Multiple of 32.
3881 * @param iBitPrev The bit returned from the last search.
3882 * The search will start at iBitPrev + 1.
3883 */
3884#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3885DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3886#else
3887DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3888{
3889 int iBit = ++iBitPrev & 31;
3890 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3891 cBits -= iBitPrev & ~31;
3892 if (iBit)
3893 {
3894 /* inspect the first dword. */
3895 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3896# if RT_INLINE_ASM_USES_INTRIN
3897 unsigned long ulBit = 0;
3898 if (_BitScanForward(&ulBit, u32))
3899 return ulBit + iBitPrev;
3900 iBit = -1;
3901# else
3902# if RT_INLINE_ASM_GNU_STYLE
3903 __asm__ __volatile__("bsf %1, %0\n\t"
3904 "jnz 1f\n\t"
3905 "movl $-1, %0\n\t"
3906 "1:\n\t"
3907 : "=r" (iBit)
3908 : "r" (u32));
3909# else
3910 __asm
3911 {
3912 mov edx, u32
3913 bsf eax, edx
3914 jnz done
3915 mov eax, 0ffffffffh
3916 done:
3917 mov [iBit], eax
3918 }
3919# endif
3920 if (iBit >= 0)
3921 return iBit + iBitPrev;
3922# endif
3923 /* Search the rest of the bitmap, if there is anything. */
3924 if (cBits > 32)
3925 {
3926 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3927 if (iBit >= 0)
3928 return iBit + (iBitPrev & ~31) + 32;
3929 }
3930
3931 }
3932 else
3933 {
3934 /* Search the rest of the bitmap. */
3935 iBit = ASMBitFirstSet(pvBitmap, cBits);
3936 if (iBit >= 0)
3937 return iBit + (iBitPrev & ~31);
3938 }
3939 return iBit;
3940}
3941#endif
3942
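/* Usage sketch (illustrative only, not part of this header): walking all set bits
 * in a bitmap with ASMBitFirstSet / ASMBitNextSet. The callback and the function
 * name are hypothetical, and cBits must be a multiple of 32. */
#if 0
DECLINLINE(void) SampleForEachSetBit(volatile void *pvBitmap, uint32_t cBits,
                                     void (*pfnCallback)(int iBit))
{
    int iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        pfnCallback(iBit);
        /* Continue the scan right after the bit we just handled. */
        iBit = ASMBitNextSet(pvBitmap, cBits, iBit);
    }
}
#endif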
3943
3944/**
3945 * Finds the first bit which is set in the given 32-bit integer.
3946 * Bits are numbered from 1 (least significant) to 32.
3947 *
3948 * @returns index [1..32] of the first set bit.
3949 * @returns 0 if all bits are cleared.
3950 * @param u32 Integer to search for set bits.
3951 * @remark Similar to ffs() in BSD.
3952 */
3953DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3954{
3955# if RT_INLINE_ASM_USES_INTRIN
3956 unsigned long iBit;
3957 if (_BitScanForward(&iBit, u32))
3958 iBit++;
3959 else
3960 iBit = 0;
3961# elif RT_INLINE_ASM_GNU_STYLE
3962 uint32_t iBit;
3963 __asm__ __volatile__("bsf %1, %0\n\t"
3964 "jnz 1f\n\t"
3965 "xorl %0, %0\n\t"
3966 "jmp 2f\n"
3967 "1:\n\t"
3968 "incl %0\n"
3969 "2:\n\t"
3970 : "=r" (iBit)
3971 : "rm" (u32));
3972# else
3973 uint32_t iBit;
3974 _asm
3975 {
3976 bsf eax, [u32]
3977 jnz found
3978 xor eax, eax
3979 jmp done
3980 found:
3981 inc eax
3982 done:
3983 mov [iBit], eax
3984 }
3985# endif
3986 return iBit;
3987}
3988
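/* Usage sketch (illustrative only, not part of this header): because the return
 * value is 1-based, the shift count of a power of two is the result minus one.
 * For example ASMBitFirstSetU32(0x1000) returns 13, so a 4K page gives shift 12. */
#if 0
DECLINLINE(unsigned) SamplePageShift(uint32_t cbPage)
{
    /* Assumes cbPage is a non-zero power of two. */
    return ASMBitFirstSetU32(cbPage) - 1;
}
#endif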
3989
3990/**
3991 * Finds the first bit which is set in the given 32-bit integer.
3992 * Bits are numbered from 1 (least significant) to 32.
3993 *
3994 * @returns index [1..32] of the first set bit.
3995 * @returns 0 if all bits are cleared.
3996 * @param i32 Integer to search for set bits.
3997 * @remark Similar to ffs() in BSD.
3998 */
3999DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4000{
4001 return ASMBitFirstSetU32((uint32_t)i32);
4002}
4003
4004
4005/**
4006 * Finds the last bit which is set in the given 32-bit integer.
4007 * Bits are numbered from 1 (least significant) to 32.
4008 *
4009 * @returns index [1..32] of the last set bit.
4010 * @returns 0 if all bits are cleared.
4011 * @param u32 Integer to search for set bits.
4012 * @remark Similar to fls() in BSD.
4013 */
4014DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4015{
4016# if RT_INLINE_ASM_USES_INTRIN
4017 unsigned long iBit;
4018 if (_BitScanReverse(&iBit, u32))
4019 iBit++;
4020 else
4021 iBit = 0;
4022# elif RT_INLINE_ASM_GNU_STYLE
4023 uint32_t iBit;
4024 __asm__ __volatile__("bsrl %1, %0\n\t"
4025 "jnz 1f\n\t"
4026 "xorl %0, %0\n\t"
4027 "jmp 2f\n"
4028 "1:\n\t"
4029 "incl %0\n"
4030 "2:\n\t"
4031 : "=r" (iBit)
4032 : "rm" (u32));
4033# else
4034 uint32_t iBit;
4035 _asm
4036 {
4037 bsr eax, [u32]
4038 jnz found
4039 xor eax, eax
4040 jmp done
4041 found:
4042 inc eax
4043 done:
4044 mov [iBit], eax
4045 }
4046# endif
4047 return iBit;
4048}
4049
4050
4051/**
4052 * Finds the last bit which is set in the given 32-bit integer.
4053 * Bits are numbered from 1 (least significant) to 32.
4054 *
4055 * @returns index [1..32] of the last set bit.
4056 * @returns 0 if all bits are cleared.
4057 * @param i32 Integer to search for set bits.
4058 * @remark Similar to fls() in BSD.
4059 */
4060DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4061{
4062    return ASMBitLastSetU32((uint32_t)i32);
4063}
4064
4065
4066/**
4067 * Reverse the byte order of the given 32-bit integer.
4068 * @param u32 Integer
4069 */
4070DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4071{
4072#if RT_INLINE_ASM_USES_INTRIN
4073 u32 = _byteswap_ulong(u32);
4074#elif RT_INLINE_ASM_GNU_STYLE
4075 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4076#else
4077 _asm
4078 {
4079 mov eax, [u32]
4080 bswap eax
4081 mov [u32], eax
4082 }
4083#endif
4084 return u32;
4085}
4086
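/* Usage sketch (illustrative only, not part of this header): converting a 32-bit
 * big-endian value read from a device register or network packet to host byte
 * order on a little-endian x86 host. The function name is hypothetical. */
#if 0
DECLINLINE(uint32_t) SampleReadBE32(const uint32_t *pu32)
{
    /* The stored big-endian value must be byte swapped into host order. */
    return ASMByteSwapU32(*pu32);
}
#endif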
4087/** @} */
4088
4089
4090/** @} */
4091#endif
4092