VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 2211

Last change on this file since 2211 was 2144, checked in by vboxsync, 18 years ago

added ASMMemoryClobber()

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.6 KB
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using the compiler intrinsics (_MSC_VER >= 1400).
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_byteswap_ushort)
67# pragma intrinsic(_byteswap_ulong)
68# pragma intrinsic(_interlockedbittestandset)
69# pragma intrinsic(_interlockedbittestandreset)
70# pragma intrinsic(_InterlockedAnd)
71# pragma intrinsic(_InterlockedOr)
72# pragma intrinsic(_InterlockedIncrement)
73# pragma intrinsic(_InterlockedDecrement)
74# pragma intrinsic(_InterlockedExchange)
75# pragma intrinsic(_InterlockedCompareExchange)
76# pragma intrinsic(_InterlockedCompareExchange64)
77# ifdef __AMD64__
78# pragma intrinsic(__stosq)
79# pragma intrinsic(__readcr8)
80# pragma intrinsic(__writecr8)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# endif
84# endif
85#endif
86#ifndef RT_INLINE_ASM_USES_INTRIN
87# define RT_INLINE_ASM_USES_INTRIN 0
88#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
97/** @def RT_INLINE_ASM_EXTERNAL
98 * Defined as 1 if the compiler does not support inline assembly.
99 * The ASM* functions will then be implemented in an external .asm file.
100 *
101 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
102 * inline assembly in their AMD64 compiler.
103 */
104#if defined(_MSC_VER) && defined(__AMD64__)
105# define RT_INLINE_ASM_EXTERNAL 1
106#else
107# define RT_INLINE_ASM_EXTERNAL 0
108#endif
109
110/** @def RT_INLINE_ASM_GNU_STYLE
111 * Defined as 1 if the compiler understands GNU style inline assembly.
112 */
113#if defined(_MSC_VER)
114# define RT_INLINE_ASM_GNU_STYLE 0
115#else
116# define RT_INLINE_ASM_GNU_STYLE 1
117#endif
118
119
120/** @todo find a more proper place for this structure? */
121#pragma pack(1)
122/** IDTR */
123typedef struct RTIDTR
124{
125 /** Size of the IDT. */
126 uint16_t cbIdt;
127 /** Address of the IDT. */
128 uintptr_t pIdt;
129} RTIDTR, *PRTIDTR;
130#pragma pack()
131
132#pragma pack(1)
133/** GDTR */
134typedef struct RTGDTR
135{
136 /** Size of the GDT. */
137 uint16_t cbGdt;
138 /** Address of the GDT. */
139 uintptr_t pGdt;
140} RTGDTR, *PRTGDTR;
141#pragma pack()
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(__DOXYGEN__)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241/**
242 * Get the CS register.
243 * @returns CS.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334#endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
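
/* Illustrative usage sketch (added for clarity, not part of the original header): a crude
 * cycle count around a piece of code using ASMReadTSC. RDTSC is not serializing, so this
 * is only a rough measure; DoWorkToMeasure() is a hypothetical function.
 *
 *     uint64_t u64Start = ASMReadTSC();
 *     DoWorkToMeasure();
 *     uint64_t cTicks   = ASMReadTSC() - u64Start;
 */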
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
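
/* Illustrative usage sketch (added, not part of the original header): fetching the CPU
 * vendor string with ASMCpuId. Leaf 0 returns the vendor id in EBX, EDX, ECX order,
 * hence the buffer offsets below.
 *
 *     char     szVendor[13];
 *     uint32_t uMaxLeaf;
 *     ASMCpuId(0, &uMaxLeaf, &szVendor[0], &szVendor[8], &szVendor[4]);
 *     szVendor[12] = '\0';
 */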
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
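
/* Illustrative usage sketch (added, not part of the original header): on 32-bit hosts it
 * is prudent to guard CPUID based feature checks with ASMHasCpuId. SSE2 is reported in
 * CPUID leaf 1, EDX bit 26.
 *
 *     bool fSSE2 = false;
 *     if (ASMHasCpuId())
 *         fSSE2 = (ASMCpuId_EDX(1) & (1 << 26)) != 0;
 */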
762
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR2 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
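
/* Illustrative usage sketch (added, not part of the original header): the usual
 * save/disable/restore pattern around a short critical section in ring-0 code.
 *
 *     RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
 *     // ... touch data that must not be interrupted on this CPU ...
 *     ASMSetFlags(fSavedFlags);
 */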
1248
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1289 * @returns Register content.
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
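
/* Illustrative usage sketch (added, not part of the original header): a read-modify-write
 * of a machine specific register. MY_MSR_INDEX and MY_MSR_FLAG are placeholder names,
 * not real constants.
 *
 *     uint64_t u64 = ASMRdMsr(MY_MSR_INDEX);
 *     ASMWrMsr(MY_MSR_INDEX, u64 | MY_MSR_FLAG);
 */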
1321
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344# else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* Bits 31-16 and 11-4 are set, bit 12 and bits 63-32 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are set, bit 12 and bits 63-32 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
1507/**
1508 * Ensures that gcc does not reuse any register value cached before this instruction. This function
1509 * is used for assembler instructions with side-effects, e.g. port writes to magical guest ports that
1510 * cause the host to change guest memory.
1511 */
1512#if RT_INLINE_ASM_GNU_STYLE
1513DECLINLINE(void) ASMMemoryClobber(void)
1514{
1515 __asm__ __volatile__ ("" : : : "memory");
1516}
1517#else
1518DECLINLINE(void) ASMMemoryClobber(void)
1519{
1520}
1521#endif
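
/* Illustrative usage sketch (added, not part of the original header): after poking a
 * 'magic' guest port that makes the host rewrite guest memory, a compiler barrier forces
 * the compiler to re-read that memory instead of using a stale register copy.
 * MY_MAGIC_PORT, uRequest and g_uSharedReply are placeholder names.
 *
 *     ASMOutU32(MY_MAGIC_PORT, uRequest);
 *     ASMMemoryClobber();
 *     uint32_t uReply = g_uSharedReply;   // re-read from memory, not from a register
 */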
1522
1523/**
1524 * Writes an 8-bit unsigned integer to an I/O port.
1525 *
1526 * @param Port I/O port to write to.
1527 * @param u8 8-bit integer to write.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1530DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1531#else
1532DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1533{
1534# if RT_INLINE_ASM_GNU_STYLE
1535 __asm__ __volatile__("outb %b1, %w0\n\t"
1536 :: "Nd" (Port),
1537 "a" (u8));
1538
1539# elif RT_INLINE_ASM_USES_INTRIN
1540 __outbyte(Port, u8);
1541
1542# else
1543 __asm
1544 {
1545 mov dx, [Port]
1546 mov al, [u8]
1547 out dx, al
1548 }
1549# endif
1550}
1551#endif
1552
1553
1554/**
1555 * Gets an 8-bit unsigned integer from an I/O port.
1556 *
1557 * @returns 8-bit integer.
1558 * @param Port I/O port to read from.
1559 */
1560#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1561DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1562#else
1563DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1564{
1565 uint8_t u8;
1566# if RT_INLINE_ASM_GNU_STYLE
1567 __asm__ __volatile__("inb %w1, %b0\n\t"
1568 : "=a" (u8)
1569 : "Nd" (Port));
1570
1571# elif RT_INLINE_ASM_USES_INTRIN
1572 u8 = __inbyte(Port);
1573
1574# else
1575 __asm
1576 {
1577 mov dx, [Port]
1578 in al, dx
1579 mov [u8], al
1580 }
1581# endif
1582 return u8;
1583}
1584#endif
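
/* Illustrative usage sketch (added, not part of the original header): writing a POST code
 * to the traditional diagnostic port 0x80; on some systems the last value written can be
 * read back from the same port.
 *
 *     ASMOutU8(0x80, 0xab);
 *     uint8_t u8Last = ASMInU8(0x80);
 */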
1585
1586
1587/**
1588 * Writes a 16-bit unsigned integer to an I/O port.
1589 *
1590 * @param Port I/O port to write to.
1591 * @param u16 16-bit integer to write.
1592 */
1593#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1594DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1595#else
1596DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1597{
1598# if RT_INLINE_ASM_GNU_STYLE
1599 __asm__ __volatile__("outw %w1, %w0\n\t"
1600 :: "Nd" (Port),
1601 "a" (u16));
1602
1603# elif RT_INLINE_ASM_USES_INTRIN
1604 __outword(Port, u16);
1605
1606# else
1607 __asm
1608 {
1609 mov dx, [Port]
1610 mov ax, [u16]
1611 out dx, ax
1612 }
1613# endif
1614}
1615#endif
1616
1617
1618/**
1619 * Gets a 16-bit unsigned integer from an I/O port.
1620 *
1621 * @returns 16-bit integer.
1622 * @param Port I/O port to read from.
1623 */
1624#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1625DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1626#else
1627DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1628{
1629 uint16_t u16;
1630# if RT_INLINE_ASM_GNU_STYLE
1631 __asm__ __volatile__("inw %w1, %w0\n\t"
1632 : "=a" (u16)
1633 : "Nd" (Port));
1634
1635# elif RT_INLINE_ASM_USES_INTRIN
1636 u16 = __inword(Port);
1637
1638# else
1639 __asm
1640 {
1641 mov dx, [Port]
1642 in ax, dx
1643 mov [u16], ax
1644 }
1645# endif
1646 return u16;
1647}
1648#endif
1649
1650
1651/**
1652 * Writes a 32-bit unsigned integer to an I/O port.
1653 *
1654 * @param Port I/O port to write to.
1655 * @param u32 32-bit integer to write.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1658DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1659#else
1660DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1661{
1662# if RT_INLINE_ASM_GNU_STYLE
1663 __asm__ __volatile__("outl %1, %w0\n\t"
1664 :: "Nd" (Port),
1665 "a" (u32));
1666
1667# elif RT_INLINE_ASM_USES_INTRIN
1668 __outdword(Port, u32);
1669
1670# else
1671 __asm
1672 {
1673 mov dx, [Port]
1674 mov eax, [u32]
1675 out dx, eax
1676 }
1677# endif
1678}
1679#endif
1680
1681
1682/**
1683 * Gets a 32-bit unsigned integer from an I/O port.
1684 *
1685 * @returns 32-bit integer.
1686 * @param Port I/O port to read from.
1687 */
1688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1689DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1690#else
1691DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1692{
1693 uint32_t u32;
1694# if RT_INLINE_ASM_GNU_STYLE
1695 __asm__ __volatile__("inl %w1, %0\n\t"
1696 : "=a" (u32)
1697 : "Nd" (Port));
1698
1699# elif RT_INLINE_ASM_USES_INTRIN
1700 u32 = __indword(Port);
1701
1702# else
1703 __asm
1704 {
1705 mov dx, [Port]
1706 in eax, dx
1707 mov [u32], eax
1708 }
1709# endif
1710 return u32;
1711}
1712#endif
1713
1714
1715/**
1716 * Atomically Exchange an unsigned 8-bit value.
1717 *
1718 * @returns Current *pu8 value
1719 * @param pu8 Pointer to the 8-bit variable to update.
1720 * @param u8 The 8-bit value to assign to *pu8.
1721 */
1722#if RT_INLINE_ASM_EXTERNAL
1723DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1724#else
1725DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1726{
1727# if RT_INLINE_ASM_GNU_STYLE
1728 __asm__ __volatile__("xchgb %0, %1\n\t"
1729 : "=m" (*pu8),
1730 "=r" (u8)
1731 : "1" (u8));
1732# else
1733 __asm
1734 {
1735# ifdef __AMD64__
1736 mov rdx, [pu8]
1737 mov al, [u8]
1738 xchg [rdx], al
1739 mov [u8], al
1740# else
1741 mov edx, [pu8]
1742 mov al, [u8]
1743 xchg [edx], al
1744 mov [u8], al
1745# endif
1746 }
1747# endif
1748 return u8;
1749}
1750#endif
1751
1752
1753/**
1754 * Atomically Exchange a signed 8-bit value.
1755 *
1756 * @returns Current *pi8 value
1757 * @param pi8 Pointer to the 8-bit variable to update.
1758 * @param i8 The 8-bit value to assign to *pi8.
1759 */
1760DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1761{
1762 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1763}
1764
1765
1766/**
1767 * Atomically Exchange an unsigned 16-bit value.
1768 *
1769 * @returns Current *pu16 value
1770 * @param pu16 Pointer to the 16-bit variable to update.
1771 * @param u16 The 16-bit value to assign to *pu16.
1772 */
1773#if RT_INLINE_ASM_EXTERNAL
1774DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1775#else
1776DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1777{
1778# if RT_INLINE_ASM_GNU_STYLE
1779 __asm__ __volatile__("xchgw %0, %1\n\t"
1780 : "=m" (*pu16),
1781 "=r" (u16)
1782 : "1" (u16));
1783# else
1784 __asm
1785 {
1786# ifdef __AMD64__
1787 mov rdx, [pu16]
1788 mov ax, [u16]
1789 xchg [rdx], ax
1790 mov [u16], ax
1791# else
1792 mov edx, [pu16]
1793 mov ax, [u16]
1794 xchg [edx], ax
1795 mov [u16], ax
1796# endif
1797 }
1798# endif
1799 return u16;
1800}
1801#endif
1802
1803
1804/**
1805 * Atomically Exchange a signed 16-bit value.
1806 *
1807 * @returns Current *pi16 value
1808 * @param pi16 Pointer to the 16-bit variable to update.
1809 * @param i16 The 16-bit value to assign to *pi16.
1810 */
1811DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1812{
1813 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1814}
1815
1816
1817/**
1818 * Atomically Exchange an unsigned 32-bit value.
1819 *
1820 * @returns Current *pu32 value
1821 * @param pu32 Pointer to the 32-bit variable to update.
1822 * @param u32 The 32-bit value to assign to *pu32.
1823 */
1824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1825DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1826#else
1827DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1828{
1829# if RT_INLINE_ASM_GNU_STYLE
1830 __asm__ __volatile__("xchgl %0, %1\n\t"
1831 : "=m" (*pu32),
1832 "=r" (u32)
1833 : "1" (u32));
1834
1835# elif RT_INLINE_ASM_USES_INTRIN
1836 u32 = _InterlockedExchange((long *)pu32, u32);
1837
1838# else
1839 __asm
1840 {
1841# ifdef __AMD64__
1842 mov rdx, [pu32]
1843 mov eax, u32
1844 xchg [rdx], eax
1845 mov [u32], eax
1846# else
1847 mov edx, [pu32]
1848 mov eax, u32
1849 xchg [edx], eax
1850 mov [u32], eax
1851# endif
1852 }
1853# endif
1854 return u32;
1855}
1856#endif
1857
1858
1859/**
1860 * Atomically Exchange a signed 32-bit value.
1861 *
1862 * @returns Current *pi32 value
1863 * @param pi32 Pointer to the 32-bit variable to update.
1864 * @param i32 The 32-bit value to assign to *pi32.
1865 */
1866DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1867{
1868 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1869}
1870
1871
1872/**
1873 * Atomically Exchange an unsigned 64-bit value.
1874 *
1875 * @returns Current *pu64 value
1876 * @param pu64 Pointer to the 64-bit variable to update.
1877 * @param u64 The 64-bit value to assign to *pu64.
1878 */
1879#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1880DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1881#else
1882DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1883{
1884# if defined(__AMD64__)
1885# if RT_INLINE_ASM_USES_INTRIN
1886 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1887
1888# elif RT_INLINE_ASM_GNU_STYLE
1889 __asm__ __volatile__("xchgq %0, %1\n\t"
1890 : "=m" (*pu64),
1891 "=r" (u64)
1892 : "1" (u64));
1893# else
1894 __asm
1895 {
1896 mov rdx, [pu64]
1897 mov rax, [u64]
1898 xchg [rdx], rax
1899 mov [u64], rax
1900 }
1901# endif
1902# else /* !__AMD64__ */
1903# if RT_INLINE_ASM_GNU_STYLE
1904# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1905 uint32_t u32 = (uint32_t)u64;
1906 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1907 "xchgl %%ebx, %3\n\t"
1908 "1:\n\t"
1909 "lock; cmpxchg8b (%5)\n\t"
1910 "jnz 1b\n\t"
1911 "xchgl %%ebx, %3\n\t"
1912 /*"xchgl %%esi, %5\n\t"*/
1913 : "=A" (u64),
1914 "=m" (*pu64)
1915 : "0" (*pu64),
1916 "m" ( u32 ),
1917 "c" ( (uint32_t)(u64 >> 32) ),
1918 "S" (pu64) );
1919# else /* !PIC */
1920 __asm__ __volatile__("1:\n\t"
1921 "lock; cmpxchg8b %1\n\t"
1922 "jnz 1b\n\t"
1923 : "=A" (u64),
1924 "=m" (*pu64)
1925 : "0" (*pu64),
1926 "b" ( (uint32_t)u64 ),
1927 "c" ( (uint32_t)(u64 >> 32) ));
1928# endif
1929# else
1930 __asm
1931 {
1932 mov ebx, dword ptr [u64]
1933 mov ecx, dword ptr [u64 + 4]
1934 mov edi, pu64
1935 mov eax, dword ptr [edi]
1936 mov edx, dword ptr [edi + 4]
1937 retry:
1938 lock cmpxchg8b [edi]
1939 jnz retry
1940 mov dword ptr [u64], eax
1941 mov dword ptr [u64 + 4], edx
1942 }
1943# endif
1944# endif /* !__AMD64__ */
1945 return u64;
1946}
1947#endif
1948
1949
1950/**
1951 * Atomically Exchange a signed 64-bit value.
1952 *
1953 * @returns Current *pi64 value
1954 * @param pi64 Pointer to the 64-bit variable to update.
1955 * @param i64 The 64-bit value to assign to *pi64.
1956 */
1957DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1958{
1959 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1960}
1961
1962
1963#ifdef __AMD64__
1964/**
1965 * Atomically Exchange an unsigned 128-bit value.
1966 *
1967 * @returns Current *pu128.
1968 * @param pu128 Pointer to the 128-bit variable to update.
1969 * @param u128 The 128-bit value to assign to *pu128.
1970 *
1971 * @remark We cannot really assume that any hardware supports this. Nor do I have
1972 * GAS support for it. So, for the time being we'll BREAK the atomic
1973 * bit of this function and use two 64-bit exchanges instead.
1974 */
1975# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1976DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1977# else
1978DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
1979{
1980 if (true) /* ASMCpuId_ECX(1) & BIT(13) */
1981 {
1982 /** @todo this is clumsy code */
1983 RTUINT128U u128Ret;
1984 u128Ret.u = u128;
1985 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
1986 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
1987 return u128Ret.u;
1988 }
1989#if 0 /* later? */
1990 else
1991 {
1992# if RT_INLINE_ASM_GNU_STYLE
1993 __asm__ __volatile__("1:\n\t"
1994 "lock; cmpxchg8b %1\n\t"
1995 "jnz 1b\n\t"
1996 : "=A" (u128),
1997 "=m" (*pu128)
1998 : "0" (*pu128),
1999 "b" ( (uint64_t)u128 ),
2000 "c" ( (uint64_t)(u128 >> 64) ));
2001# else
2002 __asm
2003 {
2004 mov rbx, dword ptr [u128]
2005 mov rcx, dword ptr [u128 + 4]
2006 mov rdi, pu128
2007 mov rax, dword ptr [rdi]
2008 mov rdx, dword ptr [rdi + 4]
2009 retry:
2010 lock cmpxchg16b [rdi]
2011 jnz retry
2012 mov dword ptr [u128], rax
2013 mov dword ptr [u128 + 4], rdx
2014 }
2015# endif
2016 }
2017 return u128;
2018#endif
2019}
2020# endif
2021#endif /* __AMD64__ */
2022
2023
2024/**
2025 * Atomically Reads an unsigned 64-bit value.
2026 *
2027 * @returns Current *pu64 value
2028 * @param pu64 Pointer to the 64-bit variable to read.
2029 * The memory pointed to must be writable.
2030 * @remark This will fault if the memory is read-only!
2031 */
2032#if RT_INLINE_ASM_EXTERNAL
2033DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2034#else
2035DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2036{
2037 uint64_t u64;
2038# ifdef __AMD64__
2039# if RT_INLINE_ASM_GNU_STYLE
2040 __asm__ __volatile__("movq %1, %0\n\t"
2041 : "=r" (u64)
2042 : "m" (*pu64));
2043# else
2044 __asm
2045 {
2046 mov rdx, [pu64]
2047 mov rax, [rdx]
2048 mov [u64], rax
2049 }
2050# endif
2051# else /* !__AMD64__ */
2052# if RT_INLINE_ASM_GNU_STYLE
2053# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2054 uint32_t u32EBX = 0;
2055 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2056 "lock; cmpxchg8b (%5)\n\t"
2057 "xchgl %%ebx, %3\n\t"
2058 : "=A" (u64),
2059 "=m" (*pu64)
2060 : "0" (0),
2061 "m" (u32EBX),
2062 "c" (0),
2063 "S" (pu64));
2064# else /* !PIC */
2065 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2066 : "=A" (u64),
2067 "=m" (*pu64)
2068 : "0" (0),
2069 "b" (0),
2070 "c" (0));
2071# endif
2072# else
2073 __asm
2074 {
2075 xor eax, eax
2076 xor edx, edx
2077 mov edi, pu64
2078 xor ecx, ecx
2079 xor ebx, ebx
2080 lock cmpxchg8b [edi]
2081 mov dword ptr [u64], eax
2082 mov dword ptr [u64 + 4], edx
2083 }
2084# endif
2085# endif /* !__AMD64__ */
2086 return u64;
2087}
2088#endif
2089
2090
2091/**
2092 * Atomically Reads a signed 64-bit value.
2093 *
2094 * @returns Current *pi64 value
2095 * @param pi64 Pointer to the 64-bit variable to read.
2096 * The memory pointed to must be writable.
2097 * @remark This will fault if the memory is read-only!
2098 */
2099DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2100{
2101 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2102}
2103
2104
2105/**
2106 * Atomically Exchange a value whose size might differ
2107 * between platforms or compilers.
2108 *
2109 * @param pu Pointer to the variable to update.
2110 * @param uNew The value to assign to *pu.
2111 */
2112#define ASMAtomicXchgSize(pu, uNew) \
2113 do { \
2114 switch (sizeof(*(pu))) { \
2115 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2116 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2117 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2118 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2119 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2120 } \
2121 } while (0)
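
/* Illustrative usage sketch (added, not part of the original header): ASMAtomicXchgSize
 * selects the right exchange width at compile time, which is convenient for types whose
 * size differs between platforms. Unlike the typed functions it does not return the old
 * value. g_cbShared and cbNew are placeholder names.
 *
 *     ASMAtomicXchgSize(&g_cbShared, cbNew);   // expands to the 32-bit or 64-bit variant
 */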
2122
2123
2124/**
2125 * Atomically Exchange a pointer value.
2126 *
2127 * @returns Current *ppv value
2128 * @param ppv Pointer to the pointer variable to update.
2129 * @param pv The pointer value to assign to *ppv.
2130 */
2131DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2132{
2133#if ARCH_BITS == 32
2134 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2135#elif ARCH_BITS == 64
2136 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2137#else
2138# error "ARCH_BITS is bogus"
2139#endif
2140}
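
/* Illustrative usage sketch (added, not part of the original header): atomically taking
 * ownership of a singly linked list by swapping the head with NULL. PMYNODE and
 * g_pListHead are placeholder names.
 *
 *     PMYNODE pHead = (PMYNODE)ASMAtomicXchgPtr((void * volatile *)&g_pListHead, NULL);
 *     while (pHead)
 *     {
 *         PMYNODE pNext = pHead->pNext;
 *         ProcessNode(pHead);
 *         pHead = pNext;
 *     }
 */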
2141
2142
2143/**
2144 * Atomically Compare and Exchange an unsigned 32-bit value.
2145 *
2146 * @returns true if xchg was done.
2147 * @returns false if xchg wasn't done.
2148 *
2149 * @param pu32 Pointer to the value to update.
2150 * @param u32New The new value to assign to *pu32.
2151 * @param u32Old The old value to compare *pu32 with.
2152 */
2153#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2154DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2155#else
2156DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2157{
2158# if RT_INLINE_ASM_GNU_STYLE
2159 uint32_t u32Ret;
2160 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2161 "setz %%al\n\t"
2162 "movzx %%al, %%eax\n\t"
2163 : "=m" (*pu32),
2164 "=a" (u32Ret)
2165 : "r" (u32New),
2166 "1" (u32Old));
2167 return (bool)u32Ret;
2168
2169# elif RT_INLINE_ASM_USES_INTRIN
2170 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2171
2172# else
2173 uint32_t u32Ret;
2174 __asm
2175 {
2176# ifdef __AMD64__
2177 mov rdx, [pu32]
2178# else
2179 mov edx, [pu32]
2180# endif
2181 mov eax, [u32Old]
2182 mov ecx, [u32New]
2183# ifdef __AMD64__
2184 lock cmpxchg [rdx], ecx
2185# else
2186 lock cmpxchg [edx], ecx
2187# endif
2188 setz al
2189 movzx eax, al
2190 mov [u32Ret], eax
2191 }
2192 return !!u32Ret;
2193# endif
2194}
2195#endif
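
/* Illustrative usage sketch (added, not part of the original header): a minimal spinlock
 * built on ASMAtomicCmpXchgU32. A real lock would also pause/yield and deal with
 * preemption; this only shows the compare-and-exchange loop. g_u32Lock is a placeholder.
 *
 *     while (!ASMAtomicCmpXchgU32(&g_u32Lock, 1, 0))
 *         ;                                    // spin
 *     // ... critical section ...
 *     ASMAtomicXchgU32(&g_u32Lock, 0);         // release
 */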
2196
2197
2198/**
2199 * Atomically Compare and Exchange a signed 32-bit value.
2200 *
2201 * @returns true if xchg was done.
2202 * @returns false if xchg wasn't done.
2203 *
2204 * @param pi32 Pointer to the value to update.
2205 * @param i32New The new value to assign to *pi32.
2206 * @param i32Old The old value to compare *pi32 with.
2207 */
2208DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2209{
2210 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2211}
2212
2213
2214/**
2215 * Atomically Compare and exchange an unsigned 64-bit value.
2216 *
2217 * @returns true if xchg was done.
2218 * @returns false if xchg wasn't done.
2219 *
2220 * @param pu64 Pointer to the 64-bit variable to update.
2221 * @param u64New The 64-bit value to assign to *pu64.
2222 * @param u64Old The value to compare with.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2226#else
2227DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2228{
2229# if RT_INLINE_ASM_USES_INTRIN
2230 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2231
2232# elif defined(__AMD64__)
2233# if RT_INLINE_ASM_GNU_STYLE
2234 uint64_t u64Ret;
2235 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2236 "setz %%al\n\t"
2237 "movzx %%al, %%eax\n\t"
2238 : "=m" (*pu64),
2239 "=a" (u64Ret)
2240 : "r" (u64New),
2241 "1" (u64Old));
2242 return (bool)u64Ret;
2243# else
2244 bool fRet;
2245 __asm
2246 {
2247 mov rdx, [pu64]
2248 mov rax, [u64Old]
2249 mov rcx, [u64New]
2250 lock cmpxchg [rdx], rcx
2251 setz al
2252 mov [fRet], al
2253 }
2254 return fRet;
2255# endif
2256# else /* !__AMD64__ */
2257 uint32_t u32Ret;
2258# if RT_INLINE_ASM_GNU_STYLE
2259# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2260 uint32_t u32 = (uint32_t)u64New;
2261 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2262 "lock; cmpxchg8b (%5)\n\t"
2263 "setz %%al\n\t"
2264 "xchgl %%ebx, %3\n\t"
2265 "movzx %%al, %%eax\n\t"
2266 : "=a" (u32Ret),
2267 "=m" (*pu64)
2268 : "A" (u64Old),
2269 "m" ( u32 ),
2270 "c" ( (uint32_t)(u64New >> 32) ),
2271 "S" (pu64) );
2272# else /* !PIC */
2273 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2274 "setz %%al\n\t"
2275 "movzx %%al, %%eax\n\t"
2276 : "=a" (u32Ret),
2277 "=m" (*pu64)
2278 : "A" (u64Old),
2279 "b" ( (uint32_t)u64New ),
2280 "c" ( (uint32_t)(u64New >> 32) ));
2281# endif
2282 return (bool)u32Ret;
2283# else
2284 __asm
2285 {
2286 mov ebx, dword ptr [u64New]
2287 mov ecx, dword ptr [u64New + 4]
2288 mov edi, [pu64]
2289 mov eax, dword ptr [u64Old]
2290 mov edx, dword ptr [u64Old + 4]
2291 lock cmpxchg8b [edi]
2292 setz al
2293 movzx eax, al
2294 mov dword ptr [u32Ret], eax
2295 }
2296 return !!u32Ret;
2297# endif
2298# endif /* !__AMD64__ */
2299}
2300#endif
2301
2302
2303/**
2304 * Atomically Compare and exchange a signed 64-bit value.
2305 *
2306 * @returns true if xchg was done.
2307 * @returns false if xchg wasn't done.
2308 *
2309 * @param pi64 Pointer to the 64-bit variable to update.
2310 * @param i64 The 64-bit value to assign to *pi64.
2311 * @param i64Old The value to compare with.
2312 */
2313DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2314{
2315 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2316}
2317
2318
2319
2320/** @def ASMAtomicCmpXchgSize
2321 * Atomically Compare and Exchange a value whose size might differ
2322 * between platforms or compilers.
2323 *
2324 * @param pu Pointer to the value to update.
2325 * @param uNew The new value to assign to *pu.
2326 * @param uOld The old value to compare *pu with.
2327 * @param fRc Where to store the result.
2328 */
2329#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2330 do { \
2331 switch (sizeof(*(pu))) { \
2332 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2333 break; \
2334 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2335 break; \
2336 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2337 (fRc) = false; \
2338 break; \
2339 } \
2340 } while (0)
2341
2342
2343/**
2344 * Atomically Compare and Exchange a pointer value.
2345 *
2346 * @returns true if xchg was done.
2347 * @returns false if xchg wasn't done.
2348 *
2349 * @param ppv Pointer to the value to update.
2350 * @param pvNew The new value to assign to *ppv.
2351 * @param pvOld The old value to compare *ppv with.
2352 */
2353DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2354{
2355#if ARCH_BITS == 32
2356 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2357#elif ARCH_BITS == 64
2358 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2359#else
2360# error "ARCH_BITS is bogus"
2361#endif
2362}
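
/* Illustrative usage sketch (added, not part of the original header): a lock-free push
 * onto a singly linked stack using ASMAtomicCmpXchgPtr. This ignores the ABA problem and
 * is only meant to show the retry loop; PMYNODE, pNew and g_pStackTop are placeholder names.
 *
 *     PMYNODE pOld;
 *     do
 *     {
 *         pOld = g_pStackTop;
 *         pNew->pNext = pOld;
 *     } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pStackTop, pNew, pOld));
 */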
2363
2364
2365/**
2366 * Atomically increment a 32-bit value.
2367 *
2368 * @returns The new value.
2369 * @param pu32 Pointer to the value to increment.
2370 */
2371#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2372DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2373#else
2374DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2375{
2376 uint32_t u32;
2377# if RT_INLINE_ASM_USES_INTRIN
2378 u32 = _InterlockedIncrement((long *)pu32);
2379
2380# elif RT_INLINE_ASM_GNU_STYLE
2381 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2382 "incl %0\n\t"
2383 : "=r" (u32),
2384 "=m" (*pu32)
2385 : "0" (1)
2386 : "memory");
2387# else
2388 __asm
2389 {
2390 mov eax, 1
2391# ifdef __AMD64__
2392 mov rdx, [pu32]
2393 lock xadd [rdx], eax
2394# else
2395 mov edx, [pu32]
2396 lock xadd [edx], eax
2397# endif
2398 inc eax
2399 mov u32, eax
2400 }
2401# endif
2402 return u32;
2403}
2404#endif
2405
2406
2407/**
2408 * Atomically increment a signed 32-bit value.
2409 *
2410 * @returns The new value.
2411 * @param pi32 Pointer to the value to increment.
2412 */
2413DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2414{
2415 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2416}
2417
2418
2419/**
2420 * Atomically decrement an unsigned 32-bit value.
2421 *
2422 * @returns The new value.
2423 * @param pu32 Pointer to the value to decrement.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2427#else
2428DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2429{
2430 uint32_t u32;
2431# if RT_INLINE_ASM_USES_INTRIN
2432 u32 = _InterlockedDecrement((long *)pu32);
2433
2434# elif RT_INLINE_ASM_GNU_STYLE
2435 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2436 "decl %0\n\t"
2437 : "=r" (u32),
2438 "=m" (*pu32)
2439 : "0" (-1)
2440 : "memory");
2441# else
2442 __asm
2443 {
2444 mov eax, -1
2445# ifdef __AMD64__
2446 mov rdx, [pu32]
2447 lock xadd [rdx], eax
2448# else
2449 mov edx, [pu32]
2450 lock xadd [edx], eax
2451# endif
2452 dec eax
2453 mov u32, eax
2454 }
2455# endif
2456 return u32;
2457}
2458#endif
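
/*
 * A minimal usage sketch: a reference count built on ASMAtomicIncU32 and
 * ASMAtomicDecU32, relying on both returning the new value. MYOBJ and
 * myfree() are illustrative caller-side names, not IPRT APIs.
 *
 *     typedef struct MYOBJ { uint32_t volatile cRefs; } MYOBJ;
 *
 *     void myRetain(MYOBJ *pObj)
 *     {
 *         ASMAtomicIncU32(&pObj->cRefs);
 *     }
 *
 *     void myRelease(MYOBJ *pObj)
 *     {
 *         if (ASMAtomicDecU32(&pObj->cRefs) == 0)  // we dropped the last reference
 *             myfree(pObj);
 *     }
 */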
2459
2460
2461/**
2462 * Atomically decrement a signed 32-bit value.
2463 *
2464 * @returns The new value.
2465 * @param pi32 Pointer to the value to decrement.
2466 */
2467DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2468{
2469 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2470}
2471
2472
2473/**
2474 * Atomically Or an unsigned 32-bit value.
2475 *
2476 * @param pu32 Pointer to the variable to OR u32 with.
2477 * @param u32 The value to OR *pu32 with.
2478 */
2479#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2480DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2481#else
2482DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2483{
2484# if RT_INLINE_ASM_USES_INTRIN
2485 _InterlockedOr((long volatile *)pu32, (long)u32);
2486
2487# elif RT_INLINE_ASM_GNU_STYLE
2488 __asm__ __volatile__("lock; orl %1, %0\n\t"
2489 : "=m" (*pu32)
2490 : "r" (u32));
2491# else
2492 __asm
2493 {
2494 mov eax, [u32]
2495# ifdef __AMD64__
2496 mov rdx, [pu32]
2497 lock or [rdx], eax
2498# else
2499 mov edx, [pu32]
2500 lock or [edx], eax
2501# endif
2502 }
2503# endif
2504}
2505#endif
2506
2507
2508/**
2509 * Atomically Or a signed 32-bit value.
2510 *
2511 * @param pi32 Pointer to the variable to OR i32 with.
2512 * @param i32 The value to OR *pi32 with.
2513 */
2514DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2515{
2516 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
2517}
2518
2519
2520/**
2521 * Atomically And an unsigned 32-bit value.
2522 *
2523 * @param pu32 Pointer to the variable to AND u32 with.
2524 * @param u32 The value to AND *pu32 with.
2525 */
2526#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2527DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2528#else
2529DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2530{
2531# if RT_INLINE_ASM_USES_INTRIN
2532 _InterlockedAnd((long volatile *)pu32, u32);
2533
2534# elif RT_INLINE_ASM_GNU_STYLE
2535 __asm__ __volatile__("lock; andl %1, %0\n\t"
2536 : "=m" (*pu32)
2537 : "r" (u32));
2538# else
2539 __asm
2540 {
2541 mov eax, [u32]
2542# ifdef __AMD64__
2543 mov rdx, [pu32]
2544 lock and [rdx], eax
2545# else
2546 mov edx, [pu32]
2547 lock and [edx], eax
2548# endif
2549 }
2550# endif
2551}
2552#endif
2553
2554
2555/**
2556 * Atomically And a signed 32-bit value.
2557 *
2558 * @param pi32 Pointer to the variable to AND i32 with.
2559 * @param i32 The value to AND *pi32 with.
2560 */
2561DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2562{
2563 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2564}
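
/*
 * A minimal usage sketch: maintaining a shared flag word with ASMAtomicOrU32
 * and ASMAtomicAndU32. The flag values and s_fFlags are made up for the
 * example.
 *
 *     #define MYFLAG_BUSY     0x00000001U
 *     #define MYFLAG_SHUTDOWN 0x00000002U
 *
 *     static uint32_t volatile s_fFlags = 0;
 *
 *     ASMAtomicOrU32(&s_fFlags, MYFLAG_BUSY);    // set the busy flag
 *     ASMAtomicAndU32(&s_fFlags, ~MYFLAG_BUSY);  // clear it again
 */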
2565
2566
2567/**
2568 * Invalidate page.
2569 *
2570 * @param pv Address of the page to invalidate.
2571 */
2572#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2573DECLASM(void) ASMInvalidatePage(void *pv);
2574#else
2575DECLINLINE(void) ASMInvalidatePage(void *pv)
2576{
2577# if RT_INLINE_ASM_USES_INTRIN
2578 __invlpg(pv);
2579
2580# elif RT_INLINE_ASM_GNU_STYLE
2581 __asm__ __volatile__("invlpg %0\n\t"
2582 : : "m" (*(uint8_t *)pv));
2583# else
2584 __asm
2585 {
2586# ifdef __AMD64__
2587 mov rax, [pv]
2588 invlpg [rax]
2589# else
2590 mov eax, [pv]
2591 invlpg [eax]
2592# endif
2593 }
2594# endif
2595}
2596#endif
2597
2598
2599#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2600# if PAGE_SIZE != 0x1000
2601# error "PAGE_SIZE is not 0x1000!"
2602# endif
2603#endif
2604
2605/**
2606 * Zeros a 4K memory page.
2607 *
2608 * @param pv Pointer to the memory block. This must be page aligned.
2609 */
2610#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2611DECLASM(void) ASMMemZeroPage(volatile void *pv);
2612# else
2613DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2614{
2615# if RT_INLINE_ASM_USES_INTRIN
2616# ifdef __AMD64__
2617 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2618# else
2619 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2620# endif
2621
2622# elif RT_INLINE_ASM_GNU_STYLE
2623 RTUINTREG uDummy;
2624# ifdef __AMD64__
2625 __asm__ __volatile__ ("rep stosq"
2626 : "=D" (pv),
2627 "=c" (uDummy)
2628 : "0" (pv),
2629 "c" (0x1000 >> 3),
2630 "a" (0)
2631 : "memory");
2632# else
2633 __asm__ __volatile__ ("rep stosl"
2634 : "=D" (pv),
2635 "=c" (uDummy)
2636 : "0" (pv),
2637 "c" (0x1000 >> 2),
2638 "a" (0)
2639 : "memory");
2640# endif
2641# else
2642 __asm
2643 {
2644# ifdef __AMD64__
2645 xor rax, rax
2646 mov ecx, 0200h
2647 mov rdi, [pv]
2648 rep stosq
2649# else
2650 xor eax, eax
2651 mov ecx, 0400h
2652 mov edi, [pv]
2653 rep stosd
2654# endif
2655 }
2656# endif
2657}
2658# endif
2659
2660
2661/**
2662 * Zeros a memory block with a 32-bit aligned size.
2663 *
2664 * @param pv Pointer to the memory block.
2665 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2666 */
2667#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2668DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2669#else
2670DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2671{
2672# if RT_INLINE_ASM_USES_INTRIN
2673 __stosd((unsigned long *)pv, 0, cb >> 2);
2674
2675# elif RT_INLINE_ASM_GNU_STYLE
2676 __asm__ __volatile__ ("rep stosl"
2677 : "=D" (pv),
2678 "=c" (cb)
2679 : "0" (pv),
2680 "1" (cb >> 2),
2681 "a" (0)
2682 : "memory");
2683# else
2684 __asm
2685 {
2686 xor eax, eax
2687# ifdef __AMD64__
2688 mov rcx, [cb]
2689 shr rcx, 2
2690 mov rdi, [pv]
2691# else
2692 mov ecx, [cb]
2693 shr ecx, 2
2694 mov edi, [pv]
2695# endif
2696 rep stosd
2697 }
2698# endif
2699}
2700#endif
2701
2702
2703/**
2704 * Fills a memory block with a 32-bit aligned size.
2705 *
2706 * @param pv Pointer to the memory block.
2707 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2708 * @param u32 The value to fill with.
2709 */
2710#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2711DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2712#else
2713DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2714{
2715# if RT_INLINE_ASM_USES_INTRIN
2716 __stosd((unsigned long *)pv, u32, cb >> 2);
2717
2718# elif RT_INLINE_ASM_GNU_STYLE
2719 __asm__ __volatile__ ("rep stosl"
2720 : "=D" (pv),
2721 "=c" (cb)
2722 : "0" (pv),
2723 "1" (cb >> 2),
2724 "a" (u32)
2725 : "memory");
2726# else
2727 __asm
2728 {
2729# ifdef __AMD64__
2730 mov rcx, [cb]
2731 shr rcx, 2
2732 mov rdi, [pv]
2733# else
2734 mov ecx, [cb]
2735 shr ecx, 2
2736 mov edi, [pv]
2737# endif
2738 mov eax, [u32]
2739 rep stosd
2740 }
2741# endif
2742}
2743#endif
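
/*
 * A minimal usage sketch: initializing a 32-bit aligned table, first clearing
 * it with ASMMemZero32 and then stamping every entry with a fill pattern via
 * ASMMemFill32. The table and pattern are illustrative only.
 *
 *     uint32_t au32Table[1024];
 *
 *     ASMMemZero32(au32Table, sizeof(au32Table));              // all zeros
 *     ASMMemFill32(au32Table, sizeof(au32Table), 0xdeadbeefU); // poison pattern
 */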
2744
2745
2746
2747/**
2748 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2749 *
2750 * @returns u32F1 * u32F2.
2751 */
2752#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2753DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2754#else
2755DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2756{
2757# ifdef __AMD64__
2758 return (uint64_t)u32F1 * u32F2;
2759# else /* !__AMD64__ */
2760 uint64_t u64;
2761# if RT_INLINE_ASM_GNU_STYLE
2762 __asm__ __volatile__("mull %%edx"
2763 : "=A" (u64)
2764 : "a" (u32F2), "d" (u32F1));
2765# else
2766 __asm
2767 {
2768 mov edx, [u32F1]
2769 mov eax, [u32F2]
2770 mul edx
2771 mov dword ptr [u64], eax
2772 mov dword ptr [u64 + 4], edx
2773 }
2774# endif
2775 return u64;
2776# endif /* !__AMD64__ */
2777}
2778#endif
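
/*
 * A minimal usage sketch: computing a byte offset that can exceed 32 bits
 * without overflowing the intermediate arithmetic. iSector and cbSector are
 * illustrative names.
 *
 *     uint32_t iSector  = 0x00900000;
 *     uint32_t cbSector = 512;
 *     uint64_t offByte  = ASMMult2xU32RetU64(iSector, cbSector); // 0x120000000
 */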
2779
2780
2781/**
2782 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2783 *
2784 * @returns i32F1 * i32F2.
2785 */
2786#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2787DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2788#else
2789DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2790{
2791# ifdef __AMD64__
2792 return (int64_t)i32F1 * i32F2;
2793# else /* !__AMD64__ */
2794 int64_t i64;
2795# if RT_INLINE_ASM_GNU_STYLE
2796 __asm__ __volatile__("imull %%edx"
2797 : "=A" (i64)
2798 : "a" (i32F2), "d" (i32F1));
2799# else
2800 __asm
2801 {
2802 mov edx, [i32F1]
2803 mov eax, [i32F2]
2804 imul edx
2805 mov dword ptr [i64], eax
2806 mov dword ptr [i64 + 4], edx
2807 }
2808# endif
2809 return i64;
2810# endif /* !__AMD64__ */
2811}
2812#endif
2813
2814
2815/**
2816 * Divides a 64-bit unsigned by a 32-bit unsigned, returning an unsigned 32-bit result.
2817 *
2818 * @returns u64 / u32.
2819 */
2820#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2821DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2822#else
2823DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2824{
2825# ifdef __AMD64__
2826 return (uint32_t)(u64 / u32);
2827# else /* !__AMD64__ */
2828# if RT_INLINE_ASM_GNU_STYLE
2829 RTUINTREG uDummy;
2830 __asm__ __volatile__("divl %3"
2831 : "=a" (u32), "=d"(uDummy)
2832 : "A" (u64), "r" (u32));
2833# else
2834 __asm
2835 {
2836 mov eax, dword ptr [u64]
2837 mov edx, dword ptr [u64 + 4]
2838 mov ecx, [u32]
2839 div ecx
2840 mov [u32], eax
2841 }
2842# endif
2843 return u32;
2844# endif /* !__AMD64__ */
2845}
2846#endif
2847
2848
2849/**
2850 * Divides a 64-bit signed by a 32-bit signed, returning a signed 32-bit result.
2851 *
2852 * @returns i64 / i32.
2853 */
2854#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2855DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2856#else
2857DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2858{
2859# ifdef __AMD64__
2860 return (int32_t)(i64 / i32);
2861# else /* !__AMD64__ */
2862# if RT_INLINE_ASM_GNU_STYLE
2863 RTUINTREG iDummy;
2864 __asm__ __volatile__("idivl %3"
2865 : "=a" (i32), "=d"(iDummy)
2866 : "A" (i64), "r" (i32));
2867# else
2868 __asm
2869 {
2870 mov eax, dword ptr [i64]
2871 mov edx, dword ptr [i64 + 4]
2872 mov ecx, [i32]
2873 idiv ecx
2874 mov [i32], eax
2875 }
2876# endif
2877 return i32;
2878# endif /* !__AMD64__ */
2879}
2880#endif
2881
2882
2883/**
2884 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer
2885 * using a 96-bit intermediate result.
2886 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2887 * __udivdi3 and __umoddi3 even if this inline function is not used.
2888 *
2889 * @returns (u64A * u32B) / u32C.
2890 * @param u64A The 64-bit value.
2891 * @param u32B The 32-bit value to multiply A by.
2892 * @param u32C The 32-bit value to divide A*B by.
2893 */
2894#if RT_INLINE_ASM_EXTERNAL
2895DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2896#else
2897DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2898{
2899# if RT_INLINE_ASM_GNU_STYLE
2900# ifdef __AMD64__
2901 uint64_t u64Result, u64Spill;
2902 __asm__ __volatile__("mulq %2\n\t"
2903 "divq %3\n\t"
2904 : "=a" (u64Result),
2905 "=d" (u64Spill)
2906 : "r" ((uint64_t)u32B),
2907 "r" ((uint64_t)u32C),
2908 "0" (u64A),
2909 "1" (0));
2910 return u64Result;
2911# else
2912 uint32_t u32Dummy;
2913 uint64_t u64Result;
2914 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2915 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2916 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2917 eax = u64A.hi */
2918 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2919 edx = u32C */
2920 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2921 edx = u32B */
2922 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2923 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2924 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2925 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2926 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2927 edx = u64Hi % u32C */
2928 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2929 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2930 "divl %%ecx \n\t" /* u64Result.lo */
2931 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2932 : "=A"(u64Result),
2933 "=S"(u32Dummy), "=D"(u32Dummy)
2934 : "a"((uint32_t)u64A),
2935 "S"((uint32_t)(u64A >> 32)),
2936 "c"(u32B),
2937 "D"(u32C));
2938 return u64Result;
2939# endif
2940# else
2941 RTUINT64U u;
2942 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2943 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2944 u64Hi += (u64Lo >> 32);
2945 u.s.Hi = (uint32_t)(u64Hi / u32C);
2946 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2947 return u.u;
2948# endif
2949}
2950#endif
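
/*
 * A minimal usage sketch: rescaling a 64-bit tick count into nanoseconds,
 * which is the kind of conversion the 96-bit intermediate is meant for
 * (u64Ticks * 10^9 easily overflows 64 bits). The tick count and frequency
 * are example values.
 *
 *     uint64_t u64Ticks       = UINT64_C(123456789012);  // e.g. a TSC delta
 *     uint32_t u32TicksPerSec = 2400000000U;             // 2.4 GHz
 *     uint64_t u64Nanosecs    = ASMMultU64ByU32DivByU32(u64Ticks, 1000000000U, u32TicksPerSec);
 */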
2951
2952
2953/**
2954 * Probes a byte pointer for read access.
2955 *
2956 * While the function will fault if the byte is not read accessible,
2957 * the idea is to do this in a safe place like before acquiring locks
2958 * and such like.
2959 *
2960 * Also, this function guarantees that an eager compiler is not going
2961 * to optimize the probing away.
2962 *
2963 * @param pvByte Pointer to the byte.
2964 */
2965#if RT_INLINE_ASM_EXTERNAL
2966DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2967#else
2968DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2969{
2970 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2971 uint8_t u8;
2972# if RT_INLINE_ASM_GNU_STYLE
2973 __asm__ __volatile__("movb (%1), %0\n\t"
2974 : "=r" (u8)
2975 : "r" (pvByte));
2976# else
2977 __asm
2978 {
2979# ifdef __AMD64__
2980 mov rax, [pvByte]
2981 mov al, [rax]
2982# else
2983 mov eax, [pvByte]
2984 mov al, [eax]
2985# endif
2986 mov [u8], al
2987 }
2988# endif
2989 return u8;
2990}
2991#endif
2992
2993/**
2994 * Probes a buffer for read access page by page.
2995 *
2996 * While the function will fault if the buffer is not fully read
2997 * accessible, the idea is to do this in a safe place like before
2998 * acquiring locks and such like.
2999 *
3000 * Also, this function guarantees that an eager compiler is not going
3001 * to optimize the probing away.
3002 *
3003 * @param pvBuf Pointer to the buffer.
3004 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3005 */
3006DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3007{
3008 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3009 /* the first byte */
3010 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3011 ASMProbeReadByte(pu8);
3012
3013 /* the pages in between. */
3014 while (cbBuf > /*PAGE_SIZE*/0x1000)
3015 {
3016 ASMProbeReadByte(pu8);
3017 cbBuf -= /*PAGE_SIZE*/0x1000;
3018 pu8 += /*PAGE_SIZE*/0x1000;
3019 }
3020
3021 /* the last byte */
3022 ASMProbeReadByte(pu8 + cbBuf - 1);
3023}
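
/*
 * A minimal usage sketch: touching a caller supplied buffer up front so that
 * any page fault happens here, in a safe context, rather than later while
 * holding a spinlock. pvReq and cbReq are illustrative names; cbReq must be
 * at least 1.
 *
 *     int myProcessRequest(const void *pvReq, size_t cbReq)
 *     {
 *         ASMProbeReadBuffer(pvReq, cbReq);  // fault now, if at all
 *         // ... take the spinlock and read pvReq without fear of faulting ...
 *         return 0;
 *     }
 */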
3024
3025
3026/** @def ASMBreakpoint
3027 * Debugger Breakpoint.
3028 * @remark In the gnu world we add a nop instruction after the int3 to
3029 * force gdb to remain at the int3 source line.
3030 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3031 * @internal
3032 */
3033#if RT_INLINE_ASM_GNU_STYLE
3034# ifndef __L4ENV__
3035# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3036# else
3037# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3038# endif
3039#else
3040# define ASMBreakpoint() __debugbreak()
3041#endif
3042
3043
3044
3045/** @defgroup grp_inline_bits Bit Operations
3046 * @{
3047 */
3048
3049
3050/**
3051 * Sets a bit in a bitmap.
3052 *
3053 * @param pvBitmap Pointer to the bitmap.
3054 * @param iBit The bit to set.
3055 */
3056#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3057DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3058#else
3059DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3060{
3061# if RT_INLINE_ASM_USES_INTRIN
3062 _bittestandset((long *)pvBitmap, iBit);
3063
3064# elif RT_INLINE_ASM_GNU_STYLE
3065 __asm__ __volatile__ ("btsl %1, %0"
3066 : "=m" (*(volatile long *)pvBitmap)
3067 : "Ir" (iBit)
3068 : "memory");
3069# else
3070 __asm
3071 {
3072# ifdef __AMD64__
3073 mov rax, [pvBitmap]
3074 mov edx, [iBit]
3075 bts [rax], edx
3076# else
3077 mov eax, [pvBitmap]
3078 mov edx, [iBit]
3079 bts [eax], edx
3080# endif
3081 }
3082# endif
3083}
3084#endif
3085
3086
3087/**
3088 * Atomically sets a bit in a bitmap.
3089 *
3090 * @param pvBitmap Pointer to the bitmap.
3091 * @param iBit The bit to set.
3092 */
3093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3094DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3095#else
3096DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3097{
3098# if RT_INLINE_ASM_USES_INTRIN
3099 _interlockedbittestandset((long *)pvBitmap, iBit);
3100# elif RT_INLINE_ASM_GNU_STYLE
3101 __asm__ __volatile__ ("lock; btsl %1, %0"
3102 : "=m" (*(volatile long *)pvBitmap)
3103 : "Ir" (iBit)
3104 : "memory");
3105# else
3106 __asm
3107 {
3108# ifdef __AMD64__
3109 mov rax, [pvBitmap]
3110 mov edx, [iBit]
3111 lock bts [rax], edx
3112# else
3113 mov eax, [pvBitmap]
3114 mov edx, [iBit]
3115 lock bts [eax], edx
3116# endif
3117 }
3118# endif
3119}
3120#endif
3121
3122
3123/**
3124 * Clears a bit in a bitmap.
3125 *
3126 * @param pvBitmap Pointer to the bitmap.
3127 * @param iBit The bit to clear.
3128 */
3129#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3130DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3131#else
3132DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3133{
3134# if RT_INLINE_ASM_USES_INTRIN
3135 _bittestandreset((long *)pvBitmap, iBit);
3136
3137# elif RT_INLINE_ASM_GNU_STYLE
3138 __asm__ __volatile__ ("btrl %1, %0"
3139 : "=m" (*(volatile long *)pvBitmap)
3140 : "Ir" (iBit)
3141 : "memory");
3142# else
3143 __asm
3144 {
3145# ifdef __AMD64__
3146 mov rax, [pvBitmap]
3147 mov edx, [iBit]
3148 btr [rax], edx
3149# else
3150 mov eax, [pvBitmap]
3151 mov edx, [iBit]
3152 btr [eax], edx
3153# endif
3154 }
3155# endif
3156}
3157#endif
3158
3159
3160/**
3161 * Atomically clears a bit in a bitmap.
3162 *
3163 * @param pvBitmap Pointer to the bitmap.
3164 * @param iBit The bit to clear.
3165 * @remark No memory barrier, take care on smp.
3166 */
3167#if RT_INLINE_ASM_EXTERNAL
3168DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3169#else
3170DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3171{
3172# if RT_INLINE_ASM_GNU_STYLE
3173 __asm__ __volatile__ ("lock; btrl %1, %0"
3174 : "=m" (*(volatile long *)pvBitmap)
3175 : "Ir" (iBit)
3176 : "memory");
3177# else
3178 __asm
3179 {
3180# ifdef __AMD64__
3181 mov rax, [pvBitmap]
3182 mov edx, [iBit]
3183 lock btr [rax], edx
3184# else
3185 mov eax, [pvBitmap]
3186 mov edx, [iBit]
3187 lock btr [eax], edx
3188# endif
3189 }
3190# endif
3191}
3192#endif
3193
3194
3195/**
3196 * Toggles a bit in a bitmap.
3197 *
3198 * @param pvBitmap Pointer to the bitmap.
3199 * @param iBit The bit to toggle.
3200 */
3201#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3202DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3203#else
3204DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3205{
3206# if RT_INLINE_ASM_USES_INTRIN
3207 _bittestandcomplement((long *)pvBitmap, iBit);
3208# elif RT_INLINE_ASM_GNU_STYLE
3209 __asm__ __volatile__ ("btcl %1, %0"
3210 : "=m" (*(volatile long *)pvBitmap)
3211 : "Ir" (iBit)
3212 : "memory");
3213# else
3214 __asm
3215 {
3216# ifdef __AMD64__
3217 mov rax, [pvBitmap]
3218 mov edx, [iBit]
3219 btc [rax], edx
3220# else
3221 mov eax, [pvBitmap]
3222 mov edx, [iBit]
3223 btc [eax], edx
3224# endif
3225 }
3226# endif
3227}
3228#endif
3229
3230
3231/**
3232 * Atomically toggles a bit in a bitmap.
3233 *
3234 * @param pvBitmap Pointer to the bitmap.
3235 * @param iBit The bit to toggle.
3236 */
3237#if RT_INLINE_ASM_EXTERNAL
3238DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3239#else
3240DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3241{
3242# if RT_INLINE_ASM_GNU_STYLE
3243 __asm__ __volatile__ ("lock; btcl %1, %0"
3244 : "=m" (*(volatile long *)pvBitmap)
3245 : "Ir" (iBit)
3246 : "memory");
3247# else
3248 __asm
3249 {
3250# ifdef __AMD64__
3251 mov rax, [pvBitmap]
3252 mov edx, [iBit]
3253 lock btc [rax], edx
3254# else
3255 mov eax, [pvBitmap]
3256 mov edx, [iBit]
3257 lock btc [eax], edx
3258# endif
3259 }
3260# endif
3261}
3262#endif
3263
3264
3265/**
3266 * Tests and sets a bit in a bitmap.
3267 *
3268 * @returns true if the bit was set.
3269 * @returns false if the bit was clear.
3270 * @param pvBitmap Pointer to the bitmap.
3271 * @param iBit The bit to test and set.
3272 */
3273#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3274DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3275#else
3276DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3277{
3278 union { bool f; uint32_t u32; uint8_t u8; } rc;
3279# if RT_INLINE_ASM_USES_INTRIN
3280 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3281
3282# elif RT_INLINE_ASM_GNU_STYLE
3283 __asm__ __volatile__ ("btsl %2, %1\n\t"
3284 "setc %b0\n\t"
3285 "andl $1, %0\n\t"
3286 : "=q" (rc.u32),
3287 "=m" (*(volatile long *)pvBitmap)
3288 : "Ir" (iBit)
3289 : "memory");
3290# else
3291 __asm
3292 {
3293 mov edx, [iBit]
3294# ifdef __AMD64__
3295 mov rax, [pvBitmap]
3296 bts [rax], edx
3297# else
3298 mov eax, [pvBitmap]
3299 bts [eax], edx
3300# endif
3301 setc al
3302 and eax, 1
3303 mov [rc.u32], eax
3304 }
3305# endif
3306 return rc.f;
3307}
3308#endif
3309
3310
3311/**
3312 * Atomically tests and sets a bit in a bitmap.
3313 *
3314 * @returns true if the bit was set.
3315 * @returns false if the bit was clear.
3316 * @param pvBitmap Pointer to the bitmap.
3317 * @param iBit The bit to test and set.
3318 */
3319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3320DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3321#else
3322DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3323{
3324 union { bool f; uint32_t u32; uint8_t u8; } rc;
3325# if RT_INLINE_ASM_USES_INTRIN
3326 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3327# elif RT_INLINE_ASM_GNU_STYLE
3328 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3329 "setc %b0\n\t"
3330 "andl $1, %0\n\t"
3331 : "=q" (rc.u32),
3332 "=m" (*(volatile long *)pvBitmap)
3333 : "Ir" (iBit)
3334 : "memory");
3335# else
3336 __asm
3337 {
3338 mov edx, [iBit]
3339# ifdef __AMD64__
3340 mov rax, [pvBitmap]
3341 lock bts [rax], edx
3342# else
3343 mov eax, [pvBitmap]
3344 lock bts [eax], edx
3345# endif
3346 setc al
3347 and eax, 1
3348 mov [rc.u32], eax
3349 }
3350# endif
3351 return rc.f;
3352}
3353#endif
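
/*
 * A minimal usage sketch: claiming a specific slot in a shared allocation
 * bitmap; only the thread that flips the bit from clear to set wins.
 * s_bmSlots and the slot count are illustrative.
 *
 *     static uint32_t volatile s_bmSlots[256 / 32];  // 256 slots
 *
 *     bool myClaimSlot(int32_t iSlot)
 *     {
 *         return !ASMAtomicBitTestAndSet(&s_bmSlots[0], iSlot);  // true == ours now
 *     }
 *
 *     void myFreeSlot(int32_t iSlot)
 *     {
 *         ASMAtomicBitClear(&s_bmSlots[0], iSlot);
 *     }
 */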
3354
3355
3356/**
3357 * Tests and clears a bit in a bitmap.
3358 *
3359 * @returns true if the bit was set.
3360 * @returns false if the bit was clear.
3361 * @param pvBitmap Pointer to the bitmap.
3362 * @param iBit The bit to test and clear.
3363 */
3364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3365DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3366#else
3367DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3368{
3369 union { bool f; uint32_t u32; uint8_t u8; } rc;
3370# if RT_INLINE_ASM_USES_INTRIN
3371 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3372
3373# elif RT_INLINE_ASM_GNU_STYLE
3374 __asm__ __volatile__ ("btrl %2, %1\n\t"
3375 "setc %b0\n\t"
3376 "andl $1, %0\n\t"
3377 : "=q" (rc.u32),
3378 "=m" (*(volatile long *)pvBitmap)
3379 : "Ir" (iBit)
3380 : "memory");
3381# else
3382 __asm
3383 {
3384 mov edx, [iBit]
3385# ifdef __AMD64__
3386 mov rax, [pvBitmap]
3387 btr [rax], edx
3388# else
3389 mov eax, [pvBitmap]
3390 btr [eax], edx
3391# endif
3392 setc al
3393 and eax, 1
3394 mov [rc.u32], eax
3395 }
3396# endif
3397 return rc.f;
3398}
3399#endif
3400
3401
3402/**
3403 * Atomically tests and clears a bit in a bitmap.
3404 *
3405 * @returns true if the bit was set.
3406 * @returns false if the bit was clear.
3407 * @param pvBitmap Pointer to the bitmap.
3408 * @param iBit The bit to test and clear.
3409 * @remark No memory barrier, take care on smp.
3410 */
3411#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3412DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3413#else
3414DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3415{
3416 union { bool f; uint32_t u32; uint8_t u8; } rc;
3417# if RT_INLINE_ASM_USES_INTRIN
3418 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3419
3420# elif RT_INLINE_ASM_GNU_STYLE
3421 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3422 "setc %b0\n\t"
3423 "andl $1, %0\n\t"
3424 : "=q" (rc.u32),
3425 "=m" (*(volatile long *)pvBitmap)
3426 : "Ir" (iBit)
3427 : "memory");
3428# else
3429 __asm
3430 {
3431 mov edx, [iBit]
3432# ifdef __AMD64__
3433 mov rax, [pvBitmap]
3434 lock btr [rax], edx
3435# else
3436 mov eax, [pvBitmap]
3437 lock btr [eax], edx
3438# endif
3439 setc al
3440 and eax, 1
3441 mov [rc.u32], eax
3442 }
3443# endif
3444 return rc.f;
3445}
3446#endif
3447
3448
3449/**
3450 * Tests and toggles a bit in a bitmap.
3451 *
3452 * @returns true if the bit was set.
3453 * @returns false if the bit was clear.
3454 * @param pvBitmap Pointer to the bitmap.
3455 * @param iBit The bit to test and toggle.
3456 */
3457#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3458DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3459#else
3460DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3461{
3462 union { bool f; uint32_t u32; uint8_t u8; } rc;
3463# if RT_INLINE_ASM_USES_INTRIN
3464 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3465
3466# elif RT_INLINE_ASM_GNU_STYLE
3467 __asm__ __volatile__ ("btcl %2, %1\n\t"
3468 "setc %b0\n\t"
3469 "andl $1, %0\n\t"
3470 : "=q" (rc.u32),
3471 "=m" (*(volatile long *)pvBitmap)
3472 : "Ir" (iBit)
3473 : "memory");
3474# else
3475 __asm
3476 {
3477 mov edx, [iBit]
3478# ifdef __AMD64__
3479 mov rax, [pvBitmap]
3480 btc [rax], edx
3481# else
3482 mov eax, [pvBitmap]
3483 btc [eax], edx
3484# endif
3485 setc al
3486 and eax, 1
3487 mov [rc.u32], eax
3488 }
3489# endif
3490 return rc.f;
3491}
3492#endif
3493
3494
3495/**
3496 * Atomically tests and toggles a bit in a bitmap.
3497 *
3498 * @returns true if the bit was set.
3499 * @returns false if the bit was clear.
3500 * @param pvBitmap Pointer to the bitmap.
3501 * @param iBit The bit to test and toggle.
3502 */
3503#if RT_INLINE_ASM_EXTERNAL
3504DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3505#else
3506DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3507{
3508 union { bool f; uint32_t u32; uint8_t u8; } rc;
3509# if RT_INLINE_ASM_GNU_STYLE
3510 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3511 "setc %b0\n\t"
3512 "andl $1, %0\n\t"
3513 : "=q" (rc.u32),
3514 "=m" (*(volatile long *)pvBitmap)
3515 : "Ir" (iBit)
3516 : "memory");
3517# else
3518 __asm
3519 {
3520 mov edx, [iBit]
3521# ifdef __AMD64__
3522 mov rax, [pvBitmap]
3523 lock btc [rax], edx
3524# else
3525 mov eax, [pvBitmap]
3526 lock btc [eax], edx
3527# endif
3528 setc al
3529 and eax, 1
3530 mov [rc.u32], eax
3531 }
3532# endif
3533 return rc.f;
3534}
3535#endif
3536
3537
3538/**
3539 * Tests if a bit in a bitmap is set.
3540 *
3541 * @returns true if the bit is set.
3542 * @returns false if the bit is clear.
3543 * @param pvBitmap Pointer to the bitmap.
3544 * @param iBit The bit to test.
3545 */
3546#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3547DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3548#else
3549DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3550{
3551 union { bool f; uint32_t u32; uint8_t u8; } rc;
3552# if RT_INLINE_ASM_USES_INTRIN
3553 rc.u32 = _bittest((long *)pvBitmap, iBit);
3554# elif RT_INLINE_ASM_GNU_STYLE
3555
3556 __asm__ __volatile__ ("btl %2, %1\n\t"
3557 "setc %b0\n\t"
3558 "andl $1, %0\n\t"
3559 : "=q" (rc.u32),
3560 "=m" (*(volatile long *)pvBitmap)
3561 : "Ir" (iBit)
3562 : "memory");
3563# else
3564 __asm
3565 {
3566 mov edx, [iBit]
3567# ifdef __AMD64__
3568 mov rax, [pvBitmap]
3569 bt [rax], edx
3570# else
3571 mov eax, [pvBitmap]
3572 bt [eax], edx
3573# endif
3574 setc al
3575 and eax, 1
3576 mov [rc.u32], eax
3577 }
3578# endif
3579 return rc.f;
3580}
3581#endif
3582
3583
3584/**
3585 * Clears a bit range within a bitmap.
3586 *
3587 * @param pvBitmap Pointer to the bitmap.
3588 * @param iBitStart The first bit to clear.
3589 * @param iBitEnd The first bit not to clear.
3590 */
3591DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3592{
3593 if (iBitStart < iBitEnd)
3594 {
3595 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3596 int iStart = iBitStart & ~31;
3597 int iEnd = iBitEnd & ~31;
3598 if (iStart == iEnd)
3599 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3600 else
3601 {
3602 /* bits in first dword. */
3603 if (iBitStart & 31)
3604 {
3605 *pu32 &= (1 << (iBitStart & 31)) - 1;
3606 pu32++;
3607 iBitStart = iStart + 32;
3608 }
3609
3610 /* whole dword. */
3611 if (iBitStart != iEnd)
3612 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3613
3614 /* bits in last dword. */
3615 if (iBitEnd & 31)
3616 {
3617 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3618 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3619 }
3620 }
3621 }
3622}
3623
3624
3625/**
3626 * Finds the first clear bit in a bitmap.
3627 *
3628 * @returns Index of the first zero bit.
3629 * @returns -1 if no clear bit was found.
3630 * @param pvBitmap Pointer to the bitmap.
3631 * @param cBits The number of bits in the bitmap. Multiple of 32.
3632 */
3633#if RT_INLINE_ASM_EXTERNAL
3634DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3635#else
3636DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3637{
3638 if (cBits)
3639 {
3640 int32_t iBit;
3641# if RT_INLINE_ASM_GNU_STYLE
3642 RTCCUINTREG uEAX, uECX, uEDI;
3643 cBits = RT_ALIGN_32(cBits, 32);
3644 __asm__ __volatile__("repe; scasl\n\t"
3645 "je 1f\n\t"
3646# ifdef __AMD64__
3647 "lea -4(%%rdi), %%rdi\n\t"
3648 "xorl (%%rdi), %%eax\n\t"
3649 "subq %5, %%rdi\n\t"
3650# else
3651 "lea -4(%%edi), %%edi\n\t"
3652 "xorl (%%edi), %%eax\n\t"
3653 "subl %5, %%edi\n\t"
3654# endif
3655 "shll $3, %%edi\n\t"
3656 "bsfl %%eax, %%edx\n\t"
3657 "addl %%edi, %%edx\n\t"
3658 "1:\t\n"
3659 : "=d" (iBit),
3660 "=&c" (uECX),
3661 "=&D" (uEDI),
3662 "=&a" (uEAX)
3663 : "0" (0xffffffff),
3664 "mr" (pvBitmap),
3665 "1" (cBits >> 5),
3666 "2" (pvBitmap),
3667 "3" (0xffffffff));
3668# else
3669 cBits = RT_ALIGN_32(cBits, 32);
3670 __asm
3671 {
3672# ifdef __AMD64__
3673 mov rdi, [pvBitmap]
3674 mov rbx, rdi
3675# else
3676 mov edi, [pvBitmap]
3677 mov ebx, edi
3678# endif
3679 mov edx, 0ffffffffh
3680 mov eax, edx
3681 mov ecx, [cBits]
3682 shr ecx, 5
3683 repe scasd
3684 je done
3685
3686# ifdef __AMD64__
3687 lea rdi, [rdi - 4]
3688 xor eax, [rdi]
3689 sub rdi, rbx
3690# else
3691 lea edi, [edi - 4]
3692 xor eax, [edi]
3693 sub edi, ebx
3694# endif
3695 shl edi, 3
3696 bsf edx, eax
3697 add edx, edi
3698 done:
3699 mov [iBit], edx
3700 }
3701# endif
3702 return iBit;
3703 }
3704 return -1;
3705}
3706#endif
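
/*
 * A minimal usage sketch: finding and atomically claiming the first free slot
 * in a bitmap. The scan itself is not atomic, so the claim is retried when
 * another thread wins the race. The names are illustrative.
 *
 *     static uint32_t volatile s_bmSlots[256 / 32];  // 256 slots, multiple of 32
 *
 *     int32_t myAllocSlot(void)
 *     {
 *         for (;;)
 *         {
 *             int32_t iSlot = ASMBitFirstClear(&s_bmSlots[0], 256);
 *             if (iSlot < 0)
 *                 return -1;                                      // bitmap is full
 *             if (!ASMAtomicBitTestAndSet(&s_bmSlots[0], iSlot))
 *                 return iSlot;                                   // claimed it
 *             // somebody else took that slot; scan again.
 *         }
 *     }
 */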
3707
3708
3709/**
3710 * Finds the next clear bit in a bitmap.
3711 *
3712 * @returns Index of the first zero bit.
3713 * @returns -1 if no clear bit was found.
3714 * @param pvBitmap Pointer to the bitmap.
3715 * @param cBits The number of bits in the bitmap. Multiple of 32.
3716 * @param iBitPrev The bit returned from the last search.
3717 * The search will start at iBitPrev + 1.
3718 */
3719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3720DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3721#else
3722DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3723{
3724 int iBit = ++iBitPrev & 31;
3725 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3726 cBits -= iBitPrev & ~31;
3727 if (iBit)
3728 {
3729 /* inspect the first dword. */
3730 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3731# if RT_INLINE_ASM_USES_INTRIN
3732 unsigned long ulBit = 0;
3733 if (_BitScanForward(&ulBit, u32))
3734 return ulBit + iBitPrev;
3735 iBit = -1;
3736# else
3737# if RT_INLINE_ASM_GNU_STYLE
3738 __asm__ __volatile__("bsf %1, %0\n\t"
3739 "jnz 1f\n\t"
3740 "movl $-1, %0\n\t"
3741 "1:\n\t"
3742 : "=r" (iBit)
3743 : "r" (u32));
3744# else
3745 __asm
3746 {
3747 mov edx, [u32]
3748 bsf eax, edx
3749 jnz done
3750 mov eax, 0ffffffffh
3751 done:
3752 mov [iBit], eax
3753 }
3754# endif
3755 if (iBit >= 0)
3756 return iBit + iBitPrev;
3757# endif
3758 /* Search the rest of the bitmap, if there is anything. */
3759 if (cBits > 32)
3760 {
3761 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3762 if (iBit >= 0)
3763 return iBit + (iBitPrev & ~31) + 32;
3764 }
3765 }
3766 else
3767 {
3768 /* Search the rest of the bitmap. */
3769 iBit = ASMBitFirstClear(pvBitmap, cBits);
3770 if (iBit >= 0)
3771 return iBit + (iBitPrev & ~31);
3772 }
3773 return iBit;
3774}
3775#endif
3776
3777
3778/**
3779 * Finds the first set bit in a bitmap.
3780 *
3781 * @returns Index of the first set bit.
3782 * @returns -1 if no set bit was found.
3783 * @param pvBitmap Pointer to the bitmap.
3784 * @param cBits The number of bits in the bitmap. Multiple of 32.
3785 */
3786#if RT_INLINE_ASM_EXTERNAL
3787DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3788#else
3789DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3790{
3791 if (cBits)
3792 {
3793 int32_t iBit;
3794# if RT_INLINE_ASM_GNU_STYLE
3795 RTCCUINTREG uEAX, uECX, uEDI;
3796 cBits = RT_ALIGN_32(cBits, 32);
3797 __asm__ __volatile__("repe; scasl\n\t"
3798 "je 1f\n\t"
3799# ifdef __AMD64__
3800 "lea -4(%%rdi), %%rdi\n\t"
3801 "movl (%%rdi), %%eax\n\t"
3802 "subq %5, %%rdi\n\t"
3803# else
3804 "lea -4(%%edi), %%edi\n\t"
3805 "movl (%%edi), %%eax\n\t"
3806 "subl %5, %%edi\n\t"
3807# endif
3808 "shll $3, %%edi\n\t"
3809 "bsfl %%eax, %%edx\n\t"
3810 "addl %%edi, %%edx\n\t"
3811 "1:\t\n"
3812 : "=d" (iBit),
3813 "=&c" (uECX),
3814 "=&D" (uEDI),
3815 "=&a" (uEAX)
3816 : "0" (0xffffffff),
3817 "mr" (pvBitmap),
3818 "1" (cBits >> 5),
3819 "2" (pvBitmap),
3820 "3" (0));
3821# else
3822 cBits = RT_ALIGN_32(cBits, 32);
3823 __asm
3824 {
3825# ifdef __AMD64__
3826 mov rdi, [pvBitmap]
3827 mov rbx, rdi
3828# else
3829 mov edi, [pvBitmap]
3830 mov ebx, edi
3831# endif
3832 mov edx, 0ffffffffh
3833 xor eax, eax
3834 mov ecx, [cBits]
3835 shr ecx, 5
3836 repe scasd
3837 je done
3838# ifdef __AMD64__
3839 lea rdi, [rdi - 4]
3840 mov eax, [rdi]
3841 sub rdi, rbx
3842# else
3843 lea edi, [edi - 4]
3844 mov eax, [edi]
3845 sub edi, ebx
3846# endif
3847 shl edi, 3
3848 bsf edx, eax
3849 add edx, edi
3850 done:
3851 mov [iBit], edx
3852 }
3853# endif
3854 return iBit;
3855 }
3856 return -1;
3857}
3858#endif
3859
3860
3861/**
3862 * Finds the next set bit in a bitmap.
3863 *
3864 * @returns Index of the next set bit.
3865 * @returns -1 if no set bit was found.
3866 * @param pvBitmap Pointer to the bitmap.
3867 * @param cBits The number of bits in the bitmap. Multiple of 32.
3868 * @param iBitPrev The bit returned from the last search.
3869 * The search will start at iBitPrev + 1.
3870 */
3871#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3872DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3873#else
3874DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3875{
3876 int iBit = ++iBitPrev & 31;
3877 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3878 cBits -= iBitPrev & ~31;
3879 if (iBit)
3880 {
3881 /* inspect the first dword. */
3882 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3883# if RT_INLINE_ASM_USES_INTRIN
3884 unsigned long ulBit = 0;
3885 if (_BitScanForward(&ulBit, u32))
3886 return ulBit + iBitPrev;
3887 iBit = -1;
3888# else
3889# if RT_INLINE_ASM_GNU_STYLE
3890 __asm__ __volatile__("bsf %1, %0\n\t"
3891 "jnz 1f\n\t"
3892 "movl $-1, %0\n\t"
3893 "1:\n\t"
3894 : "=r" (iBit)
3895 : "r" (u32));
3896# else
3897 __asm
3898 {
3899 mov edx, u32
3900 bsf eax, edx
3901 jnz done
3902 mov eax, 0ffffffffh
3903 done:
3904 mov [iBit], eax
3905 }
3906# endif
3907 if (iBit >= 0)
3908 return iBit + iBitPrev;
3909# endif
3910 /* Search the rest of the bitmap, if there is anything. */
3911 if (cBits > 32)
3912 {
3913 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3914 if (iBit >= 0)
3915 return iBit + (iBitPrev & ~31) + 32;
3916 }
3917
3918 }
3919 else
3920 {
3921 /* Search the rest of the bitmap. */
3922 iBit = ASMBitFirstSet(pvBitmap, cBits);
3923 if (iBit >= 0)
3924 return iBit + (iBitPrev & ~31);
3925 }
3926 return iBit;
3927}
3928#endif
3929
3930
3931/**
3932 * Finds the first bit which is set in the given 32-bit integer.
3933 * Bits are numbered from 1 (least significant) to 32.
3934 *
3935 * @returns index [1..32] of the first set bit.
3936 * @returns 0 if all bits are cleared.
3937 * @param u32 Integer to search for set bits.
3938 * @remark Similar to ffs() in BSD.
3939 */
3940DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3941{
3942# if RT_INLINE_ASM_USES_INTRIN
3943 unsigned long iBit;
3944 if (_BitScanForward(&iBit, u32))
3945 iBit++;
3946 else
3947 iBit = 0;
3948# elif RT_INLINE_ASM_GNU_STYLE
3949 uint32_t iBit;
3950 __asm__ __volatile__("bsf %1, %0\n\t"
3951 "jnz 1f\n\t"
3952 "xorl %0, %0\n\t"
3953 "jmp 2f\n"
3954 "1:\n\t"
3955 "incl %0\n"
3956 "2:\n\t"
3957 : "=r" (iBit)
3958 : "rm" (u32));
3959# else
3960 uint32_t iBit;
3961 _asm
3962 {
3963 bsf eax, [u32]
3964 jnz found
3965 xor eax, eax
3966 jmp done
3967 found:
3968 inc eax
3969 done:
3970 mov [iBit], eax
3971 }
3972# endif
3973 return iBit;
3974}
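
/*
 * A minimal usage sketch: dispatching on the lowest pending event bit and
 * clearing it, minding the 1-based index convention of ASMBitFirstSetU32.
 * fPending and myHandleEvent() are illustrative names.
 *
 *     uint32_t fPending = 0x00000028;              // events 3 and 5 pending
 *     unsigned iBit     = ASMBitFirstSetU32(fPending);
 *     if (iBit)                                    // 0 means no bit is set
 *     {
 *         fPending &= ~(1U << (iBit - 1));         // clears bit 3 first
 *         myHandleEvent(iBit - 1);
 *     }
 */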
3975
3976
3977/**
3978 * Finds the first bit which is set in the given 32-bit integer.
3979 * Bits are numbered from 1 (least significant) to 32.
3980 *
3981 * @returns index [1..32] of the first set bit.
3982 * @returns 0 if all bits are cleared.
3983 * @param i32 Integer to search for set bits.
3984 * @remark Similar to ffs() in BSD.
3985 */
3986DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3987{
3988 return ASMBitFirstSetU32((uint32_t)i32);
3989}
3990
3991
3992/**
3993 * Finds the last bit which is set in the given 32-bit integer.
3994 * Bits are numbered from 1 (least significant) to 32.
3995 *
3996 * @returns index [1..32] of the last set bit.
3997 * @returns 0 if all bits are cleared.
3998 * @param u32 Integer to search for set bits.
3999 * @remark Similar to fls() in BSD.
4000 */
4001DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4002{
4003# if RT_INLINE_ASM_USES_INTRIN
4004 unsigned long iBit;
4005 if (_BitScanReverse(&iBit, u32))
4006 iBit++;
4007 else
4008 iBit = 0;
4009# elif RT_INLINE_ASM_GNU_STYLE
4010 uint32_t iBit;
4011 __asm__ __volatile__("bsrl %1, %0\n\t"
4012 "jnz 1f\n\t"
4013 "xorl %0, %0\n\t"
4014 "jmp 2f\n"
4015 "1:\n\t"
4016 "incl %0\n"
4017 "2:\n\t"
4018 : "=r" (iBit)
4019 : "rm" (u32));
4020# else
4021 uint32_t iBit;
4022 _asm
4023 {
4024 bsr eax, [u32]
4025 jnz found
4026 xor eax, eax
4027 jmp done
4028 found:
4029 inc eax
4030 done:
4031 mov [iBit], eax
4032 }
4033# endif
4034 return iBit;
4035}
4036
4037
4038/**
4039 * Finds the last bit which is set in the given 32-bit integer.
4040 * Bits are numbered from 1 (least significant) to 32.
4041 *
4042 * @returns index [1..32] of the last set bit.
4043 * @returns 0 if all bits are cleared.
4044 * @param i32 Integer to search for set bits.
4045 * @remark Similar to fls() in BSD.
4046 */
4047DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4048{
4049 return ASMBitLastSetU32((uint32_t)i32);
4050}
4051
4052
4053/**
4054 * Reverse the byte order of the given 32-bit integer.
4055 * @param u32 Integer
4056 */
4057DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4058{
4059#if RT_INLINE_ASM_USES_INTRIN
4060 u32 = _byteswap_ulong(u32);
4061#elif RT_INLINE_ASM_GNU_STYLE
4062 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4063#else
4064 _asm
4065 {
4066 mov eax, [u32]
4067 bswap eax
4068 mov [u32], eax
4069 }
4070#endif
4071 return u32;
4072}
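
/*
 * A minimal usage sketch: reversing the byte order of a 32-bit value, e.g.
 * when converting between big-endian wire format and the little-endian host
 * order of x86. u32Wire is an illustrative name.
 *
 *     uint32_t u32Wire = 0x12345678;               // big-endian byte sequence 12 34 56 78
 *     uint32_t u32Host = ASMByteSwapU32(u32Wire);  // 0x78563412
 */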
4073
4074/** @} */
4075
4076
4077/** @} */
4078#endif
4079