VirtualBox

source: vbox/trunk/include/iprt/asm.h@2069

Last change on this file since 2069 was 1905, checked in by vboxsync, 18 years ago

renamed variables to make Knut happy

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.3 KB
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler and its intrinsics.
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_byteswap_ushort)
67# pragma intrinsic(_byteswap_ulong)
68# pragma intrinsic(_interlockedbittestandset)
69# pragma intrinsic(_interlockedbittestandreset)
70# pragma intrinsic(_InterlockedAnd)
71# pragma intrinsic(_InterlockedOr)
72# pragma intrinsic(_InterlockedIncrement)
73# pragma intrinsic(_InterlockedDecrement)
74# pragma intrinsic(_InterlockedExchange)
75# pragma intrinsic(_InterlockedCompareExchange)
76# pragma intrinsic(_InterlockedCompareExchange64)
77# ifdef __AMD64__
78# pragma intrinsic(__stosq)
79# pragma intrinsic(__readcr8)
80# pragma intrinsic(__writecr8)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# endif
84# endif
85#endif
86#ifndef RT_INLINE_ASM_USES_INTRIN
87# define RT_INLINE_ASM_USES_INTRIN 0
88#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
97/** @def RT_INLINE_ASM_EXTERNAL
98 * Defined as 1 if the compiler does not support inline assembly.
99 * The ASM* functions will then be implemented in an external .asm file.
100 *
101 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
102 * inline assembly in their AMD64 compiler.
103 */
104#if defined(_MSC_VER) && defined(__AMD64__)
105# define RT_INLINE_ASM_EXTERNAL 1
106#else
107# define RT_INLINE_ASM_EXTERNAL 0
108#endif
109
110/** @def RT_INLINE_ASM_GNU_STYLE
111 * Defined as 1 if the compiler understands GNU style inline assembly.
112 */
113#if defined(_MSC_VER)
114# define RT_INLINE_ASM_GNU_STYLE 0
115#else
116# define RT_INLINE_ASM_GNU_STYLE 1
117#endif
118
119
120/** @todo find a more proper place for this structure? */
121#pragma pack(1)
122/** IDTR */
123typedef struct RTIDTR
124{
125 /** Size of the IDT. */
126 uint16_t cbIdt;
127 /** Address of the IDT. */
128 uintptr_t pIdt;
129} RTIDTR, *PRTIDTR;
130#pragma pack()
131
132#pragma pack(1)
133/** GDTR */
134typedef struct RTGDTR
135{
136 /** Size of the GDT. */
137 uint16_t cbGdt;
138 /** Address of the GDT. */
139 uintptr_t pGdt;
140} RTGDTR, *PRTGDTR;
141#pragma pack()
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(__DOXYGEN__)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
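
/* Illustrative usage sketch; caller code only, nothing beyond ASMGetIDTR and RTIDTR is
 * assumed from this file. Note that sidt stores the table limit (size - 1) and that
 * 32-bit gate descriptors are 8 bytes (16 bytes in long mode).
 *
 *   RTIDTR Idtr;
 *   ASMGetIDTR(&Idtr);
 *   unsigned cGates = (Idtr.cbIdt + 1) / 8;   // assuming 32-bit 8-byte gates
 *   // Idtr.pIdt is the linear base address of the table.
 */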
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241/**
242 * Get the CS register.
243 * @returns cs.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334#endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
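
/* Illustrative usage sketch; DoSomethingExpensive() is a hypothetical workload. This
 * yields a rough cycle count only -- out-of-order execution and frequency changes make
 * the result approximate unless the reads are serialized by other means.
 *
 *   uint64_t u64Start   = ASMReadTSC();
 *   DoSomethingExpensive();
 *   uint64_t u64Elapsed = ASMReadTSC() - u64Start;
 */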
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
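
/* Illustrative usage sketch: fetching the 12-character vendor string from CPUID leaf 0,
 * where the vendor bytes come back in EBX, EDX, ECX in that order. memcpy() is from
 * <string.h>; nothing else beyond ASMCpuId is assumed.
 *
 *   uint32_t uEAX, uEBX, uECX, uEDX;
 *   char     szVendor[13];
 *   ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *   memcpy(&szVendor[0], &uEBX, 4);
 *   memcpy(&szVendor[4], &uEDX, 4);
 *   memcpy(&szVendor[8], &uECX, 4);
 *   szVendor[12] = '\0';                      // e.g. "GenuineIntel" or "AuthenticAMD"
 */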
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
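
/* Illustrative usage sketch: testing a standard feature flag from CPUID leaf 1.
 * Bit 4 of EDX is the TSC flag in the architectural CPUID layout.
 *
 *   bool fHasTsc = (ASMCpuId_EDX(1) & (1 << 4)) != 0;
 */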
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
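
/* Illustrative usage sketch: on 32-bit targets CPUID may be missing on very old
 * processors, so detection code would typically be guarded like this.
 *
 *   if (ASMHasCpuId())
 *   {
 *       uint32_t fFeaturesEdx = ASMCpuId_EDX(1);
 *       // ... inspect the feature bits ...
 *   }
 */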
762
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR2 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
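
/* Illustrative usage sketch: the usual pattern for a short critical section -- save and
 * disable the interrupt flag, do the work, then restore the previous state instead of
 * unconditionally re-enabling interrupts.
 *
 *   RTCCUINTREG fSavedFlags = ASMIntDisableFlags();
 *   // ... touch data shared with an interrupt handler ...
 *   ASMSetFlags(fSavedFlags);
 */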
1248
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
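
/* Illustrative usage sketch: a read-modify-write of a machine specific register.
 * MSR_EXAMPLE and the bit being set are hypothetical; real MSR numbers and bit
 * layouts are model specific and must be taken from the CPU documentation.
 *
 *   uint64_t u64Val = ASMRdMsr(MSR_EXAMPLE);
 *   u64Val |= 1;                              // hypothetical enable bit (bit 0)
 *   ASMWrMsr(MSR_EXAMPLE, u64Val);
 */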
1321
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344# else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
1507/** @deprecated */
1508#define ASMOutB(p, b) ASMOutU8(p,b)
1509/** @deprecated */
1510#define ASMInB(p) ASMInU8(p)
1511
1512/**
1513 * Writes an 8-bit unsigned integer to an I/O port.
1514 *
1515 * @param Port I/O port to write to.
1516 * @param u8 8-bit integer to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1520#else
1521DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1522{
1523# if RT_INLINE_ASM_GNU_STYLE
1524 __asm__ __volatile__("outb %b1, %w0\n\t"
1525 :: "Nd" (Port),
1526 "a" (u8));
1527
1528# elif RT_INLINE_ASM_USES_INTRIN
1529 __outbyte(Port, u8);
1530
1531# else
1532 __asm
1533 {
1534 mov dx, [Port]
1535 mov al, [u8]
1536 out dx, al
1537 }
1538# endif
1539}
1540#endif
1541
1542
1543/**
1544 * Gets an 8-bit unsigned integer from an I/O port.
1545 *
1546 * @returns 8-bit integer.
1547 * @param Port I/O port to read from.
1548 */
1549#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1550DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1551#else
1552DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1553{
1554 uint8_t u8;
1555# if RT_INLINE_ASM_GNU_STYLE
1556 __asm__ __volatile__("inb %w1, %b0\n\t"
1557 : "=a" (u8)
1558 : "Nd" (Port));
1559
1560# elif RT_INLINE_ASM_USES_INTRIN
1561 u8 = __inbyte(Port);
1562
1563# else
1564 __asm
1565 {
1566 mov dx, [Port]
1567 in al, dx
1568 mov [u8], al
1569 }
1570# endif
1571 return u8;
1572}
1573#endif
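
/* Illustrative usage sketch: the classic pairing of ASMOutU8/ASMInU8 for legacy
 * CMOS/RTC access, assuming the standard PC/AT layout where port 0x70 selects the
 * CMOS register and port 0x71 transfers its value.
 *
 *   ASMOutU8(0x70, 0x0a);                     // select CMOS status register A
 *   uint8_t u8StatusA = ASMInU8(0x71);        // read it back
 */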
1574
1575
1576/**
1577 * Writes a 16-bit unsigned integer to an I/O port.
1578 *
1579 * @param Port I/O port to write to.
1580 * @param u16 16-bit integer to write.
1581 */
1582#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1583DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1584#else
1585DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1586{
1587# if RT_INLINE_ASM_GNU_STYLE
1588 __asm__ __volatile__("outw %w1, %w0\n\t"
1589 :: "Nd" (Port),
1590 "a" (u16));
1591
1592# elif RT_INLINE_ASM_USES_INTRIN
1593 __outword(Port, u16);
1594
1595# else
1596 __asm
1597 {
1598 mov dx, [Port]
1599 mov ax, [u16]
1600 out dx, ax
1601 }
1602# endif
1603}
1604#endif
1605
1606
1607/**
1608 * Gets a 16-bit unsigned integer from an I/O port.
1609 *
1610 * @returns 16-bit integer.
1611 * @param Port I/O port to read from.
1612 */
1613#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1614DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1615#else
1616DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1617{
1618 uint16_t u16;
1619# if RT_INLINE_ASM_GNU_STYLE
1620 __asm__ __volatile__("inw %w1, %w0\n\t"
1621 : "=a" (u16)
1622 : "Nd" (Port));
1623
1624# elif RT_INLINE_ASM_USES_INTRIN
1625 u16 = __inword(Port);
1626
1627# else
1628 __asm
1629 {
1630 mov dx, [Port]
1631 in ax, dx
1632 mov [u16], ax
1633 }
1634# endif
1635 return u16;
1636}
1637#endif
1638
1639
1640/**
1641 * Writes a 32-bit unsigned integer to an I/O port.
1642 *
1643 * @param Port I/O port to write to.
1644 * @param u32 32-bit integer to write.
1645 */
1646#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1647DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1648#else
1649DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1650{
1651# if RT_INLINE_ASM_GNU_STYLE
1652 __asm__ __volatile__("outl %1, %w0\n\t"
1653 :: "Nd" (Port),
1654 "a" (u32));
1655
1656# elif RT_INLINE_ASM_USES_INTRIN
1657 __outdword(Port, u32);
1658
1659# else
1660 __asm
1661 {
1662 mov dx, [Port]
1663 mov eax, [u32]
1664 out dx, eax
1665 }
1666# endif
1667}
1668#endif
1669
1670
1671/**
1672 * Gets a 32-bit unsigned integer from an I/O port.
1673 *
1674 * @returns 32-bit integer.
1675 * @param Port I/O port to read from.
1676 */
1677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1678DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1679#else
1680DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1681{
1682 uint32_t u32;
1683# if RT_INLINE_ASM_GNU_STYLE
1684 __asm__ __volatile__("inl %w1, %0\n\t"
1685 : "=a" (u32)
1686 : "Nd" (Port));
1687
1688# elif RT_INLINE_ASM_USES_INTRIN
1689 u32 = __indword(Port);
1690
1691# else
1692 __asm
1693 {
1694 mov dx, [Port]
1695 in eax, dx
1696 mov [u32], eax
1697 }
1698# endif
1699 return u32;
1700}
1701#endif
1702
1703
1704/**
1705 * Atomically Exchange an unsigned 8-bit value.
1706 *
1707 * @returns Current *pu8 value
1708 * @param pu8 Pointer to the 8-bit variable to update.
1709 * @param u8 The 8-bit value to assign to *pu8.
1710 */
1711#if RT_INLINE_ASM_EXTERNAL
1712DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1713#else
1714DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1715{
1716# if RT_INLINE_ASM_GNU_STYLE
1717 __asm__ __volatile__("xchgb %0, %1\n\t"
1718 : "=m" (*pu8),
1719 "=r" (u8)
1720 : "1" (u8));
1721# else
1722 __asm
1723 {
1724# ifdef __AMD64__
1725 mov rdx, [pu8]
1726 mov al, [u8]
1727 xchg [rdx], al
1728 mov [u8], al
1729# else
1730 mov edx, [pu8]
1731 mov al, [u8]
1732 xchg [edx], al
1733 mov [u8], al
1734# endif
1735 }
1736# endif
1737 return u8;
1738}
1739#endif
1740
1741
1742/**
1743 * Atomically Exchange a signed 8-bit value.
1744 *
1745 * @returns Current *pi8 value
1746 * @param pi8 Pointer to the 8-bit variable to update.
1747 * @param i8 The 8-bit value to assign to *pi8.
1748 */
1749DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1750{
1751 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1752}
1753
1754
1755/**
1756 * Atomically Exchange an unsigned 16-bit value.
1757 *
1758 * @returns Current *pu16 value
1759 * @param pu16 Pointer to the 16-bit variable to update.
1760 * @param u16 The 16-bit value to assign to *pu16.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL
1763DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1764#else
1765DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("xchgw %0, %1\n\t"
1769 : "=m" (*pu16),
1770 "=r" (u16)
1771 : "1" (u16));
1772# else
1773 __asm
1774 {
1775# ifdef __AMD64__
1776 mov rdx, [pu16]
1777 mov ax, [u16]
1778 xchg [rdx], ax
1779 mov [u16], ax
1780# else
1781 mov edx, [pu16]
1782 mov ax, [u16]
1783 xchg [edx], ax
1784 mov [u16], ax
1785# endif
1786 }
1787# endif
1788 return u16;
1789}
1790#endif
1791
1792
1793/**
1794 * Atomically Exchange a signed 16-bit value.
1795 *
1796 * @returns Current *pi16 value
1797 * @param pi16 Pointer to the 16-bit variable to update.
1798 * @param i16 The 16-bit value to assign to *pi16.
1799 */
1800DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1801{
1802 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1803}
1804
1805
1806/**
1807 * Atomically Exchange an unsigned 32-bit value.
1808 *
1809 * @returns Current *pu32 value
1810 * @param pu32 Pointer to the 32-bit variable to update.
1811 * @param u32 The 32-bit value to assign to *pu32.
1812 */
1813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1814DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1815#else
1816DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1817{
1818# if RT_INLINE_ASM_GNU_STYLE
1819 __asm__ __volatile__("xchgl %0, %1\n\t"
1820 : "=m" (*pu32),
1821 "=r" (u32)
1822 : "1" (u32));
1823
1824# elif RT_INLINE_ASM_USES_INTRIN
1825 u32 = _InterlockedExchange((long *)pu32, u32);
1826
1827# else
1828 __asm
1829 {
1830# ifdef __AMD64__
1831 mov rdx, [pu32]
1832 mov eax, u32
1833 xchg [rdx], eax
1834 mov [u32], eax
1835# else
1836 mov edx, [pu32]
1837 mov eax, u32
1838 xchg [edx], eax
1839 mov [u32], eax
1840# endif
1841 }
1842# endif
1843 return u32;
1844}
1845#endif
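
/* Illustrative usage sketch: a minimal test-and-set spinlock built on the atomic
 * exchange, 0 meaning free and 1 meaning taken. No back-off or fairness; this only
 * demonstrates the exchange semantics.
 *
 *   static uint32_t volatile s_uLock = 0;
 *
 *   while (ASMAtomicXchgU32(&s_uLock, 1) != 0)
 *       ;                                     // spin until we observed 0 -> 1
 *   // ... critical section ...
 *   ASMAtomicXchgU32(&s_uLock, 0);            // release
 */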
1846
1847
1848/**
1849 * Atomically Exchange a signed 32-bit value.
1850 *
1851 * @returns Current *pi32 value
1852 * @param pi32 Pointer to the 32-bit variable to update.
1853 * @param i32 The 32-bit value to assign to *pi32.
1854 */
1855DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1856{
1857 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1858}
1859
1860
1861/**
1862 * Atomically Exchange an unsigned 64-bit value.
1863 *
1864 * @returns Current *pu64 value
1865 * @param pu64 Pointer to the 64-bit variable to update.
1866 * @param u64 The 64-bit value to assign to *pu64.
1867 */
1868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1869DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1870#else
1871DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1872{
1873# if defined(__AMD64__)
1874# if RT_INLINE_ASM_USES_INTRIN
1875 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1876
1877# elif RT_INLINE_ASM_GNU_STYLE
1878 __asm__ __volatile__("xchgq %0, %1\n\t"
1879 : "=m" (*pu64),
1880 "=r" (u64)
1881 : "1" (u64));
1882# else
1883 __asm
1884 {
1885 mov rdx, [pu64]
1886 mov rax, [u64]
1887 xchg [rdx], rax
1888 mov [u64], rax
1889 }
1890# endif
1891# else /* !__AMD64__ */
1892# if RT_INLINE_ASM_GNU_STYLE
1893# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1894 uint32_t u32 = (uint32_t)u64;
1895 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1896 "xchgl %%ebx, %3\n\t"
1897 "1:\n\t"
1898 "lock; cmpxchg8b (%5)\n\t"
1899 "jnz 1b\n\t"
1900 "xchgl %%ebx, %3\n\t"
1901 /*"xchgl %%esi, %5\n\t"*/
1902 : "=A" (u64),
1903 "=m" (*pu64)
1904 : "0" (*pu64),
1905 "m" ( u32 ),
1906 "c" ( (uint32_t)(u64 >> 32) ),
1907 "S" (pu64) );
1908# else /* !PIC */
1909 __asm__ __volatile__("1:\n\t"
1910 "lock; cmpxchg8b %1\n\t"
1911 "jnz 1b\n\t"
1912 : "=A" (u64),
1913 "=m" (*pu64)
1914 : "0" (*pu64),
1915 "b" ( (uint32_t)u64 ),
1916 "c" ( (uint32_t)(u64 >> 32) ));
1917# endif
1918# else
1919 __asm
1920 {
1921 mov ebx, dword ptr [u64]
1922 mov ecx, dword ptr [u64 + 4]
1923 mov edi, pu64
1924 mov eax, dword ptr [edi]
1925 mov edx, dword ptr [edi + 4]
1926 retry:
1927 lock cmpxchg8b [edi]
1928 jnz retry
1929 mov dword ptr [u64], eax
1930 mov dword ptr [u64 + 4], edx
1931 }
1932# endif
1933# endif /* !__AMD64__ */
1934 return u64;
1935}
1936#endif
1937
1938
1939/**
1940 * Atomically Exchange a signed 64-bit value.
1941 *
1942 * @returns Current *pi64 value
1943 * @param pi64 Pointer to the 64-bit variable to update.
1944 * @param i64 The 64-bit value to assign to *pi64.
1945 */
1946DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1947{
1948 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1949}
1950
1951
1952#ifdef __AMD64__
1953/**
1954 * Atomically Exchange an unsigned 128-bit value.
1955 *
1956 * @returns Current *pu128.
1957 * @param pu128 Pointer to the 128-bit variable to update.
1958 * @param u128 The 128-bit value to assign to *pu128.
1959 *
1960 * @remark We cannot really assume that any hardware supports this. Nor do I have
1961 * GAS support for it. So, for the time being we'll BREAK the atomic
1962 * bit of this function and use two 64-bit exchanges instead.
1963 */
1964# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
1965DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
1966# else
1967DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
1968{
1969 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
1970 {
1971 /** @todo this is clumsy code */
1972 RTUINT128U u128Ret;
1973 u128Ret.u = u128;
1974 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
1975 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
1976 return u128Ret.u;
1977 }
1978#if 0 /* later? */
1979 else
1980 {
1981# if RT_INLINE_ASM_GNU_STYLE
1982 __asm__ __volatile__("1:\n\t"
1983 "lock; cmpxchg8b %1\n\t"
1984 "jnz 1b\n\t"
1985 : "=A" (u128),
1986 "=m" (*pu128)
1987 : "0" (*pu128),
1988 "b" ( (uint64_t)u128 ),
1989 "c" ( (uint64_t)(u128 >> 64) ));
1990# else
1991 __asm
1992 {
1993 mov rbx, dword ptr [u128]
1994 mov rcx, dword ptr [u128 + 4]
1995 mov rdi, pu128
1996 mov rax, dword ptr [rdi]
1997 mov rdx, dword ptr [rdi + 4]
1998 retry:
1999 lock cmpxchg16b [rdi]
2000 jnz retry
2001 mov dword ptr [u128], rax
2002 mov dword ptr [u128 + 4], rdx
2003 }
2004# endif
2005 }
2006 return u128;
2007#endif
2008}
2009# endif
2010#endif /* __AMD64__ */
2011
2012
2013/**
2014 * Atomically Reads an unsigned 64-bit value.
2015 *
2016 * @returns Current *pu64 value
2017 * @param pu64 Pointer to the 64-bit variable to read.
2018 * The memory pointed to must be writable.
2019 * @remark This will fault if the memory is read-only!
2020 */
2021#if RT_INLINE_ASM_EXTERNAL
2022DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2023#else
2024DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2025{
2026 uint64_t u64;
2027# ifdef __AMD64__
2028# if RT_INLINE_ASM_GNU_STYLE
2029 __asm__ __volatile__("movq %1, %0\n\t"
2030 : "=r" (u64)
2031 : "m" (*pu64));
2032# else
2033 __asm
2034 {
2035 mov rdx, [pu64]
2036 mov rax, [rdx]
2037 mov [u64], rax
2038 }
2039# endif
2040# else /* !__AMD64__ */
2041# if RT_INLINE_ASM_GNU_STYLE
2042# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2043 uint32_t u32EBX = 0;
2044 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2045 "lock; cmpxchg8b (%5)\n\t"
2046 "xchgl %%ebx, %3\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (0),
2050 "m" (u32EBX),
2051 "c" (0),
2052 "S" (pu64));
2053# else /* !PIC */
2054 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2055 : "=A" (u64),
2056 "=m" (*pu64)
2057 : "0" (0),
2058 "b" (0),
2059 "c" (0));
2060# endif
2061# else
2062 __asm
2063 {
2064 xor eax, eax
2065 xor edx, edx
2066 mov edi, pu64
2067 xor ecx, ecx
2068 xor ebx, ebx
2069 lock cmpxchg8b [edi]
2070 mov dword ptr [u64], eax
2071 mov dword ptr [u64 + 4], edx
2072 }
2073# endif
2074# endif /* !__AMD64__ */
2075 return u64;
2076}
2077#endif
2078
2079
2080/**
2081 * Atomically Reads a signed 64-bit value.
2082 *
2083 * @returns Current *pi64 value
2084 * @param pi64 Pointer to the 64-bit variable to read.
2085 * The memory pointed to must be writable.
2086 * @remark This will fault if the memory is read-only!
2087 */
2088DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2089{
2090 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2091}
2092
2093
2094/**
2095 * Atomically Exchange a value whose size might differ
2096 * between platforms or compilers.
2097 *
2098 * @param pu Pointer to the variable to update.
2099 * @param uNew The value to assign to *pu.
2100 */
2101#define ASMAtomicXchgSize(pu, uNew) \
2102 do { \
2103 switch (sizeof(*(pu))) { \
2104 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2105 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2106 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2107 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2108 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2109 } \
2110 } while (0)
2111
2112
2113/**
2114 * Atomically Exchange a pointer value.
2115 *
2116 * @returns Current *ppv value
2117 * @param ppv Pointer to the pointer variable to update.
2118 * @param pv The pointer value to assign to *ppv.
2119 */
2120DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2121{
2122#if ARCH_BITS == 32
2123 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2124#elif ARCH_BITS == 64
2125 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2126#else
2127# error "ARCH_BITS is bogus"
2128#endif
2129}
2130
2131
2132/**
2133 * Atomically Compare and Exchange an unsigned 32-bit value.
2134 *
2135 * @returns true if xchg was done.
2136 * @returns false if xchg wasn't done.
2137 *
2138 * @param pu32 Pointer to the value to update.
2139 * @param u32New The new value to assign to *pu32.
2140 * @param u32Old The old value to compare *pu32 with.
2141 */
2142#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2143DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2144#else
2145DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2146{
2147# if RT_INLINE_ASM_GNU_STYLE
2148 uint32_t u32Ret;
2149 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2150 "setz %%al\n\t"
2151 "movzx %%al, %%eax\n\t"
2152 : "=m" (*pu32),
2153 "=a" (u32Ret)
2154 : "r" (u32New),
2155 "1" (u32Old));
2156 return (bool)u32Ret;
2157
2158# elif RT_INLINE_ASM_USES_INTRIN
2159 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2160
2161# else
2162 uint32_t u32Ret;
2163 __asm
2164 {
2165# ifdef __AMD64__
2166 mov rdx, [pu32]
2167# else
2168 mov edx, [pu32]
2169# endif
2170 mov eax, [u32Old]
2171 mov ecx, [u32New]
2172# ifdef __AMD64__
2173 lock cmpxchg [rdx], ecx
2174# else
2175 lock cmpxchg [edx], ecx
2176# endif
2177 setz al
2178 movzx eax, al
2179 mov [u32Ret], eax
2180 }
2181 return !!u32Ret;
2182# endif
2183}
2184#endif
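
/* Illustrative usage sketch: the standard compare-and-exchange retry loop, here
 * maintaining a running maximum without a lock. ExampleAtomicMaxU32 is a hypothetical
 * helper, not part of this API.
 *
 *   void ExampleAtomicMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
 *   {
 *       uint32_t u32Cur = *pu32Max;
 *       while (   u32New > u32Cur
 *              && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur))
 *           u32Cur = *pu32Max;                // lost the race; reload and retry
 *   }
 */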
2185
2186
2187/**
2188 * Atomically Compare and Exchange a signed 32-bit value.
2189 *
2190 * @returns true if xchg was done.
2191 * @returns false if xchg wasn't done.
2192 *
2193 * @param pi32 Pointer to the value to update.
2194 * @param i32New The new value to assign to *pi32.
2195 * @param i32Old The old value to compare *pi32 with.
2196 */
2197DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2198{
2199 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2200}
2201
2202
2203/**
2204 * Atomically Compare and exchange an unsigned 64-bit value.
2205 *
2206 * @returns true if xchg was done.
2207 * @returns false if xchg wasn't done.
2208 *
2209 * @param pu64 Pointer to the 64-bit variable to update.
2210 * @param u64New The 64-bit value to assign to *pu64.
2211 * @param u64Old The value to compare with.
2212 */
2213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2214DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2215#else
2216DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2217{
2218# if RT_INLINE_ASM_USES_INTRIN
2219 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2220
2221# elif defined(__AMD64__)
2222# if RT_INLINE_ASM_GNU_STYLE
2223 uint64_t u64Ret;
2224 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2225 "setz %%al\n\t"
2226 "movzx %%al, %%eax\n\t"
2227 : "=m" (*pu64),
2228 "=a" (u64Ret)
2229 : "r" (u64New),
2230 "1" (u64Old));
2231 return (bool)u64Ret;
2232# else
2233 bool fRet;
2234 __asm
2235 {
2236 mov rdx, [pu64]
2237 mov rax, [u64Old]
2238 mov rcx, [u64New]
2239 lock cmpxchg [rdx], rcx
2240 setz al
2241 mov [fRet], al
2242 }
2243 return fRet;
2244# endif
2245# else /* !__AMD64__ */
2246 uint32_t u32Ret;
2247# if RT_INLINE_ASM_GNU_STYLE
2248# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2249 uint32_t u32 = (uint32_t)u64New;
2250 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2251 "lock; cmpxchg8b (%5)\n\t"
2252 "setz %%al\n\t"
2253 "xchgl %%ebx, %3\n\t"
2254 "movzx %%al, %%eax\n\t"
2255 : "=a" (u32Ret),
2256 "=m" (*pu64)
2257 : "A" (u64Old),
2258 "m" ( u32 ),
2259 "c" ( (uint32_t)(u64New >> 32) ),
2260 "S" (pu64) );
2261# else /* !PIC */
2262 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2263 "setz %%al\n\t"
2264 "movzx %%al, %%eax\n\t"
2265 : "=a" (u32Ret),
2266 "=m" (*pu64)
2267 : "A" (u64Old),
2268 "b" ( (uint32_t)u64New ),
2269 "c" ( (uint32_t)(u64New >> 32) ));
2270# endif
2271 return (bool)u32Ret;
2272# else
2273 __asm
2274 {
2275 mov ebx, dword ptr [u64New]
2276 mov ecx, dword ptr [u64New + 4]
2277 mov edi, [pu64]
2278 mov eax, dword ptr [u64Old]
2279 mov edx, dword ptr [u64Old + 4]
2280 lock cmpxchg8b [edi]
2281 setz al
2282 movzx eax, al
2283 mov dword ptr [u32Ret], eax
2284 }
2285 return !!u32Ret;
2286# endif
2287# endif /* !__AMD64__ */
2288}
2289#endif
2290
2291
2292/**
2293 * Atomically Compare and exchange a signed 64-bit value.
2294 *
2295 * @returns true if xchg was done.
2296 * @returns false if xchg wasn't done.
2297 *
2298 * @param pi64 Pointer to the 64-bit variable to update.
2299 * @param i64 The 64-bit value to assign to *pi64.
2300 * @param i64Old The value to compare with.
2301 */
2302DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2303{
2304 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2305}
2306
2307
2308
2309/** @def ASMAtomicCmpXchgSize
2310 * Atomically Compare and Exchange a value whose size might differ
2311 * between platforms or compilers.
2312 *
2313 * @param pu Pointer to the value to update.
2314 * @param uNew The new value to assign to *pu.
2315 * @param uOld The old value to compare *pu with.
2316 * @param fRc Where to store the result.
2317 */
2318#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2319 do { \
2320 switch (sizeof(*(pu))) { \
2321 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2322 break; \
2323 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2324 break; \
2325 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2326 (fRc) = false; \
2327 break; \
2328 } \
2329 } while (0)
2330
2331
2332/**
2333 * Atomically Compare and Exchange a pointer value.
2334 *
2335 * @returns true if xchg was done.
2336 * @returns false if xchg wasn't done.
2337 *
2338 * @param ppv Pointer to the value to update.
2339 * @param pvNew The new value to assign to *ppv.
2340 * @param pvOld The old value to compare *ppv with.
2341 */
2342DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2343{
2344#if ARCH_BITS == 32
2345 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2346#elif ARCH_BITS == 64
2347 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2348#else
2349# error "ARCH_BITS is bogus"
2350#endif
2351}
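
/* Illustrative usage sketch: a lock-free push onto a singly linked list head, the
 * typical use of a pointer compare-and-exchange. EXAMPLENODE and ExamplePush are
 * hypothetical.
 *
 *   typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *   void ExamplePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
 *   {
 *       EXAMPLENODE *pOld;
 *       do
 *       {
 *           pOld = *ppHead;
 *           pNode->pNext = pOld;
 *       } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOld));
 *   }
 */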
2352
2353
2354/**
2355 * Atomically increment a 32-bit value.
2356 *
2357 * @returns The new value.
2358 * @param pu32 Pointer to the value to increment.
2359 */
2360#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2361DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2362#else
2363DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2364{
2365 uint32_t u32;
2366# if RT_INLINE_ASM_USES_INTRIN
2367 u32 = _InterlockedIncrement((long *)pu32);
2368
2369# elif RT_INLINE_ASM_GNU_STYLE
2370 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2371 "incl %0\n\t"
2372 : "=r" (u32),
2373 "=m" (*pu32)
2374 : "0" (1)
2375 : "memory");
2376# else
2377 __asm
2378 {
2379 mov eax, 1
2380# ifdef __AMD64__
2381 mov rdx, [pu32]
2382 lock xadd [rdx], eax
2383# else
2384 mov edx, [pu32]
2385 lock xadd [edx], eax
2386# endif
2387 inc eax
2388 mov u32, eax
2389 }
2390# endif
2391 return u32;
2392}
2393#endif
2394
2395
2396/**
2397 * Atomically increment a signed 32-bit value.
2398 *
2399 * @returns The new value.
2400 * @param pi32 Pointer to the value to increment.
2401 */
2402DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2403{
2404 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2405}
2406
2407
2408/**
2409 * Atomically decrement an unsigned 32-bit value.
2410 *
2411 * @returns The new value.
2412 * @param pu32 Pointer to the value to decrement.
2413 */
2414#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2415DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2416#else
2417DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2418{
2419 uint32_t u32;
2420# if RT_INLINE_ASM_USES_INTRIN
2421 u32 = _InterlockedDecrement((long *)pu32);
2422
2423# elif RT_INLINE_ASM_GNU_STYLE
2424 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2425 "decl %0\n\t"
2426 : "=r" (u32),
2427 "=m" (*pu32)
2428 : "0" (-1)
2429 : "memory");
2430# else
2431 __asm
2432 {
2433 mov eax, -1
2434# ifdef __AMD64__
2435 mov rdx, [pu32]
2436 lock xadd [rdx], eax
2437# else
2438 mov edx, [pu32]
2439 lock xadd [edx], eax
2440# endif
2441 dec eax
2442 mov u32, eax
2443 }
2444# endif
2445 return u32;
2446}
2447#endif
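
/* Illustrative usage sketch: reference counting with the atomic increment/decrement
 * pair. Both return the new value, so the caller that takes the count to zero knows
 * it is responsible for the cleanup. pObj, cRefs and ExampleDestroy are hypothetical.
 *
 *   ASMAtomicIncU32(&pObj->cRefs);            // retain
 *   // ...
 *   if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *       ExampleDestroy(pObj);                 // last reference dropped
 */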
2448
2449
2450/**
2451 * Atomically decrement a signed 32-bit value.
2452 *
2453 * @returns The new value.
2454 * @param pi32 Pointer to the value to decrement.
2455 */
2456DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2457{
2458 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2459}
2460
2461
2462/**
2463 * Atomically Or an unsigned 32-bit value.
2464 *
2465 * @param pu32 Pointer to the variable to OR u32 with.
2466 * @param u32 The value to OR *pu32 with.
2467 */
2468#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2469DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2470#else
2471DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2472{
2473# if RT_INLINE_ASM_USES_INTRIN
2474 _InterlockedOr((long volatile *)pu32, (long)u32);
2475
2476# elif RT_INLINE_ASM_GNU_STYLE
2477 __asm__ __volatile__("lock; orl %1, %0\n\t"
2478 : "=m" (*pu32)
2479 : "r" (u32));
2480# else
2481 __asm
2482 {
2483 mov eax, [u32]
2484# ifdef __AMD64__
2485 mov rdx, [pu32]
2486 lock or [rdx], eax
2487# else
2488 mov edx, [pu32]
2489 lock or [edx], eax
2490# endif
2491 }
2492# endif
2493}
2494#endif
2495
2496
2497/**
2498 * Atomically Or a signed 32-bit value.
2499 *
2500 * @param pi32 Pointer to the variable to OR i32 with.
2501 * @param i32 The value to OR *pi32 with.
2502 */
2503DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2504{
2505 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2506}
2507
2508
2509/**
2510 * Atomically And an unsigned 32-bit value.
2511 *
2512 * @param pu32 Pointer to the variable to AND u32 with.
2513 * @param u32 The value to AND *pu32 with.
2514 */
2515#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2516DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2517#else
2518DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2519{
2520# if RT_INLINE_ASM_USES_INTRIN
2521 _InterlockedAnd((long volatile *)pu32, u32);
2522
2523# elif RT_INLINE_ASM_GNU_STYLE
2524 __asm__ __volatile__("lock; andl %1, %0\n\t"
2525 : "=m" (*pu32)
2526 : "r" (u32));
2527# else
2528 __asm
2529 {
2530 mov eax, [u32]
2531# ifdef __AMD64__
2532 mov rdx, [pu32]
2533 lock and [rdx], eax
2534# else
2535 mov edx, [pu32]
2536 lock and [edx], eax
2537# endif
2538 }
2539# endif
2540}
2541#endif
2542
2543
2544/**
2545 * Atomically And a signed 32-bit value.
2546 *
2547 * @param pi32 Pointer to the variable to AND i32 with.
2548 * @param i32 The value to AND *pi32 with.
2549 */
2550DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2551{
2552 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2553}
2554
2555
2556/**
2557 * Invalidate page.
2558 *
2559 * @param pv Address of the page to invalidate.
2560 */
2561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2562DECLASM(void) ASMInvalidatePage(void *pv);
2563#else
2564DECLINLINE(void) ASMInvalidatePage(void *pv)
2565{
2566# if RT_INLINE_ASM_USES_INTRIN
2567 __invlpg(pv);
2568
2569# elif RT_INLINE_ASM_GNU_STYLE
2570 __asm__ __volatile__("invlpg %0\n\t"
2571 : : "m" (*(uint8_t *)pv));
2572# else
2573 __asm
2574 {
2575# ifdef __AMD64__
2576 mov rax, [pv]
2577 invlpg [rax]
2578# else
2579 mov eax, [pv]
2580 invlpg [eax]
2581# endif
2582 }
2583# endif
2584}
2585#endif
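
/*
 * Usage sketch: after rewriting a page table entry, the stale TLB entry for
 * the linear address must be flushed. The PTE handling here is deliberately
 * simplified and hypothetical; it only shows where ASMInvalidatePage fits in.
 */
static void exampleUpdateMapping(RTCCUINTREG volatile *pPte, RTCCUINTREG uNewPte, void *pvLinear)
{
    *pPte = uNewPte;                /* hypothetical PTE update (real code needs atomicity and flag handling) */
    ASMInvalidatePage(pvLinear);    /* drop the cached translation for that address */
}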
2586
2587
2588#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2589# if PAGE_SIZE != 0x1000
2590# error "PAGE_SIZE is not 0x1000!"
2591# endif
2592#endif
2593
2594/**
2595 * Zeros a 4K memory page.
2596 *
2597 * @param pv Pointer to the memory block. This must be page aligned.
2598 */
2599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2600DECLASM(void) ASMMemZeroPage(volatile void *pv);
2601# else
2602DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2603{
2604# if RT_INLINE_ASM_USES_INTRIN
2605# ifdef __AMD64__
2606 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2607# else
2608 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2609# endif
2610
2611# elif RT_INLINE_ASM_GNU_STYLE
2612 RTUINTREG uDummy;
2613# ifdef __AMD64__
2614 __asm__ __volatile__ ("rep stosq"
2615 : "=D" (pv),
2616 "=c" (uDummy)
2617 : "0" (pv),
2618 "c" (0x1000 >> 3),
2619 "a" (0)
2620 : "memory");
2621# else
2622 __asm__ __volatile__ ("rep stosl"
2623 : "=D" (pv),
2624 "=c" (uDummy)
2625 : "0" (pv),
2626 "c" (0x1000 >> 2),
2627 "a" (0)
2628 : "memory");
2629# endif
2630# else
2631 __asm
2632 {
2633# ifdef __AMD64__
2634 xor rax, rax
2635 mov ecx, 0200h
2636 mov rdi, [pv]
2637 rep stosq
2638# else
2639 xor eax, eax
2640 mov ecx, 0400h
2641 mov edi, [pv]
2642 rep stosd
2643# endif
2644 }
2645# endif
2646}
2647# endif
2648
2649
2650/**
2651 * Zeros a memory block with a 32-bit aligned size.
2652 *
2653 * @param pv Pointer to the memory block.
2654 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2655 */
2656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2657DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2658#else
2659DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2660{
2661# if RT_INLINE_ASM_USES_INTRIN
2662 __stosd((unsigned long *)pv, 0, cb >> 2);
2663
2664# elif RT_INLINE_ASM_GNU_STYLE
2665 __asm__ __volatile__ ("rep stosl"
2666 : "=D" (pv),
2667 "=c" (cb)
2668 : "0" (pv),
2669 "1" (cb >> 2),
2670 "a" (0)
2671 : "memory");
2672# else
2673 __asm
2674 {
2675 xor eax, eax
2676# ifdef __AMD64__
2677 mov rcx, [cb]
2678 shr rcx, 2
2679 mov rdi, [pv]
2680# else
2681 mov ecx, [cb]
2682 shr ecx, 2
2683 mov edi, [pv]
2684# endif
2685 rep stosd
2686 }
2687# endif
2688}
2689#endif
2690
2691
2692/**
2693 * Fills a memory block with a 32-bit aligned size.
2694 *
2695 * @param pv Pointer to the memory block.
2696 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2697 * @param u32 The value to fill with.
2698 */
2699#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2700DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2701#else
2702DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2703{
2704# if RT_INLINE_ASM_USES_INTRIN
2705 __stosd((unsigned long *)pv, u32, cb >> 2);
2706
2707# elif RT_INLINE_ASM_GNU_STYLE
2708 __asm__ __volatile__ ("rep stosl"
2709 : "=D" (pv),
2710 "=c" (cb)
2711 : "0" (pv),
2712 "1" (cb >> 2),
2713 "a" (u32)
2714 : "memory");
2715# else
2716 __asm
2717 {
2718# ifdef __AMD64__
2719 mov rcx, [cb]
2720 shr rcx, 2
2721 mov rdi, [pv]
2722# else
2723 mov ecx, [cb]
2724 shr ecx, 2
2725 mov edi, [pv]
2726# endif
2727 mov eax, [u32]
2728 rep stosd
2729 }
2730# endif
2731}
2732#endif
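
/*
 * Usage sketch: both ASMMemZero32 and ASMMemFill32 require a byte count that
 * is a multiple of 4, so they are typically used on structures sized
 * accordingly. EXAMPLETABLE and the poison value are hypothetical.
 */
typedef struct EXAMPLETABLE
{
    uint32_t au32Entries[64];       /* sizeof() is 256, i.e. 32-bit aligned */
} EXAMPLETABLE;

static void examplePoisonTable(EXAMPLETABLE *pTable)
{
    ASMMemFill32(pTable->au32Entries, sizeof(pTable->au32Entries), UINT32_C(0xdeadbeef));
}

static void exampleResetTable(EXAMPLETABLE *pTable)
{
    ASMMemZero32(pTable, sizeof(*pTable));
}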
2733
2734
2735
2736/**
2737 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2738 *
2739 * @returns u32F1 * u32F2.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2742DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2743#else
2744DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2745{
2746# ifdef __AMD64__
2747 return (uint64_t)u32F1 * u32F2;
2748# else /* !__AMD64__ */
2749 uint64_t u64;
2750# if RT_INLINE_ASM_GNU_STYLE
2751 __asm__ __volatile__("mull %%edx"
2752 : "=A" (u64)
2753 : "a" (u32F2), "d" (u32F1));
2754# else
2755 __asm
2756 {
2757 mov edx, [u32F1]
2758 mov eax, [u32F2]
2759 mul edx
2760 mov dword ptr [u64], eax
2761 mov dword ptr [u64 + 4], edx
2762 }
2763# endif
2764 return u64;
2765# endif /* !__AMD64__ */
2766}
2767#endif
2768
2769
2770/**
2771 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2772 *
2773 * @returns i32F1 * i32F2.
2774 */
2775#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2776DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2777#else
2778DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2779{
2780# ifdef __AMD64__
2781 return (int64_t)i32F1 * i32F2;
2782# else /* !__AMD64__ */
2783 int64_t i64;
2784# if RT_INLINE_ASM_GNU_STYLE
2785 __asm__ __volatile__("imull %%edx"
2786 : "=A" (i64)
2787 : "a" (i32F2), "d" (i32F1));
2788# else
2789 __asm
2790 {
2791 mov edx, [i32F1]
2792 mov eax, [i32F2]
2793 imul edx
2794 mov dword ptr [i64], eax
2795 mov dword ptr [i64 + 4], edx
2796 }
2797# endif
2798 return i64;
2799# endif /* !__AMD64__ */
2800}
2801#endif
2802
2803
2804/**
2805 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2806 *
2807 * @returns u64 / u32.
2808 */
2809#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2810DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2811#else
2812DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2813{
2814# ifdef __AMD64__
2815 return (uint32_t)(u64 / u32);
2816# else /* !__AMD64__ */
2817# if RT_INLINE_ASM_GNU_STYLE
2818 RTUINTREG uDummy;
2819 __asm__ __volatile__("divl %3"
2820 : "=a" (u32), "=d"(uDummy)
2821 : "A" (u64), "r" (u32));
2822# else
2823 __asm
2824 {
2825 mov eax, dword ptr [u64]
2826 mov edx, dword ptr [u64 + 4]
2827 mov ecx, [u32]
2828 div ecx
2829 mov [u32], eax
2830 }
2831# endif
2832 return u32;
2833# endif /* !__AMD64__ */
2834}
2835#endif
2836
2837
2838/**
2839 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2840 *
2841 * @returns i64 / i32.
2842 */
2843#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2844DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2845#else
2846DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2847{
2848# ifdef __AMD64__
2849 return (int32_t)(i64 / i32);
2850# else /* !__AMD64__ */
2851# if RT_INLINE_ASM_GNU_STYLE
2852 RTUINTREG iDummy;
2853 __asm__ __volatile__("idivl %3"
2854 : "=a" (i32), "=d"(iDummy)
2855 : "A" (i64), "r" (i32));
2856# else
2857 __asm
2858 {
2859 mov eax, dword ptr [i64]
2860 mov edx, dword ptr [i64 + 4]
2861 mov ecx, [i32]
2862 idiv ecx
2863 mov [i32], eax
2864 }
2865# endif
2866 return i32;
2867# endif /* !__AMD64__ */
2868}
2869#endif
2870
2871
2872/**
2873 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
2874 * using a 96-bit intermediate result.
2875 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2876 * __udivdi3 and __umoddi3 even if this inline function is not used.
2877 *
2878 * @returns (u64A * u32B) / u32C.
2879 * @param u64A The 64-bit value.
2880 * @param u32B The 32-bit value to multiply A by.
2881 * @param u32C The 32-bit value to divide A*B by.
2882 */
2883#if RT_INLINE_ASM_EXTERNAL
2884DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2885#else
2886DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2887{
2888# if RT_INLINE_ASM_GNU_STYLE
2889# ifdef __AMD64__
2890 uint64_t u64Result, u64Spill;
2891 __asm__ __volatile__("mulq %2\n\t"
2892 "divq %3\n\t"
2893 : "=a" (u64Result),
2894 "=d" (u64Spill)
2895 : "r" ((uint64_t)u32B),
2896 "r" ((uint64_t)u32C),
2897 "0" (u64A),
2898 "1" (0));
2899 return u64Result;
2900# else
2901 uint32_t u32Dummy;
2902 uint64_t u64Result;
2903 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2904 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2905 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2906 eax = u64A.hi */
2907 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2908 edx = u32C */
2909 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2910 edx = u32B */
2911 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2912 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2913 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2914 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2915 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2916 edx = u64Hi % u32C */
2917 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2918 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2919 "divl %%ecx \n\t" /* u64Result.lo */
2920 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2921 : "=A"(u64Result),
2922 "=S"(u32Dummy), "=D"(u32Dummy)
2923 : "a"((uint32_t)u64A),
2924 "S"((uint32_t)(u64A >> 32)),
2925 "c"(u32B),
2926 "D"(u32C));
2927 return u64Result;
2928# endif
2929# else
2930 RTUINT64U u;
2931 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2932 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2933 u64Hi += (u64Lo >> 32);
2934 u.s.Hi = (uint32_t)(u64Hi / u32C);
2935 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2936 return u.u;
2937# endif
2938}
2939#endif
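
/*
 * Usage sketch: the typical use of ASMMultU64ByU32DivByU32 is scaling a tick
 * count by a frequency ratio where the intermediate product can exceed 64 bits.
 * The function and parameter names below are illustrative only.
 */
static uint64_t exampleTicksToNano(uint64_t cTicks, uint32_t uTicksPerSec)
{
    /* ns = ticks * 1 000 000 000 / ticks-per-second; at GHz rates the product
       overflows 64 bits after a few seconds, which the 96-bit intermediate absorbs. */
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uTicksPerSec);
}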
2940
2941
2942/**
2943 * Probes a byte pointer for read access.
2944 *
2945 * While the function will fault if the byte is not read accessible,
2946 * the idea is to do this in a safe place like before acquiring locks
2947 * and such like.
2948 *
2949 * Also, this function guarantees that an eager compiler is not going
2950 * to optimize the probing away.
2951 *
2952 * @param pvByte Pointer to the byte.
2953 */
2954#if RT_INLINE_ASM_EXTERNAL
2955DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2956#else
2957DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2958{
2959 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2960 uint8_t u8;
2961# if RT_INLINE_ASM_GNU_STYLE
2962 __asm__ __volatile__("movb (%1), %0\n\t"
2963 : "=r" (u8)
2964 : "r" (pvByte));
2965# else
2966 __asm
2967 {
2968# ifdef __AMD64__
2969 mov rax, [pvByte]
2970 mov al, [rax]
2971# else
2972 mov eax, [pvByte]
2973 mov al, [eax]
2974# endif
2975 mov [u8], al
2976 }
2977# endif
2978 return u8;
2979}
2980#endif
2981
2982/**
2983 * Probes a buffer for read access page by page.
2984 *
2985 * While the function will fault if the buffer is not fully read
2986 * accessible, the idea is to do this in a safe place like before
2987 * acquiring locks and such like.
2988 *
2989 * Also, this function guarantees that an eager compiler is not going
2990 * to optimize the probing away.
2991 *
2992 * @param pvBuf Pointer to the buffer.
2993 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2994 */
2995DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2996{
2997 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2998 /* the first byte */
2999 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3000 ASMProbeReadByte(pu8);
3001
3002 /* the pages in between. */
3003 while (cbBuf > /*PAGE_SIZE*/0x1000)
3004 {
3005 ASMProbeReadByte(pu8);
3006 cbBuf -= /*PAGE_SIZE*/0x1000;
3007 pu8 += /*PAGE_SIZE*/0x1000;
3008 }
3009
3010 /* the last byte */
3011 ASMProbeReadByte(pu8 + cbBuf - 1);
3012}
3013
3014
3015/** @def ASMBreakpoint
3016 * Debugger Breakpoint.
3017 * @remark In the gnu world we add a nop instruction after the int3 to
3018 * force gdb to remain at the int3 source line.
3019 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3020 * @internal
3021 */
3022#if RT_INLINE_ASM_GNU_STYLE
3023# ifndef __L4ENV__
3024# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3025# else
3026# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3027# endif
3028#else
3029# define ASMBreakpoint() __debugbreak()
3030#endif
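
/*
 * Usage sketch: a strict-build assertion that traps straight into the debugger.
 * EXAMPLE_STRICT is a hypothetical build flag; in non-strict builds the check
 * (including the expression, in this simple sketch) is compiled out.
 */
#ifdef EXAMPLE_STRICT
# define EXAMPLE_ASSERT_BREAK(expr) do { if (!(expr)) ASMBreakpoint(); } while (0)
#else
# define EXAMPLE_ASSERT_BREAK(expr) do { } while (0)
#endif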
3031
3032
3033
3034/** @defgroup grp_inline_bits Bit Operations
3035 * @{
3036 */
3037
3038
3039/**
3040 * Sets a bit in a bitmap.
3041 *
3042 * @param pvBitmap Pointer to the bitmap.
3043 * @param iBit The bit to set.
3044 */
3045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3046DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3047#else
3048DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3049{
3050# if RT_INLINE_ASM_USES_INTRIN
3051 _bittestandset((long *)pvBitmap, iBit);
3052
3053# elif RT_INLINE_ASM_GNU_STYLE
3054 __asm__ __volatile__ ("btsl %1, %0"
3055 : "=m" (*(volatile long *)pvBitmap)
3056 : "Ir" (iBit)
3057 : "memory");
3058# else
3059 __asm
3060 {
3061# ifdef __AMD64__
3062 mov rax, [pvBitmap]
3063 mov edx, [iBit]
3064 bts [rax], edx
3065# else
3066 mov eax, [pvBitmap]
3067 mov edx, [iBit]
3068 bts [eax], edx
3069# endif
3070 }
3071# endif
3072}
3073#endif
3074
3075
3076/**
3077 * Atomically sets a bit in a bitmap.
3078 *
3079 * @param pvBitmap Pointer to the bitmap.
3080 * @param iBit The bit to set.
3081 */
3082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3083DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3084#else
3085DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3086{
3087# if RT_INLINE_ASM_USES_INTRIN
3088 _interlockedbittestandset((long *)pvBitmap, iBit);
3089# elif RT_INLINE_ASM_GNU_STYLE
3090 __asm__ __volatile__ ("lock; btsl %1, %0"
3091 : "=m" (*(volatile long *)pvBitmap)
3092 : "Ir" (iBit)
3093 : "memory");
3094# else
3095 __asm
3096 {
3097# ifdef __AMD64__
3098 mov rax, [pvBitmap]
3099 mov edx, [iBit]
3100 lock bts [rax], edx
3101# else
3102 mov eax, [pvBitmap]
3103 mov edx, [iBit]
3104 lock bts [eax], edx
3105# endif
3106 }
3107# endif
3108}
3109#endif
3110
3111
3112/**
3113 * Clears a bit in a bitmap.
3114 *
3115 * @param pvBitmap Pointer to the bitmap.
3116 * @param iBit The bit to clear.
3117 */
3118#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3119DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3120#else
3121DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3122{
3123# if RT_INLINE_ASM_USES_INTRIN
3124 _bittestandreset((long *)pvBitmap, iBit);
3125
3126# elif RT_INLINE_ASM_GNU_STYLE
3127 __asm__ __volatile__ ("btrl %1, %0"
3128 : "=m" (*(volatile long *)pvBitmap)
3129 : "Ir" (iBit)
3130 : "memory");
3131# else
3132 __asm
3133 {
3134# ifdef __AMD64__
3135 mov rax, [pvBitmap]
3136 mov edx, [iBit]
3137 btr [rax], edx
3138# else
3139 mov eax, [pvBitmap]
3140 mov edx, [iBit]
3141 btr [eax], edx
3142# endif
3143 }
3144# endif
3145}
3146#endif
3147
3148
3149/**
3150 * Atomically clears a bit in a bitmap.
3151 *
3152 * @param pvBitmap Pointer to the bitmap.
3153 * @param iBit The bit to clear.
3154 * @remark No memory barrier, take care on SMP.
3155 */
3156#if RT_INLINE_ASM_EXTERNAL
3157DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3158#else
3159DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3160{
3161# if RT_INLINE_ASM_GNU_STYLE
3162 __asm__ __volatile__ ("lock; btrl %1, %0"
3163 : "=m" (*(volatile long *)pvBitmap)
3164 : "Ir" (iBit)
3165 : "memory");
3166# else
3167 __asm
3168 {
3169# ifdef __AMD64__
3170 mov rax, [pvBitmap]
3171 mov edx, [iBit]
3172 lock btr [rax], edx
3173# else
3174 mov eax, [pvBitmap]
3175 mov edx, [iBit]
3176 lock btr [eax], edx
3177# endif
3178 }
3179# endif
3180}
3181#endif
3182
3183
3184/**
3185 * Toggles a bit in a bitmap.
3186 *
3187 * @param pvBitmap Pointer to the bitmap.
3188 * @param iBit The bit to toggle.
3189 */
3190#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3191DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3192#else
3193DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3194{
3195# if RT_INLINE_ASM_USES_INTRIN
3196 _bittestandcomplement((long *)pvBitmap, iBit);
3197# elif RT_INLINE_ASM_GNU_STYLE
3198 __asm__ __volatile__ ("btcl %1, %0"
3199 : "=m" (*(volatile long *)pvBitmap)
3200 : "Ir" (iBit)
3201 : "memory");
3202# else
3203 __asm
3204 {
3205# ifdef __AMD64__
3206 mov rax, [pvBitmap]
3207 mov edx, [iBit]
3208 btc [rax], edx
3209# else
3210 mov eax, [pvBitmap]
3211 mov edx, [iBit]
3212 btc [eax], edx
3213# endif
3214 }
3215# endif
3216}
3217#endif
3218
3219
3220/**
3221 * Atomically toggles a bit in a bitmap.
3222 *
3223 * @param pvBitmap Pointer to the bitmap.
3224 * @param iBit The bit to toggle.
3225 */
3226#if RT_INLINE_ASM_EXTERNAL
3227DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3228#else
3229DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3230{
3231# if RT_INLINE_ASM_GNU_STYLE
3232 __asm__ __volatile__ ("lock; btcl %1, %0"
3233 : "=m" (*(volatile long *)pvBitmap)
3234 : "Ir" (iBit)
3235 : "memory");
3236# else
3237 __asm
3238 {
3239# ifdef __AMD64__
3240 mov rax, [pvBitmap]
3241 mov edx, [iBit]
3242 lock btc [rax], edx
3243# else
3244 mov eax, [pvBitmap]
3245 mov edx, [iBit]
3246 lock btc [eax], edx
3247# endif
3248 }
3249# endif
3250}
3251#endif
3252
3253
3254/**
3255 * Tests and sets a bit in a bitmap.
3256 *
3257 * @returns true if the bit was set.
3258 * @returns false if the bit was clear.
3259 * @param pvBitmap Pointer to the bitmap.
3260 * @param iBit The bit to test and set.
3261 */
3262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3263DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3264#else
3265DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3266{
3267 union { bool f; uint32_t u32; uint8_t u8; } rc;
3268# if RT_INLINE_ASM_USES_INTRIN
3269 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3270
3271# elif RT_INLINE_ASM_GNU_STYLE
3272 __asm__ __volatile__ ("btsl %2, %1\n\t"
3273 "setc %b0\n\t"
3274 "andl $1, %0\n\t"
3275 : "=q" (rc.u32),
3276 "=m" (*(volatile long *)pvBitmap)
3277 : "Ir" (iBit)
3278 : "memory");
3279# else
3280 __asm
3281 {
3282 mov edx, [iBit]
3283# ifdef __AMD64__
3284 mov rax, [pvBitmap]
3285 bts [rax], edx
3286# else
3287 mov eax, [pvBitmap]
3288 bts [eax], edx
3289# endif
3290 setc al
3291 and eax, 1
3292 mov [rc.u32], eax
3293 }
3294# endif
3295 return rc.f;
3296}
3297#endif
3298
3299
3300/**
3301 * Atomically tests and sets a bit in a bitmap.
3302 *
3303 * @returns true if the bit was set.
3304 * @returns false if the bit was clear.
3305 * @param pvBitmap Pointer to the bitmap.
3306 * @param iBit The bit to test and set.
3307 */
3308#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3309DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3310#else
3311DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3312{
3313 union { bool f; uint32_t u32; uint8_t u8; } rc;
3314# if RT_INLINE_ASM_USES_INTRIN
3315 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3316# elif RT_INLINE_ASM_GNU_STYLE
3317 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3318 "setc %b0\n\t"
3319 "andl $1, %0\n\t"
3320 : "=q" (rc.u32),
3321 "=m" (*(volatile long *)pvBitmap)
3322 : "Ir" (iBit)
3323 : "memory");
3324# else
3325 __asm
3326 {
3327 mov edx, [iBit]
3328# ifdef __AMD64__
3329 mov rax, [pvBitmap]
3330 lock bts [rax], edx
3331# else
3332 mov eax, [pvBitmap]
3333 lock bts [eax], edx
3334# endif
3335 setc al
3336 and eax, 1
3337 mov [rc.u32], eax
3338 }
3339# endif
3340 return rc.f;
3341}
3342#endif
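
/*
 * Usage sketch: claiming a free slot in a small allocation bitmap with
 * ASMAtomicBitTestAndSet. The 64-slot size is arbitrary and the function is
 * illustrative only.
 */
static int exampleClaimSlot(uint32_t volatile au32Bitmap[2])
{
    int iSlot;
    for (iSlot = 0; iSlot < 64; iSlot++)
        if (!ASMAtomicBitTestAndSet(au32Bitmap, iSlot))
            return iSlot;               /* the bit was clear before, so this thread now owns the slot */
    return -1;                          /* all slots taken */
}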
3343
3344
3345/**
3346 * Tests and clears a bit in a bitmap.
3347 *
3348 * @returns true if the bit was set.
3349 * @returns false if the bit was clear.
3350 * @param pvBitmap Pointer to the bitmap.
3351 * @param iBit The bit to test and clear.
3352 */
3353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3354DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3355#else
3356DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3357{
3358 union { bool f; uint32_t u32; uint8_t u8; } rc;
3359# if RT_INLINE_ASM_USES_INTRIN
3360 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3361
3362# elif RT_INLINE_ASM_GNU_STYLE
3363 __asm__ __volatile__ ("btrl %2, %1\n\t"
3364 "setc %b0\n\t"
3365 "andl $1, %0\n\t"
3366 : "=q" (rc.u32),
3367 "=m" (*(volatile long *)pvBitmap)
3368 : "Ir" (iBit)
3369 : "memory");
3370# else
3371 __asm
3372 {
3373 mov edx, [iBit]
3374# ifdef __AMD64__
3375 mov rax, [pvBitmap]
3376 btr [rax], edx
3377# else
3378 mov eax, [pvBitmap]
3379 btr [eax], edx
3380# endif
3381 setc al
3382 and eax, 1
3383 mov [rc.u32], eax
3384 }
3385# endif
3386 return rc.f;
3387}
3388#endif
3389
3390
3391/**
3392 * Atomically tests and clears a bit in a bitmap.
3393 *
3394 * @returns true if the bit was set.
3395 * @returns false if the bit was clear.
3396 * @param pvBitmap Pointer to the bitmap.
3397 * @param iBit The bit to test and clear.
3398 * @remark No memory barrier, take care on SMP.
3399 */
3400#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3401DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3402#else
3403DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3404{
3405 union { bool f; uint32_t u32; uint8_t u8; } rc;
3406# if RT_INLINE_ASM_USES_INTRIN
3407 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3408
3409# elif RT_INLINE_ASM_GNU_STYLE
3410 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3411 "setc %b0\n\t"
3412 "andl $1, %0\n\t"
3413 : "=q" (rc.u32),
3414 "=m" (*(volatile long *)pvBitmap)
3415 : "Ir" (iBit)
3416 : "memory");
3417# else
3418 __asm
3419 {
3420 mov edx, [iBit]
3421# ifdef __AMD64__
3422 mov rax, [pvBitmap]
3423 lock btr [rax], edx
3424# else
3425 mov eax, [pvBitmap]
3426 lock btr [eax], edx
3427# endif
3428 setc al
3429 and eax, 1
3430 mov [rc.u32], eax
3431 }
3432# endif
3433 return rc.f;
3434}
3435#endif
3436
3437
3438/**
3439 * Tests and toggles a bit in a bitmap.
3440 *
3441 * @returns true if the bit was set.
3442 * @returns false if the bit was clear.
3443 * @param pvBitmap Pointer to the bitmap.
3444 * @param iBit The bit to test and toggle.
3445 */
3446#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3447 DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3448#else
3449DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3450{
3451 union { bool f; uint32_t u32; uint8_t u8; } rc;
3452# if RT_INLINE_ASM_USES_INTRIN
3453 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3454
3455# elif RT_INLINE_ASM_GNU_STYLE
3456 __asm__ __volatile__ ("btcl %2, %1\n\t"
3457 "setc %b0\n\t"
3458 "andl $1, %0\n\t"
3459 : "=q" (rc.u32),
3460 "=m" (*(volatile long *)pvBitmap)
3461 : "Ir" (iBit)
3462 : "memory");
3463# else
3464 __asm
3465 {
3466 mov edx, [iBit]
3467# ifdef __AMD64__
3468 mov rax, [pvBitmap]
3469 btc [rax], edx
3470# else
3471 mov eax, [pvBitmap]
3472 btc [eax], edx
3473# endif
3474 setc al
3475 and eax, 1
3476 mov [rc.u32], eax
3477 }
3478# endif
3479 return rc.f;
3480}
3481#endif
3482
3483
3484/**
3485 * Atomically tests and toggles a bit in a bitmap.
3486 *
3487 * @returns true if the bit was set.
3488 * @returns false if the bit was clear.
3489 * @param pvBitmap Pointer to the bitmap.
3490 * @param iBit The bit to test and toggle.
3491 */
3492#if RT_INLINE_ASM_EXTERNAL
3493DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3494#else
3495DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3496{
3497 union { bool f; uint32_t u32; uint8_t u8; } rc;
3498# if RT_INLINE_ASM_GNU_STYLE
3499 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3500 "setc %b0\n\t"
3501 "andl $1, %0\n\t"
3502 : "=q" (rc.u32),
3503 "=m" (*(volatile long *)pvBitmap)
3504 : "Ir" (iBit)
3505 : "memory");
3506# else
3507 __asm
3508 {
3509 mov edx, [iBit]
3510# ifdef __AMD64__
3511 mov rax, [pvBitmap]
3512 lock btc [rax], edx
3513# else
3514 mov eax, [pvBitmap]
3515 lock btc [eax], edx
3516# endif
3517 setc al
3518 and eax, 1
3519 mov [rc.u32], eax
3520 }
3521# endif
3522 return rc.f;
3523}
3524#endif
3525
3526
3527/**
3528 * Tests if a bit in a bitmap is set.
3529 *
3530 * @returns true if the bit is set.
3531 * @returns false if the bit is clear.
3532 * @param pvBitmap Pointer to the bitmap.
3533 * @param iBit The bit to test.
3534 */
3535#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3536DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3537#else
3538DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3539{
3540 union { bool f; uint32_t u32; uint8_t u8; } rc;
3541# if RT_INLINE_ASM_USES_INTRIN
3542 rc.u32 = _bittest((long *)pvBitmap, iBit);
3543# elif RT_INLINE_ASM_GNU_STYLE
3544
3545 __asm__ __volatile__ ("btl %2, %1\n\t"
3546 "setc %b0\n\t"
3547 "andl $1, %0\n\t"
3548 : "=q" (rc.u32),
3549 "=m" (*(volatile long *)pvBitmap)
3550 : "Ir" (iBit)
3551 : "memory");
3552# else
3553 __asm
3554 {
3555 mov edx, [iBit]
3556# ifdef __AMD64__
3557 mov rax, [pvBitmap]
3558 bt [rax], edx
3559# else
3560 mov eax, [pvBitmap]
3561 bt [eax], edx
3562# endif
3563 setc al
3564 and eax, 1
3565 mov [rc.u32], eax
3566 }
3567# endif
3568 return rc.f;
3569}
3570#endif
3571
3572
3573/**
3574 * Clears a bit range within a bitmap.
3575 *
3576 * @param pvBitmap Pointer to the bitmap.
3577 * @param iBitStart The first bit to clear.
3578 * @param iBitEnd The first bit not to clear.
3579 */
3580DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3581{
3582 if (iBitStart < iBitEnd)
3583 {
3584 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3585 int iStart = iBitStart & ~31;
3586 int iEnd = iBitEnd & ~31;
3587 if (iStart == iEnd)
3588 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3589 else
3590 {
3591 /* bits in first dword. */
3592 if (iBitStart & 31)
3593 {
3594 *pu32 &= (1 << (iBitStart & 31)) - 1;
3595 pu32++;
3596 iBitStart = iStart + 32;
3597 }
3598
3599 /* whole dword. */
3600 if (iBitStart != iEnd)
3601 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3602
3603 /* bits in last dword. */
3604 if (iBitEnd & 31)
3605 {
3606 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3607 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3608 }
3609 }
3610 }
3611}
3612
3613
3614/**
3615 * Finds the first clear bit in a bitmap.
3616 *
3617 * @returns Index of the first zero bit.
3618 * @returns -1 if no clear bit was found.
3619 * @param pvBitmap Pointer to the bitmap.
3620 * @param cBits The number of bits in the bitmap. Multiple of 32.
3621 */
3622#if RT_INLINE_ASM_EXTERNAL
3623DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3624#else
3625DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3626{
3627 if (cBits)
3628 {
3629 int32_t iBit;
3630# if RT_INLINE_ASM_GNU_STYLE
3631 RTCCUINTREG uEAX, uECX, uEDI;
3632 cBits = RT_ALIGN_32(cBits, 32);
3633 __asm__ __volatile__("repe; scasl\n\t"
3634 "je 1f\n\t"
3635# ifdef __AMD64__
3636 "lea -4(%%rdi), %%rdi\n\t"
3637 "xorl (%%rdi), %%eax\n\t"
3638 "subq %5, %%rdi\n\t"
3639# else
3640 "lea -4(%%edi), %%edi\n\t"
3641 "xorl (%%edi), %%eax\n\t"
3642 "subl %5, %%edi\n\t"
3643# endif
3644 "shll $3, %%edi\n\t"
3645 "bsfl %%eax, %%edx\n\t"
3646 "addl %%edi, %%edx\n\t"
3647 "1:\t\n"
3648 : "=d" (iBit),
3649 "=&c" (uECX),
3650 "=&D" (uEDI),
3651 "=&a" (uEAX)
3652 : "0" (0xffffffff),
3653 "mr" (pvBitmap),
3654 "1" (cBits >> 5),
3655 "2" (pvBitmap),
3656 "3" (0xffffffff));
3657# else
3658 cBits = RT_ALIGN_32(cBits, 32);
3659 __asm
3660 {
3661# ifdef __AMD64__
3662 mov rdi, [pvBitmap]
3663 mov rbx, rdi
3664# else
3665 mov edi, [pvBitmap]
3666 mov ebx, edi
3667# endif
3668 mov edx, 0ffffffffh
3669 mov eax, edx
3670 mov ecx, [cBits]
3671 shr ecx, 5
3672 repe scasd
3673 je done
3674
3675# ifdef __AMD64__
3676 lea rdi, [rdi - 4]
3677 xor eax, [rdi]
3678 sub rdi, rbx
3679# else
3680 lea edi, [edi - 4]
3681 xor eax, [edi]
3682 sub edi, ebx
3683# endif
3684 shl edi, 3
3685 bsf edx, eax
3686 add edx, edi
3687 done:
3688 mov [iBit], edx
3689 }
3690# endif
3691 return iBit;
3692 }
3693 return -1;
3694}
3695#endif
3696
3697
3698/**
3699 * Finds the next clear bit in a bitmap.
3700 *
3701 * @returns Index of the next clear bit.
3702 * @returns -1 if no clear bit was found.
3703 * @param pvBitmap Pointer to the bitmap.
3704 * @param cBits The number of bits in the bitmap. Multiple of 32.
3705 * @param iBitPrev The bit returned from the last search.
3706 * The search will start at iBitPrev + 1.
3707 */
3708#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3709DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3710#else
3711DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3712{
3713 int iBit = ++iBitPrev & 31;
3714 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3715 cBits -= iBitPrev & ~31;
3716 if (iBit)
3717 {
3718 /* inspect the first dword. */
3719 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3720# if RT_INLINE_ASM_USES_INTRIN
3721 unsigned long ulBit = 0;
3722 if (_BitScanForward(&ulBit, u32))
3723 return ulBit + iBitPrev;
3724 iBit = -1;
3725# else
3726# if RT_INLINE_ASM_GNU_STYLE
3727 __asm__ __volatile__("bsf %1, %0\n\t"
3728 "jnz 1f\n\t"
3729 "movl $-1, %0\n\t"
3730 "1:\n\t"
3731 : "=r" (iBit)
3732 : "r" (u32));
3733# else
3734 __asm
3735 {
3736 mov edx, [u32]
3737 bsf eax, edx
3738 jnz done
3739 mov eax, 0ffffffffh
3740 done:
3741 mov [iBit], eax
3742 }
3743# endif
3744 if (iBit >= 0)
3745 return iBit + iBitPrev;
3746# endif
3747 /* Search the rest of the bitmap, if there is anything. */
3748 if (cBits > 32)
3749 {
3750 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3751 if (iBit >= 0)
3752 return iBit + (iBitPrev & ~31) + 32;
3753 }
3754 }
3755 else
3756 {
3757 /* Search the rest of the bitmap. */
3758 iBit = ASMBitFirstClear(pvBitmap, cBits);
3759 if (iBit >= 0)
3760 return iBit + (iBitPrev & ~31);
3761 }
3762 return iBit;
3763}
3764#endif
3765
3766
3767/**
3768 * Finds the first set bit in a bitmap.
3769 *
3770 * @returns Index of the first set bit.
3771 * @returns -1 if no set bit was found.
3772 * @param pvBitmap Pointer to the bitmap.
3773 * @param cBits The number of bits in the bitmap. Multiple of 32.
3774 */
3775#if RT_INLINE_ASM_EXTERNAL
3776DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3777#else
3778DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3779{
3780 if (cBits)
3781 {
3782 int32_t iBit;
3783# if RT_INLINE_ASM_GNU_STYLE
3784 RTCCUINTREG uEAX, uECX, uEDI;
3785 cBits = RT_ALIGN_32(cBits, 32);
3786 __asm__ __volatile__("repe; scasl\n\t"
3787 "je 1f\n\t"
3788# ifdef __AMD64__
3789 "lea -4(%%rdi), %%rdi\n\t"
3790 "movl (%%rdi), %%eax\n\t"
3791 "subq %5, %%rdi\n\t"
3792# else
3793 "lea -4(%%edi), %%edi\n\t"
3794 "movl (%%edi), %%eax\n\t"
3795 "subl %5, %%edi\n\t"
3796# endif
3797 "shll $3, %%edi\n\t"
3798 "bsfl %%eax, %%edx\n\t"
3799 "addl %%edi, %%edx\n\t"
3800 "1:\t\n"
3801 : "=d" (iBit),
3802 "=&c" (uECX),
3803 "=&D" (uEDI),
3804 "=&a" (uEAX)
3805 : "0" (0xffffffff),
3806 "mr" (pvBitmap),
3807 "1" (cBits >> 5),
3808 "2" (pvBitmap),
3809 "3" (0));
3810# else
3811 cBits = RT_ALIGN_32(cBits, 32);
3812 __asm
3813 {
3814# ifdef __AMD64__
3815 mov rdi, [pvBitmap]
3816 mov rbx, rdi
3817# else
3818 mov edi, [pvBitmap]
3819 mov ebx, edi
3820# endif
3821 mov edx, 0ffffffffh
3822 xor eax, eax
3823 mov ecx, [cBits]
3824 shr ecx, 5
3825 repe scasd
3826 je done
3827# ifdef __AMD64__
3828 lea rdi, [rdi - 4]
3829 mov eax, [rdi]
3830 sub rdi, rbx
3831# else
3832 lea edi, [edi - 4]
3833 mov eax, [edi]
3834 sub edi, ebx
3835# endif
3836 shl edi, 3
3837 bsf edx, eax
3838 add edx, edi
3839 done:
3840 mov [iBit], edx
3841 }
3842# endif
3843 return iBit;
3844 }
3845 return -1;
3846}
3847#endif
3848
3849
3850/**
3851 * Finds the next set bit in a bitmap.
3852 *
3853 * @returns Index of the next set bit.
3854 * @returns -1 if no set bit was found.
3855 * @param pvBitmap Pointer to the bitmap.
3856 * @param cBits The number of bits in the bitmap. Multiple of 32.
3857 * @param iBitPrev The bit returned from the last search.
3858 * The search will start at iBitPrev + 1.
3859 */
3860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3861DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3862#else
3863DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3864{
3865 int iBit = ++iBitPrev & 31;
3866 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3867 cBits -= iBitPrev & ~31;
3868 if (iBit)
3869 {
3870 /* inspect the first dword. */
3871 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3872# if RT_INLINE_ASM_USES_INTRIN
3873 unsigned long ulBit = 0;
3874 if (_BitScanForward(&ulBit, u32))
3875 return ulBit + iBitPrev;
3876 iBit = -1;
3877# else
3878# if RT_INLINE_ASM_GNU_STYLE
3879 __asm__ __volatile__("bsf %1, %0\n\t"
3880 "jnz 1f\n\t"
3881 "movl $-1, %0\n\t"
3882 "1:\n\t"
3883 : "=r" (iBit)
3884 : "r" (u32));
3885# else
3886 __asm
3887 {
3888 mov edx, u32
3889 bsf eax, edx
3890 jnz done
3891 mov eax, 0ffffffffh
3892 done:
3893 mov [iBit], eax
3894 }
3895# endif
3896 if (iBit >= 0)
3897 return iBit + iBitPrev;
3898# endif
3899 /* Search the rest of the bitmap, if there is anything. */
3900 if (cBits > 32)
3901 {
3902 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3903 if (iBit >= 0)
3904 return iBit + (iBitPrev & ~31) + 32;
3905 }
3906
3907 }
3908 else
3909 {
3910 /* Search the rest of the bitmap. */
3911 iBit = ASMBitFirstSet(pvBitmap, cBits);
3912 if (iBit >= 0)
3913 return iBit + (iBitPrev & ~31);
3914 }
3915 return iBit;
3916}
3917#endif
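
/*
 * Usage sketch: iterating over all set bits in a bitmap by combining
 * ASMBitFirstSet with ASMBitNextSet. The callback type is made up for this
 * illustration.
 */
static void exampleForEachSetBit(volatile void *pvBitmap, uint32_t cBits,
                                 void (*pfnCallback)(int iBit))
{
    int iBit = ASMBitFirstSet(pvBitmap, cBits);
    while (iBit >= 0)
    {
        pfnCallback(iBit);
        iBit = ASMBitNextSet(pvBitmap, cBits, iBit);    /* continues the search at iBit + 1 */
    }
}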
3918
3919
3920/**
3921 * Finds the first bit which is set in the given 32-bit integer.
3922 * Bits are numbered from 1 (least significant) to 32.
3923 *
3924 * @returns index [1..32] of the first set bit.
3925 * @returns 0 if all bits are cleared.
3926 * @param u32 Integer to search for set bits.
3927 * @remark Similar to ffs() in BSD.
3928 */
3929DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3930{
3931# if RT_INLINE_ASM_USES_INTRIN
3932 unsigned long iBit;
3933 if (_BitScanForward(&iBit, u32))
3934 iBit++;
3935 else
3936 iBit = 0;
3937# elif RT_INLINE_ASM_GNU_STYLE
3938 uint32_t iBit;
3939 __asm__ __volatile__("bsf %1, %0\n\t"
3940 "jnz 1f\n\t"
3941 "xorl %0, %0\n\t"
3942 "jmp 2f\n"
3943 "1:\n\t"
3944 "incl %0\n"
3945 "2:\n\t"
3946 : "=r" (iBit)
3947 : "rm" (u32));
3948# else
3949 uint32_t iBit;
3950 _asm
3951 {
3952 bsf eax, [u32]
3953 jnz found
3954 xor eax, eax
3955 jmp done
3956 found:
3957 inc eax
3958 done:
3959 mov [iBit], eax
3960 }
3961# endif
3962 return iBit;
3963}
3964
3965
3966/**
3967 * Finds the first bit which is set in the given 32-bit integer.
3968 * Bits are numbered from 1 (least significant) to 32.
3969 *
3970 * @returns index [1..32] of the first set bit.
3971 * @returns 0 if all bits are cleared.
3972 * @param i32 Integer to search for set bits.
3973 * @remark Similar to ffs() in BSD.
3974 */
3975DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3976{
3977 return ASMBitFirstSetU32((uint32_t)i32);
3978}
3979
3980
3981/**
3982 * Finds the last bit which is set in the given 32-bit integer.
3983 * Bits are numbered from 1 (least significant) to 32.
3984 *
3985 * @returns index [1..32] of the last set bit.
3986 * @returns 0 if all bits are cleared.
3987 * @param u32 Integer to search for set bits.
3988 * @remark Similar to fls() in BSD.
3989 */
3990DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3991{
3992# if RT_INLINE_ASM_USES_INTRIN
3993 unsigned long iBit;
3994 if (_BitScanReverse(&iBit, u32))
3995 iBit++;
3996 else
3997 iBit = 0;
3998# elif RT_INLINE_ASM_GNU_STYLE
3999 uint32_t iBit;
4000 __asm__ __volatile__("bsrl %1, %0\n\t"
4001 "jnz 1f\n\t"
4002 "xorl %0, %0\n\t"
4003 "jmp 2f\n"
4004 "1:\n\t"
4005 "incl %0\n"
4006 "2:\n\t"
4007 : "=r" (iBit)
4008 : "rm" (u32));
4009# else
4010 uint32_t iBit;
4011 _asm
4012 {
4013 bsr eax, [u32]
4014 jnz found
4015 xor eax, eax
4016 jmp done
4017 found:
4018 inc eax
4019 done:
4020 mov [iBit], eax
4021 }
4022# endif
4023 return iBit;
4024}
4025
4026
4027/**
4028 * Finds the last bit which is set in the given 32-bit integer.
4029 * Bits are numbered from 1 (least significant) to 32.
4030 *
4031 * @returns index [1..32] of the last set bit.
4032 * @returns 0 if all bits are cleared.
4033 * @param i32 Integer to search for set bits.
4034 * @remark Similar to fls() in BSD.
4035 */
4036DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4037{
4038 return ASMBitLastSetU32((uint32_t)i32);
4039}
4040
4041
4042/**
4043 * Reverses the byte order of the given 32-bit integer.
4044 * @param u32 The integer to byte swap.
4045 */
4046DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4047{
4048#if RT_INLINE_ASM_USES_INTRIN
4049 u32 = _byteswap_ulong(u32);
4050#elif RT_INLINE_ASM_GNU_STYLE
4051 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4052#else
4053 _asm
4054 {
4055 mov eax, [u32]
4056 bswap eax
4057 mov [u32], eax
4058 }
4059#endif
4060 return u32;
4061}
4062
4063/** @} */
4064
4065
4066/** @} */
4067#endif
4068