VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 1703

Last change on this file since 1703 was 1228, checked in by vboxsync, 18 years ago

ASMIntDisableFlags.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 95.1 KB
Line 
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 when compiling with a Microsoft compiler whose _MSC_VER is
 * 1400 or higher; <intrin.h> is then included and the intrinsics used by this
 * header are enabled.  If nothing has defined it by the end of this section
 * it is defined as 0.
 */
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef __AMD64__
   /* Intrinsics only requested for AMD64 builds. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly, in which
 * case the ASM* functions are implemented in an external .asm file.
 * Defined as 0 otherwise.
 *
 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
 *         inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(__AMD64__)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
109
/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly.
 * Every compiler that does not define _MSC_VER is treated as such; with
 * _MSC_VER defined the value is 0.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
118
119
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR image.
 * Packed to 1-byte alignment so the address directly follows the 16-bit size.
 */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
#pragma pack()
/** IDTR */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR image. */
typedef struct RTIDTR *PRTIDTR;
131
#pragma pack(1)
/**
 * GDTR image.
 * Packed to 1-byte alignment so the address directly follows the 16-bit size.
 */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
#pragma pack()
/** GDTR */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR image. */
typedef struct RTGDTR *PRTGDTR;
142
143
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress intrinsic when _MSC_VER is defined and to
 * __builtin_return_address(0) for GNU C (and when generating Doxygen docs).
 * Any other compiler is rejected at compile time.
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress()     _ReturnAddress()
#elif defined(__GNUC__) || defined(__DOXYGEN__)
# define ASMReturnAddress()     __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241/**
242 * Get the cs register.
243 * @returns cs.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334# endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
762
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR0 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
1248
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1289 * @returns Register content.
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
1321
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344#else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
/** @deprecated Old name, forwards to ASMOutU8. */
#define ASMOutB(p, b)   ASMOutU8((p), (b))
/** @deprecated Old name, forwards to ASMInU8. */
#define ASMInB(p)       ASMInU8((p))
1511
1512/**
1513 * Writes a 8-bit unsigned integer to an I/O port.
1514 *
1515 * @param Port I/O port to read from.
1516 * @param u8 8-bit integer to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1520#else
1521DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1522{
1523# if RT_INLINE_ASM_GNU_STYLE
1524 __asm__ __volatile__("outb %b1, %w0\n\t"
1525 :: "Nd" (Port),
1526 "a" (u8));
1527
1528# elif RT_INLINE_ASM_USES_INTRIN
1529 __outbyte(Port, u8);
1530
1531# else
1532 __asm
1533 {
1534 mov dx, [Port]
1535 mov al, [u8]
1536 out dx, al
1537 }
1538# endif
1539}
1540#endif
1541
1542
1543/**
1544 * Gets a 8-bit unsigned integer from an I/O port.
1545 *
1546 * @returns 8-bit integer.
1547 * @param Port I/O port to read from.
1548 */
1549#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1550DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1551#else
1552DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1553{
1554 uint8_t u8;
1555# if RT_INLINE_ASM_GNU_STYLE
1556 __asm__ __volatile__("inb %w1, %b0\n\t"
1557 : "=a" (u8)
1558 : "Nd" (Port));
1559
1560# elif RT_INLINE_ASM_USES_INTRIN
1561 u8 = __inbyte(Port);
1562
1563# else
1564 __asm
1565 {
1566 mov dx, [Port]
1567 in al, dx
1568 mov [u8], al
1569 }
1570# endif
1571 return u8;
1572}
1573#endif
1574
1575
1576/**
1577 * Writes a 16-bit unsigned integer to an I/O port.
1578 *
1579 * @param Port I/O port to read from.
1580 * @param u16 16-bit integer to write.
1581 */
1582#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1583DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1584#else
1585DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1586{
1587# if RT_INLINE_ASM_GNU_STYLE
1588 __asm__ __volatile__("outw %w1, %w0\n\t"
1589 :: "Nd" (Port),
1590 "a" (u16));
1591
1592# elif RT_INLINE_ASM_USES_INTRIN
1593 __outword(Port, u16);
1594
1595# else
1596 __asm
1597 {
1598 mov dx, [Port]
1599 mov ax, [u16]
1600 out dx, ax
1601 }
1602# endif
1603}
1604#endif
1605
1606
1607/**
1608 * Gets a 16-bit unsigned integer from an I/O port.
1609 *
1610 * @returns 16-bit integer.
1611 * @param Port I/O port to read from.
1612 */
1613#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1614DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1615#else
1616DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1617{
1618 uint16_t u16;
1619# if RT_INLINE_ASM_GNU_STYLE
1620 __asm__ __volatile__("inw %w1, %w0\n\t"
1621 : "=a" (u16)
1622 : "Nd" (Port));
1623
1624# elif RT_INLINE_ASM_USES_INTRIN
1625 u16 = __inword(Port);
1626
1627# else
1628 __asm
1629 {
1630 mov dx, [Port]
1631 in ax, dx
1632 mov [u16], ax
1633 }
1634# endif
1635 return u16;
1636}
1637#endif
1638
1639
1640/**
1641 * Writes a 32-bit unsigned integer to an I/O port.
1642 *
1643 * @param Port I/O port to read from.
1644 * @param u32 32-bit integer to write.
1645 */
1646#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1647DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1648#else
1649DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1650{
1651# if RT_INLINE_ASM_GNU_STYLE
1652 __asm__ __volatile__("outl %1, %w0\n\t"
1653 :: "Nd" (Port),
1654 "a" (u32));
1655
1656# elif RT_INLINE_ASM_USES_INTRIN
1657 __outdword(Port, u32);
1658
1659# else
1660 __asm
1661 {
1662 mov dx, [Port]
1663 mov eax, [u32]
1664 out dx, eax
1665 }
1666# endif
1667}
1668#endif
1669
1670
1671/**
1672 * Gets a 32-bit unsigned integer from an I/O port.
1673 *
1674 * @returns 32-bit integer.
1675 * @param Port I/O port to read from.
1676 */
1677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1678DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1679#else
1680DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1681{
1682 uint32_t u32;
1683# if RT_INLINE_ASM_GNU_STYLE
1684 __asm__ __volatile__("inl %w1, %0\n\t"
1685 : "=a" (u32)
1686 : "Nd" (Port));
1687
1688# elif RT_INLINE_ASM_USES_INTRIN
1689 u32 = __indword(Port);
1690
1691# else
1692 __asm
1693 {
1694 mov dx, [Port]
1695 in eax, dx
1696 mov [u32], eax
1697 }
1698# endif
1699 return u32;
1700}
1701#endif
1702
1703
1704/**
1705 * Atomically Exchange an unsigned 8-bit value.
1706 *
1707 * @returns Current *pu8 value
1708 * @param pu8 Pointer to the 8-bit variable to update.
1709 * @param u8 The 8-bit value to assign to *pu8.
1710 */
1711#if RT_INLINE_ASM_EXTERNAL
1712DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1713#else
1714DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1715{
1716# if RT_INLINE_ASM_GNU_STYLE
1717 __asm__ __volatile__("xchgb %0, %1\n\t"
1718 : "=m" (*pu8),
1719 "=r" (u8)
1720 : "1" (u8));
1721# else
1722 __asm
1723 {
1724# ifdef __AMD64__
1725 mov rdx, [pu8]
1726 mov al, [u8]
1727 xchg [rdx], al
1728 mov [u8], al
1729# else
1730 mov edx, [pu8]
1731 mov al, [u8]
1732 xchg [edx], al
1733 mov [u8], al
1734# endif
1735 }
1736# endif
1737 return u8;
1738}
1739#endif
1740
1741
1742/**
1743 * Atomically Exchange a signed 8-bit value.
1744 *
1745 * @returns Current *pu8 value
1746 * @param pi8 Pointer to the 8-bit variable to update.
1747 * @param i8 The 8-bit value to assign to *pi8.
1748 */
1749DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1750{
1751 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1752}
1753
1754
1755/**
1756 * Atomically Exchange an unsigned 16-bit value.
1757 *
1758 * @returns Current *pu16 value
1759 * @param pu16 Pointer to the 16-bit variable to update.
1760 * @param u16 The 16-bit value to assign to *pu16.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL
1763DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1764#else
1765DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("xchgw %0, %1\n\t"
1769 : "=m" (*pu16),
1770 "=r" (u16)
1771 : "1" (u16));
1772# else
1773 __asm
1774 {
1775# ifdef __AMD64__
1776 mov rdx, [pu16]
1777 mov ax, [u16]
1778 xchg [rdx], ax
1779 mov [u16], ax
1780# else
1781 mov edx, [pu16]
1782 mov ax, [u16]
1783 xchg [edx], ax
1784 mov [u16], ax
1785# endif
1786 }
1787# endif
1788 return u16;
1789}
1790#endif
1791
1792
1793/**
1794 * Atomically Exchange a signed 16-bit value.
1795 *
1796 * @returns Current *pu16 value
1797 * @param pi16 Pointer to the 16-bit variable to update.
1798 * @param i16 The 16-bit value to assign to *pi16.
1799 */
1800DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1801{
1802 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1803}
1804
1805
1806/**
1807 * Atomically Exchange an unsigned 32-bit value.
1808 *
1809 * @returns Current *pu32 value
1810 * @param pu32 Pointer to the 32-bit variable to update.
1811 * @param u32 The 32-bit value to assign to *pu32.
1812 */
1813#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1814DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1815#else
1816DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1817{
1818# if RT_INLINE_ASM_GNU_STYLE
1819 __asm__ __volatile__("xchgl %0, %1\n\t"
1820 : "=m" (*pu32),
1821 "=r" (u32)
1822 : "1" (u32));
1823
1824# elif RT_INLINE_ASM_USES_INTRIN
1825 u32 = _InterlockedExchange((long *)pu32, u32);
1826
1827# else
1828 __asm
1829 {
1830# ifdef __AMD64__
1831 mov rdx, [pu32]
1832 mov eax, u32
1833 xchg [rdx], eax
1834 mov [u32], eax
1835# else
1836 mov edx, [pu32]
1837 mov eax, u32
1838 xchg [edx], eax
1839 mov [u32], eax
1840# endif
1841 }
1842# endif
1843 return u32;
1844}
1845#endif
1846
1847
1848/**
1849 * Atomically Exchange a signed 32-bit value.
1850 *
1851 * @returns Current *pu32 value
1852 * @param pi32 Pointer to the 32-bit variable to update.
1853 * @param i32 The 32-bit value to assign to *pi32.
1854 */
1855DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1856{
1857 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1858}
1859
1860
1861/**
1862 * Atomically Exchange an unsigned 64-bit value.
1863 *
1864 * @returns Current *pu64 value
1865 * @param pu64 Pointer to the 64-bit variable to update.
1866 * @param u64 The 64-bit value to assign to *pu64.
1867 */
1868#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1869DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1870#else
1871DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1872{
1873# if defined(__AMD64__)
1874# if RT_INLINE_ASM_USES_INTRIN
1875 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1876
1877# elif RT_INLINE_ASM_GNU_STYLE
1878 __asm__ __volatile__("xchgq %0, %1\n\t"
1879 : "=m" (*pu64),
1880 "=r" (u64)
1881 : "1" (u64));
1882# else
1883 __asm
1884 {
1885 mov rdx, [pu64]
1886 mov rax, [u64]
1887 xchg [rdx], rax
1888 mov [u64], rax
1889 }
1890# endif
1891# else /* !__AMD64__ */
1892# if RT_INLINE_ASM_GNU_STYLE
1893# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1894 uint32_t u32 = (uint32_t)u64;
1895 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1896 "xchgl %%ebx, %3\n\t"
1897 "1:\n\t"
1898 "lock; cmpxchg8b (%5)\n\t"
1899 "jnz 1b\n\t"
1900 "xchgl %%ebx, %3\n\t"
1901 /*"xchgl %%esi, %5\n\t"*/
1902 : "=A" (u64),
1903 "=m" (*pu64)
1904 : "0" (*pu64),
1905 "m" ( u32 ),
1906 "c" ( (uint32_t)(u64 >> 32) ),
1907 "S" (pu64) );
1908# else /* !PIC */
1909 __asm__ __volatile__("1:\n\t"
1910 "lock; cmpxchg8b %1\n\t"
1911 "jnz 1b\n\t"
1912 : "=A" (u64),
1913 "=m" (*pu64)
1914 : "0" (*pu64),
1915 "b" ( (uint32_t)u64 ),
1916 "c" ( (uint32_t)(u64 >> 32) ));
1917# endif
1918# else
1919 __asm
1920 {
1921 mov ebx, dword ptr [u64]
1922 mov ecx, dword ptr [u64 + 4]
1923 mov edi, pu64
1924 mov eax, dword ptr [edi]
1925 mov edx, dword ptr [edi + 4]
1926 retry:
1927 lock cmpxchg8b [edi]
1928 jnz retry
1929 mov dword ptr [u64], eax
1930 mov dword ptr [u64 + 4], edx
1931 }
1932# endif
1933# endif /* !__AMD64__ */
1934 return u64;
1935}
1936#endif
1937
1938
1939/**
1940 * Atomically Exchange an signed 64-bit value.
1941 *
1942 * @returns Current *pi64 value
1943 * @param pi64 Pointer to the 64-bit variable to update.
1944 * @param i64 The 64-bit value to assign to *pi64.
1945 */
1946DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1947{
1948 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1949}
1950
1951
#ifdef __AMD64__
/**
 * Exchange an unsigned 128-bit value.
 *
 * @returns The value *pu128 held before the exchange.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead: the low
 *          and the high half are each exchanged atomically, but the pair is not.
 * @todo    Add a genuinely atomic variant (cmpxchg16b) once hardware and
 *          assembler support can be relied upon.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /** @todo this is clumsy code */
    RTUINT128U u128Ret;
    u128Ret.u = u128;
    u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
    u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
    return u128Ret.u;
}
# endif
#endif /* __AMD64__ */
2011
2012
2013/**
2014 * Atomically Reads a unsigned 64-bit value.
2015 *
2016 * @returns Current *pu64 value
2017 * @param pu64 Pointer to the 64-bit variable to read.
2018 * The memory pointed to must be writable.
2019 * @remark This will fault if the memory is read-only!
2020 */
2021#if RT_INLINE_ASM_EXTERNAL
2022DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2023#else
2024DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2025{
2026 uint64_t u64;
2027# ifdef __AMD64__
2028# if RT_INLINE_ASM_GNU_STYLE
2029 __asm__ __volatile__("movq %1, %0\n\t"
2030 : "=r" (u64)
2031 : "m" (*pu64));
2032# else
2033 __asm
2034 {
2035 mov rdx, [pu64]
2036 mov rax, [rdx]
2037 mov [u64], rax
2038 }
2039# endif
2040# else /* !__AMD64__ */
2041# if RT_INLINE_ASM_GNU_STYLE
2042# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2043 uint32_t u32EBX = 0;
2044 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2045 "lock; cmpxchg8b (%5)\n\t"
2046 "xchgl %%ebx, %3\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (0),
2050 "m" (u32EBX),
2051 "c" (0),
2052 "S" (pu64));
2053# else /* !PIC */
2054 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2055 : "=A" (u64),
2056 "=m" (*pu64)
2057 : "0" (0),
2058 "b" (0),
2059 "c" (0));
2060# endif
2061# else
2062 __asm
2063 {
2064 xor eax, eax
2065 xor edx, edx
2066 mov edi, pu64
2067 xor ecx, ecx
2068 xor ebx, ebx
2069 lock cmpxchg8b [edi]
2070 mov dword ptr [u64], eax
2071 mov dword ptr [u64 + 4], edx
2072 }
2073# endif
2074# endif /* !__AMD64__ */
2075 return u64;
2076}
2077#endif
2078
2079
2080/**
2081 * Atomically Reads a signed 64-bit value.
2082 *
2083 * @returns Current *pi64 value
2084 * @param pi64 Pointer to the 64-bit variable to read.
2085 * The memory pointed to must be writable.
2086 * @remark This will fault if the memory is read-only!
2087 */
2088DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2089{
2090 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2091}
2092
2093
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * The previous value is discarded. Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* The cast makes the argument match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); break; \
        } \
    } while (0)
2111
2112
2113/**
2114 * Atomically Exchange a pointer value.
2115 *
2116 * @returns Current *ppv value
2117 * @param ppv Pointer to the pointer variable to update.
2118 * @param pv The pointer value to assign to *ppv.
2119 */
2120DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2121{
2122#if ARCH_BITS == 32
2123 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2124#elif ARCH_BITS == 64
2125 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2126#else
2127# error "ARCH_BITS is bogus"
2128#endif
2129}
2130
2131
2132/**
2133 * Atomically Compare and Exchange an unsigned 32-bit value.
2134 *
2135 * @returns true if xchg was done.
2136 * @returns false if xchg wasn't done.
2137 *
2138 * @param pu32 Pointer to the value to update.
2139 * @param u32New The new value to assigned to *pu32.
2140 * @param u32Old The old value to *pu32 compare with.
2141 */
2142#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2143DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2144#else
2145DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2146{
2147# if RT_INLINE_ASM_GNU_STYLE
2148 uint32_t u32Ret;
2149 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2150 "setz %%al\n\t"
2151 "movzx %%al, %%eax\n\t"
2152 : "=m" (*pu32),
2153 "=a" (u32Ret)
2154 : "r" (u32New),
2155 "1" (u32Old));
2156 return (bool)u32Ret;
2157
2158# elif RT_INLINE_ASM_USES_INTRIN
2159 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2160
2161# else
2162 uint32_t u32Ret;
2163 __asm
2164 {
2165# ifdef __AMD64__
2166 mov rdx, [pu32]
2167# else
2168 mov edx, [pu32]
2169# endif
2170 mov eax, [u32Old]
2171 mov ecx, [u32New]
2172# ifdef __AMD64__
2173 lock cmpxchg [rdx], ecx
2174# else
2175 lock cmpxchg [edx], ecx
2176# endif
2177 setz al
2178 movzx eax, al
2179 mov [u32Ret], eax
2180 }
2181 return !!u32Ret;
2182# endif
2183}
2184#endif
2185
2186
2187/**
2188 * Atomically Compare and Exchange a signed 32-bit value.
2189 *
2190 * @returns true if xchg was done.
2191 * @returns false if xchg wasn't done.
2192 *
2193 * @param pi32 Pointer to the value to update.
2194 * @param i32New The new value to assigned to *pi32.
2195 * @param i32Old The old value to *pi32 compare with.
2196 */
2197DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2198{
2199 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2200}
2201
2202
2203/**
2204 * Atomically Compare and exchange an unsigned 64-bit value.
2205 *
2206 * @returns true if xchg was done.
2207 * @returns false if xchg wasn't done.
2208 *
2209 * @param pu64 Pointer to the 64-bit variable to update.
2210 * @param u64New The 64-bit value to assign to *pu64.
2211 * @param u64Old The value to compare with.
2212 */
2213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2214DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2215#else
2216DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2217{
2218# if RT_INLINE_ASM_USES_INTRIN
2219 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2220
2221# elif defined(__AMD64__)
2222# if RT_INLINE_ASM_GNU_STYLE
2223 uint64_t u64Ret;
2224 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2225 "setz %%al\n\t"
2226 "movzx %%al, %%eax\n\t"
2227 : "=m" (*pu64),
2228 "=a" (u64Ret)
2229 : "r" (u64New),
2230 "1" (u64Old));
2231 return (bool)u64Ret;
2232# else
2233 bool fRet;
2234 __asm
2235 {
2236 mov rdx, [pu32]
2237 mov rax, [u64Old]
2238 mov rcx, [u64New]
2239 lock cmpxchg [rdx], rcx
2240 setz al
2241 mov [fRet], al
2242 }
2243 return fRet;
2244# endif
2245# else /* !__AMD64__ */
2246 uint32_t u32Ret;
2247# if RT_INLINE_ASM_GNU_STYLE
2248# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2249 uint32_t u32 = (uint32_t)u64New;
2250 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2251 "lock; cmpxchg8b (%5)\n\t"
2252 "setz %%al\n\t"
2253 "xchgl %%ebx, %3\n\t"
2254 "movzx %%al, %%eax\n\t"
2255 : "=a" (u32Ret),
2256 "=m" (*pu64)
2257 : "A" (u64Old),
2258 "m" ( u32 ),
2259 "c" ( (uint32_t)(u64New >> 32) ),
2260 "S" (pu64) );
2261# else /* !PIC */
2262 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2263 "setz %%al\n\t"
2264 "movzx %%al, %%eax\n\t"
2265 : "=a" (u32Ret),
2266 "=m" (*pu64)
2267 : "A" (u64Old),
2268 "b" ( (uint32_t)u64New ),
2269 "c" ( (uint32_t)(u64New >> 32) ));
2270# endif
2271 return (bool)u32Ret;
2272# else
2273 __asm
2274 {
2275 mov ebx, dword ptr [u64New]
2276 mov ecx, dword ptr [u64New + 4]
2277 mov edi, [pu64]
2278 mov eax, dword ptr [u64Old]
2279 mov edx, dword ptr [u64Old + 4]
2280 lock cmpxchg8b [edi]
2281 setz al
2282 movzx eax, al
2283 mov dword ptr [u32Ret], eax
2284 }
2285 return !!u32Ret;
2286# endif
2287# endif /* !__AMD64__ */
2288}
2289#endif
2290
2291
2292/**
2293 * Atomically Compare and exchange a signed 64-bit value.
2294 *
2295 * @returns true if xchg was done.
2296 * @returns false if xchg wasn't done.
2297 *
2298 * @param pi64 Pointer to the 64-bit variable to update.
2299 * @param i64 The 64-bit value to assign to *pu64.
2300 * @param i64Old The value to compare with.
2301 */
2302DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2303{
2304 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2305}
2306
2307
2308
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange
 * function. Any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* The cast makes the argument match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2330
2331
2332/**
2333 * Atomically Compare and Exchange a pointer value.
2334 *
2335 * @returns true if xchg was done.
2336 * @returns false if xchg wasn't done.
2337 *
2338 * @param ppv Pointer to the value to update.
2339 * @param pvNew The new value to assigned to *ppv.
2340 * @param pvOld The old value to *ppv compare with.
2341 */
2342DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2343{
2344#if ARCH_BITS == 32
2345 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2346#elif ARCH_BITS == 64
2347 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2348#else
2349# error "ARCH_BITS is bogus"
2350#endif
2351}
2352
2353
2354/**
2355 * Atomically increment a 32-bit value.
2356 *
2357 * @returns The new value.
2358 * @param pu32 Pointer to the value to increment.
2359 */
2360#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2361DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2362#else
2363DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2364{
2365 uint32_t u32;
2366# if RT_INLINE_ASM_USES_INTRIN
2367 u32 = _InterlockedIncrement((long *)pu32);
2368
2369# elif RT_INLINE_ASM_GNU_STYLE
2370 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2371 "incl %0\n\t"
2372 : "=r" (u32),
2373 "=m" (*pu32)
2374 : "0" (1)
2375 : "memory");
2376# else
2377 __asm
2378 {
2379 mov eax, 1
2380# ifdef __AMD64__
2381 mov rdx, [pu32]
2382 lock xadd [rdx], eax
2383# else
2384 mov edx, [pu32]
2385 lock xadd [edx], eax
2386# endif
2387 inc eax
2388 mov u32, eax
2389 }
2390# endif
2391 return u32;
2392}
2393#endif
2394
2395
2396/**
2397 * Atomically increment a signed 32-bit value.
2398 *
2399 * @returns The new value.
2400 * @param pi32 Pointer to the value to increment.
2401 */
2402DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2403{
2404 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2405}
2406
2407
2408/**
2409 * Atomically decrement an unsigned 32-bit value.
2410 *
2411 * @returns The new value.
2412 * @param pu32 Pointer to the value to decrement.
2413 */
2414#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2415DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2416#else
2417DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2418{
2419 uint32_t u32;
2420# if RT_INLINE_ASM_USES_INTRIN
2421 u32 = _InterlockedDecrement((long *)pu32);
2422
2423# elif RT_INLINE_ASM_GNU_STYLE
2424 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2425 "decl %0\n\t"
2426 : "=r" (u32),
2427 "=m" (*pu32)
2428 : "0" (-1)
2429 : "memory");
2430# else
2431 __asm
2432 {
2433 mov eax, -1
2434# ifdef __AMD64__
2435 mov rdx, [pu32]
2436 lock xadd [rdx], eax
2437# else
2438 mov edx, [pu32]
2439 lock xadd [edx], eax
2440# endif
2441 dec eax
2442 mov u32, eax
2443 }
2444# endif
2445 return u32;
2446}
2447#endif
2448
2449
2450/**
2451 * Atomically decrement a signed 32-bit value.
2452 *
2453 * @returns The new value.
2454 * @param pi32 Pointer to the value to decrement.
2455 */
2456DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2457{
2458 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2459}
2460
2461
2462/**
2463 * Atomically Or an unsigned 32-bit value.
2464 *
2465 * @param pu32 Pointer to the pointer variable to OR u32 with.
2466 * @param u32 The value to OR *pu32 with.
2467 */
2468#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2469DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2470#else
2471DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2472{
2473# if RT_INLINE_ASM_USES_INTRIN
2474 _InterlockedOr((long volatile *)pu32, (long)u32);
2475
2476# elif RT_INLINE_ASM_GNU_STYLE
2477 __asm__ __volatile__("lock; orl %1, %0\n\t"
2478 : "=m" (*pu32)
2479 : "r" (u32));
2480# else
2481 __asm
2482 {
2483 mov eax, [u32]
2484# ifdef __AMD64__
2485 mov rdx, [pu32]
2486 lock or [rdx], eax
2487# else
2488 mov edx, [pu32]
2489 lock or [edx], eax
2490# endif
2491 }
2492# endif
2493}
2494#endif
2495
2496
2497/**
2498 * Atomically Or a signed 32-bit value.
2499 *
2500 * @param pi32 Pointer to the pointer variable to OR u32 with.
2501 * @param i32 The value to OR *pu32 with.
2502 */
2503DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2504{
2505 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2506}
2507
2508
2509/**
2510 * Atomically And an unsigned 32-bit value.
2511 *
2512 * @param pu32 Pointer to the pointer variable to AND u32 with.
2513 * @param u32 The value to AND *pu32 with.
2514 */
2515#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2516DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2517#else
2518DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2519{
2520# if RT_INLINE_ASM_USES_INTRIN
2521 _InterlockedAnd((long volatile *)pu32, u32);
2522
2523# elif RT_INLINE_ASM_GNU_STYLE
2524 __asm__ __volatile__("lock; andl %1, %0\n\t"
2525 : "=m" (*pu32)
2526 : "r" (u32));
2527# else
2528 __asm
2529 {
2530 mov eax, [u32]
2531# ifdef __AMD64__
2532 mov rdx, [pu32]
2533 lock and [rdx], eax
2534# else
2535 mov edx, [pu32]
2536 lock and [edx], eax
2537# endif
2538 }
2539# endif
2540}
2541#endif
2542
2543
2544/**
2545 * Atomically And a signed 32-bit value.
2546 *
2547 * @param pi32 Pointer to the pointer variable to AND i32 with.
2548 * @param i32 The value to AND *pi32 with.
2549 */
2550DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2551{
2552 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2553}
2554
2555
2556/**
2557 * Invalidate page.
2558 *
2559 * @param pv Address of the page to invalidate.
2560 */
2561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2562DECLASM(void) ASMInvalidatePage(void *pv);
2563#else
2564DECLINLINE(void) ASMInvalidatePage(void *pv)
2565{
2566# if RT_INLINE_ASM_USES_INTRIN
2567 __invlpg(pv);
2568
2569# elif RT_INLINE_ASM_GNU_STYLE
2570 __asm__ __volatile__("invlpg %0\n\t"
2571 : : "m" (*(uint8_t *)pv));
2572# else
2573 __asm
2574 {
2575# ifdef __AMD64__
2576 mov rax, [pv]
2577 invlpg [rax]
2578# else
2579 mov eax, [pv]
2580 invlpg [eax]
2581# endif
2582 }
2583# endif
2584}
2585#endif
2586
2587
/*
 * Compile-time sanity check: ASMMemZeroPage hard-codes a 0x1000 byte page.
 * PAGE_SIZE is only evaluated when it is defined and NT_INCLUDED is not,
 * which is why the two conditions are kept in separate #if levels.
 */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
2593
2594/**
2595 * Zeros a 4K memory page.
2596 *
2597 * @param pv Pointer to the memory block. This must be page aligned.
2598 */
2599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2600DECLASM(void) ASMMemZeroPage(volatile void *pv);
2601# else
2602DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2603{
2604# if RT_INLINE_ASM_USES_INTRIN
2605# ifdef __AMD64__
2606 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2607# else
2608 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2609# endif
2610
2611# elif RT_INLINE_ASM_GNU_STYLE
2612 RTUINTREG uDummy;
2613# ifdef __AMD64__
2614 __asm__ __volatile__ ("rep stosq"
2615 : "=D" (pv),
2616 "=c" (uDummy)
2617 : "0" (pv),
2618 "c" (0x1000 >> 3),
2619 "a" (0)
2620 : "memory");
2621# else
2622 __asm__ __volatile__ ("rep stosl"
2623 : "=D" (pv),
2624 "=c" (uDummy)
2625 : "0" (pv),
2626 "c" (0x1000 >> 2),
2627 "a" (0)
2628 : "memory");
2629# endif
2630# else
2631 __asm
2632 {
2633# ifdef __AMD64__
2634 xor rax, rax
2635 mov ecx, 0200h
2636 mov rdi, [pv]
2637 rep stosq
2638# else
2639 xor eax, eax
2640 mov ecx, 0400h
2641 mov edi, [pv]
2642 rep stosd
2643# endif
2644 }
2645# endif
2646}
2647# endif
2648
2649
2650/**
2651 * Zeros a memory block with a 32-bit aligned size.
2652 *
2653 * @param pv Pointer to the memory block.
2654 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2655 */
2656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2657DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2658#else
2659DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2660{
2661# if RT_INLINE_ASM_USES_INTRIN
2662 __stosd((unsigned long *)pv, 0, cb >> 2);
2663
2664# elif RT_INLINE_ASM_GNU_STYLE
2665 __asm__ __volatile__ ("rep stosl"
2666 : "=D" (pv),
2667 "=c" (cb)
2668 : "0" (pv),
2669 "1" (cb >> 2),
2670 "a" (0)
2671 : "memory");
2672# else
2673 __asm
2674 {
2675 xor eax, eax
2676# ifdef __AMD64__
2677 mov rcx, [cb]
2678 shr rcx, 2
2679 mov rdi, [pv]
2680# else
2681 mov ecx, [cb]
2682 shr ecx, 2
2683 mov edi, [pv]
2684# endif
2685 rep stosd
2686 }
2687# endif
2688}
2689#endif
2690
2691
2692/**
2693 * Fills a memory block with a 32-bit aligned size.
2694 *
2695 * @param pv Pointer to the memory block.
2696 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2697 * @param u32 The value to fill with.
2698 */
2699#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2700DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2701#else
2702DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2703{
2704# if RT_INLINE_ASM_USES_INTRIN
2705 __stosd((unsigned long *)pv, 0, cb >> 2);
2706
2707# elif RT_INLINE_ASM_GNU_STYLE
2708 __asm__ __volatile__ ("rep stosl"
2709 : "=D" (pv),
2710 "=c" (cb)
2711 : "0" (pv),
2712 "1" (cb >> 2),
2713 "a" (u32)
2714 : "memory");
2715# else
2716 __asm
2717 {
2718# ifdef __AMD64__
2719 mov rcx, [cb]
2720 shr rcx, 2
2721 mov rdi, [pv]
2722# else
2723 mov ecx, [cb]
2724 shr ecx, 2
2725 mov edi, [pv]
2726# endif
2727 mov eax, [u32]
2728 rep stosd
2729 }
2730# endif
2731}
2732#endif
2733
2734
2735
2736/**
2737 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2738 *
2739 * @returns u32F1 * u32F2.
2740 */
2741#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2742DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2743#else
2744DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2745{
2746# ifdef __AMD64__
2747 return (uint64_t)u32F1 * u32F2;
2748# else /* !__AMD64__ */
2749 uint64_t u64;
2750# if RT_INLINE_ASM_GNU_STYLE
2751 __asm__ __volatile__("mull %%edx"
2752 : "=A" (u64)
2753 : "a" (u32F2), "d" (u32F1));
2754# else
2755 __asm
2756 {
2757 mov edx, [u32F1]
2758 mov eax, [u32F2]
2759 mul edx
2760 mov dword ptr [u64], eax
2761 mov dword ptr [u64 + 4], edx
2762 }
2763# endif
2764 return u64;
2765# endif /* !__AMD64__ */
2766}
2767#endif
2768
2769
/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * On AMD64 this is a plain C widening multiplication.  Otherwise the x86
 * one-operand IMUL instruction is used and the product is picked up from the
 * EDX:EAX register pair (low dword in EAX, high dword in EDX).
 *
 * @returns i32F1 * i32F2.
 * @param   i32F1   The first factor.
 * @param   i32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
#else
DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
{
# ifdef __AMD64__
    return (int64_t)i32F1 * i32F2;
# else /* !__AMD64__ */
    int64_t i64;
#  if RT_INLINE_ASM_GNU_STYLE
    /* The "A" constraint ties the 64-bit result to the EDX:EAX pair. */
    __asm__ __volatile__("imull %%edx"
                         : "=A" (i64)
                         : "a" (i32F2), "d" (i32F1));
#  else
    __asm
    {
        mov     edx, [i32F1]
        mov     eax, [i32F2]
        imul    edx
        mov     dword ptr [i64], eax
        mov     dword ptr [i64 + 4], edx
    }
#  endif
    return i64;
# endif /* !__AMD64__ */
}
#endif
2802
2803
/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * On AMD64 the division is done in C and the quotient is truncated to 32 bits.
 * Otherwise the x86 DIV instruction divides EDX:EAX by the 32-bit divisor and
 * only the quotient (EAX) is returned; the remainder is discarded.
 *
 * @returns u64 / u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 * @remark  NOTE(review): the 32-bit DIV instruction raises a divide error when
 *          the quotient does not fit in 32 bits, whereas the AMD64 C path
 *          truncates - callers presumably guarantee the quotient fits; confirm.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef __AMD64__
    return (uint32_t)(u64 / u32);
# else /* !__AMD64__ */
#  if RT_INLINE_ASM_GNU_STYLE
    /* uDummy only exists to tell the compiler that EDX is overwritten (remainder). */
    RTUINTREG uDummy;
    __asm__ __volatile__("divl %3"
                         : "=a" (u32), "=d"(uDummy)
                         : "A" (u64), "r" (u32));
#  else
    __asm
    {
        mov     eax, dword ptr [u64]
        mov     edx, dword ptr [u64 + 4]
        mov     ecx, [u32]
        div     ecx
        mov     [u32], eax
    }
#  endif
    /* u32 has been reused to hold the quotient. */
    return u32;
# endif /* !__AMD64__ */
}
#endif
2836
2837
/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * On AMD64 the division is done in C and the quotient is truncated to 32 bits.
 * Otherwise the x86 IDIV instruction divides EDX:EAX by the 32-bit divisor and
 * only the quotient (EAX) is returned; the remainder is discarded.
 *
 * @returns i64 / i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 * @remark  NOTE(review): the 32-bit IDIV instruction raises a divide error when
 *          the quotient does not fit in 32 bits, whereas the AMD64 C path
 *          truncates - callers presumably guarantee the quotient fits; confirm.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef __AMD64__
    return (int32_t)(i64 / i32);
# else /* !__AMD64__ */
#  if RT_INLINE_ASM_GNU_STYLE
    /* iDummy only exists to tell the compiler that EDX is overwritten (remainder). */
    RTUINTREG iDummy;
    __asm__ __volatile__("idivl %3"
                         : "=a" (i32), "=d"(iDummy)
                         : "A" (i64), "r" (i32));
#  else
    __asm
    {
        mov     eax, dword ptr [i64]
        mov     edx, dword ptr [i64 + 4]
        mov     ecx, [i32]
        idiv    ecx
        mov     [i32], eax
    }
#  endif
    /* i32 has been reused to hold the quotient. */
    return i32;
# endif /* !__AMD64__ */
}
#endif
2870
2871
/**
 * Probes a byte pointer for read access.
 *
 * While the function will fault if the byte is not read accessible (it is a
 * plain, unguarded load), the idea is to do this in a safe place like before
 * acquiring locks and such like.
 *
 * Also, this function guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte that was read.
 * @param   pvByte      Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* The load is done in a volatile asm statement rather than in C. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvByte]
        mov     al, [rax]
#  else
        mov     eax, [pvByte]
        mov     al, [eax]
#  endif
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
2911
2912/**
2913 * Probes a buffer for read access page by page.
2914 *
2915 * While the function will fault if the buffer is not fully read
2916 * accessible, the idea is to do this in a safe place like before
2917 * acquiring locks and such like.
2918 *
2919 * Also, this functions guarantees that an eager compiler is not going
2920 * to optimize the probing away.
2921 *
2922 * @param pvBuf Pointer to the buffer.
2923 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2924 */
2925DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2926{
2927 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2928 /* the first byte */
2929 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2930 ASMProbeReadByte(pu8);
2931
2932 /* the pages in between pages. */
2933 while (cbBuf > /*PAGE_SIZE*/0x1000)
2934 {
2935 ASMProbeReadByte(pu8);
2936 cbBuf -= /*PAGE_SIZE*/0x1000;
2937 pu8 += /*PAGE_SIZE*/0x1000;
2938 }
2939
2940 /* the last byte */
2941 ASMProbeReadByte(pu8 + cbBuf - 1);
2942}
2943
2944
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 *
 * Expands to an int3 instruction when GNU style inline assembly is in use and
 * to the __debugbreak() intrinsic otherwise.
 *
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# ifndef __L4ENV__
#  define ASMBreakpoint()       do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
# else
#  define ASMBreakpoint()       do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
# endif
#else
# define ASMBreakpoint()        __debugbreak()
#endif
2961
2962
2963
2964/** @defgroup grp_inline_bits Bit Operations
2965 * @{
2966 */
2967
2968
/**
 * Sets a bit in a bitmap.
 *
 * This is the non-atomic variant (no lock prefix / interlocked intrinsic);
 * see ASMAtomicBitSet for the locked one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The "memory" clobber covers the fact that the bitmap is both read and written. */
    __asm__ __volatile__ ("btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        bts     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        bts     [eax], edx
#  endif
    }
# endif
}
#endif
3004
3005
/**
 * Atomically sets a bit in a bitmap.
 *
 * Same operation as ASMBitSet, but performed with a lock prefixed BTS (or the
 * interlocked compiler intrinsic).
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [eax], edx
#  endif
    }
# endif
}
#endif
3040
3041
/**
 * Clears a bit in a bitmap.
 *
 * This is the non-atomic variant (no lock prefix / interlocked intrinsic);
 * see ASMAtomicBitClear for the locked one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The "memory" clobber covers the fact that the bitmap is both read and written. */
    __asm__ __volatile__ ("btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btr     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btr     [eax], edx
#  endif
    }
# endif
}
#endif
3077
3078
/**
 * Atomically clears a bit in a bitmap.
 *
 * Same operation as ASMBitClear, but performed with a lock prefixed BTR.
 * There is no compiler intrinsic path for this one, only inline or external
 * assembly.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 * @remark  No memory barrier, take care on smp.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [eax], edx
#  endif
    }
# endif
}
#endif
3112
3113
/**
 * Toggles a bit in a bitmap.
 *
 * This is the non-atomic variant (no lock prefix); see ASMAtomicBitToggle for
 * the locked one.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandcomplement((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* The "memory" clobber covers the fact that the bitmap is both read and written. */
    __asm__ __volatile__ ("btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btc     [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btc     [eax], edx
#  endif
    }
# endif
}
#endif
3148
3149
/**
 * Atomically toggles a bit in a bitmap.
 *
 * Same operation as ASMBitToggle, but performed with a lock prefixed BTC.
 * There is no compiler intrinsic path for this one, only inline or external
 * assembly.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [rax], edx
#  else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [eax], edx
#  endif
    }
# endif
}
#endif
3182
3183
/**
 * Tests and sets a bit in a bitmap.
 *
 * Non-atomic variant; the return value reflects the state of the bit before
 * it was set.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u8/u32 members and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTS leaves the old bit value in the carry flag; SETC copies it into the
       low byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        bts     [rax], edx
#  else
        mov     eax, [pvBitmap]
        bts     [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3228
3229
/**
 * Atomically tests and sets a bit in a bitmap.
 *
 * Same operation as ASMBitTestAndSet, but performed with a lock prefixed BTS
 * (or the interlocked compiler intrinsic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u8/u32 members and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* The old bit value ends up in the carry flag; SETC copies it into the low
       byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        lock bts [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock bts [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3273
3274
/**
 * Tests and clears a bit in a bitmap.
 *
 * Non-atomic variant; the return value reflects the state of the bit before
 * it was cleared.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u8/u32 members and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTR leaves the old bit value in the carry flag; SETC copies it into the
       low byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        btr     [rax], edx
#  else
        mov     eax, [pvBitmap]
        btr     [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3319
3320
/**
 * Atomically tests and clears a bit in a bitmap.
 *
 * Same operation as ASMBitTestAndClear, but performed with a lock prefixed BTR
 * (or the interlocked compiler intrinsic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 * @remark  No memory barrier, take care on smp.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u8/u32 members and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The old bit value ends up in the carry flag; SETC copies it into the low
       byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        lock btr [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock btr [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3366
3367
3368/**
3369 * Tests and toggles a bit in a bitmap.
3370 *
3371 * @returns true if the bit was set.
3372 * @returns false if the bit was clear.
3373 * @param pvBitmap Pointer to the bitmap.
3374 * @param iBit The bit to test and toggle.
3375 */
3376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3377DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3378#else
3379DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3380{
3381 union { bool f; uint32_t u32; uint8_t u8; } rc;
3382# if RT_INLINE_ASM_USES_INTRIN
3383 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3384
3385# elif RT_INLINE_ASM_GNU_STYLE
3386 __asm__ __volatile__ ("btcl %2, %1\n\t"
3387 "setc %b0\n\t"
3388 "andl $1, %0\n\t"
3389 : "=q" (rc.u32),
3390 "=m" (*(volatile long *)pvBitmap)
3391 : "Ir" (iBit)
3392 : "memory");
3393# else
3394 __asm
3395 {
3396 mov edx, [iBit]
3397# ifdef __AMD64__
3398 mov rax, [pvBitmap]
3399 btc [rax], edx
3400# else
3401 mov eax, [pvBitmap]
3402 btc [eax], edx
3403# endif
3404 setc al
3405 and eax, 1
3406 mov [rc.u32], eax
3407 }
3408# endif
3409 return rc.f;
3410}
3411#endif
3412
3413
/**
 * Atomically tests and toggles a bit in a bitmap.
 *
 * Same operation as ASMBitTestAndToggle, but performed with a lock prefixed
 * BTC.  There is no compiler intrinsic path for this one, only inline or
 * external assembly.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u32 member and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_GNU_STYLE
    /* The old bit value ends up in the carry flag; SETC copies it into the low
       byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        lock btc [rax], edx
#  else
        mov     eax, [pvBitmap]
        lock btc [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3455
3456
/**
 * Tests if a bit in a bitmap is set.
 *
 * The bitmap is only inspected (BT instruction / _bittest intrinsic), it is
 * not modified.
 *
 * @returns true if the bit is set.
 * @returns false if the bit is clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
{
    /* The result is stored through the u32 member and read back as a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u32 = _bittest((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE

    /* BT copies the selected bit into the carry flag; SETC copies it into the
       low byte of the result and the AND clears the remaining bits. */
    __asm__ __volatile__ ("btl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
#  ifdef __AMD64__
        mov     rax, [pvBitmap]
        bt      [rax], edx
#  else
        mov     eax, [pvBitmap]
        bt      [eax], edx
#  endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
3501
3502
3503/**
3504 * Clears a bit range within a bitmap.
3505 *
3506 * @param pvBitmap Pointer to the bitmap.
3507 * @param iBitStart The First bit to clear.
3508 * @param iBitEnd The first bit not to clear.
3509 */
3510DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3511{
3512 if (iBitStart < iBitEnd)
3513 {
3514 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3515 int iStart = iBitStart & ~31;
3516 int iEnd = iBitEnd & ~31;
3517 if (iStart == iEnd)
3518 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3519 else
3520 {
3521 /* bits in first dword. */
3522 if (iBitStart & 31)
3523 {
3524 *pu32 &= (1 << (iBitStart & 31)) - 1;
3525 pu32++;
3526 iBitStart = iStart + 32;
3527 }
3528
3529 /* whole dword. */
3530 if (iBitStart != iEnd)
3531 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3532
3533 /* bits in last dword. */
3534 if (iBitEnd & 31)
3535 {
3536 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3537 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3538 }
3539 }
3540 }
3541}
3542
3543
/**
 * Finds the first clear bit in a bitmap.
 *
 * The bitmap is scanned one 32-bit word at a time (REPE SCAS) for the first
 * word that differs from 0xffffffff.  That word is inverted by XORing it with
 * 0xffffffff so that BSF can locate its lowest clear bit, and the bit offset
 * of the word within the bitmap is added to the result.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (or if cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32); /* presumably rounds cBits up to a multiple of 32 - TODO confirm */
        /* Register setup: EDX = -1 (the "not found" result), ECX = number of
           dwords, EDI/RDI = bitmap, EAX = 0xffffffff (the pattern to skip).
           Operand %5 is the bitmap start, used to compute the byte offset. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je    1f\n\t"
#  ifdef __AMD64__
                             "lea   -4(%%rdi), %%rdi\n\t"
                             "xorl  (%%rdi), %%eax\n\t"
                             "subq  %5, %%rdi\n\t"
#  else
                             "lea   -4(%%edi), %%edi\n\t"
                             "xorl  (%%edi), %%eax\n\t"
                             "subl  %5, %%edi\n\t"
#  endif
                             "shll  $3, %%edi\n\t"
                             "bsfl  %%eax, %%edx\n\t"
                             "addl  %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             :  "0" (0xffffffff),
                               "mr" (pvBitmap),
                                "1" (cBits >> 5),
                                "2" (pvBitmap),
                                "3" (0xffffffff));
# else
        cBits = RT_ALIGN_32(cBits, 32); /* presumably rounds cBits up to a multiple of 32 - TODO confirm */
        __asm
        {
#  ifdef __AMD64__
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            mov     eax, edx
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done

#  ifdef __AMD64__
            lea     rdi, [rdi - 4]
            xor     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            xor     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
3626
3627
/**
 * Finds the next clear bit in a bitmap.
 *
 * The search starts at iBitPrev + 1.  If that position is in the middle of a
 * 32-bit word, the remainder of that word is inspected first and the rest of
 * the bitmap is then handed to ASMBitFirstClear.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* From here on iBitPrev is the first bit to look at and iBit is its
       position within its dword.  pvBitmap and cBits are adjusted so that
       they describe the bitmap starting at that dword. */
    int iBit = ++iBitPrev & 31;
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits   -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. (inverted so clear bits become set, and
           shifted so that bit 0 corresponds to the search start) */
        uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
#  if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
#  else
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
#  endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }
    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstClear(pvBitmap, cBits);
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    /* Nothing found, iBit is -1 here. */
    return iBit;
}
#endif
3695
3696
/**
 * Finds the first set bit in a bitmap.
 *
 * The bitmap is scanned one 32-bit word at a time (REPE SCAS) for the first
 * word that differs from 0.  BSF then locates the lowest set bit in that
 * word, and the bit offset of the word within the bitmap is added to the
 * result.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (or if cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32); /* presumably rounds cBits up to a multiple of 32 - TODO confirm */
        /* Register setup: EDX = -1 (the "not found" result), ECX = number of
           dwords, EDI/RDI = bitmap, EAX = 0 (the pattern to skip).
           Operand %5 is the bitmap start, used to compute the byte offset. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je    1f\n\t"
#  ifdef __AMD64__
                             "lea   -4(%%rdi), %%rdi\n\t"
                             "movl  (%%rdi), %%eax\n\t"
                             "subq  %5, %%rdi\n\t"
#  else
                             "lea   -4(%%edi), %%edi\n\t"
                             "movl  (%%edi), %%eax\n\t"
                             "subl  %5, %%edi\n\t"
#  endif
                             "shll  $3, %%edi\n\t"
                             "bsfl  %%eax, %%edx\n\t"
                             "addl  %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             :  "0" (0xffffffff),
                               "mr" (pvBitmap),
                                "1" (cBits >> 5),
                                "2" (pvBitmap),
                                "3" (0));
# else
        cBits = RT_ALIGN_32(cBits, 32); /* presumably rounds cBits up to a multiple of 32 - TODO confirm */
        __asm
        {
#  ifdef __AMD64__
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            xor     eax, eax
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done
#  ifdef __AMD64__
            lea     rdi, [rdi - 4]
            mov     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            mov     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
3778
3779
/**
 * Finds the next set bit in a bitmap.
 *
 * The search starts at iBitPrev + 1.  If that position is in the middle of a
 * 32-bit word, the remainder of that word is inspected first and the rest of
 * the bitmap is then handed to ASMBitFirstSet.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* From here on iBitPrev is the first bit to look at and iBit is its
       position within its dword.  pvBitmap and cBits are adjusted so that
       they describe the bitmap starting at that dword. */
    int iBit = ++iBitPrev & 31;
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits   -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. (shifted so that bit 0 corresponds to the
           search start) */
        uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
#  if RT_INLINE_ASM_GNU_STYLE
        /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
#  else
        __asm
        {
            mov     edx, u32
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
#  endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }

    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstSet(pvBitmap, cBits);
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    /* Nothing found, iBit is -1 here. */
    return iBit;
}
#endif
3848
3849
/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * Implemented with a forward bit scan (BSF / _BitScanForward); the zero based
 * index it yields is incremented by one, and a zero input maps to 0.
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to ffs() in BSD.
 */
DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    if (_BitScanForward(&iBit, u32))
        iBit++;
    else
        iBit = 0;
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* BSF sets ZF when the source is zero: in that case the result is zeroed,
       otherwise the zero based index is incremented. */
    __asm__ __volatile__("bsf  %1, %0\n\t"
                         "jnz  1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp  2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    _asm
    {
        bsf     eax, [u32]
        jnz     found
        xor     eax, eax
        jmp     done
    found:
        inc     eax
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
3894
3895
3896/**
3897 * Finds the first bit which is set in the given 32-bit integer.
3898 * Bits are numbered from 1 (least significant) to 32.
3899 *
3900 * @returns index [1..32] of the first set bit.
3901 * @returns 0 if all bits are cleared.
3902 * @param i32 Integer to search for set bits.
3903 * @remark Similar to ffs() in BSD.
3904 */
3905DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3906{
3907 return ASMBitFirstSetU32((uint32_t)i32);
3908}
3909
3910
/**
 * Finds the last bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * Implemented with a reverse bit scan (BSR / _BitScanReverse); the zero based
 * index it yields is incremented by one, and a zero input maps to 0.
 *
 * @returns index [1..32] of the last set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to fls() in BSD.
 */
DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    if (_BitScanReverse(&iBit, u32))
        iBit++;
    else
        iBit = 0;
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* BSR sets ZF when the source is zero: in that case the result is zeroed,
       otherwise the zero based index is incremented. */
    __asm__ __volatile__("bsrl %1, %0\n\t"
                         "jnz   1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp  2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    _asm
    {
        bsr     eax, [u32]
        jnz     found
        xor     eax, eax
        jmp     done
    found:
        inc     eax
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
3955
3956
3957/**
3958 * Finds the last bit which is set in the given 32-bit integer.
3959 * Bits are numbered from 1 (least significant) to 32.
3960 *
3961 * @returns index [1..32] of the last set bit.
3962 * @returns 0 if all bits are cleared.
3963 * @param i32 Integer to search for set bits.
3964 * @remark Similar to fls() in BSD.
3965 */
3966DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
3967{
3968 return ASMBitLastSetS32((uint32_t)i32);
3969}
3970
3971
/**
 * Reverse the byte order of the given 32-bit integer.
 *
 * Uses the BSWAP instruction or, with the MSC intrinsics, _byteswap_ulong().
 *
 * @returns The byte swapped value.
 * @param   u32     Integer
 */
DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
{
#if RT_INLINE_ASM_USES_INTRIN
    u32 = _byteswap_ulong(u32);
#elif RT_INLINE_ASM_GNU_STYLE
    /* "0" ties the input to the output register, BSWAP works in place. */
    __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
#else
    _asm
    {
        mov     eax, [u32]
        bswap   eax
        mov     [u32], eax
    }
#endif
    return u32;
}
3992
3993/** @} */
3994
3995
3996/** @} */
3997#endif
3998
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette