VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 1098

Last change on this file since 1098 was 1030, checked in by vboxsync, 18 years ago

just let the compiler decide

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 94.3 KB
Line 
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when compiling with a Microsoft compiler whose _MSC_VER is 1400 or
 * later; <intrin.h> is then included and the intrinsics named below are made
 * available to the ASM* functions.
 * Set to 0 in every other case (unless something defined it beforehand).
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
 /* Emit the intrinsics at all optimization levels. */

 /* CPU identification, interrupt flag, time stamp counter and MSRs. */
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)

 /* Port I/O. */
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)

 /* Paging and control registers. */
# pragma intrinsic(__invlpg)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)

 /* String stores. */
# pragma intrinsic(__stosb)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosd)

 /* Bit scanning / testing and byte swapping. */
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)

 /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)

 /* Intrinsics that are only requested for AMD64 builds. */
# ifdef __AMD64__
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif /* _MSC_VER >= 1400 */

#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler offers no inline assembly, in which case the
 * ASM* functions are only declared here and implemented in an external
 * .asm file.  Set to 0 otherwise.
 *
 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
 *         inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(__AMD64__)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
109
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler understands GNU style inline assembly, which is
 * assumed for everything that is not a Microsoft compiler; 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
118
119
/** @todo find a more proper place for these structures? */
/* Both descriptor table register images are byte packed so that the 16-bit
   size is immediately followed by the address, with no padding in between. */
#pragma pack(1)

/** IDTR */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;

/** GDTR */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;

#pragma pack()
142
143
/** @def ASMReturnAddress
 * Yields the return address of the function or method it is used in, i.e. the
 * address execution continues at in the caller.
 *
 * Maps to the _ReturnAddress intrinsic on Microsoft compilers and to
 * __builtin_return_address(0) on GCC (and when generating documentation).
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C" void * _ReturnAddress(void);
# else
void * _ReturnAddress(void);
# endif
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(__DOXYGEN__)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241/**
242 * Get the cs register.
243 * @returns cs.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334# endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
762
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR0 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Reads a machine specific register.
1216 *
1217 * @returns Register content.
1218 * @param uRegister Register to read.
1219 */
1220#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1221DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1222#else
1223DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1224{
1225 RTUINT64U u;
1226# if RT_INLINE_ASM_GNU_STYLE
1227 __asm__ ("rdmsr\n\t"
1228 : "=a" (u.s.Lo),
1229 "=d" (u.s.Hi)
1230 : "c" (uRegister));
1231
1232# elif RT_INLINE_ASM_USES_INTRIN
1233 u.u = __readmsr(uRegister);
1234
1235# else
1236 __asm
1237 {
1238 mov ecx, [uRegister]
1239 rdmsr
1240 mov [u.s.Lo], eax
1241 mov [u.s.Hi], edx
1242 }
1243# endif
1244
1245 return u.u;
1246}
1247#endif
1248
1249
1250/**
1251 * Writes a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to write to.
1255 * @param u64Val Value to write.
1256 */
1257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1258DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1259#else
1260DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1261{
1262 RTUINT64U u;
1263
1264 u.u = u64Val;
1265# if RT_INLINE_ASM_GNU_STYLE
1266 __asm__ __volatile__("wrmsr\n\t"
1267 ::"a" (u.s.Lo),
1268 "d" (u.s.Hi),
1269 "c" (uRegister));
1270
1271# elif RT_INLINE_ASM_USES_INTRIN
1272 __writemsr(uRegister, u.u);
1273
1274# else
1275 __asm
1276 {
1277 mov ecx, [uRegister]
1278 mov edx, [u.s.Hi]
1279 mov eax, [u.s.Lo]
1280 wrmsr
1281 }
1282# endif
1283}
1284#endif
1285
1286
1287/**
1288 * Reads low part of a machine specific register.
1289 *
1290 * @returns Register content.
1291 * @param uRegister Register to read.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1295#else
1296DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1297{
1298 uint32_t u32;
1299# if RT_INLINE_ASM_GNU_STYLE
1300 __asm__ ("rdmsr\n\t"
1301 : "=a" (u32)
1302 : "c" (uRegister)
1303 : "edx");
1304
1305# elif RT_INLINE_ASM_USES_INTRIN
1306 u32 = (uint32_t)__readmsr(uRegister);
1307
1308#else
1309 __asm
1310 {
1311 mov ecx, [uRegister]
1312 rdmsr
1313 mov [u32], eax
1314 }
1315# endif
1316
1317 return u32;
1318}
1319#endif
1320
1321
1322/**
1323 * Reads high part of a machine specific register.
1324 *
1325 * @returns Register content.
1326 * @param uRegister Register to read.
1327 */
1328#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1329DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1330#else
1331DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1332{
1333 uint32_t u32;
1334# if RT_INLINE_ASM_GNU_STYLE
1335 __asm__ ("rdmsr\n\t"
1336 : "=d" (u32)
1337 : "c" (uRegister)
1338 : "eax");
1339
1340# elif RT_INLINE_ASM_USES_INTRIN
1341 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1342
1343# else
1344 __asm
1345 {
1346 mov ecx, [uRegister]
1347 rdmsr
1348 mov [u32], edx
1349 }
1350# endif
1351
1352 return u32;
1353}
1354#endif
1355
1356
1357/**
1358 * Gets dr7.
1359 *
1360 * @returns dr7.
1361 */
1362#if RT_INLINE_ASM_EXTERNAL
1363DECLASM(RTCCUINTREG) ASMGetDR7(void);
1364#else
1365DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1366{
1367 RTCCUINTREG uDR7;
1368# if RT_INLINE_ASM_GNU_STYLE
1369# ifdef __AMD64__
1370 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1371# else
1372 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1373# endif
1374# else
1375 __asm
1376 {
1377# ifdef __AMD64__
1378 mov rax, dr7
1379 mov [uDR7], rax
1380# else
1381 mov eax, dr7
1382 mov [uDR7], eax
1383# endif
1384 }
1385# endif
1386 return uDR7;
1387}
1388#endif
1389
1390
1391/**
1392 * Gets dr6.
1393 *
1394 * @returns dr6.
1395 */
1396#if RT_INLINE_ASM_EXTERNAL
1397DECLASM(RTCCUINTREG) ASMGetDR6(void);
1398#else
1399DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1400{
1401 RTCCUINTREG uDR6;
1402# if RT_INLINE_ASM_GNU_STYLE
1403# ifdef __AMD64__
1404 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1405# else
1406 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1407# endif
1408# else
1409 __asm
1410 {
1411# ifdef __AMD64__
1412 mov rax, dr6
1413 mov [uDR6], rax
1414# else
1415 mov eax, dr6
1416 mov [uDR6], eax
1417# endif
1418 }
1419# endif
1420 return uDR6;
1421}
1422#endif
1423
1424
1425/**
1426 * Reads and clears DR6.
1427 *
1428 * @returns DR6.
1429 */
1430#if RT_INLINE_ASM_EXTERNAL
1431DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1432#else
1433DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1434{
1435 RTCCUINTREG uDR6;
1436# if RT_INLINE_ASM_GNU_STYLE
1437 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1438# ifdef __AMD64__
1439 __asm__ ("movq %%dr6, %0\n\t"
1440 "movq %1, %%dr6\n\t"
1441 : "=r" (uDR6)
1442 : "r" (uNewValue));
1443# else
1444 __asm__ ("movl %%dr6, %0\n\t"
1445 "movl %1, %%dr6\n\t"
1446 : "=r" (uDR6)
1447 : "r" (uNewValue));
1448# endif
1449# else
1450 __asm
1451 {
1452# ifdef __AMD64__
1453 mov rax, dr6
1454 mov [uDR6], rax
1455 mov rcx, rax
1456 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1457 mov dr6, rcx
1458# else
1459 mov eax, dr6
1460 mov [uDR6], eax
1461 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1462 mov dr6, ecx
1463# endif
1464 }
1465# endif
1466 return uDR6;
1467}
1468#endif
1469
1470
/** @deprecated Old name, forwards to ASMOutU8(). */
#define ASMOutB(p, b)   ASMOutU8((p), (b))
/** @deprecated Old name, forwards to ASMInU8(). */
#define ASMInB(p)       ASMInU8((p))
1475
1476/**
1477 * Writes a 8-bit unsigned integer to an I/O port.
1478 *
1479 * @param Port I/O port to read from.
1480 * @param u8 8-bit integer to write.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1483DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1484#else
1485DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1486{
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("outb %b1, %w0\n\t"
1489 :: "Nd" (Port),
1490 "a" (u8));
1491
1492# elif RT_INLINE_ASM_USES_INTRIN
1493 __outbyte(Port, u8);
1494
1495# else
1496 __asm
1497 {
1498 mov dx, [Port]
1499 mov al, [u8]
1500 out dx, al
1501 }
1502# endif
1503}
1504#endif
1505
1506
1507/**
1508 * Gets a 8-bit unsigned integer from an I/O port.
1509 *
1510 * @returns 8-bit integer.
1511 * @param Port I/O port to read from.
1512 */
1513#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1514DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1515#else
1516DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1517{
1518 uint8_t u8;
1519# if RT_INLINE_ASM_GNU_STYLE
1520 __asm__ __volatile__("inb %w1, %b0\n\t"
1521 : "=a" (u8)
1522 : "Nd" (Port));
1523
1524# elif RT_INLINE_ASM_USES_INTRIN
1525 u8 = __inbyte(Port);
1526
1527# else
1528 __asm
1529 {
1530 mov dx, [Port]
1531 in al, dx
1532 mov [u8], al
1533 }
1534# endif
1535 return u8;
1536}
1537#endif
1538
1539
1540/**
1541 * Writes a 16-bit unsigned integer to an I/O port.
1542 *
1543 * @param Port I/O port to read from.
1544 * @param u16 16-bit integer to write.
1545 */
1546#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1547DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1548#else
1549DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1550{
1551# if RT_INLINE_ASM_GNU_STYLE
1552 __asm__ __volatile__("outw %w1, %w0\n\t"
1553 :: "Nd" (Port),
1554 "a" (u16));
1555
1556# elif RT_INLINE_ASM_USES_INTRIN
1557 __outword(Port, u16);
1558
1559# else
1560 __asm
1561 {
1562 mov dx, [Port]
1563 mov ax, [u16]
1564 out dx, ax
1565 }
1566# endif
1567}
1568#endif
1569
1570
1571/**
1572 * Gets a 16-bit unsigned integer from an I/O port.
1573 *
1574 * @returns 16-bit integer.
1575 * @param Port I/O port to read from.
1576 */
1577#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1578DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1579#else
1580DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1581{
1582 uint16_t u16;
1583# if RT_INLINE_ASM_GNU_STYLE
1584 __asm__ __volatile__("inw %w1, %w0\n\t"
1585 : "=a" (u16)
1586 : "Nd" (Port));
1587
1588# elif RT_INLINE_ASM_USES_INTRIN
1589 u16 = __inword(Port);
1590
1591# else
1592 __asm
1593 {
1594 mov dx, [Port]
1595 in ax, dx
1596 mov [u16], ax
1597 }
1598# endif
1599 return u16;
1600}
1601#endif
1602
1603
1604/**
1605 * Writes a 32-bit unsigned integer to an I/O port.
1606 *
1607 * @param Port I/O port to read from.
1608 * @param u32 32-bit integer to write.
1609 */
1610#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1611DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1612#else
1613DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1614{
1615# if RT_INLINE_ASM_GNU_STYLE
1616 __asm__ __volatile__("outl %1, %w0\n\t"
1617 :: "Nd" (Port),
1618 "a" (u32));
1619
1620# elif RT_INLINE_ASM_USES_INTRIN
1621 __outdword(Port, u32);
1622
1623# else
1624 __asm
1625 {
1626 mov dx, [Port]
1627 mov eax, [u32]
1628 out dx, eax
1629 }
1630# endif
1631}
1632#endif
1633
1634
1635/**
1636 * Gets a 32-bit unsigned integer from an I/O port.
1637 *
1638 * @returns 32-bit integer.
1639 * @param Port I/O port to read from.
1640 */
1641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1642DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1643#else
1644DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1645{
1646 uint32_t u32;
1647# if RT_INLINE_ASM_GNU_STYLE
1648 __asm__ __volatile__("inl %w1, %0\n\t"
1649 : "=a" (u32)
1650 : "Nd" (Port));
1651
1652# elif RT_INLINE_ASM_USES_INTRIN
1653 u32 = __indword(Port);
1654
1655# else
1656 __asm
1657 {
1658 mov dx, [Port]
1659 in eax, dx
1660 mov [u32], eax
1661 }
1662# endif
1663 return u32;
1664}
1665#endif
1666
1667
1668/**
1669 * Atomically Exchange an unsigned 8-bit value.
1670 *
1671 * @returns Current *pu8 value
1672 * @param pu8 Pointer to the 8-bit variable to update.
1673 * @param u8 The 8-bit value to assign to *pu8.
1674 */
1675#if RT_INLINE_ASM_EXTERNAL
1676DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1677#else
1678DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1679{
1680# if RT_INLINE_ASM_GNU_STYLE
1681 __asm__ __volatile__("xchgb %0, %1\n\t"
1682 : "=m" (*pu8),
1683 "=r" (u8)
1684 : "1" (u8));
1685# else
1686 __asm
1687 {
1688# ifdef __AMD64__
1689 mov rdx, [pu8]
1690 mov al, [u8]
1691 xchg [rdx], al
1692 mov [u8], al
1693# else
1694 mov edx, [pu8]
1695 mov al, [u8]
1696 xchg [edx], al
1697 mov [u8], al
1698# endif
1699 }
1700# endif
1701 return u8;
1702}
1703#endif
1704
1705
1706/**
1707 * Atomically Exchange a signed 8-bit value.
1708 *
1709 * @returns Current *pu8 value
1710 * @param pi8 Pointer to the 8-bit variable to update.
1711 * @param i8 The 8-bit value to assign to *pi8.
1712 */
1713DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1714{
1715 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1716}
1717
1718
1719/**
1720 * Atomically Exchange an unsigned 16-bit value.
1721 *
1722 * @returns Current *pu16 value
1723 * @param pu16 Pointer to the 16-bit variable to update.
1724 * @param u16 The 16-bit value to assign to *pu16.
1725 */
1726#if RT_INLINE_ASM_EXTERNAL
1727DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1728#else
1729DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1730{
1731# if RT_INLINE_ASM_GNU_STYLE
1732 __asm__ __volatile__("xchgw %0, %1\n\t"
1733 : "=m" (*pu16),
1734 "=r" (u16)
1735 : "1" (u16));
1736# else
1737 __asm
1738 {
1739# ifdef __AMD64__
1740 mov rdx, [pu16]
1741 mov ax, [u16]
1742 xchg [rdx], ax
1743 mov [u16], ax
1744# else
1745 mov edx, [pu16]
1746 mov ax, [u16]
1747 xchg [edx], ax
1748 mov [u16], ax
1749# endif
1750 }
1751# endif
1752 return u16;
1753}
1754#endif
1755
1756
1757/**
1758 * Atomically Exchange a signed 16-bit value.
1759 *
1760 * @returns Current *pu16 value
1761 * @param pi16 Pointer to the 16-bit variable to update.
1762 * @param i16 The 16-bit value to assign to *pi16.
1763 */
1764DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1765{
1766 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1767}
1768
1769
1770/**
1771 * Atomically Exchange an unsigned 32-bit value.
1772 *
1773 * @returns Current *pu32 value
1774 * @param pu32 Pointer to the 32-bit variable to update.
1775 * @param u32 The 32-bit value to assign to *pu32.
1776 */
1777#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1778DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1779#else
1780DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1781{
1782# if RT_INLINE_ASM_GNU_STYLE
1783 __asm__ __volatile__("xchgl %0, %1\n\t"
1784 : "=m" (*pu32),
1785 "=r" (u32)
1786 : "1" (u32));
1787
1788# elif RT_INLINE_ASM_USES_INTRIN
1789 u32 = _InterlockedExchange((long *)pu32, u32);
1790
1791# else
1792 __asm
1793 {
1794# ifdef __AMD64__
1795 mov rdx, [pu32]
1796 mov eax, u32
1797 xchg [rdx], eax
1798 mov [u32], eax
1799# else
1800 mov edx, [pu32]
1801 mov eax, u32
1802 xchg [edx], eax
1803 mov [u32], eax
1804# endif
1805 }
1806# endif
1807 return u32;
1808}
1809#endif
1810
1811
1812/**
1813 * Atomically Exchange a signed 32-bit value.
1814 *
1815 * @returns Current *pu32 value
1816 * @param pi32 Pointer to the 32-bit variable to update.
1817 * @param i32 The 32-bit value to assign to *pi32.
1818 */
1819DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1820{
1821 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1822}
1823
1824
1825/**
1826 * Atomically Exchange an unsigned 64-bit value.
1827 *
1828 * @returns Current *pu64 value
1829 * @param pu64 Pointer to the 64-bit variable to update.
1830 * @param u64 The 64-bit value to assign to *pu64.
1831 */
1832#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1833DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1834#else
1835DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1836{
1837# if defined(__AMD64__)
1838# if RT_INLINE_ASM_USES_INTRIN
1839 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1840
1841# elif RT_INLINE_ASM_GNU_STYLE
1842 __asm__ __volatile__("xchgq %0, %1\n\t"
1843 : "=m" (*pu64),
1844 "=r" (u64)
1845 : "1" (u64));
1846# else
1847 __asm
1848 {
1849 mov rdx, [pu64]
1850 mov rax, [u64]
1851 xchg [rdx], rax
1852 mov [u64], rax
1853 }
1854# endif
1855# else /* !__AMD64__ */
1856# if RT_INLINE_ASM_GNU_STYLE
1857# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1858 uint32_t u32 = (uint32_t)u64;
1859 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1860 "xchgl %%ebx, %3\n\t"
1861 "1:\n\t"
1862 "lock; cmpxchg8b (%5)\n\t"
1863 "jnz 1b\n\t"
1864 "xchgl %%ebx, %3\n\t"
1865 /*"xchgl %%esi, %5\n\t"*/
1866 : "=A" (u64),
1867 "=m" (*pu64)
1868 : "0" (*pu64),
1869 "m" ( u32 ),
1870 "c" ( (uint32_t)(u64 >> 32) ),
1871 "S" (pu64) );
1872# else /* !PIC */
1873 __asm__ __volatile__("1:\n\t"
1874 "lock; cmpxchg8b %1\n\t"
1875 "jnz 1b\n\t"
1876 : "=A" (u64),
1877 "=m" (*pu64)
1878 : "0" (*pu64),
1879 "b" ( (uint32_t)u64 ),
1880 "c" ( (uint32_t)(u64 >> 32) ));
1881# endif
1882# else
1883 __asm
1884 {
1885 mov ebx, dword ptr [u64]
1886 mov ecx, dword ptr [u64 + 4]
1887 mov edi, pu64
1888 mov eax, dword ptr [edi]
1889 mov edx, dword ptr [edi + 4]
1890 retry:
1891 lock cmpxchg8b [edi]
1892 jnz retry
1893 mov dword ptr [u64], eax
1894 mov dword ptr [u64 + 4], edx
1895 }
1896# endif
1897# endif /* !__AMD64__ */
1898 return u64;
1899}
1900#endif
1901
1902
1903/**
1904 * Atomically Exchange an signed 64-bit value.
1905 *
1906 * @returns Current *pi64 value
1907 * @param pi64 Pointer to the 64-bit variable to update.
1908 * @param i64 The 64-bit value to assign to *pi64.
1909 */
1910DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1911{
1912 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1913}
1914
1915
#ifdef __AMD64__
/**
 * Exchange an unsigned 128-bit value.
 *
 * @returns The value *pu128 held before the exchange.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being the atomic bit of this
 *          function is BROKEN: it performs two separate 64-bit exchanges (low
 *          half first, then high half) instead of one 128-bit exchange.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /** @todo this is clumsy code.  A single-instruction variant (lock cmpxchg16b
     *        retry loop, selected by ASMCpuId_ECX(1) & BIT(13)) is not
     *        implemented yet. */
    PRTUINT128U pDst = (PRTUINT128U)(uintptr_t)pu128;
    RTUINT128U  uValue;

    uValue.u    = u128;
    uValue.s.Lo = ASMAtomicXchgU64(&pDst->s.Lo, uValue.s.Lo);
    uValue.s.Hi = ASMAtomicXchgU64(&pDst->s.Hi, uValue.s.Hi);
    return uValue.u;
}
# endif
#endif /* __AMD64__ */
1975
1976
1977/**
1978 * Atomically Reads a unsigned 64-bit value.
1979 *
1980 * @returns Current *pu64 value
1981 * @param pu64 Pointer to the 64-bit variable to read.
1982 * The memory pointed to must be writable.
1983 * @remark This will fault if the memory is read-only!
1984 */
1985#if RT_INLINE_ASM_EXTERNAL
1986DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1987#else
1988DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1989{
1990 uint64_t u64;
1991# ifdef __AMD64__
1992# if RT_INLINE_ASM_GNU_STYLE
1993 __asm__ __volatile__("movq %1, %0\n\t"
1994 : "=r" (u64)
1995 : "m" (*pu64));
1996# else
1997 __asm
1998 {
1999 mov rdx, [pu64]
2000 mov rax, [rdx]
2001 mov [u64], rax
2002 }
2003# endif
2004# else /* !__AMD64__ */
2005# if RT_INLINE_ASM_GNU_STYLE
2006# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2007 uint32_t u32EBX = 0;
2008 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2009 "lock; cmpxchg8b (%5)\n\t"
2010 "xchgl %%ebx, %3\n\t"
2011 : "=A" (u64),
2012 "=m" (*pu64)
2013 : "0" (0),
2014 "m" (u32EBX),
2015 "c" (0),
2016 "S" (pu64));
2017# else /* !PIC */
2018 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2019 : "=A" (u64),
2020 "=m" (*pu64)
2021 : "0" (0),
2022 "b" (0),
2023 "c" (0));
2024# endif
2025# else
2026 __asm
2027 {
2028 xor eax, eax
2029 xor edx, edx
2030 mov edi, pu64
2031 xor ecx, ecx
2032 xor ebx, ebx
2033 lock cmpxchg8b [edi]
2034 mov dword ptr [u64], eax
2035 mov dword ptr [u64 + 4], edx
2036 }
2037# endif
2038# endif /* !__AMD64__ */
2039 return u64;
2040}
2041#endif
2042
2043
2044/**
2045 * Atomically Reads a signed 64-bit value.
2046 *
2047 * @returns Current *pi64 value
2048 * @param pi64 Pointer to the 64-bit variable to read.
2049 * The memory pointed to must be writable.
2050 * @remark This will fault if the memory is read-only!
2051 */
2052DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2053{
2054 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2055}
2056
2057
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function;
 * any other size triggers an assertion.  The previous value is discarded.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: /* sizeof yields size_t, so cast it to match the %d conversion */ \
                AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2075
2076
2077/**
2078 * Atomically Exchange a pointer value.
2079 *
2080 * @returns Current *ppv value
2081 * @param ppv Pointer to the pointer variable to update.
2082 * @param pv The pointer value to assign to *ppv.
2083 */
2084DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2085{
2086#if ARCH_BITS == 32
2087 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2088#elif ARCH_BITS == 64
2089 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2090#else
2091# error "ARCH_BITS is bogus"
2092#endif
2093}
2094
2095
2096/**
2097 * Atomically Compare and Exchange an unsigned 32-bit value.
2098 *
2099 * @returns true if xchg was done.
2100 * @returns false if xchg wasn't done.
2101 *
2102 * @param pu32 Pointer to the value to update.
2103 * @param u32New The new value to assigned to *pu32.
2104 * @param u32Old The old value to *pu32 compare with.
2105 */
2106#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2107DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2108#else
2109DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2110{
2111# if RT_INLINE_ASM_GNU_STYLE
2112 uint32_t u32Ret;
2113 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2114 "setz %%al\n\t"
2115 "movzx %%al, %%eax\n\t"
2116 : "=m" (*pu32),
2117 "=a" (u32Ret)
2118 : "r" (u32New),
2119 "1" (u32Old));
2120 return (bool)u32Ret;
2121
2122# elif RT_INLINE_ASM_USES_INTRIN
2123 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2124
2125# else
2126 uint32_t u32Ret;
2127 __asm
2128 {
2129# ifdef __AMD64__
2130 mov rdx, [pu32]
2131# else
2132 mov edx, [pu32]
2133# endif
2134 mov eax, [u32Old]
2135 mov ecx, [u32New]
2136# ifdef __AMD64__
2137 lock cmpxchg [rdx], ecx
2138# else
2139 lock cmpxchg [edx], ecx
2140# endif
2141 setz al
2142 movzx eax, al
2143 mov [u32Ret], eax
2144 }
2145 return !!u32Ret;
2146# endif
2147}
2148#endif
2149
2150
2151/**
2152 * Atomically Compare and Exchange a signed 32-bit value.
2153 *
2154 * @returns true if xchg was done.
2155 * @returns false if xchg wasn't done.
2156 *
2157 * @param pi32 Pointer to the value to update.
2158 * @param i32New The new value to assigned to *pi32.
2159 * @param i32Old The old value to *pi32 compare with.
2160 */
2161DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2162{
2163 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2164}
2165
2166
2167/**
2168 * Atomically Compare and exchange an unsigned 64-bit value.
2169 *
2170 * @returns true if xchg was done.
2171 * @returns false if xchg wasn't done.
2172 *
2173 * @param pu64 Pointer to the 64-bit variable to update.
2174 * @param u64New The 64-bit value to assign to *pu64.
2175 * @param u64Old The value to compare with.
2176 */
2177#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2178DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2179#else
2180DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2181{
2182# if RT_INLINE_ASM_USES_INTRIN
2183 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2184
2185# elif defined(__AMD64__)
2186# if RT_INLINE_ASM_GNU_STYLE
2187 uint64_t u64Ret;
2188 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2189 "setz %%al\n\t"
2190 "movzx %%al, %%eax\n\t"
2191 : "=m" (*pu64),
2192 "=a" (u64Ret)
2193 : "r" (u64New),
2194 "1" (u64Old));
2195 return (bool)u64Ret;
2196# else
2197 bool fRet;
2198 __asm
2199 {
2200 mov rdx, [pu32]
2201 mov rax, [u64Old]
2202 mov rcx, [u64New]
2203 lock cmpxchg [rdx], rcx
2204 setz al
2205 mov [fRet], al
2206 }
2207 return fRet;
2208# endif
2209# else /* !__AMD64__ */
2210 uint32_t u32Ret;
2211# if RT_INLINE_ASM_GNU_STYLE
2212# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2213 uint32_t u32 = (uint32_t)u64New;
2214 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2215 "lock; cmpxchg8b (%5)\n\t"
2216 "setz %%al\n\t"
2217 "xchgl %%ebx, %3\n\t"
2218 "movzx %%al, %%eax\n\t"
2219 : "=a" (u32Ret),
2220 "=m" (*pu64)
2221 : "A" (u64Old),
2222 "m" ( u32 ),
2223 "c" ( (uint32_t)(u64New >> 32) ),
2224 "S" (pu64) );
2225# else /* !PIC */
2226 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2227 "setz %%al\n\t"
2228 "movzx %%al, %%eax\n\t"
2229 : "=a" (u32Ret),
2230 "=m" (*pu64)
2231 : "A" (u64Old),
2232 "b" ( (uint32_t)u64New ),
2233 "c" ( (uint32_t)(u64New >> 32) ));
2234# endif
2235 return (bool)u32Ret;
2236# else
2237 __asm
2238 {
2239 mov ebx, dword ptr [u64New]
2240 mov ecx, dword ptr [u64New + 4]
2241 mov edi, [pu64]
2242 mov eax, dword ptr [u64Old]
2243 mov edx, dword ptr [u64Old + 4]
2244 lock cmpxchg8b [edi]
2245 setz al
2246 movzx eax, al
2247 mov dword ptr [u32Ret], eax
2248 }
2249 return !!u32Ret;
2250# endif
2251# endif /* !__AMD64__ */
2252}
2253#endif
2254
2255
2256/**
2257 * Atomically Compare and exchange a signed 64-bit value.
2258 *
2259 * @returns true if xchg was done.
2260 * @returns false if xchg wasn't done.
2261 *
2262 * @param pi64 Pointer to the 64-bit variable to update.
2263 * @param i64 The 64-bit value to assign to *pu64.
2264 * @param i64Old The value to compare with.
2265 */
2266DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2267{
2268 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2269}
2270
2271
2272
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 32 or 64-bit compare-and-exchange
 * function; any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: /* sizeof yields size_t, so cast it to match the %d conversion */ \
                AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2294
2295
2296/**
2297 * Atomically Compare and Exchange a pointer value.
2298 *
2299 * @returns true if xchg was done.
2300 * @returns false if xchg wasn't done.
2301 *
2302 * @param ppv Pointer to the value to update.
2303 * @param pvNew The new value to assigned to *ppv.
2304 * @param pvOld The old value to *ppv compare with.
2305 */
2306DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2307{
2308#if ARCH_BITS == 32
2309 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2310#elif ARCH_BITS == 64
2311 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2312#else
2313# error "ARCH_BITS is bogus"
2314#endif
2315}
2316
2317
2318/**
2319 * Atomically increment a 32-bit value.
2320 *
2321 * @returns The new value.
2322 * @param pu32 Pointer to the value to increment.
2323 */
2324#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2325DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2326#else
2327DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2328{
2329 uint32_t u32;
2330# if RT_INLINE_ASM_USES_INTRIN
2331 u32 = _InterlockedIncrement((long *)pu32);
2332
2333# elif RT_INLINE_ASM_GNU_STYLE
2334 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2335 "incl %0\n\t"
2336 : "=r" (u32),
2337 "=m" (*pu32)
2338 : "0" (1)
2339 : "memory");
2340# else
2341 __asm
2342 {
2343 mov eax, 1
2344# ifdef __AMD64__
2345 mov rdx, [pu32]
2346 lock xadd [rdx], eax
2347# else
2348 mov edx, [pu32]
2349 lock xadd [edx], eax
2350# endif
2351 inc eax
2352 mov u32, eax
2353 }
2354# endif
2355 return u32;
2356}
2357#endif
2358
2359
2360/**
2361 * Atomically increment a signed 32-bit value.
2362 *
2363 * @returns The new value.
2364 * @param pi32 Pointer to the value to increment.
2365 */
2366DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2367{
2368 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2369}
2370
2371
2372/**
2373 * Atomically decrement an unsigned 32-bit value.
2374 *
2375 * @returns The new value.
2376 * @param pu32 Pointer to the value to decrement.
2377 */
2378#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2379DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2380#else
2381DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2382{
2383 uint32_t u32;
2384# if RT_INLINE_ASM_USES_INTRIN
2385 u32 = _InterlockedDecrement((long *)pu32);
2386
2387# elif RT_INLINE_ASM_GNU_STYLE
2388 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2389 "decl %0\n\t"
2390 : "=r" (u32),
2391 "=m" (*pu32)
2392 : "0" (-1)
2393 : "memory");
2394# else
2395 __asm
2396 {
2397 mov eax, -1
2398# ifdef __AMD64__
2399 mov rdx, [pu32]
2400 lock xadd [rdx], eax
2401# else
2402 mov edx, [pu32]
2403 lock xadd [edx], eax
2404# endif
2405 dec eax
2406 mov u32, eax
2407 }
2408# endif
2409 return u32;
2410}
2411#endif
2412
2413
2414/**
2415 * Atomically decrement a signed 32-bit value.
2416 *
2417 * @returns The new value.
2418 * @param pi32 Pointer to the value to decrement.
2419 */
2420DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2421{
2422 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2423}
2424
2425
2426/**
2427 * Atomically Or an unsigned 32-bit value.
2428 *
2429 * @param pu32 Pointer to the pointer variable to OR u32 with.
2430 * @param u32 The value to OR *pu32 with.
2431 */
2432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2433DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2434#else
2435DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2436{
2437# if RT_INLINE_ASM_USES_INTRIN
2438 _InterlockedOr((long volatile *)pu32, (long)u32);
2439
2440# elif RT_INLINE_ASM_GNU_STYLE
2441 __asm__ __volatile__("lock; orl %1, %0\n\t"
2442 : "=m" (*pu32)
2443 : "r" (u32));
2444# else
2445 __asm
2446 {
2447 mov eax, [u32]
2448# ifdef __AMD64__
2449 mov rdx, [pu32]
2450 lock or [rdx], eax
2451# else
2452 mov edx, [pu32]
2453 lock or [edx], eax
2454# endif
2455 }
2456# endif
2457}
2458#endif
2459
2460
2461/**
2462 * Atomically Or a signed 32-bit value.
2463 *
2464 * @param pi32 Pointer to the pointer variable to OR u32 with.
2465 * @param i32 The value to OR *pu32 with.
2466 */
2467DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2468{
2469 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2470}
2471
2472
2473/**
2474 * Atomically And an unsigned 32-bit value.
2475 *
2476 * @param pu32 Pointer to the pointer variable to AND u32 with.
2477 * @param u32 The value to AND *pu32 with.
2478 */
2479#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2480DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2481#else
2482DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2483{
2484# if RT_INLINE_ASM_USES_INTRIN
2485 _InterlockedAnd((long volatile *)pu32, u32);
2486
2487# elif RT_INLINE_ASM_GNU_STYLE
2488 __asm__ __volatile__("lock; andl %1, %0\n\t"
2489 : "=m" (*pu32)
2490 : "r" (u32));
2491# else
2492 __asm
2493 {
2494 mov eax, [u32]
2495# ifdef __AMD64__
2496 mov rdx, [pu32]
2497 lock and [rdx], eax
2498# else
2499 mov edx, [pu32]
2500 lock and [edx], eax
2501# endif
2502 }
2503# endif
2504}
2505#endif
2506
2507
2508/**
2509 * Atomically And a signed 32-bit value.
2510 *
2511 * @param pi32 Pointer to the pointer variable to AND i32 with.
2512 * @param i32 The value to AND *pi32 with.
2513 */
2514DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2515{
2516 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2517}
2518
2519
2520/**
2521 * Invalidate page.
2522 *
2523 * @param pv Address of the page to invalidate.
2524 */
2525#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2526DECLASM(void) ASMInvalidatePage(void *pv);
2527#else
2528DECLINLINE(void) ASMInvalidatePage(void *pv)
2529{
2530# if RT_INLINE_ASM_USES_INTRIN
2531 __invlpg(pv);
2532
2533# elif RT_INLINE_ASM_GNU_STYLE
2534 __asm__ __volatile__("invlpg %0\n\t"
2535 : : "m" (*(uint8_t *)pv));
2536# else
2537 __asm
2538 {
2539# ifdef __AMD64__
2540 mov rax, [pv]
2541 invlpg [rax]
2542# else
2543 mov eax, [pv]
2544 invlpg [eax]
2545# endif
2546 }
2547# endif
2548}
2549#endif
2550
2551
2552#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2553# if PAGE_SIZE != 0x1000
2554# error "PAGE_SIZE is not 0x1000!"
2555# endif
2556#endif
2557
2558/**
2559 * Zeros a 4K memory page.
2560 *
2561 * @param pv Pointer to the memory block. This must be page aligned.
2562 */
2563#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2564DECLASM(void) ASMMemZeroPage(volatile void *pv);
2565# else
2566DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2567{
2568# if RT_INLINE_ASM_USES_INTRIN
2569# ifdef __AMD64__
2570 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2571# else
2572 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2573# endif
2574
2575# elif RT_INLINE_ASM_GNU_STYLE
2576 RTUINTREG uDummy;
2577# ifdef __AMD64__
2578 __asm__ __volatile__ ("rep stosq"
2579 : "=D" (pv),
2580 "=c" (uDummy)
2581 : "0" (pv),
2582 "c" (0x1000 >> 3),
2583 "a" (0)
2584 : "memory");
2585# else
2586 __asm__ __volatile__ ("rep stosl"
2587 : "=D" (pv),
2588 "=c" (uDummy)
2589 : "0" (pv),
2590 "c" (0x1000 >> 2),
2591 "a" (0)
2592 : "memory");
2593# endif
2594# else
2595 __asm
2596 {
2597# ifdef __AMD64__
2598 xor rax, rax
2599 mov ecx, 0200h
2600 mov rdi, [pv]
2601 rep stosq
2602# else
2603 xor eax, eax
2604 mov ecx, 0400h
2605 mov edi, [pv]
2606 rep stosd
2607# endif
2608 }
2609# endif
2610}
2611# endif
2612
2613
2614/**
2615 * Zeros a memory block with a 32-bit aligned size.
2616 *
2617 * @param pv Pointer to the memory block.
2618 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2619 */
2620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2621DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2622#else
2623DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2624{
2625# if RT_INLINE_ASM_USES_INTRIN
2626 __stosd((unsigned long *)pv, 0, cb >> 2);
2627
2628# elif RT_INLINE_ASM_GNU_STYLE
2629 __asm__ __volatile__ ("rep stosl"
2630 : "=D" (pv),
2631 "=c" (cb)
2632 : "0" (pv),
2633 "1" (cb >> 2),
2634 "a" (0)
2635 : "memory");
2636# else
2637 __asm
2638 {
2639 xor eax, eax
2640# ifdef __AMD64__
2641 mov rcx, [cb]
2642 shr rcx, 2
2643 mov rdi, [pv]
2644# else
2645 mov ecx, [cb]
2646 shr ecx, 2
2647 mov edi, [pv]
2648# endif
2649 rep stosd
2650 }
2651# endif
2652}
2653#endif
2654
2655
2656/**
2657 * Fills a memory block with a 32-bit aligned size.
2658 *
2659 * @param pv Pointer to the memory block.
2660 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2661 * @param u32 The value to fill with.
2662 */
2663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2664DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2665#else
2666DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2667{
2668# if RT_INLINE_ASM_USES_INTRIN
2669 __stosd((unsigned long *)pv, 0, cb >> 2);
2670
2671# elif RT_INLINE_ASM_GNU_STYLE
2672 __asm__ __volatile__ ("rep stosl"
2673 : "=D" (pv),
2674 "=c" (cb)
2675 : "0" (pv),
2676 "1" (cb >> 2),
2677 "a" (u32)
2678 : "memory");
2679# else
2680 __asm
2681 {
2682# ifdef __AMD64__
2683 mov rcx, [cb]
2684 shr rcx, 2
2685 mov rdi, [pv]
2686# else
2687 mov ecx, [cb]
2688 shr ecx, 2
2689 mov edi, [pv]
2690# endif
2691 mov eax, [u32]
2692 rep stosd
2693 }
2694# endif
2695}
2696#endif
2697
2698
2699
2700/**
2701 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2702 *
2703 * @returns u32F1 * u32F2.
2704 */
2705#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2706DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2707#else
2708DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2709{
2710# ifdef __AMD64__
2711 return (uint64_t)u32F1 * u32F2;
2712# else /* !__AMD64__ */
2713 uint64_t u64;
2714# if RT_INLINE_ASM_GNU_STYLE
2715 __asm__ __volatile__("mull %%edx"
2716 : "=A" (u64)
2717 : "a" (u32F2), "d" (u32F1));
2718# else
2719 __asm
2720 {
2721 mov edx, [u32F1]
2722 mov eax, [u32F2]
2723 mul edx
2724 mov dword ptr [u64], eax
2725 mov dword ptr [u64 + 4], edx
2726 }
2727# endif
2728 return u64;
2729# endif /* !__AMD64__ */
2730}
2731#endif
2732
2733
2734/**
2735 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2736 *
2737 * @returns u32F1 * u32F2.
2738 */
2739#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2740DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2741#else
2742DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2743{
2744# ifdef __AMD64__
2745 return (int64_t)i32F1 * i32F2;
2746# else /* !__AMD64__ */
2747 int64_t i64;
2748# if RT_INLINE_ASM_GNU_STYLE
2749 __asm__ __volatile__("imull %%edx"
2750 : "=A" (i64)
2751 : "a" (i32F2), "d" (i32F1));
2752# else
2753 __asm
2754 {
2755 mov edx, [i32F1]
2756 mov eax, [i32F2]
2757 imul edx
2758 mov dword ptr [i64], eax
2759 mov dword ptr [i64 + 4], edx
2760 }
2761# endif
2762 return i64;
2763# endif /* !__AMD64__ */
2764}
2765#endif
2766
2767
2768/**
2769 * Devides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2770 *
2771 * @returns u64 / u32.
2772 */
2773#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2774DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2775#else
2776DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2777{
2778# ifdef __AMD64__
2779 return (uint32_t)(u64 / u32);
2780# else /* !__AMD64__ */
2781# if RT_INLINE_ASM_GNU_STYLE
2782 RTUINTREG uDummy;
2783 __asm__ __volatile__("divl %3"
2784 : "=a" (u32), "=d"(uDummy)
2785 : "A" (u64), "r" (u32));
2786# else
2787 __asm
2788 {
2789 mov eax, dword ptr [u64]
2790 mov edx, dword ptr [u64 + 4]
2791 mov ecx, [u32]
2792 div ecx
2793 mov [u32], eax
2794 }
2795# endif
2796 return u32;
2797# endif /* !__AMD64__ */
2798}
2799#endif
2800
2801
2802/**
2803 * Devides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2804 *
2805 * @returns u64 / u32.
2806 */
2807#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2808DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2809#else
2810DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2811{
2812# ifdef __AMD64__
2813 return (int32_t)(i64 / i32);
2814# else /* !__AMD64__ */
2815# if RT_INLINE_ASM_GNU_STYLE
2816 RTUINTREG iDummy;
2817 __asm__ __volatile__("idivl %3"
2818 : "=a" (i32), "=d"(iDummy)
2819 : "A" (i64), "r" (i32));
2820# else
2821 __asm
2822 {
2823 mov eax, dword ptr [i64]
2824 mov edx, dword ptr [i64 + 4]
2825 mov ecx, [i32]
2826 idiv ecx
2827 mov [i32], eax
2828 }
2829# endif
2830 return i32;
2831# endif /* !__AMD64__ */
2832}
2833#endif
2834
2835
2836/**
2837 * Probes a byte pointer for read access.
2838 *
2839 * While the function will not fault if the byte is not read accessible,
2840 * the idea is to do this in a safe place like before acquiring locks
2841 * and such like.
2842 *
2843 * Also, this functions guarantees that an eager compiler is not going
2844 * to optimize the probing away.
2845 *
2846 * @param pvByte Pointer to the byte.
2847 */
2848#if RT_INLINE_ASM_EXTERNAL
2849DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2850#else
2851DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2852{
2853 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2854 uint8_t u8;
2855# if RT_INLINE_ASM_GNU_STYLE
2856 __asm__ __volatile__("movb (%1), %0\n\t"
2857 : "=r" (u8)
2858 : "r" (pvByte));
2859# else
2860 __asm
2861 {
2862# ifdef __AMD64__
2863 mov rax, [pvByte]
2864 mov al, [rax]
2865# else
2866 mov eax, [pvByte]
2867 mov al, [eax]
2868# endif
2869 mov [u8], al
2870 }
2871# endif
2872 return u8;
2873}
2874#endif
2875
2876/**
2877 * Probes a buffer for read access page by page.
2878 *
2879 * While the function will fault if the buffer is not fully read
2880 * accessible, the idea is to do this in a safe place like before
2881 * acquiring locks and such like.
2882 *
2883 * Also, this functions guarantees that an eager compiler is not going
2884 * to optimize the probing away.
2885 *
2886 * @param pvBuf Pointer to the buffer.
2887 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
2888 */
2889DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
2890{
2891 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2892 /* the first byte */
2893 const uint8_t *pu8 = (const uint8_t *)pvBuf;
2894 ASMProbeReadByte(pu8);
2895
2896 /* the pages in between pages. */
2897 while (cbBuf > /*PAGE_SIZE*/0x1000)
2898 {
2899 ASMProbeReadByte(pu8);
2900 cbBuf -= /*PAGE_SIZE*/0x1000;
2901 pu8 += /*PAGE_SIZE*/0x1000;
2902 }
2903
2904 /* the last byte */
2905 ASMProbeReadByte(pu8 + cbBuf - 1);
2906}
2907
2908
/** @def ASMBreakpoint
 * Debugger breakpoint.
 * @remark  In the gnu world a nop instruction follows the int3 to force gdb
 *          to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
2925
2926
2927
2928/** @defgroup grp_inline_bits Bit Operations
2929 * @{
2930 */
2931
2932
2933/**
2934 * Sets a bit in a bitmap.
2935 *
2936 * @param pvBitmap Pointer to the bitmap.
2937 * @param iBit The bit to set.
2938 */
2939#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2940DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
2941#else
2942DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
2943{
2944# if RT_INLINE_ASM_USES_INTRIN
2945 _bittestandset((long *)pvBitmap, iBit);
2946
2947# elif RT_INLINE_ASM_GNU_STYLE
2948 __asm__ __volatile__ ("btsl %1, %0"
2949 : "=m" (*(volatile long *)pvBitmap)
2950 : "Ir" (iBit)
2951 : "memory");
2952# else
2953 __asm
2954 {
2955# ifdef __AMD64__
2956 mov rax, [pvBitmap]
2957 mov edx, [iBit]
2958 bts [rax], edx
2959# else
2960 mov eax, [pvBitmap]
2961 mov edx, [iBit]
2962 bts [eax], edx
2963# endif
2964 }
2965# endif
2966}
2967#endif
2968
2969
2970/**
2971 * Atomically sets a bit in a bitmap.
2972 *
2973 * @param pvBitmap Pointer to the bitmap.
2974 * @param iBit The bit to set.
2975 */
2976#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2977DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
2978#else
2979DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
2980{
2981# if RT_INLINE_ASM_USES_INTRIN
2982 _interlockedbittestandset((long *)pvBitmap, iBit);
2983# elif RT_INLINE_ASM_GNU_STYLE
2984 __asm__ __volatile__ ("lock; btsl %1, %0"
2985 : "=m" (*(volatile long *)pvBitmap)
2986 : "Ir" (iBit)
2987 : "memory");
2988# else
2989 __asm
2990 {
2991# ifdef __AMD64__
2992 mov rax, [pvBitmap]
2993 mov edx, [iBit]
2994 lock bts [rax], edx
2995# else
2996 mov eax, [pvBitmap]
2997 mov edx, [iBit]
2998 lock bts [eax], edx
2999# endif
3000 }
3001# endif
3002}
3003#endif
3004
3005
3006/**
3007 * Clears a bit in a bitmap.
3008 *
3009 * @param pvBitmap Pointer to the bitmap.
3010 * @param iBit The bit to clear.
3011 */
3012#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3013DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3014#else
3015DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3016{
3017# if RT_INLINE_ASM_USES_INTRIN
3018 _bittestandreset((long *)pvBitmap, iBit);
3019
3020# elif RT_INLINE_ASM_GNU_STYLE
3021 __asm__ __volatile__ ("btrl %1, %0"
3022 : "=m" (*(volatile long *)pvBitmap)
3023 : "Ir" (iBit)
3024 : "memory");
3025# else
3026 __asm
3027 {
3028# ifdef __AMD64__
3029 mov rax, [pvBitmap]
3030 mov edx, [iBit]
3031 btr [rax], edx
3032# else
3033 mov eax, [pvBitmap]
3034 mov edx, [iBit]
3035 btr [eax], edx
3036# endif
3037 }
3038# endif
3039}
3040#endif
3041
3042
3043/**
3044 * Atomically clears a bit in a bitmap.
3045 *
3046 * @param pvBitmap Pointer to the bitmap.
3047 * @param iBit The bit to toggle set.
3048 * @remark No memory barrier, take care on smp.
3049 */
3050#if RT_INLINE_ASM_EXTERNAL
3051DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3052#else
3053DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3054{
3055# if RT_INLINE_ASM_GNU_STYLE
3056 __asm__ __volatile__ ("lock; btrl %1, %0"
3057 : "=m" (*(volatile long *)pvBitmap)
3058 : "Ir" (iBit)
3059 : "memory");
3060# else
3061 __asm
3062 {
3063# ifdef __AMD64__
3064 mov rax, [pvBitmap]
3065 mov edx, [iBit]
3066 lock btr [rax], edx
3067# else
3068 mov eax, [pvBitmap]
3069 mov edx, [iBit]
3070 lock btr [eax], edx
3071# endif
3072 }
3073# endif
3074}
3075#endif
3076
3077
3078/**
3079 * Toggles a bit in a bitmap.
3080 *
3081 * @param pvBitmap Pointer to the bitmap.
3082 * @param iBit The bit to toggle.
3083 */
3084#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3085DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3086#else
3087DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3088{
3089# if RT_INLINE_ASM_USES_INTRIN
3090 _bittestandcomplement((long *)pvBitmap, iBit);
3091# elif RT_INLINE_ASM_GNU_STYLE
3092 __asm__ __volatile__ ("btcl %1, %0"
3093 : "=m" (*(volatile long *)pvBitmap)
3094 : "Ir" (iBit)
3095 : "memory");
3096# else
3097 __asm
3098 {
3099# ifdef __AMD64__
3100 mov rax, [pvBitmap]
3101 mov edx, [iBit]
3102 btc [rax], edx
3103# else
3104 mov eax, [pvBitmap]
3105 mov edx, [iBit]
3106 btc [eax], edx
3107# endif
3108 }
3109# endif
3110}
3111#endif
3112
3113
3114/**
3115 * Atomically toggles a bit in a bitmap.
3116 *
3117 * @param pvBitmap Pointer to the bitmap.
3118 * @param iBit The bit to test and set.
3119 */
3120#if RT_INLINE_ASM_EXTERNAL
3121DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3122#else
3123DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3124{
3125# if RT_INLINE_ASM_GNU_STYLE
3126 __asm__ __volatile__ ("lock; btcl %1, %0"
3127 : "=m" (*(volatile long *)pvBitmap)
3128 : "Ir" (iBit)
3129 : "memory");
3130# else
3131 __asm
3132 {
3133# ifdef __AMD64__
3134 mov rax, [pvBitmap]
3135 mov edx, [iBit]
3136 lock btc [rax], edx
3137# else
3138 mov eax, [pvBitmap]
3139 mov edx, [iBit]
3140 lock btc [eax], edx
3141# endif
3142 }
3143# endif
3144}
3145#endif
3146
3147
3148/**
3149 * Tests and sets a bit in a bitmap.
3150 *
3151 * @returns true if the bit was set.
3152 * @returns false if the bit was clear.
3153 * @param pvBitmap Pointer to the bitmap.
3154 * @param iBit The bit to test and set.
3155 */
3156#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3157DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3158#else
3159DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3160{
3161 union { bool f; uint32_t u32; uint8_t u8; } rc;
3162# if RT_INLINE_ASM_USES_INTRIN
3163 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3164
3165# elif RT_INLINE_ASM_GNU_STYLE
3166 __asm__ __volatile__ ("btsl %2, %1\n\t"
3167 "setc %b0\n\t"
3168 "andl $1, %0\n\t"
3169 : "=q" (rc.u32),
3170 "=m" (*(volatile long *)pvBitmap)
3171 : "Ir" (iBit)
3172 : "memory");
3173# else
3174 __asm
3175 {
3176 mov edx, [iBit]
3177# ifdef __AMD64__
3178 mov rax, [pvBitmap]
3179 bts [rax], edx
3180# else
3181 mov eax, [pvBitmap]
3182 bts [eax], edx
3183# endif
3184 setc al
3185 and eax, 1
3186 mov [rc.u32], eax
3187 }
3188# endif
3189 return rc.f;
3190}
3191#endif
3192
3193
3194/**
3195 * Atomically tests and sets a bit in a bitmap.
3196 *
3197 * @returns true if the bit was set.
3198 * @returns false if the bit was clear.
3199 * @param pvBitmap Pointer to the bitmap.
3200 * @param iBit The bit to set.
3201 */
3202#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3203DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3204#else
3205DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3206{
3207 union { bool f; uint32_t u32; uint8_t u8; } rc;
3208# if RT_INLINE_ASM_USES_INTRIN
3209 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3210# elif RT_INLINE_ASM_GNU_STYLE
3211 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3212 "setc %b0\n\t"
3213 "andl $1, %0\n\t"
3214 : "=q" (rc.u32),
3215 "=m" (*(volatile long *)pvBitmap)
3216 : "Ir" (iBit)
3217 : "memory");
3218# else
3219 __asm
3220 {
3221 mov edx, [iBit]
3222# ifdef __AMD64__
3223 mov rax, [pvBitmap]
3224 lock bts [rax], edx
3225# else
3226 mov eax, [pvBitmap]
3227 lock bts [eax], edx
3228# endif
3229 setc al
3230 and eax, 1
3231 mov [rc.u32], eax
3232 }
3233# endif
3234 return rc.f;
3235}
3236#endif
3237
3238
3239/**
3240 * Tests and clears a bit in a bitmap.
3241 *
3242 * @returns true if the bit was set.
3243 * @returns false if the bit was clear.
3244 * @param pvBitmap Pointer to the bitmap.
3245 * @param iBit The bit to test and clear.
3246 */
3247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3248DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3249#else
3250DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3251{
3252 union { bool f; uint32_t u32; uint8_t u8; } rc;
3253# if RT_INLINE_ASM_USES_INTRIN
3254 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3255
3256# elif RT_INLINE_ASM_GNU_STYLE
3257 __asm__ __volatile__ ("btrl %2, %1\n\t"
3258 "setc %b0\n\t"
3259 "andl $1, %0\n\t"
3260 : "=q" (rc.u32),
3261 "=m" (*(volatile long *)pvBitmap)
3262 : "Ir" (iBit)
3263 : "memory");
3264# else
3265 __asm
3266 {
3267 mov edx, [iBit]
3268# ifdef __AMD64__
3269 mov rax, [pvBitmap]
3270 btr [rax], edx
3271# else
3272 mov eax, [pvBitmap]
3273 btr [eax], edx
3274# endif
3275 setc al
3276 and eax, 1
3277 mov [rc.u32], eax
3278 }
3279# endif
3280 return rc.f;
3281}
3282#endif
3283
3284
3285/**
3286 * Atomically tests and clears a bit in a bitmap.
3287 *
3288 * @returns true if the bit was set.
3289 * @returns false if the bit was clear.
3290 * @param pvBitmap Pointer to the bitmap.
3291 * @param iBit The bit to test and clear.
3292 * @remark No memory barrier, take care on smp.
3293 */
3294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3295DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3296#else
3297DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3298{
3299 union { bool f; uint32_t u32; uint8_t u8; } rc;
3300# if RT_INLINE_ASM_USES_INTRIN
3301 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3302
3303# elif RT_INLINE_ASM_GNU_STYLE
3304 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3305 "setc %b0\n\t"
3306 "andl $1, %0\n\t"
3307 : "=q" (rc.u32),
3308 "=m" (*(volatile long *)pvBitmap)
3309 : "Ir" (iBit)
3310 : "memory");
3311# else
3312 __asm
3313 {
3314 mov edx, [iBit]
3315# ifdef __AMD64__
3316 mov rax, [pvBitmap]
3317 lock btr [rax], edx
3318# else
3319 mov eax, [pvBitmap]
3320 lock btr [eax], edx
3321# endif
3322 setc al
3323 and eax, 1
3324 mov [rc.u32], eax
3325 }
3326# endif
3327 return rc.f;
3328}
3329#endif
3330
3331
3332/**
3333 * Tests and toggles a bit in a bitmap.
3334 *
3335 * @returns true if the bit was set.
3336 * @returns false if the bit was clear.
3337 * @param pvBitmap Pointer to the bitmap.
3338 * @param iBit The bit to test and toggle.
3339 */
3340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3341DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3342#else
3343DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3344{
3345 union { bool f; uint32_t u32; uint8_t u8; } rc;
3346# if RT_INLINE_ASM_USES_INTRIN
3347 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3348
3349# elif RT_INLINE_ASM_GNU_STYLE
3350 __asm__ __volatile__ ("btcl %2, %1\n\t"
3351 "setc %b0\n\t"
3352 "andl $1, %0\n\t"
3353 : "=q" (rc.u32),
3354 "=m" (*(volatile long *)pvBitmap)
3355 : "Ir" (iBit)
3356 : "memory");
3357# else
3358 __asm
3359 {
3360 mov edx, [iBit]
3361# ifdef __AMD64__
3362 mov rax, [pvBitmap]
3363 btc [rax], edx
3364# else
3365 mov eax, [pvBitmap]
3366 btc [eax], edx
3367# endif
3368 setc al
3369 and eax, 1
3370 mov [rc.u32], eax
3371 }
3372# endif
3373 return rc.f;
3374}
3375#endif
3376
3377
3378/**
3379 * Atomically tests and toggles a bit in a bitmap.
3380 *
3381 * @returns true if the bit was set.
3382 * @returns false if the bit was clear.
3383 * @param pvBitmap Pointer to the bitmap.
3384 * @param iBit The bit to test and toggle.
3385 */
3386#if RT_INLINE_ASM_EXTERNAL
3387DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3388#else
3389DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3390{
3391 union { bool f; uint32_t u32; uint8_t u8; } rc;
3392# if RT_INLINE_ASM_GNU_STYLE
3393 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3394 "setc %b0\n\t"
3395 "andl $1, %0\n\t"
3396 : "=q" (rc.u32),
3397 "=m" (*(volatile long *)pvBitmap)
3398 : "Ir" (iBit)
3399 : "memory");
3400# else
3401 __asm
3402 {
3403 mov edx, [iBit]
3404# ifdef __AMD64__
3405 mov rax, [pvBitmap]
3406 lock btc [rax], edx
3407# else
3408 mov eax, [pvBitmap]
3409 lock btc [eax], edx
3410# endif
3411 setc al
3412 and eax, 1
3413 mov [rc.u32], eax
3414 }
3415# endif
3416 return rc.f;
3417}
3418#endif
3419
3420
3421/**
3422 * Tests if a bit in a bitmap is set.
3423 *
3424 * @returns true if the bit is set.
3425 * @returns false if the bit is clear.
3426 * @param pvBitmap Pointer to the bitmap.
3427 * @param iBit The bit to test.
3428 */
3429#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3430DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3431#else
3432DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3433{
3434 union { bool f; uint32_t u32; uint8_t u8; } rc;
3435# if RT_INLINE_ASM_USES_INTRIN
3436 rc.u32 = _bittest((long *)pvBitmap, iBit);
3437# elif RT_INLINE_ASM_GNU_STYLE
3438
3439 __asm__ __volatile__ ("btl %2, %1\n\t"
3440 "setc %b0\n\t"
3441 "andl $1, %0\n\t"
3442 : "=q" (rc.u32),
3443 "=m" (*(volatile long *)pvBitmap)
3444 : "Ir" (iBit)
3445 : "memory");
3446# else
3447 __asm
3448 {
3449 mov edx, [iBit]
3450# ifdef __AMD64__
3451 mov rax, [pvBitmap]
3452 bt [rax], edx
3453# else
3454 mov eax, [pvBitmap]
3455 bt [eax], edx
3456# endif
3457 setc al
3458 and eax, 1
3459 mov [rc.u32], eax
3460 }
3461# endif
3462 return rc.f;
3463}
3464#endif
3465
3466
3467/**
3468 * Clears a bit range within a bitmap.
3469 *
3470 * @param pvBitmap Pointer to the bitmap.
3471 * @param iBitStart The First bit to clear.
3472 * @param iBitEnd The first bit not to clear.
3473 */
3474DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3475{
3476 if (iBitStart < iBitEnd)
3477 {
3478 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3479 int iStart = iBitStart & ~31;
3480 int iEnd = iBitEnd & ~31;
3481 if (iStart == iEnd)
3482 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3483 else
3484 {
3485 /* bits in first dword. */
3486 if (iBitStart & 31)
3487 {
3488 *pu32 &= (1 << (iBitStart & 31)) - 1;
3489 pu32++;
3490 iBitStart = iStart + 32;
3491 }
3492
3493 /* whole dword. */
3494 if (iBitStart != iEnd)
3495 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3496
3497 /* bits in last dword. */
3498 if (iBitEnd & 31)
3499 {
3500 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3501 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3502 }
3503 }
3504 }
3505}
3506
3507
/**
 * Finds the first clear bit in a bitmap.
 *
 * The bitmap is scanned one dword at a time for a dword that differs from
 * 0xffffffff; the lowest clear bit of that dword is then located.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found, or if cBits is 0.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      (The value is passed through RT_ALIGN_32(cBits, 32)
 *                      before use - presumably rounding it up; confirm
 *                      against the macro definition.)
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        /* Dummy outputs so the registers the string instruction modifies can
           be tied to inputs via matching constraints. */
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the operand list): EDX = 0xffffffff (the -1
         * result if nothing is found), ECX = cBits >> 5 (dword count),
         * EDI = pvBitmap, EAX = 0xffffffff (the pattern to compare against).
         * %5 is pvBitmap again, used as the base for the offset calculation.
         *
         *  - repe scasl:  compare dwords at EDI with EAX while they are equal.
         *  - je 1f:       every dword matched, leave EDX at -1.
         *  - lea -4:      step back to the dword that differed.
         *  - xorl:        EAX = dword ^ 0xffffffff, so clear bits become set bits.
         *  - sub / shll:  byte offset from the bitmap start, times 8 = bit offset.
         *  - bsfl / addl: lowest set bit of EAX plus the bit offset -> EDX.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
#  ifdef __AMD64__
                             "lea -4(%%rdi), %%rdi\n\t"
                             "xorl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
#  else
                             "lea -4(%%edi), %%edi\n\t"
                             "xorl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
#  endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0xffffffff));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        /* Same algorithm as the GNU variant above; EBX/RBX keeps the bitmap
           base for the offset calculation and EDX starts out as -1. */
        __asm
        {
#  ifdef __AMD64__
            mov rdi, [pvBitmap]
            mov rbx, rdi
#  else
            mov edi, [pvBitmap]
            mov ebx, edi
#  endif
            mov edx, 0ffffffffh
            mov eax, edx
            mov ecx, [cBits]
            shr ecx, 5
            repe scasd
            je done

#  ifdef __AMD64__
            lea rdi, [rdi - 4]
            xor eax, [rdi]
            sub rdi, rbx
#  else
            lea edi, [edi - 4]
            xor eax, [edi]
            sub edi, ebx
#  endif
            shl edi, 3
            bsf edx, eax
            add edx, edi
        done:
            mov [iBit], edx
        }
# endif
        return iBit;
    }
    /* Empty bitmap: nothing to find. */
    return -1;
}
#endif
3590
3591
3592/**
3593 * Finds the next clear bit in a bitmap.
3594 *
3595 * @returns Index of the first zero bit.
3596 * @returns -1 if no clear bit was found.
3597 * @param pvBitmap Pointer to the bitmap.
3598 * @param cBits The number of bits in the bitmap. Multiple of 32.
3599 * @param iBitPrev The bit returned from the last search.
3600 * The search will start at iBitPrev + 1.
3601 */
3602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3603DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3604#else
3605DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3606{
3607 int iBit = ++iBitPrev & 31;
3608 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3609 cBits -= iBitPrev & ~31;
3610 if (iBit)
3611 {
3612 /* inspect the first dword. */
3613 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3614# if RT_INLINE_ASM_USES_INTRIN
3615 unsigned long ulBit = 0;
3616 if (_BitScanForward(&ulBit, u32))
3617 return ulBit + iBitPrev;
3618 iBit = -1;
3619# else
3620# if RT_INLINE_ASM_GNU_STYLE
3621 __asm__ __volatile__("bsf %1, %0\n\t"
3622 "jnz 1f\n\t"
3623 "movl $-1, %0\n\t"
3624 "1:\n\t"
3625 : "=r" (iBit)
3626 : "r" (u32));
3627# else
3628 __asm
3629 {
3630 mov edx, [u32]
3631 bsf eax, edx
3632 jnz done
3633 mov eax, 0ffffffffh
3634 done:
3635 mov [iBit], eax
3636 }
3637# endif
3638 if (iBit >= 0)
3639 return iBit + iBitPrev;
3640# endif
3641 /* Search the rest of the bitmap, if there is anything. */
3642 if (cBits > 32)
3643 {
3644 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3645 if (iBit >= 0)
3646 return iBit + (iBitPrev & ~31) + 32;
3647 }
3648 }
3649 else
3650 {
3651 /* Search the rest of the bitmap. */
3652 iBit = ASMBitFirstClear(pvBitmap, cBits);
3653 if (iBit >= 0)
3654 return iBit + (iBitPrev & ~31);
3655 }
3656 return iBit;
3657}
3658#endif
3659
3660
/**
 * Finds the first set bit in a bitmap.
 *
 * The bitmap is scanned one dword at a time for a non-zero dword; the lowest
 * set bit of that dword is then located.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found, or if cBits is 0.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      (The value is passed through RT_ALIGN_32(cBits, 32)
 *                      before use - presumably rounding it up; confirm
 *                      against the macro definition.)
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        /* Dummy outputs so the registers the string instruction modifies can
           be tied to inputs via matching constraints. */
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /*
         * Register setup (see the operand list): EDX = 0xffffffff (the -1
         * result if nothing is found), ECX = cBits >> 5 (dword count),
         * EDI = pvBitmap, EAX = 0 (the pattern to compare against).
         * %5 is pvBitmap again, used as the base for the offset calculation.
         *
         *  - repe scasl:  compare dwords at EDI with EAX while they are equal.
         *  - je 1f:       every dword was zero, leave EDX at -1.
         *  - lea -4:      step back to the dword that differed.
         *  - movl:        load that (non-zero) dword into EAX.
         *  - sub / shll:  byte offset from the bitmap start, times 8 = bit offset.
         *  - bsfl / addl: lowest set bit of EAX plus the bit offset -> EDX.
         */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
#  ifdef __AMD64__
                             "lea -4(%%rdi), %%rdi\n\t"
                             "movl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
#  else
                             "lea -4(%%edi), %%edi\n\t"
                             "movl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
#  endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        /* Same algorithm as the GNU variant above; EBX/RBX keeps the bitmap
           base for the offset calculation and EDX starts out as -1. */
        __asm
        {
#  ifdef __AMD64__
            mov rdi, [pvBitmap]
            mov rbx, rdi
#  else
            mov edi, [pvBitmap]
            mov ebx, edi
#  endif
            mov edx, 0ffffffffh
            xor eax, eax
            mov ecx, [cBits]
            shr ecx, 5
            repe scasd
            je done
#  ifdef __AMD64__
            lea rdi, [rdi - 4]
            mov eax, [rdi]
            sub rdi, rbx
#  else
            lea edi, [edi - 4]
            mov eax, [edi]
            sub edi, ebx
#  endif
            shl edi, 3
            bsf edx, eax
            add edx, edi
        done:
            mov [iBit], edx
        }
# endif
        return iBit;
    }
    /* Empty bitmap: nothing to find. */
    return -1;
}
#endif
3742
3743
3744/**
3745 * Finds the next set bit in a bitmap.
3746 *
3747 * @returns Index of the next set bit.
3748 * @returns -1 if no set bit was found.
3749 * @param pvBitmap Pointer to the bitmap.
3750 * @param cBits The number of bits in the bitmap. Multiple of 32.
3751 * @param iBitPrev The bit returned from the last search.
3752 * The search will start at iBitPrev + 1.
3753 */
3754#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3755DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3756#else
3757DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3758{
3759 int iBit = ++iBitPrev & 31;
3760 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3761 cBits -= iBitPrev & ~31;
3762 if (iBit)
3763 {
3764 /* inspect the first dword. */
3765 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3766# if RT_INLINE_ASM_USES_INTRIN
3767 unsigned long ulBit = 0;
3768 if (_BitScanForward(&ulBit, u32))
3769 return ulBit + iBitPrev;
3770 iBit = -1;
3771# else
3772# if RT_INLINE_ASM_GNU_STYLE
3773 __asm__ __volatile__("bsf %1, %0\n\t"
3774 "jnz 1f\n\t"
3775 "movl $-1, %0\n\t"
3776 "1:\n\t"
3777 : "=r" (iBit)
3778 : "r" (u32));
3779# else
3780 __asm
3781 {
3782 mov edx, u32
3783 bsf eax, edx
3784 jnz done
3785 mov eax, 0ffffffffh
3786 done:
3787 mov [iBit], eax
3788 }
3789# endif
3790 if (iBit >= 0)
3791 return iBit + iBitPrev;
3792# endif
3793 /* Search the rest of the bitmap, if there is anything. */
3794 if (cBits > 32)
3795 {
3796 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3797 if (iBit >= 0)
3798 return iBit + (iBitPrev & ~31) + 32;
3799 }
3800
3801 }
3802 else
3803 {
3804 /* Search the rest of the bitmap. */
3805 iBit = ASMBitFirstSet(pvBitmap, cBits);
3806 if (iBit >= 0)
3807 return iBit + (iBitPrev & ~31);
3808 }
3809 return iBit;
3810}
3811#endif
3812
3813
3814/**
3815 * Finds the first bit which is set in the given 32-bit integer.
3816 * Bits are numbered from 1 (least significant) to 32.
3817 *
3818 * @returns index [1..32] of the first set bit.
3819 * @returns 0 if all bits are cleared.
3820 * @param u32 Integer to search for set bits.
3821 * @remark Similar to ffs() in BSD.
3822 */
3823DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3824{
3825# if RT_INLINE_ASM_USES_INTRIN
3826 unsigned long iBit;
3827 if (_BitScanForward(&iBit, u32))
3828 iBit++;
3829 else
3830 iBit = 0;
3831# elif RT_INLINE_ASM_GNU_STYLE
3832 uint32_t iBit;
3833 __asm__ __volatile__("bsf %1, %0\n\t"
3834 "jnz 1f\n\t"
3835 "xorl %0, %0\n\t"
3836 "jmp 2f\n"
3837 "1:\n\t"
3838 "incl %0\n"
3839 "2:\n\t"
3840 : "=r" (iBit)
3841 : "rm" (u32));
3842# else
3843 uint32_t iBit;
3844 _asm
3845 {
3846 bsf eax, [u32]
3847 jnz found
3848 xor eax, eax
3849 jmp done
3850 found:
3851 inc eax
3852 done:
3853 mov [iBit], eax
3854 }
3855# endif
3856 return iBit;
3857}
3858
3859
3860/**
3861 * Finds the first bit which is set in the given 32-bit integer.
3862 * Bits are numbered from 1 (least significant) to 32.
3863 *
3864 * @returns index [1..32] of the first set bit.
3865 * @returns 0 if all bits are cleared.
3866 * @param i32 Integer to search for set bits.
3867 * @remark Similar to ffs() in BSD.
3868 */
3869DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
3870{
3871 return ASMBitFirstSetU32((uint32_t)i32);
3872}
3873
3874
3875/**
3876 * Finds the last bit which is set in the given 32-bit integer.
3877 * Bits are numbered from 1 (least significant) to 32.
3878 *
3879 * @returns index [1..32] of the last set bit.
3880 * @returns 0 if all bits are cleared.
3881 * @param u32 Integer to search for set bits.
3882 * @remark Similar to fls() in BSD.
3883 */
3884DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
3885{
3886# if RT_INLINE_ASM_USES_INTRIN
3887 unsigned long iBit;
3888 if (_BitScanReverse(&iBit, u32))
3889 iBit++;
3890 else
3891 iBit = 0;
3892# elif RT_INLINE_ASM_GNU_STYLE
3893 uint32_t iBit;
3894 __asm__ __volatile__("bsrl %1, %0\n\t"
3895 "jnz 1f\n\t"
3896 "xorl %0, %0\n\t"
3897 "jmp 2f\n"
3898 "1:\n\t"
3899 "incl %0\n"
3900 "2:\n\t"
3901 : "=r" (iBit)
3902 : "rm" (u32));
3903# else
3904 uint32_t iBit;
3905 _asm
3906 {
3907 bsr eax, [u32]
3908 jnz found
3909 xor eax, eax
3910 jmp done
3911 found:
3912 inc eax
3913 done:
3914 mov [iBit], eax
3915 }
3916# endif
3917 return iBit;
3918}
3919
3920
3921/**
3922 * Finds the last bit which is set in the given 32-bit integer.
3923 * Bits are numbered from 1 (least significant) to 32.
3924 *
3925 * @returns index [1..32] of the last set bit.
3926 * @returns 0 if all bits are cleared.
3927 * @param i32 Integer to search for set bits.
3928 * @remark Similar to fls() in BSD.
3929 */
3930DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
3931{
3932 return ASMBitLastSetS32((uint32_t)i32);
3933}
3934
3935
3936/**
3937 * Reverse the byte order of the given 32-bit integer.
3938 * @param u32 Integer
3939 */
3940DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
3941{
3942#if RT_INLINE_ASM_USES_INTRIN
3943 u32 = _byteswap_ulong(u32);
3944#elif RT_INLINE_ASM_GNU_STYLE
3945 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
3946#else
3947 _asm
3948 {
3949 mov eax, [u32]
3950 bswap eax
3951 mov [u32], eax
3952 }
3953#endif
3954 return u32;
3955}
3956
3957/** @} */
3958
3959
3960/** @} */
3961#endif
3962
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette