VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 2257

Last change on this file since 2257 was 2252, checked in by vboxsync, 18 years ago

killed MSC warning.

1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
27/** @def RT_INLINE_ASM_USES_INTRIN
28 * Defined as 1 if we're using the Microsoft compiler intrinsics (_MSC_VER >= 1400).
29 * Otherwise defined as 0.
30 */
31
32#ifdef _MSC_VER
33# if _MSC_VER >= 1400
34# define RT_INLINE_ASM_USES_INTRIN 1
35# include <intrin.h>
36 /* Emit the intrinsics at all optimization levels. */
37# pragma intrinsic(__cpuid)
38# pragma intrinsic(_enable)
39# pragma intrinsic(_disable)
40# pragma intrinsic(__rdtsc)
41# pragma intrinsic(__readmsr)
42# pragma intrinsic(__writemsr)
43# pragma intrinsic(__outbyte)
44# pragma intrinsic(__outword)
45# pragma intrinsic(__outdword)
46# pragma intrinsic(__inbyte)
47# pragma intrinsic(__inword)
48# pragma intrinsic(__indword)
49# pragma intrinsic(__invlpg)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(__readcr0)
54# pragma intrinsic(__readcr2)
55# pragma intrinsic(__readcr3)
56# pragma intrinsic(__readcr4)
57# pragma intrinsic(__writecr0)
58# pragma intrinsic(__writecr3)
59# pragma intrinsic(__writecr4)
60# pragma intrinsic(_BitScanForward)
61# pragma intrinsic(_BitScanReverse)
62# pragma intrinsic(_bittest)
63# pragma intrinsic(_bittestandset)
64# pragma intrinsic(_bittestandreset)
65# pragma intrinsic(_bittestandcomplement)
66# pragma intrinsic(_byteswap_ushort)
67# pragma intrinsic(_byteswap_ulong)
68# pragma intrinsic(_interlockedbittestandset)
69# pragma intrinsic(_interlockedbittestandreset)
70# pragma intrinsic(_InterlockedAnd)
71# pragma intrinsic(_InterlockedOr)
72# pragma intrinsic(_InterlockedIncrement)
73# pragma intrinsic(_InterlockedDecrement)
74# pragma intrinsic(_InterlockedExchange)
75# pragma intrinsic(_InterlockedCompareExchange)
76# pragma intrinsic(_InterlockedCompareExchange64)
77# ifdef __AMD64__
78# pragma intrinsic(__stosq)
79# pragma intrinsic(__readcr8)
80# pragma intrinsic(__writecr8)
81# pragma intrinsic(_byteswap_uint64)
82# pragma intrinsic(_InterlockedExchange64)
83# endif
84# endif
85#endif
86#ifndef RT_INLINE_ASM_USES_INTRIN
87# define RT_INLINE_ASM_USES_INTRIN 0
88#endif
89
90
91
92/** @defgroup grp_asm ASM - Assembly Routines
93 * @ingroup grp_rt
94 * @{
95 */
96
97/** @def RT_INLINE_ASM_EXTERNAL
98 * Defined as 1 if the compiler does not support inline assembly.
99 * The ASM* functions will then be implemented in an external .asm file.
100 *
101 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
102 *         inline assembly in their AMD64 compiler.
103 */
104#if defined(_MSC_VER) && defined(__AMD64__)
105# define RT_INLINE_ASM_EXTERNAL 1
106#else
107# define RT_INLINE_ASM_EXTERNAL 0
108#endif
109
110/** @def RT_INLINE_ASM_GNU_STYLE
111 * Defined as 1 if the compiler understands GNU style inline assembly.
112 */
113#if defined(_MSC_VER)
114# define RT_INLINE_ASM_GNU_STYLE 0
115#else
116# define RT_INLINE_ASM_GNU_STYLE 1
117#endif
118
119
120/** @todo find a more proper place for this structure? */
121#pragma pack(1)
122/** IDTR */
123typedef struct RTIDTR
124{
125 /** Size of the IDT. */
126 uint16_t cbIdt;
127 /** Address of the IDT. */
128 uintptr_t pIdt;
129} RTIDTR, *PRTIDTR;
130#pragma pack()
131
132#pragma pack(1)
133/** GDTR */
134typedef struct RTGDTR
135{
136 /** Size of the GDT. */
137 uint16_t cbGdt;
138 /** Address of the GDT. */
139 uintptr_t pGdt;
140} RTGDTR, *PRTGDTR;
141#pragma pack()
142
143
144/** @def ASMReturnAddress
145 * Gets the return address of the current (or calling if you like) function or method.
146 */
147#ifdef _MSC_VER
148# ifdef __cplusplus
149extern "C"
150# endif
151void * _ReturnAddress(void);
152# pragma intrinsic(_ReturnAddress)
153# define ASMReturnAddress() _ReturnAddress()
154#elif defined(__GNUC__) || defined(__DOXYGEN__)
155# define ASMReturnAddress() __builtin_return_address(0)
156#else
157# error "Unsupported compiler."
158#endif
159
160
161/**
162 * Gets the content of the IDTR CPU register.
163 * @param pIdtr Where to store the IDTR contents.
164 */
165#if RT_INLINE_ASM_EXTERNAL
166DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
167#else
168DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
169{
170# if RT_INLINE_ASM_GNU_STYLE
171 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
172# else
173 __asm
174 {
175# ifdef __AMD64__
176 mov rax, [pIdtr]
177 sidt [rax]
178# else
179 mov eax, [pIdtr]
180 sidt [eax]
181# endif
182 }
183# endif
184}
185#endif
186
187
188/**
189 * Sets the content of the IDTR CPU register.
190 * @param pIdtr Where to load the IDTR contents from.
191 */
192#if RT_INLINE_ASM_EXTERNAL
193DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
194#else
195DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
196{
197# if RT_INLINE_ASM_GNU_STYLE
198 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
199# else
200 __asm
201 {
202# ifdef __AMD64__
203 mov rax, [pIdtr]
204 lidt [rax]
205# else
206 mov eax, [pIdtr]
207 lidt [eax]
208# endif
209 }
210# endif
211}
212#endif
213
214
215/**
216 * Gets the content of the GDTR CPU register.
217 * @param pGdtr Where to store the GDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
221#else
222DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
226# else
227 __asm
228 {
229# ifdef __AMD64__
230 mov rax, [pGdtr]
231 sgdt [rax]
232# else
233 mov eax, [pGdtr]
234 sgdt [eax]
235# endif
236 }
237# endif
238}
239#endif
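
/* Illustrative usage (not part of the original header): capturing the
 * descriptor-table registers into the packed structures defined above.
 * The local variable names are hypothetical.
 * @code
 *      RTIDTR Idtr;
 *      RTGDTR Gdtr;
 *      ASMGetIDTR(&Idtr);
 *      ASMGetGDTR(&Gdtr);
 *      // Idtr.cbIdt/pIdt and Gdtr.cbGdt/pGdt now describe the current tables.
 * @endcode
 */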
240
241/**
242 * Get the cs register.
243 * @returns cs.
244 */
245#if RT_INLINE_ASM_EXTERNAL
246DECLASM(RTSEL) ASMGetCS(void);
247#else
248DECLINLINE(RTSEL) ASMGetCS(void)
249{
250 RTSEL SelCS;
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
253# else
254 __asm
255 {
256 mov ax, cs
257 mov [SelCS], ax
258 }
259# endif
260 return SelCS;
261}
262#endif
263
264
265/**
266 * Get the DS register.
267 * @returns DS.
268 */
269#if RT_INLINE_ASM_EXTERNAL
270DECLASM(RTSEL) ASMGetDS(void);
271#else
272DECLINLINE(RTSEL) ASMGetDS(void)
273{
274 RTSEL SelDS;
275# if RT_INLINE_ASM_GNU_STYLE
276 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
277# else
278 __asm
279 {
280 mov ax, ds
281 mov [SelDS], ax
282 }
283# endif
284 return SelDS;
285}
286#endif
287
288
289/**
290 * Get the ES register.
291 * @returns ES.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetES(void);
295#else
296DECLINLINE(RTSEL) ASMGetES(void)
297{
298 RTSEL SelES;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
301# else
302 __asm
303 {
304 mov ax, es
305 mov [SelES], ax
306 }
307# endif
308 return SelES;
309}
310#endif
311
312
313/**
314 * Get the FS register.
315 * @returns FS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetFS(void);
319#else
320DECLINLINE(RTSEL) ASMGetFS(void)
321{
322 RTSEL SelFS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
325# else
326 __asm
327 {
328 mov ax, fs
329 mov [SelFS], ax
330 }
331# endif
332 return SelFS;
333}
334#endif
335
336
337/**
338 * Get the GS register.
339 * @returns GS.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetGS(void);
343#else
344DECLINLINE(RTSEL) ASMGetGS(void)
345{
346 RTSEL SelGS;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
349# else
350 __asm
351 {
352 mov ax, gs
353 mov [SelGS], ax
354 }
355# endif
356 return SelGS;
357}
358#endif
359
360
361/**
362 * Get the SS register.
363 * @returns SS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetSS(void);
367#else
368DECLINLINE(RTSEL) ASMGetSS(void)
369{
370 RTSEL SelSS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
373# else
374 __asm
375 {
376 mov ax, ss
377 mov [SelSS], ax
378 }
379# endif
380 return SelSS;
381}
382#endif
383
384
385/**
386 * Get the TR register.
387 * @returns TR.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetTR(void);
391#else
392DECLINLINE(RTSEL) ASMGetTR(void)
393{
394 RTSEL SelTR;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
397# else
398 __asm
399 {
400 str ax
401 mov [SelTR], ax
402 }
403# endif
404 return SelTR;
405}
406#endif
407
408
409/**
410 * Get the [RE]FLAGS register.
411 * @returns [RE]FLAGS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTCCUINTREG) ASMGetFlags(void);
415#else
416DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
417{
418 RTCCUINTREG uFlags;
419# if RT_INLINE_ASM_GNU_STYLE
420# ifdef __AMD64__
421 __asm__ __volatile__("pushfq\n\t"
422 "popq %0\n\t"
423 : "=m" (uFlags));
424# else
425 __asm__ __volatile__("pushfl\n\t"
426 "popl %0\n\t"
427 : "=m" (uFlags));
428# endif
429# else
430 __asm
431 {
432# ifdef __AMD64__
433 pushfq
434 pop [uFlags]
435# else
436 pushfd
437 pop [uFlags]
438# endif
439 }
440# endif
441 return uFlags;
442}
443#endif
444
445
446/**
447 * Set the [RE]FLAGS register.
448 * @param uFlags The new [RE]FLAGS value.
449 */
450#if RT_INLINE_ASM_EXTERNAL
451DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
452#else
453DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
454{
455# if RT_INLINE_ASM_GNU_STYLE
456# ifdef __AMD64__
457 __asm__ __volatile__("pushq %0\n\t"
458 "popfq\n\t"
459 : : "m" (uFlags));
460# else
461 __asm__ __volatile__("pushl %0\n\t"
462 "popfl\n\t"
463 : : "m" (uFlags));
464# endif
465# else
466 __asm
467 {
468# ifdef __AMD64__
469 push [uFlags]
470 popfq
471# else
472 push [uFlags]
473 popfd
474# endif
475 }
476# endif
477}
478#endif
479
480
481/**
482 * Gets the content of the CPU timestamp counter register.
483 *
484 * @returns TSC.
485 */
486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
487DECLASM(uint64_t) ASMReadTSC(void);
488#else
489DECLINLINE(uint64_t) ASMReadTSC(void)
490{
491 RTUINT64U u;
492# if RT_INLINE_ASM_GNU_STYLE
493 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
494# else
495# if RT_INLINE_ASM_USES_INTRIN
496 u.u = __rdtsc();
497# else
498 __asm
499 {
500 rdtsc
501 mov [u.s.Lo], eax
502 mov [u.s.Hi], edx
503 }
504# endif
505# endif
506 return u.u;
507}
508#endif
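
/* Illustrative usage (not part of the original header): a rough cycle count for
 * a piece of work. RDTSC is not a serializing instruction, so treat the result
 * as an approximation; doWorkToMeasure() is a hypothetical placeholder.
 * @code
 *      uint64_t const uTscStart = ASMReadTSC();
 *      doWorkToMeasure();
 *      uint64_t const cTicks    = ASMReadTSC() - uTscStart;
 * @endcode
 */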
509
510
511/**
512 * Performs the cpuid instruction returning all registers.
513 *
514 * @param uOperator CPUID operation (eax).
515 * @param pvEAX Where to store eax.
516 * @param pvEBX Where to store ebx.
517 * @param pvECX Where to store ecx.
518 * @param pvEDX Where to store edx.
519 * @remark We're using void pointers to ease the use of special bitfield structures and such.
520 */
521#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
522DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
523#else
524DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
525{
526# if RT_INLINE_ASM_GNU_STYLE
527# ifdef __AMD64__
528 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
529 __asm__ ("cpuid\n\t"
530 : "=a" (uRAX),
531 "=b" (uRBX),
532 "=c" (uRCX),
533 "=d" (uRDX)
534 : "0" (uOperator));
535 *(uint32_t *)pvEAX = (uint32_t)uRAX;
536 *(uint32_t *)pvEBX = (uint32_t)uRBX;
537 *(uint32_t *)pvECX = (uint32_t)uRCX;
538 *(uint32_t *)pvEDX = (uint32_t)uRDX;
539# else
540 __asm__ ("xchgl %%ebx, %1\n\t"
541 "cpuid\n\t"
542 "xchgl %%ebx, %1\n\t"
543 : "=a" (*(uint32_t *)pvEAX),
544 "=r" (*(uint32_t *)pvEBX),
545 "=c" (*(uint32_t *)pvECX),
546 "=d" (*(uint32_t *)pvEDX)
547 : "0" (uOperator));
548# endif
549
550# elif RT_INLINE_ASM_USES_INTRIN
551 int aInfo[4];
552 __cpuid(aInfo, uOperator);
553 *(uint32_t *)pvEAX = aInfo[0];
554 *(uint32_t *)pvEBX = aInfo[1];
555 *(uint32_t *)pvECX = aInfo[2];
556 *(uint32_t *)pvEDX = aInfo[3];
557
558# else
559 uint32_t uEAX;
560 uint32_t uEBX;
561 uint32_t uECX;
562 uint32_t uEDX;
563 __asm
564 {
565 push ebx
566 mov eax, [uOperator]
567 cpuid
568 mov [uEAX], eax
569 mov [uEBX], ebx
570 mov [uECX], ecx
571 mov [uEDX], edx
572 pop ebx
573 }
574 *(uint32_t *)pvEAX = uEAX;
575 *(uint32_t *)pvEBX = uEBX;
576 *(uint32_t *)pvECX = uECX;
577 *(uint32_t *)pvEDX = uEDX;
578# endif
579}
580#endif
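
/* Illustrative usage (not part of the original header): querying the vendor
 * identification string via standard leaf 0. The array name is hypothetical;
 * the vendor characters are returned in EBX, EDX, ECX (in that order).
 * @code
 *      uint32_t au32[4];   // eax, ebx, ecx, edx
 *      ASMCpuId(0, &au32[0], &au32[1], &au32[2], &au32[3]);
 *      // au32[0] = highest standard leaf; au32[1], au32[3], au32[2] spell
 *      // "GenuineIntel", "AuthenticAMD", etc.
 * @endcode
 */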
581
582
583/**
584 * Performs the cpuid instruction returning ecx and edx.
585 *
586 * @param uOperator CPUID operation (eax).
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
595{
596 uint32_t uEBX;
597 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
598}
599#endif
600
601
602/**
603 * Performs the cpuid instruction returning edx.
604 *
605 * @param uOperator CPUID operation (eax).
606 * @returns EDX after cpuid operation.
607 */
608#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
609DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
610#else
611DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
612{
613 RTCCUINTREG xDX;
614# if RT_INLINE_ASM_GNU_STYLE
615# ifdef __AMD64__
616 RTCCUINTREG uSpill;
617 __asm__ ("cpuid"
618 : "=a" (uSpill),
619 "=d" (xDX)
620 : "0" (uOperator)
621 : "rbx", "rcx");
622# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
623 __asm__ ("push %%ebx\n\t"
624 "cpuid\n\t"
625 "pop %%ebx\n\t"
626 : "=a" (uOperator),
627 "=d" (xDX)
628 : "0" (uOperator)
629 : "ecx");
630# else
631 __asm__ ("cpuid"
632 : "=a" (uOperator),
633 "=d" (xDX)
634 : "0" (uOperator)
635 : "ebx", "ecx");
636# endif
637
638# elif RT_INLINE_ASM_USES_INTRIN
639 int aInfo[4];
640 __cpuid(aInfo, uOperator);
641 xDX = aInfo[3];
642
643# else
644 __asm
645 {
646 push ebx
647 mov eax, [uOperator]
648 cpuid
649 mov [xDX], edx
650 pop ebx
651 }
652# endif
653 return (uint32_t)xDX;
654}
655#endif
656
657
658/**
659 * Performs the cpuid instruction returning ecx.
660 *
661 * @param uOperator CPUID operation (eax).
662 * @returns ECX after cpuid operation.
663 */
664#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
665DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
666#else
667DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
668{
669 RTCCUINTREG xCX;
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef __AMD64__
672 RTCCUINTREG uSpill;
673 __asm__ ("cpuid"
674 : "=a" (uSpill),
675 "=c" (xCX)
676 : "0" (uOperator)
677 : "rbx", "rdx");
678# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
679 __asm__ ("push %%ebx\n\t"
680 "cpuid\n\t"
681 "pop %%ebx\n\t"
682 : "=a" (uOperator),
683 "=c" (xCX)
684 : "0" (uOperator)
685 : "edx");
686# else
687 __asm__ ("cpuid"
688 : "=a" (uOperator),
689 "=c" (xCX)
690 : "0" (uOperator)
691 : "ebx", "edx");
692
693# endif
694
695# elif RT_INLINE_ASM_USES_INTRIN
696 int aInfo[4];
697 __cpuid(aInfo, uOperator);
698 xCX = aInfo[2];
699
700# else
701 __asm
702 {
703 push ebx
704 mov eax, [uOperator]
705 cpuid
706 mov [xCX], ecx
707 pop ebx
708 }
709# endif
710 return (uint32_t)xCX;
711}
712#endif
713
714
715/**
716 * Checks if the current CPU supports CPUID.
717 *
718 * @returns true if CPUID is supported.
719 */
720DECLINLINE(bool) ASMHasCpuId(void)
721{
722#ifdef __AMD64__
723 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
724#else /* !__AMD64__ */
725 bool fRet = false;
726# if RT_INLINE_ASM_GNU_STYLE
727 uint32_t u1;
728 uint32_t u2;
729 __asm__ ("pushf\n\t"
730 "pop %1\n\t"
731 "mov %1, %2\n\t"
732 "xorl $0x200000, %1\n\t"
733 "push %1\n\t"
734 "popf\n\t"
735 "pushf\n\t"
736 "pop %1\n\t"
737 "cmpl %1, %2\n\t"
738 "setne %0\n\t"
739 "push %2\n\t"
740 "popf\n\t"
741 : "=m" (fRet), "=r" (u1), "=r" (u2));
742# else
743 __asm
744 {
745 pushfd
746 pop eax
747 mov ebx, eax
748 xor eax, 0200000h
749 push eax
750 popfd
751 pushfd
752 pop eax
753 cmp eax, ebx
754 setne fRet
755 push ebx
756 popfd
757 }
758# endif
759 return fRet;
760#endif /* !__AMD64__ */
761}
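
/* Illustrative usage (not part of the original header): guarding CPUID usage on
 * 32-bit systems where very old CPUs may lack the instruction.
 * @code
 *      uint32_t fStdFeaturesEdx = 0;
 *      if (ASMHasCpuId())
 *          fStdFeaturesEdx = ASMCpuId_EDX(1);  // standard feature flags (leaf 1, edx)
 * @endcode
 */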
762
763
764/**
765 * Gets the APIC ID of the current CPU.
766 *
767 * @returns the APIC ID.
768 */
769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
770DECLASM(uint8_t) ASMGetApicId(void);
771#else
772DECLINLINE(uint8_t) ASMGetApicId(void)
773{
774 RTCCUINTREG xBX;
775# if RT_INLINE_ASM_GNU_STYLE
776# ifdef __AMD64__
777 RTCCUINTREG uSpill;
778 __asm__ ("cpuid"
779 : "=a" (uSpill),
780 "=b" (xBX)
781 : "0" (1)
782 : "rcx", "rdx");
783# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
784 RTCCUINTREG uSpill;
785 __asm__ ("mov %%ebx,%1\n\t"
786 "cpuid\n\t"
787 "xchgl %%ebx,%1\n\t"
788 : "=a" (uSpill),
789 "=r" (xBX)
790 : "0" (1)
791 : "ecx", "edx");
792# else
793 RTCCUINTREG uSpill;
794 __asm__ ("cpuid"
795 : "=a" (uSpill),
796 "=b" (xBX)
797 : "0" (1)
798 : "ecx", "edx");
799# endif
800
801# elif RT_INLINE_ASM_USES_INTRIN
802 int aInfo[4];
803 __cpuid(aInfo, 1);
804 xBX = aInfo[1];
805
806# else
807 __asm
808 {
809 push ebx
810 mov eax, 1
811 cpuid
812 mov [xBX], ebx
813 pop ebx
814 }
815# endif
816 return (uint8_t)(xBX >> 24);
817}
818#endif
819
820/**
821 * Get cr0.
822 * @returns cr0.
823 */
824#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
825DECLASM(RTCCUINTREG) ASMGetCR0(void);
826#else
827DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
828{
829 RTCCUINTREG uCR0;
830# if RT_INLINE_ASM_USES_INTRIN
831 uCR0 = __readcr0();
832
833# elif RT_INLINE_ASM_GNU_STYLE
834# ifdef __AMD64__
835 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
836# else
837 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
838# endif
839# else
840 __asm
841 {
842# ifdef __AMD64__
843 mov rax, cr0
844 mov [uCR0], rax
845# else
846 mov eax, cr0
847 mov [uCR0], eax
848# endif
849 }
850# endif
851 return uCR0;
852}
853#endif
854
855
856/**
857 * Sets the CR0 register.
858 * @param uCR0 The new CR0 value.
859 */
860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
861DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
862#else
863DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
864{
865# if RT_INLINE_ASM_USES_INTRIN
866 __writecr0(uCR0);
867
868# elif RT_INLINE_ASM_GNU_STYLE
869# ifdef __AMD64__
870 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
871# else
872 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
873# endif
874# else
875 __asm
876 {
877# ifdef __AMD64__
878 mov rax, [uCR0]
879 mov cr0, rax
880# else
881 mov eax, [uCR0]
882 mov cr0, eax
883# endif
884 }
885# endif
886}
887#endif
888
889
890/**
891 * Get cr2.
892 * @returns cr2.
893 */
894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
895DECLASM(RTCCUINTREG) ASMGetCR2(void);
896#else
897DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
898{
899 RTCCUINTREG uCR2;
900# if RT_INLINE_ASM_USES_INTRIN
901 uCR2 = __readcr2();
902
903# elif RT_INLINE_ASM_GNU_STYLE
904# ifdef __AMD64__
905 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
906# else
907 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
908# endif
909# else
910 __asm
911 {
912# ifdef __AMD64__
913 mov rax, cr2
914 mov [uCR2], rax
915# else
916 mov eax, cr2
917 mov [uCR2], eax
918# endif
919 }
920# endif
921 return uCR2;
922}
923#endif
924
925
926/**
927 * Sets the CR2 register.
928 * @param uCR2 The new CR2 value.
929 */
930#if RT_INLINE_ASM_EXTERNAL
931DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
932#else
933DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
934{
935# if RT_INLINE_ASM_GNU_STYLE
936# ifdef __AMD64__
937 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
938# else
939 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
940# endif
941# else
942 __asm
943 {
944# ifdef __AMD64__
945 mov rax, [uCR2]
946 mov cr2, rax
947# else
948 mov eax, [uCR2]
949 mov cr2, eax
950# endif
951 }
952# endif
953}
954#endif
955
956
957/**
958 * Get cr3.
959 * @returns cr3.
960 */
961#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
962DECLASM(RTCCUINTREG) ASMGetCR3(void);
963#else
964DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
965{
966 RTCCUINTREG uCR3;
967# if RT_INLINE_ASM_USES_INTRIN
968 uCR3 = __readcr3();
969
970# elif RT_INLINE_ASM_GNU_STYLE
971# ifdef __AMD64__
972 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
973# else
974 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
975# endif
976# else
977 __asm
978 {
979# ifdef __AMD64__
980 mov rax, cr3
981 mov [uCR3], rax
982# else
983 mov eax, cr3
984 mov [uCR3], eax
985# endif
986 }
987# endif
988 return uCR3;
989}
990#endif
991
992
993/**
994 * Sets the CR3 register.
995 *
996 * @param uCR3 New CR3 value.
997 */
998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
999DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1000#else
1001DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1002{
1003# if RT_INLINE_ASM_USES_INTRIN
1004 __writecr3(uCR3);
1005
1006# elif RT_INLINE_ASM_GNU_STYLE
1007# ifdef __AMD64__
1008 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1009# else
1010 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1011# endif
1012# else
1013 __asm
1014 {
1015# ifdef __AMD64__
1016 mov rax, [uCR3]
1017 mov cr3, rax
1018# else
1019 mov eax, [uCR3]
1020 mov cr3, eax
1021# endif
1022 }
1023# endif
1024}
1025#endif
1026
1027
1028/**
1029 * Reloads the CR3 register.
1030 */
1031#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1032DECLASM(void) ASMReloadCR3(void);
1033#else
1034DECLINLINE(void) ASMReloadCR3(void)
1035{
1036# if RT_INLINE_ASM_USES_INTRIN
1037 __writecr3(__readcr3());
1038
1039# elif RT_INLINE_ASM_GNU_STYLE
1040 RTCCUINTREG u;
1041# ifdef __AMD64__
1042 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1043 "movq %0, %%cr3\n\t"
1044 : "=r" (u));
1045# else
1046 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1047 "movl %0, %%cr3\n\t"
1048 : "=r" (u));
1049# endif
1050# else
1051 __asm
1052 {
1053# ifdef __AMD64__
1054 mov rax, cr3
1055 mov cr3, rax
1056# else
1057 mov eax, cr3
1058 mov cr3, eax
1059# endif
1060 }
1061# endif
1062}
1063#endif
1064
1065
1066/**
1067 * Get cr4.
1068 * @returns cr4.
1069 */
1070#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1071DECLASM(RTCCUINTREG) ASMGetCR4(void);
1072#else
1073DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1074{
1075 RTCCUINTREG uCR4;
1076# if RT_INLINE_ASM_USES_INTRIN
1077 uCR4 = __readcr4();
1078
1079# elif RT_INLINE_ASM_GNU_STYLE
1080# ifdef __AMD64__
1081 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1082# else
1083 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1084# endif
1085# else
1086 __asm
1087 {
1088# ifdef __AMD64__
1089 mov rax, cr4
1090 mov [uCR4], rax
1091# else
1092 push eax /* just in case */
1093 /*mov eax, cr4*/
1094 _emit 0x0f
1095 _emit 0x20
1096 _emit 0xe0
1097 mov [uCR4], eax
1098 pop eax
1099# endif
1100 }
1101# endif
1102 return uCR4;
1103}
1104#endif
1105
1106
1107/**
1108 * Sets the CR4 register.
1109 *
1110 * @param uCR4 New CR4 value.
1111 */
1112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1113DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1114#else
1115DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1116{
1117# if RT_INLINE_ASM_USES_INTRIN
1118 __writecr4(uCR4);
1119
1120# elif RT_INLINE_ASM_GNU_STYLE
1121# ifdef __AMD64__
1122 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1123# else
1124 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1125# endif
1126# else
1127 __asm
1128 {
1129# ifdef __AMD64__
1130 mov rax, [uCR4]
1131 mov cr4, rax
1132# else
1133 mov eax, [uCR4]
1134 _emit 0x0F
1135 _emit 0x22
1136 _emit 0xE0 /* mov cr4, eax */
1137# endif
1138 }
1139# endif
1140}
1141#endif
1142
1143
1144/**
1145 * Get cr8.
1146 * @returns cr8.
1147 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1148 */
1149#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1150DECLASM(RTCCUINTREG) ASMGetCR8(void);
1151#else
1152DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1153{
1154# ifdef __AMD64__
1155 RTCCUINTREG uCR8;
1156# if RT_INLINE_ASM_USES_INTRIN
1157 uCR8 = __readcr8();
1158
1159# elif RT_INLINE_ASM_GNU_STYLE
1160 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1161# else
1162 __asm
1163 {
1164 mov rax, cr8
1165 mov [uCR8], rax
1166 }
1167# endif
1168 return uCR8;
1169# else /* !__AMD64__ */
1170 return 0;
1171# endif /* !__AMD64__ */
1172}
1173#endif
1174
1175
1176/**
1177 * Enables interrupts (EFLAGS.IF).
1178 */
1179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1180DECLASM(void) ASMIntEnable(void);
1181#else
1182DECLINLINE(void) ASMIntEnable(void)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185 __asm("sti\n");
1186# elif RT_INLINE_ASM_USES_INTRIN
1187 _enable();
1188# else
1189 __asm sti
1190# endif
1191}
1192#endif
1193
1194
1195/**
1196 * Disables interrupts (!EFLAGS.IF).
1197 */
1198#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1199DECLASM(void) ASMIntDisable(void);
1200#else
1201DECLINLINE(void) ASMIntDisable(void)
1202{
1203# if RT_INLINE_ASM_GNU_STYLE
1204 __asm("cli\n");
1205# elif RT_INLINE_ASM_USES_INTRIN
1206 _disable();
1207# else
1208 __asm cli
1209# endif
1210}
1211#endif
1212
1213
1214/**
1215 * Disables interrupts and returns previous xFLAGS.
1216 */
1217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1218DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1219#else
1220DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1221{
1222 RTCCUINTREG xFlags;
1223# if RT_INLINE_ASM_GNU_STYLE
1224# ifdef __AMD64__
1225 __asm__ __volatile__("pushfq\n\t"
1226 "cli\n\t"
1227 "popq %0\n\t"
1228 : "=m" (xFlags));
1229# else
1230 __asm__ __volatile__("pushfl\n\t"
1231 "cli\n\t"
1232 "popl %0\n\t"
1233 : "=m" (xFlags));
1234# endif
1235# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1236 xFlags = ASMGetFlags();
1237 _disable();
1238# else
1239 __asm {
1240 pushfd
1241 cli
1242 pop [xFlags]
1243 }
1244# endif
1245 return xFlags;
1246}
1247#endif
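
/* Illustrative usage (not part of the original header): the usual pattern for a
 * short section that must run with interrupts disabled and then restore the
 * caller's interrupt state; the work in the middle is hypothetical.
 * @code
 *      RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *      // ... touch data that an interrupt handler may also touch ...
 *      ASMSetFlags(fSavedFlags);   // restores the previous EFLAGS.IF state
 * @endcode
 */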
1248
1249
1250/**
1251 * Reads a machine specific register.
1252 *
1253 * @returns Register content.
1254 * @param uRegister Register to read.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1258#else
1259DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1260{
1261 RTUINT64U u;
1262# if RT_INLINE_ASM_GNU_STYLE
1263 __asm__ ("rdmsr\n\t"
1264 : "=a" (u.s.Lo),
1265 "=d" (u.s.Hi)
1266 : "c" (uRegister));
1267
1268# elif RT_INLINE_ASM_USES_INTRIN
1269 u.u = __readmsr(uRegister);
1270
1271# else
1272 __asm
1273 {
1274 mov ecx, [uRegister]
1275 rdmsr
1276 mov [u.s.Lo], eax
1277 mov [u.s.Hi], edx
1278 }
1279# endif
1280
1281 return u.u;
1282}
1283#endif
1284
1285
1286/**
1287 * Writes a machine specific register.
1288 *
1290 * @param uRegister Register to write to.
1291 * @param u64Val Value to write.
1292 */
1293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1294DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1295#else
1296DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1297{
1298 RTUINT64U u;
1299
1300 u.u = u64Val;
1301# if RT_INLINE_ASM_GNU_STYLE
1302 __asm__ __volatile__("wrmsr\n\t"
1303 ::"a" (u.s.Lo),
1304 "d" (u.s.Hi),
1305 "c" (uRegister));
1306
1307# elif RT_INLINE_ASM_USES_INTRIN
1308 __writemsr(uRegister, u.u);
1309
1310# else
1311 __asm
1312 {
1313 mov ecx, [uRegister]
1314 mov edx, [u.s.Hi]
1315 mov eax, [u.s.Lo]
1316 wrmsr
1317 }
1318# endif
1319}
1320#endif
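
/* Illustrative usage (not part of the original header): a read-modify-write of a
 * machine specific register. MY_MSR_INDEX and MY_MSR_SOME_BIT are hypothetical
 * placeholders, not real MSR definitions.
 * @code
 *      uint64_t u64 = ASMRdMsr(MY_MSR_INDEX);
 *      u64 |= MY_MSR_SOME_BIT;
 *      ASMWrMsr(MY_MSR_INDEX, u64);
 * @endcode
 */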
1321
1322
1323/**
1324 * Reads low part of a machine specific register.
1325 *
1326 * @returns Register content.
1327 * @param uRegister Register to read.
1328 */
1329#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1330DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1331#else
1332DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1333{
1334 uint32_t u32;
1335# if RT_INLINE_ASM_GNU_STYLE
1336 __asm__ ("rdmsr\n\t"
1337 : "=a" (u32)
1338 : "c" (uRegister)
1339 : "edx");
1340
1341# elif RT_INLINE_ASM_USES_INTRIN
1342 u32 = (uint32_t)__readmsr(uRegister);
1343
1344# else
1345 __asm
1346 {
1347 mov ecx, [uRegister]
1348 rdmsr
1349 mov [u32], eax
1350 }
1351# endif
1352
1353 return u32;
1354}
1355#endif
1356
1357
1358/**
1359 * Reads high part of a machine specific register.
1360 *
1361 * @returns Register content.
1362 * @param uRegister Register to read.
1363 */
1364#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1365DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1366#else
1367DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1368{
1369 uint32_t u32;
1370# if RT_INLINE_ASM_GNU_STYLE
1371 __asm__ ("rdmsr\n\t"
1372 : "=d" (u32)
1373 : "c" (uRegister)
1374 : "eax");
1375
1376# elif RT_INLINE_ASM_USES_INTRIN
1377 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1378
1379# else
1380 __asm
1381 {
1382 mov ecx, [uRegister]
1383 rdmsr
1384 mov [u32], edx
1385 }
1386# endif
1387
1388 return u32;
1389}
1390#endif
1391
1392
1393/**
1394 * Gets dr7.
1395 *
1396 * @returns dr7.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL
1399DECLASM(RTCCUINTREG) ASMGetDR7(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1402{
1403 RTCCUINTREG uDR7;
1404# if RT_INLINE_ASM_GNU_STYLE
1405# ifdef __AMD64__
1406 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1407# else
1408 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1409# endif
1410# else
1411 __asm
1412 {
1413# ifdef __AMD64__
1414 mov rax, dr7
1415 mov [uDR7], rax
1416# else
1417 mov eax, dr7
1418 mov [uDR7], eax
1419# endif
1420 }
1421# endif
1422 return uDR7;
1423}
1424#endif
1425
1426
1427/**
1428 * Gets dr6.
1429 *
1430 * @returns dr6.
1431 */
1432#if RT_INLINE_ASM_EXTERNAL
1433DECLASM(RTCCUINTREG) ASMGetDR6(void);
1434#else
1435DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1436{
1437 RTCCUINTREG uDR6;
1438# if RT_INLINE_ASM_GNU_STYLE
1439# ifdef __AMD64__
1440 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1441# else
1442 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1443# endif
1444# else
1445 __asm
1446 {
1447# ifdef __AMD64__
1448 mov rax, dr6
1449 mov [uDR6], rax
1450# else
1451 mov eax, dr6
1452 mov [uDR6], eax
1453# endif
1454 }
1455# endif
1456 return uDR6;
1457}
1458#endif
1459
1460
1461/**
1462 * Reads and clears DR6.
1463 *
1464 * @returns DR6.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL
1467DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1470{
1471 RTCCUINTREG uDR6;
1472# if RT_INLINE_ASM_GNU_STYLE
1473 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1474# ifdef __AMD64__
1475 __asm__ ("movq %%dr6, %0\n\t"
1476 "movq %1, %%dr6\n\t"
1477 : "=r" (uDR6)
1478 : "r" (uNewValue));
1479# else
1480 __asm__ ("movl %%dr6, %0\n\t"
1481 "movl %1, %%dr6\n\t"
1482 : "=r" (uDR6)
1483 : "r" (uNewValue));
1484# endif
1485# else
1486 __asm
1487 {
1488# ifdef __AMD64__
1489 mov rax, dr6
1490 mov [uDR6], rax
1491 mov rcx, rax
1492 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1493 mov dr6, rcx
1494# else
1495 mov eax, dr6
1496 mov [uDR6], eax
1497 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1498 mov dr6, ecx
1499# endif
1500 }
1501# endif
1502 return uDR6;
1503}
1504#endif
1505
1506
1507/**
1508 * Ensure that gcc does not cache memory values in registers or move memory accesses across this
1509 * point. It is used around assembler instructions with side-effects, e.g. port writes to magical
1510 * guest ports causing guest memory changes by the host.
1511 *
1512 * @todo r=bird: There are two things I don't like there, 1) the name and 2) what about msc?
1513 *
1514 * Unless I'm not much mistaken this construct is what is 'barrier' or 'mb' in the linux
1515 * kernel. The ASMMem is used as prefix elsewhere in this file, so ASMMemory* is a confusing
1516 * way to name a new function. I think a more fitting name would be ASMCompilerBarrier,
1517 * ASMCompilerMemoryBarrier, or perhaps ASMMemBarrier.
1518 *
1519 * For MSC I guess _ReadWriteBarrier is what we're looking for.
1520 * See http://msdn2.microsoft.com/en-us/library/f20w0x5e(VS.80).aspx
1521 *
1522 * We should also add a little note about considering using 'volatile' similar to the one found
1523 * with the _ReadWriteBarrier docs.
1524 */
1525#if RT_INLINE_ASM_GNU_STYLE
1526DECLINLINE(void) ASMMemoryClobber(void)
1527{
1528 __asm__ __volatile__ ("" : : : "memory");
1529}
1530#else
1531DECLINLINE(void) ASMMemoryClobber(void)
1532{
1533}
1534#endif
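
/* Illustrative usage (not part of the original header): a sketch of the kind of
 * situation the barrier is meant for. The shared structure and flag are
 * hypothetical; note that the MSC build currently compiles this to nothing
 * (see the todo above).
 * @code
 *      pShared->u32Ready = 1;      // flag polled by another context (hypothetical)
 *      ASMMemoryClobber();         // don't let gcc defer the store or cache later reads
 * @endcode
 */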
1535
1536/**
1537 * Writes an 8-bit unsigned integer to an I/O port.
1538 *
1539 * @param Port I/O port to write to.
1540 * @param u8 8-bit integer to write.
1541 */
1542#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1543DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1544#else
1545DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1546{
1547# if RT_INLINE_ASM_GNU_STYLE
1548 __asm__ __volatile__("outb %b1, %w0\n\t"
1549 :: "Nd" (Port),
1550 "a" (u8));
1551
1552# elif RT_INLINE_ASM_USES_INTRIN
1553 __outbyte(Port, u8);
1554
1555# else
1556 __asm
1557 {
1558 mov dx, [Port]
1559 mov al, [u8]
1560 out dx, al
1561 }
1562# endif
1563}
1564#endif
1565
1566
1567/**
1568 * Gets an 8-bit unsigned integer from an I/O port.
1569 *
1570 * @returns 8-bit integer.
1571 * @param Port I/O port to read from.
1572 */
1573#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1574DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1575#else
1576DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1577{
1578 uint8_t u8;
1579# if RT_INLINE_ASM_GNU_STYLE
1580 __asm__ __volatile__("inb %w1, %b0\n\t"
1581 : "=a" (u8)
1582 : "Nd" (Port));
1583
1584# elif RT_INLINE_ASM_USES_INTRIN
1585 u8 = __inbyte(Port);
1586
1587# else
1588 __asm
1589 {
1590 mov dx, [Port]
1591 in al, dx
1592 mov [u8], al
1593 }
1594# endif
1595 return u8;
1596}
1597#endif
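
/* Illustrative usage (not part of the original header): a simple status poll
 * followed by a data write on a hypothetical device; MY_STATUS_PORT,
 * MY_DATA_PORT, MY_BUSY_BIT and bValue are placeholders, not real definitions.
 * @code
 *      while (ASMInU8(MY_STATUS_PORT) & MY_BUSY_BIT)
 *          ;                               // spin until the device is ready
 *      ASMOutU8(MY_DATA_PORT, bValue);
 * @endcode
 */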
1598
1599
1600/**
1601 * Writes a 16-bit unsigned integer to an I/O port.
1602 *
1603 * @param Port I/O port to write to.
1604 * @param u16 16-bit integer to write.
1605 */
1606#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1607DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1608#else
1609DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1610{
1611# if RT_INLINE_ASM_GNU_STYLE
1612 __asm__ __volatile__("outw %w1, %w0\n\t"
1613 :: "Nd" (Port),
1614 "a" (u16));
1615
1616# elif RT_INLINE_ASM_USES_INTRIN
1617 __outword(Port, u16);
1618
1619# else
1620 __asm
1621 {
1622 mov dx, [Port]
1623 mov ax, [u16]
1624 out dx, ax
1625 }
1626# endif
1627}
1628#endif
1629
1630
1631/**
1632 * Gets a 16-bit unsigned integer from an I/O port.
1633 *
1634 * @returns 16-bit integer.
1635 * @param Port I/O port to read from.
1636 */
1637#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1638DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1639#else
1640DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1641{
1642 uint16_t u16;
1643# if RT_INLINE_ASM_GNU_STYLE
1644 __asm__ __volatile__("inw %w1, %w0\n\t"
1645 : "=a" (u16)
1646 : "Nd" (Port));
1647
1648# elif RT_INLINE_ASM_USES_INTRIN
1649 u16 = __inword(Port);
1650
1651# else
1652 __asm
1653 {
1654 mov dx, [Port]
1655 in ax, dx
1656 mov [u16], ax
1657 }
1658# endif
1659 return u16;
1660}
1661#endif
1662
1663
1664/**
1665 * Writes a 32-bit unsigned integer to an I/O port.
1666 *
1667 * @param Port I/O port to write to.
1668 * @param u32 32-bit integer to write.
1669 */
1670#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1671DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1672#else
1673DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1674{
1675# if RT_INLINE_ASM_GNU_STYLE
1676 __asm__ __volatile__("outl %1, %w0\n\t"
1677 :: "Nd" (Port),
1678 "a" (u32));
1679
1680# elif RT_INLINE_ASM_USES_INTRIN
1681 __outdword(Port, u32);
1682
1683# else
1684 __asm
1685 {
1686 mov dx, [Port]
1687 mov eax, [u32]
1688 out dx, eax
1689 }
1690# endif
1691}
1692#endif
1693
1694
1695/**
1696 * Gets a 32-bit unsigned integer from an I/O port.
1697 *
1698 * @returns 32-bit integer.
1699 * @param Port I/O port to read from.
1700 */
1701#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1702DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1703#else
1704DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1705{
1706 uint32_t u32;
1707# if RT_INLINE_ASM_GNU_STYLE
1708 __asm__ __volatile__("inl %w1, %0\n\t"
1709 : "=a" (u32)
1710 : "Nd" (Port));
1711
1712# elif RT_INLINE_ASM_USES_INTRIN
1713 u32 = __indword(Port);
1714
1715# else
1716 __asm
1717 {
1718 mov dx, [Port]
1719 in eax, dx
1720 mov [u32], eax
1721 }
1722# endif
1723 return u32;
1724}
1725#endif
1726
1727
1728/**
1729 * Atomically Exchange an unsigned 8-bit value.
1730 *
1731 * @returns Current *pu8 value
1732 * @param pu8 Pointer to the 8-bit variable to update.
1733 * @param u8 The 8-bit value to assign to *pu8.
1734 */
1735#if RT_INLINE_ASM_EXTERNAL
1736DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1737#else
1738DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1739{
1740# if RT_INLINE_ASM_GNU_STYLE
1741 __asm__ __volatile__("xchgb %0, %1\n\t"
1742 : "=m" (*pu8),
1743 "=r" (u8)
1744 : "1" (u8));
1745# else
1746 __asm
1747 {
1748# ifdef __AMD64__
1749 mov rdx, [pu8]
1750 mov al, [u8]
1751 xchg [rdx], al
1752 mov [u8], al
1753# else
1754 mov edx, [pu8]
1755 mov al, [u8]
1756 xchg [edx], al
1757 mov [u8], al
1758# endif
1759 }
1760# endif
1761 return u8;
1762}
1763#endif
1764
1765
1766/**
1767 * Atomically Exchange a signed 8-bit value.
1768 *
1769 * @returns Current *pi8 value
1770 * @param pi8 Pointer to the 8-bit variable to update.
1771 * @param i8 The 8-bit value to assign to *pi8.
1772 */
1773DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1774{
1775 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1776}
1777
1778
1779/**
1780 * Atomically Exchange a bool value.
1781 *
1782 * @returns Current *pf value
1783 * @param pf Pointer to the boolean variable to update.
1784 * @param f The boolean value to assign to *pf.
1785 */
1786DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1787{
1788#ifdef _MSC_VER
1789 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1790#else
1791 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1792#endif
1793}
1794
1795
1796/**
1797 * Atomically Exchange an unsigned 16-bit value.
1798 *
1799 * @returns Current *pu16 value
1800 * @param pu16 Pointer to the 16-bit variable to update.
1801 * @param u16 The 16-bit value to assign to *pu16.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL
1804DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1805#else
1806DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1807{
1808# if RT_INLINE_ASM_GNU_STYLE
1809 __asm__ __volatile__("xchgw %0, %1\n\t"
1810 : "=m" (*pu16),
1811 "=r" (u16)
1812 : "1" (u16));
1813# else
1814 __asm
1815 {
1816# ifdef __AMD64__
1817 mov rdx, [pu16]
1818 mov ax, [u16]
1819 xchg [rdx], ax
1820 mov [u16], ax
1821# else
1822 mov edx, [pu16]
1823 mov ax, [u16]
1824 xchg [edx], ax
1825 mov [u16], ax
1826# endif
1827 }
1828# endif
1829 return u16;
1830}
1831#endif
1832
1833
1834/**
1835 * Atomically Exchange a signed 16-bit value.
1836 *
1837 * @returns Current *pi16 value
1838 * @param pi16 Pointer to the 16-bit variable to update.
1839 * @param i16 The 16-bit value to assign to *pi16.
1840 */
1841DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1842{
1843 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1844}
1845
1846
1847/**
1848 * Atomically Exchange an unsigned 32-bit value.
1849 *
1850 * @returns Current *pu32 value
1851 * @param pu32 Pointer to the 32-bit variable to update.
1852 * @param u32 The 32-bit value to assign to *pu32.
1853 */
1854#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1855DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1856#else
1857DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1858{
1859# if RT_INLINE_ASM_GNU_STYLE
1860 __asm__ __volatile__("xchgl %0, %1\n\t"
1861 : "=m" (*pu32),
1862 "=r" (u32)
1863 : "1" (u32));
1864
1865# elif RT_INLINE_ASM_USES_INTRIN
1866 u32 = _InterlockedExchange((long *)pu32, u32);
1867
1868# else
1869 __asm
1870 {
1871# ifdef __AMD64__
1872 mov rdx, [pu32]
1873 mov eax, u32
1874 xchg [rdx], eax
1875 mov [u32], eax
1876# else
1877 mov edx, [pu32]
1878 mov eax, u32
1879 xchg [edx], eax
1880 mov [u32], eax
1881# endif
1882 }
1883# endif
1884 return u32;
1885}
1886#endif
1887
1888
1889/**
1890 * Atomically Exchange a signed 32-bit value.
1891 *
1892 * @returns Current *pi32 value
1893 * @param pi32 Pointer to the 32-bit variable to update.
1894 * @param i32 The 32-bit value to assign to *pi32.
1895 */
1896DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1897{
1898 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1899}
1900
1901
1902/**
1903 * Atomically Exchange an unsigned 64-bit value.
1904 *
1905 * @returns Current *pu64 value
1906 * @param pu64 Pointer to the 64-bit variable to update.
1907 * @param u64 The 64-bit value to assign to *pu64.
1908 */
1909#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1910DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1911#else
1912DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1913{
1914# if defined(__AMD64__)
1915# if RT_INLINE_ASM_USES_INTRIN
1916 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1917
1918# elif RT_INLINE_ASM_GNU_STYLE
1919 __asm__ __volatile__("xchgq %0, %1\n\t"
1920 : "=m" (*pu64),
1921 "=r" (u64)
1922 : "1" (u64));
1923# else
1924 __asm
1925 {
1926 mov rdx, [pu64]
1927 mov rax, [u64]
1928 xchg [rdx], rax
1929 mov [u64], rax
1930 }
1931# endif
1932# else /* !__AMD64__ */
1933# if RT_INLINE_ASM_GNU_STYLE
1934# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1935 uint32_t u32 = (uint32_t)u64;
1936 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1937 "xchgl %%ebx, %3\n\t"
1938 "1:\n\t"
1939 "lock; cmpxchg8b (%5)\n\t"
1940 "jnz 1b\n\t"
1941 "xchgl %%ebx, %3\n\t"
1942 /*"xchgl %%esi, %5\n\t"*/
1943 : "=A" (u64),
1944 "=m" (*pu64)
1945 : "0" (*pu64),
1946 "m" ( u32 ),
1947 "c" ( (uint32_t)(u64 >> 32) ),
1948 "S" (pu64) );
1949# else /* !PIC */
1950 __asm__ __volatile__("1:\n\t"
1951 "lock; cmpxchg8b %1\n\t"
1952 "jnz 1b\n\t"
1953 : "=A" (u64),
1954 "=m" (*pu64)
1955 : "0" (*pu64),
1956 "b" ( (uint32_t)u64 ),
1957 "c" ( (uint32_t)(u64 >> 32) ));
1958# endif
1959# else
1960 __asm
1961 {
1962 mov ebx, dword ptr [u64]
1963 mov ecx, dword ptr [u64 + 4]
1964 mov edi, pu64
1965 mov eax, dword ptr [edi]
1966 mov edx, dword ptr [edi + 4]
1967 retry:
1968 lock cmpxchg8b [edi]
1969 jnz retry
1970 mov dword ptr [u64], eax
1971 mov dword ptr [u64 + 4], edx
1972 }
1973# endif
1974# endif /* !__AMD64__ */
1975 return u64;
1976}
1977#endif
1978
1979
1980/**
1981 * Atomically Exchange a signed 64-bit value.
1982 *
1983 * @returns Current *pi64 value
1984 * @param pi64 Pointer to the 64-bit variable to update.
1985 * @param i64 The 64-bit value to assign to *pi64.
1986 */
1987DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1988{
1989 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1990}
1991
1992
1993#ifdef __AMD64__
1994/**
1995 * Atomically Exchange an unsigned 128-bit value.
1996 *
1997 * @returns Current *pu128.
1998 * @param pu128 Pointer to the 128-bit variable to update.
1999 * @param u128 The 128-bit value to assign to *pu128.
2000 *
2001 * @remark We cannot really assume that any hardware supports this. Nor do I have
2002 * GAS support for it. So, for the time being we'll BREAK the atomic
2003 * bit of this function and use two 64-bit exchanges instead.
2004 */
2005# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2006DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2007# else
2008DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2009{
2010 if (true)/*ASMCpuId_ECX(1) & BIT(13))*/
2011 {
2012 /** @todo this is clumsy code */
2013 RTUINT128U u128Ret;
2014 u128Ret.u = u128;
2015 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2016 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2017 return u128Ret.u;
2018 }
2019#if 0 /* later? */
2020 else
2021 {
2022# if RT_INLINE_ASM_GNU_STYLE
2023 __asm__ __volatile__("1:\n\t"
2024 "lock; cmpxchg8b %1\n\t"
2025 "jnz 1b\n\t"
2026 : "=A" (u128),
2027 "=m" (*pu128)
2028 : "0" (*pu128),
2029 "b" ( (uint64_t)u128 ),
2030 "c" ( (uint64_t)(u128 >> 64) ));
2031# else
2032 __asm
2033 {
2034 mov rbx, dword ptr [u128]
2035 mov rcx, dword ptr [u128 + 4]
2036 mov rdi, pu128
2037 mov rax, dword ptr [rdi]
2038 mov rdx, dword ptr [rdi + 4]
2039 retry:
2040 lock cmpxchg16b [rdi]
2041 jnz retry
2042 mov dword ptr [u128], rax
2043 mov dword ptr [u128 + 4], rdx
2044 }
2045# endif
2046 }
2047 return u128;
2048#endif
2049}
2050# endif
2051#endif /* __AMD64__ */
2052
2053
2054/**
2055 * Atomically Reads an unsigned 64-bit value.
2056 *
2057 * @returns Current *pu64 value
2058 * @param pu64 Pointer to the 64-bit variable to read.
2059 * The memory pointed to must be writable.
2060 * @remark This will fault if the memory is read-only!
2061 */
2062#if RT_INLINE_ASM_EXTERNAL
2063DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2064#else
2065DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2066{
2067 uint64_t u64;
2068# ifdef __AMD64__
2069# if RT_INLINE_ASM_GNU_STYLE
2070 __asm__ __volatile__("movq %1, %0\n\t"
2071 : "=r" (u64)
2072 : "m" (*pu64));
2073# else
2074 __asm
2075 {
2076 mov rdx, [pu64]
2077 mov rax, [rdx]
2078 mov [u64], rax
2079 }
2080# endif
2081# else /* !__AMD64__ */
2082# if RT_INLINE_ASM_GNU_STYLE
2083# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2084 uint32_t u32EBX = 0;
2085 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2086 "lock; cmpxchg8b (%5)\n\t"
2087 "xchgl %%ebx, %3\n\t"
2088 : "=A" (u64),
2089 "=m" (*pu64)
2090 : "0" (0),
2091 "m" (u32EBX),
2092 "c" (0),
2093 "S" (pu64));
2094# else /* !PIC */
2095 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2096 : "=A" (u64),
2097 "=m" (*pu64)
2098 : "0" (0),
2099 "b" (0),
2100 "c" (0));
2101# endif
2102# else
2103 __asm
2104 {
2105 xor eax, eax
2106 xor edx, edx
2107 mov edi, pu64
2108 xor ecx, ecx
2109 xor ebx, ebx
2110 lock cmpxchg8b [edi]
2111 mov dword ptr [u64], eax
2112 mov dword ptr [u64 + 4], edx
2113 }
2114# endif
2115# endif /* !__AMD64__ */
2116 return u64;
2117}
2118#endif
2119
2120
2121/**
2122 * Atomically Reads a signed 64-bit value.
2123 *
2124 * @returns Current *pi64 value
2125 * @param pi64 Pointer to the 64-bit variable to read.
2126 * The memory pointed to must be writable.
2127 * @remark This will fault if the memory is read-only!
2128 */
2129DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2130{
2131 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2132}
2133
2134
2135/**
2136 * Atomically Exchange a value whose size might differ
2137 * between platforms or compilers.
2138 *
2139 * @param pu Pointer to the variable to update.
2140 * @param uNew The value to assign to *pu.
2141 */
2142#define ASMAtomicXchgSize(pu, uNew) \
2143 do { \
2144 switch (sizeof(*(pu))) { \
2145 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2146 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2147 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2148 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2149 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2150 } \
2151 } while (0)
2152
2153
2154/**
2155 * Atomically Exchange a pointer value.
2156 *
2157 * @returns Current *ppv value
2158 * @param ppv Pointer to the pointer variable to update.
2159 * @param pv The pointer value to assign to *ppv.
2160 */
2161DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2162{
2163#if ARCH_BITS == 32
2164 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2165#elif ARCH_BITS == 64
2166 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2167#else
2168# error "ARCH_BITS is bogus"
2169#endif
2170}
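
/* Illustrative usage (not part of the original header): atomically taking over a
 * shared pointer, e.g. claiming a pending work item and leaving NULL behind so
 * no other thread processes it twice. PMYITEM, g_pPendingItem and
 * myProcessItem() are hypothetical.
 * @code
 *      PMYITEM pItem = (PMYITEM)ASMAtomicXchgPtr((void * volatile *)&g_pPendingItem, NULL);
 *      if (pItem)
 *          myProcessItem(pItem);
 * @endcode
 */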
2171
2172
2173/**
2174 * Atomically Compare and Exchange an unsigned 32-bit value.
2175 *
2176 * @returns true if xchg was done.
2177 * @returns false if xchg wasn't done.
2178 *
2179 * @param pu32 Pointer to the value to update.
2180 * @param u32New The new value to assign to *pu32.
2181 * @param u32Old The old value to compare *pu32 with.
2182 */
2183#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2184DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2185#else
2186DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2187{
2188# if RT_INLINE_ASM_GNU_STYLE
2189 uint32_t u32Ret;
2190 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2191 "setz %%al\n\t"
2192 "movzx %%al, %%eax\n\t"
2193 : "=m" (*pu32),
2194 "=a" (u32Ret)
2195 : "r" (u32New),
2196 "1" (u32Old));
2197 return (bool)u32Ret;
2198
2199# elif RT_INLINE_ASM_USES_INTRIN
2200 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2201
2202# else
2203 uint32_t u32Ret;
2204 __asm
2205 {
2206# ifdef __AMD64__
2207 mov rdx, [pu32]
2208# else
2209 mov edx, [pu32]
2210# endif
2211 mov eax, [u32Old]
2212 mov ecx, [u32New]
2213# ifdef __AMD64__
2214 lock cmpxchg [rdx], ecx
2215# else
2216 lock cmpxchg [edx], ecx
2217# endif
2218 setz al
2219 movzx eax, al
2220 mov [u32Ret], eax
2221 }
2222 return !!u32Ret;
2223# endif
2224}
2225#endif
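
/* Illustrative usage (not part of the original header): the typical
 * compare-and-exchange retry loop, here keeping a shared maximum up to date.
 * The helper name is hypothetical.
 * @code
 *      static void myAtomicMaxU32(uint32_t volatile *pu32, uint32_t u32New)
 *      {
 *          uint32_t u32Old;
 *          do
 *              u32Old = *pu32;
 *          while (   u32New > u32Old
 *                 && !ASMAtomicCmpXchgU32(pu32, u32New, u32Old));
 *      }
 * @endcode
 */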
2226
2227
2228/**
2229 * Atomically Compare and Exchange a signed 32-bit value.
2230 *
2231 * @returns true if xchg was done.
2232 * @returns false if xchg wasn't done.
2233 *
2234 * @param pi32 Pointer to the value to update.
2235 * @param i32New The new value to assign to *pi32.
2236 * @param i32Old The old value to compare *pi32 with.
2237 */
2238DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2239{
2240 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2241}
2242
2243
2244/**
2245 * Atomically Compare and exchange an unsigned 64-bit value.
2246 *
2247 * @returns true if xchg was done.
2248 * @returns false if xchg wasn't done.
2249 *
2250 * @param pu64 Pointer to the 64-bit variable to update.
2251 * @param u64New The 64-bit value to assign to *pu64.
2252 * @param u64Old The value to compare with.
2253 */
2254#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2255DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2256#else
2257DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2258{
2259# if RT_INLINE_ASM_USES_INTRIN
2260 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2261
2262# elif defined(__AMD64__)
2263# if RT_INLINE_ASM_GNU_STYLE
2264 uint64_t u64Ret;
2265 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2266 "setz %%al\n\t"
2267 "movzx %%al, %%eax\n\t"
2268 : "=m" (*pu64),
2269 "=a" (u64Ret)
2270 : "r" (u64New),
2271 "1" (u64Old));
2272 return (bool)u64Ret;
2273# else
2274 bool fRet;
2275 __asm
2276 {
2277 mov rdx, [pu64]
2278 mov rax, [u64Old]
2279 mov rcx, [u64New]
2280 lock cmpxchg [rdx], rcx
2281 setz al
2282 mov [fRet], al
2283 }
2284 return fRet;
2285# endif
2286# else /* !__AMD64__ */
2287 uint32_t u32Ret;
2288# if RT_INLINE_ASM_GNU_STYLE
2289# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2290 uint32_t u32 = (uint32_t)u64New;
2291 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2292 "lock; cmpxchg8b (%5)\n\t"
2293 "setz %%al\n\t"
2294 "xchgl %%ebx, %3\n\t"
2295 "movzx %%al, %%eax\n\t"
2296 : "=a" (u32Ret),
2297 "=m" (*pu64)
2298 : "A" (u64Old),
2299 "m" ( u32 ),
2300 "c" ( (uint32_t)(u64New >> 32) ),
2301 "S" (pu64) );
2302# else /* !PIC */
2303 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2304 "setz %%al\n\t"
2305 "movzx %%al, %%eax\n\t"
2306 : "=a" (u32Ret),
2307 "=m" (*pu64)
2308 : "A" (u64Old),
2309 "b" ( (uint32_t)u64New ),
2310 "c" ( (uint32_t)(u64New >> 32) ));
2311# endif
2312 return (bool)u32Ret;
2313# else
2314 __asm
2315 {
2316 mov ebx, dword ptr [u64New]
2317 mov ecx, dword ptr [u64New + 4]
2318 mov edi, [pu64]
2319 mov eax, dword ptr [u64Old]
2320 mov edx, dword ptr [u64Old + 4]
2321 lock cmpxchg8b [edi]
2322 setz al
2323 movzx eax, al
2324 mov dword ptr [u32Ret], eax
2325 }
2326 return !!u32Ret;
2327# endif
2328# endif /* !__AMD64__ */
2329}
2330#endif
2331
2332
2333/**
2334 * Atomically Compare and exchange a signed 64-bit value.
2335 *
2336 * @returns true if xchg was done.
2337 * @returns false if xchg wasn't done.
2338 *
2339 * @param pi64 Pointer to the 64-bit variable to update.
2340 * @param i64 The 64-bit value to assign to *pi64.
2341 * @param i64Old The value to compare with.
2342 */
2343DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2344{
2345 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2346}
2347
2348
2349
2350/** @def ASMAtomicCmpXchgSize
2351 * Atomically Compare and Exchange a value whose size might differ
2352 * between platforms or compilers.
2353 *
2354 * @param pu Pointer to the value to update.
2355 * @param uNew The new value to assign to *pu.
2356 * @param uOld The old value to compare *pu with.
2357 * @param fRc Where to store the result.
2358 */
2359#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2360 do { \
2361 switch (sizeof(*(pu))) { \
2362 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2363 break; \
2364 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2365 break; \
2366 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2367 (fRc) = false; \
2368 break; \
2369 } \
2370 } while (0)
2371
2372
2373/**
2374 * Atomically Compare and Exchange a pointer value.
2375 *
2376 * @returns true if xchg was done.
2377 * @returns false if xchg wasn't done.
2378 *
2379 * @param ppv Pointer to the value to update.
2380 * @param pvNew The new value to assign to *ppv.
2381 * @param pvOld The old value to compare *ppv with.
2382 */
2383DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2384{
2385#if ARCH_BITS == 32
2386 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2387#elif ARCH_BITS == 64
2388 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2389#else
2390# error "ARCH_BITS is bogus"
2391#endif
2392}
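
/* Illustrative usage (not part of the original header): a lock-free LIFO push
 * built on the pointer compare-and-exchange. MYNODE and the helper name are
 * hypothetical.
 * @code
 *      static void myLifoPush(MYNODE * volatile *ppHead, MYNODE *pNode)
 *      {
 *          MYNODE *pOldHead;
 *          do
 *          {
 *              pOldHead = *ppHead;
 *              pNode->pNext = pOldHead;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOldHead));
 *      }
 * @endcode
 */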
2393
2394
2395/**
2396 * Atomically increment a 32-bit value.
2397 *
2398 * @returns The new value.
2399 * @param pu32 Pointer to the value to increment.
2400 */
2401#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2402DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2403#else
2404DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2405{
2406 uint32_t u32;
2407# if RT_INLINE_ASM_USES_INTRIN
2408 u32 = _InterlockedIncrement((long *)pu32);
2409
2410# elif RT_INLINE_ASM_GNU_STYLE
2411 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2412 "incl %0\n\t"
2413 : "=r" (u32),
2414 "=m" (*pu32)
2415 : "0" (1)
2416 : "memory");
2417# else
2418 __asm
2419 {
2420 mov eax, 1
2421# ifdef __AMD64__
2422 mov rdx, [pu32]
2423 lock xadd [rdx], eax
2424# else
2425 mov edx, [pu32]
2426 lock xadd [edx], eax
2427# endif
2428 inc eax
2429 mov u32, eax
2430 }
2431# endif
2432 return u32;
2433}
2434#endif
2435
2436
2437/**
2438 * Atomically increment a signed 32-bit value.
2439 *
2440 * @returns The new value.
2441 * @param pi32 Pointer to the value to increment.
2442 */
2443DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2444{
2445 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2446}
2447
2448
2449/**
2450 * Atomically decrement an unsigned 32-bit value.
2451 *
2452 * @returns The new value.
2453 * @param pu32 Pointer to the value to decrement.
2454 */
2455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2456DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2457#else
2458DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2459{
2460 uint32_t u32;
2461# if RT_INLINE_ASM_USES_INTRIN
2462 u32 = _InterlockedDecrement((long *)pu32);
2463
2464# elif RT_INLINE_ASM_GNU_STYLE
2465 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2466 "decl %0\n\t"
2467 : "=r" (u32),
2468 "=m" (*pu32)
2469 : "0" (-1)
2470 : "memory");
2471# else
2472 __asm
2473 {
2474 mov eax, -1
2475# ifdef __AMD64__
2476 mov rdx, [pu32]
2477 lock xadd [rdx], eax
2478# else
2479 mov edx, [pu32]
2480 lock xadd [edx], eax
2481# endif
2482 dec eax
2483 mov u32, eax
2484 }
2485# endif
2486 return u32;
2487}
2488#endif
2489
2490
2491/**
2492 * Atomically decrement a signed 32-bit value.
2493 *
2494 * @returns The new value.
2495 * @param pi32 Pointer to the value to decrement.
2496 */
2497DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2498{
2499 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2500}
2501
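/*
 * Illustrative sketch, not part of the original header: a minimal reference
 * counter built on the atomic increment/decrement pair. The helper names
 * are made up for this example.
 */
DECLINLINE(uint32_t) asmExampleRetain(uint32_t volatile *pcRefs)
{
    return ASMAtomicIncU32(pcRefs);         /* returns the new reference count */
}

DECLINLINE(bool) asmExampleRelease(uint32_t volatile *pcRefs)
{
    /* True when the last reference was dropped and the object may be destroyed. */
    return ASMAtomicDecU32(pcRefs) == 0;
}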
2502
2503/**
2504 * Atomically Or an unsigned 32-bit value.
2505 *
2506 * @param pu32 Pointer to the variable to OR u32 with.
2507 * @param u32 The value to OR *pu32 with.
2508 */
2509#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2510DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2511#else
2512DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2513{
2514# if RT_INLINE_ASM_USES_INTRIN
2515 _InterlockedOr((long volatile *)pu32, (long)u32);
2516
2517# elif RT_INLINE_ASM_GNU_STYLE
2518 __asm__ __volatile__("lock; orl %1, %0\n\t"
2519 : "=m" (*pu32)
2520 : "r" (u32));
2521# else
2522 __asm
2523 {
2524 mov eax, [u32]
2525# ifdef __AMD64__
2526 mov rdx, [pu32]
2527 lock or [rdx], eax
2528# else
2529 mov edx, [pu32]
2530 lock or [edx], eax
2531# endif
2532 }
2533# endif
2534}
2535#endif
2536
2537
2538/**
2539 * Atomically Or a signed 32-bit value.
2540 *
2541 * @param pi32 Pointer to the variable to OR i32 with.
2542 * @param i32 The value to OR *pi32 with.
2543 */
2544DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2545{
2546 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2547}
2548
2549
2550/**
2551 * Atomically And an unsigned 32-bit value.
2552 *
2553 * @param pu32 Pointer to the variable to AND u32 with.
2554 * @param u32 The value to AND *pu32 with.
2555 */
2556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2557DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2558#else
2559DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2560{
2561# if RT_INLINE_ASM_USES_INTRIN
2562 _InterlockedAnd((long volatile *)pu32, u32);
2563
2564# elif RT_INLINE_ASM_GNU_STYLE
2565 __asm__ __volatile__("lock; andl %1, %0\n\t"
2566 : "=m" (*pu32)
2567 : "r" (u32));
2568# else
2569 __asm
2570 {
2571 mov eax, [u32]
2572# ifdef __AMD64__
2573 mov rdx, [pu32]
2574 lock and [rdx], eax
2575# else
2576 mov edx, [pu32]
2577 lock and [edx], eax
2578# endif
2579 }
2580# endif
2581}
2582#endif
2583
2584
2585/**
2586 * Atomically And a signed 32-bit value.
2587 *
2588 * @param pi32 Pointer to the variable to AND i32 with.
2589 * @param i32 The value to AND *pi32 with.
2590 */
2591DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2592{
2593 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2594}
2595
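/*
 * Illustrative sketch, not part of the original header: treating a 32-bit
 * word as a set of flags. ASMAtomicOrU32 sets bits and ASMAtomicAndU32 with
 * the complemented mask clears them; the helper names are made up.
 */
DECLINLINE(void) asmExampleSetFlags(uint32_t volatile *pfFlags, uint32_t fMask)
{
    ASMAtomicOrU32(pfFlags, fMask);         /* set every bit in fMask */
}

DECLINLINE(void) asmExampleClearFlags(uint32_t volatile *pfFlags, uint32_t fMask)
{
    ASMAtomicAndU32(pfFlags, ~fMask);       /* clear every bit in fMask */
}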
2596
2597/**
2598 * Invalidate page.
2599 *
2600 * @param pv Address of the page to invalidate.
2601 */
2602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2603DECLASM(void) ASMInvalidatePage(void *pv);
2604#else
2605DECLINLINE(void) ASMInvalidatePage(void *pv)
2606{
2607# if RT_INLINE_ASM_USES_INTRIN
2608 __invlpg(pv);
2609
2610# elif RT_INLINE_ASM_GNU_STYLE
2611 __asm__ __volatile__("invlpg %0\n\t"
2612 : : "m" (*(uint8_t *)pv));
2613# else
2614 __asm
2615 {
2616# ifdef __AMD64__
2617 mov rax, [pv]
2618 invlpg [rax]
2619# else
2620 mov eax, [pv]
2621 invlpg [eax]
2622# endif
2623 }
2624# endif
2625}
2626#endif
2627
2628
2629#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2630# if PAGE_SIZE != 0x1000
2631# error "PAGE_SIZE is not 0x1000!"
2632# endif
2633#endif
2634
2635/**
2636 * Zeros a 4K memory page.
2637 *
2638 * @param pv Pointer to the memory block. This must be page aligned.
2639 */
2640#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2641DECLASM(void) ASMMemZeroPage(volatile void *pv);
2642# else
2643DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2644{
2645# if RT_INLINE_ASM_USES_INTRIN
2646# ifdef __AMD64__
2647 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2648# else
2649 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2650# endif
2651
2652# elif RT_INLINE_ASM_GNU_STYLE
2653 RTUINTREG uDummy;
2654# ifdef __AMD64__
2655 __asm__ __volatile__ ("rep stosq"
2656 : "=D" (pv),
2657 "=c" (uDummy)
2658 : "0" (pv),
2659 "c" (0x1000 >> 3),
2660 "a" (0)
2661 : "memory");
2662# else
2663 __asm__ __volatile__ ("rep stosl"
2664 : "=D" (pv),
2665 "=c" (uDummy)
2666 : "0" (pv),
2667 "c" (0x1000 >> 2),
2668 "a" (0)
2669 : "memory");
2670# endif
2671# else
2672 __asm
2673 {
2674# ifdef __AMD64__
2675 xor rax, rax
2676 mov ecx, 0200h
2677 mov rdi, [pv]
2678 rep stosq
2679# else
2680 xor eax, eax
2681 mov ecx, 0400h
2682 mov edi, [pv]
2683 rep stosd
2684# endif
2685 }
2686# endif
2687}
2688# endif
2689
2690
2691/**
2692 * Zeros a memory block with a 32-bit aligned size.
2693 *
2694 * @param pv Pointer to the memory block.
2695 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2696 */
2697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2698DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2699#else
2700DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2701{
2702# if RT_INLINE_ASM_USES_INTRIN
2703 __stosd((unsigned long *)pv, 0, cb >> 2);
2704
2705# elif RT_INLINE_ASM_GNU_STYLE
2706 __asm__ __volatile__ ("rep stosl"
2707 : "=D" (pv),
2708 "=c" (cb)
2709 : "0" (pv),
2710 "1" (cb >> 2),
2711 "a" (0)
2712 : "memory");
2713# else
2714 __asm
2715 {
2716 xor eax, eax
2717# ifdef __AMD64__
2718 mov rcx, [cb]
2719 shr rcx, 2
2720 mov rdi, [pv]
2721# else
2722 mov ecx, [cb]
2723 shr ecx, 2
2724 mov edi, [pv]
2725# endif
2726 rep stosd
2727 }
2728# endif
2729}
2730#endif
2731
2732
2733/**
2734 * Fills a memory block with a 32-bit aligned size.
2735 *
2736 * @param pv Pointer to the memory block.
2737 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2738 * @param u32 The value to fill with.
2739 */
2740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2741DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2742#else
2743DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2744{
2745# if RT_INLINE_ASM_USES_INTRIN
2746 __stosd((unsigned long *)pv, u32, cb >> 2);
2747
2748# elif RT_INLINE_ASM_GNU_STYLE
2749 __asm__ __volatile__ ("rep stosl"
2750 : "=D" (pv),
2751 "=c" (cb)
2752 : "0" (pv),
2753 "1" (cb >> 2),
2754 "a" (u32)
2755 : "memory");
2756# else
2757 __asm
2758 {
2759# ifdef __AMD64__
2760 mov rcx, [cb]
2761 shr rcx, 2
2762 mov rdi, [pv]
2763# else
2764 mov ecx, [cb]
2765 shr ecx, 2
2766 mov edi, [pv]
2767# endif
2768 mov eax, [u32]
2769 rep stosd
2770 }
2771# endif
2772}
2773#endif
2774
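/*
 * Illustrative sketch, not part of the original header: initializing an
 * array of 32-bit entries with a fill pattern. The byte count handed to
 * ASMMemFill32 is a multiple of four by construction, as required. The
 * helper name is made up for this example.
 */
DECLINLINE(void) asmExampleInitTable(uint32_t *pau32Table, size_t cEntries, uint32_t u32Fill)
{
    ASMMemFill32(pau32Table, cEntries * sizeof(uint32_t), u32Fill);
}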
2775
2776
2777/**
2778 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2779 *
2780 * @returns u32F1 * u32F2.
2781 */
2782#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2783DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2784#else
2785DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2786{
2787# ifdef __AMD64__
2788 return (uint64_t)u32F1 * u32F2;
2789# else /* !__AMD64__ */
2790 uint64_t u64;
2791# if RT_INLINE_ASM_GNU_STYLE
2792 __asm__ __volatile__("mull %%edx"
2793 : "=A" (u64)
2794 : "a" (u32F2), "d" (u32F1));
2795# else
2796 __asm
2797 {
2798 mov edx, [u32F1]
2799 mov eax, [u32F2]
2800 mul edx
2801 mov dword ptr [u64], eax
2802 mov dword ptr [u64 + 4], edx
2803 }
2804# endif
2805 return u64;
2806# endif /* !__AMD64__ */
2807}
2808#endif
2809
2810
2811/**
2812 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2813 *
2814 * @returns i32F1 * i32F2.
2815 */
2816#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2817DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2818#else
2819DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2820{
2821# ifdef __AMD64__
2822 return (int64_t)i32F1 * i32F2;
2823# else /* !__AMD64__ */
2824 int64_t i64;
2825# if RT_INLINE_ASM_GNU_STYLE
2826 __asm__ __volatile__("imull %%edx"
2827 : "=A" (i64)
2828 : "a" (i32F2), "d" (i32F1));
2829# else
2830 __asm
2831 {
2832 mov edx, [i32F1]
2833 mov eax, [i32F2]
2834 imul edx
2835 mov dword ptr [i64], eax
2836 mov dword ptr [i64 + 4], edx
2837 }
2838# endif
2839 return i64;
2840# endif /* !__AMD64__ */
2841}
2842#endif
2843
2844
2845/**
2846 * Divides a 64-bit unsigned value by a 32-bit unsigned value, returning an unsigned 32-bit result.
2847 *
2848 * @returns u64 / u32.
2849 */
2850#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2851DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2852#else
2853DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2854{
2855# ifdef __AMD64__
2856 return (uint32_t)(u64 / u32);
2857# else /* !__AMD64__ */
2858# if RT_INLINE_ASM_GNU_STYLE
2859 RTUINTREG uDummy;
2860 __asm__ __volatile__("divl %3"
2861 : "=a" (u32), "=d"(uDummy)
2862 : "A" (u64), "r" (u32));
2863# else
2864 __asm
2865 {
2866 mov eax, dword ptr [u64]
2867 mov edx, dword ptr [u64 + 4]
2868 mov ecx, [u32]
2869 div ecx
2870 mov [u32], eax
2871 }
2872# endif
2873 return u32;
2874# endif /* !__AMD64__ */
2875}
2876#endif
2877
2878
2879/**
2880 * Divides a 64-bit signed value by a 32-bit signed value, returning a signed 32-bit result.
2881 *
2882 * @returns i64 / i32.
2883 */
2884#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2885DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2886#else
2887DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2888{
2889# ifdef __AMD64__
2890 return (int32_t)(i64 / i32);
2891# else /* !__AMD64__ */
2892# if RT_INLINE_ASM_GNU_STYLE
2893 RTUINTREG iDummy;
2894 __asm__ __volatile__("idivl %3"
2895 : "=a" (i32), "=d"(iDummy)
2896 : "A" (i64), "r" (i32));
2897# else
2898 __asm
2899 {
2900 mov eax, dword ptr [i64]
2901 mov edx, dword ptr [i64 + 4]
2902 mov ecx, [i32]
2903 idiv ecx
2904 mov [i32], eax
2905 }
2906# endif
2907 return i32;
2908# endif /* !__AMD64__ */
2909}
2910#endif
2911
2912
2913/**
2914 * Multiplies a 64-bit value by a 32-bit value and divides the result by a 32-bit value,
2915 * using a 96-bit intermediate result.
2916 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2917 * __udivdi3 and __umoddi3 even if this inline function is not used.
2918 *
2919 * @returns (u64A * u32B) / u32C.
2920 * @param u64A The 64-bit value.
2921 * @param u32B The 32-bit value to multiply A by.
2922 * @param u32C The 32-bit value to divide A*B by.
2923 */
2924#if RT_INLINE_ASM_EXTERNAL
2925DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2926#else
2927DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2928{
2929# if RT_INLINE_ASM_GNU_STYLE
2930# ifdef __AMD64__
2931 uint64_t u64Result, u64Spill;
2932 __asm__ __volatile__("mulq %2\n\t"
2933 "divq %3\n\t"
2934 : "=a" (u64Result),
2935 "=d" (u64Spill)
2936 : "r" ((uint64_t)u32B),
2937 "r" ((uint64_t)u32C),
2938 "0" (u64A),
2939 "1" (0));
2940 return u64Result;
2941# else
2942 uint32_t u32Dummy;
2943 uint64_t u64Result;
2944 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2945 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2946 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2947 eax = u64A.hi */
2948 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2949 edx = u32C */
2950 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2951 edx = u32B */
2952 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2953 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2954 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2955 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2956 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2957 edx = u64Hi % u32C */
2958 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2959 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2960 "divl %%ecx \n\t" /* u64Result.lo */
2961 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2962 : "=A"(u64Result),
2963 "=S"(u32Dummy), "=D"(u32Dummy)
2964 : "a"((uint32_t)u64A),
2965 "S"((uint32_t)(u64A >> 32)),
2966 "c"(u32B),
2967 "D"(u32C));
2968 return u64Result;
2969# endif
2970# else
2971 RTUINT64U u;
2972 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2973 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2974 u64Hi += (u64Lo >> 32);
2975 u.s.Hi = (uint32_t)(u64Hi / u32C);
2976 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2977 return u.u;
2978# endif
2979}
2980#endif
2981
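/*
 * Illustrative sketch, not part of the original header: the classic use of
 * the 96-bit helper is unit conversion where the plain 64-bit product could
 * overflow, e.g. converting timestamp counter ticks to nanoseconds. The
 * helper name and parameters are made up for this example.
 */
DECLINLINE(uint64_t) asmExampleTicksToNano(uint64_t cTicks, uint32_t cTicksPerSec)
{
    /* (cTicks * 10^9) may exceed 64 bits, so keep the intermediate in 96 bits. */
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000 /* ns/s */, cTicksPerSec);
}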
2982
2983/**
2984 * Probes a byte pointer for read access.
2985 *
2986 * While the function will fault if the byte is not read accessible,
2987 * the idea is to do this in a safe place like before acquiring locks
2988 * and such like.
2989 *
2990 * Also, this function guarantees that an eager compiler is not going
2991 * to optimize the probing away.
2992 *
2993 * @param pvByte Pointer to the byte.
2994 */
2995#if RT_INLINE_ASM_EXTERNAL
2996DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2997#else
2998DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2999{
3000 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3001 uint8_t u8;
3002# if RT_INLINE_ASM_GNU_STYLE
3003 __asm__ __volatile__("movb (%1), %0\n\t"
3004 : "=r" (u8)
3005 : "r" (pvByte));
3006# else
3007 __asm
3008 {
3009# ifdef __AMD64__
3010 mov rax, [pvByte]
3011 mov al, [rax]
3012# else
3013 mov eax, [pvByte]
3014 mov al, [eax]
3015# endif
3016 mov [u8], al
3017 }
3018# endif
3019 return u8;
3020}
3021#endif
3022
3023/**
3024 * Probes a buffer for read access page by page.
3025 *
3026 * While the function will fault if the buffer is not fully read
3027 * accessible, the idea is to do this in a safe place like before
3028 * acquiring locks and such like.
3029 *
3030 * Also, this function guarantees that an eager compiler is not going
3031 * to optimize the probing away.
3032 *
3033 * @param pvBuf Pointer to the buffer.
3034 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3035 */
3036DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3037{
3038 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3039 /* the first byte */
3040 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3041 ASMProbeReadByte(pu8);
3042
3043 /* the whole pages in between. */
3044 while (cbBuf > /*PAGE_SIZE*/0x1000)
3045 {
3046 ASMProbeReadByte(pu8);
3047 cbBuf -= /*PAGE_SIZE*/0x1000;
3048 pu8 += /*PAGE_SIZE*/0x1000;
3049 }
3050
3051 /* the last byte */
3052 ASMProbeReadByte(pu8 + cbBuf - 1);
3053}
3054
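/*
 * Illustrative sketch, not part of the original header: touching a caller
 * supplied buffer up front so that any page fault happens here rather than
 * later in a context that must not fault (e.g. while holding a spinlock).
 * The helper name is made up for this example.
 */
DECLINLINE(void) asmExampleProbeRequest(const void *pvReq, size_t cbReq)
{
    if (cbReq)                              /* ASMProbeReadBuffer requires cbBuf >= 1 */
        ASMProbeReadBuffer(pvReq, cbReq);
}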
3055
3056/** @def ASMBreakpoint
3057 * Debugger Breakpoint.
3058 * @remark In the gnu world we add a nop instruction after the int3 to
3059 * force gdb to remain at the int3 source line.
3060 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3061 * @internal
3062 */
3063#if RT_INLINE_ASM_GNU_STYLE
3064# ifndef __L4ENV__
3065# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3066# else
3067# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3068# endif
3069#else
3070# define ASMBreakpoint() __debugbreak()
3071#endif
3072
3073
3074
3075/** @defgroup grp_inline_bits Bit Operations
3076 * @{
3077 */
3078
3079
3080/**
3081 * Sets a bit in a bitmap.
3082 *
3083 * @param pvBitmap Pointer to the bitmap.
3084 * @param iBit The bit to set.
3085 */
3086#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3087DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3088#else
3089DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3090{
3091# if RT_INLINE_ASM_USES_INTRIN
3092 _bittestandset((long *)pvBitmap, iBit);
3093
3094# elif RT_INLINE_ASM_GNU_STYLE
3095 __asm__ __volatile__ ("btsl %1, %0"
3096 : "=m" (*(volatile long *)pvBitmap)
3097 : "Ir" (iBit)
3098 : "memory");
3099# else
3100 __asm
3101 {
3102# ifdef __AMD64__
3103 mov rax, [pvBitmap]
3104 mov edx, [iBit]
3105 bts [rax], edx
3106# else
3107 mov eax, [pvBitmap]
3108 mov edx, [iBit]
3109 bts [eax], edx
3110# endif
3111 }
3112# endif
3113}
3114#endif
3115
3116
3117/**
3118 * Atomically sets a bit in a bitmap.
3119 *
3120 * @param pvBitmap Pointer to the bitmap.
3121 * @param iBit The bit to set.
3122 */
3123#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3124DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3125#else
3126DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3127{
3128# if RT_INLINE_ASM_USES_INTRIN
3129 _interlockedbittestandset((long *)pvBitmap, iBit);
3130# elif RT_INLINE_ASM_GNU_STYLE
3131 __asm__ __volatile__ ("lock; btsl %1, %0"
3132 : "=m" (*(volatile long *)pvBitmap)
3133 : "Ir" (iBit)
3134 : "memory");
3135# else
3136 __asm
3137 {
3138# ifdef __AMD64__
3139 mov rax, [pvBitmap]
3140 mov edx, [iBit]
3141 lock bts [rax], edx
3142# else
3143 mov eax, [pvBitmap]
3144 mov edx, [iBit]
3145 lock bts [eax], edx
3146# endif
3147 }
3148# endif
3149}
3150#endif
3151
3152
3153/**
3154 * Clears a bit in a bitmap.
3155 *
3156 * @param pvBitmap Pointer to the bitmap.
3157 * @param iBit The bit to clear.
3158 */
3159#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3160DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3161#else
3162DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3163{
3164# if RT_INLINE_ASM_USES_INTRIN
3165 _bittestandreset((long *)pvBitmap, iBit);
3166
3167# elif RT_INLINE_ASM_GNU_STYLE
3168 __asm__ __volatile__ ("btrl %1, %0"
3169 : "=m" (*(volatile long *)pvBitmap)
3170 : "Ir" (iBit)
3171 : "memory");
3172# else
3173 __asm
3174 {
3175# ifdef __AMD64__
3176 mov rax, [pvBitmap]
3177 mov edx, [iBit]
3178 btr [rax], edx
3179# else
3180 mov eax, [pvBitmap]
3181 mov edx, [iBit]
3182 btr [eax], edx
3183# endif
3184 }
3185# endif
3186}
3187#endif
3188
3189
3190/**
3191 * Atomically clears a bit in a bitmap.
3192 *
3193 * @param pvBitmap Pointer to the bitmap.
3194 * @param iBit The bit to clear.
3195 * @remark No memory barrier, take care on smp.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL
3198DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3199#else
3200DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3201{
3202# if RT_INLINE_ASM_GNU_STYLE
3203 __asm__ __volatile__ ("lock; btrl %1, %0"
3204 : "=m" (*(volatile long *)pvBitmap)
3205 : "Ir" (iBit)
3206 : "memory");
3207# else
3208 __asm
3209 {
3210# ifdef __AMD64__
3211 mov rax, [pvBitmap]
3212 mov edx, [iBit]
3213 lock btr [rax], edx
3214# else
3215 mov eax, [pvBitmap]
3216 mov edx, [iBit]
3217 lock btr [eax], edx
3218# endif
3219 }
3220# endif
3221}
3222#endif
3223
3224
3225/**
3226 * Toggles a bit in a bitmap.
3227 *
3228 * @param pvBitmap Pointer to the bitmap.
3229 * @param iBit The bit to toggle.
3230 */
3231#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3232DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3233#else
3234DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3235{
3236# if RT_INLINE_ASM_USES_INTRIN
3237 _bittestandcomplement((long *)pvBitmap, iBit);
3238# elif RT_INLINE_ASM_GNU_STYLE
3239 __asm__ __volatile__ ("btcl %1, %0"
3240 : "=m" (*(volatile long *)pvBitmap)
3241 : "Ir" (iBit)
3242 : "memory");
3243# else
3244 __asm
3245 {
3246# ifdef __AMD64__
3247 mov rax, [pvBitmap]
3248 mov edx, [iBit]
3249 btc [rax], edx
3250# else
3251 mov eax, [pvBitmap]
3252 mov edx, [iBit]
3253 btc [eax], edx
3254# endif
3255 }
3256# endif
3257}
3258#endif
3259
3260
3261/**
3262 * Atomically toggles a bit in a bitmap.
3263 *
3264 * @param pvBitmap Pointer to the bitmap.
3265 * @param iBit The bit to toggle.
3266 */
3267#if RT_INLINE_ASM_EXTERNAL
3268DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3269#else
3270DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3271{
3272# if RT_INLINE_ASM_GNU_STYLE
3273 __asm__ __volatile__ ("lock; btcl %1, %0"
3274 : "=m" (*(volatile long *)pvBitmap)
3275 : "Ir" (iBit)
3276 : "memory");
3277# else
3278 __asm
3279 {
3280# ifdef __AMD64__
3281 mov rax, [pvBitmap]
3282 mov edx, [iBit]
3283 lock btc [rax], edx
3284# else
3285 mov eax, [pvBitmap]
3286 mov edx, [iBit]
3287 lock btc [eax], edx
3288# endif
3289 }
3290# endif
3291}
3292#endif
3293
3294
3295/**
3296 * Tests and sets a bit in a bitmap.
3297 *
3298 * @returns true if the bit was set.
3299 * @returns false if the bit was clear.
3300 * @param pvBitmap Pointer to the bitmap.
3301 * @param iBit The bit to test and set.
3302 */
3303#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3304DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3305#else
3306DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3307{
3308 union { bool f; uint32_t u32; uint8_t u8; } rc;
3309# if RT_INLINE_ASM_USES_INTRIN
3310 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3311
3312# elif RT_INLINE_ASM_GNU_STYLE
3313 __asm__ __volatile__ ("btsl %2, %1\n\t"
3314 "setc %b0\n\t"
3315 "andl $1, %0\n\t"
3316 : "=q" (rc.u32),
3317 "=m" (*(volatile long *)pvBitmap)
3318 : "Ir" (iBit)
3319 : "memory");
3320# else
3321 __asm
3322 {
3323 mov edx, [iBit]
3324# ifdef __AMD64__
3325 mov rax, [pvBitmap]
3326 bts [rax], edx
3327# else
3328 mov eax, [pvBitmap]
3329 bts [eax], edx
3330# endif
3331 setc al
3332 and eax, 1
3333 mov [rc.u32], eax
3334 }
3335# endif
3336 return rc.f;
3337}
3338#endif
3339
3340
3341/**
3342 * Atomically tests and sets a bit in a bitmap.
3343 *
3344 * @returns true if the bit was set.
3345 * @returns false if the bit was clear.
3346 * @param pvBitmap Pointer to the bitmap.
3347 * @param iBit The bit to test and set.
3348 */
3349#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3350DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3351#else
3352DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3353{
3354 union { bool f; uint32_t u32; uint8_t u8; } rc;
3355# if RT_INLINE_ASM_USES_INTRIN
3356 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3357# elif RT_INLINE_ASM_GNU_STYLE
3358 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3359 "setc %b0\n\t"
3360 "andl $1, %0\n\t"
3361 : "=q" (rc.u32),
3362 "=m" (*(volatile long *)pvBitmap)
3363 : "Ir" (iBit)
3364 : "memory");
3365# else
3366 __asm
3367 {
3368 mov edx, [iBit]
3369# ifdef __AMD64__
3370 mov rax, [pvBitmap]
3371 lock bts [rax], edx
3372# else
3373 mov eax, [pvBitmap]
3374 lock bts [eax], edx
3375# endif
3376 setc al
3377 and eax, 1
3378 mov [rc.u32], eax
3379 }
3380# endif
3381 return rc.f;
3382}
3383#endif
3384
3385
3386/**
3387 * Tests and clears a bit in a bitmap.
3388 *
3389 * @returns true if the bit was set.
3390 * @returns false if the bit was clear.
3391 * @param pvBitmap Pointer to the bitmap.
3392 * @param iBit The bit to test and clear.
3393 */
3394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3395DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3396#else
3397DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3398{
3399 union { bool f; uint32_t u32; uint8_t u8; } rc;
3400# if RT_INLINE_ASM_USES_INTRIN
3401 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3402
3403# elif RT_INLINE_ASM_GNU_STYLE
3404 __asm__ __volatile__ ("btrl %2, %1\n\t"
3405 "setc %b0\n\t"
3406 "andl $1, %0\n\t"
3407 : "=q" (rc.u32),
3408 "=m" (*(volatile long *)pvBitmap)
3409 : "Ir" (iBit)
3410 : "memory");
3411# else
3412 __asm
3413 {
3414 mov edx, [iBit]
3415# ifdef __AMD64__
3416 mov rax, [pvBitmap]
3417 btr [rax], edx
3418# else
3419 mov eax, [pvBitmap]
3420 btr [eax], edx
3421# endif
3422 setc al
3423 and eax, 1
3424 mov [rc.u32], eax
3425 }
3426# endif
3427 return rc.f;
3428}
3429#endif
3430
3431
3432/**
3433 * Atomically tests and clears a bit in a bitmap.
3434 *
3435 * @returns true if the bit was set.
3436 * @returns false if the bit was clear.
3437 * @param pvBitmap Pointer to the bitmap.
3438 * @param iBit The bit to test and clear.
3439 * @remark No memory barrier, take care on smp.
3440 */
3441#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3442DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3443#else
3444DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3445{
3446 union { bool f; uint32_t u32; uint8_t u8; } rc;
3447# if RT_INLINE_ASM_USES_INTRIN
3448 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3449
3450# elif RT_INLINE_ASM_GNU_STYLE
3451 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3452 "setc %b0\n\t"
3453 "andl $1, %0\n\t"
3454 : "=q" (rc.u32),
3455 "=m" (*(volatile long *)pvBitmap)
3456 : "Ir" (iBit)
3457 : "memory");
3458# else
3459 __asm
3460 {
3461 mov edx, [iBit]
3462# ifdef __AMD64__
3463 mov rax, [pvBitmap]
3464 lock btr [rax], edx
3465# else
3466 mov eax, [pvBitmap]
3467 lock btr [eax], edx
3468# endif
3469 setc al
3470 and eax, 1
3471 mov [rc.u32], eax
3472 }
3473# endif
3474 return rc.f;
3475}
3476#endif
3477
3478
3479/**
3480 * Tests and toggles a bit in a bitmap.
3481 *
3482 * @returns true if the bit was set.
3483 * @returns false if the bit was clear.
3484 * @param pvBitmap Pointer to the bitmap.
3485 * @param iBit The bit to test and toggle.
3486 */
3487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3488 DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3489#else
3490DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3491{
3492 union { bool f; uint32_t u32; uint8_t u8; } rc;
3493# if RT_INLINE_ASM_USES_INTRIN
3494 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3495
3496# elif RT_INLINE_ASM_GNU_STYLE
3497 __asm__ __volatile__ ("btcl %2, %1\n\t"
3498 "setc %b0\n\t"
3499 "andl $1, %0\n\t"
3500 : "=q" (rc.u32),
3501 "=m" (*(volatile long *)pvBitmap)
3502 : "Ir" (iBit)
3503 : "memory");
3504# else
3505 __asm
3506 {
3507 mov edx, [iBit]
3508# ifdef __AMD64__
3509 mov rax, [pvBitmap]
3510 btc [rax], edx
3511# else
3512 mov eax, [pvBitmap]
3513 btc [eax], edx
3514# endif
3515 setc al
3516 and eax, 1
3517 mov [rc.u32], eax
3518 }
3519# endif
3520 return rc.f;
3521}
3522#endif
3523
3524
3525/**
3526 * Atomically tests and toggles a bit in a bitmap.
3527 *
3528 * @returns true if the bit was set.
3529 * @returns false if the bit was clear.
3530 * @param pvBitmap Pointer to the bitmap.
3531 * @param iBit The bit to test and toggle.
3532 */
3533#if RT_INLINE_ASM_EXTERNAL
3534DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3535#else
3536DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3537{
3538 union { bool f; uint32_t u32; uint8_t u8; } rc;
3539# if RT_INLINE_ASM_GNU_STYLE
3540 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3541 "setc %b0\n\t"
3542 "andl $1, %0\n\t"
3543 : "=q" (rc.u32),
3544 "=m" (*(volatile long *)pvBitmap)
3545 : "Ir" (iBit)
3546 : "memory");
3547# else
3548 __asm
3549 {
3550 mov edx, [iBit]
3551# ifdef __AMD64__
3552 mov rax, [pvBitmap]
3553 lock btc [rax], edx
3554# else
3555 mov eax, [pvBitmap]
3556 lock btc [eax], edx
3557# endif
3558 setc al
3559 and eax, 1
3560 mov [rc.u32], eax
3561 }
3562# endif
3563 return rc.f;
3564}
3565#endif
3566
3567
3568/**
3569 * Tests if a bit in a bitmap is set.
3570 *
3571 * @returns true if the bit is set.
3572 * @returns false if the bit is clear.
3573 * @param pvBitmap Pointer to the bitmap.
3574 * @param iBit The bit to test.
3575 */
3576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3577DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3578#else
3579DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3580{
3581 union { bool f; uint32_t u32; uint8_t u8; } rc;
3582# if RT_INLINE_ASM_USES_INTRIN
3583 rc.u32 = _bittest((long *)pvBitmap, iBit);
3584# elif RT_INLINE_ASM_GNU_STYLE
3585
3586 __asm__ __volatile__ ("btl %2, %1\n\t"
3587 "setc %b0\n\t"
3588 "andl $1, %0\n\t"
3589 : "=q" (rc.u32),
3590 "=m" (*(volatile long *)pvBitmap)
3591 : "Ir" (iBit)
3592 : "memory");
3593# else
3594 __asm
3595 {
3596 mov edx, [iBit]
3597# ifdef __AMD64__
3598 mov rax, [pvBitmap]
3599 bt [rax], edx
3600# else
3601 mov eax, [pvBitmap]
3602 bt [eax], edx
3603# endif
3604 setc al
3605 and eax, 1
3606 mov [rc.u32], eax
3607 }
3608# endif
3609 return rc.f;
3610}
3611#endif
3612
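/*
 * Illustrative sketch, not part of the original header: a bitmap is simply
 * an array of 32-bit words, with bit 0 being the least significant bit of
 * the first word. The helper name and the 64-bit bitmap are made up.
 */
DECLINLINE(bool) asmExampleBitmapBasics(void)
{
    uint32_t au32Bitmap[2] = { 0, 0 };      /* room for 64 bits */
    ASMBitSet(&au32Bitmap[0], 5);           /* bit 5 of au32Bitmap[0] */
    ASMBitSet(&au32Bitmap[0], 37);          /* bit 5 of au32Bitmap[1] */
    if (ASMBitTestAndClear(&au32Bitmap[0], 5))
    {
        /* bit 5 was set and has now been cleared again */
    }
    return ASMBitTest(&au32Bitmap[0], 37);  /* true */
}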
3613
3614/**
3615 * Clears a bit range within a bitmap.
3616 *
3617 * @param pvBitmap Pointer to the bitmap.
3618 * @param iBitStart The first bit to clear.
3619 * @param iBitEnd The first bit not to clear.
3620 */
3621DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3622{
3623 if (iBitStart < iBitEnd)
3624 {
3625 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3626 int iStart = iBitStart & ~31;
3627 int iEnd = iBitEnd & ~31;
3628 if (iStart == iEnd)
3629 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3630 else
3631 {
3632 /* bits in first dword. */
3633 if (iBitStart & 31)
3634 {
3635 *pu32 &= (1 << (iBitStart & 31)) - 1;
3636 pu32++;
3637 iBitStart = iStart + 32;
3638 }
3639
3640 /* whole dword. */
3641 if (iBitStart != iEnd)
3642 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3643
3644 /* bits in last dword. */
3645 if (iBitEnd & 31)
3646 {
3647 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3648 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3649 }
3650 }
3651 }
3652}
3653
3654
3655/**
3656 * Finds the first clear bit in a bitmap.
3657 *
3658 * @returns Index of the first zero bit.
3659 * @returns -1 if no clear bit was found.
3660 * @param pvBitmap Pointer to the bitmap.
3661 * @param cBits The number of bits in the bitmap. Multiple of 32.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL
3664DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3665#else
3666DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3667{
3668 if (cBits)
3669 {
3670 int32_t iBit;
3671# if RT_INLINE_ASM_GNU_STYLE
3672 RTCCUINTREG uEAX, uECX, uEDI;
3673 cBits = RT_ALIGN_32(cBits, 32);
3674 __asm__ __volatile__("repe; scasl\n\t"
3675 "je 1f\n\t"
3676# ifdef __AMD64__
3677 "lea -4(%%rdi), %%rdi\n\t"
3678 "xorl (%%rdi), %%eax\n\t"
3679 "subq %5, %%rdi\n\t"
3680# else
3681 "lea -4(%%edi), %%edi\n\t"
3682 "xorl (%%edi), %%eax\n\t"
3683 "subl %5, %%edi\n\t"
3684# endif
3685 "shll $3, %%edi\n\t"
3686 "bsfl %%eax, %%edx\n\t"
3687 "addl %%edi, %%edx\n\t"
3688 "1:\t\n"
3689 : "=d" (iBit),
3690 "=&c" (uECX),
3691 "=&D" (uEDI),
3692 "=&a" (uEAX)
3693 : "0" (0xffffffff),
3694 "mr" (pvBitmap),
3695 "1" (cBits >> 5),
3696 "2" (pvBitmap),
3697 "3" (0xffffffff));
3698# else
3699 cBits = RT_ALIGN_32(cBits, 32);
3700 __asm
3701 {
3702# ifdef __AMD64__
3703 mov rdi, [pvBitmap]
3704 mov rbx, rdi
3705# else
3706 mov edi, [pvBitmap]
3707 mov ebx, edi
3708# endif
3709 mov edx, 0ffffffffh
3710 mov eax, edx
3711 mov ecx, [cBits]
3712 shr ecx, 5
3713 repe scasd
3714 je done
3715
3716# ifdef __AMD64__
3717 lea rdi, [rdi - 4]
3718 xor eax, [rdi]
3719 sub rdi, rbx
3720# else
3721 lea edi, [edi - 4]
3722 xor eax, [edi]
3723 sub edi, ebx
3724# endif
3725 shl edi, 3
3726 bsf edx, eax
3727 add edx, edi
3728 done:
3729 mov [iBit], edx
3730 }
3731# endif
3732 return iBit;
3733 }
3734 return -1;
3735}
3736#endif
3737
3738
3739/**
3740 * Finds the next clear bit in a bitmap.
3741 *
3742 * @returns Index of the next clear bit.
3743 * @returns -1 if no clear bit was found.
3744 * @param pvBitmap Pointer to the bitmap.
3745 * @param cBits The number of bits in the bitmap. Multiple of 32.
3746 * @param iBitPrev The bit returned from the last search.
3747 * The search will start at iBitPrev + 1.
3748 */
3749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3750DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3751#else
3752DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3753{
3754 int iBit = ++iBitPrev & 31;
3755 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3756 cBits -= iBitPrev & ~31;
3757 if (iBit)
3758 {
3759 /* inspect the first dword. */
3760 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3761# if RT_INLINE_ASM_USES_INTRIN
3762 unsigned long ulBit = 0;
3763 if (_BitScanForward(&ulBit, u32))
3764 return ulBit + iBitPrev;
3765 iBit = -1;
3766# else
3767# if RT_INLINE_ASM_GNU_STYLE
3768 __asm__ __volatile__("bsf %1, %0\n\t"
3769 "jnz 1f\n\t"
3770 "movl $-1, %0\n\t"
3771 "1:\n\t"
3772 : "=r" (iBit)
3773 : "r" (u32));
3774# else
3775 __asm
3776 {
3777 mov edx, [u32]
3778 bsf eax, edx
3779 jnz done
3780 mov eax, 0ffffffffh
3781 done:
3782 mov [iBit], eax
3783 }
3784# endif
3785 if (iBit >= 0)
3786 return iBit + iBitPrev;
3787# endif
3788 /* Search the rest of the bitmap, if there is anything. */
3789 if (cBits > 32)
3790 {
3791 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3792 if (iBit >= 0)
3793 return iBit + (iBitPrev & ~31) + 32;
3794 }
3795 }
3796 else
3797 {
3798 /* Search the rest of the bitmap. */
3799 iBit = ASMBitFirstClear(pvBitmap, cBits);
3800 if (iBit >= 0)
3801 return iBit + (iBitPrev & ~31);
3802 }
3803 return iBit;
3804}
3805#endif
3806
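/*
 * Illustrative sketch, not part of the original header: claiming a free slot
 * in an allocation bitmap. cSlots is assumed to be a multiple of 32 as the
 * search functions require; the helper name is made up for this example.
 */
DECLINLINE(int) asmExampleAllocSlot(volatile void *pvBitmap, uint32_t cSlots)
{
    int iSlot = ASMBitFirstClear(pvBitmap, cSlots);
    while (iSlot >= 0)
    {
        /* The bit may have been taken since the scan, so claim it atomically. */
        if (!ASMAtomicBitTestAndSet(pvBitmap, iSlot))
            return iSlot;                   /* it was clear and is now ours */
        iSlot = ASMBitNextClear(pvBitmap, cSlots, iSlot);
    }
    return -1;                              /* no free slot found */
}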
3807
3808/**
3809 * Finds the first set bit in a bitmap.
3810 *
3811 * @returns Index of the first set bit.
3812 * @returns -1 if no set bit was found.
3813 * @param pvBitmap Pointer to the bitmap.
3814 * @param cBits The number of bits in the bitmap. Multiple of 32.
3815 */
3816#if RT_INLINE_ASM_EXTERNAL
3817DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3818#else
3819DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3820{
3821 if (cBits)
3822 {
3823 int32_t iBit;
3824# if RT_INLINE_ASM_GNU_STYLE
3825 RTCCUINTREG uEAX, uECX, uEDI;
3826 cBits = RT_ALIGN_32(cBits, 32);
3827 __asm__ __volatile__("repe; scasl\n\t"
3828 "je 1f\n\t"
3829# ifdef __AMD64__
3830 "lea -4(%%rdi), %%rdi\n\t"
3831 "movl (%%rdi), %%eax\n\t"
3832 "subq %5, %%rdi\n\t"
3833# else
3834 "lea -4(%%edi), %%edi\n\t"
3835 "movl (%%edi), %%eax\n\t"
3836 "subl %5, %%edi\n\t"
3837# endif
3838 "shll $3, %%edi\n\t"
3839 "bsfl %%eax, %%edx\n\t"
3840 "addl %%edi, %%edx\n\t"
3841 "1:\t\n"
3842 : "=d" (iBit),
3843 "=&c" (uECX),
3844 "=&D" (uEDI),
3845 "=&a" (uEAX)
3846 : "0" (0xffffffff),
3847 "mr" (pvBitmap),
3848 "1" (cBits >> 5),
3849 "2" (pvBitmap),
3850 "3" (0));
3851# else
3852 cBits = RT_ALIGN_32(cBits, 32);
3853 __asm
3854 {
3855# ifdef __AMD64__
3856 mov rdi, [pvBitmap]
3857 mov rbx, rdi
3858# else
3859 mov edi, [pvBitmap]
3860 mov ebx, edi
3861# endif
3862 mov edx, 0ffffffffh
3863 xor eax, eax
3864 mov ecx, [cBits]
3865 shr ecx, 5
3866 repe scasd
3867 je done
3868# ifdef __AMD64__
3869 lea rdi, [rdi - 4]
3870 mov eax, [rdi]
3871 sub rdi, rbx
3872# else
3873 lea edi, [edi - 4]
3874 mov eax, [edi]
3875 sub edi, ebx
3876# endif
3877 shl edi, 3
3878 bsf edx, eax
3879 add edx, edi
3880 done:
3881 mov [iBit], edx
3882 }
3883# endif
3884 return iBit;
3885 }
3886 return -1;
3887}
3888#endif
3889
3890
3891/**
3892 * Finds the next set bit in a bitmap.
3893 *
3894 * @returns Index of the next set bit.
3895 * @returns -1 if no set bit was found.
3896 * @param pvBitmap Pointer to the bitmap.
3897 * @param cBits The number of bits in the bitmap. Multiple of 32.
3898 * @param iBitPrev The bit returned from the last search.
3899 * The search will start at iBitPrev + 1.
3900 */
3901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3902DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3903#else
3904DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3905{
3906 int iBit = ++iBitPrev & 31;
3907 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3908 cBits -= iBitPrev & ~31;
3909 if (iBit)
3910 {
3911 /* inspect the first dword. */
3912 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3913# if RT_INLINE_ASM_USES_INTRIN
3914 unsigned long ulBit = 0;
3915 if (_BitScanForward(&ulBit, u32))
3916 return ulBit + iBitPrev;
3917 iBit = -1;
3918# else
3919# if RT_INLINE_ASM_GNU_STYLE
3920 __asm__ __volatile__("bsf %1, %0\n\t"
3921 "jnz 1f\n\t"
3922 "movl $-1, %0\n\t"
3923 "1:\n\t"
3924 : "=r" (iBit)
3925 : "r" (u32));
3926# else
3927 __asm
3928 {
3929 mov edx, u32
3930 bsf eax, edx
3931 jnz done
3932 mov eax, 0ffffffffh
3933 done:
3934 mov [iBit], eax
3935 }
3936# endif
3937 if (iBit >= 0)
3938 return iBit + iBitPrev;
3939# endif
3940 /* Search the rest of the bitmap, if there is anything. */
3941 if (cBits > 32)
3942 {
3943 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3944 if (iBit >= 0)
3945 return iBit + (iBitPrev & ~31) + 32;
3946 }
3947
3948 }
3949 else
3950 {
3951 /* Search the rest of the bitmap. */
3952 iBit = ASMBitFirstSet(pvBitmap, cBits);
3953 if (iBit >= 0)
3954 return iBit + (iBitPrev & ~31);
3955 }
3956 return iBit;
3957}
3958#endif
3959
3960
3961/**
3962 * Finds the first bit which is set in the given 32-bit integer.
3963 * Bits are numbered from 1 (least significant) to 32.
3964 *
3965 * @returns index [1..32] of the first set bit.
3966 * @returns 0 if all bits are cleared.
3967 * @param u32 Integer to search for set bits.
3968 * @remark Similar to ffs() in BSD.
3969 */
3970DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3971{
3972# if RT_INLINE_ASM_USES_INTRIN
3973 unsigned long iBit;
3974 if (_BitScanForward(&iBit, u32))
3975 iBit++;
3976 else
3977 iBit = 0;
3978# elif RT_INLINE_ASM_GNU_STYLE
3979 uint32_t iBit;
3980 __asm__ __volatile__("bsf %1, %0\n\t"
3981 "jnz 1f\n\t"
3982 "xorl %0, %0\n\t"
3983 "jmp 2f\n"
3984 "1:\n\t"
3985 "incl %0\n"
3986 "2:\n\t"
3987 : "=r" (iBit)
3988 : "rm" (u32));
3989# else
3990 uint32_t iBit;
3991 _asm
3992 {
3993 bsf eax, [u32]
3994 jnz found
3995 xor eax, eax
3996 jmp done
3997 found:
3998 inc eax
3999 done:
4000 mov [iBit], eax
4001 }
4002# endif
4003 return iBit;
4004}
4005
4006
4007/**
4008 * Finds the first bit which is set in the given 32-bit integer.
4009 * Bits are numbered from 1 (least significant) to 32.
4010 *
4011 * @returns index [1..32] of the first set bit.
4012 * @returns 0 if all bits are cleared.
4013 * @param i32 Integer to search for set bits.
4014 * @remark Similar to ffs() in BSD.
4015 */
4016DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4017{
4018 return ASMBitFirstSetU32((uint32_t)i32);
4019}
4020
4021
4022/**
4023 * Finds the last bit which is set in the given 32-bit integer.
4024 * Bits are numbered from 1 (least significant) to 32.
4025 *
4026 * @returns index [1..32] of the last set bit.
4027 * @returns 0 if all bits are cleared.
4028 * @param u32 Integer to search for set bits.
4029 * @remark Similar to fls() in BSD.
4030 */
4031DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4032{
4033# if RT_INLINE_ASM_USES_INTRIN
4034 unsigned long iBit;
4035 if (_BitScanReverse(&iBit, u32))
4036 iBit++;
4037 else
4038 iBit = 0;
4039# elif RT_INLINE_ASM_GNU_STYLE
4040 uint32_t iBit;
4041 __asm__ __volatile__("bsrl %1, %0\n\t"
4042 "jnz 1f\n\t"
4043 "xorl %0, %0\n\t"
4044 "jmp 2f\n"
4045 "1:\n\t"
4046 "incl %0\n"
4047 "2:\n\t"
4048 : "=r" (iBit)
4049 : "rm" (u32));
4050# else
4051 uint32_t iBit;
4052 _asm
4053 {
4054 bsr eax, [u32]
4055 jnz found
4056 xor eax, eax
4057 jmp done
4058 found:
4059 inc eax
4060 done:
4061 mov [iBit], eax
4062 }
4063# endif
4064 return iBit;
4065}
4066
4067
4068/**
4069 * Finds the last bit which is set in the given 32-bit integer.
4070 * Bits are numbered from 1 (least significant) to 32.
4071 *
4072 * @returns index [1..32] of the last set bit.
4073 * @returns 0 if all bits are cleared.
4074 * @param i32 Integer to search for set bits.
4075 * @remark Similar to fls() in BSD.
4076 */
4077DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4078{
4079 return ASMBitLastSetU32((uint32_t)i32);
4080}
4081
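/*
 * Illustrative sketch, not part of the original header: ASMBitLastSetU32
 * makes it easy to compute floor(log2(u32)) since it returns the 1-based
 * index of the most significant set bit. The helper name is made up.
 */
DECLINLINE(unsigned) asmExampleLog2U32(uint32_t u32)
{
    unsigned iBit = ASMBitLastSetU32(u32);  /* 1..32, or 0 when u32 is zero */
    return iBit ? iBit - 1 : ~0U;           /* ~0U flags the undefined log2(0) case */
}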
4082
4083/**
4084 * Reverses the byte order of the given 32-bit integer.
4085 * @param u32 The 32-bit integer to byte swap; the swapped value is returned.
4086 */
4087DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4088{
4089#if RT_INLINE_ASM_USES_INTRIN
4090 u32 = _byteswap_ulong(u32);
4091#elif RT_INLINE_ASM_GNU_STYLE
4092 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4093#else
4094 _asm
4095 {
4096 mov eax, [u32]
4097 bswap eax
4098 mov [u32], eax
4099 }
4100#endif
4101 return u32;
4102}
4103
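/*
 * Illustrative sketch, not part of the original header: since this file
 * targets little-endian x86/AMD64 hosts, converting a big-endian (network
 * order) 32-bit value to host order is a plain byte swap. The helper name
 * is made up for this example.
 */
DECLINLINE(uint32_t) asmExampleBigEndianToHostU32(uint32_t u32BigEndian)
{
    return ASMByteSwapU32(u32BigEndian);
}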
4104/** @} */
4105
4106
4107/** @} */
4108#endif
4109