VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 2356

Last change on this file since 2356 was 2305, checked in by vboxsync, 18 years ago

We're using 2002 not 2003 it seems. too bad.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 99.4 KB
Line 
1/** @file
2 * InnoTek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef __iprt_asm_h__
22#define __iprt_asm_h__
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when building with a Microsoft compiler whose _MSC_VER is 1400 or
 * higher; <intrin.h> is then included and the listed compiler intrinsics are
 * enabled.  Set to 0 in every other case (unless already defined).
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Make the compiler expand these as intrinsics at every optimization level. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef __AMD64__
   /* Intrinsics only enabled for AMD64 builds. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
/* Anything that did not take the branch above gets the 0 default. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
90
91
92
93/** @defgroup grp_asm ASM - Assembly Routines
94 * @ingroup grp_rt
95 * @{
96 */
97
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler has no inline assembly support, in which case
 * the ASM* functions are implemented in an external .asm file.  Set to 0
 * otherwise.
 *
 * @remark  Whether Microsoft really left inline assembly out of their AMD64
 *          compiler was unconfirmed when this was written.
 */
#if !defined(_MSC_VER) || !defined(__AMD64__)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
110
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler understands GNU style inline assembly, which is
 * taken to be every compiler other than Microsoft's.  Set to 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
119
120
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR image: byte-packed so the 16-bit size is immediately followed by the
 * pointer-sized address, with no padding in between.
 */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;
#pragma pack()
132
#pragma pack(1)
/**
 * GDTR image: byte-packed so the 16-bit size is immediately followed by the
 * pointer-sized address, with no padding in between.
 */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;
#pragma pack()
143
144
/** @def ASMReturnAddress
 * Yields the return address of the current function or method (the calling
 * function, if you prefer to see it that way).
 */
#if !defined(_MSC_VER)
# if defined(__GNUC__) || defined(__DOXYGEN__)
#  define ASMReturnAddress() __builtin_return_address(0)
# else
#  error "Unsupported compiler."
# endif
#else
  /* Microsoft compilers: declare the intrinsic ourselves and enable it. */
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#endif
160
161
162/**
163 * Gets the content of the IDTR CPU register.
164 * @param pIdtr Where to store the IDTR contents.
165 */
166#if RT_INLINE_ASM_EXTERNAL
167DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
168#else
169DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
170{
171# if RT_INLINE_ASM_GNU_STYLE
172 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
173# else
174 __asm
175 {
176# ifdef __AMD64__
177 mov rax, [pIdtr]
178 sidt [rax]
179# else
180 mov eax, [pIdtr]
181 sidt [eax]
182# endif
183 }
184# endif
185}
186#endif
187
188
189/**
190 * Sets the content of the IDTR CPU register.
191 * @param pIdtr Where to load the IDTR contents from
192 */
193#if RT_INLINE_ASM_EXTERNAL
194DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
195#else
196DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
197{
198# if RT_INLINE_ASM_GNU_STYLE
199 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
200# else
201 __asm
202 {
203# ifdef __AMD64__
204 mov rax, [pIdtr]
205 lidt [rax]
206# else
207 mov eax, [pIdtr]
208 lidt [eax]
209# endif
210 }
211# endif
212}
213#endif
214
215
216/**
217 * Gets the content of the GDTR CPU register.
218 * @param pGdtr Where to store the GDTR contents.
219 */
220#if RT_INLINE_ASM_EXTERNAL
221DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
222#else
223DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
224{
225# if RT_INLINE_ASM_GNU_STYLE
226 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
227# else
228 __asm
229 {
230# ifdef __AMD64__
231 mov rax, [pGdtr]
232 sgdt [rax]
233# else
234 mov eax, [pGdtr]
235 sgdt [eax]
236# endif
237 }
238# endif
239}
240#endif
241
242/**
243 * Get the cs register.
244 * @returns cs.
245 */
246#if RT_INLINE_ASM_EXTERNAL
247DECLASM(RTSEL) ASMGetCS(void);
248#else
249DECLINLINE(RTSEL) ASMGetCS(void)
250{
251 RTSEL SelCS;
252# if RT_INLINE_ASM_GNU_STYLE
253 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
254# else
255 __asm
256 {
257 mov ax, cs
258 mov [SelCS], ax
259 }
260# endif
261 return SelCS;
262}
263#endif
264
265
266/**
267 * Get the DS register.
268 * @returns DS.
269 */
270#if RT_INLINE_ASM_EXTERNAL
271DECLASM(RTSEL) ASMGetDS(void);
272#else
273DECLINLINE(RTSEL) ASMGetDS(void)
274{
275 RTSEL SelDS;
276# if RT_INLINE_ASM_GNU_STYLE
277 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
278# else
279 __asm
280 {
281 mov ax, ds
282 mov [SelDS], ax
283 }
284# endif
285 return SelDS;
286}
287#endif
288
289
290/**
291 * Get the ES register.
292 * @returns ES.
293 */
294#if RT_INLINE_ASM_EXTERNAL
295DECLASM(RTSEL) ASMGetES(void);
296#else
297DECLINLINE(RTSEL) ASMGetES(void)
298{
299 RTSEL SelES;
300# if RT_INLINE_ASM_GNU_STYLE
301 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
302# else
303 __asm
304 {
305 mov ax, es
306 mov [SelES], ax
307 }
308# endif
309 return SelES;
310}
311#endif
312
313
314/**
315 * Get the FS register.
316 * @returns FS.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetFS(void);
320#else
321DECLINLINE(RTSEL) ASMGetFS(void)
322{
323 RTSEL SelFS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
326# else
327 __asm
328 {
329 mov ax, fs
330 mov [SelFS], ax
331 }
332# endif
333 return SelFS;
334}
335# endif
336
337
338/**
339 * Get the GS register.
340 * @returns GS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetGS(void);
344#else
345DECLINLINE(RTSEL) ASMGetGS(void)
346{
347 RTSEL SelGS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
350# else
351 __asm
352 {
353 mov ax, gs
354 mov [SelGS], ax
355 }
356# endif
357 return SelGS;
358}
359#endif
360
361
362/**
363 * Get the SS register.
364 * @returns SS.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetSS(void);
368#else
369DECLINLINE(RTSEL) ASMGetSS(void)
370{
371 RTSEL SelSS;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
374# else
375 __asm
376 {
377 mov ax, ss
378 mov [SelSS], ax
379 }
380# endif
381 return SelSS;
382}
383#endif
384
385
386/**
387 * Get the TR register.
388 * @returns TR.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetTR(void);
392#else
393DECLINLINE(RTSEL) ASMGetTR(void)
394{
395 RTSEL SelTR;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
398# else
399 __asm
400 {
401 str ax
402 mov [SelTR], ax
403 }
404# endif
405 return SelTR;
406}
407#endif
408
409
410/**
411 * Get the [RE]FLAGS register.
412 * @returns [RE]FLAGS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTCCUINTREG) ASMGetFlags(void);
416#else
417DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
418{
419 RTCCUINTREG uFlags;
420# if RT_INLINE_ASM_GNU_STYLE
421# ifdef __AMD64__
422 __asm__ __volatile__("pushfq\n\t"
423 "popq %0\n\t"
424 : "=m" (uFlags));
425# else
426 __asm__ __volatile__("pushfl\n\t"
427 "popl %0\n\t"
428 : "=m" (uFlags));
429# endif
430# else
431 __asm
432 {
433# ifdef __AMD64__
434 pushfq
435 pop [uFlags]
436# else
437 pushfd
438 pop [uFlags]
439# endif
440 }
441# endif
442 return uFlags;
443}
444#endif
445
446
447/**
448 * Set the [RE]FLAGS register.
449 * @param uFlags The new [RE]FLAGS value.
450 */
451#if RT_INLINE_ASM_EXTERNAL
452DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
453#else
454DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
455{
456# if RT_INLINE_ASM_GNU_STYLE
457# ifdef __AMD64__
458 __asm__ __volatile__("pushq %0\n\t"
459 "popfq\n\t"
460 : : "m" (uFlags));
461# else
462 __asm__ __volatile__("pushl %0\n\t"
463 "popfl\n\t"
464 : : "m" (uFlags));
465# endif
466# else
467 __asm
468 {
469# ifdef __AMD64__
470 push [uFlags]
471 popfq
472# else
473 push [uFlags]
474 popfd
475# endif
476 }
477# endif
478}
479#endif
480
481
482/**
483 * Gets the content of the CPU timestamp counter register.
484 *
485 * @returns TSC.
486 */
487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
488DECLASM(uint64_t) ASMReadTSC(void);
489#else
490DECLINLINE(uint64_t) ASMReadTSC(void)
491{
492 RTUINT64U u;
493# if RT_INLINE_ASM_GNU_STYLE
494 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
495# else
496# if RT_INLINE_ASM_USES_INTRIN
497 u.u = __rdtsc();
498# else
499 __asm
500 {
501 rdtsc
502 mov [u.s.Lo], eax
503 mov [u.s.Hi], edx
504 }
505# endif
506# endif
507 return u.u;
508}
509#endif
510
511
512/**
513 * Performs the cpuid instruction returning all registers.
514 *
515 * @param uOperator CPUID operation (eax).
516 * @param pvEAX Where to store eax.
517 * @param pvEBX Where to store ebx.
518 * @param pvECX Where to store ecx.
519 * @param pvEDX Where to store edx.
520 * @remark We're using void pointers to ease the use of special bitfield structures and such.
521 */
522#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
523DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
524#else
525DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
526{
527# if RT_INLINE_ASM_GNU_STYLE
528# ifdef __AMD64__
529 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
530 __asm__ ("cpuid\n\t"
531 : "=a" (uRAX),
532 "=b" (uRBX),
533 "=c" (uRCX),
534 "=d" (uRDX)
535 : "0" (uOperator));
536 *(uint32_t *)pvEAX = (uint32_t)uRAX;
537 *(uint32_t *)pvEBX = (uint32_t)uRBX;
538 *(uint32_t *)pvECX = (uint32_t)uRCX;
539 *(uint32_t *)pvEDX = (uint32_t)uRDX;
540# else
541 __asm__ ("xchgl %%ebx, %1\n\t"
542 "cpuid\n\t"
543 "xchgl %%ebx, %1\n\t"
544 : "=a" (*(uint32_t *)pvEAX),
545 "=r" (*(uint32_t *)pvEBX),
546 "=c" (*(uint32_t *)pvECX),
547 "=d" (*(uint32_t *)pvEDX)
548 : "0" (uOperator));
549# endif
550
551# elif RT_INLINE_ASM_USES_INTRIN
552 int aInfo[4];
553 __cpuid(aInfo, uOperator);
554 *(uint32_t *)pvEAX = aInfo[0];
555 *(uint32_t *)pvEBX = aInfo[1];
556 *(uint32_t *)pvECX = aInfo[2];
557 *(uint32_t *)pvEDX = aInfo[3];
558
559# else
560 uint32_t uEAX;
561 uint32_t uEBX;
562 uint32_t uECX;
563 uint32_t uEDX;
564 __asm
565 {
566 push ebx
567 mov eax, [uOperator]
568 cpuid
569 mov [uEAX], eax
570 mov [uEBX], ebx
571 mov [uECX], ecx
572 mov [uEDX], edx
573 pop ebx
574 }
575 *(uint32_t *)pvEAX = uEAX;
576 *(uint32_t *)pvEBX = uEBX;
577 *(uint32_t *)pvECX = uECX;
578 *(uint32_t *)pvEDX = uEDX;
579# endif
580}
581#endif
582
583
584/**
585 * Performs the cpuid instruction returning ecx and edx.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvECX Where to store ecx.
589 * @param pvEDX Where to store edx.
590 * @remark We're using void pointers to ease the use of special bitfield structures and such.
591 */
592#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
593DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
594#else
595DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
596{
597 uint32_t uEBX;
598 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
599}
600#endif
601
602
603/**
604 * Performs the cpuid instruction returning edx.
605 *
606 * @param uOperator CPUID operation (eax).
607 * @returns EDX after cpuid operation.
608 */
609#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
610DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
611#else
612DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
613{
614 RTCCUINTREG xDX;
615# if RT_INLINE_ASM_GNU_STYLE
616# ifdef __AMD64__
617 RTCCUINTREG uSpill;
618 __asm__ ("cpuid"
619 : "=a" (uSpill),
620 "=d" (xDX)
621 : "0" (uOperator)
622 : "rbx", "rcx");
623# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: PIC by default. */
624 __asm__ ("push %%ebx\n\t"
625 "cpuid\n\t"
626 "pop %%ebx\n\t"
627 : "=a" (uOperator),
628 "=d" (xDX)
629 : "0" (uOperator)
630 : "ecx");
631# else
632 __asm__ ("cpuid"
633 : "=a" (uOperator),
634 "=d" (xDX)
635 : "0" (uOperator)
636 : "ebx", "ecx");
637# endif
638
639# elif RT_INLINE_ASM_USES_INTRIN
640 int aInfo[4];
641 __cpuid(aInfo, uOperator);
642 xDX = aInfo[3];
643
644# else
645 __asm
646 {
647 push ebx
648 mov eax, [uOperator]
649 cpuid
650 mov [xDX], edx
651 pop ebx
652 }
653# endif
654 return (uint32_t)xDX;
655}
656#endif
657
658
659/**
660 * Performs the cpuid instruction returning ecx.
661 *
662 * @param uOperator CPUID operation (eax).
663 * @returns ECX after cpuid operation.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
667#else
668DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
669{
670 RTCCUINTREG xCX;
671# if RT_INLINE_ASM_GNU_STYLE
672# ifdef __AMD64__
673 RTCCUINTREG uSpill;
674 __asm__ ("cpuid"
675 : "=a" (uSpill),
676 "=c" (xCX)
677 : "0" (uOperator)
678 : "rbx", "rdx");
679# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
680 __asm__ ("push %%ebx\n\t"
681 "cpuid\n\t"
682 "pop %%ebx\n\t"
683 : "=a" (uOperator),
684 "=c" (xCX)
685 : "0" (uOperator)
686 : "edx");
687# else
688 __asm__ ("cpuid"
689 : "=a" (uOperator),
690 "=c" (xCX)
691 : "0" (uOperator)
692 : "ebx", "edx");
693
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 __cpuid(aInfo, uOperator);
699 xCX = aInfo[2];
700
701# else
702 __asm
703 {
704 push ebx
705 mov eax, [uOperator]
706 cpuid
707 mov [xCX], ecx
708 pop ebx
709 }
710# endif
711 return (uint32_t)xCX;
712}
713#endif
714
715
716/**
717 * Checks if the current CPU supports CPUID.
718 *
719 * @returns true if CPUID is supported.
720 */
721DECLINLINE(bool) ASMHasCpuId(void)
722{
723#ifdef __AMD64__
724 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
725#else /* !__AMD64__ */
726 bool fRet = false;
727# if RT_INLINE_ASM_GNU_STYLE
728 uint32_t u1;
729 uint32_t u2;
730 __asm__ ("pushf\n\t"
731 "pop %1\n\t"
732 "mov %1, %2\n\t"
733 "xorl $0x200000, %1\n\t"
734 "push %1\n\t"
735 "popf\n\t"
736 "pushf\n\t"
737 "pop %1\n\t"
738 "cmpl %1, %2\n\t"
739 "setne %0\n\t"
740 "push %2\n\t"
741 "popf\n\t"
742 : "=m" (fRet), "=r" (u1), "=r" (u2));
743# else
744 __asm
745 {
746 pushfd
747 pop eax
748 mov ebx, eax
749 xor eax, 0200000h
750 push eax
751 popfd
752 pushfd
753 pop eax
754 cmp eax, ebx
755 setne fRet
756 push ebx
757 popfd
758 }
759# endif
760 return fRet;
761#endif /* !__AMD64__ */
762}
763
764
765/**
766 * Gets the APIC ID of the current CPU.
767 *
768 * @returns the APIC ID.
769 */
770#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
771DECLASM(uint8_t) ASMGetApicId(void);
772#else
773DECLINLINE(uint8_t) ASMGetApicId(void)
774{
775 RTCCUINTREG xBX;
776# if RT_INLINE_ASM_GNU_STYLE
777# ifdef __AMD64__
778 RTCCUINTREG uSpill;
779 __asm__ ("cpuid"
780 : "=a" (uSpill),
781 "=b" (xBX)
782 : "0" (1)
783 : "rcx", "rdx");
784# elif (defined(PIC) || defined(__DARWIN__)) && defined(__i386__)
785 RTCCUINTREG uSpill;
786 __asm__ ("mov %%ebx,%1\n\t"
787 "cpuid\n\t"
788 "xchgl %%ebx,%1\n\t"
789 : "=a" (uSpill),
790 "=r" (xBX)
791 : "0" (1)
792 : "ecx", "edx");
793# else
794 RTCCUINTREG uSpill;
795 __asm__ ("cpuid"
796 : "=a" (uSpill),
797 "=b" (xBX)
798 : "0" (1)
799 : "ecx", "edx");
800# endif
801
802# elif RT_INLINE_ASM_USES_INTRIN
803 int aInfo[4];
804 __cpuid(aInfo, 1);
805 xBX = aInfo[1];
806
807# else
808 __asm
809 {
810 push ebx
811 mov eax, 1
812 cpuid
813 mov [xBX], ebx
814 pop ebx
815 }
816# endif
817 return (uint8_t)(xBX >> 24);
818}
819#endif
820
821/**
822 * Get cr0.
823 * @returns cr0.
824 */
825#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
826DECLASM(RTCCUINTREG) ASMGetCR0(void);
827#else
828DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
829{
830 RTCCUINTREG uCR0;
831# if RT_INLINE_ASM_USES_INTRIN
832 uCR0 = __readcr0();
833
834# elif RT_INLINE_ASM_GNU_STYLE
835# ifdef __AMD64__
836 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
837# else
838 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
839# endif
840# else
841 __asm
842 {
843# ifdef __AMD64__
844 mov rax, cr0
845 mov [uCR0], rax
846# else
847 mov eax, cr0
848 mov [uCR0], eax
849# endif
850 }
851# endif
852 return uCR0;
853}
854#endif
855
856
857/**
858 * Sets the CR0 register.
859 * @param uCR0 The new CR0 value.
860 */
861#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
862DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
863#else
864DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
865{
866# if RT_INLINE_ASM_USES_INTRIN
867 __writecr0(uCR0);
868
869# elif RT_INLINE_ASM_GNU_STYLE
870# ifdef __AMD64__
871 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
872# else
873 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
874# endif
875# else
876 __asm
877 {
878# ifdef __AMD64__
879 mov rax, [uCR0]
880 mov cr0, rax
881# else
882 mov eax, [uCR0]
883 mov cr0, eax
884# endif
885 }
886# endif
887}
888#endif
889
890
891/**
892 * Get cr2.
893 * @returns cr2.
894 */
895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
896DECLASM(RTCCUINTREG) ASMGetCR2(void);
897#else
898DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
899{
900 RTCCUINTREG uCR2;
901# if RT_INLINE_ASM_USES_INTRIN
902 uCR2 = __readcr2();
903
904# elif RT_INLINE_ASM_GNU_STYLE
905# ifdef __AMD64__
906 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
907# else
908 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
909# endif
910# else
911 __asm
912 {
913# ifdef __AMD64__
914 mov rax, cr2
915 mov [uCR2], rax
916# else
917 mov eax, cr2
918 mov [uCR2], eax
919# endif
920 }
921# endif
922 return uCR2;
923}
924#endif
925
926
927/**
928 * Sets the CR2 register.
929 * @param uCR2 The new CR0 value.
930 */
931#if RT_INLINE_ASM_EXTERNAL
932DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
933#else
934DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
935{
936# if RT_INLINE_ASM_GNU_STYLE
937# ifdef __AMD64__
938 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
939# else
940 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
941# endif
942# else
943 __asm
944 {
945# ifdef __AMD64__
946 mov rax, [uCR2]
947 mov cr2, rax
948# else
949 mov eax, [uCR2]
950 mov cr2, eax
951# endif
952 }
953# endif
954}
955#endif
956
957
958/**
959 * Get cr3.
960 * @returns cr3.
961 */
962#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
963DECLASM(RTCCUINTREG) ASMGetCR3(void);
964#else
965DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
966{
967 RTCCUINTREG uCR3;
968# if RT_INLINE_ASM_USES_INTRIN
969 uCR3 = __readcr3();
970
971# elif RT_INLINE_ASM_GNU_STYLE
972# ifdef __AMD64__
973 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
974# else
975 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
976# endif
977# else
978 __asm
979 {
980# ifdef __AMD64__
981 mov rax, cr3
982 mov [uCR3], rax
983# else
984 mov eax, cr3
985 mov [uCR3], eax
986# endif
987 }
988# endif
989 return uCR3;
990}
991#endif
992
993
994/**
995 * Sets the CR3 register.
996 *
997 * @param uCR3 New CR3 value.
998 */
999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1000DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1001#else
1002DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1003{
1004# if RT_INLINE_ASM_USES_INTRIN
1005 __writecr3(uCR3);
1006
1007# elif RT_INLINE_ASM_GNU_STYLE
1008# ifdef __AMD64__
1009 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1010# else
1011 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1012# endif
1013# else
1014 __asm
1015 {
1016# ifdef __AMD64__
1017 mov rax, [uCR3]
1018 mov cr3, rax
1019# else
1020 mov eax, [uCR3]
1021 mov cr3, eax
1022# endif
1023 }
1024# endif
1025}
1026#endif
1027
1028
1029/**
1030 * Reloads the CR3 register.
1031 */
1032#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1033DECLASM(void) ASMReloadCR3(void);
1034#else
1035DECLINLINE(void) ASMReloadCR3(void)
1036{
1037# if RT_INLINE_ASM_USES_INTRIN
1038 __writecr3(__readcr3());
1039
1040# elif RT_INLINE_ASM_GNU_STYLE
1041 RTCCUINTREG u;
1042# ifdef __AMD64__
1043 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1044 "movq %0, %%cr3\n\t"
1045 : "=r" (u));
1046# else
1047 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1048 "movl %0, %%cr3\n\t"
1049 : "=r" (u));
1050# endif
1051# else
1052 __asm
1053 {
1054# ifdef __AMD64__
1055 mov rax, cr3
1056 mov cr3, rax
1057# else
1058 mov eax, cr3
1059 mov cr3, eax
1060# endif
1061 }
1062# endif
1063}
1064#endif
1065
1066
1067/**
1068 * Get cr4.
1069 * @returns cr4.
1070 */
1071#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1072DECLASM(RTCCUINTREG) ASMGetCR4(void);
1073#else
1074DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1075{
1076 RTCCUINTREG uCR4;
1077# if RT_INLINE_ASM_USES_INTRIN
1078 uCR4 = __readcr4();
1079
1080# elif RT_INLINE_ASM_GNU_STYLE
1081# ifdef __AMD64__
1082 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1083# else
1084 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1085# endif
1086# else
1087 __asm
1088 {
1089# ifdef __AMD64__
1090 mov rax, cr4
1091 mov [uCR4], rax
1092# else
1093 push eax /* just in case */
1094 /*mov eax, cr4*/
1095 _emit 0x0f
1096 _emit 0x20
1097 _emit 0xe0
1098 mov [uCR4], eax
1099 pop eax
1100# endif
1101 }
1102# endif
1103 return uCR4;
1104}
1105#endif
1106
1107
1108/**
1109 * Sets the CR4 register.
1110 *
1111 * @param uCR4 New CR4 value.
1112 */
1113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1114DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1115#else
1116DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1117{
1118# if RT_INLINE_ASM_USES_INTRIN
1119 __writecr4(uCR4);
1120
1121# elif RT_INLINE_ASM_GNU_STYLE
1122# ifdef __AMD64__
1123 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1124# else
1125 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1126# endif
1127# else
1128 __asm
1129 {
1130# ifdef __AMD64__
1131 mov rax, [uCR4]
1132 mov cr4, rax
1133# else
1134 mov eax, [uCR4]
1135 _emit 0x0F
1136 _emit 0x22
1137 _emit 0xE0 /* mov cr4, eax */
1138# endif
1139 }
1140# endif
1141}
1142#endif
1143
1144
1145/**
1146 * Get cr8.
1147 * @returns cr8.
1148 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1149 */
1150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1151DECLASM(RTCCUINTREG) ASMGetCR8(void);
1152#else
1153DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1154{
1155# ifdef __AMD64__
1156 RTCCUINTREG uCR8;
1157# if RT_INLINE_ASM_USES_INTRIN
1158 uCR8 = __readcr8();
1159
1160# elif RT_INLINE_ASM_GNU_STYLE
1161 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1162# else
1163 __asm
1164 {
1165 mov rax, cr8
1166 mov [uCR8], rax
1167 }
1168# endif
1169 return uCR8;
1170# else /* !__AMD64__ */
1171 return 0;
1172# endif /* !__AMD64__ */
1173}
1174#endif
1175
1176
1177/**
1178 * Enables interrupts (EFLAGS.IF).
1179 */
1180#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1181DECLASM(void) ASMIntEnable(void);
1182#else
1183DECLINLINE(void) ASMIntEnable(void)
1184{
1185# if RT_INLINE_ASM_GNU_STYLE
1186 __asm("sti\n");
1187# elif RT_INLINE_ASM_USES_INTRIN
1188 _enable();
1189# else
1190 __asm sti
1191# endif
1192}
1193#endif
1194
1195
1196/**
1197 * Disables interrupts (!EFLAGS.IF).
1198 */
1199#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1200DECLASM(void) ASMIntDisable(void);
1201#else
1202DECLINLINE(void) ASMIntDisable(void)
1203{
1204# if RT_INLINE_ASM_GNU_STYLE
1205 __asm("cli\n");
1206# elif RT_INLINE_ASM_USES_INTRIN
1207 _disable();
1208# else
1209 __asm cli
1210# endif
1211}
1212#endif
1213
1214
1215/**
1216 * Disables interrupts and returns previous xFLAGS.
1217 */
1218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1219DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1220#else
1221DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1222{
1223 RTCCUINTREG xFlags;
1224# if RT_INLINE_ASM_GNU_STYLE
1225# ifdef __AMD64__
1226 __asm__ __volatile__("pushfq\n\t"
1227 "cli\n\t"
1228 "popq %0\n\t"
1229 : "=m" (xFlags));
1230# else
1231 __asm__ __volatile__("pushfl\n\t"
1232 "cli\n\t"
1233 "popl %0\n\t"
1234 : "=m" (xFlags));
1235# endif
1236# elif RT_INLINE_ASM_USES_INTRIN && !defined(__X86__)
1237 xFlags = ASMGetFlags();
1238 _disable();
1239# else
1240 __asm {
1241 pushfd
1242 cli
1243 pop [xFlags]
1244 }
1245# endif
1246 return xFlags;
1247}
1248#endif
1249
1250
1251/**
1252 * Reads a machine specific register.
1253 *
1254 * @returns Register content.
1255 * @param uRegister Register to read.
1256 */
1257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1258DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1259#else
1260DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1261{
1262 RTUINT64U u;
1263# if RT_INLINE_ASM_GNU_STYLE
1264 __asm__ ("rdmsr\n\t"
1265 : "=a" (u.s.Lo),
1266 "=d" (u.s.Hi)
1267 : "c" (uRegister));
1268
1269# elif RT_INLINE_ASM_USES_INTRIN
1270 u.u = __readmsr(uRegister);
1271
1272# else
1273 __asm
1274 {
1275 mov ecx, [uRegister]
1276 rdmsr
1277 mov [u.s.Lo], eax
1278 mov [u.s.Hi], edx
1279 }
1280# endif
1281
1282 return u.u;
1283}
1284#endif
1285
1286
1287/**
1288 * Writes a machine specific register.
1289 *
1290 * @returns Register content.
1291 * @param uRegister Register to write to.
1292 * @param u64Val Value to write.
1293 */
1294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1295DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1296#else
1297DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1298{
1299 RTUINT64U u;
1300
1301 u.u = u64Val;
1302# if RT_INLINE_ASM_GNU_STYLE
1303 __asm__ __volatile__("wrmsr\n\t"
1304 ::"a" (u.s.Lo),
1305 "d" (u.s.Hi),
1306 "c" (uRegister));
1307
1308# elif RT_INLINE_ASM_USES_INTRIN
1309 __writemsr(uRegister, u.u);
1310
1311# else
1312 __asm
1313 {
1314 mov ecx, [uRegister]
1315 mov edx, [u.s.Hi]
1316 mov eax, [u.s.Lo]
1317 wrmsr
1318 }
1319# endif
1320}
1321#endif
1322
1323
1324/**
1325 * Reads low part of a machine specific register.
1326 *
1327 * @returns Register content.
1328 * @param uRegister Register to read.
1329 */
1330#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1331DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1332#else
1333DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1334{
1335 uint32_t u32;
1336# if RT_INLINE_ASM_GNU_STYLE
1337 __asm__ ("rdmsr\n\t"
1338 : "=a" (u32)
1339 : "c" (uRegister)
1340 : "edx");
1341
1342# elif RT_INLINE_ASM_USES_INTRIN
1343 u32 = (uint32_t)__readmsr(uRegister);
1344
1345#else
1346 __asm
1347 {
1348 mov ecx, [uRegister]
1349 rdmsr
1350 mov [u32], eax
1351 }
1352# endif
1353
1354 return u32;
1355}
1356#endif
1357
1358
1359/**
1360 * Reads high part of a machine specific register.
1361 *
1362 * @returns Register content.
1363 * @param uRegister Register to read.
1364 */
1365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1366DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1367#else
1368DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1369{
1370 uint32_t u32;
1371# if RT_INLINE_ASM_GNU_STYLE
1372 __asm__ ("rdmsr\n\t"
1373 : "=d" (u32)
1374 : "c" (uRegister)
1375 : "eax");
1376
1377# elif RT_INLINE_ASM_USES_INTRIN
1378 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1379
1380# else
1381 __asm
1382 {
1383 mov ecx, [uRegister]
1384 rdmsr
1385 mov [u32], edx
1386 }
1387# endif
1388
1389 return u32;
1390}
1391#endif
1392
1393
1394/**
1395 * Gets dr7.
1396 *
1397 * @returns dr7.
1398 */
1399#if RT_INLINE_ASM_EXTERNAL
1400DECLASM(RTCCUINTREG) ASMGetDR7(void);
1401#else
1402DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1403{
1404 RTCCUINTREG uDR7;
1405# if RT_INLINE_ASM_GNU_STYLE
1406# ifdef __AMD64__
1407 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1408# else
1409 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1410# endif
1411# else
1412 __asm
1413 {
1414# ifdef __AMD64__
1415 mov rax, dr7
1416 mov [uDR7], rax
1417# else
1418 mov eax, dr7
1419 mov [uDR7], eax
1420# endif
1421 }
1422# endif
1423 return uDR7;
1424}
1425#endif
1426
1427
1428/**
1429 * Gets dr6.
1430 *
1431 * @returns dr6.
1432 */
1433#if RT_INLINE_ASM_EXTERNAL
1434DECLASM(RTCCUINTREG) ASMGetDR6(void);
1435#else
1436DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1437{
1438 RTCCUINTREG uDR6;
1439# if RT_INLINE_ASM_GNU_STYLE
1440# ifdef __AMD64__
1441 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1442# else
1443 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1444# endif
1445# else
1446 __asm
1447 {
1448# ifdef __AMD64__
1449 mov rax, dr6
1450 mov [uDR6], rax
1451# else
1452 mov eax, dr6
1453 mov [uDR6], eax
1454# endif
1455 }
1456# endif
1457 return uDR6;
1458}
1459#endif
1460
1461
1462/**
1463 * Reads and clears DR6.
1464 *
1465 * @returns DR6.
1466 */
1467#if RT_INLINE_ASM_EXTERNAL
1468DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1469#else
1470DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1471{
1472 RTCCUINTREG uDR6;
1473# if RT_INLINE_ASM_GNU_STYLE
1474 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1475# ifdef __AMD64__
1476 __asm__ ("movq %%dr6, %0\n\t"
1477 "movq %1, %%dr6\n\t"
1478 : "=r" (uDR6)
1479 : "r" (uNewValue));
1480# else
1481 __asm__ ("movl %%dr6, %0\n\t"
1482 "movl %1, %%dr6\n\t"
1483 : "=r" (uDR6)
1484 : "r" (uNewValue));
1485# endif
1486# else
1487 __asm
1488 {
1489# ifdef __AMD64__
1490 mov rax, dr6
1491 mov [uDR6], rax
1492 mov rcx, rax
1493 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1494 mov dr6, rcx
1495# else
1496 mov eax, dr6
1497 mov [uDR6], eax
1498 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1499 mov dr6, ecx
1500# endif
1501 }
1502# endif
1503 return uDR6;
1504}
1505#endif
1506
1507
1508/**
1509 * Compiler memory barrier.
1510 *
1511 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1512 * values or any outstanding writes when returning from this function.
1513 *
1514 * This function must be used if non-volatile data is modified by a
1515 * device or the VMM. Typical cases are port access, MMIO access,
1516 * trapping instruction, etc.
1517 */
1518#if RT_INLINE_ASM_GNU_STYLE
1519# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1520#elif RT_INLINE_ASM_USES_INTRIN
1521# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1522#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1523DECLINLINE(void) ASMCompilerBarrier(void)
1524{
1525 __asm
1526 {
1527 }
1528}
1529#endif
1530
1531
1532/**
1533 * Writes a 8-bit unsigned integer to an I/O port.
1534 *
1535 * @param Port I/O port to read from.
1536 * @param u8 8-bit integer to write.
1537 */
1538#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1539DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1540#else
1541DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1542{
1543# if RT_INLINE_ASM_GNU_STYLE
1544 __asm__ __volatile__("outb %b1, %w0\n\t"
1545 :: "Nd" (Port),
1546 "a" (u8));
1547
1548# elif RT_INLINE_ASM_USES_INTRIN
1549 __outbyte(Port, u8);
1550
1551# else
1552 __asm
1553 {
1554 mov dx, [Port]
1555 mov al, [u8]
1556 out dx, al
1557 }
1558# endif
1559}
1560#endif
1561
1562
1563/**
1564 * Gets a 8-bit unsigned integer from an I/O port.
1565 *
1566 * @returns 8-bit integer.
1567 * @param Port I/O port to read from.
1568 */
1569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1570DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1571#else
1572DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1573{
1574 uint8_t u8;
1575# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__("inb %w1, %b0\n\t"
1577 : "=a" (u8)
1578 : "Nd" (Port));
1579
1580# elif RT_INLINE_ASM_USES_INTRIN
1581 u8 = __inbyte(Port);
1582
1583# else
1584 __asm
1585 {
1586 mov dx, [Port]
1587 in al, dx
1588 mov [u8], al
1589 }
1590# endif
1591 return u8;
1592}
1593#endif
1594
1595
1596/**
1597 * Writes a 16-bit unsigned integer to an I/O port.
1598 *
1599 * @param Port I/O port to read from.
1600 * @param u16 16-bit integer to write.
1601 */
1602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1603DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1604#else
1605DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1606{
1607# if RT_INLINE_ASM_GNU_STYLE
1608 __asm__ __volatile__("outw %w1, %w0\n\t"
1609 :: "Nd" (Port),
1610 "a" (u16));
1611
1612# elif RT_INLINE_ASM_USES_INTRIN
1613 __outword(Port, u16);
1614
1615# else
1616 __asm
1617 {
1618 mov dx, [Port]
1619 mov ax, [u16]
1620 out dx, ax
1621 }
1622# endif
1623}
1624#endif
1625
1626
1627/**
1628 * Gets a 16-bit unsigned integer from an I/O port.
1629 *
1630 * @returns 16-bit integer.
1631 * @param Port I/O port to read from.
1632 */
1633#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1634DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1635#else
1636DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1637{
1638 uint16_t u16;
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("inw %w1, %w0\n\t"
1641 : "=a" (u16)
1642 : "Nd" (Port));
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u16 = __inword(Port);
1646
1647# else
1648 __asm
1649 {
1650 mov dx, [Port]
1651 in ax, dx
1652 mov [u16], ax
1653 }
1654# endif
1655 return u16;
1656}
1657#endif
1658
1659
1660/**
1661 * Writes a 32-bit unsigned integer to an I/O port.
1662 *
1663 * @param Port I/O port to read from.
1664 * @param u32 32-bit integer to write.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1668#else
1669DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1670{
1671# if RT_INLINE_ASM_GNU_STYLE
1672 __asm__ __volatile__("outl %1, %w0\n\t"
1673 :: "Nd" (Port),
1674 "a" (u32));
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 __outdword(Port, u32);
1678
1679# else
1680 __asm
1681 {
1682 mov dx, [Port]
1683 mov eax, [u32]
1684 out dx, eax
1685 }
1686# endif
1687}
1688#endif
1689
1690
1691/**
1692 * Gets a 32-bit unsigned integer from an I/O port.
1693 *
1694 * @returns 32-bit integer.
1695 * @param Port I/O port to read from.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1699#else
1700DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1701{
1702 uint32_t u32;
1703# if RT_INLINE_ASM_GNU_STYLE
1704 __asm__ __volatile__("inl %w1, %0\n\t"
1705 : "=a" (u32)
1706 : "Nd" (Port));
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 u32 = __indword(Port);
1710
1711# else
1712 __asm
1713 {
1714 mov dx, [Port]
1715 in eax, dx
1716 mov [u32], eax
1717 }
1718# endif
1719 return u32;
1720}
1721#endif
1722
1723
1724/**
1725 * Atomically Exchange an unsigned 8-bit value.
1726 *
1727 * @returns Current *pu8 value
1728 * @param pu8 Pointer to the 8-bit variable to update.
1729 * @param u8 The 8-bit value to assign to *pu8.
1730 */
1731#if RT_INLINE_ASM_EXTERNAL
1732DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1733#else
1734DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1735{
1736# if RT_INLINE_ASM_GNU_STYLE
1737 __asm__ __volatile__("xchgb %0, %1\n\t"
1738 : "=m" (*pu8),
1739 "=r" (u8)
1740 : "1" (u8));
1741# else
1742 __asm
1743 {
1744# ifdef __AMD64__
1745 mov rdx, [pu8]
1746 mov al, [u8]
1747 xchg [rdx], al
1748 mov [u8], al
1749# else
1750 mov edx, [pu8]
1751 mov al, [u8]
1752 xchg [edx], al
1753 mov [u8], al
1754# endif
1755 }
1756# endif
1757 return u8;
1758}
1759#endif
1760
1761
1762/**
1763 * Atomically Exchange a signed 8-bit value.
1764 *
1765 * @returns Current *pu8 value
1766 * @param pi8 Pointer to the 8-bit variable to update.
1767 * @param i8 The 8-bit value to assign to *pi8.
1768 */
1769DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1770{
1771 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1772}
1773
1774
1775/**
1776 * Atomically Exchange a bool value.
1777 *
1778 * @returns Current *pf value
1779 * @param pf Pointer to the 8-bit variable to update.
1780 * @param f The 8-bit value to assign to *pi8.
1781 */
1782DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1783{
1784#ifdef _MSC_VER
1785 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1786#else
1787 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1788#endif
1789}
1790
1791
1792/**
1793 * Atomically Exchange an unsigned 16-bit value.
1794 *
1795 * @returns Current *pu16 value
1796 * @param pu16 Pointer to the 16-bit variable to update.
1797 * @param u16 The 16-bit value to assign to *pu16.
1798 */
1799#if RT_INLINE_ASM_EXTERNAL
1800DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1801#else
1802DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1803{
1804# if RT_INLINE_ASM_GNU_STYLE
1805 __asm__ __volatile__("xchgw %0, %1\n\t"
1806 : "=m" (*pu16),
1807 "=r" (u16)
1808 : "1" (u16));
1809# else
1810 __asm
1811 {
1812# ifdef __AMD64__
1813 mov rdx, [pu16]
1814 mov ax, [u16]
1815 xchg [rdx], ax
1816 mov [u16], ax
1817# else
1818 mov edx, [pu16]
1819 mov ax, [u16]
1820 xchg [edx], ax
1821 mov [u16], ax
1822# endif
1823 }
1824# endif
1825 return u16;
1826}
1827#endif
1828
1829
1830/**
1831 * Atomically Exchange a signed 16-bit value.
1832 *
1833 * @returns Current *pu16 value
1834 * @param pi16 Pointer to the 16-bit variable to update.
1835 * @param i16 The 16-bit value to assign to *pi16.
1836 */
1837DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1838{
1839 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1840}
1841
1842
1843/**
1844 * Atomically Exchange an unsigned 32-bit value.
1845 *
1846 * @returns Current *pu32 value
1847 * @param pu32 Pointer to the 32-bit variable to update.
1848 * @param u32 The 32-bit value to assign to *pu32.
1849 */
1850#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1851DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1852#else
1853DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1854{
1855# if RT_INLINE_ASM_GNU_STYLE
1856 __asm__ __volatile__("xchgl %0, %1\n\t"
1857 : "=m" (*pu32),
1858 "=r" (u32)
1859 : "1" (u32));
1860
1861# elif RT_INLINE_ASM_USES_INTRIN
1862 u32 = _InterlockedExchange((long *)pu32, u32);
1863
1864# else
1865 __asm
1866 {
1867# ifdef __AMD64__
1868 mov rdx, [pu32]
1869 mov eax, u32
1870 xchg [rdx], eax
1871 mov [u32], eax
1872# else
1873 mov edx, [pu32]
1874 mov eax, u32
1875 xchg [edx], eax
1876 mov [u32], eax
1877# endif
1878 }
1879# endif
1880 return u32;
1881}
1882#endif
1883
1884
1885/**
1886 * Atomically Exchange a signed 32-bit value.
1887 *
1888 * @returns Current *pu32 value
1889 * @param pi32 Pointer to the 32-bit variable to update.
1890 * @param i32 The 32-bit value to assign to *pi32.
1891 */
1892DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1893{
1894 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1895}
1896
1897
1898/**
1899 * Atomically Exchange an unsigned 64-bit value.
1900 *
1901 * @returns Current *pu64 value
1902 * @param pu64 Pointer to the 64-bit variable to update.
1903 * @param u64 The 64-bit value to assign to *pu64.
1904 */
1905#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1906DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1907#else
1908DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1909{
1910# if defined(__AMD64__)
1911# if RT_INLINE_ASM_USES_INTRIN
1912 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1913
1914# elif RT_INLINE_ASM_GNU_STYLE
1915 __asm__ __volatile__("xchgq %0, %1\n\t"
1916 : "=m" (*pu64),
1917 "=r" (u64)
1918 : "1" (u64));
1919# else
1920 __asm
1921 {
1922 mov rdx, [pu64]
1923 mov rax, [u64]
1924 xchg [rdx], rax
1925 mov [u64], rax
1926 }
1927# endif
1928# else /* !__AMD64__ */
1929# if RT_INLINE_ASM_GNU_STYLE
1930# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
1931 uint32_t u32 = (uint32_t)u64;
1932 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1933 "xchgl %%ebx, %3\n\t"
1934 "1:\n\t"
1935 "lock; cmpxchg8b (%5)\n\t"
1936 "jnz 1b\n\t"
1937 "xchgl %%ebx, %3\n\t"
1938 /*"xchgl %%esi, %5\n\t"*/
1939 : "=A" (u64),
1940 "=m" (*pu64)
1941 : "0" (*pu64),
1942 "m" ( u32 ),
1943 "c" ( (uint32_t)(u64 >> 32) ),
1944 "S" (pu64) );
1945# else /* !PIC */
1946 __asm__ __volatile__("1:\n\t"
1947 "lock; cmpxchg8b %1\n\t"
1948 "jnz 1b\n\t"
1949 : "=A" (u64),
1950 "=m" (*pu64)
1951 : "0" (*pu64),
1952 "b" ( (uint32_t)u64 ),
1953 "c" ( (uint32_t)(u64 >> 32) ));
1954# endif
1955# else
1956 __asm
1957 {
1958 mov ebx, dword ptr [u64]
1959 mov ecx, dword ptr [u64 + 4]
1960 mov edi, pu64
1961 mov eax, dword ptr [edi]
1962 mov edx, dword ptr [edi + 4]
1963 retry:
1964 lock cmpxchg8b [edi]
1965 jnz retry
1966 mov dword ptr [u64], eax
1967 mov dword ptr [u64 + 4], edx
1968 }
1969# endif
1970# endif /* !__AMD64__ */
1971 return u64;
1972}
1973#endif
1974
1975
1976/**
1977 * Atomically Exchange an signed 64-bit value.
1978 *
1979 * @returns Current *pi64 value
1980 * @param pi64 Pointer to the 64-bit variable to update.
1981 * @param i64 The 64-bit value to assign to *pi64.
1982 */
1983DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1984{
1985 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1986}
1987
1988
#ifdef __AMD64__
/**
 * Exchange an unsigned 128-bit value.
 *
 * @returns The previous *pu128 value, assembled from two separate 64-bit
 *          exchanges (low half first, then high half).
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead. The
 *          128-bit operation as a whole is therefore NOT atomic.
 *
 * @todo    Add a real lock cmpxchg16b path once it can be assembled; the
 *          earlier, never-compiled draft gated it on ASMCpuId_ECX(1) & BIT(13).
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /** @todo this is clumsy code */
    RTUINT128U u128Ret;
    u128Ret.u = u128;
    /* Each half is exchanged atomically on its own; another writer can slip in between. */
    u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
    u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
    return u128Ret.u;
}
# endif
#endif /* __AMD64__ */
2048
2049
2050/**
2051 * Atomically Reads a unsigned 64-bit value.
2052 *
2053 * @returns Current *pu64 value
2054 * @param pu64 Pointer to the 64-bit variable to read.
2055 * The memory pointed to must be writable.
2056 * @remark This will fault if the memory is read-only!
2057 */
2058#if RT_INLINE_ASM_EXTERNAL
2059DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2060#else
2061DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2062{
2063 uint64_t u64;
2064# ifdef __AMD64__
2065# if RT_INLINE_ASM_GNU_STYLE
2066 __asm__ __volatile__("movq %1, %0\n\t"
2067 : "=r" (u64)
2068 : "m" (*pu64));
2069# else
2070 __asm
2071 {
2072 mov rdx, [pu64]
2073 mov rax, [rdx]
2074 mov [u64], rax
2075 }
2076# endif
2077# else /* !__AMD64__ */
2078# if RT_INLINE_ASM_GNU_STYLE
2079# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2080 uint32_t u32EBX = 0;
2081 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2082 "lock; cmpxchg8b (%5)\n\t"
2083 "xchgl %%ebx, %3\n\t"
2084 : "=A" (u64),
2085 "=m" (*pu64)
2086 : "0" (0),
2087 "m" (u32EBX),
2088 "c" (0),
2089 "S" (pu64));
2090# else /* !PIC */
2091 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2092 : "=A" (u64),
2093 "=m" (*pu64)
2094 : "0" (0),
2095 "b" (0),
2096 "c" (0));
2097# endif
2098# else
2099 __asm
2100 {
2101 xor eax, eax
2102 xor edx, edx
2103 mov edi, pu64
2104 xor ecx, ecx
2105 xor ebx, ebx
2106 lock cmpxchg8b [edi]
2107 mov dword ptr [u64], eax
2108 mov dword ptr [u64 + 4], edx
2109 }
2110# endif
2111# endif /* !__AMD64__ */
2112 return u64;
2113}
2114#endif
2115
2116
2117/**
2118 * Atomically Reads a signed 64-bit value.
2119 *
2120 * @returns Current *pi64 value
2121 * @param pi64 Pointer to the 64-bit variable to read.
2122 * The memory pointed to must be writable.
2123 * @remark This will fault if the memory is read-only!
2124 */
2125DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2126{
2127 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2128}
2129
2130
/** @def ASMAtomicXchgSize
 * Atomically Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function and
 * discards the old value; any other size trips an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) \
        { \
            case 1: \
                ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); \
                break; \
            case 2: \
                ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); \
                break; \
            case 4: \
                ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); \
                break; \
            case 8: \
                ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2148
2149
2150/**
2151 * Atomically Exchange a pointer value.
2152 *
2153 * @returns Current *ppv value
2154 * @param ppv Pointer to the pointer variable to update.
2155 * @param pv The pointer value to assign to *ppv.
2156 */
2157DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2158{
2159#if ARCH_BITS == 32
2160 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2161#elif ARCH_BITS == 64
2162 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2163#else
2164# error "ARCH_BITS is bogus"
2165#endif
2166}
2167
2168
2169/**
2170 * Atomically Compare and Exchange an unsigned 32-bit value.
2171 *
2172 * @returns true if xchg was done.
2173 * @returns false if xchg wasn't done.
2174 *
2175 * @param pu32 Pointer to the value to update.
2176 * @param u32New The new value to assigned to *pu32.
2177 * @param u32Old The old value to *pu32 compare with.
2178 */
2179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2180DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2181#else
2182DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2183{
2184# if RT_INLINE_ASM_GNU_STYLE
2185 uint32_t u32Ret;
2186 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2187 "setz %%al\n\t"
2188 "movzx %%al, %%eax\n\t"
2189 : "=m" (*pu32),
2190 "=a" (u32Ret)
2191 : "r" (u32New),
2192 "1" (u32Old));
2193 return (bool)u32Ret;
2194
2195# elif RT_INLINE_ASM_USES_INTRIN
2196 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2197
2198# else
2199 uint32_t u32Ret;
2200 __asm
2201 {
2202# ifdef __AMD64__
2203 mov rdx, [pu32]
2204# else
2205 mov edx, [pu32]
2206# endif
2207 mov eax, [u32Old]
2208 mov ecx, [u32New]
2209# ifdef __AMD64__
2210 lock cmpxchg [rdx], ecx
2211# else
2212 lock cmpxchg [edx], ecx
2213# endif
2214 setz al
2215 movzx eax, al
2216 mov [u32Ret], eax
2217 }
2218 return !!u32Ret;
2219# endif
2220}
2221#endif
2222
2223
2224/**
2225 * Atomically Compare and Exchange a signed 32-bit value.
2226 *
2227 * @returns true if xchg was done.
2228 * @returns false if xchg wasn't done.
2229 *
2230 * @param pi32 Pointer to the value to update.
2231 * @param i32New The new value to assigned to *pi32.
2232 * @param i32Old The old value to *pi32 compare with.
2233 */
2234DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2235{
2236 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2237}
2238
2239
2240/**
2241 * Atomically Compare and exchange an unsigned 64-bit value.
2242 *
2243 * @returns true if xchg was done.
2244 * @returns false if xchg wasn't done.
2245 *
2246 * @param pu64 Pointer to the 64-bit variable to update.
2247 * @param u64New The 64-bit value to assign to *pu64.
2248 * @param u64Old The value to compare with.
2249 */
2250#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2251DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2252#else
2253DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2254{
2255# if RT_INLINE_ASM_USES_INTRIN
2256 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2257
2258# elif defined(__AMD64__)
2259# if RT_INLINE_ASM_GNU_STYLE
2260 uint64_t u64Ret;
2261 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2262 "setz %%al\n\t"
2263 "movzx %%al, %%eax\n\t"
2264 : "=m" (*pu64),
2265 "=a" (u64Ret)
2266 : "r" (u64New),
2267 "1" (u64Old));
2268 return (bool)u64Ret;
2269# else
2270 bool fRet;
2271 __asm
2272 {
2273 mov rdx, [pu32]
2274 mov rax, [u64Old]
2275 mov rcx, [u64New]
2276 lock cmpxchg [rdx], rcx
2277 setz al
2278 mov [fRet], al
2279 }
2280 return fRet;
2281# endif
2282# else /* !__AMD64__ */
2283 uint32_t u32Ret;
2284# if RT_INLINE_ASM_GNU_STYLE
2285# if defined(PIC) || defined(__DARWIN__) /* darwin: 4.0.1 compiler option / bug? */
2286 uint32_t u32 = (uint32_t)u64New;
2287 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2288 "lock; cmpxchg8b (%5)\n\t"
2289 "setz %%al\n\t"
2290 "xchgl %%ebx, %3\n\t"
2291 "movzx %%al, %%eax\n\t"
2292 : "=a" (u32Ret),
2293 "=m" (*pu64)
2294 : "A" (u64Old),
2295 "m" ( u32 ),
2296 "c" ( (uint32_t)(u64New >> 32) ),
2297 "S" (pu64) );
2298# else /* !PIC */
2299 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2300 "setz %%al\n\t"
2301 "movzx %%al, %%eax\n\t"
2302 : "=a" (u32Ret),
2303 "=m" (*pu64)
2304 : "A" (u64Old),
2305 "b" ( (uint32_t)u64New ),
2306 "c" ( (uint32_t)(u64New >> 32) ));
2307# endif
2308 return (bool)u32Ret;
2309# else
2310 __asm
2311 {
2312 mov ebx, dword ptr [u64New]
2313 mov ecx, dword ptr [u64New + 4]
2314 mov edi, [pu64]
2315 mov eax, dword ptr [u64Old]
2316 mov edx, dword ptr [u64Old + 4]
2317 lock cmpxchg8b [edi]
2318 setz al
2319 movzx eax, al
2320 mov dword ptr [u32Ret], eax
2321 }
2322 return !!u32Ret;
2323# endif
2324# endif /* !__AMD64__ */
2325}
2326#endif
2327
2328
2329/**
2330 * Atomically Compare and exchange a signed 64-bit value.
2331 *
2332 * @returns true if xchg was done.
2333 * @returns false if xchg wasn't done.
2334 *
2335 * @param pi64 Pointer to the 64-bit variable to update.
2336 * @param i64 The 64-bit value to assign to *pu64.
2337 * @param i64Old The value to compare with.
2338 */
2339DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2340{
2341 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2342}
2343
2344
2345
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Only 4 and 8 byte operands are handled; any other size trips an assertion
 * and stores false in fRc.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) \
        { \
            case 4: \
                (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: \
                (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: \
                AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2367
2368
2369/**
2370 * Atomically Compare and Exchange a pointer value.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param ppv Pointer to the value to update.
2376 * @param pvNew The new value to assigned to *ppv.
2377 * @param pvOld The old value to *ppv compare with.
2378 */
2379DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2380{
2381#if ARCH_BITS == 32
2382 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2383#elif ARCH_BITS == 64
2384 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2385#else
2386# error "ARCH_BITS is bogus"
2387#endif
2388}
2389
2390
2391/**
2392 * Atomically increment a 32-bit value.
2393 *
2394 * @returns The new value.
2395 * @param pu32 Pointer to the value to increment.
2396 */
2397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2398DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2399#else
2400DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2401{
2402 uint32_t u32;
2403# if RT_INLINE_ASM_USES_INTRIN
2404 u32 = _InterlockedIncrement((long *)pu32);
2405
2406# elif RT_INLINE_ASM_GNU_STYLE
2407 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2408 "incl %0\n\t"
2409 : "=r" (u32),
2410 "=m" (*pu32)
2411 : "0" (1)
2412 : "memory");
2413# else
2414 __asm
2415 {
2416 mov eax, 1
2417# ifdef __AMD64__
2418 mov rdx, [pu32]
2419 lock xadd [rdx], eax
2420# else
2421 mov edx, [pu32]
2422 lock xadd [edx], eax
2423# endif
2424 inc eax
2425 mov u32, eax
2426 }
2427# endif
2428 return u32;
2429}
2430#endif
2431
2432
2433/**
2434 * Atomically increment a signed 32-bit value.
2435 *
2436 * @returns The new value.
2437 * @param pi32 Pointer to the value to increment.
2438 */
2439DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2440{
2441 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2442}
2443
2444
2445/**
2446 * Atomically decrement an unsigned 32-bit value.
2447 *
2448 * @returns The new value.
2449 * @param pu32 Pointer to the value to decrement.
2450 */
2451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2452DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2453#else
2454DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2455{
2456 uint32_t u32;
2457# if RT_INLINE_ASM_USES_INTRIN
2458 u32 = _InterlockedDecrement((long *)pu32);
2459
2460# elif RT_INLINE_ASM_GNU_STYLE
2461 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2462 "decl %0\n\t"
2463 : "=r" (u32),
2464 "=m" (*pu32)
2465 : "0" (-1)
2466 : "memory");
2467# else
2468 __asm
2469 {
2470 mov eax, -1
2471# ifdef __AMD64__
2472 mov rdx, [pu32]
2473 lock xadd [rdx], eax
2474# else
2475 mov edx, [pu32]
2476 lock xadd [edx], eax
2477# endif
2478 dec eax
2479 mov u32, eax
2480 }
2481# endif
2482 return u32;
2483}
2484#endif
2485
2486
2487/**
2488 * Atomically decrement a signed 32-bit value.
2489 *
2490 * @returns The new value.
2491 * @param pi32 Pointer to the value to decrement.
2492 */
2493DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2494{
2495 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2496}
2497
2498
2499/**
2500 * Atomically Or an unsigned 32-bit value.
2501 *
2502 * @param pu32 Pointer to the pointer variable to OR u32 with.
2503 * @param u32 The value to OR *pu32 with.
2504 */
2505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2506DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2507#else
2508DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2509{
2510# if RT_INLINE_ASM_USES_INTRIN
2511 _InterlockedOr((long volatile *)pu32, (long)u32);
2512
2513# elif RT_INLINE_ASM_GNU_STYLE
2514 __asm__ __volatile__("lock; orl %1, %0\n\t"
2515 : "=m" (*pu32)
2516 : "r" (u32));
2517# else
2518 __asm
2519 {
2520 mov eax, [u32]
2521# ifdef __AMD64__
2522 mov rdx, [pu32]
2523 lock or [rdx], eax
2524# else
2525 mov edx, [pu32]
2526 lock or [edx], eax
2527# endif
2528 }
2529# endif
2530}
2531#endif
2532
2533
2534/**
2535 * Atomically Or a signed 32-bit value.
2536 *
2537 * @param pi32 Pointer to the pointer variable to OR u32 with.
2538 * @param i32 The value to OR *pu32 with.
2539 */
2540DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2541{
2542 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2543}
2544
2545
2546/**
2547 * Atomically And an unsigned 32-bit value.
2548 *
2549 * @param pu32 Pointer to the pointer variable to AND u32 with.
2550 * @param u32 The value to AND *pu32 with.
2551 */
2552#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2553DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2554#else
2555DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2556{
2557# if RT_INLINE_ASM_USES_INTRIN
2558 _InterlockedAnd((long volatile *)pu32, u32);
2559
2560# elif RT_INLINE_ASM_GNU_STYLE
2561 __asm__ __volatile__("lock; andl %1, %0\n\t"
2562 : "=m" (*pu32)
2563 : "r" (u32));
2564# else
2565 __asm
2566 {
2567 mov eax, [u32]
2568# ifdef __AMD64__
2569 mov rdx, [pu32]
2570 lock and [rdx], eax
2571# else
2572 mov edx, [pu32]
2573 lock and [edx], eax
2574# endif
2575 }
2576# endif
2577}
2578#endif
2579
2580
2581/**
2582 * Atomically And a signed 32-bit value.
2583 *
2584 * @param pi32 Pointer to the pointer variable to AND i32 with.
2585 * @param i32 The value to AND *pi32 with.
2586 */
2587DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2588{
2589 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2590}
2591
2592
2593/**
2594 * Invalidate page.
2595 *
2596 * @param pv Address of the page to invalidate.
2597 */
2598#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2599DECLASM(void) ASMInvalidatePage(void *pv);
2600#else
2601DECLINLINE(void) ASMInvalidatePage(void *pv)
2602{
2603# if RT_INLINE_ASM_USES_INTRIN
2604 __invlpg(pv);
2605
2606# elif RT_INLINE_ASM_GNU_STYLE
2607 __asm__ __volatile__("invlpg %0\n\t"
2608 : : "m" (*(uint8_t *)pv));
2609# else
2610 __asm
2611 {
2612# ifdef __AMD64__
2613 mov rax, [pv]
2614 invlpg [rax]
2615# else
2616 mov eax, [pv]
2617 invlpg [eax]
2618# endif
2619 }
2620# endif
2621}
2622#endif
2623
2624
/* ASMMemZeroPage hard-codes a 4K page; fail the build if a visible PAGE_SIZE
   disagrees. The value is deliberately not evaluated when NT_INCLUDED is set. */
#if !defined(NT_INCLUDED) && defined(PAGE_SIZE)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
2630
2631/**
2632 * Zeros a 4K memory page.
2633 *
2634 * @param pv Pointer to the memory block. This must be page aligned.
2635 */
2636#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2637DECLASM(void) ASMMemZeroPage(volatile void *pv);
2638# else
2639DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2640{
2641# if RT_INLINE_ASM_USES_INTRIN
2642# ifdef __AMD64__
2643 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2644# else
2645 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2646# endif
2647
2648# elif RT_INLINE_ASM_GNU_STYLE
2649 RTUINTREG uDummy;
2650# ifdef __AMD64__
2651 __asm__ __volatile__ ("rep stosq"
2652 : "=D" (pv),
2653 "=c" (uDummy)
2654 : "0" (pv),
2655 "c" (0x1000 >> 3),
2656 "a" (0)
2657 : "memory");
2658# else
2659 __asm__ __volatile__ ("rep stosl"
2660 : "=D" (pv),
2661 "=c" (uDummy)
2662 : "0" (pv),
2663 "c" (0x1000 >> 2),
2664 "a" (0)
2665 : "memory");
2666# endif
2667# else
2668 __asm
2669 {
2670# ifdef __AMD64__
2671 xor rax, rax
2672 mov ecx, 0200h
2673 mov rdi, [pv]
2674 rep stosq
2675# else
2676 xor eax, eax
2677 mov ecx, 0400h
2678 mov edi, [pv]
2679 rep stosd
2680# endif
2681 }
2682# endif
2683}
2684# endif
2685
2686
2687/**
2688 * Zeros a memory block with a 32-bit aligned size.
2689 *
2690 * @param pv Pointer to the memory block.
2691 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2692 */
2693#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2694DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2695#else
2696DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2697{
2698# if RT_INLINE_ASM_USES_INTRIN
2699 __stosd((unsigned long *)pv, 0, cb >> 2);
2700
2701# elif RT_INLINE_ASM_GNU_STYLE
2702 __asm__ __volatile__ ("rep stosl"
2703 : "=D" (pv),
2704 "=c" (cb)
2705 : "0" (pv),
2706 "1" (cb >> 2),
2707 "a" (0)
2708 : "memory");
2709# else
2710 __asm
2711 {
2712 xor eax, eax
2713# ifdef __AMD64__
2714 mov rcx, [cb]
2715 shr rcx, 2
2716 mov rdi, [pv]
2717# else
2718 mov ecx, [cb]
2719 shr ecx, 2
2720 mov edi, [pv]
2721# endif
2722 rep stosd
2723 }
2724# endif
2725}
2726#endif
2727
2728
2729/**
2730 * Fills a memory block with a 32-bit aligned size.
2731 *
2732 * @param pv Pointer to the memory block.
2733 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2734 * @param u32 The value to fill with.
2735 */
2736#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2737DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2738#else
2739DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2740{
2741# if RT_INLINE_ASM_USES_INTRIN
2742 __stosd((unsigned long *)pv, 0, cb >> 2);
2743
2744# elif RT_INLINE_ASM_GNU_STYLE
2745 __asm__ __volatile__ ("rep stosl"
2746 : "=D" (pv),
2747 "=c" (cb)
2748 : "0" (pv),
2749 "1" (cb >> 2),
2750 "a" (u32)
2751 : "memory");
2752# else
2753 __asm
2754 {
2755# ifdef __AMD64__
2756 mov rcx, [cb]
2757 shr rcx, 2
2758 mov rdi, [pv]
2759# else
2760 mov ecx, [cb]
2761 shr ecx, 2
2762 mov edi, [pv]
2763# endif
2764 mov eax, [u32]
2765 rep stosd
2766 }
2767# endif
2768}
2769#endif
2770
2771
2772
2773/**
2774 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2775 *
2776 * @returns u32F1 * u32F2.
2777 */
2778#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2779DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2780#else
2781DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2782{
2783# ifdef __AMD64__
2784 return (uint64_t)u32F1 * u32F2;
2785# else /* !__AMD64__ */
2786 uint64_t u64;
2787# if RT_INLINE_ASM_GNU_STYLE
2788 __asm__ __volatile__("mull %%edx"
2789 : "=A" (u64)
2790 : "a" (u32F2), "d" (u32F1));
2791# else
2792 __asm
2793 {
2794 mov edx, [u32F1]
2795 mov eax, [u32F2]
2796 mul edx
2797 mov dword ptr [u64], eax
2798 mov dword ptr [u64 + 4], edx
2799 }
2800# endif
2801 return u64;
2802# endif /* !__AMD64__ */
2803}
2804#endif
2805
2806
2807/**
2808 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2809 *
2810 * @returns u32F1 * u32F2.
2811 */
2812#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2813DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2814#else
2815DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2816{
2817# ifdef __AMD64__
2818 return (int64_t)i32F1 * i32F2;
2819# else /* !__AMD64__ */
2820 int64_t i64;
2821# if RT_INLINE_ASM_GNU_STYLE
2822 __asm__ __volatile__("imull %%edx"
2823 : "=A" (i64)
2824 : "a" (i32F2), "d" (i32F1));
2825# else
2826 __asm
2827 {
2828 mov edx, [i32F1]
2829 mov eax, [i32F2]
2830 imul edx
2831 mov dword ptr [i64], eax
2832 mov dword ptr [i64 + 4], edx
2833 }
2834# endif
2835 return i64;
2836# endif /* !__AMD64__ */
2837}
2838#endif
2839
2840
2841/**
2842 * Devides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2843 *
2844 * @returns u64 / u32.
2845 */
2846#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2847DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2848#else
2849DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2850{
2851# ifdef __AMD64__
2852 return (uint32_t)(u64 / u32);
2853# else /* !__AMD64__ */
2854# if RT_INLINE_ASM_GNU_STYLE
2855 RTUINTREG uDummy;
2856 __asm__ __volatile__("divl %3"
2857 : "=a" (u32), "=d"(uDummy)
2858 : "A" (u64), "r" (u32));
2859# else
2860 __asm
2861 {
2862 mov eax, dword ptr [u64]
2863 mov edx, dword ptr [u64 + 4]
2864 mov ecx, [u32]
2865 div ecx
2866 mov [u32], eax
2867 }
2868# endif
2869 return u32;
2870# endif /* !__AMD64__ */
2871}
2872#endif
2873
2874
2875/**
2876 * Devides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2877 *
2878 * @returns u64 / u32.
2879 */
2880#if RT_INLINE_ASM_EXTERNAL && !defined(__AMD64__)
2881DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2882#else
2883DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2884{
2885# ifdef __AMD64__
2886 return (int32_t)(i64 / i32);
2887# else /* !__AMD64__ */
2888# if RT_INLINE_ASM_GNU_STYLE
2889 RTUINTREG iDummy;
2890 __asm__ __volatile__("idivl %3"
2891 : "=a" (i32), "=d"(iDummy)
2892 : "A" (i64), "r" (i32));
2893# else
2894 __asm
2895 {
2896 mov eax, dword ptr [i64]
2897 mov edx, dword ptr [i64 + 4]
2898 mov ecx, [i32]
2899 idiv ecx
2900 mov [i32], eax
2901 }
2902# endif
2903 return i32;
2904# endif /* !__AMD64__ */
2905}
2906#endif
2907
2908
2909/**
2910 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
2911 * using a 96 bit intermediate result.
2912 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2913 * __udivdi3 and __umoddi3 even if this inline function is not used.
2914 *
2915 * @returns (u64A * u32B) / u32C.
2916 * @param u64A The 64-bit value.
2917 * @param u32B The 32-bit value to multiple by A.
2918 * @param u32C The 32-bit value to divide A*B by.
2919 */
2920#if RT_INLINE_ASM_EXTERNAL
2921DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2922#else
2923DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2924{
2925# if RT_INLINE_ASM_GNU_STYLE
2926# ifdef __AMD64__
2927 uint64_t u64Result, u64Spill;
2928 __asm__ __volatile__("mulq %2\n\t"
2929 "divq %3\n\t"
2930 : "=a" (u64Result),
2931 "=d" (u64Spill)
2932 : "r" ((uint64_t)u32B),
2933 "r" ((uint64_t)u32C),
2934 "0" (u64A),
2935 "1" (0));
2936 return u64Result;
2937# else
2938 uint32_t u32Dummy;
2939 uint64_t u64Result;
2940 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2941 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2942 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2943 eax = u64A.hi */
2944 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2945 edx = u32C */
2946 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2947 edx = u32B */
2948 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2949 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2950 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2951 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2952 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2953 edx = u64Hi % u32C */
2954 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2955 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2956 "divl %%ecx \n\t" /* u64Result.lo */
2957 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2958 : "=A"(u64Result),
2959 "=S"(u32Dummy), "=D"(u32Dummy)
2960 : "a"((uint32_t)u64A),
2961 "S"((uint32_t)(u64A >> 32)),
2962 "c"(u32B),
2963 "D"(u32C));
2964 return u64Result;
2965# endif
2966# else
2967 RTUINT64U u;
2968 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2969 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2970 u64Hi += (u64Lo >> 32);
2971 u.s.Hi = (uint32_t)(u64Hi / u32C);
2972 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2973 return u.u;
2974# endif
2975}
2976#endif
2977
2978
2979/**
2980 * Probes a byte pointer for read access.
2981 *
2982 * While the function will not fault if the byte is not read accessible,
2983 * the idea is to do this in a safe place like before acquiring locks
2984 * and such like.
2985 *
2986 * Also, this functions guarantees that an eager compiler is not going
2987 * to optimize the probing away.
2988 *
2989 * @param pvByte Pointer to the byte.
2990 */
2991#if RT_INLINE_ASM_EXTERNAL
2992DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2993#else
2994DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2995{
2996 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
2997 uint8_t u8;
2998# if RT_INLINE_ASM_GNU_STYLE
2999 __asm__ __volatile__("movb (%1), %0\n\t"
3000 : "=r" (u8)
3001 : "r" (pvByte));
3002# else
3003 __asm
3004 {
3005# ifdef __AMD64__
3006 mov rax, [pvByte]
3007 mov al, [rax]
3008# else
3009 mov eax, [pvByte]
3010 mov al, [eax]
3011# endif
3012 mov [u8], al
3013 }
3014# endif
3015 return u8;
3016}
3017#endif
3018
3019/**
3020 * Probes a buffer for read access page by page.
3021 *
3022 * While the function will fault if the buffer is not fully read
3023 * accessible, the idea is to do this in a safe place like before
3024 * acquiring locks and such like.
3025 *
3026 * Also, this functions guarantees that an eager compiler is not going
3027 * to optimize the probing away.
3028 *
3029 * @param pvBuf Pointer to the buffer.
3030 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3031 */
3032DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3033{
3034 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3035 /* the first byte */
3036 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3037 ASMProbeReadByte(pu8);
3038
3039 /* the pages in between pages. */
3040 while (cbBuf > /*PAGE_SIZE*/0x1000)
3041 {
3042 ASMProbeReadByte(pu8);
3043 cbBuf -= /*PAGE_SIZE*/0x1000;
3044 pu8 += /*PAGE_SIZE*/0x1000;
3045 }
3046
3047 /* the last byte */
3048 ASMProbeReadByte(pu8 + cbBuf - 1);
3049}
3050
3051
/** @def ASMBreakpoint
 * Debugger breakpoint.
 * @remark  In the GNU world a nop instruction is added after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the
 *          jmp in the L4 variant.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
3068
3069
3070
3071/** @defgroup grp_inline_bits Bit Operations
3072 * @{
3073 */
3074
3075
3076/**
3077 * Sets a bit in a bitmap.
3078 *
3079 * @param pvBitmap Pointer to the bitmap.
3080 * @param iBit The bit to set.
3081 */
3082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3083DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3084#else
3085DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3086{
3087# if RT_INLINE_ASM_USES_INTRIN
3088 _bittestandset((long *)pvBitmap, iBit);
3089
3090# elif RT_INLINE_ASM_GNU_STYLE
3091 __asm__ __volatile__ ("btsl %1, %0"
3092 : "=m" (*(volatile long *)pvBitmap)
3093 : "Ir" (iBit)
3094 : "memory");
3095# else
3096 __asm
3097 {
3098# ifdef __AMD64__
3099 mov rax, [pvBitmap]
3100 mov edx, [iBit]
3101 bts [rax], edx
3102# else
3103 mov eax, [pvBitmap]
3104 mov edx, [iBit]
3105 bts [eax], edx
3106# endif
3107 }
3108# endif
3109}
3110#endif
3111
3112
3113/**
3114 * Atomically sets a bit in a bitmap.
3115 *
3116 * @param pvBitmap Pointer to the bitmap.
3117 * @param iBit The bit to set.
3118 */
3119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3120DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3121#else
3122DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3123{
3124# if RT_INLINE_ASM_USES_INTRIN
3125 _interlockedbittestandset((long *)pvBitmap, iBit);
3126# elif RT_INLINE_ASM_GNU_STYLE
3127 __asm__ __volatile__ ("lock; btsl %1, %0"
3128 : "=m" (*(volatile long *)pvBitmap)
3129 : "Ir" (iBit)
3130 : "memory");
3131# else
3132 __asm
3133 {
3134# ifdef __AMD64__
3135 mov rax, [pvBitmap]
3136 mov edx, [iBit]
3137 lock bts [rax], edx
3138# else
3139 mov eax, [pvBitmap]
3140 mov edx, [iBit]
3141 lock bts [eax], edx
3142# endif
3143 }
3144# endif
3145}
3146#endif
3147
3148
3149/**
3150 * Clears a bit in a bitmap.
3151 *
3152 * @param pvBitmap Pointer to the bitmap.
3153 * @param iBit The bit to clear.
3154 */
3155#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3156DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3157#else
3158DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3159{
3160# if RT_INLINE_ASM_USES_INTRIN
3161 _bittestandreset((long *)pvBitmap, iBit);
3162
3163# elif RT_INLINE_ASM_GNU_STYLE
3164 __asm__ __volatile__ ("btrl %1, %0"
3165 : "=m" (*(volatile long *)pvBitmap)
3166 : "Ir" (iBit)
3167 : "memory");
3168# else
3169 __asm
3170 {
3171# ifdef __AMD64__
3172 mov rax, [pvBitmap]
3173 mov edx, [iBit]
3174 btr [rax], edx
3175# else
3176 mov eax, [pvBitmap]
3177 mov edx, [iBit]
3178 btr [eax], edx
3179# endif
3180 }
3181# endif
3182}
3183#endif
3184
3185
3186/**
3187 * Atomically clears a bit in a bitmap.
3188 *
3189 * @param pvBitmap Pointer to the bitmap.
3190 * @param iBit The bit to toggle set.
3191 * @remark No memory barrier, take care on smp.
3192 */
3193#if RT_INLINE_ASM_EXTERNAL
3194DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3195#else
3196DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3197{
3198# if RT_INLINE_ASM_GNU_STYLE
3199 __asm__ __volatile__ ("lock; btrl %1, %0"
3200 : "=m" (*(volatile long *)pvBitmap)
3201 : "Ir" (iBit)
3202 : "memory");
3203# else
3204 __asm
3205 {
3206# ifdef __AMD64__
3207 mov rax, [pvBitmap]
3208 mov edx, [iBit]
3209 lock btr [rax], edx
3210# else
3211 mov eax, [pvBitmap]
3212 mov edx, [iBit]
3213 lock btr [eax], edx
3214# endif
3215 }
3216# endif
3217}
3218#endif
3219
3220
3221/**
3222 * Toggles a bit in a bitmap.
3223 *
3224 * @param pvBitmap Pointer to the bitmap.
3225 * @param iBit The bit to toggle.
3226 */
3227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3228DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3229#else
3230DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3231{
3232# if RT_INLINE_ASM_USES_INTRIN
3233 _bittestandcomplement((long *)pvBitmap, iBit);
3234# elif RT_INLINE_ASM_GNU_STYLE
3235 __asm__ __volatile__ ("btcl %1, %0"
3236 : "=m" (*(volatile long *)pvBitmap)
3237 : "Ir" (iBit)
3238 : "memory");
3239# else
3240 __asm
3241 {
3242# ifdef __AMD64__
3243 mov rax, [pvBitmap]
3244 mov edx, [iBit]
3245 btc [rax], edx
3246# else
3247 mov eax, [pvBitmap]
3248 mov edx, [iBit]
3249 btc [eax], edx
3250# endif
3251 }
3252# endif
3253}
3254#endif
3255
3256
3257/**
3258 * Atomically toggles a bit in a bitmap.
3259 *
3260 * @param pvBitmap Pointer to the bitmap.
3261 * @param iBit The bit to test and set.
3262 */
3263#if RT_INLINE_ASM_EXTERNAL
3264DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3265#else
3266DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3267{
3268# if RT_INLINE_ASM_GNU_STYLE
3269 __asm__ __volatile__ ("lock; btcl %1, %0"
3270 : "=m" (*(volatile long *)pvBitmap)
3271 : "Ir" (iBit)
3272 : "memory");
3273# else
3274 __asm
3275 {
3276# ifdef __AMD64__
3277 mov rax, [pvBitmap]
3278 mov edx, [iBit]
3279 lock btc [rax], edx
3280# else
3281 mov eax, [pvBitmap]
3282 mov edx, [iBit]
3283 lock btc [eax], edx
3284# endif
3285 }
3286# endif
3287}
3288#endif
3289
3290
3291/**
3292 * Tests and sets a bit in a bitmap.
3293 *
3294 * @returns true if the bit was set.
3295 * @returns false if the bit was clear.
3296 * @param pvBitmap Pointer to the bitmap.
3297 * @param iBit The bit to test and set.
3298 */
3299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3300DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3301#else
3302DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3303{
3304 union { bool f; uint32_t u32; uint8_t u8; } rc;
3305# if RT_INLINE_ASM_USES_INTRIN
3306 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3307
3308# elif RT_INLINE_ASM_GNU_STYLE
3309 __asm__ __volatile__ ("btsl %2, %1\n\t"
3310 "setc %b0\n\t"
3311 "andl $1, %0\n\t"
3312 : "=q" (rc.u32),
3313 "=m" (*(volatile long *)pvBitmap)
3314 : "Ir" (iBit)
3315 : "memory");
3316# else
3317 __asm
3318 {
3319 mov edx, [iBit]
3320# ifdef __AMD64__
3321 mov rax, [pvBitmap]
3322 bts [rax], edx
3323# else
3324 mov eax, [pvBitmap]
3325 bts [eax], edx
3326# endif
3327 setc al
3328 and eax, 1
3329 mov [rc.u32], eax
3330 }
3331# endif
3332 return rc.f;
3333}
3334#endif
3335
3336
3337/**
3338 * Atomically tests and sets a bit in a bitmap.
3339 *
3340 * @returns true if the bit was set.
3341 * @returns false if the bit was clear.
3342 * @param pvBitmap Pointer to the bitmap.
3343 * @param iBit The bit to set.
3344 */
3345#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3346DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3347#else
3348DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3349{
3350 union { bool f; uint32_t u32; uint8_t u8; } rc;
3351# if RT_INLINE_ASM_USES_INTRIN
3352 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3353# elif RT_INLINE_ASM_GNU_STYLE
3354 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3355 "setc %b0\n\t"
3356 "andl $1, %0\n\t"
3357 : "=q" (rc.u32),
3358 "=m" (*(volatile long *)pvBitmap)
3359 : "Ir" (iBit)
3360 : "memory");
3361# else
3362 __asm
3363 {
3364 mov edx, [iBit]
3365# ifdef __AMD64__
3366 mov rax, [pvBitmap]
3367 lock bts [rax], edx
3368# else
3369 mov eax, [pvBitmap]
3370 lock bts [eax], edx
3371# endif
3372 setc al
3373 and eax, 1
3374 mov [rc.u32], eax
3375 }
3376# endif
3377 return rc.f;
3378}
3379#endif
3380
3381
3382/**
3383 * Tests and clears a bit in a bitmap.
3384 *
3385 * @returns true if the bit was set.
3386 * @returns false if the bit was clear.
3387 * @param pvBitmap Pointer to the bitmap.
3388 * @param iBit The bit to test and clear.
3389 */
3390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3391DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3392#else
3393DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3394{
3395 union { bool f; uint32_t u32; uint8_t u8; } rc;
3396# if RT_INLINE_ASM_USES_INTRIN
3397 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3398
3399# elif RT_INLINE_ASM_GNU_STYLE
3400 __asm__ __volatile__ ("btrl %2, %1\n\t"
3401 "setc %b0\n\t"
3402 "andl $1, %0\n\t"
3403 : "=q" (rc.u32),
3404 "=m" (*(volatile long *)pvBitmap)
3405 : "Ir" (iBit)
3406 : "memory");
3407# else
3408 __asm
3409 {
3410 mov edx, [iBit]
3411# ifdef __AMD64__
3412 mov rax, [pvBitmap]
3413 btr [rax], edx
3414# else
3415 mov eax, [pvBitmap]
3416 btr [eax], edx
3417# endif
3418 setc al
3419 and eax, 1
3420 mov [rc.u32], eax
3421 }
3422# endif
3423 return rc.f;
3424}
3425#endif
3426
3427
3428/**
3429 * Atomically tests and clears a bit in a bitmap.
3430 *
3431 * @returns true if the bit was set.
3432 * @returns false if the bit was clear.
3433 * @param pvBitmap Pointer to the bitmap.
3434 * @param iBit The bit to test and clear.
3435 * @remark No memory barrier, take care on smp.
3436 */
3437#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3438DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3439#else
3440DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3441{
3442 union { bool f; uint32_t u32; uint8_t u8; } rc;
3443# if RT_INLINE_ASM_USES_INTRIN
3444 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3445
3446# elif RT_INLINE_ASM_GNU_STYLE
3447 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3448 "setc %b0\n\t"
3449 "andl $1, %0\n\t"
3450 : "=q" (rc.u32),
3451 "=m" (*(volatile long *)pvBitmap)
3452 : "Ir" (iBit)
3453 : "memory");
3454# else
3455 __asm
3456 {
3457 mov edx, [iBit]
3458# ifdef __AMD64__
3459 mov rax, [pvBitmap]
3460 lock btr [rax], edx
3461# else
3462 mov eax, [pvBitmap]
3463 lock btr [eax], edx
3464# endif
3465 setc al
3466 and eax, 1
3467 mov [rc.u32], eax
3468 }
3469# endif
3470 return rc.f;
3471}
3472#endif
3473
3474
3475/**
3476 * Tests and toggles a bit in a bitmap.
3477 *
3478 * @returns true if the bit was set.
3479 * @returns false if the bit was clear.
3480 * @param pvBitmap Pointer to the bitmap.
3481 * @param iBit The bit to test and toggle.
3482 */
3483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3484DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3485#else
3486DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3487{
3488 union { bool f; uint32_t u32; uint8_t u8; } rc;
3489# if RT_INLINE_ASM_USES_INTRIN
3490 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3491
3492# elif RT_INLINE_ASM_GNU_STYLE
3493 __asm__ __volatile__ ("btcl %2, %1\n\t"
3494 "setc %b0\n\t"
3495 "andl $1, %0\n\t"
3496 : "=q" (rc.u32),
3497 "=m" (*(volatile long *)pvBitmap)
3498 : "Ir" (iBit)
3499 : "memory");
3500# else
3501 __asm
3502 {
3503 mov edx, [iBit]
3504# ifdef __AMD64__
3505 mov rax, [pvBitmap]
3506 btc [rax], edx
3507# else
3508 mov eax, [pvBitmap]
3509 btc [eax], edx
3510# endif
3511 setc al
3512 and eax, 1
3513 mov [rc.u32], eax
3514 }
3515# endif
3516 return rc.f;
3517}
3518#endif
3519
3520
3521/**
3522 * Atomically tests and toggles a bit in a bitmap.
3523 *
3524 * @returns true if the bit was set.
3525 * @returns false if the bit was clear.
3526 * @param pvBitmap Pointer to the bitmap.
3527 * @param iBit The bit to test and toggle.
3528 */
3529#if RT_INLINE_ASM_EXTERNAL
3530DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3531#else
3532DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3533{
3534 union { bool f; uint32_t u32; uint8_t u8; } rc;
3535# if RT_INLINE_ASM_GNU_STYLE
3536 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3537 "setc %b0\n\t"
3538 "andl $1, %0\n\t"
3539 : "=q" (rc.u32),
3540 "=m" (*(volatile long *)pvBitmap)
3541 : "Ir" (iBit)
3542 : "memory");
3543# else
3544 __asm
3545 {
3546 mov edx, [iBit]
3547# ifdef __AMD64__
3548 mov rax, [pvBitmap]
3549 lock btc [rax], edx
3550# else
3551 mov eax, [pvBitmap]
3552 lock btc [eax], edx
3553# endif
3554 setc al
3555 and eax, 1
3556 mov [rc.u32], eax
3557 }
3558# endif
3559 return rc.f;
3560}
3561#endif
3562
3563
3564/**
3565 * Tests if a bit in a bitmap is set.
3566 *
3567 * @returns true if the bit is set.
3568 * @returns false if the bit is clear.
3569 * @param pvBitmap Pointer to the bitmap.
3570 * @param iBit The bit to test.
3571 */
3572#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3573DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3574#else
3575DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3576{
3577 union { bool f; uint32_t u32; uint8_t u8; } rc;
3578# if RT_INLINE_ASM_USES_INTRIN
3579 rc.u32 = _bittest((long *)pvBitmap, iBit);
3580# elif RT_INLINE_ASM_GNU_STYLE
3581
3582 __asm__ __volatile__ ("btl %2, %1\n\t"
3583 "setc %b0\n\t"
3584 "andl $1, %0\n\t"
3585 : "=q" (rc.u32),
3586 "=m" (*(volatile long *)pvBitmap)
3587 : "Ir" (iBit)
3588 : "memory");
3589# else
3590 __asm
3591 {
3592 mov edx, [iBit]
3593# ifdef __AMD64__
3594 mov rax, [pvBitmap]
3595 bt [rax], edx
3596# else
3597 mov eax, [pvBitmap]
3598 bt [eax], edx
3599# endif
3600 setc al
3601 and eax, 1
3602 mov [rc.u32], eax
3603 }
3604# endif
3605 return rc.f;
3606}
3607#endif
3608
3609
3610/**
3611 * Clears a bit range within a bitmap.
3612 *
3613 * @param pvBitmap Pointer to the bitmap.
3614 * @param iBitStart The First bit to clear.
3615 * @param iBitEnd The first bit not to clear.
3616 */
3617DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3618{
3619 if (iBitStart < iBitEnd)
3620 {
3621 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3622 int iStart = iBitStart & ~31;
3623 int iEnd = iBitEnd & ~31;
3624 if (iStart == iEnd)
3625 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3626 else
3627 {
3628 /* bits in first dword. */
3629 if (iBitStart & 31)
3630 {
3631 *pu32 &= (1 << (iBitStart & 31)) - 1;
3632 pu32++;
3633 iBitStart = iStart + 32;
3634 }
3635
3636 /* whole dword. */
3637 if (iBitStart != iEnd)
3638 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3639
3640 /* bits in last dword. */
3641 if (iBitEnd & 31)
3642 {
3643 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3644 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3645 }
3646 }
3647 }
3648}
3649
3650
3651/**
3652 * Finds the first clear bit in a bitmap.
3653 *
3654 * @returns Index of the first zero bit.
3655 * @returns -1 if no clear bit was found.
3656 * @param pvBitmap Pointer to the bitmap.
3657 * @param cBits The number of bits in the bitmap. Multiple of 32.
3658 */
3659#if RT_INLINE_ASM_EXTERNAL
3660DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3661#else
3662DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3663{
3664 if (cBits)
3665 {
3666 int32_t iBit;
3667# if RT_INLINE_ASM_GNU_STYLE
3668 RTCCUINTREG uEAX, uECX, uEDI;
3669 cBits = RT_ALIGN_32(cBits, 32);
3670 __asm__ __volatile__("repe; scasl\n\t"
3671 "je 1f\n\t"
3672# ifdef __AMD64__
3673 "lea -4(%%rdi), %%rdi\n\t"
3674 "xorl (%%rdi), %%eax\n\t"
3675 "subq %5, %%rdi\n\t"
3676# else
3677 "lea -4(%%edi), %%edi\n\t"
3678 "xorl (%%edi), %%eax\n\t"
3679 "subl %5, %%edi\n\t"
3680# endif
3681 "shll $3, %%edi\n\t"
3682 "bsfl %%eax, %%edx\n\t"
3683 "addl %%edi, %%edx\n\t"
3684 "1:\t\n"
3685 : "=d" (iBit),
3686 "=&c" (uECX),
3687 "=&D" (uEDI),
3688 "=&a" (uEAX)
3689 : "0" (0xffffffff),
3690 "mr" (pvBitmap),
3691 "1" (cBits >> 5),
3692 "2" (pvBitmap),
3693 "3" (0xffffffff));
3694# else
3695 cBits = RT_ALIGN_32(cBits, 32);
3696 __asm
3697 {
3698# ifdef __AMD64__
3699 mov rdi, [pvBitmap]
3700 mov rbx, rdi
3701# else
3702 mov edi, [pvBitmap]
3703 mov ebx, edi
3704# endif
3705 mov edx, 0ffffffffh
3706 mov eax, edx
3707 mov ecx, [cBits]
3708 shr ecx, 5
3709 repe scasd
3710 je done
3711
3712# ifdef __AMD64__
3713 lea rdi, [rdi - 4]
3714 xor eax, [rdi]
3715 sub rdi, rbx
3716# else
3717 lea edi, [edi - 4]
3718 xor eax, [edi]
3719 sub edi, ebx
3720# endif
3721 shl edi, 3
3722 bsf edx, eax
3723 add edx, edi
3724 done:
3725 mov [iBit], edx
3726 }
3727# endif
3728 return iBit;
3729 }
3730 return -1;
3731}
3732#endif
3733
3734
3735/**
3736 * Finds the next clear bit in a bitmap.
3737 *
3738 * @returns Index of the first zero bit.
3739 * @returns -1 if no clear bit was found.
3740 * @param pvBitmap Pointer to the bitmap.
3741 * @param cBits The number of bits in the bitmap. Multiple of 32.
3742 * @param iBitPrev The bit returned from the last search.
3743 * The search will start at iBitPrev + 1.
3744 */
3745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3746DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3747#else
3748DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3749{
3750 int iBit = ++iBitPrev & 31;
3751 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3752 cBits -= iBitPrev & ~31;
3753 if (iBit)
3754 {
3755 /* inspect the first dword. */
3756 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3757# if RT_INLINE_ASM_USES_INTRIN
3758 unsigned long ulBit = 0;
3759 if (_BitScanForward(&ulBit, u32))
3760 return ulBit + iBitPrev;
3761 iBit = -1;
3762# else
3763# if RT_INLINE_ASM_GNU_STYLE
3764 __asm__ __volatile__("bsf %1, %0\n\t"
3765 "jnz 1f\n\t"
3766 "movl $-1, %0\n\t"
3767 "1:\n\t"
3768 : "=r" (iBit)
3769 : "r" (u32));
3770# else
3771 __asm
3772 {
3773 mov edx, [u32]
3774 bsf eax, edx
3775 jnz done
3776 mov eax, 0ffffffffh
3777 done:
3778 mov [iBit], eax
3779 }
3780# endif
3781 if (iBit >= 0)
3782 return iBit + iBitPrev;
3783# endif
3784 /* Search the rest of the bitmap, if there is anything. */
3785 if (cBits > 32)
3786 {
3787 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3788 if (iBit >= 0)
3789 return iBit + (iBitPrev & ~31) + 32;
3790 }
3791 }
3792 else
3793 {
3794 /* Search the rest of the bitmap. */
3795 iBit = ASMBitFirstClear(pvBitmap, cBits);
3796 if (iBit >= 0)
3797 return iBit + (iBitPrev & ~31);
3798 }
3799 return iBit;
3800}
3801#endif
3802
3803
3804/**
3805 * Finds the first set bit in a bitmap.
3806 *
3807 * @returns Index of the first set bit.
3808 * @returns -1 if no clear bit was found.
3809 * @param pvBitmap Pointer to the bitmap.
3810 * @param cBits The number of bits in the bitmap. Multiple of 32.
3811 */
3812#if RT_INLINE_ASM_EXTERNAL
3813DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3814#else
3815DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3816{
3817 if (cBits)
3818 {
3819 int32_t iBit;
3820# if RT_INLINE_ASM_GNU_STYLE
3821 RTCCUINTREG uEAX, uECX, uEDI;
3822 cBits = RT_ALIGN_32(cBits, 32);
3823 __asm__ __volatile__("repe; scasl\n\t"
3824 "je 1f\n\t"
3825# ifdef __AMD64__
3826 "lea -4(%%rdi), %%rdi\n\t"
3827 "movl (%%rdi), %%eax\n\t"
3828 "subq %5, %%rdi\n\t"
3829# else
3830 "lea -4(%%edi), %%edi\n\t"
3831 "movl (%%edi), %%eax\n\t"
3832 "subl %5, %%edi\n\t"
3833# endif
3834 "shll $3, %%edi\n\t"
3835 "bsfl %%eax, %%edx\n\t"
3836 "addl %%edi, %%edx\n\t"
3837 "1:\t\n"
3838 : "=d" (iBit),
3839 "=&c" (uECX),
3840 "=&D" (uEDI),
3841 "=&a" (uEAX)
3842 : "0" (0xffffffff),
3843 "mr" (pvBitmap),
3844 "1" (cBits >> 5),
3845 "2" (pvBitmap),
3846 "3" (0));
3847# else
3848 cBits = RT_ALIGN_32(cBits, 32);
3849 __asm
3850 {
3851# ifdef __AMD64__
3852 mov rdi, [pvBitmap]
3853 mov rbx, rdi
3854# else
3855 mov edi, [pvBitmap]
3856 mov ebx, edi
3857# endif
3858 mov edx, 0ffffffffh
3859 xor eax, eax
3860 mov ecx, [cBits]
3861 shr ecx, 5
3862 repe scasd
3863 je done
3864# ifdef __AMD64__
3865 lea rdi, [rdi - 4]
3866 mov eax, [rdi]
3867 sub rdi, rbx
3868# else
3869 lea edi, [edi - 4]
3870 mov eax, [edi]
3871 sub edi, ebx
3872# endif
3873 shl edi, 3
3874 bsf edx, eax
3875 add edx, edi
3876 done:
3877 mov [iBit], edx
3878 }
3879# endif
3880 return iBit;
3881 }
3882 return -1;
3883}
3884#endif
3885
3886
3887/**
3888 * Finds the next set bit in a bitmap.
3889 *
3890 * @returns Index of the next set bit.
3891 * @returns -1 if no set bit was found.
3892 * @param pvBitmap Pointer to the bitmap.
3893 * @param cBits The number of bits in the bitmap. Multiple of 32.
3894 * @param iBitPrev The bit returned from the last search.
3895 * The search will start at iBitPrev + 1.
3896 */
3897#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3898DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3899#else
3900DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3901{
3902 int iBit = ++iBitPrev & 31;
3903 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3904 cBits -= iBitPrev & ~31;
3905 if (iBit)
3906 {
3907 /* inspect the first dword. */
3908 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3909# if RT_INLINE_ASM_USES_INTRIN
3910 unsigned long ulBit = 0;
3911 if (_BitScanForward(&ulBit, u32))
3912 return ulBit + iBitPrev;
3913 iBit = -1;
3914# else
3915# if RT_INLINE_ASM_GNU_STYLE
3916 __asm__ __volatile__("bsf %1, %0\n\t"
3917 "jnz 1f\n\t"
3918 "movl $-1, %0\n\t"
3919 "1:\n\t"
3920 : "=r" (iBit)
3921 : "r" (u32));
3922# else
3923 __asm
3924 {
3925 mov edx, u32
3926 bsf eax, edx
3927 jnz done
3928 mov eax, 0ffffffffh
3929 done:
3930 mov [iBit], eax
3931 }
3932# endif
3933 if (iBit >= 0)
3934 return iBit + iBitPrev;
3935# endif
3936 /* Search the rest of the bitmap, if there is anything. */
3937 if (cBits > 32)
3938 {
3939 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3940 if (iBit >= 0)
3941 return iBit + (iBitPrev & ~31) + 32;
3942 }
3943
3944 }
3945 else
3946 {
3947 /* Search the rest of the bitmap. */
3948 iBit = ASMBitFirstSet(pvBitmap, cBits);
3949 if (iBit >= 0)
3950 return iBit + (iBitPrev & ~31);
3951 }
3952 return iBit;
3953}
3954#endif
3955
3956
3957/**
3958 * Finds the first bit which is set in the given 32-bit integer.
3959 * Bits are numbered from 1 (least significant) to 32.
3960 *
3961 * @returns index [1..32] of the first set bit.
3962 * @returns 0 if all bits are cleared.
3963 * @param u32 Integer to search for set bits.
3964 * @remark Similar to ffs() in BSD.
3965 */
3966DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3967{
3968# if RT_INLINE_ASM_USES_INTRIN
3969 unsigned long iBit;
3970 if (_BitScanForward(&iBit, u32))
3971 iBit++;
3972 else
3973 iBit = 0;
3974# elif RT_INLINE_ASM_GNU_STYLE
3975 uint32_t iBit;
3976 __asm__ __volatile__("bsf %1, %0\n\t"
3977 "jnz 1f\n\t"
3978 "xorl %0, %0\n\t"
3979 "jmp 2f\n"
3980 "1:\n\t"
3981 "incl %0\n"
3982 "2:\n\t"
3983 : "=r" (iBit)
3984 : "rm" (u32));
3985# else
3986 uint32_t iBit;
3987 _asm
3988 {
3989 bsf eax, [u32]
3990 jnz found
3991 xor eax, eax
3992 jmp done
3993 found:
3994 inc eax
3995 done:
3996 mov [iBit], eax
3997 }
3998# endif
3999 return iBit;
4000}
4001
4002
4003/**
4004 * Finds the first bit which is set in the given 32-bit integer.
4005 * Bits are numbered from 1 (least significant) to 32.
4006 *
4007 * @returns index [1..32] of the first set bit.
4008 * @returns 0 if all bits are cleared.
4009 * @param i32 Integer to search for set bits.
4010 * @remark Similar to ffs() in BSD.
4011 */
4012DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4013{
4014 return ASMBitFirstSetU32((uint32_t)i32);
4015}
4016
4017
4018/**
4019 * Finds the last bit which is set in the given 32-bit integer.
4020 * Bits are numbered from 1 (least significant) to 32.
4021 *
4022 * @returns index [1..32] of the last set bit.
4023 * @returns 0 if all bits are cleared.
4024 * @param u32 Integer to search for set bits.
4025 * @remark Similar to fls() in BSD.
4026 */
4027DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4028{
4029# if RT_INLINE_ASM_USES_INTRIN
4030 unsigned long iBit;
4031 if (_BitScanReverse(&iBit, u32))
4032 iBit++;
4033 else
4034 iBit = 0;
4035# elif RT_INLINE_ASM_GNU_STYLE
4036 uint32_t iBit;
4037 __asm__ __volatile__("bsrl %1, %0\n\t"
4038 "jnz 1f\n\t"
4039 "xorl %0, %0\n\t"
4040 "jmp 2f\n"
4041 "1:\n\t"
4042 "incl %0\n"
4043 "2:\n\t"
4044 : "=r" (iBit)
4045 : "rm" (u32));
4046# else
4047 uint32_t iBit;
4048 _asm
4049 {
4050 bsr eax, [u32]
4051 jnz found
4052 xor eax, eax
4053 jmp done
4054 found:
4055 inc eax
4056 done:
4057 mov [iBit], eax
4058 }
4059# endif
4060 return iBit;
4061}
4062
4063
4064/**
4065 * Finds the last bit which is set in the given 32-bit integer.
4066 * Bits are numbered from 1 (least significant) to 32.
4067 *
4068 * @returns index [1..32] of the last set bit.
4069 * @returns 0 if all bits are cleared.
4070 * @param i32 Integer to search for set bits.
4071 * @remark Similar to fls() in BSD.
4072 */
4073DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4074{
4075 return ASMBitLastSetS32((uint32_t)i32);
4076}
4077
4078
4079/**
4080 * Reverse the byte order of the given 32-bit integer.
4081 * @param u32 Integer
4082 */
4083DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4084{
4085#if RT_INLINE_ASM_USES_INTRIN
4086 u32 = _byteswap_ulong(u32);
4087#elif RT_INLINE_ASM_GNU_STYLE
4088 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4089#else
4090 _asm
4091 {
4092 mov eax, [u32]
4093 bswap eax
4094 mov [u32], eax
4095 }
4096#endif
4097 return u32;
4098}
4099
4100/** @} */
4101
4102
4103/** @} */
4104#endif
4105
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette