VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 3888

Last change on this file since 3888 was 3636, checked in by vboxsync, 17 years ago

AMD64 -> RT_ARCH_AMD64; X86 -> RT_ARCH_X86; [OS] (except LINUX) -> RT_OS_[OS].

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 99.9 KB
Line 
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef ___iprt_asm_h
22#define ___iprt_asm_h
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when building with a Microsoft compiler reporting _MSC_VER >= 1400,
 * in which case <intrin.h> is pulled in and the intrinsics listed below are
 * enabled.  Set to 0 in every other case.
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
   /* Intrinsics only enabled when RT_ARCH_AMD64 is defined. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
/* Anything that did not opt in above does not use intrinsics. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
90
91
92
93/** @defgroup grp_asm ASM - Assembly Routines
94 * @ingroup grp_rt
95 * @{
96 */
97
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler offers no inline assembly; the ASM* functions are
 * then expected to come from an external .asm file.  Set to 0 otherwise.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
110
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler is assumed to understand GNU style inline
 * assembly (i.e. anything that does not define _MSC_VER), 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
119
120
/** @todo find a more proper place for these structures? */
#pragma pack(1)

/** IDTR image: byte-packed so the 16-bit size is directly followed by the address. */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR;
/** Pointer to an IDTR image. */
typedef RTIDTR *PRTIDTR;

/** GDTR image: byte-packed so the 16-bit size is directly followed by the address. */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR;
/** Pointer to a GDTR image. */
typedef RTGDTR *PRTGDTR;

#pragma pack()
143
144
/** @def ASMReturnAddress
 * Yields the return address of the function (or method) it is used in.
 * Maps to _ReturnAddress() for Microsoft compilers and to
 * __builtin_return_address(0) for GNU C; other compilers are rejected.
 */
#if defined(_MSC_VER)
  /* Declare the intrinsic ourselves, with C linkage when compiled as C++. */
# if defined(__cplusplus)
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress()     _ReturnAddress()
#elif defined(__GNUC__) || defined(__DOXYGEN__)
# define ASMReturnAddress()     __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
160
161
162/**
163 * Gets the content of the IDTR CPU register.
164 * @param pIdtr Where to store the IDTR contents.
165 */
166#if RT_INLINE_ASM_EXTERNAL
167DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
168#else
169DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
170{
171# if RT_INLINE_ASM_GNU_STYLE
172 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
173# else
174 __asm
175 {
176# ifdef RT_ARCH_AMD64
177 mov rax, [pIdtr]
178 sidt [rax]
179# else
180 mov eax, [pIdtr]
181 sidt [eax]
182# endif
183 }
184# endif
185}
186#endif
187
188
189/**
190 * Sets the content of the IDTR CPU register.
191 * @param pIdtr Where to load the IDTR contents from
192 */
193#if RT_INLINE_ASM_EXTERNAL
194DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
195#else
196DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
197{
198# if RT_INLINE_ASM_GNU_STYLE
199 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
200# else
201 __asm
202 {
203# ifdef RT_ARCH_AMD64
204 mov rax, [pIdtr]
205 lidt [rax]
206# else
207 mov eax, [pIdtr]
208 lidt [eax]
209# endif
210 }
211# endif
212}
213#endif
214
215
216/**
217 * Gets the content of the GDTR CPU register.
218 * @param pGdtr Where to store the GDTR contents.
219 */
220#if RT_INLINE_ASM_EXTERNAL
221DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
222#else
223DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
224{
225# if RT_INLINE_ASM_GNU_STYLE
226 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
227# else
228 __asm
229 {
230# ifdef RT_ARCH_AMD64
231 mov rax, [pGdtr]
232 sgdt [rax]
233# else
234 mov eax, [pGdtr]
235 sgdt [eax]
236# endif
237 }
238# endif
239}
240#endif
241
242/**
243 * Get the cs register.
244 * @returns cs.
245 */
246#if RT_INLINE_ASM_EXTERNAL
247DECLASM(RTSEL) ASMGetCS(void);
248#else
249DECLINLINE(RTSEL) ASMGetCS(void)
250{
251 RTSEL SelCS;
252# if RT_INLINE_ASM_GNU_STYLE
253 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
254# else
255 __asm
256 {
257 mov ax, cs
258 mov [SelCS], ax
259 }
260# endif
261 return SelCS;
262}
263#endif
264
265
266/**
267 * Get the DS register.
268 * @returns DS.
269 */
270#if RT_INLINE_ASM_EXTERNAL
271DECLASM(RTSEL) ASMGetDS(void);
272#else
273DECLINLINE(RTSEL) ASMGetDS(void)
274{
275 RTSEL SelDS;
276# if RT_INLINE_ASM_GNU_STYLE
277 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
278# else
279 __asm
280 {
281 mov ax, ds
282 mov [SelDS], ax
283 }
284# endif
285 return SelDS;
286}
287#endif
288
289
290/**
291 * Get the ES register.
292 * @returns ES.
293 */
294#if RT_INLINE_ASM_EXTERNAL
295DECLASM(RTSEL) ASMGetES(void);
296#else
297DECLINLINE(RTSEL) ASMGetES(void)
298{
299 RTSEL SelES;
300# if RT_INLINE_ASM_GNU_STYLE
301 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
302# else
303 __asm
304 {
305 mov ax, es
306 mov [SelES], ax
307 }
308# endif
309 return SelES;
310}
311#endif
312
313
314/**
315 * Get the FS register.
316 * @returns FS.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetFS(void);
320#else
321DECLINLINE(RTSEL) ASMGetFS(void)
322{
323 RTSEL SelFS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
326# else
327 __asm
328 {
329 mov ax, fs
330 mov [SelFS], ax
331 }
332# endif
333 return SelFS;
334}
335# endif
336
337
338/**
339 * Get the GS register.
340 * @returns GS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetGS(void);
344#else
345DECLINLINE(RTSEL) ASMGetGS(void)
346{
347 RTSEL SelGS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
350# else
351 __asm
352 {
353 mov ax, gs
354 mov [SelGS], ax
355 }
356# endif
357 return SelGS;
358}
359#endif
360
361
362/**
363 * Get the SS register.
364 * @returns SS.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetSS(void);
368#else
369DECLINLINE(RTSEL) ASMGetSS(void)
370{
371 RTSEL SelSS;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
374# else
375 __asm
376 {
377 mov ax, ss
378 mov [SelSS], ax
379 }
380# endif
381 return SelSS;
382}
383#endif
384
385
386/**
387 * Get the TR register.
388 * @returns TR.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetTR(void);
392#else
393DECLINLINE(RTSEL) ASMGetTR(void)
394{
395 RTSEL SelTR;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
398# else
399 __asm
400 {
401 str ax
402 mov [SelTR], ax
403 }
404# endif
405 return SelTR;
406}
407#endif
408
409
410/**
411 * Get the [RE]FLAGS register.
412 * @returns [RE]FLAGS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTCCUINTREG) ASMGetFlags(void);
416#else
417DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
418{
419 RTCCUINTREG uFlags;
420# if RT_INLINE_ASM_GNU_STYLE
421# ifdef RT_ARCH_AMD64
422 __asm__ __volatile__("pushfq\n\t"
423 "popq %0\n\t"
424 : "=m" (uFlags));
425# else
426 __asm__ __volatile__("pushfl\n\t"
427 "popl %0\n\t"
428 : "=m" (uFlags));
429# endif
430# else
431 __asm
432 {
433# ifdef RT_ARCH_AMD64
434 pushfq
435 pop [uFlags]
436# else
437 pushfd
438 pop [uFlags]
439# endif
440 }
441# endif
442 return uFlags;
443}
444#endif
445
446
447/**
448 * Set the [RE]FLAGS register.
449 * @param uFlags The new [RE]FLAGS value.
450 */
451#if RT_INLINE_ASM_EXTERNAL
452DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
453#else
454DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
455{
456# if RT_INLINE_ASM_GNU_STYLE
457# ifdef RT_ARCH_AMD64
458 __asm__ __volatile__("pushq %0\n\t"
459 "popfq\n\t"
460 : : "m" (uFlags));
461# else
462 __asm__ __volatile__("pushl %0\n\t"
463 "popfl\n\t"
464 : : "m" (uFlags));
465# endif
466# else
467 __asm
468 {
469# ifdef RT_ARCH_AMD64
470 push [uFlags]
471 popfq
472# else
473 push [uFlags]
474 popfd
475# endif
476 }
477# endif
478}
479#endif
480
481
482/**
483 * Gets the content of the CPU timestamp counter register.
484 *
485 * @returns TSC.
486 */
487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
488DECLASM(uint64_t) ASMReadTSC(void);
489#else
490DECLINLINE(uint64_t) ASMReadTSC(void)
491{
492 RTUINT64U u;
493# if RT_INLINE_ASM_GNU_STYLE
494 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
495# else
496# if RT_INLINE_ASM_USES_INTRIN
497 u.u = __rdtsc();
498# else
499 __asm
500 {
501 rdtsc
502 mov [u.s.Lo], eax
503 mov [u.s.Hi], edx
504 }
505# endif
506# endif
507 return u.u;
508}
509#endif
510
511
512/**
513 * Performs the cpuid instruction returning all registers.
514 *
515 * @param uOperator CPUID operation (eax).
516 * @param pvEAX Where to store eax.
517 * @param pvEBX Where to store ebx.
518 * @param pvECX Where to store ecx.
519 * @param pvEDX Where to store edx.
520 * @remark We're using void pointers to ease the use of special bitfield structures and such.
521 */
522#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
523DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
524#else
525DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
526{
527# if RT_INLINE_ASM_GNU_STYLE
528# ifdef RT_ARCH_AMD64
529 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
530 __asm__ ("cpuid\n\t"
531 : "=a" (uRAX),
532 "=b" (uRBX),
533 "=c" (uRCX),
534 "=d" (uRDX)
535 : "0" (uOperator));
536 *(uint32_t *)pvEAX = (uint32_t)uRAX;
537 *(uint32_t *)pvEBX = (uint32_t)uRBX;
538 *(uint32_t *)pvECX = (uint32_t)uRCX;
539 *(uint32_t *)pvEDX = (uint32_t)uRDX;
540# else
541 __asm__ ("xchgl %%ebx, %1\n\t"
542 "cpuid\n\t"
543 "xchgl %%ebx, %1\n\t"
544 : "=a" (*(uint32_t *)pvEAX),
545 "=r" (*(uint32_t *)pvEBX),
546 "=c" (*(uint32_t *)pvECX),
547 "=d" (*(uint32_t *)pvEDX)
548 : "0" (uOperator));
549# endif
550
551# elif RT_INLINE_ASM_USES_INTRIN
552 int aInfo[4];
553 __cpuid(aInfo, uOperator);
554 *(uint32_t *)pvEAX = aInfo[0];
555 *(uint32_t *)pvEBX = aInfo[1];
556 *(uint32_t *)pvECX = aInfo[2];
557 *(uint32_t *)pvEDX = aInfo[3];
558
559# else
560 uint32_t uEAX;
561 uint32_t uEBX;
562 uint32_t uECX;
563 uint32_t uEDX;
564 __asm
565 {
566 push ebx
567 mov eax, [uOperator]
568 cpuid
569 mov [uEAX], eax
570 mov [uEBX], ebx
571 mov [uECX], ecx
572 mov [uEDX], edx
573 pop ebx
574 }
575 *(uint32_t *)pvEAX = uEAX;
576 *(uint32_t *)pvEBX = uEBX;
577 *(uint32_t *)pvECX = uECX;
578 *(uint32_t *)pvEDX = uEDX;
579# endif
580}
581#endif
582
583
584/**
585 * Performs the cpuid instruction returning ecx and edx.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvECX Where to store ecx.
589 * @param pvEDX Where to store edx.
590 * @remark We're using void pointers to ease the use of special bitfield structures and such.
591 */
592#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
593DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
594#else
595DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
596{
597 uint32_t uEBX;
598 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
599}
600#endif
601
602
603/**
604 * Performs the cpuid instruction returning edx.
605 *
606 * @param uOperator CPUID operation (eax).
607 * @returns EDX after cpuid operation.
608 */
609#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
610DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
611#else
612DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
613{
614 RTCCUINTREG xDX;
615# if RT_INLINE_ASM_GNU_STYLE
616# ifdef RT_ARCH_AMD64
617 RTCCUINTREG uSpill;
618 __asm__ ("cpuid"
619 : "=a" (uSpill),
620 "=d" (xDX)
621 : "0" (uOperator)
622 : "rbx", "rcx");
623# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
624 __asm__ ("push %%ebx\n\t"
625 "cpuid\n\t"
626 "pop %%ebx\n\t"
627 : "=a" (uOperator),
628 "=d" (xDX)
629 : "0" (uOperator)
630 : "ecx");
631# else
632 __asm__ ("cpuid"
633 : "=a" (uOperator),
634 "=d" (xDX)
635 : "0" (uOperator)
636 : "ebx", "ecx");
637# endif
638
639# elif RT_INLINE_ASM_USES_INTRIN
640 int aInfo[4];
641 __cpuid(aInfo, uOperator);
642 xDX = aInfo[3];
643
644# else
645 __asm
646 {
647 push ebx
648 mov eax, [uOperator]
649 cpuid
650 mov [xDX], edx
651 pop ebx
652 }
653# endif
654 return (uint32_t)xDX;
655}
656#endif
657
658
659/**
660 * Performs the cpuid instruction returning ecx.
661 *
662 * @param uOperator CPUID operation (eax).
663 * @returns ECX after cpuid operation.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
667#else
668DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
669{
670 RTCCUINTREG xCX;
671# if RT_INLINE_ASM_GNU_STYLE
672# ifdef RT_ARCH_AMD64
673 RTCCUINTREG uSpill;
674 __asm__ ("cpuid"
675 : "=a" (uSpill),
676 "=c" (xCX)
677 : "0" (uOperator)
678 : "rbx", "rdx");
679# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
680 __asm__ ("push %%ebx\n\t"
681 "cpuid\n\t"
682 "pop %%ebx\n\t"
683 : "=a" (uOperator),
684 "=c" (xCX)
685 : "0" (uOperator)
686 : "edx");
687# else
688 __asm__ ("cpuid"
689 : "=a" (uOperator),
690 "=c" (xCX)
691 : "0" (uOperator)
692 : "ebx", "edx");
693
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 __cpuid(aInfo, uOperator);
699 xCX = aInfo[2];
700
701# else
702 __asm
703 {
704 push ebx
705 mov eax, [uOperator]
706 cpuid
707 mov [xCX], ecx
708 pop ebx
709 }
710# endif
711 return (uint32_t)xCX;
712}
713#endif
714
715
716/**
717 * Checks if the current CPU supports CPUID.
718 *
719 * @returns true if CPUID is supported.
720 */
721DECLINLINE(bool) ASMHasCpuId(void)
722{
723#ifdef RT_ARCH_AMD64
724 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
725#else /* !RT_ARCH_AMD64 */
726 bool fRet = false;
727# if RT_INLINE_ASM_GNU_STYLE
728 uint32_t u1;
729 uint32_t u2;
730 __asm__ ("pushf\n\t"
731 "pop %1\n\t"
732 "mov %1, %2\n\t"
733 "xorl $0x200000, %1\n\t"
734 "push %1\n\t"
735 "popf\n\t"
736 "pushf\n\t"
737 "pop %1\n\t"
738 "cmpl %1, %2\n\t"
739 "setne %0\n\t"
740 "push %2\n\t"
741 "popf\n\t"
742 : "=m" (fRet), "=r" (u1), "=r" (u2));
743# else
744 __asm
745 {
746 pushfd
747 pop eax
748 mov ebx, eax
749 xor eax, 0200000h
750 push eax
751 popfd
752 pushfd
753 pop eax
754 cmp eax, ebx
755 setne fRet
756 push ebx
757 popfd
758 }
759# endif
760 return fRet;
761#endif /* !RT_ARCH_AMD64 */
762}
763
764
765/**
766 * Gets the APIC ID of the current CPU.
767 *
768 * @returns the APIC ID.
769 */
770#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
771DECLASM(uint8_t) ASMGetApicId(void);
772#else
773DECLINLINE(uint8_t) ASMGetApicId(void)
774{
775 RTCCUINTREG xBX;
776# if RT_INLINE_ASM_GNU_STYLE
777# ifdef RT_ARCH_AMD64
778 RTCCUINTREG uSpill;
779 __asm__ ("cpuid"
780 : "=a" (uSpill),
781 "=b" (xBX)
782 : "0" (1)
783 : "rcx", "rdx");
784# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
785 RTCCUINTREG uSpill;
786 __asm__ ("mov %%ebx,%1\n\t"
787 "cpuid\n\t"
788 "xchgl %%ebx,%1\n\t"
789 : "=a" (uSpill),
790 "=r" (xBX)
791 : "0" (1)
792 : "ecx", "edx");
793# else
794 RTCCUINTREG uSpill;
795 __asm__ ("cpuid"
796 : "=a" (uSpill),
797 "=b" (xBX)
798 : "0" (1)
799 : "ecx", "edx");
800# endif
801
802# elif RT_INLINE_ASM_USES_INTRIN
803 int aInfo[4];
804 __cpuid(aInfo, 1);
805 xBX = aInfo[1];
806
807# else
808 __asm
809 {
810 push ebx
811 mov eax, 1
812 cpuid
813 mov [xBX], ebx
814 pop ebx
815 }
816# endif
817 return (uint8_t)(xBX >> 24);
818}
819#endif
820
821/**
822 * Get cr0.
823 * @returns cr0.
824 */
825#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
826DECLASM(RTCCUINTREG) ASMGetCR0(void);
827#else
828DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
829{
830 RTCCUINTREG uCR0;
831# if RT_INLINE_ASM_USES_INTRIN
832 uCR0 = __readcr0();
833
834# elif RT_INLINE_ASM_GNU_STYLE
835# ifdef RT_ARCH_AMD64
836 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
837# else
838 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
839# endif
840# else
841 __asm
842 {
843# ifdef RT_ARCH_AMD64
844 mov rax, cr0
845 mov [uCR0], rax
846# else
847 mov eax, cr0
848 mov [uCR0], eax
849# endif
850 }
851# endif
852 return uCR0;
853}
854#endif
855
856
857/**
858 * Sets the CR0 register.
859 * @param uCR0 The new CR0 value.
860 */
861#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
862DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
863#else
864DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
865{
866# if RT_INLINE_ASM_USES_INTRIN
867 __writecr0(uCR0);
868
869# elif RT_INLINE_ASM_GNU_STYLE
870# ifdef RT_ARCH_AMD64
871 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
872# else
873 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
874# endif
875# else
876 __asm
877 {
878# ifdef RT_ARCH_AMD64
879 mov rax, [uCR0]
880 mov cr0, rax
881# else
882 mov eax, [uCR0]
883 mov cr0, eax
884# endif
885 }
886# endif
887}
888#endif
889
890
891/**
892 * Get cr2.
893 * @returns cr2.
894 */
895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
896DECLASM(RTCCUINTREG) ASMGetCR2(void);
897#else
898DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
899{
900 RTCCUINTREG uCR2;
901# if RT_INLINE_ASM_USES_INTRIN
902 uCR2 = __readcr2();
903
904# elif RT_INLINE_ASM_GNU_STYLE
905# ifdef RT_ARCH_AMD64
906 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
907# else
908 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
909# endif
910# else
911 __asm
912 {
913# ifdef RT_ARCH_AMD64
914 mov rax, cr2
915 mov [uCR2], rax
916# else
917 mov eax, cr2
918 mov [uCR2], eax
919# endif
920 }
921# endif
922 return uCR2;
923}
924#endif
925
926
927/**
928 * Sets the CR2 register.
929 * @param uCR2 The new CR0 value.
930 */
931#if RT_INLINE_ASM_EXTERNAL
932DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
933#else
934DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
935{
936# if RT_INLINE_ASM_GNU_STYLE
937# ifdef RT_ARCH_AMD64
938 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
939# else
940 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
941# endif
942# else
943 __asm
944 {
945# ifdef RT_ARCH_AMD64
946 mov rax, [uCR2]
947 mov cr2, rax
948# else
949 mov eax, [uCR2]
950 mov cr2, eax
951# endif
952 }
953# endif
954}
955#endif
956
957
958/**
959 * Get cr3.
960 * @returns cr3.
961 */
962#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
963DECLASM(RTCCUINTREG) ASMGetCR3(void);
964#else
965DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
966{
967 RTCCUINTREG uCR3;
968# if RT_INLINE_ASM_USES_INTRIN
969 uCR3 = __readcr3();
970
971# elif RT_INLINE_ASM_GNU_STYLE
972# ifdef RT_ARCH_AMD64
973 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
974# else
975 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
976# endif
977# else
978 __asm
979 {
980# ifdef RT_ARCH_AMD64
981 mov rax, cr3
982 mov [uCR3], rax
983# else
984 mov eax, cr3
985 mov [uCR3], eax
986# endif
987 }
988# endif
989 return uCR3;
990}
991#endif
992
993
994/**
995 * Sets the CR3 register.
996 *
997 * @param uCR3 New CR3 value.
998 */
999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1000DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1001#else
1002DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1003{
1004# if RT_INLINE_ASM_USES_INTRIN
1005 __writecr3(uCR3);
1006
1007# elif RT_INLINE_ASM_GNU_STYLE
1008# ifdef RT_ARCH_AMD64
1009 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1010# else
1011 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1012# endif
1013# else
1014 __asm
1015 {
1016# ifdef RT_ARCH_AMD64
1017 mov rax, [uCR3]
1018 mov cr3, rax
1019# else
1020 mov eax, [uCR3]
1021 mov cr3, eax
1022# endif
1023 }
1024# endif
1025}
1026#endif
1027
1028
1029/**
1030 * Reloads the CR3 register.
1031 */
1032#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1033DECLASM(void) ASMReloadCR3(void);
1034#else
1035DECLINLINE(void) ASMReloadCR3(void)
1036{
1037# if RT_INLINE_ASM_USES_INTRIN
1038 __writecr3(__readcr3());
1039
1040# elif RT_INLINE_ASM_GNU_STYLE
1041 RTCCUINTREG u;
1042# ifdef RT_ARCH_AMD64
1043 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1044 "movq %0, %%cr3\n\t"
1045 : "=r" (u));
1046# else
1047 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1048 "movl %0, %%cr3\n\t"
1049 : "=r" (u));
1050# endif
1051# else
1052 __asm
1053 {
1054# ifdef RT_ARCH_AMD64
1055 mov rax, cr3
1056 mov cr3, rax
1057# else
1058 mov eax, cr3
1059 mov cr3, eax
1060# endif
1061 }
1062# endif
1063}
1064#endif
1065
1066
1067/**
1068 * Get cr4.
1069 * @returns cr4.
1070 */
1071#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1072DECLASM(RTCCUINTREG) ASMGetCR4(void);
1073#else
1074DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1075{
1076 RTCCUINTREG uCR4;
1077# if RT_INLINE_ASM_USES_INTRIN
1078 uCR4 = __readcr4();
1079
1080# elif RT_INLINE_ASM_GNU_STYLE
1081# ifdef RT_ARCH_AMD64
1082 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1083# else
1084 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1085# endif
1086# else
1087 __asm
1088 {
1089# ifdef RT_ARCH_AMD64
1090 mov rax, cr4
1091 mov [uCR4], rax
1092# else
1093 push eax /* just in case */
1094 /*mov eax, cr4*/
1095 _emit 0x0f
1096 _emit 0x20
1097 _emit 0xe0
1098 mov [uCR4], eax
1099 pop eax
1100# endif
1101 }
1102# endif
1103 return uCR4;
1104}
1105#endif
1106
1107
1108/**
1109 * Sets the CR4 register.
1110 *
1111 * @param uCR4 New CR4 value.
1112 */
1113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1114DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1115#else
1116DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1117{
1118# if RT_INLINE_ASM_USES_INTRIN
1119 __writecr4(uCR4);
1120
1121# elif RT_INLINE_ASM_GNU_STYLE
1122# ifdef RT_ARCH_AMD64
1123 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1124# else
1125 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1126# endif
1127# else
1128 __asm
1129 {
1130# ifdef RT_ARCH_AMD64
1131 mov rax, [uCR4]
1132 mov cr4, rax
1133# else
1134 mov eax, [uCR4]
1135 _emit 0x0F
1136 _emit 0x22
1137 _emit 0xE0 /* mov cr4, eax */
1138# endif
1139 }
1140# endif
1141}
1142#endif
1143
1144
1145/**
1146 * Get cr8.
1147 * @returns cr8.
1148 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1149 */
1150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1151DECLASM(RTCCUINTREG) ASMGetCR8(void);
1152#else
1153DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1154{
1155# ifdef RT_ARCH_AMD64
1156 RTCCUINTREG uCR8;
1157# if RT_INLINE_ASM_USES_INTRIN
1158 uCR8 = __readcr8();
1159
1160# elif RT_INLINE_ASM_GNU_STYLE
1161 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1162# else
1163 __asm
1164 {
1165 mov rax, cr8
1166 mov [uCR8], rax
1167 }
1168# endif
1169 return uCR8;
1170# else /* !RT_ARCH_AMD64 */
1171 return 0;
1172# endif /* !RT_ARCH_AMD64 */
1173}
1174#endif
1175
1176
1177/**
1178 * Enables interrupts (EFLAGS.IF).
1179 */
1180#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1181DECLASM(void) ASMIntEnable(void);
1182#else
1183DECLINLINE(void) ASMIntEnable(void)
1184{
1185# if RT_INLINE_ASM_GNU_STYLE
1186 __asm("sti\n");
1187# elif RT_INLINE_ASM_USES_INTRIN
1188 _enable();
1189# else
1190 __asm sti
1191# endif
1192}
1193#endif
1194
1195
1196/**
1197 * Disables interrupts (!EFLAGS.IF).
1198 */
1199#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1200DECLASM(void) ASMIntDisable(void);
1201#else
1202DECLINLINE(void) ASMIntDisable(void)
1203{
1204# if RT_INLINE_ASM_GNU_STYLE
1205 __asm("cli\n");
1206# elif RT_INLINE_ASM_USES_INTRIN
1207 _disable();
1208# else
1209 __asm cli
1210# endif
1211}
1212#endif
1213
1214
1215/**
1216 * Disables interrupts and returns previous xFLAGS.
1217 */
1218#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1219DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1220#else
1221DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1222{
1223 RTCCUINTREG xFlags;
1224# if RT_INLINE_ASM_GNU_STYLE
1225# ifdef RT_ARCH_AMD64
1226 __asm__ __volatile__("pushfq\n\t"
1227 "cli\n\t"
1228 "popq %0\n\t"
1229 : "=m" (xFlags));
1230# else
1231 __asm__ __volatile__("pushfl\n\t"
1232 "cli\n\t"
1233 "popl %0\n\t"
1234 : "=m" (xFlags));
1235# endif
1236# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1237 xFlags = ASMGetFlags();
1238 _disable();
1239# else
1240 __asm {
1241 pushfd
1242 cli
1243 pop [xFlags]
1244 }
1245# endif
1246 return xFlags;
1247}
1248#endif
1249
1250
1251/**
1252 * Reads a machine specific register.
1253 *
1254 * @returns Register content.
1255 * @param uRegister Register to read.
1256 */
1257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1258DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1259#else
1260DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1261{
1262 RTUINT64U u;
1263# if RT_INLINE_ASM_GNU_STYLE
1264 __asm__ ("rdmsr\n\t"
1265 : "=a" (u.s.Lo),
1266 "=d" (u.s.Hi)
1267 : "c" (uRegister));
1268
1269# elif RT_INLINE_ASM_USES_INTRIN
1270 u.u = __readmsr(uRegister);
1271
1272# else
1273 __asm
1274 {
1275 mov ecx, [uRegister]
1276 rdmsr
1277 mov [u.s.Lo], eax
1278 mov [u.s.Hi], edx
1279 }
1280# endif
1281
1282 return u.u;
1283}
1284#endif
1285
1286
1287/**
1288 * Writes a machine specific register.
1289 *
1290 * @returns Register content.
1291 * @param uRegister Register to write to.
1292 * @param u64Val Value to write.
1293 */
1294#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1295DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1296#else
1297DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1298{
1299 RTUINT64U u;
1300
1301 u.u = u64Val;
1302# if RT_INLINE_ASM_GNU_STYLE
1303 __asm__ __volatile__("wrmsr\n\t"
1304 ::"a" (u.s.Lo),
1305 "d" (u.s.Hi),
1306 "c" (uRegister));
1307
1308# elif RT_INLINE_ASM_USES_INTRIN
1309 __writemsr(uRegister, u.u);
1310
1311# else
1312 __asm
1313 {
1314 mov ecx, [uRegister]
1315 mov edx, [u.s.Hi]
1316 mov eax, [u.s.Lo]
1317 wrmsr
1318 }
1319# endif
1320}
1321#endif
1322
1323
1324/**
1325 * Reads low part of a machine specific register.
1326 *
1327 * @returns Register content.
1328 * @param uRegister Register to read.
1329 */
1330#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1331DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1332#else
1333DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1334{
1335 uint32_t u32;
1336# if RT_INLINE_ASM_GNU_STYLE
1337 __asm__ ("rdmsr\n\t"
1338 : "=a" (u32)
1339 : "c" (uRegister)
1340 : "edx");
1341
1342# elif RT_INLINE_ASM_USES_INTRIN
1343 u32 = (uint32_t)__readmsr(uRegister);
1344
1345#else
1346 __asm
1347 {
1348 mov ecx, [uRegister]
1349 rdmsr
1350 mov [u32], eax
1351 }
1352# endif
1353
1354 return u32;
1355}
1356#endif
1357
1358
1359/**
1360 * Reads high part of a machine specific register.
1361 *
1362 * @returns Register content.
1363 * @param uRegister Register to read.
1364 */
1365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1366DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1367#else
1368DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1369{
1370 uint32_t u32;
1371# if RT_INLINE_ASM_GNU_STYLE
1372 __asm__ ("rdmsr\n\t"
1373 : "=d" (u32)
1374 : "c" (uRegister)
1375 : "eax");
1376
1377# elif RT_INLINE_ASM_USES_INTRIN
1378 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1379
1380# else
1381 __asm
1382 {
1383 mov ecx, [uRegister]
1384 rdmsr
1385 mov [u32], edx
1386 }
1387# endif
1388
1389 return u32;
1390}
1391#endif
1392
1393
1394/**
1395 * Gets dr7.
1396 *
1397 * @returns dr7.
1398 */
1399#if RT_INLINE_ASM_EXTERNAL
1400DECLASM(RTCCUINTREG) ASMGetDR7(void);
1401#else
1402DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1403{
1404 RTCCUINTREG uDR7;
1405# if RT_INLINE_ASM_GNU_STYLE
1406# ifdef RT_ARCH_AMD64
1407 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1408# else
1409 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1410# endif
1411# else
1412 __asm
1413 {
1414# ifdef RT_ARCH_AMD64
1415 mov rax, dr7
1416 mov [uDR7], rax
1417# else
1418 mov eax, dr7
1419 mov [uDR7], eax
1420# endif
1421 }
1422# endif
1423 return uDR7;
1424}
1425#endif
1426
1427
1428/**
1429 * Gets dr6.
1430 *
1431 * @returns dr6.
1432 */
1433#if RT_INLINE_ASM_EXTERNAL
1434DECLASM(RTCCUINTREG) ASMGetDR6(void);
1435#else
1436DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1437{
1438 RTCCUINTREG uDR6;
1439# if RT_INLINE_ASM_GNU_STYLE
1440# ifdef RT_ARCH_AMD64
1441 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1442# else
1443 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1444# endif
1445# else
1446 __asm
1447 {
1448# ifdef RT_ARCH_AMD64
1449 mov rax, dr6
1450 mov [uDR6], rax
1451# else
1452 mov eax, dr6
1453 mov [uDR6], eax
1454# endif
1455 }
1456# endif
1457 return uDR6;
1458}
1459#endif
1460
1461
1462/**
1463 * Reads and clears DR6.
1464 *
1465 * @returns DR6.
1466 */
1467#if RT_INLINE_ASM_EXTERNAL
1468DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1469#else
1470DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1471{
1472 RTCCUINTREG uDR6;
1473# if RT_INLINE_ASM_GNU_STYLE
1474 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1475# ifdef RT_ARCH_AMD64
1476 __asm__ ("movq %%dr6, %0\n\t"
1477 "movq %1, %%dr6\n\t"
1478 : "=r" (uDR6)
1479 : "r" (uNewValue));
1480# else
1481 __asm__ ("movl %%dr6, %0\n\t"
1482 "movl %1, %%dr6\n\t"
1483 : "=r" (uDR6)
1484 : "r" (uNewValue));
1485# endif
1486# else
1487 __asm
1488 {
1489# ifdef RT_ARCH_AMD64
1490 mov rax, dr6
1491 mov [uDR6], rax
1492 mov rcx, rax
1493 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1494 mov dr6, rcx
1495# else
1496 mov eax, dr6
1497 mov [uDR6], eax
1498 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1499 mov dr6, ecx
1500# endif
1501 }
1502# endif
1503 return uDR6;
1504}
1505#endif
1506
1507
1508/**
1509 * Compiler memory barrier.
1510 *
1511 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1512 * values or any outstanding writes when returning from this function.
1513 *
1514 * This function must be used if non-volatile data is modified by a
1515 * device or the VMM. Typical cases are port access, MMIO access,
1516 * trapping instruction, etc.
1517 */
1518#if RT_INLINE_ASM_GNU_STYLE
1519# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1520#elif RT_INLINE_ASM_USES_INTRIN
1521# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1522#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1523DECLINLINE(void) ASMCompilerBarrier(void)
1524{
1525 __asm
1526 {
1527 }
1528}
1529#endif
1530
1531
1532/**
1533 * Writes a 8-bit unsigned integer to an I/O port.
1534 *
1535 * @param Port I/O port to read from.
1536 * @param u8 8-bit integer to write.
1537 */
1538#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1539DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1540#else
1541DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1542{
1543# if RT_INLINE_ASM_GNU_STYLE
1544 __asm__ __volatile__("outb %b1, %w0\n\t"
1545 :: "Nd" (Port),
1546 "a" (u8));
1547
1548# elif RT_INLINE_ASM_USES_INTRIN
1549 __outbyte(Port, u8);
1550
1551# else
1552 __asm
1553 {
1554 mov dx, [Port]
1555 mov al, [u8]
1556 out dx, al
1557 }
1558# endif
1559}
1560#endif
1561
1562
1563/**
1564 * Gets a 8-bit unsigned integer from an I/O port.
1565 *
1566 * @returns 8-bit integer.
1567 * @param Port I/O port to read from.
1568 */
1569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1570DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1571#else
1572DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1573{
1574 uint8_t u8;
1575# if RT_INLINE_ASM_GNU_STYLE
1576 __asm__ __volatile__("inb %w1, %b0\n\t"
1577 : "=a" (u8)
1578 : "Nd" (Port));
1579
1580# elif RT_INLINE_ASM_USES_INTRIN
1581 u8 = __inbyte(Port);
1582
1583# else
1584 __asm
1585 {
1586 mov dx, [Port]
1587 in al, dx
1588 mov [u8], al
1589 }
1590# endif
1591 return u8;
1592}
1593#endif
1594
1595
1596/**
1597 * Writes a 16-bit unsigned integer to an I/O port.
1598 *
1599 * @param Port I/O port to read from.
1600 * @param u16 16-bit integer to write.
1601 */
1602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1603DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1604#else
1605DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1606{
1607# if RT_INLINE_ASM_GNU_STYLE
1608 __asm__ __volatile__("outw %w1, %w0\n\t"
1609 :: "Nd" (Port),
1610 "a" (u16));
1611
1612# elif RT_INLINE_ASM_USES_INTRIN
1613 __outword(Port, u16);
1614
1615# else
1616 __asm
1617 {
1618 mov dx, [Port]
1619 mov ax, [u16]
1620 out dx, ax
1621 }
1622# endif
1623}
1624#endif
1625
1626
1627/**
1628 * Gets a 16-bit unsigned integer from an I/O port.
1629 *
1630 * @returns 16-bit integer.
1631 * @param Port I/O port to read from.
1632 */
1633#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1634DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1635#else
1636DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1637{
1638 uint16_t u16;
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("inw %w1, %w0\n\t"
1641 : "=a" (u16)
1642 : "Nd" (Port));
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u16 = __inword(Port);
1646
1647# else
1648 __asm
1649 {
1650 mov dx, [Port]
1651 in ax, dx
1652 mov [u16], ax
1653 }
1654# endif
1655 return u16;
1656}
1657#endif
1658
1659
1660/**
1661 * Writes a 32-bit unsigned integer to an I/O port.
1662 *
1663 * @param Port I/O port to read from.
1664 * @param u32 32-bit integer to write.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1668#else
1669DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1670{
1671# if RT_INLINE_ASM_GNU_STYLE
1672 __asm__ __volatile__("outl %1, %w0\n\t"
1673 :: "Nd" (Port),
1674 "a" (u32));
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 __outdword(Port, u32);
1678
1679# else
1680 __asm
1681 {
1682 mov dx, [Port]
1683 mov eax, [u32]
1684 out dx, eax
1685 }
1686# endif
1687}
1688#endif
1689
1690
1691/**
1692 * Gets a 32-bit unsigned integer from an I/O port.
1693 *
1694 * @returns 32-bit integer.
1695 * @param Port I/O port to read from.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1699#else
1700DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1701{
1702 uint32_t u32;
1703# if RT_INLINE_ASM_GNU_STYLE
1704 __asm__ __volatile__("inl %w1, %0\n\t"
1705 : "=a" (u32)
1706 : "Nd" (Port));
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 u32 = __indword(Port);
1710
1711# else
1712 __asm
1713 {
1714 mov dx, [Port]
1715 in eax, dx
1716 mov [u32], eax
1717 }
1718# endif
1719 return u32;
1720}
1721#endif
1722
1723
1724/**
1725 * Atomically Exchange an unsigned 8-bit value.
1726 *
1727 * @returns Current *pu8 value
1728 * @param pu8 Pointer to the 8-bit variable to update.
1729 * @param u8 The 8-bit value to assign to *pu8.
1730 */
1731#if RT_INLINE_ASM_EXTERNAL
1732DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1733#else
1734DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1735{
1736# if RT_INLINE_ASM_GNU_STYLE
1737 __asm__ __volatile__("xchgb %0, %1\n\t"
1738 : "=m" (*pu8),
1739 "=r" (u8)
1740 : "1" (u8));
1741# else
1742 __asm
1743 {
1744# ifdef RT_ARCH_AMD64
1745 mov rdx, [pu8]
1746 mov al, [u8]
1747 xchg [rdx], al
1748 mov [u8], al
1749# else
1750 mov edx, [pu8]
1751 mov al, [u8]
1752 xchg [edx], al
1753 mov [u8], al
1754# endif
1755 }
1756# endif
1757 return u8;
1758}
1759#endif
1760
1761
1762/**
1763 * Atomically Exchange a signed 8-bit value.
1764 *
1765 * @returns Current *pu8 value
1766 * @param pi8 Pointer to the 8-bit variable to update.
1767 * @param i8 The 8-bit value to assign to *pi8.
1768 */
1769DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1770{
1771 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1772}
1773
1774
1775/**
1776 * Atomically Exchange a bool value.
1777 *
1778 * @returns Current *pf value
1779 * @param pf Pointer to the 8-bit variable to update.
1780 * @param f The 8-bit value to assign to *pi8.
1781 */
1782DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1783{
1784#ifdef _MSC_VER
1785 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1786#else
1787 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1788#endif
1789}
1790
1791
1792/**
1793 * Atomically Exchange an unsigned 16-bit value.
1794 *
1795 * @returns Current *pu16 value
1796 * @param pu16 Pointer to the 16-bit variable to update.
1797 * @param u16 The 16-bit value to assign to *pu16.
1798 */
1799#if RT_INLINE_ASM_EXTERNAL
1800DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1801#else
1802DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1803{
1804# if RT_INLINE_ASM_GNU_STYLE
1805 __asm__ __volatile__("xchgw %0, %1\n\t"
1806 : "=m" (*pu16),
1807 "=r" (u16)
1808 : "1" (u16));
1809# else
1810 __asm
1811 {
1812# ifdef RT_ARCH_AMD64
1813 mov rdx, [pu16]
1814 mov ax, [u16]
1815 xchg [rdx], ax
1816 mov [u16], ax
1817# else
1818 mov edx, [pu16]
1819 mov ax, [u16]
1820 xchg [edx], ax
1821 mov [u16], ax
1822# endif
1823 }
1824# endif
1825 return u16;
1826}
1827#endif
1828
1829
1830/**
1831 * Atomically Exchange a signed 16-bit value.
1832 *
1833 * @returns Current *pu16 value
1834 * @param pi16 Pointer to the 16-bit variable to update.
1835 * @param i16 The 16-bit value to assign to *pi16.
1836 */
1837DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1838{
1839 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1840}
1841
1842
1843/**
1844 * Atomically Exchange an unsigned 32-bit value.
1845 *
1846 * @returns Current *pu32 value
1847 * @param pu32 Pointer to the 32-bit variable to update.
1848 * @param u32 The 32-bit value to assign to *pu32.
1849 */
1850#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1851DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1852#else
1853DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1854{
1855# if RT_INLINE_ASM_GNU_STYLE
1856 __asm__ __volatile__("xchgl %0, %1\n\t"
1857 : "=m" (*pu32),
1858 "=r" (u32)
1859 : "1" (u32));
1860
1861# elif RT_INLINE_ASM_USES_INTRIN
1862 u32 = _InterlockedExchange((long *)pu32, u32);
1863
1864# else
1865 __asm
1866 {
1867# ifdef RT_ARCH_AMD64
1868 mov rdx, [pu32]
1869 mov eax, u32
1870 xchg [rdx], eax
1871 mov [u32], eax
1872# else
1873 mov edx, [pu32]
1874 mov eax, u32
1875 xchg [edx], eax
1876 mov [u32], eax
1877# endif
1878 }
1879# endif
1880 return u32;
1881}
1882#endif
1883
1884
1885/**
1886 * Atomically Exchange a signed 32-bit value.
1887 *
1888 * @returns Current *pu32 value
1889 * @param pi32 Pointer to the 32-bit variable to update.
1890 * @param i32 The 32-bit value to assign to *pi32.
1891 */
1892DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1893{
1894 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1895}
1896
1897
1898/**
1899 * Atomically Exchange an unsigned 64-bit value.
1900 *
1901 * @returns Current *pu64 value
1902 * @param pu64 Pointer to the 64-bit variable to update.
1903 * @param u64 The 64-bit value to assign to *pu64.
1904 */
1905#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1906DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1907#else
1908DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1909{
1910# if defined(RT_ARCH_AMD64)
1911# if RT_INLINE_ASM_USES_INTRIN
1912 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1913
1914# elif RT_INLINE_ASM_GNU_STYLE
1915 __asm__ __volatile__("xchgq %0, %1\n\t"
1916 : "=m" (*pu64),
1917 "=r" (u64)
1918 : "1" (u64));
1919# else
1920 __asm
1921 {
1922 mov rdx, [pu64]
1923 mov rax, [u64]
1924 xchg [rdx], rax
1925 mov [u64], rax
1926 }
1927# endif
1928# else /* !RT_ARCH_AMD64 */
1929# if RT_INLINE_ASM_GNU_STYLE
1930# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
1931 uint32_t u32 = (uint32_t)u64;
1932 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1933 "xchgl %%ebx, %3\n\t"
1934 "1:\n\t"
1935 "lock; cmpxchg8b (%5)\n\t"
1936 "jnz 1b\n\t"
1937 "xchgl %%ebx, %3\n\t"
1938 /*"xchgl %%esi, %5\n\t"*/
1939 : "=A" (u64),
1940 "=m" (*pu64)
1941 : "0" (*pu64),
1942 "m" ( u32 ),
1943 "c" ( (uint32_t)(u64 >> 32) ),
1944 "S" (pu64) );
1945# else /* !PIC */
1946 __asm__ __volatile__("1:\n\t"
1947 "lock; cmpxchg8b %1\n\t"
1948 "jnz 1b\n\t"
1949 : "=A" (u64),
1950 "=m" (*pu64)
1951 : "0" (*pu64),
1952 "b" ( (uint32_t)u64 ),
1953 "c" ( (uint32_t)(u64 >> 32) ));
1954# endif
1955# else
1956 __asm
1957 {
1958 mov ebx, dword ptr [u64]
1959 mov ecx, dword ptr [u64 + 4]
1960 mov edi, pu64
1961 mov eax, dword ptr [edi]
1962 mov edx, dword ptr [edi + 4]
1963 retry:
1964 lock cmpxchg8b [edi]
1965 jnz retry
1966 mov dword ptr [u64], eax
1967 mov dword ptr [u64 + 4], edx
1968 }
1969# endif
1970# endif /* !RT_ARCH_AMD64 */
1971 return u64;
1972}
1973#endif
1974
1975
1976/**
1977 * Atomically Exchange an signed 64-bit value.
1978 *
1979 * @returns Current *pi64 value
1980 * @param pi64 Pointer to the 64-bit variable to update.
1981 * @param i64 The 64-bit value to assign to *pi64.
1982 */
1983DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1984{
1985 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1986}
1987
1988
#ifdef RT_ARCH_AMD64
/**
 * Exchanges an unsigned 128-bit value using two 64-bit exchanges.
 *
 * @returns The previous *pu128 value, assembled from the two halves.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  NOT atomic as a whole.  We cannot assume the hardware supports a
 *          128-bit exchange, nor is there assembler support for it here, so
 *          the low and the high quadword are swapped by two separate
 *          ASMAtomicXchgU64 calls.
 * @todo    Switch to a "lock cmpxchg16b" retry loop when the CPU reports
 *          support for it (the disabled check was ASMCpuId_ECX(1) & BIT(13)).
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    PRTUINT128U pTarget = (PRTUINT128U)(uintptr_t)pu128;
    RTUINT128U  uXchg;

    /* Low half first, then the high half - same order as always. */
    uXchg.u    = u128;
    uXchg.s.Lo = ASMAtomicXchgU64(&pTarget->s.Lo, uXchg.s.Lo);
    uXchg.s.Hi = ASMAtomicXchgU64(&pTarget->s.Hi, uXchg.s.Hi);
    return uXchg.u;
}
# endif
#endif /* RT_ARCH_AMD64 */
2048
2049
2050/**
2051 * Atomically Reads a unsigned 64-bit value.
2052 *
2053 * @returns Current *pu64 value
2054 * @param pu64 Pointer to the 64-bit variable to read.
2055 * The memory pointed to must be writable.
2056 * @remark This will fault if the memory is read-only!
2057 */
2058#if RT_INLINE_ASM_EXTERNAL
2059DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2060#else
2061DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2062{
2063 uint64_t u64;
2064# ifdef RT_ARCH_AMD64
2065# if RT_INLINE_ASM_GNU_STYLE
2066 __asm__ __volatile__("movq %1, %0\n\t"
2067 : "=r" (u64)
2068 : "m" (*pu64));
2069# else
2070 __asm
2071 {
2072 mov rdx, [pu64]
2073 mov rax, [rdx]
2074 mov [u64], rax
2075 }
2076# endif
2077# else /* !RT_ARCH_AMD64 */
2078# if RT_INLINE_ASM_GNU_STYLE
2079# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2080 uint32_t u32EBX = 0;
2081 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2082 "lock; cmpxchg8b (%5)\n\t"
2083 "xchgl %%ebx, %3\n\t"
2084 : "=A" (u64),
2085 "=m" (*pu64)
2086 : "0" (0),
2087 "m" (u32EBX),
2088 "c" (0),
2089 "S" (pu64));
2090# else /* !PIC */
2091 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2092 : "=A" (u64),
2093 "=m" (*pu64)
2094 : "0" (0),
2095 "b" (0),
2096 "c" (0));
2097# endif
2098# else
2099 __asm
2100 {
2101 xor eax, eax
2102 xor edx, edx
2103 mov edi, pu64
2104 xor ecx, ecx
2105 xor ebx, ebx
2106 lock cmpxchg8b [edi]
2107 mov dword ptr [u64], eax
2108 mov dword ptr [u64 + 4], edx
2109 }
2110# endif
2111# endif /* !RT_ARCH_AMD64 */
2112 return u64;
2113}
2114#endif
2115
2116
2117/**
2118 * Atomically Reads a signed 64-bit value.
2119 *
2120 * @returns Current *pi64 value
2121 * @param pi64 Pointer to the 64-bit variable to read.
2122 * The memory pointed to must be writable.
2123 * @remark This will fault if the memory is read-only!
2124 */
2125DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2126{
2127 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2128}
2129
2130
/**
 * Atomically exchanges a value whose size might differ between platforms or
 * compilers.
 *
 * Dispatches on sizeof(*(pu)) to the matching fixed-width exchange function
 * and discards the old value.  Sizes other than 1, 2, 4 and 8 bytes trigger
 * an assertion.
 *
 * The size is cast to int for the assertion message because the format string
 * uses %d while sizeof yields a size_t.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
2148
2149
2150/**
2151 * Atomically Exchange a pointer value.
2152 *
2153 * @returns Current *ppv value
2154 * @param ppv Pointer to the pointer variable to update.
2155 * @param pv The pointer value to assign to *ppv.
2156 */
2157DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2158{
2159#if ARCH_BITS == 32
2160 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2161#elif ARCH_BITS == 64
2162 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2163#else
2164# error "ARCH_BITS is bogus"
2165#endif
2166}
2167
2168
2169/**
2170 * Atomically Compare and Exchange an unsigned 32-bit value.
2171 *
2172 * @returns true if xchg was done.
2173 * @returns false if xchg wasn't done.
2174 *
2175 * @param pu32 Pointer to the value to update.
2176 * @param u32New The new value to assigned to *pu32.
2177 * @param u32Old The old value to *pu32 compare with.
2178 */
2179#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2180DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2181#else
2182DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2183{
2184# if RT_INLINE_ASM_GNU_STYLE
2185 uint32_t u32Ret;
2186 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2187 "setz %%al\n\t"
2188 "movzx %%al, %%eax\n\t"
2189 : "=m" (*pu32),
2190 "=a" (u32Ret)
2191 : "r" (u32New),
2192 "1" (u32Old));
2193 return (bool)u32Ret;
2194
2195# elif RT_INLINE_ASM_USES_INTRIN
2196 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2197
2198# else
2199 uint32_t u32Ret;
2200 __asm
2201 {
2202# ifdef RT_ARCH_AMD64
2203 mov rdx, [pu32]
2204# else
2205 mov edx, [pu32]
2206# endif
2207 mov eax, [u32Old]
2208 mov ecx, [u32New]
2209# ifdef RT_ARCH_AMD64
2210 lock cmpxchg [rdx], ecx
2211# else
2212 lock cmpxchg [edx], ecx
2213# endif
2214 setz al
2215 movzx eax, al
2216 mov [u32Ret], eax
2217 }
2218 return !!u32Ret;
2219# endif
2220}
2221#endif
2222
2223
2224/**
2225 * Atomically Compare and Exchange a signed 32-bit value.
2226 *
2227 * @returns true if xchg was done.
2228 * @returns false if xchg wasn't done.
2229 *
2230 * @param pi32 Pointer to the value to update.
2231 * @param i32New The new value to assigned to *pi32.
2232 * @param i32Old The old value to *pi32 compare with.
2233 */
2234DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2235{
2236 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2237}
2238
2239
2240/**
2241 * Atomically Compare and exchange an unsigned 64-bit value.
2242 *
2243 * @returns true if xchg was done.
2244 * @returns false if xchg wasn't done.
2245 *
2246 * @param pu64 Pointer to the 64-bit variable to update.
2247 * @param u64New The 64-bit value to assign to *pu64.
2248 * @param u64Old The value to compare with.
2249 */
2250#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2251DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2252#else
2253DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2254{
2255# if RT_INLINE_ASM_USES_INTRIN
2256 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2257
2258# elif defined(RT_ARCH_AMD64)
2259# if RT_INLINE_ASM_GNU_STYLE
2260 uint64_t u64Ret;
2261 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2262 "setz %%al\n\t"
2263 "movzx %%al, %%eax\n\t"
2264 : "=m" (*pu64),
2265 "=a" (u64Ret)
2266 : "r" (u64New),
2267 "1" (u64Old));
2268 return (bool)u64Ret;
2269# else
2270 bool fRet;
2271 __asm
2272 {
2273 mov rdx, [pu32]
2274 mov rax, [u64Old]
2275 mov rcx, [u64New]
2276 lock cmpxchg [rdx], rcx
2277 setz al
2278 mov [fRet], al
2279 }
2280 return fRet;
2281# endif
2282# else /* !RT_ARCH_AMD64 */
2283 uint32_t u32Ret;
2284# if RT_INLINE_ASM_GNU_STYLE
2285# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2286 uint32_t u32 = (uint32_t)u64New;
2287 uint32_t u32Spill;
2288 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2289 "lock; cmpxchg8b (%6)\n\t"
2290 "setz %%al\n\t"
2291 "xchgl %%ebx, %4\n\t"
2292 "movzx %%al, %%eax\n\t"
2293 : "=a" (u32Ret),
2294 "=d" (u32Spill),
2295 "=m" (*pu64)
2296 : "A" (u64Old),
2297 "m" ( u32 ),
2298 "c" ( (uint32_t)(u64New >> 32) ),
2299 "S" (pu64) );
2300# else /* !PIC */
2301 uint32_t u32Spill;
2302 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2303 "setz %%al\n\t"
2304 "movzx %%al, %%eax\n\t"
2305 : "=a" (u32Ret),
2306 "=d" (u32Spill),
2307 "=m" (*pu64)
2308 : "A" (u64Old),
2309 "b" ( (uint32_t)u64New ),
2310 "c" ( (uint32_t)(u64New >> 32) ));
2311# endif
2312 return (bool)u32Ret;
2313# else
2314 __asm
2315 {
2316 mov ebx, dword ptr [u64New]
2317 mov ecx, dword ptr [u64New + 4]
2318 mov edi, [pu64]
2319 mov eax, dword ptr [u64Old]
2320 mov edx, dword ptr [u64Old + 4]
2321 lock cmpxchg8b [edi]
2322 setz al
2323 movzx eax, al
2324 mov dword ptr [u32Ret], eax
2325 }
2326 return !!u32Ret;
2327# endif
2328# endif /* !RT_ARCH_AMD64 */
2329}
2330#endif
2331
2332
2333/**
2334 * Atomically Compare and exchange a signed 64-bit value.
2335 *
2336 * @returns true if xchg was done.
2337 * @returns false if xchg wasn't done.
2338 *
2339 * @param pi64 Pointer to the 64-bit variable to update.
2340 * @param i64 The 64-bit value to assign to *pu64.
2341 * @param i64Old The value to compare with.
2342 */
2343DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2344{
2345 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2346}
2347
2348
2349
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ between
 * platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange
 * function.  Any other size triggers an assertion and stores false in fRc.
 *
 * The size is cast to int for the assertion message because the format string
 * uses %d while sizeof yields a size_t.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2371
2372
2373/**
2374 * Atomically Compare and Exchange a pointer value.
2375 *
2376 * @returns true if xchg was done.
2377 * @returns false if xchg wasn't done.
2378 *
2379 * @param ppv Pointer to the value to update.
2380 * @param pvNew The new value to assigned to *ppv.
2381 * @param pvOld The old value to *ppv compare with.
2382 */
2383DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2384{
2385#if ARCH_BITS == 32
2386 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2387#elif ARCH_BITS == 64
2388 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2389#else
2390# error "ARCH_BITS is bogus"
2391#endif
2392}
2393
2394
2395/**
2396 * Atomically increment a 32-bit value.
2397 *
2398 * @returns The new value.
2399 * @param pu32 Pointer to the value to increment.
2400 */
2401#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2402DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2403#else
2404DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2405{
2406 uint32_t u32;
2407# if RT_INLINE_ASM_USES_INTRIN
2408 u32 = _InterlockedIncrement((long *)pu32);
2409
2410# elif RT_INLINE_ASM_GNU_STYLE
2411 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2412 "incl %0\n\t"
2413 : "=r" (u32),
2414 "=m" (*pu32)
2415 : "0" (1)
2416 : "memory");
2417# else
2418 __asm
2419 {
2420 mov eax, 1
2421# ifdef RT_ARCH_AMD64
2422 mov rdx, [pu32]
2423 lock xadd [rdx], eax
2424# else
2425 mov edx, [pu32]
2426 lock xadd [edx], eax
2427# endif
2428 inc eax
2429 mov u32, eax
2430 }
2431# endif
2432 return u32;
2433}
2434#endif
2435
2436
2437/**
2438 * Atomically increment a signed 32-bit value.
2439 *
2440 * @returns The new value.
2441 * @param pi32 Pointer to the value to increment.
2442 */
2443DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2444{
2445 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2446}
2447
2448
2449/**
2450 * Atomically decrement an unsigned 32-bit value.
2451 *
2452 * @returns The new value.
2453 * @param pu32 Pointer to the value to decrement.
2454 */
2455#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2456DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2457#else
2458DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2459{
2460 uint32_t u32;
2461# if RT_INLINE_ASM_USES_INTRIN
2462 u32 = _InterlockedDecrement((long *)pu32);
2463
2464# elif RT_INLINE_ASM_GNU_STYLE
2465 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2466 "decl %0\n\t"
2467 : "=r" (u32),
2468 "=m" (*pu32)
2469 : "0" (-1)
2470 : "memory");
2471# else
2472 __asm
2473 {
2474 mov eax, -1
2475# ifdef RT_ARCH_AMD64
2476 mov rdx, [pu32]
2477 lock xadd [rdx], eax
2478# else
2479 mov edx, [pu32]
2480 lock xadd [edx], eax
2481# endif
2482 dec eax
2483 mov u32, eax
2484 }
2485# endif
2486 return u32;
2487}
2488#endif
2489
2490
2491/**
2492 * Atomically decrement a signed 32-bit value.
2493 *
2494 * @returns The new value.
2495 * @param pi32 Pointer to the value to decrement.
2496 */
2497DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2498{
2499 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2500}
2501
2502
2503/**
2504 * Atomically Or an unsigned 32-bit value.
2505 *
2506 * @param pu32 Pointer to the pointer variable to OR u32 with.
2507 * @param u32 The value to OR *pu32 with.
2508 */
2509#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2510DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2511#else
2512DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2513{
2514# if RT_INLINE_ASM_USES_INTRIN
2515 _InterlockedOr((long volatile *)pu32, (long)u32);
2516
2517# elif RT_INLINE_ASM_GNU_STYLE
2518 __asm__ __volatile__("lock; orl %1, %0\n\t"
2519 : "=m" (*pu32)
2520 : "r" (u32));
2521# else
2522 __asm
2523 {
2524 mov eax, [u32]
2525# ifdef RT_ARCH_AMD64
2526 mov rdx, [pu32]
2527 lock or [rdx], eax
2528# else
2529 mov edx, [pu32]
2530 lock or [edx], eax
2531# endif
2532 }
2533# endif
2534}
2535#endif
2536
2537
2538/**
2539 * Atomically Or a signed 32-bit value.
2540 *
2541 * @param pi32 Pointer to the pointer variable to OR u32 with.
2542 * @param i32 The value to OR *pu32 with.
2543 */
2544DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2545{
2546 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2547}
2548
2549
2550/**
2551 * Atomically And an unsigned 32-bit value.
2552 *
2553 * @param pu32 Pointer to the pointer variable to AND u32 with.
2554 * @param u32 The value to AND *pu32 with.
2555 */
2556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2557DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2558#else
2559DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2560{
2561# if RT_INLINE_ASM_USES_INTRIN
2562 _InterlockedAnd((long volatile *)pu32, u32);
2563
2564# elif RT_INLINE_ASM_GNU_STYLE
2565 __asm__ __volatile__("lock; andl %1, %0\n\t"
2566 : "=m" (*pu32)
2567 : "r" (u32));
2568# else
2569 __asm
2570 {
2571 mov eax, [u32]
2572# ifdef RT_ARCH_AMD64
2573 mov rdx, [pu32]
2574 lock and [rdx], eax
2575# else
2576 mov edx, [pu32]
2577 lock and [edx], eax
2578# endif
2579 }
2580# endif
2581}
2582#endif
2583
2584
2585/**
2586 * Atomically And a signed 32-bit value.
2587 *
2588 * @param pi32 Pointer to the pointer variable to AND i32 with.
2589 * @param i32 The value to AND *pi32 with.
2590 */
2591DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2592{
2593 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2594}
2595
2596
2597/**
2598 * Invalidate page.
2599 *
2600 * @param pv Address of the page to invalidate.
2601 */
2602#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2603DECLASM(void) ASMInvalidatePage(void *pv);
2604#else
2605DECLINLINE(void) ASMInvalidatePage(void *pv)
2606{
2607# if RT_INLINE_ASM_USES_INTRIN
2608 __invlpg(pv);
2609
2610# elif RT_INLINE_ASM_GNU_STYLE
2611 __asm__ __volatile__("invlpg %0\n\t"
2612 : : "m" (*(uint8_t *)pv));
2613# else
2614 __asm
2615 {
2616# ifdef RT_ARCH_AMD64
2617 mov rax, [pv]
2618 invlpg [rax]
2619# else
2620 mov eax, [pv]
2621 invlpg [eax]
2622# endif
2623 }
2624# endif
2625}
2626#endif
2627
2628
2629#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2630# if PAGE_SIZE != 0x1000
2631# error "PAGE_SIZE is not 0x1000!"
2632# endif
2633#endif
2634
2635/**
2636 * Zeros a 4K memory page.
2637 *
2638 * @param pv Pointer to the memory block. This must be page aligned.
2639 */
2640#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2641DECLASM(void) ASMMemZeroPage(volatile void *pv);
2642# else
2643DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2644{
2645# if RT_INLINE_ASM_USES_INTRIN
2646# ifdef RT_ARCH_AMD64
2647 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2648# else
2649 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2650# endif
2651
2652# elif RT_INLINE_ASM_GNU_STYLE
2653 RTUINTREG uDummy;
2654# ifdef RT_ARCH_AMD64
2655 __asm__ __volatile__ ("rep stosq"
2656 : "=D" (pv),
2657 "=c" (uDummy)
2658 : "0" (pv),
2659 "c" (0x1000 >> 3),
2660 "a" (0)
2661 : "memory");
2662# else
2663 __asm__ __volatile__ ("rep stosl"
2664 : "=D" (pv),
2665 "=c" (uDummy)
2666 : "0" (pv),
2667 "c" (0x1000 >> 2),
2668 "a" (0)
2669 : "memory");
2670# endif
2671# else
2672 __asm
2673 {
2674# ifdef RT_ARCH_AMD64
2675 xor rax, rax
2676 mov ecx, 0200h
2677 mov rdi, [pv]
2678 rep stosq
2679# else
2680 xor eax, eax
2681 mov ecx, 0400h
2682 mov edi, [pv]
2683 rep stosd
2684# endif
2685 }
2686# endif
2687}
2688# endif
2689
2690
2691/**
2692 * Zeros a memory block with a 32-bit aligned size.
2693 *
2694 * @param pv Pointer to the memory block.
2695 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2696 */
2697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2698DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2699#else
2700DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2701{
2702# if RT_INLINE_ASM_USES_INTRIN
2703 __stosd((unsigned long *)pv, 0, cb >> 2);
2704
2705# elif RT_INLINE_ASM_GNU_STYLE
2706 __asm__ __volatile__ ("rep stosl"
2707 : "=D" (pv),
2708 "=c" (cb)
2709 : "0" (pv),
2710 "1" (cb >> 2),
2711 "a" (0)
2712 : "memory");
2713# else
2714 __asm
2715 {
2716 xor eax, eax
2717# ifdef RT_ARCH_AMD64
2718 mov rcx, [cb]
2719 shr rcx, 2
2720 mov rdi, [pv]
2721# else
2722 mov ecx, [cb]
2723 shr ecx, 2
2724 mov edi, [pv]
2725# endif
2726 rep stosd
2727 }
2728# endif
2729}
2730#endif
2731
2732
2733/**
2734 * Fills a memory block with a 32-bit aligned size.
2735 *
     * Every implementation below stores (cb >> 2) copies of u32 starting at
     * pv, so bytes beyond the last full dword are not written.
     *
2736 * @param pv Pointer to the memory block.
2737 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2738 * @param u32 The value to fill with.
2739 */
2740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2741DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2742#else
2743DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2744{
2745# if RT_INLINE_ASM_USES_INTRIN
     /* The fill value must be u32 (not 0), matching the two assembly
        variants below which load u32 into eax. */
2746 __stosd((unsigned long *)pv, u32, cb >> 2);
2747
2748# elif RT_INLINE_ASM_GNU_STYLE
     /* pv and cb are tied to the destination and count registers and listed
        as outputs because 'rep stosl' modifies both. */
2749 __asm__ __volatile__ ("rep stosl"
2750 : "=D" (pv),
2751 "=c" (cb)
2752 : "0" (pv),
2753 "1" (cb >> 2),
2754 "a" (u32)
2755 : "memory");
2756# else
     /* MSC style inline assembly: count = cb / 4, destination = pv, eax = u32. */
2757 __asm
2758 {
2759# ifdef RT_ARCH_AMD64
2760 mov rcx, [cb]
2761 shr rcx, 2
2762 mov rdi, [pv]
2763# else
2764 mov ecx, [cb]
2765 shr ecx, 2
2766 mov edi, [pv]
2767# endif
2768 mov eax, [u32]
2769 rep stosd
2770 }
2771# endif
2772}
2773#endif
2774
2775
2776
2777/**
2778 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2779 *
     * On AMD64 this is a plain C 64-bit multiplication; otherwise a single
     * 32-bit 'mul' is used and its edx:eax result is returned.
     *
2780 * @returns u32F1 * u32F2.
     * @param u32F1 The first factor.
     * @param u32F2 The second factor.
2781 */
2782#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2783DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2784#else
2785DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2786{
2787# ifdef RT_ARCH_AMD64
2788 return (uint64_t)u32F1 * u32F2;
2789# else /* !RT_ARCH_AMD64 */
2790 uint64_t u64;
2791# if RT_INLINE_ASM_GNU_STYLE
     /* eax = u32F2, edx = u32F1; the "=A" output picks up the edx:eax pair. */
2792 __asm__ __volatile__("mull %%edx"
2793 : "=A" (u64)
2794 : "a" (u32F2), "d" (u32F1));
2795# else
     /* MSC style: multiply, then store the low and high halves of u64. */
2796 __asm
2797 {
2798 mov edx, [u32F1]
2799 mov eax, [u32F2]
2800 mul edx
2801 mov dword ptr [u64], eax
2802 mov dword ptr [u64 + 4], edx
2803 }
2804# endif
2805 return u64;
2806# endif /* !RT_ARCH_AMD64 */
2807}
2808#endif
2809
2810
2811/**
2812 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2813 *
     * On AMD64 this is a plain C 64-bit multiplication; otherwise a single
     * 32-bit 'imul' is used and its edx:eax result is returned.
     *
2814 * @returns i32F1 * i32F2.
     * @param i32F1 The first factor.
     * @param i32F2 The second factor.
2815 */
2816#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2817DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2818#else
2819DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2820{
2821# ifdef RT_ARCH_AMD64
2822 return (int64_t)i32F1 * i32F2;
2823# else /* !RT_ARCH_AMD64 */
2824 int64_t i64;
2825# if RT_INLINE_ASM_GNU_STYLE
     /* eax = i32F2, edx = i32F1; the "=A" output picks up the edx:eax pair. */
2826 __asm__ __volatile__("imull %%edx"
2827 : "=A" (i64)
2828 : "a" (i32F2), "d" (i32F1));
2829# else
     /* MSC style: multiply, then store the low and high halves of i64. */
2830 __asm
2831 {
2832 mov edx, [i32F1]
2833 mov eax, [i32F2]
2834 imul edx
2835 mov dword ptr [i64], eax
2836 mov dword ptr [i64 + 4], edx
2837 }
2838# endif
2839 return i64;
2840# endif /* !RT_ARCH_AMD64 */
2841}
2842#endif
2843
2844
2845/**
2846 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2847 *
     * NOTE(review): the AMD64 path truncates the 64-bit quotient to 32 bits,
     * while the x86 paths use a 32-bit 'div' which presumably raises a divide
     * error when the quotient does not fit - confirm that callers guarantee
     * the quotient fits into 32 bits. Division by zero is not checked here.
     *
2848 * @returns u64 / u32.
     * @param u64 The dividend.
     * @param u32 The divisor.
2849 */
2850#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2851DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2852#else
2853DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2854{
2855# ifdef RT_ARCH_AMD64
2856 return (uint32_t)(u64 / u32);
2857# else /* !RT_ARCH_AMD64 */
2858# if RT_INLINE_ASM_GNU_STYLE
     /* edx:eax = u64 ("A"), divisor in a free register; the quotient (eax)
        is written back into u32, the remainder (edx) goes to a dummy. */
2859 RTUINTREG uDummy;
2860 __asm__ __volatile__("divl %3"
2861 : "=a" (u32), "=d"(uDummy)
2862 : "A" (u64), "r" (u32));
2863# else
     /* MSC style: the quotient is stored back into the u32 parameter. */
2864 __asm
2865 {
2866 mov eax, dword ptr [u64]
2867 mov edx, dword ptr [u64 + 4]
2868 mov ecx, [u32]
2869 div ecx
2870 mov [u32], eax
2871 }
2872# endif
2873 return u32;
2874# endif /* !RT_ARCH_AMD64 */
2875}
2876#endif
2877
2878
2879/**
2880 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2881 *
     * NOTE(review): the AMD64 path truncates the 64-bit quotient to 32 bits,
     * while the x86 paths use a 32-bit 'idiv' which presumably raises a divide
     * error when the quotient does not fit - confirm that callers guarantee
     * the quotient fits into 32 bits. Division by zero is not checked here.
     *
2882 * @returns i64 / i32.
     * @param i64 The dividend.
     * @param i32 The divisor.
2883 */
2884#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2885DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2886#else
2887DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2888{
2889# ifdef RT_ARCH_AMD64
2890 return (int32_t)(i64 / i32);
2891# else /* !RT_ARCH_AMD64 */
2892# if RT_INLINE_ASM_GNU_STYLE
     /* edx:eax = i64 ("A"), divisor in a free register; the quotient (eax)
        is written back into i32, the remainder (edx) goes to a dummy. */
2893 RTUINTREG iDummy;
2894 __asm__ __volatile__("idivl %3"
2895 : "=a" (i32), "=d"(iDummy)
2896 : "A" (i64), "r" (i32));
2897# else
     /* MSC style: the quotient is stored back into the i32 parameter. */
2898 __asm
2899 {
2900 mov eax, dword ptr [i64]
2901 mov edx, dword ptr [i64 + 4]
2902 mov ecx, [i32]
2903 idiv ecx
2904 mov [i32], eax
2905 }
2906# endif
2907 return i32;
2908# endif /* !RT_ARCH_AMD64 */
2909}
2910#endif
2911
2912
2913/**
2914 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
2915 * using a 96 bit intermediate result.
2916 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2917 * __udivdi3 and __umoddi3 even if this inline function is not used.
     * @note The non-GNU fallback at the end of the function does use 64-bit C
     * arithmetic; the note above applies to the GNU style paths.
2918 *
2919 * @returns (u64A * u32B) / u32C.
2920 * @param u64A The 64-bit value.
2921 * @param u32B The 32-bit value to multiply A by.
2922 * @param u32C The 32-bit value to divide A*B by.
2923 */
2924#if RT_INLINE_ASM_EXTERNAL
2925DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2926#else
2927DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2928{
2929# if RT_INLINE_ASM_GNU_STYLE
2930# ifdef RT_ARCH_AMD64
     /* rax = u64A and rdx = 0 on entry; 'mulq' leaves the product in rdx:rax
        and 'divq' divides that pair by u32C, leaving the quotient in rax. */
2931 uint64_t u64Result, u64Spill;
2932 __asm__ __volatile__("mulq %2\n\t"
2933 "divq %3\n\t"
2934 : "=a" (u64Result),
2935 "=d" (u64Spill)
2936 : "r" ((uint64_t)u32B),
2937 "r" ((uint64_t)u32C),
2938 "0" (u64A),
2939 "1" (0));
2940 return u64Result;
2941# else
     /* 32-bit: long multiplication of the two halves of u64A by u32B followed
        by a two step long division by u32C.  Register assignment on entry:
        eax = u64A.lo, esi = u64A.hi, ecx = u32B, edi = u32C. */
2942 uint32_t u32Dummy;
2943 uint64_t u64Result;
2944 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2945 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2946 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2947 eax = u64A.hi */
2948 "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
2949 edx = u32C */
2950 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2951 edx = u32B */
2952 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2953 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2954 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2955 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2956 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2957 edx = u64Hi % u32C */
2958 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2959 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2960 "divl %%ecx \n\t" /* u64Result.lo */
2961 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2962 : "=A"(u64Result), "=c"(u32Dummy),
2963 "=S"(u32Dummy), "=D"(u32Dummy)
2964 : "a"((uint32_t)u64A),
2965 "S"((uint32_t)(u64A >> 32)),
2966 "c"(u32B),
2967 "D"(u32C));
2968 return u64Result;
2969# endif
2970# else
     /* Portable fallback: same long multiplication / long division scheme
        written in C; each quotient half is truncated to 32 bits. */
2971 RTUINT64U u;
2972 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2973 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2974 u64Hi += (u64Lo >> 32);
2975 u.s.Hi = (uint32_t)(u64Hi / u32C);
2976 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
2977 return u.u;
2978# endif
2979}
2980#endif
2981
2982
2983/**
2984 * Probes a byte pointer for read access.
2985 *
2986 * The probe is a plain one byte load with no fault handling visible
2987 * here, so an inaccessible byte is expected to fault in this function;
2988 * the idea is to do this in a safe place like before acquiring locks
     * and such like.
2989 *
2990 * Also, this function guarantees that an eager compiler is not going
2991 * to optimize the probing away.
2992 *
     * @returns The byte that was read.
2993 * @param pvByte Pointer to the byte.
2994 */
2995#if RT_INLINE_ASM_EXTERNAL
2996DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
2997#else
2998DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
2999{
3000 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3001 uint8_t u8;
3002# if RT_INLINE_ASM_GNU_STYLE
     /* Volatile asm load of one byte from *pvByte into u8. */
3003 __asm__ __volatile__("movb (%1), %0\n\t"
3004 : "=r" (u8)
3005 : "r" (pvByte));
3006# else
     /* MSC style: load the pointer, read the byte through it, store to u8. */
3007 __asm
3008 {
3009# ifdef RT_ARCH_AMD64
3010 mov rax, [pvByte]
3011 mov al, [rax]
3012# else
3013 mov eax, [pvByte]
3014 mov al, [eax]
3015# endif
3016 mov [u8], al
3017 }
3018# endif
3019 return u8;
3020}
3021#endif
3022
3023/**
3024 * Probes a buffer for read access page by page.
3025 *
3026 * While the function will fault if the buffer is not fully read
3027 * accessible, the idea is to do this in a safe place like before
3028 * acquiring locks and such like.
3029 *
3030 * Also, this function guarantees that an eager compiler is not going
3031 * to optimize the probing away.
3032 *
     * One byte is read at the start of the buffer, one at every 0x1000 byte
     * step from the start, and finally the very last byte.  Since consecutive
     * probes are never more than 0x1000 bytes apart, every 4KB page the
     * buffer touches is read at least once, whatever the buffer alignment.
     *
3033 * @param pvBuf Pointer to the buffer.
3034 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3035 */
3036DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3037{
3038 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3039 /* the first byte */
3040 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3041 ASMProbeReadByte(pu8);
3042
3043 /* the pages in between: advance first, then probe, so that the first
        byte is not probed twice and the position reached when the loop
        ends has been probed too. */
3044 while (cbBuf > /*PAGE_SIZE*/0x1000)
3045 {
3046 cbBuf -= /*PAGE_SIZE*/0x1000;
3047 pu8 += /*PAGE_SIZE*/0x1000;
3048 ASMProbeReadByte(pu8);
3049 }
3050
3051 /* the last byte (at most 0xfff bytes past the previous probe) */
3052 ASMProbeReadByte(pu8 + cbBuf - 1);
3053}
3054
3055
3056/** @def ASMBreakpoint
3057 * Debugger Breakpoint.
3058 * @remark In the gnu world we add a nop instruction after the int3 to
3059 * force gdb to remain at the int3 source line.
3060 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
     * @remark When not using GNU style inline assembly the macro expands to
     * __debugbreak().
3061 * @internal
3062 */
3063#if RT_INLINE_ASM_GNU_STYLE
3064# ifndef __L4ENV__
3065# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3066# else
3067# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3068# endif
3069#else
3070# define ASMBreakpoint() __debugbreak()
3071#endif
3072
3073
3074
3075/** @defgroup grp_inline_bits Bit Operations
3076 * @{
3077 */
3078
3079
3080/**
3081 * Sets a bit in a bitmap.
3082 *
     * Not atomic: no lock prefix / interlocked intrinsic is used here, see
     * ASMAtomicBitSet for that.
     *
3083 * @param pvBitmap Pointer to the bitmap.
3084 * @param iBit The bit to set.
3085 */
3086#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3087DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3088#else
3089DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3090{
3091# if RT_INLINE_ASM_USES_INTRIN
3092 _bittestandset((long *)pvBitmap, iBit);
3093
3094# elif RT_INLINE_ASM_GNU_STYLE
     /* NOTE(review): the bitmap is declared write-only ("=m") although bts
        reads and modifies it; the "memory" clobber presumably covers the
        read - confirm, or consider "+m". */
3095 __asm__ __volatile__ ("btsl %1, %0"
3096 : "=m" (*(volatile long *)pvBitmap)
3097 : "Ir" (iBit)
3098 : "memory");
3099# else
3100 __asm
3101 {
3102# ifdef RT_ARCH_AMD64
3103 mov rax, [pvBitmap]
3104 mov edx, [iBit]
3105 bts [rax], edx
3106# else
3107 mov eax, [pvBitmap]
3108 mov edx, [iBit]
3109 bts [eax], edx
3110# endif
3111 }
3112# endif
3113}
3114#endif
3115
3116
3117/**
3118 * Atomically sets a bit in a bitmap.
3119 *
     * Same operation as ASMBitSet, but using the interlocked intrinsic or a
     * lock prefixed 'bts'.
     *
3120 * @param pvBitmap Pointer to the bitmap.
3121 * @param iBit The bit to set.
3122 */
3123#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3124DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3125#else
3126DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3127{
3128# if RT_INLINE_ASM_USES_INTRIN
3129 _interlockedbittestandset((long *)pvBitmap, iBit);
3130# elif RT_INLINE_ASM_GNU_STYLE
3131 __asm__ __volatile__ ("lock; btsl %1, %0"
3132 : "=m" (*(volatile long *)pvBitmap)
3133 : "Ir" (iBit)
3134 : "memory");
3135# else
3136 __asm
3137 {
3138# ifdef RT_ARCH_AMD64
3139 mov rax, [pvBitmap]
3140 mov edx, [iBit]
3141 lock bts [rax], edx
3142# else
3143 mov eax, [pvBitmap]
3144 mov edx, [iBit]
3145 lock bts [eax], edx
3146# endif
3147 }
3148# endif
3149}
3150#endif
3151
3152
3153/**
3154 * Clears a bit in a bitmap.
3155 *
     * Not atomic: no lock prefix / interlocked intrinsic is used here, see
     * ASMAtomicBitClear for that.
     *
3156 * @param pvBitmap Pointer to the bitmap.
3157 * @param iBit The bit to clear.
3158 */
3159#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3160DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3161#else
3162DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3163{
3164# if RT_INLINE_ASM_USES_INTRIN
3165 _bittestandreset((long *)pvBitmap, iBit);
3166
3167# elif RT_INLINE_ASM_GNU_STYLE
3168 __asm__ __volatile__ ("btrl %1, %0"
3169 : "=m" (*(volatile long *)pvBitmap)
3170 : "Ir" (iBit)
3171 : "memory");
3172# else
3173 __asm
3174 {
3175# ifdef RT_ARCH_AMD64
3176 mov rax, [pvBitmap]
3177 mov edx, [iBit]
3178 btr [rax], edx
3179# else
3180 mov eax, [pvBitmap]
3181 mov edx, [iBit]
3182 btr [eax], edx
3183# endif
3184 }
3185# endif
3186}
3187#endif
3188
3189
3190/**
3191 * Atomically clears a bit in a bitmap.
3192 *
     * Implemented with a lock prefixed 'btr'; unlike ASMAtomicBitSet there is
     * no intrinsic variant in this function.
     *
3193 * @param pvBitmap Pointer to the bitmap.
3194 * @param iBit The bit to clear.
3195 * @remark No memory barrier, take care on smp.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL
3198DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3199#else
3200DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3201{
3202# if RT_INLINE_ASM_GNU_STYLE
3203 __asm__ __volatile__ ("lock; btrl %1, %0"
3204 : "=m" (*(volatile long *)pvBitmap)
3205 : "Ir" (iBit)
3206 : "memory");
3207# else
3208 __asm
3209 {
3210# ifdef RT_ARCH_AMD64
3211 mov rax, [pvBitmap]
3212 mov edx, [iBit]
3213 lock btr [rax], edx
3214# else
3215 mov eax, [pvBitmap]
3216 mov edx, [iBit]
3217 lock btr [eax], edx
3218# endif
3219 }
3220# endif
3221}
3222#endif
3223
3224
3225/**
3226 * Toggles a bit in a bitmap.
3227 *
     * Not atomic: no lock prefix / interlocked intrinsic is used here, see
     * ASMAtomicBitToggle for that.
     *
3228 * @param pvBitmap Pointer to the bitmap.
3229 * @param iBit The bit to toggle.
3230 */
3231#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3232DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3233#else
3234DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3235{
3236# if RT_INLINE_ASM_USES_INTRIN
3237 _bittestandcomplement((long *)pvBitmap, iBit);
3238# elif RT_INLINE_ASM_GNU_STYLE
3239 __asm__ __volatile__ ("btcl %1, %0"
3240 : "=m" (*(volatile long *)pvBitmap)
3241 : "Ir" (iBit)
3242 : "memory");
3243# else
3244 __asm
3245 {
3246# ifdef RT_ARCH_AMD64
3247 mov rax, [pvBitmap]
3248 mov edx, [iBit]
3249 btc [rax], edx
3250# else
3251 mov eax, [pvBitmap]
3252 mov edx, [iBit]
3253 btc [eax], edx
3254# endif
3255 }
3256# endif
3257}
3258#endif
3259
3260
3261/**
3262 * Atomically toggles a bit in a bitmap.
3263 *
     * Implemented with a lock prefixed 'btc'; there is no intrinsic variant
     * in this function.
     *
3264 * @param pvBitmap Pointer to the bitmap.
3265 * @param iBit The bit to toggle.
3266 */
3267#if RT_INLINE_ASM_EXTERNAL
3268DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3269#else
3270DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3271{
3272# if RT_INLINE_ASM_GNU_STYLE
3273 __asm__ __volatile__ ("lock; btcl %1, %0"
3274 : "=m" (*(volatile long *)pvBitmap)
3275 : "Ir" (iBit)
3276 : "memory");
3277# else
3278 __asm
3279 {
3280# ifdef RT_ARCH_AMD64
3281 mov rax, [pvBitmap]
3282 mov edx, [iBit]
3283 lock btc [rax], edx
3284# else
3285 mov eax, [pvBitmap]
3286 mov edx, [iBit]
3287 lock btc [eax], edx
3288# endif
3289 }
3290# endif
3291}
3292#endif
3293
3294
3295/**
3296 * Tests and sets a bit in a bitmap.
3297 *
     * Not atomic. The previous state of the bit is returned: the assembly
     * variants take it from the carry flag left by 'bts' (setc) and mask it
     * down to 0 or 1.
     *
3298 * @returns true if the bit was set.
3299 * @returns false if the bit was clear.
3300 * @param pvBitmap Pointer to the bitmap.
3301 * @param iBit The bit to test and set.
3302 */
3303#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3304DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3305#else
3306DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3307{
     /* The union lets each variant write the result at its natural width
        while the function returns it as a bool. */
3308 union { bool f; uint32_t u32; uint8_t u8; } rc;
3309# if RT_INLINE_ASM_USES_INTRIN
3310 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3311
3312# elif RT_INLINE_ASM_GNU_STYLE
3313 __asm__ __volatile__ ("btsl %2, %1\n\t"
3314 "setc %b0\n\t"
3315 "andl $1, %0\n\t"
3316 : "=q" (rc.u32),
3317 "=m" (*(volatile long *)pvBitmap)
3318 : "Ir" (iBit)
3319 : "memory");
3320# else
3321 __asm
3322 {
3323 mov edx, [iBit]
3324# ifdef RT_ARCH_AMD64
3325 mov rax, [pvBitmap]
3326 bts [rax], edx
3327# else
3328 mov eax, [pvBitmap]
3329 bts [eax], edx
3330# endif
3331 setc al
3332 and eax, 1
3333 mov [rc.u32], eax
3334 }
3335# endif
3336 return rc.f;
3337}
3338#endif
3339
3340
3341/**
3342 * Atomically tests and sets a bit in a bitmap.
3343 *
     * Same as ASMBitTestAndSet, but using the interlocked intrinsic or a lock
     * prefixed 'bts'. The previous state of the bit is returned.
     *
3344 * @returns true if the bit was set.
3345 * @returns false if the bit was clear.
3346 * @param pvBitmap Pointer to the bitmap.
3347 * @param iBit The bit to set.
3348 */
3349#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3350DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3351#else
3352DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3353{
     /* The union lets each variant write the result at its natural width
        while the function returns it as a bool. */
3354 union { bool f; uint32_t u32; uint8_t u8; } rc;
3355# if RT_INLINE_ASM_USES_INTRIN
3356 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3357# elif RT_INLINE_ASM_GNU_STYLE
3358 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3359 "setc %b0\n\t"
3360 "andl $1, %0\n\t"
3361 : "=q" (rc.u32),
3362 "=m" (*(volatile long *)pvBitmap)
3363 : "Ir" (iBit)
3364 : "memory");
3365# else
3366 __asm
3367 {
3368 mov edx, [iBit]
3369# ifdef RT_ARCH_AMD64
3370 mov rax, [pvBitmap]
3371 lock bts [rax], edx
3372# else
3373 mov eax, [pvBitmap]
3374 lock bts [eax], edx
3375# endif
3376 setc al
3377 and eax, 1
3378 mov [rc.u32], eax
3379 }
3380# endif
3381 return rc.f;
3382}
3383#endif
3384
3385
3386/**
3387 * Tests and clears a bit in a bitmap.
3388 *
     * Not atomic. The previous state of the bit is returned: the assembly
     * variants take it from the carry flag left by 'btr' (setc) and mask it
     * down to 0 or 1.
     *
3389 * @returns true if the bit was set.
3390 * @returns false if the bit was clear.
3391 * @param pvBitmap Pointer to the bitmap.
3392 * @param iBit The bit to test and clear.
3393 */
3394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3395DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3396#else
3397DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3398{
     /* The union lets each variant write the result at its natural width
        while the function returns it as a bool. */
3399 union { bool f; uint32_t u32; uint8_t u8; } rc;
3400# if RT_INLINE_ASM_USES_INTRIN
3401 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3402
3403# elif RT_INLINE_ASM_GNU_STYLE
3404 __asm__ __volatile__ ("btrl %2, %1\n\t"
3405 "setc %b0\n\t"
3406 "andl $1, %0\n\t"
3407 : "=q" (rc.u32),
3408 "=m" (*(volatile long *)pvBitmap)
3409 : "Ir" (iBit)
3410 : "memory");
3411# else
3412 __asm
3413 {
3414 mov edx, [iBit]
3415# ifdef RT_ARCH_AMD64
3416 mov rax, [pvBitmap]
3417 btr [rax], edx
3418# else
3419 mov eax, [pvBitmap]
3420 btr [eax], edx
3421# endif
3422 setc al
3423 and eax, 1
3424 mov [rc.u32], eax
3425 }
3426# endif
3427 return rc.f;
3428}
3429#endif
3430
3431
3432/**
3433 * Atomically tests and clears a bit in a bitmap.
3434 *
     * Same as ASMBitTestAndClear, but using the interlocked intrinsic or a
     * lock prefixed 'btr'. The previous state of the bit is returned.
     *
3435 * @returns true if the bit was set.
3436 * @returns false if the bit was clear.
3437 * @param pvBitmap Pointer to the bitmap.
3438 * @param iBit The bit to test and clear.
3439 * @remark No memory barrier, take care on smp.
3440 */
3441#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3442DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3443#else
3444DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3445{
     /* The union lets each variant write the result at its natural width
        while the function returns it as a bool. */
3446 union { bool f; uint32_t u32; uint8_t u8; } rc;
3447# if RT_INLINE_ASM_USES_INTRIN
3448 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3449
3450# elif RT_INLINE_ASM_GNU_STYLE
3451 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3452 "setc %b0\n\t"
3453 "andl $1, %0\n\t"
3454 : "=q" (rc.u32),
3455 "=m" (*(volatile long *)pvBitmap)
3456 : "Ir" (iBit)
3457 : "memory");
3458# else
3459 __asm
3460 {
3461 mov edx, [iBit]
3462# ifdef RT_ARCH_AMD64
3463 mov rax, [pvBitmap]
3464 lock btr [rax], edx
3465# else
3466 mov eax, [pvBitmap]
3467 lock btr [eax], edx
3468# endif
3469 setc al
3470 and eax, 1
3471 mov [rc.u32], eax
3472 }
3473# endif
3474 return rc.f;
3475}
3476#endif
3477
3478
3479/**
3480 * Tests and toggles a bit in a bitmap.
3481 *
3482 * @returns true if the bit was set.
3483 * @returns false if the bit was clear.
3484 * @param pvBitmap Pointer to the bitmap.
3485 * @param iBit The bit to test and toggle.
3486 */
3487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3488DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3489#else
3490DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3491{
3492 union { bool f; uint32_t u32; uint8_t u8; } rc;
3493# if RT_INLINE_ASM_USES_INTRIN
3494 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3495
3496# elif RT_INLINE_ASM_GNU_STYLE
3497 __asm__ __volatile__ ("btcl %2, %1\n\t"
3498 "setc %b0\n\t"
3499 "andl $1, %0\n\t"
3500 : "=q" (rc.u32),
3501 "=m" (*(volatile long *)pvBitmap)
3502 : "Ir" (iBit)
3503 : "memory");
3504# else
3505 __asm
3506 {
3507 mov edx, [iBit]
3508# ifdef RT_ARCH_AMD64
3509 mov rax, [pvBitmap]
3510 btc [rax], edx
3511# else
3512 mov eax, [pvBitmap]
3513 btc [eax], edx
3514# endif
3515 setc al
3516 and eax, 1
3517 mov [rc.u32], eax
3518 }
3519# endif
3520 return rc.f;
3521}
3522#endif
3523
3524
3525/**
3526 * Atomically tests and toggles a bit in a bitmap.
3527 *
     * Implemented with a lock prefixed 'btc'; there is no intrinsic variant
     * in this function. The previous state of the bit is returned.
     *
3528 * @returns true if the bit was set.
3529 * @returns false if the bit was clear.
3530 * @param pvBitmap Pointer to the bitmap.
3531 * @param iBit The bit to test and toggle.
3532 */
3533#if RT_INLINE_ASM_EXTERNAL
3534DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3535#else
3536DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3537{
     /* The union lets the assembly write a 32-bit result while the function
        returns it as a bool. */
3538 union { bool f; uint32_t u32; uint8_t u8; } rc;
3539# if RT_INLINE_ASM_GNU_STYLE
3540 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3541 "setc %b0\n\t"
3542 "andl $1, %0\n\t"
3543 : "=q" (rc.u32),
3544 "=m" (*(volatile long *)pvBitmap)
3545 : "Ir" (iBit)
3546 : "memory");
3547# else
3548 __asm
3549 {
3550 mov edx, [iBit]
3551# ifdef RT_ARCH_AMD64
3552 mov rax, [pvBitmap]
3553 lock btc [rax], edx
3554# else
3555 mov eax, [pvBitmap]
3556 lock btc [eax], edx
3557# endif
3558 setc al
3559 and eax, 1
3560 mov [rc.u32], eax
3561 }
3562# endif
3563 return rc.f;
3564}
3565#endif
3566
3567
3568/**
3569 * Tests if a bit in a bitmap is set.
3570 *
     * The bitmap is only read ('bt' / _bittest); the assembly variants take
     * the result from the carry flag (setc) and mask it down to 0 or 1.
     *
3571 * @returns true if the bit is set.
3572 * @returns false if the bit is clear.
3573 * @param pvBitmap Pointer to the bitmap.
3574 * @param iBit The bit to test.
3575 */
3576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3577DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3578#else
3579DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3580{
3581 union { bool f; uint32_t u32; uint8_t u8; } rc;
3582# if RT_INLINE_ASM_USES_INTRIN
3583 rc.u32 = _bittest((long *)pvBitmap, iBit);
3584# elif RT_INLINE_ASM_GNU_STYLE
3585
     /* NOTE(review): the bitmap is listed as an output ("=m") although 'bt'
        does not write it; an input operand would presumably describe this
        better - confirm before changing. */
3586 __asm__ __volatile__ ("btl %2, %1\n\t"
3587 "setc %b0\n\t"
3588 "andl $1, %0\n\t"
3589 : "=q" (rc.u32),
3590 "=m" (*(volatile long *)pvBitmap)
3591 : "Ir" (iBit)
3592 : "memory");
3593# else
3594 __asm
3595 {
3596 mov edx, [iBit]
3597# ifdef RT_ARCH_AMD64
3598 mov rax, [pvBitmap]
3599 bt [rax], edx
3600# else
3601 mov eax, [pvBitmap]
3602 bt [eax], edx
3603# endif
3604 setc al
3605 and eax, 1
3606 mov [rc.u32], eax
3607 }
3608# endif
3609 return rc.f;
3610}
3611#endif
3612
3613
3614/**
3615 * Clears a bit range within a bitmap.
3616 *
     * The range is half open: bits iBitStart up to but not including iBitEnd
     * are cleared. Nothing is done unless iBitStart < iBitEnd. The bitmap is
     * accessed as an array of 32-bit words; whole words inside the range are
     * cleared with ASMMemZero32, partial words at either end are masked.
     *
3617 * @param pvBitmap Pointer to the bitmap.
3618 * @param iBitStart The first bit to clear.
3619 * @param iBitEnd The first bit not to clear.
3620 */
3621DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3622{
3623 if (iBitStart < iBitEnd)
3624 {
3625 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3626 int iStart = iBitStart & ~31;
3627 int iEnd = iBitEnd & ~31;
     /* All masks are built from an unsigned 1 so that a shift count of 31
        does not shift into the sign bit of a signed int. */
3628 if (iStart == iEnd)
     /* Start and end are in the same dword: keep the bits below
        iBitStart and the bits from iBitEnd upwards. */
3629 *pu32 &= (((uint32_t)1 << (iBitStart & 31)) - 1) | ~(((uint32_t)1 << (iBitEnd & 31)) - 1);
3630 else
3631 {
3632 /* bits in first dword. */
3633 if (iBitStart & 31)
3634 {
3635 *pu32 &= ((uint32_t)1 << (iBitStart & 31)) - 1;
3636 pu32++;
3637 iBitStart = iStart + 32;
3638 }
3639
3640 /* whole dword. (bit count >> 3 = byte count) */
3641 if (iBitStart != iEnd)
3642 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3643
3644 /* bits in last dword. */
3645 if (iBitEnd & 31)
3646 {
3647 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3648 *pu32 &= ~(((uint32_t)1 << (iBitEnd & 31)) - 1);
3649 }
3650 }
3651 }
3652}
3653
3654
3655/**
3656 * Finds the first clear bit in a bitmap.
3657 *
     * The bitmap is scanned one 32-bit word at a time ('repe scas') for the
     * first word that is not 0xffffffff; that word is inverted (xor with
     * 0xffffffff) and 'bsf' picks its lowest set bit. The byte offset of the
     * word times 8 is added to give the bit index. cBits is rounded up to a
     * multiple of 32 before scanning.
     *
3658 * @returns Index of the first zero bit.
3659 * @returns -1 if no clear bit was found.
3660 * @param pvBitmap Pointer to the bitmap.
3661 * @param cBits The number of bits in the bitmap. Multiple of 32.
3662 */
3663#if RT_INLINE_ASM_EXTERNAL
3664DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3665#else
3666DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3667{
3668 if (cBits)
3669 {
3670 int32_t iBit;
3671# if RT_INLINE_ASM_GNU_STYLE
     /* edx (iBit) is preloaded with 0xffffffff so that the 'je 1f' taken
        when every word matched leaves -1 as the result. %5 is the
        original pvBitmap, used to turn edi back into a byte offset. */
3672 RTCCUINTREG uEAX, uECX, uEDI;
3673 cBits = RT_ALIGN_32(cBits, 32);
3674 __asm__ __volatile__("repe; scasl\n\t"
3675 "je 1f\n\t"
3676# ifdef RT_ARCH_AMD64
3677 "lea -4(%%rdi), %%rdi\n\t"
3678 "xorl (%%rdi), %%eax\n\t"
3679 "subq %5, %%rdi\n\t"
3680# else
3681 "lea -4(%%edi), %%edi\n\t"
3682 "xorl (%%edi), %%eax\n\t"
3683 "subl %5, %%edi\n\t"
3684# endif
3685 "shll $3, %%edi\n\t"
3686 "bsfl %%eax, %%edx\n\t"
3687 "addl %%edi, %%edx\n\t"
3688 "1:\t\n"
3689 : "=d" (iBit),
3690 "=&c" (uECX),
3691 "=&D" (uEDI),
3692 "=&a" (uEAX)
3693 : "0" (0xffffffff),
3694 "mr" (pvBitmap),
3695 "1" (cBits >> 5),
3696 "2" (pvBitmap),
3697 "3" (0xffffffff));
3698# else
     /* MSC style: same algorithm; ebx/rbx keeps the bitmap base and edx
        starts out as 0xffffffff (-1) for the not-found case. */
3699 cBits = RT_ALIGN_32(cBits, 32);
3700 __asm
3701 {
3702# ifdef RT_ARCH_AMD64
3703 mov rdi, [pvBitmap]
3704 mov rbx, rdi
3705# else
3706 mov edi, [pvBitmap]
3707 mov ebx, edi
3708# endif
3709 mov edx, 0ffffffffh
3710 mov eax, edx
3711 mov ecx, [cBits]
3712 shr ecx, 5
3713 repe scasd
3714 je done
3715
3716# ifdef RT_ARCH_AMD64
3717 lea rdi, [rdi - 4]
3718 xor eax, [rdi]
3719 sub rdi, rbx
3720# else
3721 lea edi, [edi - 4]
3722 xor eax, [edi]
3723 sub edi, ebx
3724# endif
3725 shl edi, 3
3726 bsf edx, eax
3727 add edx, edi
3728 done:
3729 mov [iBit], edx
3730 }
3731# endif
3732 return iBit;
3733 }
3734 return -1;
3735}
3736#endif
3737
3738
3739/**
3740 * Finds the next clear bit in a bitmap.
3741 *
     * The search starts at iBitPrev + 1. If that position is not on a 32-bit
     * boundary, the remainder of the word containing it is inspected first;
     * the rest of the bitmap is then handed to ASMBitFirstClear.
     *
3742 * @returns Index of the next zero bit.
3743 * @returns -1 if no clear bit was found.
3744 * @param pvBitmap Pointer to the bitmap.
3745 * @param cBits The number of bits in the bitmap. Multiple of 32.
3746 * @param iBitPrev The bit returned from the last search.
3747 * The search will start at iBitPrev + 1.
3748 */
3749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3750DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3751#else
3752DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3753{
     /* From here on iBitPrev is the first bit to inspect, iBit its position
        within its dword, pvBitmap points at that dword and cBits counts the
        bits from the start of that dword. */
3754 int iBit = ++iBitPrev & 31;
3755 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3756 cBits -= iBitPrev & ~31;
3757 if (iBit)
3758 {
3759 /* inspect the first dword. (inverted, so clear bits become set, and
        shifted so that bit 0 corresponds to iBitPrev) */
3760 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3761# if RT_INLINE_ASM_USES_INTRIN
3762 unsigned long ulBit = 0;
3763 if (_BitScanForward(&ulBit, u32))
3764 return ulBit + iBitPrev;
3765 iBit = -1;
3766# else
3767# if RT_INLINE_ASM_GNU_STYLE
     /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
3768 __asm__ __volatile__("bsf %1, %0\n\t"
3769 "jnz 1f\n\t"
3770 "movl $-1, %0\n\t"
3771 "1:\n\t"
3772 : "=r" (iBit)
3773 : "r" (u32));
3774# else
3775 __asm
3776 {
3777 mov edx, [u32]
3778 bsf eax, edx
3779 jnz done
3780 mov eax, 0ffffffffh
3781 done:
3782 mov [iBit], eax
3783 }
3784# endif
3785 if (iBit >= 0)
3786 return iBit + iBitPrev;
3787# endif
3788 /* Search the rest of the bitmap, if there is anything. */
3789 if (cBits > 32)
3790 {
3791 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3792 if (iBit >= 0)
3793 return iBit + (iBitPrev & ~31) + 32;
3794 }
3795 }
3796 else
3797 {
3798 /* Search the rest of the bitmap. */
3799 iBit = ASMBitFirstClear(pvBitmap, cBits);
3800 if (iBit >= 0)
3801 return iBit + (iBitPrev & ~31);
3802 }
     /* Only reached with iBit == -1 (nothing found). */
3803 return iBit;
3804}
3805#endif
3806
3807
3808/**
3809 * Finds the first set bit in a bitmap.
3810 *
     * The bitmap is scanned one 32-bit word at a time ('repe scas') for the
     * first word that is not zero; 'bsf' then picks its lowest set bit and
     * the byte offset of the word times 8 is added to give the bit index.
     * cBits is rounded up to a multiple of 32 before scanning.
     *
3811 * @returns Index of the first set bit.
3812 * @returns -1 if no set bit was found.
3813 * @param pvBitmap Pointer to the bitmap.
3814 * @param cBits The number of bits in the bitmap. Multiple of 32.
3815 */
3816#if RT_INLINE_ASM_EXTERNAL
3817DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3818#else
3819DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3820{
3821 if (cBits)
3822 {
3823 int32_t iBit;
3824# if RT_INLINE_ASM_GNU_STYLE
     /* edx (iBit) is preloaded with 0xffffffff so that the 'je 1f' taken
        when every word was zero leaves -1 as the result. eax starts as 0,
        the value scanned for. %5 is the original pvBitmap. */
3825 RTCCUINTREG uEAX, uECX, uEDI;
3826 cBits = RT_ALIGN_32(cBits, 32);
3827 __asm__ __volatile__("repe; scasl\n\t"
3828 "je 1f\n\t"
3829# ifdef RT_ARCH_AMD64
3830 "lea -4(%%rdi), %%rdi\n\t"
3831 "movl (%%rdi), %%eax\n\t"
3832 "subq %5, %%rdi\n\t"
3833# else
3834 "lea -4(%%edi), %%edi\n\t"
3835 "movl (%%edi), %%eax\n\t"
3836 "subl %5, %%edi\n\t"
3837# endif
3838 "shll $3, %%edi\n\t"
3839 "bsfl %%eax, %%edx\n\t"
3840 "addl %%edi, %%edx\n\t"
3841 "1:\t\n"
3842 : "=d" (iBit),
3843 "=&c" (uECX),
3844 "=&D" (uEDI),
3845 "=&a" (uEAX)
3846 : "0" (0xffffffff),
3847 "mr" (pvBitmap),
3848 "1" (cBits >> 5),
3849 "2" (pvBitmap),
3850 "3" (0));
3851# else
     /* MSC style: same algorithm; ebx/rbx keeps the bitmap base and edx
        starts out as 0xffffffff (-1) for the not-found case. */
3852 cBits = RT_ALIGN_32(cBits, 32);
3853 __asm
3854 {
3855# ifdef RT_ARCH_AMD64
3856 mov rdi, [pvBitmap]
3857 mov rbx, rdi
3858# else
3859 mov edi, [pvBitmap]
3860 mov ebx, edi
3861# endif
3862 mov edx, 0ffffffffh
3863 xor eax, eax
3864 mov ecx, [cBits]
3865 shr ecx, 5
3866 repe scasd
3867 je done
3868# ifdef RT_ARCH_AMD64
3869 lea rdi, [rdi - 4]
3870 mov eax, [rdi]
3871 sub rdi, rbx
3872# else
3873 lea edi, [edi - 4]
3874 mov eax, [edi]
3875 sub edi, ebx
3876# endif
3877 shl edi, 3
3878 bsf edx, eax
3879 add edx, edi
3880 done:
3881 mov [iBit], edx
3882 }
3883# endif
3884 return iBit;
3885 }
3886 return -1;
3887}
3888#endif
3889
3890
3891/**
3892 * Finds the next set bit in a bitmap.
3893 *
     * The search starts at iBitPrev + 1. If that position is not on a 32-bit
     * boundary, the remainder of the word containing it is inspected first;
     * the rest of the bitmap is then handed to ASMBitFirstSet.
     *
3894 * @returns Index of the next set bit.
3895 * @returns -1 if no set bit was found.
3896 * @param pvBitmap Pointer to the bitmap.
3897 * @param cBits The number of bits in the bitmap. Multiple of 32.
3898 * @param iBitPrev The bit returned from the last search.
3899 * The search will start at iBitPrev + 1.
3900 */
3901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3902DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3903#else
3904DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3905{
     /* From here on iBitPrev is the first bit to inspect, iBit its position
        within its dword, pvBitmap points at that dword and cBits counts the
        bits from the start of that dword. */
3906 int iBit = ++iBitPrev & 31;
3907 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3908 cBits -= iBitPrev & ~31;
3909 if (iBit)
3910 {
3911 /* inspect the first dword. (shifted so that bit 0 corresponds to
        iBitPrev) */
3912 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3913# if RT_INLINE_ASM_USES_INTRIN
3914 unsigned long ulBit = 0;
3915 if (_BitScanForward(&ulBit, u32))
3916 return ulBit + iBitPrev;
3917 iBit = -1;
3918# else
3919# if RT_INLINE_ASM_GNU_STYLE
     /* iBit = index of the lowest set bit in u32, or -1 if u32 is zero. */
3920 __asm__ __volatile__("bsf %1, %0\n\t"
3921 "jnz 1f\n\t"
3922 "movl $-1, %0\n\t"
3923 "1:\n\t"
3924 : "=r" (iBit)
3925 : "r" (u32));
3926# else
3927 __asm
3928 {
3929 mov edx, u32
3930 bsf eax, edx
3931 jnz done
3932 mov eax, 0ffffffffh
3933 done:
3934 mov [iBit], eax
3935 }
3936# endif
3937 if (iBit >= 0)
3938 return iBit + iBitPrev;
3939# endif
3940 /* Search the rest of the bitmap, if there is anything. */
3941 if (cBits > 32)
3942 {
3943 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3944 if (iBit >= 0)
3945 return iBit + (iBitPrev & ~31) + 32;
3946 }
3947
3948 }
3949 else
3950 {
3951 /* Search the rest of the bitmap. */
3952 iBit = ASMBitFirstSet(pvBitmap, cBits);
3953 if (iBit >= 0)
3954 return iBit + (iBitPrev & ~31);
3955 }
     /* Only reached with iBit == -1 (nothing found). */
3956 return iBit;
3957}
3958#endif
3959
3960
3961/**
3962 * Finds the first bit which is set in the given 32-bit integer.
3963 * Bits are numbered from 1 (least significant) to 32.
3964 *
     * All variants do a forward bit scan and add one to the zero based index
     * it yields; a zero input gives 0.
     *
3965 * @returns index [1..32] of the first set bit.
3966 * @returns 0 if all bits are cleared.
3967 * @param u32 Integer to search for set bits.
3968 * @remark Similar to ffs() in BSD.
3969 */
3970DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3971{
3972# if RT_INLINE_ASM_USES_INTRIN
3973 unsigned long iBit;
3974 if (_BitScanForward(&iBit, u32))
3975 iBit++;
3976 else
3977 iBit = 0;
3978# elif RT_INLINE_ASM_GNU_STYLE
     /* bsf found a bit -> jump to 1: and increment; otherwise zero the
        result and skip the increment. */
3979 uint32_t iBit;
3980 __asm__ __volatile__("bsf %1, %0\n\t"
3981 "jnz 1f\n\t"
3982 "xorl %0, %0\n\t"
3983 "jmp 2f\n"
3984 "1:\n\t"
3985 "incl %0\n"
3986 "2:\n\t"
3987 : "=r" (iBit)
3988 : "rm" (u32));
3989# else
3990 uint32_t iBit;
3991 _asm
3992 {
3993 bsf eax, [u32]
3994 jnz found
3995 xor eax, eax
3996 jmp done
3997 found:
3998 inc eax
3999 done:
4000 mov [iBit], eax
4001 }
4002# endif
4003 return iBit;
4004}
4005
4006
4007/**
4008 * Finds the first bit which is set in the given 32-bit integer.
4009 * Bits are numbered from 1 (least significant) to 32.
4010 *
4011 * @returns index [1..32] of the first set bit.
4012 * @returns 0 if all bits are cleared.
4013 * @param i32 Integer to search for set bits.
4014 * @remark Similar to ffs() in BSD.
4015 */
4016DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4017{
4018 return ASMBitFirstSetU32((uint32_t)i32);
4019}
4020
4021
4022/**
4023 * Finds the last bit which is set in the given 32-bit integer.
4024 * Bits are numbered from 1 (least significant) to 32.
4025 *
4026 * @returns index [1..32] of the last set bit.
4027 * @returns 0 if all bits are cleared.
4028 * @param u32 Integer to search for set bits.
4029 * @remark Similar to fls() in BSD.
4030 */
4031DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4032{
4033# if RT_INLINE_ASM_USES_INTRIN
4034 unsigned long iBit;
4035 if (_BitScanReverse(&iBit, u32))
4036 iBit++;
4037 else
4038 iBit = 0;
4039# elif RT_INLINE_ASM_GNU_STYLE
4040 uint32_t iBit;
4041 __asm__ __volatile__("bsrl %1, %0\n\t"
4042 "jnz 1f\n\t"
4043 "xorl %0, %0\n\t"
4044 "jmp 2f\n"
4045 "1:\n\t"
4046 "incl %0\n"
4047 "2:\n\t"
4048 : "=r" (iBit)
4049 : "rm" (u32));
4050# else
4051 uint32_t iBit;
4052 _asm
4053 {
4054 bsr eax, [u32]
4055 jnz found
4056 xor eax, eax
4057 jmp done
4058 found:
4059 inc eax
4060 done:
4061 mov [iBit], eax
4062 }
4063# endif
4064 return iBit;
4065}
4066
4067
4068/**
4069 * Finds the last bit which is set in the given 32-bit integer.
4070 * Bits are numbered from 1 (least significant) to 32.
4071 *
4072 * @returns index [1..32] of the last set bit.
4073 * @returns 0 if all bits are cleared.
4074 * @param i32 Integer to search for set bits.
4075 * @remark Similar to fls() in BSD.
4076 */
4077DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4078{
4079 return ASMBitLastSetS32((uint32_t)i32);
4080}
4081
4082
4083/**
4084 * Reverse the byte order of the given 32-bit integer.
4085 * @param u32 Integer
4086 */
4087DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4088{
4089#if RT_INLINE_ASM_USES_INTRIN
4090 u32 = _byteswap_ulong(u32);
4091#elif RT_INLINE_ASM_GNU_STYLE
4092 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4093#else
4094 _asm
4095 {
4096 mov eax, [u32]
4097 bswap eax
4098 mov [u32], eax
4099 }
4100#endif
4101 return u32;
4102}
4103
4104/** @} */
4105
4106
4107/** @} */
4108#endif
4109
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette