VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 5967

Last change on this file since 5967 was 5605, checked in by vboxsync, 17 years ago

BIT => RT_BIT, BIT64 => RT_BIT_64. BIT() is defined in Linux 2.6.24

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.5 KB
Line 
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 */
16
17#ifndef ___iprt_asm_h
18#define ___iprt_asm_h
19
20#include <iprt/cdefs.h>
21#include <iprt/types.h>
22/** @todo #include <iprt/param.h> for PAGE_SIZE. */
/** @def RT_INLINE_ASM_USES_INTRIN
 * Set to 1 when building with a Microsoft compiler whose _MSC_VER is 1400 or
 * higher; <intrin.h> is then included and the intrinsics below are enabled.
 * Set to 0 in every other case.
 */

#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outword)
# pragma intrinsic(__outdword)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inword)
# pragma intrinsic(__indword)
# pragma intrinsic(__invlpg)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
   /* Intrinsics that are only requested for AMD64 builds. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
/* Anything that did not take the branch above gets the 0 default. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
86
87
88
89/** @defgroup grp_asm ASM - Assembly Routines
90 * @ingroup grp_rt
91 * @{
92 */
93
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler has no inline assembly support, in which case
 * the ASM* functions are implemented in an external .asm file. Set to 0
 * otherwise.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if defined(RT_ARCH_AMD64) && defined(_MSC_VER)
# define RT_INLINE_ASM_EXTERNAL 1
#else
# define RT_INLINE_ASM_EXTERNAL 0
#endif
106
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler understands GNU style inline assembly, which is
 * taken to be every compiler that does not define _MSC_VER. Set to 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
115
116
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/** IDTR - byte packed so the address follows the 16-bit size directly. */
struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
};
/** IDTR structure type. */
typedef struct RTIDTR RTIDTR;
/** Pointer to an IDTR structure. */
typedef struct RTIDTR *PRTIDTR;
#pragma pack()
128
#pragma pack(1)
/** GDTR - byte packed so the address follows the 16-bit size directly. */
struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
};
/** GDTR structure type. */
typedef struct RTGDTR RTGDTR;
/** Pointer to a GDTR structure. */
typedef struct RTGDTR *PRTGDTR;
#pragma pack()
139
140
/** @def ASMReturnAddress
 * Evaluates to the return address of the current function or method (the
 * point in the caller where execution continues).
 */
#if !defined(_MSC_VER) && !defined(__GNUC__) && !defined(__DOXYGEN__)
# error "Unsupported compiler."
#elif defined(_MSC_VER)
  /* Microsoft compiler: declare the intrinsic by hand and enable it. */
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#else
  /* GNU C (and doxygen runs): level 0 is the current function's return address. */
# define ASMReturnAddress() __builtin_return_address(0)
#endif
156
157
158/**
159 * Gets the content of the IDTR CPU register.
160 * @param pIdtr Where to store the IDTR contents.
161 */
162#if RT_INLINE_ASM_EXTERNAL
163DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
164#else
165DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
166{
167# if RT_INLINE_ASM_GNU_STYLE
168 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
169# else
170 __asm
171 {
172# ifdef RT_ARCH_AMD64
173 mov rax, [pIdtr]
174 sidt [rax]
175# else
176 mov eax, [pIdtr]
177 sidt [eax]
178# endif
179 }
180# endif
181}
182#endif
183
184
185/**
186 * Sets the content of the IDTR CPU register.
187 * @param pIdtr Where to load the IDTR contents from
188 */
189#if RT_INLINE_ASM_EXTERNAL
190DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
191#else
192DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
193{
194# if RT_INLINE_ASM_GNU_STYLE
195 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
196# else
197 __asm
198 {
199# ifdef RT_ARCH_AMD64
200 mov rax, [pIdtr]
201 lidt [rax]
202# else
203 mov eax, [pIdtr]
204 lidt [eax]
205# endif
206 }
207# endif
208}
209#endif
210
211
212/**
213 * Gets the content of the GDTR CPU register.
214 * @param pGdtr Where to store the GDTR contents.
215 */
216#if RT_INLINE_ASM_EXTERNAL
217DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
218#else
219DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
220{
221# if RT_INLINE_ASM_GNU_STYLE
222 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
223# else
224 __asm
225 {
226# ifdef RT_ARCH_AMD64
227 mov rax, [pGdtr]
228 sgdt [rax]
229# else
230 mov eax, [pGdtr]
231 sgdt [eax]
232# endif
233 }
234# endif
235}
236#endif
237
238/**
239 * Get the cs register.
240 * @returns cs.
241 */
242#if RT_INLINE_ASM_EXTERNAL
243DECLASM(RTSEL) ASMGetCS(void);
244#else
245DECLINLINE(RTSEL) ASMGetCS(void)
246{
247 RTSEL SelCS;
248# if RT_INLINE_ASM_GNU_STYLE
249 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
250# else
251 __asm
252 {
253 mov ax, cs
254 mov [SelCS], ax
255 }
256# endif
257 return SelCS;
258}
259#endif
260
261
262/**
263 * Get the DS register.
264 * @returns DS.
265 */
266#if RT_INLINE_ASM_EXTERNAL
267DECLASM(RTSEL) ASMGetDS(void);
268#else
269DECLINLINE(RTSEL) ASMGetDS(void)
270{
271 RTSEL SelDS;
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
274# else
275 __asm
276 {
277 mov ax, ds
278 mov [SelDS], ax
279 }
280# endif
281 return SelDS;
282}
283#endif
284
285
286/**
287 * Get the ES register.
288 * @returns ES.
289 */
290#if RT_INLINE_ASM_EXTERNAL
291DECLASM(RTSEL) ASMGetES(void);
292#else
293DECLINLINE(RTSEL) ASMGetES(void)
294{
295 RTSEL SelES;
296# if RT_INLINE_ASM_GNU_STYLE
297 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
298# else
299 __asm
300 {
301 mov ax, es
302 mov [SelES], ax
303 }
304# endif
305 return SelES;
306}
307#endif
308
309
310/**
311 * Get the FS register.
312 * @returns FS.
313 */
314#if RT_INLINE_ASM_EXTERNAL
315DECLASM(RTSEL) ASMGetFS(void);
316#else
317DECLINLINE(RTSEL) ASMGetFS(void)
318{
319 RTSEL SelFS;
320# if RT_INLINE_ASM_GNU_STYLE
321 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
322# else
323 __asm
324 {
325 mov ax, fs
326 mov [SelFS], ax
327 }
328# endif
329 return SelFS;
330}
331# endif
332
333
334/**
335 * Get the GS register.
336 * @returns GS.
337 */
338#if RT_INLINE_ASM_EXTERNAL
339DECLASM(RTSEL) ASMGetGS(void);
340#else
341DECLINLINE(RTSEL) ASMGetGS(void)
342{
343 RTSEL SelGS;
344# if RT_INLINE_ASM_GNU_STYLE
345 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
346# else
347 __asm
348 {
349 mov ax, gs
350 mov [SelGS], ax
351 }
352# endif
353 return SelGS;
354}
355#endif
356
357
358/**
359 * Get the SS register.
360 * @returns SS.
361 */
362#if RT_INLINE_ASM_EXTERNAL
363DECLASM(RTSEL) ASMGetSS(void);
364#else
365DECLINLINE(RTSEL) ASMGetSS(void)
366{
367 RTSEL SelSS;
368# if RT_INLINE_ASM_GNU_STYLE
369 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
370# else
371 __asm
372 {
373 mov ax, ss
374 mov [SelSS], ax
375 }
376# endif
377 return SelSS;
378}
379#endif
380
381
382/**
383 * Get the TR register.
384 * @returns TR.
385 */
386#if RT_INLINE_ASM_EXTERNAL
387DECLASM(RTSEL) ASMGetTR(void);
388#else
389DECLINLINE(RTSEL) ASMGetTR(void)
390{
391 RTSEL SelTR;
392# if RT_INLINE_ASM_GNU_STYLE
393 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
394# else
395 __asm
396 {
397 str ax
398 mov [SelTR], ax
399 }
400# endif
401 return SelTR;
402}
403#endif
404
405
406/**
407 * Get the [RE]FLAGS register.
408 * @returns [RE]FLAGS.
409 */
410#if RT_INLINE_ASM_EXTERNAL
411DECLASM(RTCCUINTREG) ASMGetFlags(void);
412#else
413DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
414{
415 RTCCUINTREG uFlags;
416# if RT_INLINE_ASM_GNU_STYLE
417# ifdef RT_ARCH_AMD64
418 __asm__ __volatile__("pushfq\n\t"
419 "popq %0\n\t"
420 : "=g" (uFlags));
421# else
422 __asm__ __volatile__("pushfl\n\t"
423 "popl %0\n\t"
424 : "=g" (uFlags));
425# endif
426# else
427 __asm
428 {
429# ifdef RT_ARCH_AMD64
430 pushfq
431 pop [uFlags]
432# else
433 pushfd
434 pop [uFlags]
435# endif
436 }
437# endif
438 return uFlags;
439}
440#endif
441
442
443/**
444 * Set the [RE]FLAGS register.
445 * @param uFlags The new [RE]FLAGS value.
446 */
447#if RT_INLINE_ASM_EXTERNAL
448DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
449#else
450DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
451{
452# if RT_INLINE_ASM_GNU_STYLE
453# ifdef RT_ARCH_AMD64
454 __asm__ __volatile__("pushq %0\n\t"
455 "popfq\n\t"
456 : : "g" (uFlags));
457# else
458 __asm__ __volatile__("pushl %0\n\t"
459 "popfl\n\t"
460 : : "g" (uFlags));
461# endif
462# else
463 __asm
464 {
465# ifdef RT_ARCH_AMD64
466 push [uFlags]
467 popfq
468# else
469 push [uFlags]
470 popfd
471# endif
472 }
473# endif
474}
475#endif
476
477
478/**
479 * Gets the content of the CPU timestamp counter register.
480 *
481 * @returns TSC.
482 */
483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
484DECLASM(uint64_t) ASMReadTSC(void);
485#else
486DECLINLINE(uint64_t) ASMReadTSC(void)
487{
488 RTUINT64U u;
489# if RT_INLINE_ASM_GNU_STYLE
490 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
491# else
492# if RT_INLINE_ASM_USES_INTRIN
493 u.u = __rdtsc();
494# else
495 __asm
496 {
497 rdtsc
498 mov [u.s.Lo], eax
499 mov [u.s.Hi], edx
500 }
501# endif
502# endif
503 return u.u;
504}
505#endif
506
507
508/**
509 * Performs the cpuid instruction returning all registers.
510 *
511 * @param uOperator CPUID operation (eax).
512 * @param pvEAX Where to store eax.
513 * @param pvEBX Where to store ebx.
514 * @param pvECX Where to store ecx.
515 * @param pvEDX Where to store edx.
516 * @remark We're using void pointers to ease the use of special bitfield structures and such.
517 */
518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
519DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
520#else
521DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
522{
523# if RT_INLINE_ASM_GNU_STYLE
524# ifdef RT_ARCH_AMD64
525 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
526 __asm__ ("cpuid\n\t"
527 : "=a" (uRAX),
528 "=b" (uRBX),
529 "=c" (uRCX),
530 "=d" (uRDX)
531 : "0" (uOperator));
532 *(uint32_t *)pvEAX = (uint32_t)uRAX;
533 *(uint32_t *)pvEBX = (uint32_t)uRBX;
534 *(uint32_t *)pvECX = (uint32_t)uRCX;
535 *(uint32_t *)pvEDX = (uint32_t)uRDX;
536# else
537 __asm__ ("xchgl %%ebx, %1\n\t"
538 "cpuid\n\t"
539 "xchgl %%ebx, %1\n\t"
540 : "=a" (*(uint32_t *)pvEAX),
541 "=r" (*(uint32_t *)pvEBX),
542 "=c" (*(uint32_t *)pvECX),
543 "=d" (*(uint32_t *)pvEDX)
544 : "0" (uOperator));
545# endif
546
547# elif RT_INLINE_ASM_USES_INTRIN
548 int aInfo[4];
549 __cpuid(aInfo, uOperator);
550 *(uint32_t *)pvEAX = aInfo[0];
551 *(uint32_t *)pvEBX = aInfo[1];
552 *(uint32_t *)pvECX = aInfo[2];
553 *(uint32_t *)pvEDX = aInfo[3];
554
555# else
556 uint32_t uEAX;
557 uint32_t uEBX;
558 uint32_t uECX;
559 uint32_t uEDX;
560 __asm
561 {
562 push ebx
563 mov eax, [uOperator]
564 cpuid
565 mov [uEAX], eax
566 mov [uEBX], ebx
567 mov [uECX], ecx
568 mov [uEDX], edx
569 pop ebx
570 }
571 *(uint32_t *)pvEAX = uEAX;
572 *(uint32_t *)pvEBX = uEBX;
573 *(uint32_t *)pvECX = uECX;
574 *(uint32_t *)pvEDX = uEDX;
575# endif
576}
577#endif
578
579
580/**
581 * Performs the cpuid instruction returning ecx and edx.
582 *
583 * @param uOperator CPUID operation (eax).
584 * @param pvECX Where to store ecx.
585 * @param pvEDX Where to store edx.
586 * @remark We're using void pointers to ease the use of special bitfield structures and such.
587 */
588#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
589DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
590#else
591DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
592{
593 uint32_t uEBX;
594 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
595}
596#endif
597
598
599/**
600 * Performs the cpuid instruction returning edx.
601 *
602 * @param uOperator CPUID operation (eax).
603 * @returns EDX after cpuid operation.
604 */
605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
606DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
607#else
608DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
609{
610 RTCCUINTREG xDX;
611# if RT_INLINE_ASM_GNU_STYLE
612# ifdef RT_ARCH_AMD64
613 RTCCUINTREG uSpill;
614 __asm__ ("cpuid"
615 : "=a" (uSpill),
616 "=d" (xDX)
617 : "0" (uOperator)
618 : "rbx", "rcx");
619# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
620 __asm__ ("push %%ebx\n\t"
621 "cpuid\n\t"
622 "pop %%ebx\n\t"
623 : "=a" (uOperator),
624 "=d" (xDX)
625 : "0" (uOperator)
626 : "ecx");
627# else
628 __asm__ ("cpuid"
629 : "=a" (uOperator),
630 "=d" (xDX)
631 : "0" (uOperator)
632 : "ebx", "ecx");
633# endif
634
635# elif RT_INLINE_ASM_USES_INTRIN
636 int aInfo[4];
637 __cpuid(aInfo, uOperator);
638 xDX = aInfo[3];
639
640# else
641 __asm
642 {
643 push ebx
644 mov eax, [uOperator]
645 cpuid
646 mov [xDX], edx
647 pop ebx
648 }
649# endif
650 return (uint32_t)xDX;
651}
652#endif
653
654
655/**
656 * Performs the cpuid instruction returning ecx.
657 *
658 * @param uOperator CPUID operation (eax).
659 * @returns ECX after cpuid operation.
660 */
661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
662DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
663#else
664DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
665{
666 RTCCUINTREG xCX;
667# if RT_INLINE_ASM_GNU_STYLE
668# ifdef RT_ARCH_AMD64
669 RTCCUINTREG uSpill;
670 __asm__ ("cpuid"
671 : "=a" (uSpill),
672 "=c" (xCX)
673 : "0" (uOperator)
674 : "rbx", "rdx");
675# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
676 __asm__ ("push %%ebx\n\t"
677 "cpuid\n\t"
678 "pop %%ebx\n\t"
679 : "=a" (uOperator),
680 "=c" (xCX)
681 : "0" (uOperator)
682 : "edx");
683# else
684 __asm__ ("cpuid"
685 : "=a" (uOperator),
686 "=c" (xCX)
687 : "0" (uOperator)
688 : "ebx", "edx");
689
690# endif
691
692# elif RT_INLINE_ASM_USES_INTRIN
693 int aInfo[4];
694 __cpuid(aInfo, uOperator);
695 xCX = aInfo[2];
696
697# else
698 __asm
699 {
700 push ebx
701 mov eax, [uOperator]
702 cpuid
703 mov [xCX], ecx
704 pop ebx
705 }
706# endif
707 return (uint32_t)xCX;
708}
709#endif
710
711
712/**
713 * Checks if the current CPU supports CPUID.
714 *
715 * @returns true if CPUID is supported.
716 */
717DECLINLINE(bool) ASMHasCpuId(void)
718{
719#ifdef RT_ARCH_AMD64
720 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
721#else /* !RT_ARCH_AMD64 */
722 bool fRet = false;
723# if RT_INLINE_ASM_GNU_STYLE
724 uint32_t u1;
725 uint32_t u2;
726 __asm__ ("pushf\n\t"
727 "pop %1\n\t"
728 "mov %1, %2\n\t"
729 "xorl $0x200000, %1\n\t"
730 "push %1\n\t"
731 "popf\n\t"
732 "pushf\n\t"
733 "pop %1\n\t"
734 "cmpl %1, %2\n\t"
735 "setne %0\n\t"
736 "push %2\n\t"
737 "popf\n\t"
738 : "=m" (fRet), "=r" (u1), "=r" (u2));
739# else
740 __asm
741 {
742 pushfd
743 pop eax
744 mov ebx, eax
745 xor eax, 0200000h
746 push eax
747 popfd
748 pushfd
749 pop eax
750 cmp eax, ebx
751 setne fRet
752 push ebx
753 popfd
754 }
755# endif
756 return fRet;
757#endif /* !RT_ARCH_AMD64 */
758}
759
760
761/**
762 * Gets the APIC ID of the current CPU.
763 *
764 * @returns the APIC ID.
765 */
766#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
767DECLASM(uint8_t) ASMGetApicId(void);
768#else
769DECLINLINE(uint8_t) ASMGetApicId(void)
770{
771 RTCCUINTREG xBX;
772# if RT_INLINE_ASM_GNU_STYLE
773# ifdef RT_ARCH_AMD64
774 RTCCUINTREG uSpill;
775 __asm__ ("cpuid"
776 : "=a" (uSpill),
777 "=b" (xBX)
778 : "0" (1)
779 : "rcx", "rdx");
780# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
781 RTCCUINTREG uSpill;
782 __asm__ ("mov %%ebx,%1\n\t"
783 "cpuid\n\t"
784 "xchgl %%ebx,%1\n\t"
785 : "=a" (uSpill),
786 "=r" (xBX)
787 : "0" (1)
788 : "ecx", "edx");
789# else
790 RTCCUINTREG uSpill;
791 __asm__ ("cpuid"
792 : "=a" (uSpill),
793 "=b" (xBX)
794 : "0" (1)
795 : "ecx", "edx");
796# endif
797
798# elif RT_INLINE_ASM_USES_INTRIN
799 int aInfo[4];
800 __cpuid(aInfo, 1);
801 xBX = aInfo[1];
802
803# else
804 __asm
805 {
806 push ebx
807 mov eax, 1
808 cpuid
809 mov [xBX], ebx
810 pop ebx
811 }
812# endif
813 return (uint8_t)(xBX >> 24);
814}
815#endif
816
817/**
818 * Get cr0.
819 * @returns cr0.
820 */
821#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
822DECLASM(RTCCUINTREG) ASMGetCR0(void);
823#else
824DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
825{
826 RTCCUINTREG uCR0;
827# if RT_INLINE_ASM_USES_INTRIN
828 uCR0 = __readcr0();
829
830# elif RT_INLINE_ASM_GNU_STYLE
831# ifdef RT_ARCH_AMD64
832 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
833# else
834 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
835# endif
836# else
837 __asm
838 {
839# ifdef RT_ARCH_AMD64
840 mov rax, cr0
841 mov [uCR0], rax
842# else
843 mov eax, cr0
844 mov [uCR0], eax
845# endif
846 }
847# endif
848 return uCR0;
849}
850#endif
851
852
853/**
854 * Sets the CR0 register.
855 * @param uCR0 The new CR0 value.
856 */
857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
858DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
859#else
860DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
861{
862# if RT_INLINE_ASM_USES_INTRIN
863 __writecr0(uCR0);
864
865# elif RT_INLINE_ASM_GNU_STYLE
866# ifdef RT_ARCH_AMD64
867 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
868# else
869 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
870# endif
871# else
872 __asm
873 {
874# ifdef RT_ARCH_AMD64
875 mov rax, [uCR0]
876 mov cr0, rax
877# else
878 mov eax, [uCR0]
879 mov cr0, eax
880# endif
881 }
882# endif
883}
884#endif
885
886
887/**
888 * Get cr2.
889 * @returns cr2.
890 */
891#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
892DECLASM(RTCCUINTREG) ASMGetCR2(void);
893#else
894DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
895{
896 RTCCUINTREG uCR2;
897# if RT_INLINE_ASM_USES_INTRIN
898 uCR2 = __readcr2();
899
900# elif RT_INLINE_ASM_GNU_STYLE
901# ifdef RT_ARCH_AMD64
902 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
903# else
904 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
905# endif
906# else
907 __asm
908 {
909# ifdef RT_ARCH_AMD64
910 mov rax, cr2
911 mov [uCR2], rax
912# else
913 mov eax, cr2
914 mov [uCR2], eax
915# endif
916 }
917# endif
918 return uCR2;
919}
920#endif
921
922
923/**
924 * Sets the CR2 register.
925 * @param uCR2 The new CR0 value.
926 */
927#if RT_INLINE_ASM_EXTERNAL
928DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
929#else
930DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
931{
932# if RT_INLINE_ASM_GNU_STYLE
933# ifdef RT_ARCH_AMD64
934 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
935# else
936 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
937# endif
938# else
939 __asm
940 {
941# ifdef RT_ARCH_AMD64
942 mov rax, [uCR2]
943 mov cr2, rax
944# else
945 mov eax, [uCR2]
946 mov cr2, eax
947# endif
948 }
949# endif
950}
951#endif
952
953
954/**
955 * Get cr3.
956 * @returns cr3.
957 */
958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
959DECLASM(RTCCUINTREG) ASMGetCR3(void);
960#else
961DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
962{
963 RTCCUINTREG uCR3;
964# if RT_INLINE_ASM_USES_INTRIN
965 uCR3 = __readcr3();
966
967# elif RT_INLINE_ASM_GNU_STYLE
968# ifdef RT_ARCH_AMD64
969 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
970# else
971 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
972# endif
973# else
974 __asm
975 {
976# ifdef RT_ARCH_AMD64
977 mov rax, cr3
978 mov [uCR3], rax
979# else
980 mov eax, cr3
981 mov [uCR3], eax
982# endif
983 }
984# endif
985 return uCR3;
986}
987#endif
988
989
990/**
991 * Sets the CR3 register.
992 *
993 * @param uCR3 New CR3 value.
994 */
995#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
996DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
997#else
998DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
999{
1000# if RT_INLINE_ASM_USES_INTRIN
1001 __writecr3(uCR3);
1002
1003# elif RT_INLINE_ASM_GNU_STYLE
1004# ifdef RT_ARCH_AMD64
1005 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1006# else
1007 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1008# endif
1009# else
1010 __asm
1011 {
1012# ifdef RT_ARCH_AMD64
1013 mov rax, [uCR3]
1014 mov cr3, rax
1015# else
1016 mov eax, [uCR3]
1017 mov cr3, eax
1018# endif
1019 }
1020# endif
1021}
1022#endif
1023
1024
1025/**
1026 * Reloads the CR3 register.
1027 */
1028#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1029DECLASM(void) ASMReloadCR3(void);
1030#else
1031DECLINLINE(void) ASMReloadCR3(void)
1032{
1033# if RT_INLINE_ASM_USES_INTRIN
1034 __writecr3(__readcr3());
1035
1036# elif RT_INLINE_ASM_GNU_STYLE
1037 RTCCUINTREG u;
1038# ifdef RT_ARCH_AMD64
1039 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1040 "movq %0, %%cr3\n\t"
1041 : "=r" (u));
1042# else
1043 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1044 "movl %0, %%cr3\n\t"
1045 : "=r" (u));
1046# endif
1047# else
1048 __asm
1049 {
1050# ifdef RT_ARCH_AMD64
1051 mov rax, cr3
1052 mov cr3, rax
1053# else
1054 mov eax, cr3
1055 mov cr3, eax
1056# endif
1057 }
1058# endif
1059}
1060#endif
1061
1062
1063/**
1064 * Get cr4.
1065 * @returns cr4.
1066 */
1067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1068DECLASM(RTCCUINTREG) ASMGetCR4(void);
1069#else
1070DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1071{
1072 RTCCUINTREG uCR4;
1073# if RT_INLINE_ASM_USES_INTRIN
1074 uCR4 = __readcr4();
1075
1076# elif RT_INLINE_ASM_GNU_STYLE
1077# ifdef RT_ARCH_AMD64
1078 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1079# else
1080 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1081# endif
1082# else
1083 __asm
1084 {
1085# ifdef RT_ARCH_AMD64
1086 mov rax, cr4
1087 mov [uCR4], rax
1088# else
1089 push eax /* just in case */
1090 /*mov eax, cr4*/
1091 _emit 0x0f
1092 _emit 0x20
1093 _emit 0xe0
1094 mov [uCR4], eax
1095 pop eax
1096# endif
1097 }
1098# endif
1099 return uCR4;
1100}
1101#endif
1102
1103
1104/**
1105 * Sets the CR4 register.
1106 *
1107 * @param uCR4 New CR4 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1111#else
1112DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr4(uCR4);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1120# else
1121 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR4]
1128 mov cr4, rax
1129# else
1130 mov eax, [uCR4]
1131 _emit 0x0F
1132 _emit 0x22
1133 _emit 0xE0 /* mov cr4, eax */
1134# endif
1135 }
1136# endif
1137}
1138#endif
1139
1140
1141/**
1142 * Get cr8.
1143 * @returns cr8.
1144 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1145 */
1146#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1147DECLASM(RTCCUINTREG) ASMGetCR8(void);
1148#else
1149DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1150{
1151# ifdef RT_ARCH_AMD64
1152 RTCCUINTREG uCR8;
1153# if RT_INLINE_ASM_USES_INTRIN
1154 uCR8 = __readcr8();
1155
1156# elif RT_INLINE_ASM_GNU_STYLE
1157 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1158# else
1159 __asm
1160 {
1161 mov rax, cr8
1162 mov [uCR8], rax
1163 }
1164# endif
1165 return uCR8;
1166# else /* !RT_ARCH_AMD64 */
1167 return 0;
1168# endif /* !RT_ARCH_AMD64 */
1169}
1170#endif
1171
1172
1173/**
1174 * Enables interrupts (EFLAGS.IF).
1175 */
1176#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1177DECLASM(void) ASMIntEnable(void);
1178#else
1179DECLINLINE(void) ASMIntEnable(void)
1180{
1181# if RT_INLINE_ASM_GNU_STYLE
1182 __asm("sti\n");
1183# elif RT_INLINE_ASM_USES_INTRIN
1184 _enable();
1185# else
1186 __asm sti
1187# endif
1188}
1189#endif
1190
1191
1192/**
1193 * Disables interrupts (!EFLAGS.IF).
1194 */
1195#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1196DECLASM(void) ASMIntDisable(void);
1197#else
1198DECLINLINE(void) ASMIntDisable(void)
1199{
1200# if RT_INLINE_ASM_GNU_STYLE
1201 __asm("cli\n");
1202# elif RT_INLINE_ASM_USES_INTRIN
1203 _disable();
1204# else
1205 __asm cli
1206# endif
1207}
1208#endif
1209
1210
1211/**
1212 * Disables interrupts and returns previous xFLAGS.
1213 */
1214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1215DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1216#else
1217DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1218{
1219 RTCCUINTREG xFlags;
1220# if RT_INLINE_ASM_GNU_STYLE
1221# ifdef RT_ARCH_AMD64
1222 __asm__ __volatile__("pushfq\n\t"
1223 "cli\n\t"
1224 "popq %0\n\t"
1225 : "=m" (xFlags));
1226# else
1227 __asm__ __volatile__("pushfl\n\t"
1228 "cli\n\t"
1229 "popl %0\n\t"
1230 : "=m" (xFlags));
1231# endif
1232# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1233 xFlags = ASMGetFlags();
1234 _disable();
1235# else
1236 __asm {
1237 pushfd
1238 cli
1239 pop [xFlags]
1240 }
1241# endif
1242 return xFlags;
1243}
1244#endif
1245
1246
1247/**
1248 * Reads a machine specific register.
1249 *
1250 * @returns Register content.
1251 * @param uRegister Register to read.
1252 */
1253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1254DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1255#else
1256DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1257{
1258 RTUINT64U u;
1259# if RT_INLINE_ASM_GNU_STYLE
1260 __asm__ ("rdmsr\n\t"
1261 : "=a" (u.s.Lo),
1262 "=d" (u.s.Hi)
1263 : "c" (uRegister));
1264
1265# elif RT_INLINE_ASM_USES_INTRIN
1266 u.u = __readmsr(uRegister);
1267
1268# else
1269 __asm
1270 {
1271 mov ecx, [uRegister]
1272 rdmsr
1273 mov [u.s.Lo], eax
1274 mov [u.s.Hi], edx
1275 }
1276# endif
1277
1278 return u.u;
1279}
1280#endif
1281
1282
1283/**
1284 * Writes a machine specific register.
1285 *
1286 * @returns Register content.
1287 * @param uRegister Register to write to.
1288 * @param u64Val Value to write.
1289 */
1290#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1291DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1292#else
1293DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1294{
1295 RTUINT64U u;
1296
1297 u.u = u64Val;
1298# if RT_INLINE_ASM_GNU_STYLE
1299 __asm__ __volatile__("wrmsr\n\t"
1300 ::"a" (u.s.Lo),
1301 "d" (u.s.Hi),
1302 "c" (uRegister));
1303
1304# elif RT_INLINE_ASM_USES_INTRIN
1305 __writemsr(uRegister, u.u);
1306
1307# else
1308 __asm
1309 {
1310 mov ecx, [uRegister]
1311 mov edx, [u.s.Hi]
1312 mov eax, [u.s.Lo]
1313 wrmsr
1314 }
1315# endif
1316}
1317#endif
1318
1319
1320/**
1321 * Reads low part of a machine specific register.
1322 *
1323 * @returns Register content.
1324 * @param uRegister Register to read.
1325 */
1326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1327DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1328#else
1329DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1330{
1331 uint32_t u32;
1332# if RT_INLINE_ASM_GNU_STYLE
1333 __asm__ ("rdmsr\n\t"
1334 : "=a" (u32)
1335 : "c" (uRegister)
1336 : "edx");
1337
1338# elif RT_INLINE_ASM_USES_INTRIN
1339 u32 = (uint32_t)__readmsr(uRegister);
1340
1341#else
1342 __asm
1343 {
1344 mov ecx, [uRegister]
1345 rdmsr
1346 mov [u32], eax
1347 }
1348# endif
1349
1350 return u32;
1351}
1352#endif
1353
1354
1355/**
1356 * Reads high part of a machine specific register.
1357 *
1358 * @returns Register content.
1359 * @param uRegister Register to read.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1363#else
1364DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1365{
1366 uint32_t u32;
1367# if RT_INLINE_ASM_GNU_STYLE
1368 __asm__ ("rdmsr\n\t"
1369 : "=d" (u32)
1370 : "c" (uRegister)
1371 : "eax");
1372
1373# elif RT_INLINE_ASM_USES_INTRIN
1374 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1375
1376# else
1377 __asm
1378 {
1379 mov ecx, [uRegister]
1380 rdmsr
1381 mov [u32], edx
1382 }
1383# endif
1384
1385 return u32;
1386}
1387#endif
1388
1389
1390/**
1391 * Gets dr7.
1392 *
1393 * @returns dr7.
1394 */
1395#if RT_INLINE_ASM_EXTERNAL
1396DECLASM(RTCCUINTREG) ASMGetDR7(void);
1397#else
1398DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1399{
1400 RTCCUINTREG uDR7;
1401# if RT_INLINE_ASM_GNU_STYLE
1402# ifdef RT_ARCH_AMD64
1403 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1404# else
1405 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1406# endif
1407# else
1408 __asm
1409 {
1410# ifdef RT_ARCH_AMD64
1411 mov rax, dr7
1412 mov [uDR7], rax
1413# else
1414 mov eax, dr7
1415 mov [uDR7], eax
1416# endif
1417 }
1418# endif
1419 return uDR7;
1420}
1421#endif
1422
1423
1424/**
1425 * Gets dr6.
1426 *
1427 * @returns dr6.
1428 */
1429#if RT_INLINE_ASM_EXTERNAL
1430DECLASM(RTCCUINTREG) ASMGetDR6(void);
1431#else
1432DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1433{
1434 RTCCUINTREG uDR6;
1435# if RT_INLINE_ASM_GNU_STYLE
1436# ifdef RT_ARCH_AMD64
1437 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1438# else
1439 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1440# endif
1441# else
1442 __asm
1443 {
1444# ifdef RT_ARCH_AMD64
1445 mov rax, dr6
1446 mov [uDR6], rax
1447# else
1448 mov eax, dr6
1449 mov [uDR6], eax
1450# endif
1451 }
1452# endif
1453 return uDR6;
1454}
1455#endif
1456
1457
1458/**
1459 * Reads and clears DR6.
1460 *
1461 * @returns DR6.
1462 */
1463#if RT_INLINE_ASM_EXTERNAL
1464DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1465#else
1466DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1467{
1468 RTCCUINTREG uDR6;
1469# if RT_INLINE_ASM_GNU_STYLE
1470 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1471# ifdef RT_ARCH_AMD64
1472 __asm__ ("movq %%dr6, %0\n\t"
1473 "movq %1, %%dr6\n\t"
1474 : "=r" (uDR6)
1475 : "r" (uNewValue));
1476# else
1477 __asm__ ("movl %%dr6, %0\n\t"
1478 "movl %1, %%dr6\n\t"
1479 : "=r" (uDR6)
1480 : "r" (uNewValue));
1481# endif
1482# else
1483 __asm
1484 {
1485# ifdef RT_ARCH_AMD64
1486 mov rax, dr6
1487 mov [uDR6], rax
1488 mov rcx, rax
1489 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1490 mov dr6, rcx
1491# else
1492 mov eax, dr6
1493 mov [uDR6], eax
1494 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1495 mov dr6, ecx
1496# endif
1497 }
1498# endif
1499 return uDR6;
1500}
1501#endif
1502
1503
/**
 * Compiler memory barrier.
 *
 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 * values or any outstanding writes when returning from this function.
 *
 * This function must be used if non-volatile data is modified by a
 * device or the VMM. Typical cases are port access, MMIO access,
 * trapping instruction, etc.
 *
 * Three implementations are selected at compile time: an empty GNU asm
 * statement with a "memory" clobber, the _ReadWriteBarrier() intrinsic, or
 * an inline function containing an empty __asm block.
 */
#if RT_INLINE_ASM_GNU_STYLE
# define ASMCompilerBarrier()   do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
#elif RT_INLINE_ASM_USES_INTRIN
# define ASMCompilerBarrier()   do { _ReadWriteBarrier(); } while (0)
#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
DECLINLINE(void) ASMCompilerBarrier(void)
{
    /* No instructions are emitted; only the presence of the __asm block matters. */
    __asm
    {
    }
}
#endif
1526
1527
/**
 * Writes a 8-bit unsigned integer to an I/O port.
 *
 * Implemented with GNU inline assembly, the __outbyte() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @param   Port    I/O port to write to.
 * @param   u8      8-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
#else
DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the port, operand 1 the value (forced into the 'a' register). */
    __asm__ __volatile__("outb %b1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u8));

# elif RT_INLINE_ASM_USES_INTRIN
    __outbyte(Port, u8);

# else
    __asm
    {
        mov     dx, [Port]
        mov     al, [u8]
        out     dx, al
    }
# endif
}
#endif
1557
1558
/**
 * Gets a 8-bit unsigned integer from an I/O port.
 *
 * Implemented with GNU inline assembly, the __inbyte() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @returns 8-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
#else
DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
{
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the result (in the 'a' register), operand 1 the port. */
    __asm__ __volatile__("inb %w1, %b0\n\t"
                         : "=a" (u8)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u8 = __inbyte(Port);

# else
    __asm
    {
        mov     dx, [Port]
        in      al, dx
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
1590
1591
/**
 * Writes a 16-bit unsigned integer to an I/O port.
 *
 * Implemented with GNU inline assembly, the __outword() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @param   Port    I/O port to write to.
 * @param   u16     16-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
#else
DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the port, operand 1 the value (forced into the 'a' register). */
    __asm__ __volatile__("outw %w1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u16));

# elif RT_INLINE_ASM_USES_INTRIN
    __outword(Port, u16);

# else
    __asm
    {
        mov     dx, [Port]
        mov     ax, [u16]
        out     dx, ax
    }
# endif
}
#endif
1621
1622
/**
 * Gets a 16-bit unsigned integer from an I/O port.
 *
 * Implemented with GNU inline assembly, the __inword() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @returns 16-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
#else
DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
{
    uint16_t u16;
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the result (in the 'a' register), operand 1 the port. */
    __asm__ __volatile__("inw %w1, %w0\n\t"
                         : "=a" (u16)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u16 = __inword(Port);

# else
    __asm
    {
        mov     dx, [Port]
        in      ax, dx
        mov     [u16], ax
    }
# endif
    return u16;
}
#endif
1654
1655
/**
 * Writes a 32-bit unsigned integer to an I/O port.
 *
 * Implemented with GNU inline assembly, the __outdword() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @param   Port    I/O port to write to.
 * @param   u32     32-bit integer to write.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
#else
DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the port, operand 1 the value (forced into the 'a' register). */
    __asm__ __volatile__("outl %1, %w0\n\t"
                         :: "Nd" (Port),
                            "a" (u32));

# elif RT_INLINE_ASM_USES_INTRIN
    __outdword(Port, u32);

# else
    __asm
    {
        mov     dx, [Port]
        mov     eax, [u32]
        out     dx, eax
    }
# endif
}
#endif
1685
1686
/**
 * Gets a 32-bit unsigned integer from an I/O port.
 *
 * Implemented with GNU inline assembly, the __indword() intrinsic or
 * Microsoft inline assembly depending on the compiler configuration.
 *
 * @returns 32-bit integer read from the port.
 * @param   Port    I/O port to read from.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
#else
DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
{
    uint32_t u32;
# if RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 is the result (in the 'a' register), operand 1 the port. */
    __asm__ __volatile__("inl %w1, %0\n\t"
                         : "=a" (u32)
                         : "Nd" (Port));

# elif RT_INLINE_ASM_USES_INTRIN
    u32 = __indword(Port);

# else
    __asm
    {
        mov     dx, [Port]
        in      eax, dx
        mov     [u32], eax
    }
# endif
    return u32;
}
#endif
1718
1719
1720/**
1721 * Atomically Exchange an unsigned 8-bit value.
1722 *
1723 * @returns Current *pu8 value
1724 * @param pu8 Pointer to the 8-bit variable to update.
1725 * @param u8 The 8-bit value to assign to *pu8.
1726 */
1727#if RT_INLINE_ASM_EXTERNAL
1728DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1729#else
1730DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1731{
1732# if RT_INLINE_ASM_GNU_STYLE
1733 __asm__ __volatile__("xchgb %0, %1\n\t"
1734 : "=m" (*pu8),
1735 "=r" (u8)
1736 : "1" (u8));
1737# else
1738 __asm
1739 {
1740# ifdef RT_ARCH_AMD64
1741 mov rdx, [pu8]
1742 mov al, [u8]
1743 xchg [rdx], al
1744 mov [u8], al
1745# else
1746 mov edx, [pu8]
1747 mov al, [u8]
1748 xchg [edx], al
1749 mov [u8], al
1750# endif
1751 }
1752# endif
1753 return u8;
1754}
1755#endif
1756
1757
1758/**
1759 * Atomically Exchange a signed 8-bit value.
1760 *
1761 * @returns Current *pu8 value
1762 * @param pi8 Pointer to the 8-bit variable to update.
1763 * @param i8 The 8-bit value to assign to *pi8.
1764 */
1765DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1766{
1767 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1768}
1769
1770
1771/**
1772 * Atomically Exchange a bool value.
1773 *
1774 * @returns Current *pf value
1775 * @param pf Pointer to the 8-bit variable to update.
1776 * @param f The 8-bit value to assign to *pi8.
1777 */
1778DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1779{
1780#ifdef _MSC_VER
1781 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1782#else
1783 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1784#endif
1785}
1786
1787
1788/**
1789 * Atomically Exchange an unsigned 16-bit value.
1790 *
1791 * @returns Current *pu16 value
1792 * @param pu16 Pointer to the 16-bit variable to update.
1793 * @param u16 The 16-bit value to assign to *pu16.
1794 */
1795#if RT_INLINE_ASM_EXTERNAL
1796DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1797#else
1798DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1799{
1800# if RT_INLINE_ASM_GNU_STYLE
1801 __asm__ __volatile__("xchgw %0, %1\n\t"
1802 : "=m" (*pu16),
1803 "=r" (u16)
1804 : "1" (u16));
1805# else
1806 __asm
1807 {
1808# ifdef RT_ARCH_AMD64
1809 mov rdx, [pu16]
1810 mov ax, [u16]
1811 xchg [rdx], ax
1812 mov [u16], ax
1813# else
1814 mov edx, [pu16]
1815 mov ax, [u16]
1816 xchg [edx], ax
1817 mov [u16], ax
1818# endif
1819 }
1820# endif
1821 return u16;
1822}
1823#endif
1824
1825
1826/**
1827 * Atomically Exchange a signed 16-bit value.
1828 *
1829 * @returns Current *pu16 value
1830 * @param pi16 Pointer to the 16-bit variable to update.
1831 * @param i16 The 16-bit value to assign to *pi16.
1832 */
1833DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1834{
1835 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1836}
1837
1838
1839/**
1840 * Atomically Exchange an unsigned 32-bit value.
1841 *
1842 * @returns Current *pu32 value
1843 * @param pu32 Pointer to the 32-bit variable to update.
1844 * @param u32 The 32-bit value to assign to *pu32.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1848#else
1849DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1850{
1851# if RT_INLINE_ASM_GNU_STYLE
1852 __asm__ __volatile__("xchgl %0, %1\n\t"
1853 : "=m" (*pu32),
1854 "=r" (u32)
1855 : "1" (u32));
1856
1857# elif RT_INLINE_ASM_USES_INTRIN
1858 u32 = _InterlockedExchange((long *)pu32, u32);
1859
1860# else
1861 __asm
1862 {
1863# ifdef RT_ARCH_AMD64
1864 mov rdx, [pu32]
1865 mov eax, u32
1866 xchg [rdx], eax
1867 mov [u32], eax
1868# else
1869 mov edx, [pu32]
1870 mov eax, u32
1871 xchg [edx], eax
1872 mov [u32], eax
1873# endif
1874 }
1875# endif
1876 return u32;
1877}
1878#endif
1879
1880
1881/**
1882 * Atomically Exchange a signed 32-bit value.
1883 *
1884 * @returns Current *pu32 value
1885 * @param pi32 Pointer to the 32-bit variable to update.
1886 * @param i32 The 32-bit value to assign to *pi32.
1887 */
1888DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1889{
1890 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1891}
1892
1893
1894/**
1895 * Atomically Exchange an unsigned 64-bit value.
1896 *
1897 * @returns Current *pu64 value
1898 * @param pu64 Pointer to the 64-bit variable to update.
1899 * @param u64 The 64-bit value to assign to *pu64.
1900 */
1901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1902DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1903#else
1904DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1905{
1906# if defined(RT_ARCH_AMD64)
1907# if RT_INLINE_ASM_USES_INTRIN
1908 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1909
1910# elif RT_INLINE_ASM_GNU_STYLE
1911 __asm__ __volatile__("xchgq %0, %1\n\t"
1912 : "=m" (*pu64),
1913 "=r" (u64)
1914 : "1" (u64));
1915# else
1916 __asm
1917 {
1918 mov rdx, [pu64]
1919 mov rax, [u64]
1920 xchg [rdx], rax
1921 mov [u64], rax
1922 }
1923# endif
1924# else /* !RT_ARCH_AMD64 */
1925# if RT_INLINE_ASM_GNU_STYLE
1926# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
1927 uint32_t u32 = (uint32_t)u64;
1928 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
1929 "xchgl %%ebx, %3\n\t"
1930 "1:\n\t"
1931 "lock; cmpxchg8b (%5)\n\t"
1932 "jnz 1b\n\t"
1933 "xchgl %%ebx, %3\n\t"
1934 /*"xchgl %%esi, %5\n\t"*/
1935 : "=A" (u64),
1936 "=m" (*pu64)
1937 : "0" (*pu64),
1938 "m" ( u32 ),
1939 "c" ( (uint32_t)(u64 >> 32) ),
1940 "S" (pu64) );
1941# else /* !PIC */
1942 __asm__ __volatile__("1:\n\t"
1943 "lock; cmpxchg8b %1\n\t"
1944 "jnz 1b\n\t"
1945 : "=A" (u64),
1946 "=m" (*pu64)
1947 : "0" (*pu64),
1948 "b" ( (uint32_t)u64 ),
1949 "c" ( (uint32_t)(u64 >> 32) ));
1950# endif
1951# else
1952 __asm
1953 {
1954 mov ebx, dword ptr [u64]
1955 mov ecx, dword ptr [u64 + 4]
1956 mov edi, pu64
1957 mov eax, dword ptr [edi]
1958 mov edx, dword ptr [edi + 4]
1959 retry:
1960 lock cmpxchg8b [edi]
1961 jnz retry
1962 mov dword ptr [u64], eax
1963 mov dword ptr [u64 + 4], edx
1964 }
1965# endif
1966# endif /* !RT_ARCH_AMD64 */
1967 return u64;
1968}
1969#endif
1970
1971
1972/**
1973 * Atomically Exchange an signed 64-bit value.
1974 *
1975 * @returns Current *pi64 value
1976 * @param pi64 Pointer to the 64-bit variable to update.
1977 * @param i64 The 64-bit value to assign to *pi64.
1978 */
1979DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
1980{
1981 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
1982}
1983
1984
#ifdef RT_ARCH_AMD64
/**
 * Atomically Exchange an unsigned 128-bit value.
 *
 * @returns Current *pu128.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead.
 *          The low and the high halves are therefore exchanged separately and
 *          the operation as a whole is NOT atomic.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /* The condition is hard-coded; the CPUID based check is commented out. */
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        RTUINT128U u128Ret;
        u128Ret.u = u128;
        /* Two independent 64-bit exchanges: low half first, then high half. */
        u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
        u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
        return u128Ret.u;
    }
#if 0 /* later? */
    /* Disabled draft of a cmpxchg16b based implementation; never compiled. */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov     rbx, dword ptr [u128]
            mov     rcx, dword ptr [u128 + 4]
            mov     rdi, pu128
            mov     rax, dword ptr [rdi]
            mov     rdx, dword ptr [rdi + 4]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov     dword ptr [u128], rax
            mov     dword ptr [u128 + 4], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2044
2045
/**
 * Atomically Reads a unsigned 64-bit value.
 *
 * On AMD64 this is a plain 64-bit load.  On 32-bit x86 the value is fetched
 * with lock cmpxchg8b using zero for both the comparand and the replacement
 * value, which is a locked read-modify-write cycle on the memory.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* ebx is not named as an operand here; a zero is swapped into it around
       the cmpxchg8b and the original content is swapped back afterwards. */
    uint32_t u32EBX = 0;
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "xchgl %%ebx, %3\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    __asm
    {
        /* Comparand (edx:eax) and replacement (ecx:ebx) are both zero. */
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
2111
2112
2113/**
2114 * Atomically Reads a signed 64-bit value.
2115 *
2116 * @returns Current *pi64 value
2117 * @param pi64 Pointer to the 64-bit variable to read.
2118 * The memory pointed to must be writable.
2119 * @remark This will fault if the memory is read-only!
2120 */
2121DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2122{
2123 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2124}
2125
2126
/** @def ASMAtomicXchgSize
 * Atomically Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * The previous value is discarded.  Any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
2144
2145
2146/**
2147 * Atomically Exchange a pointer value.
2148 *
2149 * @returns Current *ppv value
2150 * @param ppv Pointer to the pointer variable to update.
2151 * @param pv The pointer value to assign to *ppv.
2152 */
2153DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2154{
2155#if ARCH_BITS == 32
2156 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2157#elif ARCH_BITS == 64
2158 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2159#else
2160# error "ARCH_BITS is bogus"
2161#endif
2162}
2163
2164
2165/**
2166 * Atomically Compare and Exchange an unsigned 32-bit value.
2167 *
2168 * @returns true if xchg was done.
2169 * @returns false if xchg wasn't done.
2170 *
2171 * @param pu32 Pointer to the value to update.
2172 * @param u32New The new value to assigned to *pu32.
2173 * @param u32Old The old value to *pu32 compare with.
2174 */
2175#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2176DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2177#else
2178DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2179{
2180# if RT_INLINE_ASM_GNU_STYLE
2181 uint32_t u32Ret;
2182 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2183 "setz %%al\n\t"
2184 "movzbl %%al, %%eax\n\t"
2185 : "=m" (*pu32),
2186 "=a" (u32Ret)
2187 : "r" (u32New),
2188 "1" (u32Old));
2189 return (bool)u32Ret;
2190
2191# elif RT_INLINE_ASM_USES_INTRIN
2192 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2193
2194# else
2195 uint32_t u32Ret;
2196 __asm
2197 {
2198# ifdef RT_ARCH_AMD64
2199 mov rdx, [pu32]
2200# else
2201 mov edx, [pu32]
2202# endif
2203 mov eax, [u32Old]
2204 mov ecx, [u32New]
2205# ifdef RT_ARCH_AMD64
2206 lock cmpxchg [rdx], ecx
2207# else
2208 lock cmpxchg [edx], ecx
2209# endif
2210 setz al
2211 movzx eax, al
2212 mov [u32Ret], eax
2213 }
2214 return !!u32Ret;
2215# endif
2216}
2217#endif
2218
2219
2220/**
2221 * Atomically Compare and Exchange a signed 32-bit value.
2222 *
2223 * @returns true if xchg was done.
2224 * @returns false if xchg wasn't done.
2225 *
2226 * @param pi32 Pointer to the value to update.
2227 * @param i32New The new value to assigned to *pi32.
2228 * @param i32Old The old value to *pi32 compare with.
2229 */
2230DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2231{
2232 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2233}
2234
2235
2236/**
2237 * Atomically Compare and exchange an unsigned 64-bit value.
2238 *
2239 * @returns true if xchg was done.
2240 * @returns false if xchg wasn't done.
2241 *
2242 * @param pu64 Pointer to the 64-bit variable to update.
2243 * @param u64New The 64-bit value to assign to *pu64.
2244 * @param u64Old The value to compare with.
2245 */
2246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2247DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2248#else
2249DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2250{
2251# if RT_INLINE_ASM_USES_INTRIN
2252 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2253
2254# elif defined(RT_ARCH_AMD64)
2255# if RT_INLINE_ASM_GNU_STYLE
2256 uint64_t u64Ret;
2257 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2258 "setz %%al\n\t"
2259 "movzbl %%al, %%eax\n\t"
2260 : "=m" (*pu64),
2261 "=a" (u64Ret)
2262 : "r" (u64New),
2263 "1" (u64Old));
2264 return (bool)u64Ret;
2265# else
2266 bool fRet;
2267 __asm
2268 {
2269 mov rdx, [pu32]
2270 mov rax, [u64Old]
2271 mov rcx, [u64New]
2272 lock cmpxchg [rdx], rcx
2273 setz al
2274 mov [fRet], al
2275 }
2276 return fRet;
2277# endif
2278# else /* !RT_ARCH_AMD64 */
2279 uint32_t u32Ret;
2280# if RT_INLINE_ASM_GNU_STYLE
2281# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2282 uint32_t u32 = (uint32_t)u64New;
2283 uint32_t u32Spill;
2284 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2285 "lock; cmpxchg8b (%6)\n\t"
2286 "setz %%al\n\t"
2287 "xchgl %%ebx, %4\n\t"
2288 "movzbl %%al, %%eax\n\t"
2289 : "=a" (u32Ret),
2290 "=d" (u32Spill),
2291 "=m" (*pu64)
2292 : "A" (u64Old),
2293 "m" ( u32 ),
2294 "c" ( (uint32_t)(u64New >> 32) ),
2295 "S" (pu64) );
2296# else /* !PIC */
2297 uint32_t u32Spill;
2298 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2299 "setz %%al\n\t"
2300 "movzbl %%al, %%eax\n\t"
2301 : "=a" (u32Ret),
2302 "=d" (u32Spill),
2303 "=m" (*pu64)
2304 : "A" (u64Old),
2305 "b" ( (uint32_t)u64New ),
2306 "c" ( (uint32_t)(u64New >> 32) ));
2307# endif
2308 return (bool)u32Ret;
2309# else
2310 __asm
2311 {
2312 mov ebx, dword ptr [u64New]
2313 mov ecx, dword ptr [u64New + 4]
2314 mov edi, [pu64]
2315 mov eax, dword ptr [u64Old]
2316 mov edx, dword ptr [u64Old + 4]
2317 lock cmpxchg8b [edi]
2318 setz al
2319 movzx eax, al
2320 mov dword ptr [u32Ret], eax
2321 }
2322 return !!u32Ret;
2323# endif
2324# endif /* !RT_ARCH_AMD64 */
2325}
2326#endif
2327
2328
2329/**
2330 * Atomically Compare and exchange a signed 64-bit value.
2331 *
2332 * @returns true if xchg was done.
2333 * @returns false if xchg wasn't done.
2334 *
2335 * @param pi64 Pointer to the 64-bit variable to update.
2336 * @param i64 The 64-bit value to assign to *pu64.
2337 * @param i64Old The value to compare with.
2338 */
2339DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2340{
2341 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2342}
2343
2344
2345
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange
 * function.  Any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assigned to *pu.
 * @param   uOld        The old value to *pu compare with.
 * @param   fRc         Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2367
2368
2369/**
2370 * Atomically Compare and Exchange a pointer value.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param ppv Pointer to the value to update.
2376 * @param pvNew The new value to assigned to *ppv.
2377 * @param pvOld The old value to *ppv compare with.
2378 */
2379DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2380{
2381#if ARCH_BITS == 32
2382 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2383#elif ARCH_BITS == 64
2384 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2385#else
2386# error "ARCH_BITS is bogus"
2387#endif
2388}
2389
2390
/**
 * Atomically increment a 32-bit value.
 *
 * The assembly variants use lock xadd with 1 and then increment the
 * register holding the result, so that the new value is what gets returned.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedIncrement((long *)pu32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 starts out as 1 (the addend) and ends up as the return value. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         "incl %0\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1)
                         : "memory");
# else
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        inc     eax
        mov     u32, eax
    }
# endif
    return u32;
}
#endif
2431
2432
2433/**
2434 * Atomically increment a signed 32-bit value.
2435 *
2436 * @returns The new value.
2437 * @param pi32 Pointer to the value to increment.
2438 */
2439DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2440{
2441 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2442}
2443
2444
/**
 * Atomically decrement an unsigned 32-bit value.
 *
 * The assembly variants use lock xadd with -1 and then decrement the
 * register holding the result, so that the new value is what gets returned.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedDecrement((long *)pu32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Operand 0 starts out as -1 (the addend) and ends up as the return value. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         "decl %0\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1)
                         : "memory");
# else
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        dec     eax
        mov     u32, eax
    }
# endif
    return u32;
}
#endif
2485
2486
2487/**
2488 * Atomically decrement a signed 32-bit value.
2489 *
2490 * @returns The new value.
2491 * @param pi32 Pointer to the value to decrement.
2492 */
2493DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2494{
2495 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2496}
2497
2498
2499/**
2500 * Atomically Or an unsigned 32-bit value.
2501 *
2502 * @param pu32 Pointer to the pointer variable to OR u32 with.
2503 * @param u32 The value to OR *pu32 with.
2504 */
2505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2506DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2507#else
2508DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2509{
2510# if RT_INLINE_ASM_USES_INTRIN
2511 _InterlockedOr((long volatile *)pu32, (long)u32);
2512
2513# elif RT_INLINE_ASM_GNU_STYLE
2514 __asm__ __volatile__("lock; orl %1, %0\n\t"
2515 : "=m" (*pu32)
2516 : "r" (u32));
2517# else
2518 __asm
2519 {
2520 mov eax, [u32]
2521# ifdef RT_ARCH_AMD64
2522 mov rdx, [pu32]
2523 lock or [rdx], eax
2524# else
2525 mov edx, [pu32]
2526 lock or [edx], eax
2527# endif
2528 }
2529# endif
2530}
2531#endif
2532
2533
2534/**
2535 * Atomically Or a signed 32-bit value.
2536 *
2537 * @param pi32 Pointer to the pointer variable to OR u32 with.
2538 * @param i32 The value to OR *pu32 with.
2539 */
2540DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2541{
2542 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2543}
2544
2545
2546/**
2547 * Atomically And an unsigned 32-bit value.
2548 *
2549 * @param pu32 Pointer to the pointer variable to AND u32 with.
2550 * @param u32 The value to AND *pu32 with.
2551 */
2552#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2553DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2554#else
2555DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2556{
2557# if RT_INLINE_ASM_USES_INTRIN
2558 _InterlockedAnd((long volatile *)pu32, u32);
2559
2560# elif RT_INLINE_ASM_GNU_STYLE
2561 __asm__ __volatile__("lock; andl %1, %0\n\t"
2562 : "=m" (*pu32)
2563 : "r" (u32));
2564# else
2565 __asm
2566 {
2567 mov eax, [u32]
2568# ifdef RT_ARCH_AMD64
2569 mov rdx, [pu32]
2570 lock and [rdx], eax
2571# else
2572 mov edx, [pu32]
2573 lock and [edx], eax
2574# endif
2575 }
2576# endif
2577}
2578#endif
2579
2580
2581/**
2582 * Atomically And a signed 32-bit value.
2583 *
2584 * @param pi32 Pointer to the pointer variable to AND i32 with.
2585 * @param i32 The value to AND *pi32 with.
2586 */
2587DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2588{
2589 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2590}
2591
2592
/**
 * Invalidate page.
 *
 * Executes the invlpg instruction on the given address, either through the
 * __invlpg() intrinsic or through inline assembly.
 *
 * @param   pv      Address of the page to invalidate.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMInvalidatePage(void *pv);
#else
DECLINLINE(void) ASMInvalidatePage(void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
    __invlpg(pv);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The byte at pv is passed as a memory operand so invlpg gets its address. */
    __asm__ __volatile__("invlpg %0\n\t"
                         : : "m" (*(uint8_t *)pv));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pv]
        invlpg  [rax]
#  else
        mov     eax, [pv]
        invlpg  [eax]
#  endif
    }
# endif
}
#endif
2623
2624
/* Compile time sanity check: the page routines below hard-code a page size
   of 0x1000 bytes.  The check is skipped when PAGE_SIZE is not defined or
   when NT_INCLUDED is defined. */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
2630
/**
 * Zeros a 4K memory page.
 *
 * Stores 0x1000 bytes of zeros using a string store of the native word
 * size (rep stosq on AMD64, rep stosd/stosl on 32-bit x86).
 *
 * @param   pv  Pointer to the memory block. This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
#  if RT_INLINE_ASM_USES_INTRIN
#   ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
#   else
    __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
#   endif

#  elif RT_INLINE_ASM_GNU_STYLE
    /* The instruction modifies both the destination and the count register,
       so both are declared as outputs; uDummy only receives the final count. */
    RTUINTREG uDummy;
#   ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("rep stosq"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 3),
                            "a" (0)
                          : "memory");
#   else
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 2),
                            "a" (0)
                          : "memory");
#   endif
#  else
    __asm
    {
#   ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#   else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep     stosd
#   endif
    }
#  endif
}
# endif
2685
2686
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * Writes cb / 4 zero dwords starting at pv.
 *
 * @param   pv  Pointer to the memory block.
 * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
    __stosd((unsigned long *)pv, 0, cb >> 2);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The destination and count registers are modified by the instruction;
       the (by-value) parameters are reused to receive their final values. */
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (cb)
                          : "0" (pv),
                            "1" (cb >> 2),
                            "a" (0)
                          : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
2727
2728
2729/**
2730 * Fills a memory block with a 32-bit aligned size.
2731 *
2732 * @param pv Pointer to the memory block.
2733 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2734 * @param u32 The value to fill with.
2735 */
2736#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2737DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2738#else
2739DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2740{
2741# if RT_INLINE_ASM_USES_INTRIN
2742 __stosd((unsigned long *)pv, 0, cb >> 2);
2743
2744# elif RT_INLINE_ASM_GNU_STYLE
2745 __asm__ __volatile__ ("rep stosl"
2746 : "=D" (pv),
2747 "=c" (cb)
2748 : "0" (pv),
2749 "1" (cb >> 2),
2750 "a" (u32)
2751 : "memory");
2752# else
2753 __asm
2754 {
2755# ifdef RT_ARCH_AMD64
2756 mov rcx, [cb]
2757 shr rcx, 2
2758 mov rdi, [pv]
2759# else
2760 mov ecx, [cb]
2761 shr ecx, 2
2762 mov edi, [pv]
2763# endif
2764 mov eax, [u32]
2765 rep stosd
2766 }
2767# endif
2768}
2769#endif
2770
2771
2772/**
2773 * Checks if a memory block is filled with the specified byte.
2774 *
2775 * This is a sort of inverted memchr.
2776 *
2777 * @returns Pointer to the byte which doesn't equal u8.
2778 * @returns NULL if all equal to u8.
2779 *
2780 * @param pv Pointer to the memory block.
2781 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2782 * @param u8 The value it's supposed to be filled with.
2783 */
2784#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2785DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
2786#else
2787DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
2788{
2789/** @todo rewrite this in inline assembly. */
2790 uint8_t const *pb = (uint8_t const *)pv;
2791 for (; cb; cb--, pb++)
2792 if (RT_UNLIKELY(*pb != u8))
2793 return (void *)pb;
2794 return NULL;
2795}
2796#endif
2797
2798
2799
2800/**
2801 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2802 *
2803 * @returns u32F1 * u32F2.
2804 */
2805#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2806DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2807#else
2808DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2809{
2810# ifdef RT_ARCH_AMD64
2811 return (uint64_t)u32F1 * u32F2;
2812# else /* !RT_ARCH_AMD64 */
2813 uint64_t u64;
2814# if RT_INLINE_ASM_GNU_STYLE
2815 __asm__ __volatile__("mull %%edx"
2816 : "=A" (u64)
2817 : "a" (u32F2), "d" (u32F1));
2818# else
2819 __asm
2820 {
2821 mov edx, [u32F1]
2822 mov eax, [u32F2]
2823 mul edx
2824 mov dword ptr [u64], eax
2825 mov dword ptr [u64 + 4], edx
2826 }
2827# endif
2828 return u64;
2829# endif /* !RT_ARCH_AMD64 */
2830}
2831#endif
2832
2833
2834/**
2835 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2836 *
2837 * @returns u32F1 * u32F2.
2838 */
2839#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2840DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2841#else
2842DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2843{
2844# ifdef RT_ARCH_AMD64
2845 return (int64_t)i32F1 * i32F2;
2846# else /* !RT_ARCH_AMD64 */
2847 int64_t i64;
2848# if RT_INLINE_ASM_GNU_STYLE
2849 __asm__ __volatile__("imull %%edx"
2850 : "=A" (i64)
2851 : "a" (i32F2), "d" (i32F1));
2852# else
2853 __asm
2854 {
2855 mov edx, [i32F1]
2856 mov eax, [i32F2]
2857 imul edx
2858 mov dword ptr [i64], eax
2859 mov dword ptr [i64 + 4], edx
2860 }
2861# endif
2862 return i64;
2863# endif /* !RT_ARCH_AMD64 */
2864}
2865#endif
2866
2867
2868/**
2869 * Devides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2870 *
2871 * @returns u64 / u32.
2872 */
2873#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2874DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2875#else
2876DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2877{
2878# ifdef RT_ARCH_AMD64
2879 return (uint32_t)(u64 / u32);
2880# else /* !RT_ARCH_AMD64 */
2881# if RT_INLINE_ASM_GNU_STYLE
2882 RTUINTREG uDummy;
2883 __asm__ __volatile__("divl %3"
2884 : "=a" (u32), "=d"(uDummy)
2885 : "A" (u64), "r" (u32));
2886# else
2887 __asm
2888 {
2889 mov eax, dword ptr [u64]
2890 mov edx, dword ptr [u64 + 4]
2891 mov ecx, [u32]
2892 div ecx
2893 mov [u32], eax
2894 }
2895# endif
2896 return u32;
2897# endif /* !RT_ARCH_AMD64 */
2898}
2899#endif
2900
2901
2902/**
2903 * Devides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2904 *
2905 * @returns u64 / u32.
2906 */
2907#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2908DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2909#else
2910DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2911{
2912# ifdef RT_ARCH_AMD64
2913 return (int32_t)(i64 / i32);
2914# else /* !RT_ARCH_AMD64 */
2915# if RT_INLINE_ASM_GNU_STYLE
2916 RTUINTREG iDummy;
2917 __asm__ __volatile__("idivl %3"
2918 : "=a" (i32), "=d"(iDummy)
2919 : "A" (i64), "r" (i32));
2920# else
2921 __asm
2922 {
2923 mov eax, dword ptr [i64]
2924 mov edx, dword ptr [i64 + 4]
2925 mov ecx, [i32]
2926 idiv ecx
2927 mov [i32], eax
2928 }
2929# endif
2930 return i32;
2931# endif /* !RT_ARCH_AMD64 */
2932}
2933#endif
2934
2935
2936/**
2937 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
2938 * using a 96 bit intermediate result.
2939 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
2940 * __udivdi3 and __umoddi3 even if this inline function is not used.
2941 *
2942 * @returns (u64A * u32B) / u32C.
2943 * @param u64A The 64-bit value.
2944 * @param u32B The 32-bit value to multiple by A.
2945 * @param u32C The 32-bit value to divide A*B by.
2946 */
2947#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
2948DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
2949#else
2950DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
2951{
2952# if RT_INLINE_ASM_GNU_STYLE
2953# ifdef RT_ARCH_AMD64
2954 uint64_t u64Result, u64Spill;
2955 __asm__ __volatile__("mulq %2\n\t"
2956 "divq %3\n\t"
2957 : "=a" (u64Result),
2958 "=d" (u64Spill)
2959 : "r" ((uint64_t)u32B),
2960 "r" ((uint64_t)u32C),
2961 "0" (u64A),
2962 "1" (0));
2963 return u64Result;
2964# else
2965 uint32_t u32Dummy;
2966 uint64_t u64Result;
2967 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
2968 edx = u64Lo.hi = (u64A.lo * u32B).hi */
2969 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
2970 eax = u64A.hi */
2971 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
2972 edx = u32C */
2973 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
2974 edx = u32B */
2975 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
2976 edx = u64Hi.hi = (u64A.hi * u32B).hi */
2977 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
2978 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
2979 "divl %%ecx \n\t" /* eax = u64Hi / u32C
2980 edx = u64Hi % u32C */
2981 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
2982 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
2983 "divl %%ecx \n\t" /* u64Result.lo */
2984 "movl %%edi,%%edx \n\t" /* u64Result.hi */
2985 : "=A"(u64Result), "=c"(u32Dummy),
2986 "=S"(u32Dummy), "=D"(u32Dummy)
2987 : "a"((uint32_t)u64A),
2988 "S"((uint32_t)(u64A >> 32)),
2989 "c"(u32B),
2990 "D"(u32C));
2991 return u64Result;
2992# endif
2993# else
2994 RTUINT64U u;
2995 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
2996 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
2997 u64Hi += (u64Lo >> 32);
2998 u.s.Hi = (uint32_t)(u64Hi / u32C);
2999 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3000 return u.u;
3001# endif
3002}
3003#endif
3004
3005
3006/**
3007 * Probes a byte pointer for read access.
3008 *
3009 * While the function will not fault if the byte is not read accessible,
3010 * the idea is to do this in a safe place like before acquiring locks
3011 * and such like.
3012 *
3013 * Also, this functions guarantees that an eager compiler is not going
3014 * to optimize the probing away.
3015 *
3016 * @param pvByte Pointer to the byte.
3017 */
3018#if RT_INLINE_ASM_EXTERNAL
3019DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3020#else
3021DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3022{
3023 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3024 uint8_t u8;
3025# if RT_INLINE_ASM_GNU_STYLE
3026 __asm__ __volatile__("movb (%1), %0\n\t"
3027 : "=r" (u8)
3028 : "r" (pvByte));
3029# else
3030 __asm
3031 {
3032# ifdef RT_ARCH_AMD64
3033 mov rax, [pvByte]
3034 mov al, [rax]
3035# else
3036 mov eax, [pvByte]
3037 mov al, [eax]
3038# endif
3039 mov [u8], al
3040 }
3041# endif
3042 return u8;
3043}
3044#endif
3045
3046/**
3047 * Probes a buffer for read access page by page.
3048 *
3049 * While the function will fault if the buffer is not fully read
3050 * accessible, the idea is to do this in a safe place like before
3051 * acquiring locks and such like.
3052 *
3053 * Also, this functions guarantees that an eager compiler is not going
3054 * to optimize the probing away.
3055 *
3056 * @param pvBuf Pointer to the buffer.
3057 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3058 */
3059DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3060{
3061 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3062 /* the first byte */
3063 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3064 ASMProbeReadByte(pu8);
3065
3066 /* the pages in between pages. */
3067 while (cbBuf > /*PAGE_SIZE*/0x1000)
3068 {
3069 ASMProbeReadByte(pu8);
3070 cbBuf -= /*PAGE_SIZE*/0x1000;
3071 pu8 += /*PAGE_SIZE*/0x1000;
3072 }
3073
3074 /* the last byte */
3075 ASMProbeReadByte(pu8 + cbBuf - 1);
3076}
3077
3078
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark  In the gnu world we add a nop instruction after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @remark  Without GNU style inline assembly the __debugbreak() intrinsic is used.
 * @internal
 */
#if !RT_INLINE_ASM_GNU_STYLE
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
3095
3096
3097
3098/** @defgroup grp_inline_bits Bit Operations
3099 * @{
3100 */
3101
3102
3103/**
3104 * Sets a bit in a bitmap.
3105 *
3106 * @param pvBitmap Pointer to the bitmap.
3107 * @param iBit The bit to set.
3108 */
3109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3110DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3111#else
3112DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3113{
3114# if RT_INLINE_ASM_USES_INTRIN
3115 _bittestandset((long *)pvBitmap, iBit);
3116
3117# elif RT_INLINE_ASM_GNU_STYLE
3118 __asm__ __volatile__ ("btsl %1, %0"
3119 : "=m" (*(volatile long *)pvBitmap)
3120 : "Ir" (iBit)
3121 : "memory");
3122# else
3123 __asm
3124 {
3125# ifdef RT_ARCH_AMD64
3126 mov rax, [pvBitmap]
3127 mov edx, [iBit]
3128 bts [rax], edx
3129# else
3130 mov eax, [pvBitmap]
3131 mov edx, [iBit]
3132 bts [eax], edx
3133# endif
3134 }
3135# endif
3136}
3137#endif
3138
3139
3140/**
3141 * Atomically sets a bit in a bitmap.
3142 *
3143 * @param pvBitmap Pointer to the bitmap.
3144 * @param iBit The bit to set.
3145 */
3146#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3147DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3148#else
3149DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3150{
3151# if RT_INLINE_ASM_USES_INTRIN
3152 _interlockedbittestandset((long *)pvBitmap, iBit);
3153# elif RT_INLINE_ASM_GNU_STYLE
3154 __asm__ __volatile__ ("lock; btsl %1, %0"
3155 : "=m" (*(volatile long *)pvBitmap)
3156 : "Ir" (iBit)
3157 : "memory");
3158# else
3159 __asm
3160 {
3161# ifdef RT_ARCH_AMD64
3162 mov rax, [pvBitmap]
3163 mov edx, [iBit]
3164 lock bts [rax], edx
3165# else
3166 mov eax, [pvBitmap]
3167 mov edx, [iBit]
3168 lock bts [eax], edx
3169# endif
3170 }
3171# endif
3172}
3173#endif
3174
3175
3176/**
3177 * Clears a bit in a bitmap.
3178 *
3179 * @param pvBitmap Pointer to the bitmap.
3180 * @param iBit The bit to clear.
3181 */
3182#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3183DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3184#else
3185DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3186{
3187# if RT_INLINE_ASM_USES_INTRIN
3188 _bittestandreset((long *)pvBitmap, iBit);
3189
3190# elif RT_INLINE_ASM_GNU_STYLE
3191 __asm__ __volatile__ ("btrl %1, %0"
3192 : "=m" (*(volatile long *)pvBitmap)
3193 : "Ir" (iBit)
3194 : "memory");
3195# else
3196 __asm
3197 {
3198# ifdef RT_ARCH_AMD64
3199 mov rax, [pvBitmap]
3200 mov edx, [iBit]
3201 btr [rax], edx
3202# else
3203 mov eax, [pvBitmap]
3204 mov edx, [iBit]
3205 btr [eax], edx
3206# endif
3207 }
3208# endif
3209}
3210#endif
3211
3212
3213/**
3214 * Atomically clears a bit in a bitmap.
3215 *
3216 * @param pvBitmap Pointer to the bitmap.
3217 * @param iBit The bit to toggle set.
3218 * @remark No memory barrier, take care on smp.
3219 */
3220#if RT_INLINE_ASM_EXTERNAL
3221DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3222#else
3223DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3224{
3225# if RT_INLINE_ASM_GNU_STYLE
3226 __asm__ __volatile__ ("lock; btrl %1, %0"
3227 : "=m" (*(volatile long *)pvBitmap)
3228 : "Ir" (iBit)
3229 : "memory");
3230# else
3231 __asm
3232 {
3233# ifdef RT_ARCH_AMD64
3234 mov rax, [pvBitmap]
3235 mov edx, [iBit]
3236 lock btr [rax], edx
3237# else
3238 mov eax, [pvBitmap]
3239 mov edx, [iBit]
3240 lock btr [eax], edx
3241# endif
3242 }
3243# endif
3244}
3245#endif
3246
3247
3248/**
3249 * Toggles a bit in a bitmap.
3250 *
3251 * @param pvBitmap Pointer to the bitmap.
3252 * @param iBit The bit to toggle.
3253 */
3254#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3255DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3256#else
3257DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3258{
3259# if RT_INLINE_ASM_USES_INTRIN
3260 _bittestandcomplement((long *)pvBitmap, iBit);
3261# elif RT_INLINE_ASM_GNU_STYLE
3262 __asm__ __volatile__ ("btcl %1, %0"
3263 : "=m" (*(volatile long *)pvBitmap)
3264 : "Ir" (iBit)
3265 : "memory");
3266# else
3267 __asm
3268 {
3269# ifdef RT_ARCH_AMD64
3270 mov rax, [pvBitmap]
3271 mov edx, [iBit]
3272 btc [rax], edx
3273# else
3274 mov eax, [pvBitmap]
3275 mov edx, [iBit]
3276 btc [eax], edx
3277# endif
3278 }
3279# endif
3280}
3281#endif
3282
3283
3284/**
3285 * Atomically toggles a bit in a bitmap.
3286 *
3287 * @param pvBitmap Pointer to the bitmap.
3288 * @param iBit The bit to test and set.
3289 */
3290#if RT_INLINE_ASM_EXTERNAL
3291DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3292#else
3293DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3294{
3295# if RT_INLINE_ASM_GNU_STYLE
3296 __asm__ __volatile__ ("lock; btcl %1, %0"
3297 : "=m" (*(volatile long *)pvBitmap)
3298 : "Ir" (iBit)
3299 : "memory");
3300# else
3301 __asm
3302 {
3303# ifdef RT_ARCH_AMD64
3304 mov rax, [pvBitmap]
3305 mov edx, [iBit]
3306 lock btc [rax], edx
3307# else
3308 mov eax, [pvBitmap]
3309 mov edx, [iBit]
3310 lock btc [eax], edx
3311# endif
3312 }
3313# endif
3314}
3315#endif
3316
3317
3318/**
3319 * Tests and sets a bit in a bitmap.
3320 *
3321 * @returns true if the bit was set.
3322 * @returns false if the bit was clear.
3323 * @param pvBitmap Pointer to the bitmap.
3324 * @param iBit The bit to test and set.
3325 */
3326#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3327DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3328#else
3329DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3330{
3331 union { bool f; uint32_t u32; uint8_t u8; } rc;
3332# if RT_INLINE_ASM_USES_INTRIN
3333 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3334
3335# elif RT_INLINE_ASM_GNU_STYLE
3336 __asm__ __volatile__ ("btsl %2, %1\n\t"
3337 "setc %b0\n\t"
3338 "andl $1, %0\n\t"
3339 : "=q" (rc.u32),
3340 "=m" (*(volatile long *)pvBitmap)
3341 : "Ir" (iBit)
3342 : "memory");
3343# else
3344 __asm
3345 {
3346 mov edx, [iBit]
3347# ifdef RT_ARCH_AMD64
3348 mov rax, [pvBitmap]
3349 bts [rax], edx
3350# else
3351 mov eax, [pvBitmap]
3352 bts [eax], edx
3353# endif
3354 setc al
3355 and eax, 1
3356 mov [rc.u32], eax
3357 }
3358# endif
3359 return rc.f;
3360}
3361#endif
3362
3363
3364/**
3365 * Atomically tests and sets a bit in a bitmap.
3366 *
3367 * @returns true if the bit was set.
3368 * @returns false if the bit was clear.
3369 * @param pvBitmap Pointer to the bitmap.
3370 * @param iBit The bit to set.
3371 */
3372#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3373DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3374#else
3375DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3376{
3377 union { bool f; uint32_t u32; uint8_t u8; } rc;
3378# if RT_INLINE_ASM_USES_INTRIN
3379 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3380# elif RT_INLINE_ASM_GNU_STYLE
3381 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3382 "setc %b0\n\t"
3383 "andl $1, %0\n\t"
3384 : "=q" (rc.u32),
3385 "=m" (*(volatile long *)pvBitmap)
3386 : "Ir" (iBit)
3387 : "memory");
3388# else
3389 __asm
3390 {
3391 mov edx, [iBit]
3392# ifdef RT_ARCH_AMD64
3393 mov rax, [pvBitmap]
3394 lock bts [rax], edx
3395# else
3396 mov eax, [pvBitmap]
3397 lock bts [eax], edx
3398# endif
3399 setc al
3400 and eax, 1
3401 mov [rc.u32], eax
3402 }
3403# endif
3404 return rc.f;
3405}
3406#endif
3407
3408
3409/**
3410 * Tests and clears a bit in a bitmap.
3411 *
3412 * @returns true if the bit was set.
3413 * @returns false if the bit was clear.
3414 * @param pvBitmap Pointer to the bitmap.
3415 * @param iBit The bit to test and clear.
3416 */
3417#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3418DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3419#else
3420DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3421{
3422 union { bool f; uint32_t u32; uint8_t u8; } rc;
3423# if RT_INLINE_ASM_USES_INTRIN
3424 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3425
3426# elif RT_INLINE_ASM_GNU_STYLE
3427 __asm__ __volatile__ ("btrl %2, %1\n\t"
3428 "setc %b0\n\t"
3429 "andl $1, %0\n\t"
3430 : "=q" (rc.u32),
3431 "=m" (*(volatile long *)pvBitmap)
3432 : "Ir" (iBit)
3433 : "memory");
3434# else
3435 __asm
3436 {
3437 mov edx, [iBit]
3438# ifdef RT_ARCH_AMD64
3439 mov rax, [pvBitmap]
3440 btr [rax], edx
3441# else
3442 mov eax, [pvBitmap]
3443 btr [eax], edx
3444# endif
3445 setc al
3446 and eax, 1
3447 mov [rc.u32], eax
3448 }
3449# endif
3450 return rc.f;
3451}
3452#endif
3453
3454
3455/**
3456 * Atomically tests and clears a bit in a bitmap.
3457 *
3458 * @returns true if the bit was set.
3459 * @returns false if the bit was clear.
3460 * @param pvBitmap Pointer to the bitmap.
3461 * @param iBit The bit to test and clear.
3462 * @remark No memory barrier, take care on smp.
3463 */
3464#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3465DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3466#else
3467DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3468{
3469 union { bool f; uint32_t u32; uint8_t u8; } rc;
3470# if RT_INLINE_ASM_USES_INTRIN
3471 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3472
3473# elif RT_INLINE_ASM_GNU_STYLE
3474 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3475 "setc %b0\n\t"
3476 "andl $1, %0\n\t"
3477 : "=q" (rc.u32),
3478 "=m" (*(volatile long *)pvBitmap)
3479 : "Ir" (iBit)
3480 : "memory");
3481# else
3482 __asm
3483 {
3484 mov edx, [iBit]
3485# ifdef RT_ARCH_AMD64
3486 mov rax, [pvBitmap]
3487 lock btr [rax], edx
3488# else
3489 mov eax, [pvBitmap]
3490 lock btr [eax], edx
3491# endif
3492 setc al
3493 and eax, 1
3494 mov [rc.u32], eax
3495 }
3496# endif
3497 return rc.f;
3498}
3499#endif
3500
3501
3502/**
3503 * Tests and toggles a bit in a bitmap.
3504 *
3505 * @returns true if the bit was set.
3506 * @returns false if the bit was clear.
3507 * @param pvBitmap Pointer to the bitmap.
3508 * @param iBit The bit to test and toggle.
3509 */
3510#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3511DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3512#else
3513DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3514{
3515 union { bool f; uint32_t u32; uint8_t u8; } rc;
3516# if RT_INLINE_ASM_USES_INTRIN
3517 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3518
3519# elif RT_INLINE_ASM_GNU_STYLE
3520 __asm__ __volatile__ ("btcl %2, %1\n\t"
3521 "setc %b0\n\t"
3522 "andl $1, %0\n\t"
3523 : "=q" (rc.u32),
3524 "=m" (*(volatile long *)pvBitmap)
3525 : "Ir" (iBit)
3526 : "memory");
3527# else
3528 __asm
3529 {
3530 mov edx, [iBit]
3531# ifdef RT_ARCH_AMD64
3532 mov rax, [pvBitmap]
3533 btc [rax], edx
3534# else
3535 mov eax, [pvBitmap]
3536 btc [eax], edx
3537# endif
3538 setc al
3539 and eax, 1
3540 mov [rc.u32], eax
3541 }
3542# endif
3543 return rc.f;
3544}
3545#endif
3546
3547
3548/**
3549 * Atomically tests and toggles a bit in a bitmap.
3550 *
3551 * @returns true if the bit was set.
3552 * @returns false if the bit was clear.
3553 * @param pvBitmap Pointer to the bitmap.
3554 * @param iBit The bit to test and toggle.
3555 */
3556#if RT_INLINE_ASM_EXTERNAL
3557DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3558#else
3559DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3560{
3561 union { bool f; uint32_t u32; uint8_t u8; } rc;
3562# if RT_INLINE_ASM_GNU_STYLE
3563 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3564 "setc %b0\n\t"
3565 "andl $1, %0\n\t"
3566 : "=q" (rc.u32),
3567 "=m" (*(volatile long *)pvBitmap)
3568 : "Ir" (iBit)
3569 : "memory");
3570# else
3571 __asm
3572 {
3573 mov edx, [iBit]
3574# ifdef RT_ARCH_AMD64
3575 mov rax, [pvBitmap]
3576 lock btc [rax], edx
3577# else
3578 mov eax, [pvBitmap]
3579 lock btc [eax], edx
3580# endif
3581 setc al
3582 and eax, 1
3583 mov [rc.u32], eax
3584 }
3585# endif
3586 return rc.f;
3587}
3588#endif
3589
3590
3591/**
3592 * Tests if a bit in a bitmap is set.
3593 *
3594 * @returns true if the bit is set.
3595 * @returns false if the bit is clear.
3596 * @param pvBitmap Pointer to the bitmap.
3597 * @param iBit The bit to test.
3598 */
3599#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3600DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3601#else
3602DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3603{
3604 union { bool f; uint32_t u32; uint8_t u8; } rc;
3605# if RT_INLINE_ASM_USES_INTRIN
3606 rc.u32 = _bittest((long *)pvBitmap, iBit);
3607# elif RT_INLINE_ASM_GNU_STYLE
3608
3609 __asm__ __volatile__ ("btl %2, %1\n\t"
3610 "setc %b0\n\t"
3611 "andl $1, %0\n\t"
3612 : "=q" (rc.u32),
3613 "=m" (*(volatile long *)pvBitmap)
3614 : "Ir" (iBit)
3615 : "memory");
3616# else
3617 __asm
3618 {
3619 mov edx, [iBit]
3620# ifdef RT_ARCH_AMD64
3621 mov rax, [pvBitmap]
3622 bt [rax], edx
3623# else
3624 mov eax, [pvBitmap]
3625 bt [eax], edx
3626# endif
3627 setc al
3628 and eax, 1
3629 mov [rc.u32], eax
3630 }
3631# endif
3632 return rc.f;
3633}
3634#endif
3635
3636
3637/**
3638 * Clears a bit range within a bitmap.
3639 *
3640 * @param pvBitmap Pointer to the bitmap.
3641 * @param iBitStart The First bit to clear.
3642 * @param iBitEnd The first bit not to clear.
3643 */
3644DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3645{
3646 if (iBitStart < iBitEnd)
3647 {
3648 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3649 int iStart = iBitStart & ~31;
3650 int iEnd = iBitEnd & ~31;
3651 if (iStart == iEnd)
3652 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3653 else
3654 {
3655 /* bits in first dword. */
3656 if (iBitStart & 31)
3657 {
3658 *pu32 &= (1 << (iBitStart & 31)) - 1;
3659 pu32++;
3660 iBitStart = iStart + 32;
3661 }
3662
3663 /* whole dword. */
3664 if (iBitStart != iEnd)
3665 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3666
3667 /* bits in last dword. */
3668 if (iBitEnd & 31)
3669 {
3670 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3671 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3672 }
3673 }
3674 }
3675}
3676
3677
3678/**
3679 * Finds the first clear bit in a bitmap.
3680 *
3681 * @returns Index of the first zero bit.
3682 * @returns -1 if no clear bit was found.
3683 * @param pvBitmap Pointer to the bitmap.
3684 * @param cBits The number of bits in the bitmap. Multiple of 32.
3685 */
3686#if RT_INLINE_ASM_EXTERNAL
3687DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3688#else
3689DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3690{
3691 if (cBits)
3692 {
3693 int32_t iBit;
3694# if RT_INLINE_ASM_GNU_STYLE
3695 RTCCUINTREG uEAX, uECX, uEDI;
3696 cBits = RT_ALIGN_32(cBits, 32);
3697 __asm__ __volatile__("repe; scasl\n\t"
3698 "je 1f\n\t"
3699# ifdef RT_ARCH_AMD64
3700 "lea -4(%%rdi), %%rdi\n\t"
3701 "xorl (%%rdi), %%eax\n\t"
3702 "subq %5, %%rdi\n\t"
3703# else
3704 "lea -4(%%edi), %%edi\n\t"
3705 "xorl (%%edi), %%eax\n\t"
3706 "subl %5, %%edi\n\t"
3707# endif
3708 "shll $3, %%edi\n\t"
3709 "bsfl %%eax, %%edx\n\t"
3710 "addl %%edi, %%edx\n\t"
3711 "1:\t\n"
3712 : "=d" (iBit),
3713 "=&c" (uECX),
3714 "=&D" (uEDI),
3715 "=&a" (uEAX)
3716 : "0" (0xffffffff),
3717 "mr" (pvBitmap),
3718 "1" (cBits >> 5),
3719 "2" (pvBitmap),
3720 "3" (0xffffffff));
3721# else
3722 cBits = RT_ALIGN_32(cBits, 32);
3723 __asm
3724 {
3725# ifdef RT_ARCH_AMD64
3726 mov rdi, [pvBitmap]
3727 mov rbx, rdi
3728# else
3729 mov edi, [pvBitmap]
3730 mov ebx, edi
3731# endif
3732 mov edx, 0ffffffffh
3733 mov eax, edx
3734 mov ecx, [cBits]
3735 shr ecx, 5
3736 repe scasd
3737 je done
3738
3739# ifdef RT_ARCH_AMD64
3740 lea rdi, [rdi - 4]
3741 xor eax, [rdi]
3742 sub rdi, rbx
3743# else
3744 lea edi, [edi - 4]
3745 xor eax, [edi]
3746 sub edi, ebx
3747# endif
3748 shl edi, 3
3749 bsf edx, eax
3750 add edx, edi
3751 done:
3752 mov [iBit], edx
3753 }
3754# endif
3755 return iBit;
3756 }
3757 return -1;
3758}
3759#endif
3760
3761
3762/**
3763 * Finds the next clear bit in a bitmap.
3764 *
3765 * @returns Index of the first zero bit.
3766 * @returns -1 if no clear bit was found.
3767 * @param pvBitmap Pointer to the bitmap.
3768 * @param cBits The number of bits in the bitmap. Multiple of 32.
3769 * @param iBitPrev The bit returned from the last search.
3770 * The search will start at iBitPrev + 1.
3771 */
3772#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3773DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3774#else
3775DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3776{
3777 int iBit = ++iBitPrev & 31;
3778 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3779 cBits -= iBitPrev & ~31;
3780 if (iBit)
3781 {
3782 /* inspect the first dword. */
3783 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3784# if RT_INLINE_ASM_USES_INTRIN
3785 unsigned long ulBit = 0;
3786 if (_BitScanForward(&ulBit, u32))
3787 return ulBit + iBitPrev;
3788 iBit = -1;
3789# else
3790# if RT_INLINE_ASM_GNU_STYLE
3791 __asm__ __volatile__("bsf %1, %0\n\t"
3792 "jnz 1f\n\t"
3793 "movl $-1, %0\n\t"
3794 "1:\n\t"
3795 : "=r" (iBit)
3796 : "r" (u32));
3797# else
3798 __asm
3799 {
3800 mov edx, [u32]
3801 bsf eax, edx
3802 jnz done
3803 mov eax, 0ffffffffh
3804 done:
3805 mov [iBit], eax
3806 }
3807# endif
3808 if (iBit >= 0)
3809 return iBit + iBitPrev;
3810# endif
3811 /* Search the rest of the bitmap, if there is anything. */
3812 if (cBits > 32)
3813 {
3814 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3815 if (iBit >= 0)
3816 return iBit + (iBitPrev & ~31) + 32;
3817 }
3818 }
3819 else
3820 {
3821 /* Search the rest of the bitmap. */
3822 iBit = ASMBitFirstClear(pvBitmap, cBits);
3823 if (iBit >= 0)
3824 return iBit + (iBitPrev & ~31);
3825 }
3826 return iBit;
3827}
3828#endif
3829
3830
3831/**
3832 * Finds the first set bit in a bitmap.
3833 *
3834 * @returns Index of the first set bit.
3835 * @returns -1 if no clear bit was found.
3836 * @param pvBitmap Pointer to the bitmap.
3837 * @param cBits The number of bits in the bitmap. Multiple of 32.
3838 */
3839#if RT_INLINE_ASM_EXTERNAL
3840DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3841#else
3842DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3843{
3844 if (cBits)
3845 {
3846 int32_t iBit;
3847# if RT_INLINE_ASM_GNU_STYLE
3848 RTCCUINTREG uEAX, uECX, uEDI;
3849 cBits = RT_ALIGN_32(cBits, 32);
3850 __asm__ __volatile__("repe; scasl\n\t"
3851 "je 1f\n\t"
3852# ifdef RT_ARCH_AMD64
3853 "lea -4(%%rdi), %%rdi\n\t"
3854 "movl (%%rdi), %%eax\n\t"
3855 "subq %5, %%rdi\n\t"
3856# else
3857 "lea -4(%%edi), %%edi\n\t"
3858 "movl (%%edi), %%eax\n\t"
3859 "subl %5, %%edi\n\t"
3860# endif
3861 "shll $3, %%edi\n\t"
3862 "bsfl %%eax, %%edx\n\t"
3863 "addl %%edi, %%edx\n\t"
3864 "1:\t\n"
3865 : "=d" (iBit),
3866 "=&c" (uECX),
3867 "=&D" (uEDI),
3868 "=&a" (uEAX)
3869 : "0" (0xffffffff),
3870 "mr" (pvBitmap),
3871 "1" (cBits >> 5),
3872 "2" (pvBitmap),
3873 "3" (0));
3874# else
3875 cBits = RT_ALIGN_32(cBits, 32);
3876 __asm
3877 {
3878# ifdef RT_ARCH_AMD64
3879 mov rdi, [pvBitmap]
3880 mov rbx, rdi
3881# else
3882 mov edi, [pvBitmap]
3883 mov ebx, edi
3884# endif
3885 mov edx, 0ffffffffh
3886 xor eax, eax
3887 mov ecx, [cBits]
3888 shr ecx, 5
3889 repe scasd
3890 je done
3891# ifdef RT_ARCH_AMD64
3892 lea rdi, [rdi - 4]
3893 mov eax, [rdi]
3894 sub rdi, rbx
3895# else
3896 lea edi, [edi - 4]
3897 mov eax, [edi]
3898 sub edi, ebx
3899# endif
3900 shl edi, 3
3901 bsf edx, eax
3902 add edx, edi
3903 done:
3904 mov [iBit], edx
3905 }
3906# endif
3907 return iBit;
3908 }
3909 return -1;
3910}
3911#endif
3912
3913
3914/**
3915 * Finds the next set bit in a bitmap.
3916 *
3917 * @returns Index of the next set bit.
3918 * @returns -1 if no set bit was found.
3919 * @param pvBitmap Pointer to the bitmap.
3920 * @param cBits The number of bits in the bitmap. Multiple of 32.
3921 * @param iBitPrev The bit returned from the last search.
3922 * The search will start at iBitPrev + 1.
3923 */
3924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3925DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3926#else
3927DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3928{
3929 int iBit = ++iBitPrev & 31;
3930 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3931 cBits -= iBitPrev & ~31;
3932 if (iBit)
3933 {
3934 /* inspect the first dword. */
3935 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
3936# if RT_INLINE_ASM_USES_INTRIN
3937 unsigned long ulBit = 0;
3938 if (_BitScanForward(&ulBit, u32))
3939 return ulBit + iBitPrev;
3940 iBit = -1;
3941# else
3942# if RT_INLINE_ASM_GNU_STYLE
3943 __asm__ __volatile__("bsf %1, %0\n\t"
3944 "jnz 1f\n\t"
3945 "movl $-1, %0\n\t"
3946 "1:\n\t"
3947 : "=r" (iBit)
3948 : "r" (u32));
3949# else
3950 __asm
3951 {
3952 mov edx, u32
3953 bsf eax, edx
3954 jnz done
3955 mov eax, 0ffffffffh
3956 done:
3957 mov [iBit], eax
3958 }
3959# endif
3960 if (iBit >= 0)
3961 return iBit + iBitPrev;
3962# endif
3963 /* Search the rest of the bitmap, if there is anything. */
3964 if (cBits > 32)
3965 {
3966 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3967 if (iBit >= 0)
3968 return iBit + (iBitPrev & ~31) + 32;
3969 }
3970
3971 }
3972 else
3973 {
3974 /* Search the rest of the bitmap. */
3975 iBit = ASMBitFirstSet(pvBitmap, cBits);
3976 if (iBit >= 0)
3977 return iBit + (iBitPrev & ~31);
3978 }
3979 return iBit;
3980}
3981#endif
3982
3983
3984/**
3985 * Finds the first bit which is set in the given 32-bit integer.
3986 * Bits are numbered from 1 (least significant) to 32.
3987 *
3988 * @returns index [1..32] of the first set bit.
3989 * @returns 0 if all bits are cleared.
3990 * @param u32 Integer to search for set bits.
3991 * @remark Similar to ffs() in BSD.
3992 */
3993DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
3994{
3995# if RT_INLINE_ASM_USES_INTRIN
3996 unsigned long iBit;
3997 if (_BitScanForward(&iBit, u32))
3998 iBit++;
3999 else
4000 iBit = 0;
4001# elif RT_INLINE_ASM_GNU_STYLE
4002 uint32_t iBit;
4003 __asm__ __volatile__("bsf %1, %0\n\t"
4004 "jnz 1f\n\t"
4005 "xorl %0, %0\n\t"
4006 "jmp 2f\n"
4007 "1:\n\t"
4008 "incl %0\n"
4009 "2:\n\t"
4010 : "=r" (iBit)
4011 : "rm" (u32));
4012# else
4013 uint32_t iBit;
4014 _asm
4015 {
4016 bsf eax, [u32]
4017 jnz found
4018 xor eax, eax
4019 jmp done
4020 found:
4021 inc eax
4022 done:
4023 mov [iBit], eax
4024 }
4025# endif
4026 return iBit;
4027}
4028
4029
4030/**
4031 * Finds the first bit which is set in the given 32-bit integer.
4032 * Bits are numbered from 1 (least significant) to 32.
4033 *
4034 * @returns index [1..32] of the first set bit.
4035 * @returns 0 if all bits are cleared.
4036 * @param i32 Integer to search for set bits.
4037 * @remark Similar to ffs() in BSD.
4038 */
4039DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4040{
4041 return ASMBitFirstSetU32((uint32_t)i32);
4042}
4043
4044
4045/**
4046 * Finds the last bit which is set in the given 32-bit integer.
4047 * Bits are numbered from 1 (least significant) to 32.
4048 *
4049 * @returns index [1..32] of the last set bit.
4050 * @returns 0 if all bits are cleared.
4051 * @param u32 Integer to search for set bits.
4052 * @remark Similar to fls() in BSD.
4053 */
4054DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4055{
4056# if RT_INLINE_ASM_USES_INTRIN
4057 unsigned long iBit;
4058 if (_BitScanReverse(&iBit, u32))
4059 iBit++;
4060 else
4061 iBit = 0;
4062# elif RT_INLINE_ASM_GNU_STYLE
4063 uint32_t iBit;
4064 __asm__ __volatile__("bsrl %1, %0\n\t"
4065 "jnz 1f\n\t"
4066 "xorl %0, %0\n\t"
4067 "jmp 2f\n"
4068 "1:\n\t"
4069 "incl %0\n"
4070 "2:\n\t"
4071 : "=r" (iBit)
4072 : "rm" (u32));
4073# else
4074 uint32_t iBit;
4075 _asm
4076 {
4077 bsr eax, [u32]
4078 jnz found
4079 xor eax, eax
4080 jmp done
4081 found:
4082 inc eax
4083 done:
4084 mov [iBit], eax
4085 }
4086# endif
4087 return iBit;
4088}
4089
4090
4091/**
4092 * Finds the last bit which is set in the given 32-bit integer.
4093 * Bits are numbered from 1 (least significant) to 32.
4094 *
4095 * @returns index [1..32] of the last set bit.
4096 * @returns 0 if all bits are cleared.
4097 * @param i32 Integer to search for set bits.
4098 * @remark Similar to fls() in BSD.
4099 */
4100DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4101{
4102 return ASMBitLastSetS32((uint32_t)i32);
4103}
4104
4105
4106/**
4107 * Reverse the byte order of the given 32-bit integer.
4108 * @param u32 Integer
4109 */
4110DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4111{
4112#if RT_INLINE_ASM_USES_INTRIN
4113 u32 = _byteswap_ulong(u32);
4114#elif RT_INLINE_ASM_GNU_STYLE
4115 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4116#else
4117 _asm
4118 {
4119 mov eax, [u32]
4120 bswap eax
4121 mov [u32], eax
4122 }
4123#endif
4124 return u32;
4125}
4126
4127/** @} */
4128
4129
4130/** @} */
4131#endif
4132
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette