VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 6385

Last change on this file was in revision 6385, checked in by vboxsync, 17 years ago

small optimization

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 103.3 KB
1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31/** @todo #include <iprt/param.h> for PAGE_SIZE. */
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler.
34 * Otherwise defined as 0.
35 */
36
37#ifdef _MSC_VER
38# if _MSC_VER >= 1400
39# define RT_INLINE_ASM_USES_INTRIN 1
40# include <intrin.h>
41 /* Emit the intrinsics at all optimization levels. */
42# pragma intrinsic(_ReadWriteBarrier)
43# pragma intrinsic(__cpuid)
44# pragma intrinsic(_enable)
45# pragma intrinsic(_disable)
46# pragma intrinsic(__rdtsc)
47# pragma intrinsic(__readmsr)
48# pragma intrinsic(__writemsr)
49# pragma intrinsic(__outbyte)
50# pragma intrinsic(__outword)
51# pragma intrinsic(__outdword)
52# pragma intrinsic(__inbyte)
53# pragma intrinsic(__inword)
54# pragma intrinsic(__indword)
55# pragma intrinsic(__invlpg)
56# pragma intrinsic(__stosd)
57# pragma intrinsic(__stosw)
58# pragma intrinsic(__stosb)
59# pragma intrinsic(__readcr0)
60# pragma intrinsic(__readcr2)
61# pragma intrinsic(__readcr3)
62# pragma intrinsic(__readcr4)
63# pragma intrinsic(__writecr0)
64# pragma intrinsic(__writecr3)
65# pragma intrinsic(__writecr4)
66# pragma intrinsic(_BitScanForward)
67# pragma intrinsic(_BitScanReverse)
68# pragma intrinsic(_bittest)
69# pragma intrinsic(_bittestandset)
70# pragma intrinsic(_bittestandreset)
71# pragma intrinsic(_bittestandcomplement)
72# pragma intrinsic(_byteswap_ushort)
73# pragma intrinsic(_byteswap_ulong)
74# pragma intrinsic(_interlockedbittestandset)
75# pragma intrinsic(_interlockedbittestandreset)
76# pragma intrinsic(_InterlockedAnd)
77# pragma intrinsic(_InterlockedOr)
78# pragma intrinsic(_InterlockedIncrement)
79# pragma intrinsic(_InterlockedDecrement)
80# pragma intrinsic(_InterlockedExchange)
81# pragma intrinsic(_InterlockedCompareExchange)
82# pragma intrinsic(_InterlockedCompareExchange64)
83# ifdef RT_ARCH_AMD64
84# pragma intrinsic(__stosq)
85# pragma intrinsic(__readcr8)
86# pragma intrinsic(__writecr8)
87# pragma intrinsic(_byteswap_uint64)
88# pragma intrinsic(_InterlockedExchange64)
89# endif
90# endif
91#endif
92#ifndef RT_INLINE_ASM_USES_INTRIN
93# define RT_INLINE_ASM_USES_INTRIN 0
94#endif
95
96
97
98/** @defgroup grp_asm ASM - Assembly Routines
99 * @ingroup grp_rt
100 * @{
101 */
102
103/** @def RT_INLINE_ASM_EXTERNAL
104 * Defined as 1 if the compiler does not support inline assembly.
105 * The ASM* functions will then be implemented in an external .asm file.
106 *
107 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
108 * inline assembly in their AMD64 compiler.
109 */
110#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
111# define RT_INLINE_ASM_EXTERNAL 1
112#else
113# define RT_INLINE_ASM_EXTERNAL 0
114#endif
115
116/** @def RT_INLINE_ASM_GNU_STYLE
117 * Defined as 1 if the compiler understands GNU style inline assembly.
118 */
119#if defined(_MSC_VER)
120# define RT_INLINE_ASM_GNU_STYLE 0
121#else
122# define RT_INLINE_ASM_GNU_STYLE 1
123#endif
124
125
126/** @todo find a more proper place for this structure? */
127#pragma pack(1)
128/** IDTR */
129typedef struct RTIDTR
130{
131 /** Size of the IDT. */
132 uint16_t cbIdt;
133 /** Address of the IDT. */
134 uintptr_t pIdt;
135} RTIDTR, *PRTIDTR;
136#pragma pack()
137
138#pragma pack(1)
139/** GDTR */
140typedef struct RTGDTR
141{
142 /** Size of the GDT. */
143 uint16_t cbGdt;
144 /** Address of the GDT. */
145 uintptr_t pGdt;
146} RTGDTR, *PRTGDTR;
147#pragma pack()
148
149
150/** @def ASMReturnAddress
151 * Gets the return address of the current (or calling if you like) function or method.
152 */
153#ifdef _MSC_VER
154# ifdef __cplusplus
155extern "C"
156# endif
157void * _ReturnAddress(void);
158# pragma intrinsic(_ReturnAddress)
159# define ASMReturnAddress() _ReturnAddress()
160#elif defined(__GNUC__) || defined(__DOXYGEN__)
161# define ASMReturnAddress() __builtin_return_address(0)
162#else
163# error "Unsupported compiler."
164#endif
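
/* Usage sketch (illustrative only, not part of the IPRT API; disabled with #if 0):
   ASMReturnAddress() evaluates to the caller's return address, which is handy when
   an API wants to record who invoked it. */
#if 0
static void ExampleRecordCaller(void)
{
    void *pvCaller = ASMReturnAddress();
    (void)pvCaller; /* e.g. hand it to the caller's logging facility. */
}
#endif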
165
166
167/**
168 * Gets the content of the IDTR CPU register.
169 * @param pIdtr Where to store the IDTR contents.
170 */
171#if RT_INLINE_ASM_EXTERNAL
172DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
173#else
174DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
175{
176# if RT_INLINE_ASM_GNU_STYLE
177 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
178# else
179 __asm
180 {
181# ifdef RT_ARCH_AMD64
182 mov rax, [pIdtr]
183 sidt [rax]
184# else
185 mov eax, [pIdtr]
186 sidt [eax]
187# endif
188 }
189# endif
190}
191#endif
192
193
194/**
195 * Sets the content of the IDTR CPU register.
196 * @param pIdtr Where to load the IDTR contents from
197 */
198#if RT_INLINE_ASM_EXTERNAL
199DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
200#else
201DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
202{
203# if RT_INLINE_ASM_GNU_STYLE
204 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
205# else
206 __asm
207 {
208# ifdef RT_ARCH_AMD64
209 mov rax, [pIdtr]
210 lidt [rax]
211# else
212 mov eax, [pIdtr]
213 lidt [eax]
214# endif
215 }
216# endif
217}
218#endif
219
220
221/**
222 * Gets the content of the GDTR CPU register.
223 * @param pGdtr Where to store the GDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
227#else
228DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pGdtr]
237 sgdt [rax]
238# else
239 mov eax, [pGdtr]
240 sgdt [eax]
241# endif
242 }
243# endif
244}
245#endif
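
/* Usage sketch (illustrative only; disabled with #if 0): reading the descriptor
   table registers into the packed RTIDTR / RTGDTR structures defined above. */
#if 0
static void ExampleReadDescriptorTables(void)
{
    RTIDTR Idtr;
    RTGDTR Gdtr;
    ASMGetIDTR(&Idtr);
    ASMGetGDTR(&Gdtr);
    /* Idtr.pIdt/cbIdt and Gdtr.pGdt/cbGdt now hold the base addresses and limits. */
}
#endif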
246
247/**
248 * Get the cs register.
249 * @returns cs.
250 */
251#if RT_INLINE_ASM_EXTERNAL
252DECLASM(RTSEL) ASMGetCS(void);
253#else
254DECLINLINE(RTSEL) ASMGetCS(void)
255{
256 RTSEL SelCS;
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
259# else
260 __asm
261 {
262 mov ax, cs
263 mov [SelCS], ax
264 }
265# endif
266 return SelCS;
267}
268#endif
269
270
271/**
272 * Get the DS register.
273 * @returns DS.
274 */
275#if RT_INLINE_ASM_EXTERNAL
276DECLASM(RTSEL) ASMGetDS(void);
277#else
278DECLINLINE(RTSEL) ASMGetDS(void)
279{
280 RTSEL SelDS;
281# if RT_INLINE_ASM_GNU_STYLE
282 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
283# else
284 __asm
285 {
286 mov ax, ds
287 mov [SelDS], ax
288 }
289# endif
290 return SelDS;
291}
292#endif
293
294
295/**
296 * Get the ES register.
297 * @returns ES.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetES(void);
301#else
302DECLINLINE(RTSEL) ASMGetES(void)
303{
304 RTSEL SelES;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
307# else
308 __asm
309 {
310 mov ax, es
311 mov [SelES], ax
312 }
313# endif
314 return SelES;
315}
316#endif
317
318
319/**
320 * Get the FS register.
321 * @returns FS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetFS(void);
325#else
326DECLINLINE(RTSEL) ASMGetFS(void)
327{
328 RTSEL SelFS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
331# else
332 __asm
333 {
334 mov ax, fs
335 mov [SelFS], ax
336 }
337# endif
338 return SelFS;
339}
340#endif
341
342
343/**
344 * Get the GS register.
345 * @returns GS.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetGS(void);
349#else
350DECLINLINE(RTSEL) ASMGetGS(void)
351{
352 RTSEL SelGS;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
355# else
356 __asm
357 {
358 mov ax, gs
359 mov [SelGS], ax
360 }
361# endif
362 return SelGS;
363}
364#endif
365
366
367/**
368 * Get the SS register.
369 * @returns SS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetSS(void);
373#else
374DECLINLINE(RTSEL) ASMGetSS(void)
375{
376 RTSEL SelSS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
379# else
380 __asm
381 {
382 mov ax, ss
383 mov [SelSS], ax
384 }
385# endif
386 return SelSS;
387}
388#endif
389
390
391/**
392 * Get the TR register.
393 * @returns TR.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetTR(void);
397#else
398DECLINLINE(RTSEL) ASMGetTR(void)
399{
400 RTSEL SelTR;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
403# else
404 __asm
405 {
406 str ax
407 mov [SelTR], ax
408 }
409# endif
410 return SelTR;
411}
412#endif
413
414
415/**
416 * Get the [RE]FLAGS register.
417 * @returns [RE]FLAGS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTCCUINTREG) ASMGetFlags(void);
421#else
422DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
423{
424 RTCCUINTREG uFlags;
425# if RT_INLINE_ASM_GNU_STYLE
426# ifdef RT_ARCH_AMD64
427 __asm__ __volatile__("pushfq\n\t"
428 "popq %0\n\t"
429 : "=g" (uFlags));
430# else
431 __asm__ __volatile__("pushfl\n\t"
432 "popl %0\n\t"
433 : "=g" (uFlags));
434# endif
435# else
436 __asm
437 {
438# ifdef RT_ARCH_AMD64
439 pushfq
440 pop [uFlags]
441# else
442 pushfd
443 pop [uFlags]
444# endif
445 }
446# endif
447 return uFlags;
448}
449#endif
450
451
452/**
453 * Set the [RE]FLAGS register.
454 * @param uFlags The new [RE]FLAGS value.
455 */
456#if RT_INLINE_ASM_EXTERNAL
457DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
458#else
459DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
460{
461# if RT_INLINE_ASM_GNU_STYLE
462# ifdef RT_ARCH_AMD64
463 __asm__ __volatile__("pushq %0\n\t"
464 "popfq\n\t"
465 : : "g" (uFlags));
466# else
467 __asm__ __volatile__("pushl %0\n\t"
468 "popfl\n\t"
469 : : "g" (uFlags));
470# endif
471# else
472 __asm
473 {
474# ifdef RT_ARCH_AMD64
475 push [uFlags]
476 popfq
477# else
478 push [uFlags]
479 popfd
480# endif
481 }
482# endif
483}
484#endif
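
/* Usage sketch (illustrative only; disabled with #if 0): the usual save / modify /
   restore pattern for the flags register. For the common "disable interrupts"
   case, prefer ASMIntDisableFlags() further down in this file. */
#if 0
static void ExampleSaveRestoreFlags(void)
{
    RTCCUINTREG const fSavedFlags = ASMGetFlags();
    /* ... code that may change [RE]FLAGS ... */
    ASMSetFlags(fSavedFlags);
}
#endif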
485
486
487/**
488 * Gets the content of the CPU timestamp counter register.
489 *
490 * @returns TSC.
491 */
492#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
493DECLASM(uint64_t) ASMReadTSC(void);
494#else
495DECLINLINE(uint64_t) ASMReadTSC(void)
496{
497 RTUINT64U u;
498# if RT_INLINE_ASM_GNU_STYLE
499 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
500# else
501# if RT_INLINE_ASM_USES_INTRIN
502 u.u = __rdtsc();
503# else
504 __asm
505 {
506 rdtsc
507 mov [u.s.Lo], eax
508 mov [u.s.Hi], edx
509 }
510# endif
511# endif
512 return u.u;
513}
514#endif
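
/* Usage sketch (illustrative only; disabled with #if 0): measuring an interval in
   raw TSC ticks. The TSC frequency and cross-CPU synchronization are platform
   specific, so converting ticks to time is outside the scope of this header. */
#if 0
static uint64_t ExampleMeasureTicks(void (*pfnWork)(void))
{
    uint64_t const uStart = ASMReadTSC();
    pfnWork();
    return ASMReadTSC() - uStart;
}
#endif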
515
516
517/**
518 * Performs the cpuid instruction returning all registers.
519 *
520 * @param uOperator CPUID operation (eax).
521 * @param pvEAX Where to store eax.
522 * @param pvEBX Where to store ebx.
523 * @param pvECX Where to store ecx.
524 * @param pvEDX Where to store edx.
525 * @remark We're using void pointers to ease the use of special bitfield structures and such.
526 */
527#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
528DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
529#else
530DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
531{
532# if RT_INLINE_ASM_GNU_STYLE
533# ifdef RT_ARCH_AMD64
534 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
535 __asm__ ("cpuid\n\t"
536 : "=a" (uRAX),
537 "=b" (uRBX),
538 "=c" (uRCX),
539 "=d" (uRDX)
540 : "0" (uOperator));
541 *(uint32_t *)pvEAX = (uint32_t)uRAX;
542 *(uint32_t *)pvEBX = (uint32_t)uRBX;
543 *(uint32_t *)pvECX = (uint32_t)uRCX;
544 *(uint32_t *)pvEDX = (uint32_t)uRDX;
545# else
546 __asm__ ("xchgl %%ebx, %1\n\t"
547 "cpuid\n\t"
548 "xchgl %%ebx, %1\n\t"
549 : "=a" (*(uint32_t *)pvEAX),
550 "=r" (*(uint32_t *)pvEBX),
551 "=c" (*(uint32_t *)pvECX),
552 "=d" (*(uint32_t *)pvEDX)
553 : "0" (uOperator));
554# endif
555
556# elif RT_INLINE_ASM_USES_INTRIN
557 int aInfo[4];
558 __cpuid(aInfo, uOperator);
559 *(uint32_t *)pvEAX = aInfo[0];
560 *(uint32_t *)pvEBX = aInfo[1];
561 *(uint32_t *)pvECX = aInfo[2];
562 *(uint32_t *)pvEDX = aInfo[3];
563
564# else
565 uint32_t uEAX;
566 uint32_t uEBX;
567 uint32_t uECX;
568 uint32_t uEDX;
569 __asm
570 {
571 push ebx
572 mov eax, [uOperator]
573 cpuid
574 mov [uEAX], eax
575 mov [uEBX], ebx
576 mov [uECX], ecx
577 mov [uEDX], edx
578 pop ebx
579 }
580 *(uint32_t *)pvEAX = uEAX;
581 *(uint32_t *)pvEBX = uEBX;
582 *(uint32_t *)pvECX = uECX;
583 *(uint32_t *)pvEDX = uEDX;
584# endif
585}
586#endif
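
/* Usage sketch (illustrative only; disabled with #if 0): fetching the 12 character
   CPU vendor string via standard CPUID leaf 0. Assumes memcpy() from <string.h>
   is available to the caller. */
#if 0
static void ExampleGetVendorString(char pszVendor[13])
{
    uint32_t uEAX, uEBX, uECX, uEDX;
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    /* The vendor string is returned in EBX, EDX, ECX - in that order. */
    memcpy(&pszVendor[0], &uEBX, 4);
    memcpy(&pszVendor[4], &uEDX, 4);
    memcpy(&pszVendor[8], &uECX, 4);
    pszVendor[12] = '\0';
}
#endif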
587
588
589/**
590 * Performs the cpuid instruction returning all registers.
591 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
592 *
593 * @param uOperator CPUID operation (eax).
594 * @param uIdxECX ecx index
595 * @param pvEAX Where to store eax.
596 * @param pvEBX Where to store ebx.
597 * @param pvECX Where to store ecx.
598 * @param pvEDX Where to store edx.
599 * @remark We're using void pointers to ease the use of special bitfield structures and such.
600 */
601#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
602DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
603#else
604DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
605{
606# if RT_INLINE_ASM_GNU_STYLE
607# ifdef RT_ARCH_AMD64
608 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
609 __asm__ ("cpuid\n\t"
610 : "=a" (uRAX),
611 "=b" (uRBX),
612 "=c" (uRCX),
613 "=d" (uRDX)
614 : "0" (uOperator),
615 "2" (uIdxECX));
616 *(uint32_t *)pvEAX = (uint32_t)uRAX;
617 *(uint32_t *)pvEBX = (uint32_t)uRBX;
618 *(uint32_t *)pvECX = (uint32_t)uRCX;
619 *(uint32_t *)pvEDX = (uint32_t)uRDX;
620# else
621 __asm__ ("xchgl %%ebx, %1\n\t"
622 "cpuid\n\t"
623 "xchgl %%ebx, %1\n\t"
624 : "=a" (*(uint32_t *)pvEAX),
625 "=r" (*(uint32_t *)pvEBX),
626 "=c" (*(uint32_t *)pvECX),
627 "=d" (*(uint32_t *)pvEDX)
628 : "0" (uOperator),
629 "2" (uIdxECX));
630# endif
631
632# elif RT_INLINE_ASM_USES_INTRIN
633 int aInfo[4];
634 /** @todo The __cpuid intrinsic ignores uIdxECX; a sub-leaf aware intrinsic (e.g. __cpuidex on newer compilers) is needed for correct results here. */
635 __cpuid(aInfo, uOperator);
636 *(uint32_t *)pvEAX = aInfo[0];
637 *(uint32_t *)pvEBX = aInfo[1];
638 *(uint32_t *)pvECX = aInfo[2];
639 *(uint32_t *)pvEDX = aInfo[3];
640
641# else
642 uint32_t uEAX;
643 uint32_t uEBX;
644 uint32_t uECX;
645 uint32_t uEDX;
646 __asm
647 {
648 push ebx
649 mov eax, [uOperator]
650 mov ecx, [uIdxECX]
651 cpuid
652 mov [uEAX], eax
653 mov [uEBX], ebx
654 mov [uECX], ecx
655 mov [uEDX], edx
656 pop ebx
657 }
658 *(uint32_t *)pvEAX = uEAX;
659 *(uint32_t *)pvEBX = uEBX;
660 *(uint32_t *)pvECX = uECX;
661 *(uint32_t *)pvEDX = uEDX;
662# endif
663}
664#endif
665
666
667/**
668 * Performs the cpuid instruction returning ecx and edx.
669 *
670 * @param uOperator CPUID operation (eax).
671 * @param pvECX Where to store ecx.
672 * @param pvEDX Where to store edx.
673 * @remark We're using void pointers to ease the use of special bitfield structures and such.
674 */
675#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
676DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
677#else
678DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
679{
680 uint32_t uEBX;
681 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
682}
683#endif
684
685
686/**
687 * Performs the cpuid instruction returning edx.
688 *
689 * @param uOperator CPUID operation (eax).
690 * @returns EDX after cpuid operation.
691 */
692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
693DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
694#else
695DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
696{
697 RTCCUINTREG xDX;
698# if RT_INLINE_ASM_GNU_STYLE
699# ifdef RT_ARCH_AMD64
700 RTCCUINTREG uSpill;
701 __asm__ ("cpuid"
702 : "=a" (uSpill),
703 "=d" (xDX)
704 : "0" (uOperator)
705 : "rbx", "rcx");
706# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
707 __asm__ ("push %%ebx\n\t"
708 "cpuid\n\t"
709 "pop %%ebx\n\t"
710 : "=a" (uOperator),
711 "=d" (xDX)
712 : "0" (uOperator)
713 : "ecx");
714# else
715 __asm__ ("cpuid"
716 : "=a" (uOperator),
717 "=d" (xDX)
718 : "0" (uOperator)
719 : "ebx", "ecx");
720# endif
721
722# elif RT_INLINE_ASM_USES_INTRIN
723 int aInfo[4];
724 __cpuid(aInfo, uOperator);
725 xDX = aInfo[3];
726
727# else
728 __asm
729 {
730 push ebx
731 mov eax, [uOperator]
732 cpuid
733 mov [xDX], edx
734 pop ebx
735 }
736# endif
737 return (uint32_t)xDX;
738}
739#endif
740
741
742/**
743 * Performs the cpuid instruction returning ecx.
744 *
745 * @param uOperator CPUID operation (eax).
746 * @returns ECX after cpuid operation.
747 */
748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
749DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
750#else
751DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
752{
753 RTCCUINTREG xCX;
754# if RT_INLINE_ASM_GNU_STYLE
755# ifdef RT_ARCH_AMD64
756 RTCCUINTREG uSpill;
757 __asm__ ("cpuid"
758 : "=a" (uSpill),
759 "=c" (xCX)
760 : "0" (uOperator)
761 : "rbx", "rdx");
762# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
763 __asm__ ("push %%ebx\n\t"
764 "cpuid\n\t"
765 "pop %%ebx\n\t"
766 : "=a" (uOperator),
767 "=c" (xCX)
768 : "0" (uOperator)
769 : "edx");
770# else
771 __asm__ ("cpuid"
772 : "=a" (uOperator),
773 "=c" (xCX)
774 : "0" (uOperator)
775 : "ebx", "edx");
776
777# endif
778
779# elif RT_INLINE_ASM_USES_INTRIN
780 int aInfo[4];
781 __cpuid(aInfo, uOperator);
782 xCX = aInfo[2];
783
784# else
785 __asm
786 {
787 push ebx
788 mov eax, [uOperator]
789 cpuid
790 mov [xCX], ecx
791 pop ebx
792 }
793# endif
794 return (uint32_t)xCX;
795}
796#endif
797
798
799/**
800 * Checks if the current CPU supports CPUID.
801 *
802 * @returns true if CPUID is supported.
803 */
804DECLINLINE(bool) ASMHasCpuId(void)
805{
806#ifdef RT_ARCH_AMD64
807 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
808#else /* !RT_ARCH_AMD64 */
809 bool fRet = false;
810# if RT_INLINE_ASM_GNU_STYLE
811 uint32_t u1;
812 uint32_t u2;
813 __asm__ ("pushf\n\t"
814 "pop %1\n\t"
815 "mov %1, %2\n\t"
816 "xorl $0x200000, %1\n\t"
817 "push %1\n\t"
818 "popf\n\t"
819 "pushf\n\t"
820 "pop %1\n\t"
821 "cmpl %1, %2\n\t"
822 "setne %0\n\t"
823 "push %2\n\t"
824 "popf\n\t"
825 : "=m" (fRet), "=r" (u1), "=r" (u2));
826# else
827 __asm
828 {
829 pushfd
830 pop eax
831 mov ebx, eax
832 xor eax, 0200000h
833 push eax
834 popfd
835 pushfd
836 pop eax
837 cmp eax, ebx
838 setne fRet
839 push ebx
840 popfd
841 }
842# endif
843 return fRet;
844#endif /* !RT_ARCH_AMD64 */
845}
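
/* Usage sketch (illustrative only; disabled with #if 0): guard CPUID usage with
   ASMHasCpuId() on 32-bit hosts and then test a feature bit from leaf 1. Bit 4 of
   EDX (TSC) is used purely as an example; see the CPU vendor documentation for the
   authoritative bit layout. */
#if 0
static bool ExampleHasTsc(void)
{
    if (!ASMHasCpuId())
        return false;
    return (ASMCpuId_EDX(1) & (1U << 4)) != 0; /* EDX bit 4 = TSC */
}
#endif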
846
847
848/**
849 * Gets the APIC ID of the current CPU.
850 *
851 * @returns the APIC ID.
852 */
853#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
854DECLASM(uint8_t) ASMGetApicId(void);
855#else
856DECLINLINE(uint8_t) ASMGetApicId(void)
857{
858 RTCCUINTREG xBX;
859# if RT_INLINE_ASM_GNU_STYLE
860# ifdef RT_ARCH_AMD64
861 RTCCUINTREG uSpill;
862 __asm__ ("cpuid"
863 : "=a" (uSpill),
864 "=b" (xBX)
865 : "0" (1)
866 : "rcx", "rdx");
867# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
868 RTCCUINTREG uSpill;
869 __asm__ ("mov %%ebx,%1\n\t"
870 "cpuid\n\t"
871 "xchgl %%ebx,%1\n\t"
872 : "=a" (uSpill),
873 "=r" (xBX)
874 : "0" (1)
875 : "ecx", "edx");
876# else
877 RTCCUINTREG uSpill;
878 __asm__ ("cpuid"
879 : "=a" (uSpill),
880 "=b" (xBX)
881 : "0" (1)
882 : "ecx", "edx");
883# endif
884
885# elif RT_INLINE_ASM_USES_INTRIN
886 int aInfo[4];
887 __cpuid(aInfo, 1);
888 xBX = aInfo[1];
889
890# else
891 __asm
892 {
893 push ebx
894 mov eax, 1
895 cpuid
896 mov [xBX], ebx
897 pop ebx
898 }
899# endif
900 return (uint8_t)(xBX >> 24);
901}
902#endif
903
904/**
905 * Get cr0.
906 * @returns cr0.
907 */
908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
909DECLASM(RTCCUINTREG) ASMGetCR0(void);
910#else
911DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
912{
913 RTCCUINTREG uCR0;
914# if RT_INLINE_ASM_USES_INTRIN
915 uCR0 = __readcr0();
916
917# elif RT_INLINE_ASM_GNU_STYLE
918# ifdef RT_ARCH_AMD64
919 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
920# else
921 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
922# endif
923# else
924 __asm
925 {
926# ifdef RT_ARCH_AMD64
927 mov rax, cr0
928 mov [uCR0], rax
929# else
930 mov eax, cr0
931 mov [uCR0], eax
932# endif
933 }
934# endif
935 return uCR0;
936}
937#endif
938
939
940/**
941 * Sets the CR0 register.
942 * @param uCR0 The new CR0 value.
943 */
944#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
945DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
946#else
947DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
948{
949# if RT_INLINE_ASM_USES_INTRIN
950 __writecr0(uCR0);
951
952# elif RT_INLINE_ASM_GNU_STYLE
953# ifdef RT_ARCH_AMD64
954 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
955# else
956 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
957# endif
958# else
959 __asm
960 {
961# ifdef RT_ARCH_AMD64
962 mov rax, [uCR0]
963 mov cr0, rax
964# else
965 mov eax, [uCR0]
966 mov cr0, eax
967# endif
968 }
969# endif
970}
971#endif
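
/* Usage sketch (illustrative only; disabled with #if 0): the typical read-modify-
   write pattern for control registers, ring-0 only. X86_CR0_WP is assumed to come
   from iprt/x86.h, which this header does not include itself. */
#if 0
static void ExampleSetCr0WriteProtect(void)
{
    RTCCUINTREG uCR0 = ASMGetCR0();
    ASMSetCR0(uCR0 | X86_CR0_WP);
}
#endif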
972
973
974/**
975 * Get cr2.
976 * @returns cr2.
977 */
978#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
979DECLASM(RTCCUINTREG) ASMGetCR2(void);
980#else
981DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
982{
983 RTCCUINTREG uCR2;
984# if RT_INLINE_ASM_USES_INTRIN
985 uCR2 = __readcr2();
986
987# elif RT_INLINE_ASM_GNU_STYLE
988# ifdef RT_ARCH_AMD64
989 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
990# else
991 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
992# endif
993# else
994 __asm
995 {
996# ifdef RT_ARCH_AMD64
997 mov rax, cr2
998 mov [uCR2], rax
999# else
1000 mov eax, cr2
1001 mov [uCR2], eax
1002# endif
1003 }
1004# endif
1005 return uCR2;
1006}
1007#endif
1008
1009
1010/**
1011 * Sets the CR2 register.
1012 * @param uCR2 The new CR2 value.
1013 */
1014#if RT_INLINE_ASM_EXTERNAL
1015DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1016#else
1017DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1018{
1019# if RT_INLINE_ASM_GNU_STYLE
1020# ifdef RT_ARCH_AMD64
1021 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1022# else
1023 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1024# endif
1025# else
1026 __asm
1027 {
1028# ifdef RT_ARCH_AMD64
1029 mov rax, [uCR2]
1030 mov cr2, rax
1031# else
1032 mov eax, [uCR2]
1033 mov cr2, eax
1034# endif
1035 }
1036# endif
1037}
1038#endif
1039
1040
1041/**
1042 * Get cr3.
1043 * @returns cr3.
1044 */
1045#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1046DECLASM(RTCCUINTREG) ASMGetCR3(void);
1047#else
1048DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1049{
1050 RTCCUINTREG uCR3;
1051# if RT_INLINE_ASM_USES_INTRIN
1052 uCR3 = __readcr3();
1053
1054# elif RT_INLINE_ASM_GNU_STYLE
1055# ifdef RT_ARCH_AMD64
1056 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1057# else
1058 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1059# endif
1060# else
1061 __asm
1062 {
1063# ifdef RT_ARCH_AMD64
1064 mov rax, cr3
1065 mov [uCR3], rax
1066# else
1067 mov eax, cr3
1068 mov [uCR3], eax
1069# endif
1070 }
1071# endif
1072 return uCR3;
1073}
1074#endif
1075
1076
1077/**
1078 * Sets the CR3 register.
1079 *
1080 * @param uCR3 New CR3 value.
1081 */
1082#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1083DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1084#else
1085DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1086{
1087# if RT_INLINE_ASM_USES_INTRIN
1088 __writecr3(uCR3);
1089
1090# elif RT_INLINE_ASM_GNU_STYLE
1091# ifdef RT_ARCH_AMD64
1092 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1093# else
1094 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1095# endif
1096# else
1097 __asm
1098 {
1099# ifdef RT_ARCH_AMD64
1100 mov rax, [uCR3]
1101 mov cr3, rax
1102# else
1103 mov eax, [uCR3]
1104 mov cr3, eax
1105# endif
1106 }
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Reloads the CR3 register.
1114 */
1115#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1116DECLASM(void) ASMReloadCR3(void);
1117#else
1118DECLINLINE(void) ASMReloadCR3(void)
1119{
1120# if RT_INLINE_ASM_USES_INTRIN
1121 __writecr3(__readcr3());
1122
1123# elif RT_INLINE_ASM_GNU_STYLE
1124 RTCCUINTREG u;
1125# ifdef RT_ARCH_AMD64
1126 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1127 "movq %0, %%cr3\n\t"
1128 : "=r" (u));
1129# else
1130 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1131 "movl %0, %%cr3\n\t"
1132 : "=r" (u));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr3
1139 mov cr3, rax
1140# else
1141 mov eax, cr3
1142 mov cr3, eax
1143# endif
1144 }
1145# endif
1146}
1147#endif
1148
1149
1150/**
1151 * Get cr4.
1152 * @returns cr4.
1153 */
1154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1155DECLASM(RTCCUINTREG) ASMGetCR4(void);
1156#else
1157DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1158{
1159 RTCCUINTREG uCR4;
1160# if RT_INLINE_ASM_USES_INTRIN
1161 uCR4 = __readcr4();
1162
1163# elif RT_INLINE_ASM_GNU_STYLE
1164# ifdef RT_ARCH_AMD64
1165 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1166# else
1167 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1168# endif
1169# else
1170 __asm
1171 {
1172# ifdef RT_ARCH_AMD64
1173 mov rax, cr4
1174 mov [uCR4], rax
1175# else
1176 push eax /* just in case */
1177 /*mov eax, cr4*/
1178 _emit 0x0f
1179 _emit 0x20
1180 _emit 0xe0
1181 mov [uCR4], eax
1182 pop eax
1183# endif
1184 }
1185# endif
1186 return uCR4;
1187}
1188#endif
1189
1190
1191/**
1192 * Sets the CR4 register.
1193 *
1194 * @param uCR4 New CR4 value.
1195 */
1196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1197DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1198#else
1199DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1200{
1201# if RT_INLINE_ASM_USES_INTRIN
1202 __writecr4(uCR4);
1203
1204# elif RT_INLINE_ASM_GNU_STYLE
1205# ifdef RT_ARCH_AMD64
1206 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1207# else
1208 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1209# endif
1210# else
1211 __asm
1212 {
1213# ifdef RT_ARCH_AMD64
1214 mov rax, [uCR4]
1215 mov cr4, rax
1216# else
1217 mov eax, [uCR4]
1218 _emit 0x0F
1219 _emit 0x22
1220 _emit 0xE0 /* mov cr4, eax */
1221# endif
1222 }
1223# endif
1224}
1225#endif
1226
1227
1228/**
1229 * Get cr8.
1230 * @returns cr8.
1231 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1232 */
1233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1234DECLASM(RTCCUINTREG) ASMGetCR8(void);
1235#else
1236DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1237{
1238# ifdef RT_ARCH_AMD64
1239 RTCCUINTREG uCR8;
1240# if RT_INLINE_ASM_USES_INTRIN
1241 uCR8 = __readcr8();
1242
1243# elif RT_INLINE_ASM_GNU_STYLE
1244 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1245# else
1246 __asm
1247 {
1248 mov rax, cr8
1249 mov [uCR8], rax
1250 }
1251# endif
1252 return uCR8;
1253# else /* !RT_ARCH_AMD64 */
1254 return 0;
1255# endif /* !RT_ARCH_AMD64 */
1256}
1257#endif
1258
1259
1260/**
1261 * Enables interrupts (EFLAGS.IF).
1262 */
1263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1264DECLASM(void) ASMIntEnable(void);
1265#else
1266DECLINLINE(void) ASMIntEnable(void)
1267{
1268# if RT_INLINE_ASM_GNU_STYLE
1269 __asm("sti\n");
1270# elif RT_INLINE_ASM_USES_INTRIN
1271 _enable();
1272# else
1273 __asm sti
1274# endif
1275}
1276#endif
1277
1278
1279/**
1280 * Disables interrupts (!EFLAGS.IF).
1281 */
1282#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1283DECLASM(void) ASMIntDisable(void);
1284#else
1285DECLINLINE(void) ASMIntDisable(void)
1286{
1287# if RT_INLINE_ASM_GNU_STYLE
1288 __asm("cli\n");
1289# elif RT_INLINE_ASM_USES_INTRIN
1290 _disable();
1291# else
1292 __asm cli
1293# endif
1294}
1295#endif
1296
1297
1298/**
1299 * Disables interrupts and returns previous xFLAGS.
1300 */
1301#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1302DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1303#else
1304DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1305{
1306 RTCCUINTREG xFlags;
1307# if RT_INLINE_ASM_GNU_STYLE
1308# ifdef RT_ARCH_AMD64
1309 __asm__ __volatile__("pushfq\n\t"
1310 "cli\n\t"
1311 "popq %0\n\t"
1312 : "=rm" (xFlags));
1313# else
1314 __asm__ __volatile__("pushfl\n\t"
1315 "cli\n\t"
1316 "popl %0\n\t"
1317 : "=rm" (xFlags));
1318# endif
1319# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1320 xFlags = ASMGetFlags();
1321 _disable();
1322# else
1323 __asm {
1324 pushfd
1325 cli
1326 pop [xFlags]
1327 }
1328# endif
1329 return xFlags;
1330}
1331#endif
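
/* Usage sketch (illustrative only; disabled with #if 0): the canonical pattern for
   a short stretch of code that must run with interrupts disabled - save-and-disable,
   do the work, then restore whatever interrupt state the caller had. */
#if 0
static void ExampleNoInterruptSection(void)
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
    /* ... touch per-CPU state that an interrupt handler might also touch ... */
    ASMSetFlags(fSavedFlags);
}
#endif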
1332
1333
1334/**
1335 * Reads a machine specific register.
1336 *
1337 * @returns Register content.
1338 * @param uRegister Register to read.
1339 */
1340#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1341DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1342#else
1343DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1344{
1345 RTUINT64U u;
1346# if RT_INLINE_ASM_GNU_STYLE
1347 __asm__ ("rdmsr\n\t"
1348 : "=a" (u.s.Lo),
1349 "=d" (u.s.Hi)
1350 : "c" (uRegister));
1351
1352# elif RT_INLINE_ASM_USES_INTRIN
1353 u.u = __readmsr(uRegister);
1354
1355# else
1356 __asm
1357 {
1358 mov ecx, [uRegister]
1359 rdmsr
1360 mov [u.s.Lo], eax
1361 mov [u.s.Hi], edx
1362 }
1363# endif
1364
1365 return u.u;
1366}
1367#endif
1368
1369
1370/**
1371 * Writes a machine specific register.
1372 *
1374 * @param uRegister Register to write to.
1375 * @param u64Val Value to write.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1379#else
1380DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1381{
1382 RTUINT64U u;
1383
1384 u.u = u64Val;
1385# if RT_INLINE_ASM_GNU_STYLE
1386 __asm__ __volatile__("wrmsr\n\t"
1387 ::"a" (u.s.Lo),
1388 "d" (u.s.Hi),
1389 "c" (uRegister));
1390
1391# elif RT_INLINE_ASM_USES_INTRIN
1392 __writemsr(uRegister, u.u);
1393
1394# else
1395 __asm
1396 {
1397 mov ecx, [uRegister]
1398 mov edx, [u.s.Hi]
1399 mov eax, [u.s.Lo]
1400 wrmsr
1401 }
1402# endif
1403}
1404#endif
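
/* Usage sketch (illustrative only; disabled with #if 0): a read-modify-write of a
   machine specific register, ring-0 only. MSR 0x1b (IA32_APIC_BASE) is used merely
   as a familiar example; which MSRs are safe to touch is entirely platform and
   context dependent. */
#if 0
static void ExampleMsrReadModifyWrite(void)
{
    uint64_t u64 = ASMRdMsr(0x1b /* IA32_APIC_BASE */);
    /* ... inspect or adjust bits in u64 ... */
    ASMWrMsr(0x1b /* IA32_APIC_BASE */, u64);
}
#endif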
1405
1406
1407/**
1408 * Reads low part of a machine specific register.
1409 *
1410 * @returns Register content.
1411 * @param uRegister Register to read.
1412 */
1413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1414DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1415#else
1416DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1417{
1418 uint32_t u32;
1419# if RT_INLINE_ASM_GNU_STYLE
1420 __asm__ ("rdmsr\n\t"
1421 : "=a" (u32)
1422 : "c" (uRegister)
1423 : "edx");
1424
1425# elif RT_INLINE_ASM_USES_INTRIN
1426 u32 = (uint32_t)__readmsr(uRegister);
1427
1428#else
1429 __asm
1430 {
1431 mov ecx, [uRegister]
1432 rdmsr
1433 mov [u32], eax
1434 }
1435# endif
1436
1437 return u32;
1438}
1439#endif
1440
1441
1442/**
1443 * Reads high part of a machine specific register.
1444 *
1445 * @returns Register content.
1446 * @param uRegister Register to read.
1447 */
1448#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1449DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1450#else
1451DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1452{
1453 uint32_t u32;
1454# if RT_INLINE_ASM_GNU_STYLE
1455 __asm__ ("rdmsr\n\t"
1456 : "=d" (u32)
1457 : "c" (uRegister)
1458 : "eax");
1459
1460# elif RT_INLINE_ASM_USES_INTRIN
1461 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1462
1463# else
1464 __asm
1465 {
1466 mov ecx, [uRegister]
1467 rdmsr
1468 mov [u32], edx
1469 }
1470# endif
1471
1472 return u32;
1473}
1474#endif
1475
1476
1477/**
1478 * Gets dr7.
1479 *
1480 * @returns dr7.
1481 */
1482#if RT_INLINE_ASM_EXTERNAL
1483DECLASM(RTCCUINTREG) ASMGetDR7(void);
1484#else
1485DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1486{
1487 RTCCUINTREG uDR7;
1488# if RT_INLINE_ASM_GNU_STYLE
1489# ifdef RT_ARCH_AMD64
1490 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1491# else
1492 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1493# endif
1494# else
1495 __asm
1496 {
1497# ifdef RT_ARCH_AMD64
1498 mov rax, dr7
1499 mov [uDR7], rax
1500# else
1501 mov eax, dr7
1502 mov [uDR7], eax
1503# endif
1504 }
1505# endif
1506 return uDR7;
1507}
1508#endif
1509
1510
1511/**
1512 * Gets dr6.
1513 *
1514 * @returns dr6.
1515 */
1516#if RT_INLINE_ASM_EXTERNAL
1517DECLASM(RTCCUINTREG) ASMGetDR6(void);
1518#else
1519DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1520{
1521 RTCCUINTREG uDR6;
1522# if RT_INLINE_ASM_GNU_STYLE
1523# ifdef RT_ARCH_AMD64
1524 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1525# else
1526 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1527# endif
1528# else
1529 __asm
1530 {
1531# ifdef RT_ARCH_AMD64
1532 mov rax, dr6
1533 mov [uDR6], rax
1534# else
1535 mov eax, dr6
1536 mov [uDR6], eax
1537# endif
1538 }
1539# endif
1540 return uDR6;
1541}
1542#endif
1543
1544
1545/**
1546 * Reads and clears DR6.
1547 *
1548 * @returns DR6.
1549 */
1550#if RT_INLINE_ASM_EXTERNAL
1551DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1552#else
1553DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1554{
1555 RTCCUINTREG uDR6;
1556# if RT_INLINE_ASM_GNU_STYLE
1557 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 11-4 are 1's, 15-12 and 3-0 (and 63-32) are zero. */
1558# ifdef RT_ARCH_AMD64
1559 __asm__ ("movq %%dr6, %0\n\t"
1560 "movq %1, %%dr6\n\t"
1561 : "=r" (uDR6)
1562 : "r" (uNewValue));
1563# else
1564 __asm__ ("movl %%dr6, %0\n\t"
1565 "movl %1, %%dr6\n\t"
1566 : "=r" (uDR6)
1567 : "r" (uNewValue));
1568# endif
1569# else
1570 __asm
1571 {
1572# ifdef RT_ARCH_AMD64
1573 mov rax, dr6
1574 mov [uDR6], rax
1575 mov rcx, rax
1576 mov ecx, 0ffff0ff0h; /* 31-16 and 11-4 are 1's, 15-12 and 3-0 (and 63-32) are zero. */
1577 mov dr6, rcx
1578# else
1579 mov eax, dr6
1580 mov [uDR6], eax
1581 mov ecx, 0ffff0ff0h; /* 31-16 and 11-4 are 1's, 15-12 and 3-0 are zero. */
1582 mov dr6, ecx
1583# endif
1584 }
1585# endif
1586 return uDR6;
1587}
1588#endif
1589
1590
1591/**
1592 * Compiler memory barrier.
1593 *
1594 * Ensures that the compiler does not carry any cached (register / temporary
1595 * stack) memory values or outstanding writes across this point.
1596 *
1597 * This must be used whenever non-volatile data may be modified behind the
1598 * compiler's back, e.g. by a device or the VMM. Typical cases are port access,
1599 * MMIO access, trapping instructions, etc.
1600 */
1601#if RT_INLINE_ASM_GNU_STYLE
1602# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1603#elif RT_INLINE_ASM_USES_INTRIN
1604# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1605#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1606DECLINLINE(void) ASMCompilerBarrier(void)
1607{
1608 __asm
1609 {
1610 }
1611}
1612#endif
1613
1614
1615/**
1616 * Writes an 8-bit unsigned integer to an I/O port.
1617 *
1618 * @param Port I/O port to write to.
1619 * @param u8 8-bit integer to write.
1620 */
1621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1622DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1623#else
1624DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1625{
1626# if RT_INLINE_ASM_GNU_STYLE
1627 __asm__ __volatile__("outb %b1, %w0\n\t"
1628 :: "Nd" (Port),
1629 "a" (u8));
1630
1631# elif RT_INLINE_ASM_USES_INTRIN
1632 __outbyte(Port, u8);
1633
1634# else
1635 __asm
1636 {
1637 mov dx, [Port]
1638 mov al, [u8]
1639 out dx, al
1640 }
1641# endif
1642}
1643#endif
1644
1645
1646/**
1647 * Gets an 8-bit unsigned integer from an I/O port.
1648 *
1649 * @returns 8-bit integer.
1650 * @param Port I/O port to read from.
1651 */
1652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1653DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1654#else
1655DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1656{
1657 uint8_t u8;
1658# if RT_INLINE_ASM_GNU_STYLE
1659 __asm__ __volatile__("inb %w1, %b0\n\t"
1660 : "=a" (u8)
1661 : "Nd" (Port));
1662
1663# elif RT_INLINE_ASM_USES_INTRIN
1664 u8 = __inbyte(Port);
1665
1666# else
1667 __asm
1668 {
1669 mov dx, [Port]
1670 in al, dx
1671 mov [u8], al
1672 }
1673# endif
1674 return u8;
1675}
1676#endif
1677
1678
1679/**
1680 * Writes a 16-bit unsigned integer to an I/O port.
1681 *
1682 * @param Port I/O port to write to.
1683 * @param u16 16-bit integer to write.
1684 */
1685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1686DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1687#else
1688DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1689{
1690# if RT_INLINE_ASM_GNU_STYLE
1691 __asm__ __volatile__("outw %w1, %w0\n\t"
1692 :: "Nd" (Port),
1693 "a" (u16));
1694
1695# elif RT_INLINE_ASM_USES_INTRIN
1696 __outword(Port, u16);
1697
1698# else
1699 __asm
1700 {
1701 mov dx, [Port]
1702 mov ax, [u16]
1703 out dx, ax
1704 }
1705# endif
1706}
1707#endif
1708
1709
1710/**
1711 * Gets a 16-bit unsigned integer from an I/O port.
1712 *
1713 * @returns 16-bit integer.
1714 * @param Port I/O port to read from.
1715 */
1716#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1717DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1718#else
1719DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1720{
1721 uint16_t u16;
1722# if RT_INLINE_ASM_GNU_STYLE
1723 __asm__ __volatile__("inw %w1, %w0\n\t"
1724 : "=a" (u16)
1725 : "Nd" (Port));
1726
1727# elif RT_INLINE_ASM_USES_INTRIN
1728 u16 = __inword(Port);
1729
1730# else
1731 __asm
1732 {
1733 mov dx, [Port]
1734 in ax, dx
1735 mov [u16], ax
1736 }
1737# endif
1738 return u16;
1739}
1740#endif
1741
1742
1743/**
1744 * Writes a 32-bit unsigned integer to an I/O port.
1745 *
1746 * @param Port I/O port to write to.
1747 * @param u32 32-bit integer to write.
1748 */
1749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1750DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1751#else
1752DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1753{
1754# if RT_INLINE_ASM_GNU_STYLE
1755 __asm__ __volatile__("outl %1, %w0\n\t"
1756 :: "Nd" (Port),
1757 "a" (u32));
1758
1759# elif RT_INLINE_ASM_USES_INTRIN
1760 __outdword(Port, u32);
1761
1762# else
1763 __asm
1764 {
1765 mov dx, [Port]
1766 mov eax, [u32]
1767 out dx, eax
1768 }
1769# endif
1770}
1771#endif
1772
1773
1774/**
1775 * Gets a 32-bit unsigned integer from an I/O port.
1776 *
1777 * @returns 32-bit integer.
1778 * @param Port I/O port to read from.
1779 */
1780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1781DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1782#else
1783DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1784{
1785 uint32_t u32;
1786# if RT_INLINE_ASM_GNU_STYLE
1787 __asm__ __volatile__("inl %w1, %0\n\t"
1788 : "=a" (u32)
1789 : "Nd" (Port));
1790
1791# elif RT_INLINE_ASM_USES_INTRIN
1792 u32 = __indword(Port);
1793
1794# else
1795 __asm
1796 {
1797 mov dx, [Port]
1798 in eax, dx
1799 mov [u32], eax
1800 }
1801# endif
1802 return u32;
1803}
1804#endif
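
/* Usage sketch (illustrative only; disabled with #if 0): reading a PCI configuration
   dword through the legacy 0xcf8/0xcfc port pair using the port I/O helpers above.
   The address layout is the standard configuration mechanism #1 encoding; ring-0 only. */
#if 0
static uint32_t ExamplePciConfigRead32(uint8_t uBus, uint8_t uDevice, uint8_t uFunction, uint8_t offReg)
{
    uint32_t uAddr = UINT32_C(0x80000000)
                   | ((uint32_t)uBus               << 16)
                   | ((uint32_t)(uDevice   & 0x1f) << 11)
                   | ((uint32_t)(uFunction & 0x07) <<  8)
                   | (offReg & 0xfc);
    ASMOutU32(0xcf8, uAddr);
    return ASMInU32(0xcfc);
}
#endif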
1805
1806
1807/**
1808 * Atomically Exchange an unsigned 8-bit value.
1809 *
1810 * @returns Current *pu8 value
1811 * @param pu8 Pointer to the 8-bit variable to update.
1812 * @param u8 The 8-bit value to assign to *pu8.
1813 */
1814#if RT_INLINE_ASM_EXTERNAL
1815DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1816#else
1817DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1818{
1819# if RT_INLINE_ASM_GNU_STYLE
1820 __asm__ __volatile__("xchgb %0, %1\n\t"
1821 : "=m" (*pu8),
1822 "=r" (u8)
1823 : "1" (u8));
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rdx, [pu8]
1829 mov al, [u8]
1830 xchg [rdx], al
1831 mov [u8], al
1832# else
1833 mov edx, [pu8]
1834 mov al, [u8]
1835 xchg [edx], al
1836 mov [u8], al
1837# endif
1838 }
1839# endif
1840 return u8;
1841}
1842#endif
1843
1844
1845/**
1846 * Atomically Exchange a signed 8-bit value.
1847 *
1848 * @returns Current *pi8 value
1849 * @param pi8 Pointer to the 8-bit variable to update.
1850 * @param i8 The 8-bit value to assign to *pi8.
1851 */
1852DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1853{
1854 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1855}
1856
1857
1858/**
1859 * Atomically Exchange a bool value.
1860 *
1861 * @returns Current *pf value
1862 * @param pf Pointer to the boolean variable to update.
1863 * @param f The boolean value to assign to *pf.
1864 */
1865DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1866{
1867#ifdef _MSC_VER
1868 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1869#else
1870 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1871#endif
1872}
1873
1874
1875/**
1876 * Atomically Exchange an unsigned 16-bit value.
1877 *
1878 * @returns Current *pu16 value
1879 * @param pu16 Pointer to the 16-bit variable to update.
1880 * @param u16 The 16-bit value to assign to *pu16.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL
1883DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1884#else
1885DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1886{
1887# if RT_INLINE_ASM_GNU_STYLE
1888 __asm__ __volatile__("xchgw %0, %1\n\t"
1889 : "=m" (*pu16),
1890 "=r" (u16)
1891 : "1" (u16));
1892# else
1893 __asm
1894 {
1895# ifdef RT_ARCH_AMD64
1896 mov rdx, [pu16]
1897 mov ax, [u16]
1898 xchg [rdx], ax
1899 mov [u16], ax
1900# else
1901 mov edx, [pu16]
1902 mov ax, [u16]
1903 xchg [edx], ax
1904 mov [u16], ax
1905# endif
1906 }
1907# endif
1908 return u16;
1909}
1910#endif
1911
1912
1913/**
1914 * Atomically Exchange a signed 16-bit value.
1915 *
1916 * @returns Current *pi16 value
1917 * @param pi16 Pointer to the 16-bit variable to update.
1918 * @param i16 The 16-bit value to assign to *pi16.
1919 */
1920DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1921{
1922 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1923}
1924
1925
1926/**
1927 * Atomically Exchange an unsigned 32-bit value.
1928 *
1929 * @returns Current *pu32 value
1930 * @param pu32 Pointer to the 32-bit variable to update.
1931 * @param u32 The 32-bit value to assign to *pu32.
1932 */
1933#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1934DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1935#else
1936DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1937{
1938# if RT_INLINE_ASM_GNU_STYLE
1939 __asm__ __volatile__("xchgl %0, %1\n\t"
1940 : "=m" (*pu32),
1941 "=r" (u32)
1942 : "1" (u32));
1943
1944# elif RT_INLINE_ASM_USES_INTRIN
1945 u32 = _InterlockedExchange((long *)pu32, u32);
1946
1947# else
1948 __asm
1949 {
1950# ifdef RT_ARCH_AMD64
1951 mov rdx, [pu32]
1952 mov eax, u32
1953 xchg [rdx], eax
1954 mov [u32], eax
1955# else
1956 mov edx, [pu32]
1957 mov eax, u32
1958 xchg [edx], eax
1959 mov [u32], eax
1960# endif
1961 }
1962# endif
1963 return u32;
1964}
1965#endif
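
/* Usage sketch (illustrative only; disabled with #if 0): a primitive test-and-set
   spin lock built on ASMAtomicXchgU32(). Real code should add pause/back-off and
   think about ordering; this only shows the exchange idiom. */
#if 0
static void ExampleSpinLockAcquire(uint32_t volatile *pfLock)
{
    while (ASMAtomicXchgU32(pfLock, 1) != 0)
        /* spin */;
}

static void ExampleSpinLockRelease(uint32_t volatile *pfLock)
{
    ASMAtomicXchgU32(pfLock, 0);
}
#endif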
1966
1967
1968/**
1969 * Atomically Exchange a signed 32-bit value.
1970 *
1971 * @returns Current *pi32 value
1972 * @param pi32 Pointer to the 32-bit variable to update.
1973 * @param i32 The 32-bit value to assign to *pi32.
1974 */
1975DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1976{
1977 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1978}
1979
1980
1981/**
1982 * Atomically Exchange an unsigned 64-bit value.
1983 *
1984 * @returns Current *pu64 value
1985 * @param pu64 Pointer to the 64-bit variable to update.
1986 * @param u64 The 64-bit value to assign to *pu64.
1987 */
1988#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1989DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
1990#else
1991DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
1992{
1993# if defined(RT_ARCH_AMD64)
1994# if RT_INLINE_ASM_USES_INTRIN
1995 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
1996
1997# elif RT_INLINE_ASM_GNU_STYLE
1998 __asm__ __volatile__("xchgq %0, %1\n\t"
1999 : "=m" (*pu64),
2000 "=r" (u64)
2001 : "1" (u64));
2002# else
2003 __asm
2004 {
2005 mov rdx, [pu64]
2006 mov rax, [u64]
2007 xchg [rdx], rax
2008 mov [u64], rax
2009 }
2010# endif
2011# else /* !RT_ARCH_AMD64 */
2012# if RT_INLINE_ASM_GNU_STYLE
2013# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2014 uint32_t u32 = (uint32_t)u64;
2015 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2016 "xchgl %%ebx, %3\n\t"
2017 "1:\n\t"
2018 "lock; cmpxchg8b (%5)\n\t"
2019 "jnz 1b\n\t"
2020 "xchgl %%ebx, %3\n\t"
2021 /*"xchgl %%esi, %5\n\t"*/
2022 : "=A" (u64),
2023 "=m" (*pu64)
2024 : "0" (*pu64),
2025 "m" ( u32 ),
2026 "c" ( (uint32_t)(u64 >> 32) ),
2027 "S" (pu64) );
2028# else /* !PIC */
2029 __asm__ __volatile__("1:\n\t"
2030 "lock; cmpxchg8b %1\n\t"
2031 "jnz 1b\n\t"
2032 : "=A" (u64),
2033 "=m" (*pu64)
2034 : "0" (*pu64),
2035 "b" ( (uint32_t)u64 ),
2036 "c" ( (uint32_t)(u64 >> 32) ));
2037# endif
2038# else
2039 __asm
2040 {
2041 mov ebx, dword ptr [u64]
2042 mov ecx, dword ptr [u64 + 4]
2043 mov edi, pu64
2044 mov eax, dword ptr [edi]
2045 mov edx, dword ptr [edi + 4]
2046 retry:
2047 lock cmpxchg8b [edi]
2048 jnz retry
2049 mov dword ptr [u64], eax
2050 mov dword ptr [u64 + 4], edx
2051 }
2052# endif
2053# endif /* !RT_ARCH_AMD64 */
2054 return u64;
2055}
2056#endif
2057
2058
2059/**
2060 * Atomically Exchange a signed 64-bit value.
2061 *
2062 * @returns Current *pi64 value
2063 * @param pi64 Pointer to the 64-bit variable to update.
2064 * @param i64 The 64-bit value to assign to *pi64.
2065 */
2066DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2067{
2068 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2069}
2070
2071
2072#ifdef RT_ARCH_AMD64
2073/**
2074 * Atomically Exchange an unsigned 128-bit value.
2075 *
2076 * @returns Current *pu128.
2077 * @param pu128 Pointer to the 128-bit variable to update.
2078 * @param u128 The 128-bit value to assign to *pu128.
2079 *
2080 * @remark We cannot really assume that any hardware supports this. Nor do I have
2081 * GAS support for it. So, for the time being we'll BREAK the atomic
2082 * bit of this function and use two 64-bit exchanges instead.
2083 */
2084# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2085DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2086# else
2087DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2088{
2089 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2090 {
2091 /** @todo this is clumsy code */
2092 RTUINT128U u128Ret;
2093 u128Ret.u = u128;
2094 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2095 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2096 return u128Ret.u;
2097 }
2098#if 0 /* later? */
2099 else
2100 {
2101# if RT_INLINE_ASM_GNU_STYLE
2102 __asm__ __volatile__("1:\n\t"
2103 "lock; cmpxchg8b %1\n\t"
2104 "jnz 1b\n\t"
2105 : "=A" (u128),
2106 "=m" (*pu128)
2107 : "0" (*pu128),
2108 "b" ( (uint64_t)u128 ),
2109 "c" ( (uint64_t)(u128 >> 64) ));
2110# else
2111 __asm
2112 {
2113 mov rbx, dword ptr [u128]
2114 mov rcx, dword ptr [u128 + 4]
2115 mov rdi, pu128
2116 mov rax, dword ptr [rdi]
2117 mov rdx, dword ptr [rdi + 4]
2118 retry:
2119 lock cmpxchg16b [rdi]
2120 jnz retry
2121 mov dword ptr [u128], rax
2122 mov dword ptr [u128 + 4], rdx
2123 }
2124# endif
2125 }
2126 return u128;
2127#endif
2128}
2129# endif
2130#endif /* RT_ARCH_AMD64 */
2131
2132
2133/**
2134 * Atomically Reads an unsigned 64-bit value.
2135 *
2136 * @returns Current *pu64 value
2137 * @param pu64 Pointer to the 64-bit variable to read.
2138 * The memory pointed to must be writable.
2139 * @remark This will fault if the memory is read-only!
2140 */
2141#if RT_INLINE_ASM_EXTERNAL
2142DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
2143#else
2144DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
2145{
2146 uint64_t u64;
2147# ifdef RT_ARCH_AMD64
2148# if RT_INLINE_ASM_GNU_STYLE
2149 __asm__ __volatile__("movq %1, %0\n\t"
2150 : "=r" (u64)
2151 : "m" (*pu64));
2152# else
2153 __asm
2154 {
2155 mov rdx, [pu64]
2156 mov rax, [rdx]
2157 mov [u64], rax
2158 }
2159# endif
2160# else /* !RT_ARCH_AMD64 */
2161# if RT_INLINE_ASM_GNU_STYLE
2162# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2163 uint32_t u32EBX = 0;
2164 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2165 "lock; cmpxchg8b (%5)\n\t"
2166 "xchgl %%ebx, %3\n\t"
2167 : "=A" (u64),
2168 "=m" (*pu64)
2169 : "0" (0),
2170 "m" (u32EBX),
2171 "c" (0),
2172 "S" (pu64));
2173# else /* !PIC */
2174 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (0),
2178 "b" (0),
2179 "c" (0));
2180# endif
2181# else
2182 __asm
2183 {
2184 xor eax, eax
2185 xor edx, edx
2186 mov edi, pu64
2187 xor ecx, ecx
2188 xor ebx, ebx
2189 lock cmpxchg8b [edi]
2190 mov dword ptr [u64], eax
2191 mov dword ptr [u64 + 4], edx
2192 }
2193# endif
2194# endif /* !RT_ARCH_AMD64 */
2195 return u64;
2196}
2197#endif
2198
2199
2200/**
2201 * Atomically Reads a signed 64-bit value.
2202 *
2203 * @returns Current *pi64 value
2204 * @param pi64 Pointer to the 64-bit variable to read.
2205 * The memory pointed to must be writable.
2206 * @remark This will fault if the memory is read-only!
2207 */
2208DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
2209{
2210 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
2211}
2212
2213
2214/**
2215 * Atomically Exchange a value whose size might differ
2216 * between platforms or compilers.
2217 *
2218 * @param pu Pointer to the variable to update.
2219 * @param uNew The value to assign to *pu.
2220 */
2221#define ASMAtomicXchgSize(pu, uNew) \
2222 do { \
2223 switch (sizeof(*(pu))) { \
2224 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2225 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2226 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2227 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2228 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2229 } \
2230 } while (0)
2231
2232
2233/**
2234 * Atomically Exchange a pointer value.
2235 *
2236 * @returns Current *ppv value
2237 * @param ppv Pointer to the pointer variable to update.
2238 * @param pv The pointer value to assign to *ppv.
2239 */
2240DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2241{
2242#if ARCH_BITS == 32
2243 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2244#elif ARCH_BITS == 64
2245 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2246#else
2247# error "ARCH_BITS is bogus"
2248#endif
2249}
2250
2251
2252/**
2253 * Atomically Compare and Exchange an unsigned 32-bit value.
2254 *
2255 * @returns true if xchg was done.
2256 * @returns false if xchg wasn't done.
2257 *
2258 * @param pu32 Pointer to the value to update.
2259 * @param u32New The new value to assign to *pu32.
2260 * @param u32Old The old value to compare *pu32 with.
2261 */
2262#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2263DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2264#else
2265DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old)
2266{
2267# if RT_INLINE_ASM_GNU_STYLE
2268 uint32_t u32Ret;
2269 __asm__ __volatile__("lock; cmpxchgl %2, %0\n\t"
2270 "setz %%al\n\t"
2271 "movzbl %%al, %%eax\n\t"
2272 : "=m" (*pu32),
2273 "=a" (u32Ret)
2274 : "r" (u32New),
2275 "1" (u32Old));
2276 return (bool)u32Ret;
2277
2278# elif RT_INLINE_ASM_USES_INTRIN
2279 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2280
2281# else
2282 uint32_t u32Ret;
2283 __asm
2284 {
2285# ifdef RT_ARCH_AMD64
2286 mov rdx, [pu32]
2287# else
2288 mov edx, [pu32]
2289# endif
2290 mov eax, [u32Old]
2291 mov ecx, [u32New]
2292# ifdef RT_ARCH_AMD64
2293 lock cmpxchg [rdx], ecx
2294# else
2295 lock cmpxchg [edx], ecx
2296# endif
2297 setz al
2298 movzx eax, al
2299 mov [u32Ret], eax
2300 }
2301 return !!u32Ret;
2302# endif
2303}
2304#endif
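
/* Usage sketch (illustrative only; disabled with #if 0): the standard compare-and-
   exchange retry loop, here used to atomically OR bits into a 32-bit value. Dedicated
   atomic OR/AND helpers are preferable where available; this only demonstrates the
   CmpXchg pattern. */
#if 0
static void ExampleAtomicOrU32(uint32_t volatile *pu32, uint32_t fBits)
{
    uint32_t u32Old;
    do
        u32Old = *pu32;
    while (!ASMAtomicCmpXchgU32(pu32, u32Old | fBits, u32Old));
}
#endif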
2305
2306
2307/**
2308 * Atomically Compare and Exchange a signed 32-bit value.
2309 *
2310 * @returns true if xchg was done.
2311 * @returns false if xchg wasn't done.
2312 *
2313 * @param pi32 Pointer to the value to update.
2314 * @param i32New The new value to assign to *pi32.
2315 * @param i32Old The old value to compare *pi32 with.
2316 */
2317DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2318{
2319 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2320}
2321
2322
2323/**
2324 * Atomically Compare and exchange an unsigned 64-bit value.
2325 *
2326 * @returns true if xchg was done.
2327 * @returns false if xchg wasn't done.
2328 *
2329 * @param pu64 Pointer to the 64-bit variable to update.
2330 * @param u64New The 64-bit value to assign to *pu64.
2331 * @param u64Old The value to compare with.
2332 */
2333#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2334DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2335#else
2336DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old)
2337{
2338# if RT_INLINE_ASM_USES_INTRIN
2339 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2340
2341# elif defined(RT_ARCH_AMD64)
2342# if RT_INLINE_ASM_GNU_STYLE
2343 uint64_t u64Ret;
2344 __asm__ __volatile__("lock; cmpxchgq %2, %0\n\t"
2345 "setz %%al\n\t"
2346 "movzbl %%al, %%eax\n\t"
2347 : "=m" (*pu64),
2348 "=a" (u64Ret)
2349 : "r" (u64New),
2350 "1" (u64Old));
2351 return (bool)u64Ret;
2352# else
2353 bool fRet;
2354 __asm
2355 {
2356 mov rdx, [pu64]
2357 mov rax, [u64Old]
2358 mov rcx, [u64New]
2359 lock cmpxchg [rdx], rcx
2360 setz al
2361 mov [fRet], al
2362 }
2363 return fRet;
2364# endif
2365# else /* !RT_ARCH_AMD64 */
2366 uint32_t u32Ret;
2367# if RT_INLINE_ASM_GNU_STYLE
2368# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2369 uint32_t u32 = (uint32_t)u64New;
2370 uint32_t u32Spill;
2371 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2372 "lock; cmpxchg8b (%6)\n\t"
2373 "setz %%al\n\t"
2374 "xchgl %%ebx, %4\n\t"
2375 "movzbl %%al, %%eax\n\t"
2376 : "=a" (u32Ret),
2377 "=d" (u32Spill),
2378 "=m" (*pu64)
2379 : "A" (u64Old),
2380 "m" ( u32 ),
2381 "c" ( (uint32_t)(u64New >> 32) ),
2382 "S" (pu64) );
2383# else /* !PIC */
2384 uint32_t u32Spill;
2385 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2386 "setz %%al\n\t"
2387 "movzbl %%al, %%eax\n\t"
2388 : "=a" (u32Ret),
2389 "=d" (u32Spill),
2390 "=m" (*pu64)
2391 : "A" (u64Old),
2392 "b" ( (uint32_t)u64New ),
2393 "c" ( (uint32_t)(u64New >> 32) ));
2394# endif
2395 return (bool)u32Ret;
2396# else
2397 __asm
2398 {
2399 mov ebx, dword ptr [u64New]
2400 mov ecx, dword ptr [u64New + 4]
2401 mov edi, [pu64]
2402 mov eax, dword ptr [u64Old]
2403 mov edx, dword ptr [u64Old + 4]
2404 lock cmpxchg8b [edi]
2405 setz al
2406 movzx eax, al
2407 mov dword ptr [u32Ret], eax
2408 }
2409 return !!u32Ret;
2410# endif
2411# endif /* !RT_ARCH_AMD64 */
2412}
2413#endif
2414
2415
2416/**
2417 * Atomically Compare and exchange a signed 64-bit value.
2418 *
2419 * @returns true if xchg was done.
2420 * @returns false if xchg wasn't done.
2421 *
2422 * @param pi64 Pointer to the 64-bit variable to update.
2423 * @param i64 The 64-bit value to assign to *pi64.
2424 * @param i64Old The value to compare with.
2425 */
2426DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2427{
2428 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2429}
2430
2431
2432
2433/** @def ASMAtomicCmpXchgSize
2434 * Atomically Compare and Exchange a value whose size might differ
2435 * between platforms or compilers.
2436 *
2437 * @param pu Pointer to the value to update.
2438 * @param uNew The new value to assign to *pu.
2439 * @param uOld The old value to compare *pu with.
2440 * @param fRc Where to store the result.
2441 */
2442#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2443 do { \
2444 switch (sizeof(*(pu))) { \
2445 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2446 break; \
2447 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2448 break; \
2449 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2450 (fRc) = false; \
2451 break; \
2452 } \
2453 } while (0)
2454
2455
2456/**
2457 * Atomically Compare and Exchange a pointer value.
2458 *
2459 * @returns true if xchg was done.
2460 * @returns false if xchg wasn't done.
2461 *
2462 * @param ppv Pointer to the value to update.
2463 * @param pvNew The new value to assign to *ppv.
2464 * @param pvOld The old value to compare *ppv with.
2465 */
2466DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2467{
2468#if ARCH_BITS == 32
2469 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2470#elif ARCH_BITS == 64
2471 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2472#else
2473# error "ARCH_BITS is bogus"
2474#endif
2475}
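/*
 * Illustrative sketch (not part of the original header): using ASMAtomicCmpXchgPtr to
 * install a pointer exactly once. The name ExampleInstallOnce and the NULL-means-unset
 * convention are assumptions made for the example.
 */
static bool ExampleInstallOnce(void * volatile *ppv, void *pvNew)
{
    /* Succeeds only if *ppv is still NULL; a concurrent winner keeps its pointer. */
    return ASMAtomicCmpXchgPtr(ppv, pvNew, NULL);
}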
2476
2477
2478/**
2479 * Atomically increment a 32-bit value.
2480 *
2481 * @returns The new value.
2482 * @param pu32 Pointer to the value to increment.
2483 */
2484#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2485DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2486#else
2487DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2488{
2489 uint32_t u32;
2490# if RT_INLINE_ASM_USES_INTRIN
2491 u32 = _InterlockedIncrement((long *)pu32);
2492 return u32;
2493
2494# elif RT_INLINE_ASM_GNU_STYLE
2495 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2496 : "=r" (u32),
2497 "=m" (*pu32)
2498 : "0" (1)
2499 : "memory");
2500 return u32+1;
2501# else
2502 __asm
2503 {
2504 mov eax, 1
2505# ifdef RT_ARCH_AMD64
2506 mov rdx, [pu32]
2507 lock xadd [rdx], eax
2508# else
2509 mov edx, [pu32]
2510 lock xadd [edx], eax
2511# endif
2512 mov u32, eax
2513 }
2514 return u32+1;
2515# endif
2516}
2517#endif
2518
2519
2520/**
2521 * Atomically increment a signed 32-bit value.
2522 *
2523 * @returns The new value.
2524 * @param pi32 Pointer to the value to increment.
2525 */
2526DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2527{
2528 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2529}
2530
2531
2532/**
2533 * Atomically decrement an unsigned 32-bit value.
2534 *
2535 * @returns The new value.
2536 * @param pu32 Pointer to the value to decrement.
2537 */
2538#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2539DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2540#else
2541DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2542{
2543 uint32_t u32;
2544# if RT_INLINE_ASM_USES_INTRIN
2545 u32 = _InterlockedDecrement((long *)pu32);
2546 return u32;
2547
2548# elif RT_INLINE_ASM_GNU_STYLE
2549 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2550 : "=r" (u32),
2551 "=m" (*pu32)
2552 : "0" (-1)
2553 : "memory");
2554 return u32-1;
2555# else
2556 __asm
2557 {
2558 mov eax, -1
2559# ifdef RT_ARCH_AMD64
2560 mov rdx, [pu32]
2561 lock xadd [rdx], eax
2562# else
2563 mov edx, [pu32]
2564 lock xadd [edx], eax
2565# endif
2566 mov u32, eax
2567 }
2568 return u32-1;
2569# endif
2570}
2571#endif
2572
2573
2574/**
2575 * Atomically decrement a signed 32-bit value.
2576 *
2577 * @returns The new value.
2578 * @param pi32 Pointer to the value to decrement.
2579 */
2580DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2581{
2582 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2583}
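/*
 * Illustrative sketch (not part of the original header): a reference counter built on
 * ASMAtomicIncU32/ASMAtomicDecU32. The EXAMPLEOBJ structure and the function names are
 * invented for the example.
 */
typedef struct EXAMPLEOBJ
{
    uint32_t volatile cRefs;
} EXAMPLEOBJ;

static uint32_t ExampleRetain(EXAMPLEOBJ *pObj)
{
    return ASMAtomicIncU32(&pObj->cRefs); /* returns the new reference count */
}

static void ExampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
    {
        /* last reference dropped; destroy pObj here */
    }
}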
2584
2585
2586/**
2587 * Atomically Or an unsigned 32-bit value.
2588 *
2589 * @param pu32 Pointer to the variable to OR u32 with.
2590 * @param u32 The value to OR *pu32 with.
2591 */
2592#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2593DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2594#else
2595DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2596{
2597# if RT_INLINE_ASM_USES_INTRIN
2598 _InterlockedOr((long volatile *)pu32, (long)u32);
2599
2600# elif RT_INLINE_ASM_GNU_STYLE
2601 __asm__ __volatile__("lock; orl %1, %0\n\t"
2602 : "=m" (*pu32)
2603 : "ir" (u32));
2604# else
2605 __asm
2606 {
2607 mov eax, [u32]
2608# ifdef RT_ARCH_AMD64
2609 mov rdx, [pu32]
2610 lock or [rdx], eax
2611# else
2612 mov edx, [pu32]
2613 lock or [edx], eax
2614# endif
2615 }
2616# endif
2617}
2618#endif
2619
2620
2621/**
2622 * Atomically Or a signed 32-bit value.
2623 *
2624 * @param pi32 Pointer to the variable to OR i32 with.
2625 * @param i32 The value to OR *pi32 with.
2626 */
2627DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2628{
2629 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2630}
2631
2632
2633/**
2634 * Atomically And an unsigned 32-bit value.
2635 *
2636 * @param pu32 Pointer to the variable to AND u32 with.
2637 * @param u32 The value to AND *pu32 with.
2638 */
2639#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2640DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2641#else
2642DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2643{
2644# if RT_INLINE_ASM_USES_INTRIN
2645 _InterlockedAnd((long volatile *)pu32, u32);
2646
2647# elif RT_INLINE_ASM_GNU_STYLE
2648 __asm__ __volatile__("lock; andl %1, %0\n\t"
2649 : "=m" (*pu32)
2650 : "ir" (u32));
2651# else
2652 __asm
2653 {
2654 mov eax, [u32]
2655# ifdef RT_ARCH_AMD64
2656 mov rdx, [pu32]
2657 lock and [rdx], eax
2658# else
2659 mov edx, [pu32]
2660 lock and [edx], eax
2661# endif
2662 }
2663# endif
2664}
2665#endif
2666
2667
2668/**
2669 * Atomically And a signed 32-bit value.
2670 *
2671 * @param pi32 Pointer to the variable to AND i32 with.
2672 * @param i32 The value to AND *pi32 with.
2673 */
2674DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2675{
2676 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2677}
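/*
 * Illustrative sketch (not part of the original header): maintaining a 32-bit flag word
 * with ASMAtomicOrU32/ASMAtomicAndU32. The flag value and the function names are made
 * up for the example.
 */
#define EXAMPLE_F_BUSY 0x00000001
static void ExampleSetBusy(uint32_t volatile *pfFlags)
{
    ASMAtomicOrU32(pfFlags, EXAMPLE_F_BUSY); /* atomically set the busy bit */
}

static void ExampleClearBusy(uint32_t volatile *pfFlags)
{
    ASMAtomicAndU32(pfFlags, ~(uint32_t)EXAMPLE_F_BUSY); /* atomically clear the busy bit */
}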
2678
2679
2680/**
2681 * Invalidate page.
2682 *
2683 * @param pv Address of the page to invalidate.
2684 */
2685#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2686DECLASM(void) ASMInvalidatePage(void *pv);
2687#else
2688DECLINLINE(void) ASMInvalidatePage(void *pv)
2689{
2690# if RT_INLINE_ASM_USES_INTRIN
2691 __invlpg(pv);
2692
2693# elif RT_INLINE_ASM_GNU_STYLE
2694 __asm__ __volatile__("invlpg %0\n\t"
2695 : : "m" (*(uint8_t *)pv));
2696# else
2697 __asm
2698 {
2699# ifdef RT_ARCH_AMD64
2700 mov rax, [pv]
2701 invlpg [rax]
2702# else
2703 mov eax, [pv]
2704 invlpg [eax]
2705# endif
2706 }
2707# endif
2708}
2709#endif
2710
2711
2712#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
2713# if PAGE_SIZE != 0x1000
2714# error "PAGE_SIZE is not 0x1000!"
2715# endif
2716#endif
2717
2718/**
2719 * Zeros a 4K memory page.
2720 *
2721 * @param pv Pointer to the memory block. This must be page aligned.
2722 */
2723#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2724DECLASM(void) ASMMemZeroPage(volatile void *pv);
2725# else
2726DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
2727{
2728# if RT_INLINE_ASM_USES_INTRIN
2729# ifdef RT_ARCH_AMD64
2730 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
2731# else
2732 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
2733# endif
2734
2735# elif RT_INLINE_ASM_GNU_STYLE
2736 RTUINTREG uDummy;
2737# ifdef RT_ARCH_AMD64
2738 __asm__ __volatile__ ("rep stosq"
2739 : "=D" (pv),
2740 "=c" (uDummy)
2741 : "0" (pv),
2742 "c" (0x1000 >> 3),
2743 "a" (0)
2744 : "memory");
2745# else
2746 __asm__ __volatile__ ("rep stosl"
2747 : "=D" (pv),
2748 "=c" (uDummy)
2749 : "0" (pv),
2750 "c" (0x1000 >> 2),
2751 "a" (0)
2752 : "memory");
2753# endif
2754# else
2755 __asm
2756 {
2757# ifdef RT_ARCH_AMD64
2758 xor rax, rax
2759 mov ecx, 0200h
2760 mov rdi, [pv]
2761 rep stosq
2762# else
2763 xor eax, eax
2764 mov ecx, 0400h
2765 mov edi, [pv]
2766 rep stosd
2767# endif
2768 }
2769# endif
2770}
2771# endif
2772
2773
2774/**
2775 * Zeros a memory block with a 32-bit aligned size.
2776 *
2777 * @param pv Pointer to the memory block.
2778 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2779 */
2780#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2781DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
2782#else
2783DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
2784{
2785# if RT_INLINE_ASM_USES_INTRIN
2786 __stosd((unsigned long *)pv, 0, cb >> 2);
2787
2788# elif RT_INLINE_ASM_GNU_STYLE
2789 __asm__ __volatile__ ("rep stosl"
2790 : "=D" (pv),
2791 "=c" (cb)
2792 : "0" (pv),
2793 "1" (cb >> 2),
2794 "a" (0)
2795 : "memory");
2796# else
2797 __asm
2798 {
2799 xor eax, eax
2800# ifdef RT_ARCH_AMD64
2801 mov rcx, [cb]
2802 shr rcx, 2
2803 mov rdi, [pv]
2804# else
2805 mov ecx, [cb]
2806 shr ecx, 2
2807 mov edi, [pv]
2808# endif
2809 rep stosd
2810 }
2811# endif
2812}
2813#endif
2814
2815
2816/**
2817 * Fills a memory block with a 32-bit aligned size.
2818 *
2819 * @param pv Pointer to the memory block.
2820 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2821 * @param u32 The value to fill with.
2822 */
2823#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2824DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
2825#else
2826DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
2827{
2828# if RT_INLINE_ASM_USES_INTRIN
2829 __stosd((unsigned long *)pv, u32, cb >> 2);
2830
2831# elif RT_INLINE_ASM_GNU_STYLE
2832 __asm__ __volatile__ ("rep stosl"
2833 : "=D" (pv),
2834 "=c" (cb)
2835 : "0" (pv),
2836 "1" (cb >> 2),
2837 "a" (u32)
2838 : "memory");
2839# else
2840 __asm
2841 {
2842# ifdef RT_ARCH_AMD64
2843 mov rcx, [cb]
2844 shr rcx, 2
2845 mov rdi, [pv]
2846# else
2847 mov ecx, [cb]
2848 shr ecx, 2
2849 mov edi, [pv]
2850# endif
2851 mov eax, [u32]
2852 rep stosd
2853 }
2854# endif
2855}
2856#endif
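/*
 * Illustrative sketch (not part of the original header): initializing an array of 32-bit
 * entries with ASMMemZero32/ASMMemFill32. Both calls require the byte count to be a
 * multiple of four, which holds here by construction; the function name is invented for
 * the example.
 */
static void ExampleResetTable(uint32_t *pau32, size_t cEntries, bool fMarkInvalid)
{
    if (fMarkInvalid)
        ASMMemFill32(pau32, cEntries * sizeof(uint32_t), 0xffffffff); /* every entry = 0xffffffff */
    else
        ASMMemZero32(pau32, cEntries * sizeof(uint32_t));             /* every entry = 0 */
}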
2857
2858
2859/**
2860 * Checks if a memory block is filled with the specified byte.
2861 *
2862 * This is a sort of inverted memchr.
2863 *
2864 * @returns Pointer to the byte which doesn't equal u8.
2865 * @returns NULL if all equal to u8.
2866 *
2867 * @param pv Pointer to the memory block.
2868 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
2869 * @param u8 The value it's supposed to be filled with.
2870 */
2871#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2872DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
2873#else
2874DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
2875{
2876/** @todo rewrite this in inline assembly. */
2877 uint8_t const *pb = (uint8_t const *)pv;
2878 for (; cb; cb--, pb++)
2879 if (RT_UNLIKELY(*pb != u8))
2880 return (void *)pb;
2881 return NULL;
2882}
2883#endif
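/*
 * Illustrative sketch (not part of the original header): using ASMMemIsAll8 to check
 * whether a buffer is entirely zero filled. The function name is made up for the example.
 */
static bool ExampleIsAllZeros(const void *pv, size_t cb)
{
    return ASMMemIsAll8(pv, cb, 0) == NULL; /* NULL means no mismatching byte was found */
}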
2884
2885
2886
2887/**
2888 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
2889 *
2890 * @returns u32F1 * u32F2.
2891 */
2892#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2893DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
2894#else
2895DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
2896{
2897# ifdef RT_ARCH_AMD64
2898 return (uint64_t)u32F1 * u32F2;
2899# else /* !RT_ARCH_AMD64 */
2900 uint64_t u64;
2901# if RT_INLINE_ASM_GNU_STYLE
2902 __asm__ __volatile__("mull %%edx"
2903 : "=A" (u64)
2904 : "a" (u32F2), "d" (u32F1));
2905# else
2906 __asm
2907 {
2908 mov edx, [u32F1]
2909 mov eax, [u32F2]
2910 mul edx
2911 mov dword ptr [u64], eax
2912 mov dword ptr [u64 + 4], edx
2913 }
2914# endif
2915 return u64;
2916# endif /* !RT_ARCH_AMD64 */
2917}
2918#endif
2919
2920
2921/**
2922 * Multiplies two signed 32-bit values returning a signed 64-bit result.
2923 *
2924 * @returns i32F1 * i32F2.
2925 */
2926#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2927DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
2928#else
2929DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
2930{
2931# ifdef RT_ARCH_AMD64
2932 return (int64_t)i32F1 * i32F2;
2933# else /* !RT_ARCH_AMD64 */
2934 int64_t i64;
2935# if RT_INLINE_ASM_GNU_STYLE
2936 __asm__ __volatile__("imull %%edx"
2937 : "=A" (i64)
2938 : "a" (i32F2), "d" (i32F1));
2939# else
2940 __asm
2941 {
2942 mov edx, [i32F1]
2943 mov eax, [i32F2]
2944 imul edx
2945 mov dword ptr [i64], eax
2946 mov dword ptr [i64 + 4], edx
2947 }
2948# endif
2949 return i64;
2950# endif /* !RT_ARCH_AMD64 */
2951}
2952#endif
2953
2954
2955/**
2956 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
2957 *
2958 * @returns u64 / u32.
2959 */
2960#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2961DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
2962#else
2963DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
2964{
2965# ifdef RT_ARCH_AMD64
2966 return (uint32_t)(u64 / u32);
2967# else /* !RT_ARCH_AMD64 */
2968# if RT_INLINE_ASM_GNU_STYLE
2969 RTUINTREG uDummy;
2970 __asm__ __volatile__("divl %3"
2971 : "=a" (u32), "=d"(uDummy)
2972 : "A" (u64), "r" (u32));
2973# else
2974 __asm
2975 {
2976 mov eax, dword ptr [u64]
2977 mov edx, dword ptr [u64 + 4]
2978 mov ecx, [u32]
2979 div ecx
2980 mov [u32], eax
2981 }
2982# endif
2983 return u32;
2984# endif /* !RT_ARCH_AMD64 */
2985}
2986#endif
2987
2988
2989/**
2990 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
2991 *
2992 * @returns i64 / i32.
2993 */
2994#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
2995DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
2996#else
2997DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
2998{
2999# ifdef RT_ARCH_AMD64
3000 return (int32_t)(i64 / i32);
3001# else /* !RT_ARCH_AMD64 */
3002# if RT_INLINE_ASM_GNU_STYLE
3003 RTUINTREG iDummy;
3004 __asm__ __volatile__("idivl %3"
3005 : "=a" (i32), "=d"(iDummy)
3006 : "A" (i64), "r" (i32));
3007# else
3008 __asm
3009 {
3010 mov eax, dword ptr [i64]
3011 mov edx, dword ptr [i64 + 4]
3012 mov ecx, [i32]
3013 idiv ecx
3014 mov [i32], eax
3015 }
3016# endif
3017 return i32;
3018# endif /* !RT_ARCH_AMD64 */
3019}
3020#endif
3021
3022
3023/**
3024 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer,
3025 * using a 96-bit intermediate result.
3026 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
3027 * __udivdi3 and __umoddi3 even if this inline function is not used.
3028 *
3029 * @returns (u64A * u32B) / u32C.
3030 * @param u64A The 64-bit value.
3031 * @param u32B The 32-bit value to multiply A by.
3032 * @param u32C The 32-bit value to divide A*B by.
3033 */
3034#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
3035DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
3036#else
3037DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
3038{
3039# if RT_INLINE_ASM_GNU_STYLE
3040# ifdef RT_ARCH_AMD64
3041 uint64_t u64Result, u64Spill;
3042 __asm__ __volatile__("mulq %2\n\t"
3043 "divq %3\n\t"
3044 : "=a" (u64Result),
3045 "=d" (u64Spill)
3046 : "r" ((uint64_t)u32B),
3047 "r" ((uint64_t)u32C),
3048 "0" (u64A),
3049 "1" (0));
3050 return u64Result;
3051# else
3052 uint32_t u32Dummy;
3053 uint64_t u64Result;
3054 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
3055 edx = u64Lo.hi = (u64A.lo * u32B).hi */
3056 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
3057 eax = u64A.hi */
3058 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
3059 edx = u32C */
3060 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
3061 edx = u32B */
3062 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
3063 edx = u64Hi.hi = (u64A.hi * u32B).hi */
3064 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
3065 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
3066 "divl %%ecx \n\t" /* eax = u64Hi / u32C
3067 edx = u64Hi % u32C */
3068 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
3069 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
3070 "divl %%ecx \n\t" /* u64Result.lo */
3071 "movl %%edi,%%edx \n\t" /* u64Result.hi */
3072 : "=A"(u64Result), "=c"(u32Dummy),
3073 "=S"(u32Dummy), "=D"(u32Dummy)
3074 : "a"((uint32_t)u64A),
3075 "S"((uint32_t)(u64A >> 32)),
3076 "c"(u32B),
3077 "D"(u32C));
3078 return u64Result;
3079# endif
3080# else
3081 RTUINT64U u;
3082 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
3083 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
3084 u64Hi += (u64Lo >> 32);
3085 u.s.Hi = (uint32_t)(u64Hi / u32C);
3086 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
3087 return u.u;
3088# endif
3089}
3090#endif
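/*
 * Illustrative sketch (not part of the original header): the classic use of
 * ASMMultU64ByU32DivByU32 is rescaling a 64-bit tick count without overflowing the
 * intermediate product. The function name and the nanosecond conversion are assumptions
 * made for the example.
 */
static uint64_t ExampleTicksToNanoseconds(uint64_t cTicks, uint32_t uTicksPerSec)
{
    /* cTicks * 10^9 may not fit in 64 bits, but the 96-bit intermediate result does. */
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000, uTicksPerSec);
}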
3091
3092
3093/**
3094 * Probes a byte pointer for read access.
3095 *
3096 * While the function will fault if the byte is not read accessible,
3097 * the idea is to do this in a safe place like before acquiring locks
3098 * and such like.
3099 *
3100 * Also, this function guarantees that an eager compiler is not going
3101 * to optimize the probing away.
3102 *
3103 * @param pvByte Pointer to the byte.
3104 */
3105#if RT_INLINE_ASM_EXTERNAL
3106DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
3107#else
3108DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
3109{
3110 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3111 uint8_t u8;
3112# if RT_INLINE_ASM_GNU_STYLE
3113 __asm__ __volatile__("movb (%1), %0\n\t"
3114 : "=r" (u8)
3115 : "r" (pvByte));
3116# else
3117 __asm
3118 {
3119# ifdef RT_ARCH_AMD64
3120 mov rax, [pvByte]
3121 mov al, [rax]
3122# else
3123 mov eax, [pvByte]
3124 mov al, [eax]
3125# endif
3126 mov [u8], al
3127 }
3128# endif
3129 return u8;
3130}
3131#endif
3132
3133/**
3134 * Probes a buffer for read access page by page.
3135 *
3136 * While the function will fault if the buffer is not fully read
3137 * accessible, the idea is to do this in a safe place like before
3138 * acquiring locks and such like.
3139 *
3140 * Also, this function guarantees that an eager compiler is not going
3141 * to optimize the probing away.
3142 *
3143 * @param pvBuf Pointer to the buffer.
3144 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
3145 */
3146DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
3147{
3148 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
3149 /* the first byte */
3150 const uint8_t *pu8 = (const uint8_t *)pvBuf;
3151 ASMProbeReadByte(pu8);
3152
3153 /* the pages in between. */
3154 while (cbBuf > /*PAGE_SIZE*/0x1000)
3155 {
3156 ASMProbeReadByte(pu8);
3157 cbBuf -= /*PAGE_SIZE*/0x1000;
3158 pu8 += /*PAGE_SIZE*/0x1000;
3159 }
3160
3161 /* the last byte */
3162 ASMProbeReadByte(pu8 + cbBuf - 1);
3163}
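/*
 * Illustrative sketch (not part of the original header): probing a caller supplied
 * buffer before taking any locks, so that a bad pointer faults in a harmless place.
 * The function name and the lock calls mentioned in the comment are hypothetical.
 */
static void ExampleSubmit(const void *pvReq, size_t cbReq)
{
    ASMProbeReadBuffer(pvReq, cbReq); /* faults here, not inside the critical section */
    /* acquire lock ... process pvReq ... release lock */
}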
3164
3165
3166/** @def ASMBreakpoint
3167 * Debugger Breakpoint.
3168 * @remark In the gnu world we add a nop instruction after the int3 to
3169 * force gdb to remain at the int3 source line.
3170 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
3171 * @internal
3172 */
3173#if RT_INLINE_ASM_GNU_STYLE
3174# ifndef __L4ENV__
3175# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
3176# else
3177# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
3178# endif
3179#else
3180# define ASMBreakpoint() __debugbreak()
3181#endif
3182
3183
3184
3185/** @defgroup grp_inline_bits Bit Operations
3186 * @{
3187 */
3188
3189
3190/**
3191 * Sets a bit in a bitmap.
3192 *
3193 * @param pvBitmap Pointer to the bitmap.
3194 * @param iBit The bit to set.
3195 */
3196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3197DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
3198#else
3199DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
3200{
3201# if RT_INLINE_ASM_USES_INTRIN
3202 _bittestandset((long *)pvBitmap, iBit);
3203
3204# elif RT_INLINE_ASM_GNU_STYLE
3205 __asm__ __volatile__ ("btsl %1, %0"
3206 : "=m" (*(volatile long *)pvBitmap)
3207 : "Ir" (iBit)
3208 : "memory");
3209# else
3210 __asm
3211 {
3212# ifdef RT_ARCH_AMD64
3213 mov rax, [pvBitmap]
3214 mov edx, [iBit]
3215 bts [rax], edx
3216# else
3217 mov eax, [pvBitmap]
3218 mov edx, [iBit]
3219 bts [eax], edx
3220# endif
3221 }
3222# endif
3223}
3224#endif
3225
3226
3227/**
3228 * Atomically sets a bit in a bitmap.
3229 *
3230 * @param pvBitmap Pointer to the bitmap.
3231 * @param iBit The bit to set.
3232 */
3233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3234DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
3235#else
3236DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
3237{
3238# if RT_INLINE_ASM_USES_INTRIN
3239 _interlockedbittestandset((long *)pvBitmap, iBit);
3240# elif RT_INLINE_ASM_GNU_STYLE
3241 __asm__ __volatile__ ("lock; btsl %1, %0"
3242 : "=m" (*(volatile long *)pvBitmap)
3243 : "Ir" (iBit)
3244 : "memory");
3245# else
3246 __asm
3247 {
3248# ifdef RT_ARCH_AMD64
3249 mov rax, [pvBitmap]
3250 mov edx, [iBit]
3251 lock bts [rax], edx
3252# else
3253 mov eax, [pvBitmap]
3254 mov edx, [iBit]
3255 lock bts [eax], edx
3256# endif
3257 }
3258# endif
3259}
3260#endif
3261
3262
3263/**
3264 * Clears a bit in a bitmap.
3265 *
3266 * @param pvBitmap Pointer to the bitmap.
3267 * @param iBit The bit to clear.
3268 */
3269#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3270DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
3271#else
3272DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
3273{
3274# if RT_INLINE_ASM_USES_INTRIN
3275 _bittestandreset((long *)pvBitmap, iBit);
3276
3277# elif RT_INLINE_ASM_GNU_STYLE
3278 __asm__ __volatile__ ("btrl %1, %0"
3279 : "=m" (*(volatile long *)pvBitmap)
3280 : "Ir" (iBit)
3281 : "memory");
3282# else
3283 __asm
3284 {
3285# ifdef RT_ARCH_AMD64
3286 mov rax, [pvBitmap]
3287 mov edx, [iBit]
3288 btr [rax], edx
3289# else
3290 mov eax, [pvBitmap]
3291 mov edx, [iBit]
3292 btr [eax], edx
3293# endif
3294 }
3295# endif
3296}
3297#endif
3298
3299
3300/**
3301 * Atomically clears a bit in a bitmap.
3302 *
3303 * @param pvBitmap Pointer to the bitmap.
3304 * @param iBit The bit to clear.
3305 * @remark No memory barrier, take care on smp.
3306 */
3307#if RT_INLINE_ASM_EXTERNAL
3308DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
3309#else
3310DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
3311{
3312# if RT_INLINE_ASM_GNU_STYLE
3313 __asm__ __volatile__ ("lock; btrl %1, %0"
3314 : "=m" (*(volatile long *)pvBitmap)
3315 : "Ir" (iBit)
3316 : "memory");
3317# else
3318 __asm
3319 {
3320# ifdef RT_ARCH_AMD64
3321 mov rax, [pvBitmap]
3322 mov edx, [iBit]
3323 lock btr [rax], edx
3324# else
3325 mov eax, [pvBitmap]
3326 mov edx, [iBit]
3327 lock btr [eax], edx
3328# endif
3329 }
3330# endif
3331}
3332#endif
3333
3334
3335/**
3336 * Toggles a bit in a bitmap.
3337 *
3338 * @param pvBitmap Pointer to the bitmap.
3339 * @param iBit The bit to toggle.
3340 */
3341#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3342DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
3343#else
3344DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
3345{
3346# if RT_INLINE_ASM_USES_INTRIN
3347 _bittestandcomplement((long *)pvBitmap, iBit);
3348# elif RT_INLINE_ASM_GNU_STYLE
3349 __asm__ __volatile__ ("btcl %1, %0"
3350 : "=m" (*(volatile long *)pvBitmap)
3351 : "Ir" (iBit)
3352 : "memory");
3353# else
3354 __asm
3355 {
3356# ifdef RT_ARCH_AMD64
3357 mov rax, [pvBitmap]
3358 mov edx, [iBit]
3359 btc [rax], edx
3360# else
3361 mov eax, [pvBitmap]
3362 mov edx, [iBit]
3363 btc [eax], edx
3364# endif
3365 }
3366# endif
3367}
3368#endif
3369
3370
3371/**
3372 * Atomically toggles a bit in a bitmap.
3373 *
3374 * @param pvBitmap Pointer to the bitmap.
3375 * @param iBit The bit to toggle.
3376 */
3377#if RT_INLINE_ASM_EXTERNAL
3378DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
3379#else
3380DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
3381{
3382# if RT_INLINE_ASM_GNU_STYLE
3383 __asm__ __volatile__ ("lock; btcl %1, %0"
3384 : "=m" (*(volatile long *)pvBitmap)
3385 : "Ir" (iBit)
3386 : "memory");
3387# else
3388 __asm
3389 {
3390# ifdef RT_ARCH_AMD64
3391 mov rax, [pvBitmap]
3392 mov edx, [iBit]
3393 lock btc [rax], edx
3394# else
3395 mov eax, [pvBitmap]
3396 mov edx, [iBit]
3397 lock btc [eax], edx
3398# endif
3399 }
3400# endif
3401}
3402#endif
3403
3404
3405/**
3406 * Tests and sets a bit in a bitmap.
3407 *
3408 * @returns true if the bit was set.
3409 * @returns false if the bit was clear.
3410 * @param pvBitmap Pointer to the bitmap.
3411 * @param iBit The bit to test and set.
3412 */
3413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3414DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3415#else
3416DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3417{
3418 union { bool f; uint32_t u32; uint8_t u8; } rc;
3419# if RT_INLINE_ASM_USES_INTRIN
3420 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
3421
3422# elif RT_INLINE_ASM_GNU_STYLE
3423 __asm__ __volatile__ ("btsl %2, %1\n\t"
3424 "setc %b0\n\t"
3425 "andl $1, %0\n\t"
3426 : "=q" (rc.u32),
3427 "=m" (*(volatile long *)pvBitmap)
3428 : "Ir" (iBit)
3429 : "memory");
3430# else
3431 __asm
3432 {
3433 mov edx, [iBit]
3434# ifdef RT_ARCH_AMD64
3435 mov rax, [pvBitmap]
3436 bts [rax], edx
3437# else
3438 mov eax, [pvBitmap]
3439 bts [eax], edx
3440# endif
3441 setc al
3442 and eax, 1
3443 mov [rc.u32], eax
3444 }
3445# endif
3446 return rc.f;
3447}
3448#endif
3449
3450
3451/**
3452 * Atomically tests and sets a bit in a bitmap.
3453 *
3454 * @returns true if the bit was set.
3455 * @returns false if the bit was clear.
3456 * @param pvBitmap Pointer to the bitmap.
3457 * @param iBit The bit to test and set.
3458 */
3459#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3460DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
3461#else
3462DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
3463{
3464 union { bool f; uint32_t u32; uint8_t u8; } rc;
3465# if RT_INLINE_ASM_USES_INTRIN
3466 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
3467# elif RT_INLINE_ASM_GNU_STYLE
3468 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
3469 "setc %b0\n\t"
3470 "andl $1, %0\n\t"
3471 : "=q" (rc.u32),
3472 "=m" (*(volatile long *)pvBitmap)
3473 : "Ir" (iBit)
3474 : "memory");
3475# else
3476 __asm
3477 {
3478 mov edx, [iBit]
3479# ifdef RT_ARCH_AMD64
3480 mov rax, [pvBitmap]
3481 lock bts [rax], edx
3482# else
3483 mov eax, [pvBitmap]
3484 lock bts [eax], edx
3485# endif
3486 setc al
3487 and eax, 1
3488 mov [rc.u32], eax
3489 }
3490# endif
3491 return rc.f;
3492}
3493#endif
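/*
 * Illustrative sketch (not part of the original header): a tiny try-lock built on the
 * atomic bit test-and-set and clear operations. The names are invented for the example.
 */
static bool ExampleTryEnter(volatile void *pvLockBitmap, int32_t iLockBit)
{
    return !ASMAtomicBitTestAndSet(pvLockBitmap, iLockBit); /* true if the bit was clear and is now ours */
}

static void ExampleLeave(volatile void *pvLockBitmap, int32_t iLockBit)
{
    ASMAtomicBitClear(pvLockBitmap, iLockBit);
}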
3494
3495
3496/**
3497 * Tests and clears a bit in a bitmap.
3498 *
3499 * @returns true if the bit was set.
3500 * @returns false if the bit was clear.
3501 * @param pvBitmap Pointer to the bitmap.
3502 * @param iBit The bit to test and clear.
3503 */
3504#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3505DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3506#else
3507DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3508{
3509 union { bool f; uint32_t u32; uint8_t u8; } rc;
3510# if RT_INLINE_ASM_USES_INTRIN
3511 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
3512
3513# elif RT_INLINE_ASM_GNU_STYLE
3514 __asm__ __volatile__ ("btrl %2, %1\n\t"
3515 "setc %b0\n\t"
3516 "andl $1, %0\n\t"
3517 : "=q" (rc.u32),
3518 "=m" (*(volatile long *)pvBitmap)
3519 : "Ir" (iBit)
3520 : "memory");
3521# else
3522 __asm
3523 {
3524 mov edx, [iBit]
3525# ifdef RT_ARCH_AMD64
3526 mov rax, [pvBitmap]
3527 btr [rax], edx
3528# else
3529 mov eax, [pvBitmap]
3530 btr [eax], edx
3531# endif
3532 setc al
3533 and eax, 1
3534 mov [rc.u32], eax
3535 }
3536# endif
3537 return rc.f;
3538}
3539#endif
3540
3541
3542/**
3543 * Atomically tests and clears a bit in a bitmap.
3544 *
3545 * @returns true if the bit was set.
3546 * @returns false if the bit was clear.
3547 * @param pvBitmap Pointer to the bitmap.
3548 * @param iBit The bit to test and clear.
3549 * @remark No memory barrier, take care on smp.
3550 */
3551#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3552DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
3553#else
3554DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
3555{
3556 union { bool f; uint32_t u32; uint8_t u8; } rc;
3557# if RT_INLINE_ASM_USES_INTRIN
3558 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
3559
3560# elif RT_INLINE_ASM_GNU_STYLE
3561 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
3562 "setc %b0\n\t"
3563 "andl $1, %0\n\t"
3564 : "=q" (rc.u32),
3565 "=m" (*(volatile long *)pvBitmap)
3566 : "Ir" (iBit)
3567 : "memory");
3568# else
3569 __asm
3570 {
3571 mov edx, [iBit]
3572# ifdef RT_ARCH_AMD64
3573 mov rax, [pvBitmap]
3574 lock btr [rax], edx
3575# else
3576 mov eax, [pvBitmap]
3577 lock btr [eax], edx
3578# endif
3579 setc al
3580 and eax, 1
3581 mov [rc.u32], eax
3582 }
3583# endif
3584 return rc.f;
3585}
3586#endif
3587
3588
3589/**
3590 * Tests and toggles a bit in a bitmap.
3591 *
3592 * @returns true if the bit was set.
3593 * @returns false if the bit was clear.
3594 * @param pvBitmap Pointer to the bitmap.
3595 * @param iBit The bit to test and toggle.
3596 */
3597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3598DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3599#else
3600DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3601{
3602 union { bool f; uint32_t u32; uint8_t u8; } rc;
3603# if RT_INLINE_ASM_USES_INTRIN
3604 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
3605
3606# elif RT_INLINE_ASM_GNU_STYLE
3607 __asm__ __volatile__ ("btcl %2, %1\n\t"
3608 "setc %b0\n\t"
3609 "andl $1, %0\n\t"
3610 : "=q" (rc.u32),
3611 "=m" (*(volatile long *)pvBitmap)
3612 : "Ir" (iBit)
3613 : "memory");
3614# else
3615 __asm
3616 {
3617 mov edx, [iBit]
3618# ifdef RT_ARCH_AMD64
3619 mov rax, [pvBitmap]
3620 btc [rax], edx
3621# else
3622 mov eax, [pvBitmap]
3623 btc [eax], edx
3624# endif
3625 setc al
3626 and eax, 1
3627 mov [rc.u32], eax
3628 }
3629# endif
3630 return rc.f;
3631}
3632#endif
3633
3634
3635/**
3636 * Atomically tests and toggles a bit in a bitmap.
3637 *
3638 * @returns true if the bit was set.
3639 * @returns false if the bit was clear.
3640 * @param pvBitmap Pointer to the bitmap.
3641 * @param iBit The bit to test and toggle.
3642 */
3643#if RT_INLINE_ASM_EXTERNAL
3644DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
3645#else
3646DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
3647{
3648 union { bool f; uint32_t u32; uint8_t u8; } rc;
3649# if RT_INLINE_ASM_GNU_STYLE
3650 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
3651 "setc %b0\n\t"
3652 "andl $1, %0\n\t"
3653 : "=q" (rc.u32),
3654 "=m" (*(volatile long *)pvBitmap)
3655 : "Ir" (iBit)
3656 : "memory");
3657# else
3658 __asm
3659 {
3660 mov edx, [iBit]
3661# ifdef RT_ARCH_AMD64
3662 mov rax, [pvBitmap]
3663 lock btc [rax], edx
3664# else
3665 mov eax, [pvBitmap]
3666 lock btc [eax], edx
3667# endif
3668 setc al
3669 and eax, 1
3670 mov [rc.u32], eax
3671 }
3672# endif
3673 return rc.f;
3674}
3675#endif
3676
3677
3678/**
3679 * Tests if a bit in a bitmap is set.
3680 *
3681 * @returns true if the bit is set.
3682 * @returns false if the bit is clear.
3683 * @param pvBitmap Pointer to the bitmap.
3684 * @param iBit The bit to test.
3685 */
3686#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3687DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
3688#else
3689DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
3690{
3691 union { bool f; uint32_t u32; uint8_t u8; } rc;
3692# if RT_INLINE_ASM_USES_INTRIN
3693 rc.u32 = _bittest((long *)pvBitmap, iBit);
3694# elif RT_INLINE_ASM_GNU_STYLE
3695
3696 __asm__ __volatile__ ("btl %2, %1\n\t"
3697 "setc %b0\n\t"
3698 "andl $1, %0\n\t"
3699 : "=q" (rc.u32),
3700 "=m" (*(volatile long *)pvBitmap)
3701 : "Ir" (iBit)
3702 : "memory");
3703# else
3704 __asm
3705 {
3706 mov edx, [iBit]
3707# ifdef RT_ARCH_AMD64
3708 mov rax, [pvBitmap]
3709 bt [rax], edx
3710# else
3711 mov eax, [pvBitmap]
3712 bt [eax], edx
3713# endif
3714 setc al
3715 and eax, 1
3716 mov [rc.u32], eax
3717 }
3718# endif
3719 return rc.f;
3720}
3721#endif
3722
3723
3724/**
3725 * Clears a bit range within a bitmap.
3726 *
3727 * @param pvBitmap Pointer to the bitmap.
3728 * @param iBitStart The first bit to clear.
3729 * @param iBitEnd The first bit not to clear.
3730 */
3731DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
3732{
3733 if (iBitStart < iBitEnd)
3734 {
3735 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
3736 int iStart = iBitStart & ~31;
3737 int iEnd = iBitEnd & ~31;
3738 if (iStart == iEnd)
3739 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
3740 else
3741 {
3742 /* bits in first dword. */
3743 if (iBitStart & 31)
3744 {
3745 *pu32 &= (1 << (iBitStart & 31)) - 1;
3746 pu32++;
3747 iBitStart = iStart + 32;
3748 }
3749
3750 /* whole dword. */
3751 if (iBitStart != iEnd)
3752 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
3753
3754 /* bits in last dword. */
3755 if (iBitEnd & 31)
3756 {
3757 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
3758 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
3759 }
3760 }
3761 }
3762}
3763
3764
3765/**
3766 * Finds the first clear bit in a bitmap.
3767 *
3768 * @returns Index of the first zero bit.
3769 * @returns -1 if no clear bit was found.
3770 * @param pvBitmap Pointer to the bitmap.
3771 * @param cBits The number of bits in the bitmap. Multiple of 32.
3772 */
3773#if RT_INLINE_ASM_EXTERNAL
3774DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
3775#else
3776DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
3777{
3778 if (cBits)
3779 {
3780 int32_t iBit;
3781# if RT_INLINE_ASM_GNU_STYLE
3782 RTCCUINTREG uEAX, uECX, uEDI;
3783 cBits = RT_ALIGN_32(cBits, 32);
3784 __asm__ __volatile__("repe; scasl\n\t"
3785 "je 1f\n\t"
3786# ifdef RT_ARCH_AMD64
3787 "lea -4(%%rdi), %%rdi\n\t"
3788 "xorl (%%rdi), %%eax\n\t"
3789 "subq %5, %%rdi\n\t"
3790# else
3791 "lea -4(%%edi), %%edi\n\t"
3792 "xorl (%%edi), %%eax\n\t"
3793 "subl %5, %%edi\n\t"
3794# endif
3795 "shll $3, %%edi\n\t"
3796 "bsfl %%eax, %%edx\n\t"
3797 "addl %%edi, %%edx\n\t"
3798 "1:\t\n"
3799 : "=d" (iBit),
3800 "=&c" (uECX),
3801 "=&D" (uEDI),
3802 "=&a" (uEAX)
3803 : "0" (0xffffffff),
3804 "mr" (pvBitmap),
3805 "1" (cBits >> 5),
3806 "2" (pvBitmap),
3807 "3" (0xffffffff));
3808# else
3809 cBits = RT_ALIGN_32(cBits, 32);
3810 __asm
3811 {
3812# ifdef RT_ARCH_AMD64
3813 mov rdi, [pvBitmap]
3814 mov rbx, rdi
3815# else
3816 mov edi, [pvBitmap]
3817 mov ebx, edi
3818# endif
3819 mov edx, 0ffffffffh
3820 mov eax, edx
3821 mov ecx, [cBits]
3822 shr ecx, 5
3823 repe scasd
3824 je done
3825
3826# ifdef RT_ARCH_AMD64
3827 lea rdi, [rdi - 4]
3828 xor eax, [rdi]
3829 sub rdi, rbx
3830# else
3831 lea edi, [edi - 4]
3832 xor eax, [edi]
3833 sub edi, ebx
3834# endif
3835 shl edi, 3
3836 bsf edx, eax
3837 add edx, edi
3838 done:
3839 mov [iBit], edx
3840 }
3841# endif
3842 return iBit;
3843 }
3844 return -1;
3845}
3846#endif
3847
3848
3849/**
3850 * Finds the next clear bit in a bitmap.
3851 *
3852 * @returns Index of the first zero bit.
3853 * @returns -1 if no clear bit was found.
3854 * @param pvBitmap Pointer to the bitmap.
3855 * @param cBits The number of bits in the bitmap. Multiple of 32.
3856 * @param iBitPrev The bit returned from the last search.
3857 * The search will start at iBitPrev + 1.
3858 */
3859#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3860DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
3861#else
3862DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
3863{
3864 int iBit = ++iBitPrev & 31;
3865 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
3866 cBits -= iBitPrev & ~31;
3867 if (iBit)
3868 {
3869 /* inspect the first dword. */
3870 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
3871# if RT_INLINE_ASM_USES_INTRIN
3872 unsigned long ulBit = 0;
3873 if (_BitScanForward(&ulBit, u32))
3874 return ulBit + iBitPrev;
3875 iBit = -1;
3876# else
3877# if RT_INLINE_ASM_GNU_STYLE
3878 __asm__ __volatile__("bsf %1, %0\n\t"
3879 "jnz 1f\n\t"
3880 "movl $-1, %0\n\t"
3881 "1:\n\t"
3882 : "=r" (iBit)
3883 : "r" (u32));
3884# else
3885 __asm
3886 {
3887 mov edx, [u32]
3888 bsf eax, edx
3889 jnz done
3890 mov eax, 0ffffffffh
3891 done:
3892 mov [iBit], eax
3893 }
3894# endif
3895 if (iBit >= 0)
3896 return iBit + iBitPrev;
3897# endif
3898 /* Search the rest of the bitmap, if there is anything. */
3899 if (cBits > 32)
3900 {
3901 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
3902 if (iBit >= 0)
3903 return iBit + (iBitPrev & ~31) + 32;
3904 }
3905 }
3906 else
3907 {
3908 /* Search the rest of the bitmap. */
3909 iBit = ASMBitFirstClear(pvBitmap, cBits);
3910 if (iBit >= 0)
3911 return iBit + (iBitPrev & ~31);
3912 }
3913 return iBit;
3914}
3915#endif
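/*
 * Illustrative sketch (not part of the original header): allocating a slot from a bitmap
 * by combining ASMBitFirstClear/ASMBitNextClear with the atomic test-and-set. cSlots must
 * be a multiple of 32 as required by the search functions; the function name is made up
 * for the example.
 */
static int ExampleClaimSlot(volatile void *pvBitmap, uint32_t cSlots)
{
    int iBit = ASMBitFirstClear(pvBitmap, cSlots);
    while (iBit >= 0)
    {
        if (!ASMAtomicBitTestAndSet(pvBitmap, iBit))
            return iBit;                                /* the bit was clear and we set it */
        iBit = ASMBitNextClear(pvBitmap, cSlots, iBit); /* raced with someone, try the next clear bit */
    }
    return -1;                                          /* no free slot */
}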
3916
3917
3918/**
3919 * Finds the first set bit in a bitmap.
3920 *
3921 * @returns Index of the first set bit.
3922 * @returns -1 if no set bit was found.
3923 * @param pvBitmap Pointer to the bitmap.
3924 * @param cBits The number of bits in the bitmap. Multiple of 32.
3925 */
3926#if RT_INLINE_ASM_EXTERNAL
3927DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
3928#else
3929DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
3930{
3931 if (cBits)
3932 {
3933 int32_t iBit;
3934# if RT_INLINE_ASM_GNU_STYLE
3935 RTCCUINTREG uEAX, uECX, uEDI;
3936 cBits = RT_ALIGN_32(cBits, 32);
3937 __asm__ __volatile__("repe; scasl\n\t"
3938 "je 1f\n\t"
3939# ifdef RT_ARCH_AMD64
3940 "lea -4(%%rdi), %%rdi\n\t"
3941 "movl (%%rdi), %%eax\n\t"
3942 "subq %5, %%rdi\n\t"
3943# else
3944 "lea -4(%%edi), %%edi\n\t"
3945 "movl (%%edi), %%eax\n\t"
3946 "subl %5, %%edi\n\t"
3947# endif
3948 "shll $3, %%edi\n\t"
3949 "bsfl %%eax, %%edx\n\t"
3950 "addl %%edi, %%edx\n\t"
3951 "1:\t\n"
3952 : "=d" (iBit),
3953 "=&c" (uECX),
3954 "=&D" (uEDI),
3955 "=&a" (uEAX)
3956 : "0" (0xffffffff),
3957 "mr" (pvBitmap),
3958 "1" (cBits >> 5),
3959 "2" (pvBitmap),
3960 "3" (0));
3961# else
3962 cBits = RT_ALIGN_32(cBits, 32);
3963 __asm
3964 {
3965# ifdef RT_ARCH_AMD64
3966 mov rdi, [pvBitmap]
3967 mov rbx, rdi
3968# else
3969 mov edi, [pvBitmap]
3970 mov ebx, edi
3971# endif
3972 mov edx, 0ffffffffh
3973 xor eax, eax
3974 mov ecx, [cBits]
3975 shr ecx, 5
3976 repe scasd
3977 je done
3978# ifdef RT_ARCH_AMD64
3979 lea rdi, [rdi - 4]
3980 mov eax, [rdi]
3981 sub rdi, rbx
3982# else
3983 lea edi, [edi - 4]
3984 mov eax, [edi]
3985 sub edi, ebx
3986# endif
3987 shl edi, 3
3988 bsf edx, eax
3989 add edx, edi
3990 done:
3991 mov [iBit], edx
3992 }
3993# endif
3994 return iBit;
3995 }
3996 return -1;
3997}
3998#endif
3999
4000
4001/**
4002 * Finds the next set bit in a bitmap.
4003 *
4004 * @returns Index of the next set bit.
4005 * @returns -1 if no set bit was found.
4006 * @param pvBitmap Pointer to the bitmap.
4007 * @param cBits The number of bits in the bitmap. Multiple of 32.
4008 * @param iBitPrev The bit returned from the last search.
4009 * The search will start at iBitPrev + 1.
4010 */
4011#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4012DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4013#else
4014DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4015{
4016 int iBit = ++iBitPrev & 31;
4017 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4018 cBits -= iBitPrev & ~31;
4019 if (iBit)
4020 {
4021 /* inspect the first dword. */
4022 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
4023# if RT_INLINE_ASM_USES_INTRIN
4024 unsigned long ulBit = 0;
4025 if (_BitScanForward(&ulBit, u32))
4026 return ulBit + iBitPrev;
4027 iBit = -1;
4028# else
4029# if RT_INLINE_ASM_GNU_STYLE
4030 __asm__ __volatile__("bsf %1, %0\n\t"
4031 "jnz 1f\n\t"
4032 "movl $-1, %0\n\t"
4033 "1:\n\t"
4034 : "=r" (iBit)
4035 : "r" (u32));
4036# else
4037 __asm
4038 {
4039 mov edx, u32
4040 bsf eax, edx
4041 jnz done
4042 mov eax, 0ffffffffh
4043 done:
4044 mov [iBit], eax
4045 }
4046# endif
4047 if (iBit >= 0)
4048 return iBit + iBitPrev;
4049# endif
4050 /* Search the rest of the bitmap, if there is anything. */
4051 if (cBits > 32)
4052 {
4053 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4054 if (iBit >= 0)
4055 return iBit + (iBitPrev & ~31) + 32;
4056 }
4057
4058 }
4059 else
4060 {
4061 /* Search the rest of the bitmap. */
4062 iBit = ASMBitFirstSet(pvBitmap, cBits);
4063 if (iBit >= 0)
4064 return iBit + (iBitPrev & ~31);
4065 }
4066 return iBit;
4067}
4068#endif
4069
4070
4071/**
4072 * Finds the first bit which is set in the given 32-bit integer.
4073 * Bits are numbered from 1 (least significant) to 32.
4074 *
4075 * @returns index [1..32] of the first set bit.
4076 * @returns 0 if all bits are cleared.
4077 * @param u32 Integer to search for set bits.
4078 * @remark Similar to ffs() in BSD.
4079 */
4080DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
4081{
4082# if RT_INLINE_ASM_USES_INTRIN
4083 unsigned long iBit;
4084 if (_BitScanForward(&iBit, u32))
4085 iBit++;
4086 else
4087 iBit = 0;
4088# elif RT_INLINE_ASM_GNU_STYLE
4089 uint32_t iBit;
4090 __asm__ __volatile__("bsf %1, %0\n\t"
4091 "jnz 1f\n\t"
4092 "xorl %0, %0\n\t"
4093 "jmp 2f\n"
4094 "1:\n\t"
4095 "incl %0\n"
4096 "2:\n\t"
4097 : "=r" (iBit)
4098 : "rm" (u32));
4099# else
4100 uint32_t iBit;
4101 _asm
4102 {
4103 bsf eax, [u32]
4104 jnz found
4105 xor eax, eax
4106 jmp done
4107 found:
4108 inc eax
4109 done:
4110 mov [iBit], eax
4111 }
4112# endif
4113 return iBit;
4114}
4115
4116
4117/**
4118 * Finds the first bit which is set in the given 32-bit integer.
4119 * Bits are numbered from 1 (least significant) to 32.
4120 *
4121 * @returns index [1..32] of the first set bit.
4122 * @returns 0 if all bits are cleared.
4123 * @param i32 Integer to search for set bits.
4124 * @remark Similar to ffs() in BSD.
4125 */
4126DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
4127{
4128 return ASMBitFirstSetU32((uint32_t)i32);
4129}
4130
4131
4132/**
4133 * Finds the last bit which is set in the given 32-bit integer.
4134 * Bits are numbered from 1 (least significant) to 32.
4135 *
4136 * @returns index [1..32] of the last set bit.
4137 * @returns 0 if all bits are cleared.
4138 * @param u32 Integer to search for set bits.
4139 * @remark Similar to fls() in BSD.
4140 */
4141DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
4142{
4143# if RT_INLINE_ASM_USES_INTRIN
4144 unsigned long iBit;
4145 if (_BitScanReverse(&iBit, u32))
4146 iBit++;
4147 else
4148 iBit = 0;
4149# elif RT_INLINE_ASM_GNU_STYLE
4150 uint32_t iBit;
4151 __asm__ __volatile__("bsrl %1, %0\n\t"
4152 "jnz 1f\n\t"
4153 "xorl %0, %0\n\t"
4154 "jmp 2f\n"
4155 "1:\n\t"
4156 "incl %0\n"
4157 "2:\n\t"
4158 : "=r" (iBit)
4159 : "rm" (u32));
4160# else
4161 uint32_t iBit;
4162 _asm
4163 {
4164 bsr eax, [u32]
4165 jnz found
4166 xor eax, eax
4167 jmp done
4168 found:
4169 inc eax
4170 done:
4171 mov [iBit], eax
4172 }
4173# endif
4174 return iBit;
4175}
4176
4177
4178/**
4179 * Finds the last bit which is set in the given 32-bit integer.
4180 * Bits are numbered from 1 (least significant) to 32.
4181 *
4182 * @returns index [1..32] of the last set bit.
4183 * @returns 0 if all bits are cleared.
4184 * @param i32 Integer to search for set bits.
4185 * @remark Similar to fls() in BSD.
4186 */
4187DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
4188{
4189 return ASMBitLastSetU32((uint32_t)i32);
4190}
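/*
 * Illustrative sketch (not part of the original header): computing the floor of the
 * base-2 logarithm with ASMBitLastSetU32, which numbers bits 1..32 and returns 0 for a
 * zero input. The function name is invented for the example.
 */
static unsigned ExampleLog2Floor(uint32_t u32)
{
    unsigned iBit = ASMBitLastSetU32(u32);
    return iBit ? iBit - 1 : 0; /* caller must treat u32 == 0 as a special case */
}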
4191
4192
4193/**
4194 * Reverse the byte order of the given 32-bit integer.
4195 * @param u32 Integer
4196 */
4197DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
4198{
4199#if RT_INLINE_ASM_USES_INTRIN
4200 u32 = _byteswap_ulong(u32);
4201#elif RT_INLINE_ASM_GNU_STYLE
4202 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
4203#else
4204 _asm
4205 {
4206 mov eax, [u32]
4207 bswap eax
4208 mov [u32], eax
4209 }
4210#endif
4211 return u32;
4212}
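/*
 * Illustrative sketch (not part of the original header): converting a value read in
 * network (big endian) byte order to host order on the little endian hosts this header
 * targets. The function name is made up for the example.
 */
static uint32_t ExampleNetworkToHostU32(uint32_t u32BigEndian)
{
    return ASMByteSwapU32(u32BigEndian);
}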
4213
4214/** @} */
4215
4216
4217/** @} */
4218#endif
4219