VirtualBox source: vbox/trunk/include/iprt/asm.h @ 19469

Last change on this file since 19469 was 19305 (VBox/x86.h: ASMWriteBackAndInvalidateCaches, ASMHalt, and ASMInvalidateInternalCaches).

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using _MSC_VER >= 1400 and thus the compiler intrinsics.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(__stosq)
105# pragma intrinsic(__readcr8)
106# pragma intrinsic(__writecr8)
107# pragma intrinsic(_byteswap_uint64)
108# pragma intrinsic(_InterlockedExchange64)
109# endif
110# endif
111#endif
112#ifndef RT_INLINE_ASM_USES_INTRIN
113# define RT_INLINE_ASM_USES_INTRIN 0
114#endif
115
116
117
118/** @defgroup grp_asm ASM - Assembly Routines
119 * @ingroup grp_rt
120 *
121 * @remarks The difference between ordered and unordered atomic operations is that
122 * the former complete outstanding reads and writes before continuing,
123 * while the latter make no promises about the order. Even ordered
124 * operations do not, it seems, give a 100% guarantee that the operation
125 * completes before any subsequent memory access.
126 * (Please correct this if it is wrong.)
127 *
128 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
129 * are unordered (note the Uo).
130 *
131 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
132 * or even optimize assembler instructions away. For instance, in the following code
133 * the second rdmsr instruction is optimized away because gcc treats that instruction
134 * as deterministic:
135 *
136 * @code
137 * static inline uint32_t rdmsr_low(int idx)
138 * {
139 * uint32_t low;
140 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
141 * }
142 * ...
143 * uint32_t msr1 = rdmsr_low(1);
144 * foo(msr1);
145 * msr1 = rdmsr_low(1);
146 * bar(msr1);
147 * @endcode
148 *
149 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
150 * use the result of the first call as input parameter for bar() as well. For rdmsr this
151 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
152 * machine status information in general.
153 *
154 * @{
155 */
156
157/** @def RT_INLINE_ASM_EXTERNAL
158 * Defined as 1 if the compiler does not support inline assembly.
159 * The ASM* functions will then be implemented in an external .asm file.
160 *
161 * @remark The Microsoft AMD64 compiler does not support inline assembly, which is
162 * why external implementations are used there.
163 */
164#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
165# define RT_INLINE_ASM_EXTERNAL 1
166#else
167# define RT_INLINE_ASM_EXTERNAL 0
168#endif
169
170/** @def RT_INLINE_ASM_GNU_STYLE
171 * Defined as 1 if the compiler understands GNU style inline assembly.
172 */
173#if defined(_MSC_VER)
174# define RT_INLINE_ASM_GNU_STYLE 0
175#else
176# define RT_INLINE_ASM_GNU_STYLE 1
177#endif
178
179
180/** @todo find a more proper place for this structure? */
181#pragma pack(1)
182/** IDTR */
183typedef struct RTIDTR
184{
185 /** Size of the IDT. */
186 uint16_t cbIdt;
187 /** Address of the IDT. */
188 uintptr_t pIdt;
189} RTIDTR, *PRTIDTR;
190#pragma pack()
191
192#pragma pack(1)
193/** GDTR */
194typedef struct RTGDTR
195{
196 /** Size of the GDT. */
197 uint16_t cbGdt;
198 /** Address of the GDT. */
199 uintptr_t pGdt;
200} RTGDTR, *PRTGDTR;
201#pragma pack()
202
203
204/** @def ASMReturnAddress
205 * Gets the return address of the current (or calling if you like) function or method.
206 */
207#ifdef _MSC_VER
208# ifdef __cplusplus
209extern "C"
210# endif
211void * _ReturnAddress(void);
212# pragma intrinsic(_ReturnAddress)
213# define ASMReturnAddress() _ReturnAddress()
214#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
215# define ASMReturnAddress() __builtin_return_address(0)
216#else
217# error "Unsupported compiler."
218#endif
219
220
221/**
222 * Gets the content of the IDTR CPU register.
223 * @param pIdtr Where to store the IDTR contents.
224 */
225#if RT_INLINE_ASM_EXTERNAL
226DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
227#else
228DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
229{
230# if RT_INLINE_ASM_GNU_STYLE
231 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
232# else
233 __asm
234 {
235# ifdef RT_ARCH_AMD64
236 mov rax, [pIdtr]
237 sidt [rax]
238# else
239 mov eax, [pIdtr]
240 sidt [eax]
241# endif
242 }
243# endif
244}
245#endif
246
247
248/**
249 * Sets the content of the IDTR CPU register.
250 * @param pIdtr Where to load the IDTR contents from
251 */
252#if RT_INLINE_ASM_EXTERNAL
253DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
254#else
255DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
256{
257# if RT_INLINE_ASM_GNU_STYLE
258 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
259# else
260 __asm
261 {
262# ifdef RT_ARCH_AMD64
263 mov rax, [pIdtr]
264 lidt [rax]
265# else
266 mov eax, [pIdtr]
267 lidt [eax]
268# endif
269 }
270# endif
271}
272#endif
273
274
275/**
276 * Gets the content of the GDTR CPU register.
277 * @param pGdtr Where to store the GDTR contents.
278 */
279#if RT_INLINE_ASM_EXTERNAL
280DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
281#else
282DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
283{
284# if RT_INLINE_ASM_GNU_STYLE
285 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
286# else
287 __asm
288 {
289# ifdef RT_ARCH_AMD64
290 mov rax, [pGdtr]
291 sgdt [rax]
292# else
293 mov eax, [pGdtr]
294 sgdt [eax]
295# endif
296 }
297# endif
298}
299#endif
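
/* Example (a minimal illustrative sketch, not required by the header): dumping the
 * descriptor table registers using the accessors and structures above.
 *
 *     RTIDTR Idtr;
 *     RTGDTR Gdtr;
 *     ASMGetIDTR(&Idtr);
 *     ASMGetGDTR(&Gdtr);
 *     // Idtr.cbIdt / Gdtr.cbGdt hold the table limits, Idtr.pIdt / Gdtr.pGdt the base addresses.
 */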
300
301/**
302 * Get the CS register.
303 * @returns CS.
304 */
305#if RT_INLINE_ASM_EXTERNAL
306DECLASM(RTSEL) ASMGetCS(void);
307#else
308DECLINLINE(RTSEL) ASMGetCS(void)
309{
310 RTSEL SelCS;
311# if RT_INLINE_ASM_GNU_STYLE
312 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
313# else
314 __asm
315 {
316 mov ax, cs
317 mov [SelCS], ax
318 }
319# endif
320 return SelCS;
321}
322#endif
323
324
325/**
326 * Get the DS register.
327 * @returns DS.
328 */
329#if RT_INLINE_ASM_EXTERNAL
330DECLASM(RTSEL) ASMGetDS(void);
331#else
332DECLINLINE(RTSEL) ASMGetDS(void)
333{
334 RTSEL SelDS;
335# if RT_INLINE_ASM_GNU_STYLE
336 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
337# else
338 __asm
339 {
340 mov ax, ds
341 mov [SelDS], ax
342 }
343# endif
344 return SelDS;
345}
346#endif
347
348
349/**
350 * Get the ES register.
351 * @returns ES.
352 */
353#if RT_INLINE_ASM_EXTERNAL
354DECLASM(RTSEL) ASMGetES(void);
355#else
356DECLINLINE(RTSEL) ASMGetES(void)
357{
358 RTSEL SelES;
359# if RT_INLINE_ASM_GNU_STYLE
360 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
361# else
362 __asm
363 {
364 mov ax, es
365 mov [SelES], ax
366 }
367# endif
368 return SelES;
369}
370#endif
371
372
373/**
374 * Get the FS register.
375 * @returns FS.
376 */
377#if RT_INLINE_ASM_EXTERNAL
378DECLASM(RTSEL) ASMGetFS(void);
379#else
380DECLINLINE(RTSEL) ASMGetFS(void)
381{
382 RTSEL SelFS;
383# if RT_INLINE_ASM_GNU_STYLE
384 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
385# else
386 __asm
387 {
388 mov ax, fs
389 mov [SelFS], ax
390 }
391# endif
392 return SelFS;
393}
394#endif
395
396
397/**
398 * Get the GS register.
399 * @returns GS.
400 */
401#if RT_INLINE_ASM_EXTERNAL
402DECLASM(RTSEL) ASMGetGS(void);
403#else
404DECLINLINE(RTSEL) ASMGetGS(void)
405{
406 RTSEL SelGS;
407# if RT_INLINE_ASM_GNU_STYLE
408 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
409# else
410 __asm
411 {
412 mov ax, gs
413 mov [SelGS], ax
414 }
415# endif
416 return SelGS;
417}
418#endif
419
420
421/**
422 * Get the SS register.
423 * @returns SS.
424 */
425#if RT_INLINE_ASM_EXTERNAL
426DECLASM(RTSEL) ASMGetSS(void);
427#else
428DECLINLINE(RTSEL) ASMGetSS(void)
429{
430 RTSEL SelSS;
431# if RT_INLINE_ASM_GNU_STYLE
432 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
433# else
434 __asm
435 {
436 mov ax, ss
437 mov [SelSS], ax
438 }
439# endif
440 return SelSS;
441}
442#endif
443
444
445/**
446 * Get the TR register.
447 * @returns TR.
448 */
449#if RT_INLINE_ASM_EXTERNAL
450DECLASM(RTSEL) ASMGetTR(void);
451#else
452DECLINLINE(RTSEL) ASMGetTR(void)
453{
454 RTSEL SelTR;
455# if RT_INLINE_ASM_GNU_STYLE
456 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
457# else
458 __asm
459 {
460 str ax
461 mov [SelTR], ax
462 }
463# endif
464 return SelTR;
465}
466#endif
467
468
469/**
470 * Get the [RE]FLAGS register.
471 * @returns [RE]FLAGS.
472 */
473#if RT_INLINE_ASM_EXTERNAL
474DECLASM(RTCCUINTREG) ASMGetFlags(void);
475#else
476DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
477{
478 RTCCUINTREG uFlags;
479# if RT_INLINE_ASM_GNU_STYLE
480# ifdef RT_ARCH_AMD64
481 __asm__ __volatile__("pushfq\n\t"
482 "popq %0\n\t"
483 : "=g" (uFlags));
484# else
485 __asm__ __volatile__("pushfl\n\t"
486 "popl %0\n\t"
487 : "=g" (uFlags));
488# endif
489# else
490 __asm
491 {
492# ifdef RT_ARCH_AMD64
493 pushfq
494 pop [uFlags]
495# else
496 pushfd
497 pop [uFlags]
498# endif
499 }
500# endif
501 return uFlags;
502}
503#endif
504
505
506/**
507 * Set the [RE]FLAGS register.
508 * @param uFlags The new [RE]FLAGS value.
509 */
510#if RT_INLINE_ASM_EXTERNAL
511DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
512#else
513DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
514{
515# if RT_INLINE_ASM_GNU_STYLE
516# ifdef RT_ARCH_AMD64
517 __asm__ __volatile__("pushq %0\n\t"
518 "popfq\n\t"
519 : : "g" (uFlags));
520# else
521 __asm__ __volatile__("pushl %0\n\t"
522 "popfl\n\t"
523 : : "g" (uFlags));
524# endif
525# else
526 __asm
527 {
528# ifdef RT_ARCH_AMD64
529 push [uFlags]
530 popfq
531# else
532 push [uFlags]
533 popfd
534# endif
535 }
536# endif
537}
538#endif
539
540
541/**
542 * Gets the content of the CPU timestamp counter register.
543 *
544 * @returns TSC.
545 */
546#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
547DECLASM(uint64_t) ASMReadTSC(void);
548#else
549DECLINLINE(uint64_t) ASMReadTSC(void)
550{
551 RTUINT64U u;
552# if RT_INLINE_ASM_GNU_STYLE
553 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
554# else
555# if RT_INLINE_ASM_USES_INTRIN
556 u.u = __rdtsc();
557# else
558 __asm
559 {
560 rdtsc
561 mov [u.s.Lo], eax
562 mov [u.s.Hi], edx
563 }
564# endif
565# endif
566 return u.u;
567}
568#endif
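
/* Example (illustrative sketch): crude cycle counting with ASMReadTSC. Keep in mind
 * that the TSC rate may vary and the counters of different CPUs need not be in sync.
 *
 *     uint64_t const uTscStart = ASMReadTSC();
 *     // ... work to be measured ...
 *     uint64_t const cTicks = ASMReadTSC() - uTscStart;
 */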
569
570
571/**
572 * Performs the cpuid instruction returning all registers.
573 *
574 * @param uOperator CPUID operation (eax).
575 * @param pvEAX Where to store eax.
576 * @param pvEBX Where to store ebx.
577 * @param pvECX Where to store ecx.
578 * @param pvEDX Where to store edx.
579 * @remark We're using void pointers to ease the use of special bitfield structures and such.
580 */
581#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
582DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
583#else
584DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
585{
586# if RT_INLINE_ASM_GNU_STYLE
587# ifdef RT_ARCH_AMD64
588 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
589 __asm__ ("cpuid\n\t"
590 : "=a" (uRAX),
591 "=b" (uRBX),
592 "=c" (uRCX),
593 "=d" (uRDX)
594 : "0" (uOperator));
595 *(uint32_t *)pvEAX = (uint32_t)uRAX;
596 *(uint32_t *)pvEBX = (uint32_t)uRBX;
597 *(uint32_t *)pvECX = (uint32_t)uRCX;
598 *(uint32_t *)pvEDX = (uint32_t)uRDX;
599# else
600 __asm__ ("xchgl %%ebx, %1\n\t"
601 "cpuid\n\t"
602 "xchgl %%ebx, %1\n\t"
603 : "=a" (*(uint32_t *)pvEAX),
604 "=r" (*(uint32_t *)pvEBX),
605 "=c" (*(uint32_t *)pvECX),
606 "=d" (*(uint32_t *)pvEDX)
607 : "0" (uOperator));
608# endif
609
610# elif RT_INLINE_ASM_USES_INTRIN
611 int aInfo[4];
612 __cpuid(aInfo, uOperator);
613 *(uint32_t *)pvEAX = aInfo[0];
614 *(uint32_t *)pvEBX = aInfo[1];
615 *(uint32_t *)pvECX = aInfo[2];
616 *(uint32_t *)pvEDX = aInfo[3];
617
618# else
619 uint32_t uEAX;
620 uint32_t uEBX;
621 uint32_t uECX;
622 uint32_t uEDX;
623 __asm
624 {
625 push ebx
626 mov eax, [uOperator]
627 cpuid
628 mov [uEAX], eax
629 mov [uEBX], ebx
630 mov [uECX], ecx
631 mov [uEDX], edx
632 pop ebx
633 }
634 *(uint32_t *)pvEAX = uEAX;
635 *(uint32_t *)pvEBX = uEBX;
636 *(uint32_t *)pvECX = uECX;
637 *(uint32_t *)pvEDX = uEDX;
638# endif
639}
640#endif
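
/* Example (illustrative sketch): fetching the vendor string and the TSC feature bit
 * with ASMCpuId. Assumes the CPU supports CPUID at all (see ASMHasCpuId below).
 *
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     char szVendor[13];
 *     ASMCpuId(0, &uEAX, &szVendor[0], &szVendor[8], &szVendor[4]);
 *     szVendor[12] = '\0';                           // EBX, EDX, ECX spell out the vendor string.
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fHasTsc = (uEDX & RT_BIT(4)) != 0;  // CPUID.01h:EDX bit 4 = TSC.
 */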
641
642
643/**
644 * Performs the cpuid instruction returning all registers.
645 * Some cpuid leaves take ECX as an additional input parameter (currently known for EAX=4).
646 *
647 * @param uOperator CPUID operation (eax).
648 * @param uIdxECX ecx index
649 * @param pvEAX Where to store eax.
650 * @param pvEBX Where to store ebx.
651 * @param pvECX Where to store ecx.
652 * @param pvEDX Where to store edx.
653 * @remark We're using void pointers to ease the use of special bitfield structures and such.
654 */
655#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
656DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
657#else
658DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
659{
660# if RT_INLINE_ASM_GNU_STYLE
661# ifdef RT_ARCH_AMD64
662 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
663 __asm__ ("cpuid\n\t"
664 : "=a" (uRAX),
665 "=b" (uRBX),
666 "=c" (uRCX),
667 "=d" (uRDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670 *(uint32_t *)pvEAX = (uint32_t)uRAX;
671 *(uint32_t *)pvEBX = (uint32_t)uRBX;
672 *(uint32_t *)pvECX = (uint32_t)uRCX;
673 *(uint32_t *)pvEDX = (uint32_t)uRDX;
674# else
675 __asm__ ("xchgl %%ebx, %1\n\t"
676 "cpuid\n\t"
677 "xchgl %%ebx, %1\n\t"
678 : "=a" (*(uint32_t *)pvEAX),
679 "=r" (*(uint32_t *)pvEBX),
680 "=c" (*(uint32_t *)pvECX),
681 "=d" (*(uint32_t *)pvEDX)
682 : "0" (uOperator),
683 "2" (uIdxECX));
684# endif
685
686# elif RT_INLINE_ASM_USES_INTRIN
687 int aInfo[4];
688 /** @todo this ignores uIdxECX; needs an ECX-aware intrinsic (__cpuidex in newer compilers) or inline assembly. */
689 __cpuid(aInfo, uOperator);
690 *(uint32_t *)pvEAX = aInfo[0];
691 *(uint32_t *)pvEBX = aInfo[1];
692 *(uint32_t *)pvECX = aInfo[2];
693 *(uint32_t *)pvEDX = aInfo[3];
694
695# else
696 uint32_t uEAX;
697 uint32_t uEBX;
698 uint32_t uECX;
699 uint32_t uEDX;
700 __asm
701 {
702 push ebx
703 mov eax, [uOperator]
704 mov ecx, [uIdxECX]
705 cpuid
706 mov [uEAX], eax
707 mov [uEBX], ebx
708 mov [uECX], ecx
709 mov [uEDX], edx
710 pop ebx
711 }
712 *(uint32_t *)pvEAX = uEAX;
713 *(uint32_t *)pvEBX = uEBX;
714 *(uint32_t *)pvECX = uECX;
715 *(uint32_t *)pvEDX = uEDX;
716# endif
717}
718#endif
719
720
721/**
722 * Performs the cpuid instruction returning ecx and edx.
723 *
724 * @param uOperator CPUID operation (eax).
725 * @param pvECX Where to store ecx.
726 * @param pvEDX Where to store edx.
727 * @remark We're using void pointers to ease the use of special bitfield structures and such.
728 */
729#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
730DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
731#else
732DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
733{
734 uint32_t uEBX;
735 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
736}
737#endif
738
739
740/**
741 * Performs the cpuid instruction returning edx.
742 *
743 * @param uOperator CPUID operation (eax).
744 * @returns EDX after cpuid operation.
745 */
746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
747DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
748#else
749DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
750{
751 RTCCUINTREG xDX;
752# if RT_INLINE_ASM_GNU_STYLE
753# ifdef RT_ARCH_AMD64
754 RTCCUINTREG uSpill;
755 __asm__ ("cpuid"
756 : "=a" (uSpill),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "rbx", "rcx");
760# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
761 __asm__ ("push %%ebx\n\t"
762 "cpuid\n\t"
763 "pop %%ebx\n\t"
764 : "=a" (uOperator),
765 "=d" (xDX)
766 : "0" (uOperator)
767 : "ecx");
768# else
769 __asm__ ("cpuid"
770 : "=a" (uOperator),
771 "=d" (xDX)
772 : "0" (uOperator)
773 : "ebx", "ecx");
774# endif
775
776# elif RT_INLINE_ASM_USES_INTRIN
777 int aInfo[4];
778 __cpuid(aInfo, uOperator);
779 xDX = aInfo[3];
780
781# else
782 __asm
783 {
784 push ebx
785 mov eax, [uOperator]
786 cpuid
787 mov [xDX], edx
788 pop ebx
789 }
790# endif
791 return (uint32_t)xDX;
792}
793#endif
794
795
796/**
797 * Performs the cpuid instruction returning ecx.
798 *
799 * @param uOperator CPUID operation (eax).
800 * @returns ECX after cpuid operation.
801 */
802#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
803DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
804#else
805DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
806{
807 RTCCUINTREG xCX;
808# if RT_INLINE_ASM_GNU_STYLE
809# ifdef RT_ARCH_AMD64
810 RTCCUINTREG uSpill;
811 __asm__ ("cpuid"
812 : "=a" (uSpill),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "rbx", "rdx");
816# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
817 __asm__ ("push %%ebx\n\t"
818 "cpuid\n\t"
819 "pop %%ebx\n\t"
820 : "=a" (uOperator),
821 "=c" (xCX)
822 : "0" (uOperator)
823 : "edx");
824# else
825 __asm__ ("cpuid"
826 : "=a" (uOperator),
827 "=c" (xCX)
828 : "0" (uOperator)
829 : "ebx", "edx");
830
831# endif
832
833# elif RT_INLINE_ASM_USES_INTRIN
834 int aInfo[4];
835 __cpuid(aInfo, uOperator);
836 xCX = aInfo[2];
837
838# else
839 __asm
840 {
841 push ebx
842 mov eax, [uOperator]
843 cpuid
844 mov [xCX], ecx
845 pop ebx
846 }
847# endif
848 return (uint32_t)xCX;
849}
850#endif
851
852
853/**
854 * Checks if the current CPU supports CPUID.
855 *
856 * @returns true if CPUID is supported.
857 */
858DECLINLINE(bool) ASMHasCpuId(void)
859{
860#ifdef RT_ARCH_AMD64
861 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
862#else /* !RT_ARCH_AMD64 */
863 bool fRet = false;
864# if RT_INLINE_ASM_GNU_STYLE
865 uint32_t u1;
866 uint32_t u2;
867 __asm__ ("pushf\n\t"
868 "pop %1\n\t"
869 "mov %1, %2\n\t"
870 "xorl $0x200000, %1\n\t"
871 "push %1\n\t"
872 "popf\n\t"
873 "pushf\n\t"
874 "pop %1\n\t"
875 "cmpl %1, %2\n\t"
876 "setne %0\n\t"
877 "push %2\n\t"
878 "popf\n\t"
879 : "=m" (fRet), "=r" (u1), "=r" (u2));
880# else
881 __asm
882 {
883 pushfd
884 pop eax
885 mov ebx, eax
886 xor eax, 0200000h
887 push eax
888 popfd
889 pushfd
890 pop eax
891 cmp eax, ebx
892 setne fRet
893 push ebx
894 popfd
895 }
896# endif
897 return fRet;
898#endif /* !RT_ARCH_AMD64 */
899}
900
901
902/**
903 * Gets the APIC ID of the current CPU.
904 *
905 * @returns the APIC ID.
906 */
907#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
908DECLASM(uint8_t) ASMGetApicId(void);
909#else
910DECLINLINE(uint8_t) ASMGetApicId(void)
911{
912 RTCCUINTREG xBX;
913# if RT_INLINE_ASM_GNU_STYLE
914# ifdef RT_ARCH_AMD64
915 RTCCUINTREG uSpill;
916 __asm__ ("cpuid"
917 : "=a" (uSpill),
918 "=b" (xBX)
919 : "0" (1)
920 : "rcx", "rdx");
921# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
922 RTCCUINTREG uSpill;
923 __asm__ ("mov %%ebx,%1\n\t"
924 "cpuid\n\t"
925 "xchgl %%ebx,%1\n\t"
926 : "=a" (uSpill),
927 "=r" (xBX)
928 : "0" (1)
929 : "ecx", "edx");
930# else
931 RTCCUINTREG uSpill;
932 __asm__ ("cpuid"
933 : "=a" (uSpill),
934 "=b" (xBX)
935 : "0" (1)
936 : "ecx", "edx");
937# endif
938
939# elif RT_INLINE_ASM_USES_INTRIN
940 int aInfo[4];
941 __cpuid(aInfo, 1);
942 xBX = aInfo[1];
943
944# else
945 __asm
946 {
947 push ebx
948 mov eax, 1
949 cpuid
950 mov [xBX], ebx
951 pop ebx
952 }
953# endif
954 return (uint8_t)(xBX >> 24);
955}
956#endif
957
958
959/**
960 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
961 *
962 * @returns true/false.
963 * @param uEBX EBX return from ASMCpuId(0)
964 * @param uECX ECX return from ASMCpuId(0)
965 * @param uEDX EDX return from ASMCpuId(0)
966 */
967DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
968{
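 /* The constants are "Genu" (ebx), "ntel" (ecx) and "ineI" (edx), i.e. "GenuineIntel". */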
969 return uEBX == 0x756e6547
970 && uECX == 0x6c65746e
971 && uEDX == 0x49656e69;
972}
973
974
975/**
976 * Tests if this is a genuine Intel CPU.
977 *
978 * @returns true/false.
979 */
980DECLINLINE(bool) ASMIsIntelCpu(void)
981{
982 uint32_t uEAX, uEBX, uECX, uEDX;
983 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
984 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
985}
986
987
988/**
989 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
990 *
991 * @returns Family.
992 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
993 */
994DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
995{
996 return ((uEAX >> 8) & 0xf) == 0xf
997 ? ((uEAX >> 20) & 0x7f) + 0xf
998 : ((uEAX >> 8) & 0xf);
999}
1000
1001
1002/**
1003 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1004 *
1005 * @returns Model.
1006 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1008 */
1009DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1010{
1011 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1012 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1013 : ((uEAX >> 4) & 0xf);
1014}
1015
1016
1017/**
1018 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1019 *
1020 * @returns Model.
1021 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1023 */
1024DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1025{
1026 return ((uEAX >> 8) & 0xf) == 0xf
1027 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1028 : ((uEAX >> 4) & 0xf);
1029}
1030
1031
1032/**
1033 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1034 *
1035 * @returns Model.
1036 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1037 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1038 */
1039DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1040{
1041 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1042 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1043 : ((uEAX >> 4) & 0xf);
1044}
1045
1046
1047/**
1048 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1049 *
1050 * @returns Stepping.
1051 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1052 */
1053DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1054{
1055 return uEAX & 0xf;
1056}
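
/* Example (illustrative sketch): putting the CPUID decoders above together.
 *
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *     uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *     uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 */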
1057
1058
1059/**
1060 * Get cr0.
1061 * @returns cr0.
1062 */
1063#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1064DECLASM(RTCCUINTREG) ASMGetCR0(void);
1065#else
1066DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1067{
1068 RTCCUINTREG uCR0;
1069# if RT_INLINE_ASM_USES_INTRIN
1070 uCR0 = __readcr0();
1071
1072# elif RT_INLINE_ASM_GNU_STYLE
1073# ifdef RT_ARCH_AMD64
1074 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1075# else
1076 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1077# endif
1078# else
1079 __asm
1080 {
1081# ifdef RT_ARCH_AMD64
1082 mov rax, cr0
1083 mov [uCR0], rax
1084# else
1085 mov eax, cr0
1086 mov [uCR0], eax
1087# endif
1088 }
1089# endif
1090 return uCR0;
1091}
1092#endif
1093
1094
1095/**
1096 * Sets the CR0 register.
1097 * @param uCR0 The new CR0 value.
1098 */
1099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1100DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1101#else
1102DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1103{
1104# if RT_INLINE_ASM_USES_INTRIN
1105 __writecr0(uCR0);
1106
1107# elif RT_INLINE_ASM_GNU_STYLE
1108# ifdef RT_ARCH_AMD64
1109 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1110# else
1111 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1112# endif
1113# else
1114 __asm
1115 {
1116# ifdef RT_ARCH_AMD64
1117 mov rax, [uCR0]
1118 mov cr0, rax
1119# else
1120 mov eax, [uCR0]
1121 mov cr0, eax
1122# endif
1123 }
1124# endif
1125}
1126#endif
1127
1128
1129/**
1130 * Get cr2.
1131 * @returns cr2.
1132 */
1133#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1134DECLASM(RTCCUINTREG) ASMGetCR2(void);
1135#else
1136DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1137{
1138 RTCCUINTREG uCR2;
1139# if RT_INLINE_ASM_USES_INTRIN
1140 uCR2 = __readcr2();
1141
1142# elif RT_INLINE_ASM_GNU_STYLE
1143# ifdef RT_ARCH_AMD64
1144 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1145# else
1146 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1147# endif
1148# else
1149 __asm
1150 {
1151# ifdef RT_ARCH_AMD64
1152 mov rax, cr2
1153 mov [uCR2], rax
1154# else
1155 mov eax, cr2
1156 mov [uCR2], eax
1157# endif
1158 }
1159# endif
1160 return uCR2;
1161}
1162#endif
1163
1164
1165/**
1166 * Sets the CR2 register.
1167 * @param uCR2 The new CR2 value.
1168 */
1169#if RT_INLINE_ASM_EXTERNAL
1170DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1171#else
1172DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1173{
1174# if RT_INLINE_ASM_GNU_STYLE
1175# ifdef RT_ARCH_AMD64
1176 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1177# else
1178 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1179# endif
1180# else
1181 __asm
1182 {
1183# ifdef RT_ARCH_AMD64
1184 mov rax, [uCR2]
1185 mov cr2, rax
1186# else
1187 mov eax, [uCR2]
1188 mov cr2, eax
1189# endif
1190 }
1191# endif
1192}
1193#endif
1194
1195
1196/**
1197 * Get cr3.
1198 * @returns cr3.
1199 */
1200#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1201DECLASM(RTCCUINTREG) ASMGetCR3(void);
1202#else
1203DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1204{
1205 RTCCUINTREG uCR3;
1206# if RT_INLINE_ASM_USES_INTRIN
1207 uCR3 = __readcr3();
1208
1209# elif RT_INLINE_ASM_GNU_STYLE
1210# ifdef RT_ARCH_AMD64
1211 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1212# else
1213 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1214# endif
1215# else
1216 __asm
1217 {
1218# ifdef RT_ARCH_AMD64
1219 mov rax, cr3
1220 mov [uCR3], rax
1221# else
1222 mov eax, cr3
1223 mov [uCR3], eax
1224# endif
1225 }
1226# endif
1227 return uCR3;
1228}
1229#endif
1230
1231
1232/**
1233 * Sets the CR3 register.
1234 *
1235 * @param uCR3 New CR3 value.
1236 */
1237#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1238DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1239#else
1240DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1241{
1242# if RT_INLINE_ASM_USES_INTRIN
1243 __writecr3(uCR3);
1244
1245# elif RT_INLINE_ASM_GNU_STYLE
1246# ifdef RT_ARCH_AMD64
1247 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1248# else
1249 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1250# endif
1251# else
1252 __asm
1253 {
1254# ifdef RT_ARCH_AMD64
1255 mov rax, [uCR3]
1256 mov cr3, rax
1257# else
1258 mov eax, [uCR3]
1259 mov cr3, eax
1260# endif
1261 }
1262# endif
1263}
1264#endif
1265
1266
1267/**
1268 * Reloads the CR3 register.
1269 */
1270#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1271DECLASM(void) ASMReloadCR3(void);
1272#else
1273DECLINLINE(void) ASMReloadCR3(void)
1274{
1275# if RT_INLINE_ASM_USES_INTRIN
1276 __writecr3(__readcr3());
1277
1278# elif RT_INLINE_ASM_GNU_STYLE
1279 RTCCUINTREG u;
1280# ifdef RT_ARCH_AMD64
1281 __asm__ __volatile__("movq %%cr3, %0\n\t"
1282 "movq %0, %%cr3\n\t"
1283 : "=r" (u));
1284# else
1285 __asm__ __volatile__("movl %%cr3, %0\n\t"
1286 "movl %0, %%cr3\n\t"
1287 : "=r" (u));
1288# endif
1289# else
1290 __asm
1291 {
1292# ifdef RT_ARCH_AMD64
1293 mov rax, cr3
1294 mov cr3, rax
1295# else
1296 mov eax, cr3
1297 mov cr3, eax
1298# endif
1299 }
1300# endif
1301}
1302#endif
1303
1304
1305/**
1306 * Get cr4.
1307 * @returns cr4.
1308 */
1309#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1310DECLASM(RTCCUINTREG) ASMGetCR4(void);
1311#else
1312DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1313{
1314 RTCCUINTREG uCR4;
1315# if RT_INLINE_ASM_USES_INTRIN
1316 uCR4 = __readcr4();
1317
1318# elif RT_INLINE_ASM_GNU_STYLE
1319# ifdef RT_ARCH_AMD64
1320 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1321# else
1322 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1323# endif
1324# else
1325 __asm
1326 {
1327# ifdef RT_ARCH_AMD64
1328 mov rax, cr4
1329 mov [uCR4], rax
1330# else
1331 push eax /* just in case */
1332 /*mov eax, cr4*/
1333 _emit 0x0f
1334 _emit 0x20
1335 _emit 0xe0
1336 mov [uCR4], eax
1337 pop eax
1338# endif
1339 }
1340# endif
1341 return uCR4;
1342}
1343#endif
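
/* Example (illustrative sketch): checking whether PAE paging is enabled. Bit 5 of
 * CR4 is CR4.PAE; see the X86_CR4_* defines in x86.h for symbolic names.
 *
 *     bool const fPae = (ASMGetCR4() & RT_BIT(5)) != 0;
 */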
1344
1345
1346/**
1347 * Sets the CR4 register.
1348 *
1349 * @param uCR4 New CR4 value.
1350 */
1351#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1352DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1353#else
1354DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1355{
1356# if RT_INLINE_ASM_USES_INTRIN
1357 __writecr4(uCR4);
1358
1359# elif RT_INLINE_ASM_GNU_STYLE
1360# ifdef RT_ARCH_AMD64
1361 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1362# else
1363 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1364# endif
1365# else
1366 __asm
1367 {
1368# ifdef RT_ARCH_AMD64
1369 mov rax, [uCR4]
1370 mov cr4, rax
1371# else
1372 mov eax, [uCR4]
1373 _emit 0x0F
1374 _emit 0x22
1375 _emit 0xE0 /* mov cr4, eax */
1376# endif
1377 }
1378# endif
1379}
1380#endif
1381
1382
1383/**
1384 * Get cr8.
1385 * @returns cr8.
1386 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1387 */
1388#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1389DECLASM(RTCCUINTREG) ASMGetCR8(void);
1390#else
1391DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1392{
1393# ifdef RT_ARCH_AMD64
1394 RTCCUINTREG uCR8;
1395# if RT_INLINE_ASM_USES_INTRIN
1396 uCR8 = __readcr8();
1397
1398# elif RT_INLINE_ASM_GNU_STYLE
1399 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1400# else
1401 __asm
1402 {
1403 mov rax, cr8
1404 mov [uCR8], rax
1405 }
1406# endif
1407 return uCR8;
1408# else /* !RT_ARCH_AMD64 */
1409 return 0;
1410# endif /* !RT_ARCH_AMD64 */
1411}
1412#endif
1413
1414
1415/**
1416 * Enables interrupts (EFLAGS.IF).
1417 */
1418#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1419DECLASM(void) ASMIntEnable(void);
1420#else
1421DECLINLINE(void) ASMIntEnable(void)
1422{
1423# if RT_INLINE_ASM_GNU_STYLE
1424 __asm("sti\n");
1425# elif RT_INLINE_ASM_USES_INTRIN
1426 _enable();
1427# else
1428 __asm sti
1429# endif
1430}
1431#endif
1432
1433
1434/**
1435 * Disables interrupts (!EFLAGS.IF).
1436 */
1437#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1438DECLASM(void) ASMIntDisable(void);
1439#else
1440DECLINLINE(void) ASMIntDisable(void)
1441{
1442# if RT_INLINE_ASM_GNU_STYLE
1443 __asm("cli\n");
1444# elif RT_INLINE_ASM_USES_INTRIN
1445 _disable();
1446# else
1447 __asm cli
1448# endif
1449}
1450#endif
1451
1452
1453/**
1454 * Disables interrupts and returns previous xFLAGS.
1455 */
1456#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1457DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1458#else
1459DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1460{
1461 RTCCUINTREG xFlags;
1462# if RT_INLINE_ASM_GNU_STYLE
1463# ifdef RT_ARCH_AMD64
1464 __asm__ __volatile__("pushfq\n\t"
1465 "cli\n\t"
1466 "popq %0\n\t"
1467 : "=rm" (xFlags));
1468# else
1469 __asm__ __volatile__("pushfl\n\t"
1470 "cli\n\t"
1471 "popl %0\n\t"
1472 : "=rm" (xFlags));
1473# endif
1474# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1475 xFlags = ASMGetFlags();
1476 _disable();
1477# else
1478 __asm {
1479 pushfd
1480 cli
1481 pop [xFlags]
1482 }
1483# endif
1484 return xFlags;
1485}
1486#endif
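
/* Example (illustrative sketch): the usual pattern for a short section of code
 * that must run with interrupts disabled.
 *
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     // ... code that must not be interrupted ...
 *     ASMSetFlags(fSavedFlags);
 */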
1487
1488
1489/**
1490 * Halts the CPU until interrupted.
1491 */
1492#if RT_INLINE_ASM_EXTERNAL
1493DECLASM(void) ASMHalt(void);
1494#else
1495DECLINLINE(void) ASMHalt(void)
1496{
1497# if RT_INLINE_ASM_GNU_STYLE
1498 __asm__ __volatile__("hlt\n\t");
1499# else
1500 __asm {
1501 hlt
1502 }
1503# endif
1504}
1505#endif
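
/* Example (illustrative sketch): a classic idle loop built from the above.
 *
 *     for (;;)
 *     {
 *         ASMIntEnable();   // interrupts must be enabled so something can wake us up
 *         ASMHalt();
 *     }
 */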
1506
1507
1508/**
1509 * Reads a machine specific register.
1510 *
1511 * @returns Register content.
1512 * @param uRegister Register to read.
1513 */
1514#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1515DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1516#else
1517DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1518{
1519 RTUINT64U u;
1520# if RT_INLINE_ASM_GNU_STYLE
1521 __asm__ __volatile__("rdmsr\n\t"
1522 : "=a" (u.s.Lo),
1523 "=d" (u.s.Hi)
1524 : "c" (uRegister));
1525
1526# elif RT_INLINE_ASM_USES_INTRIN
1527 u.u = __readmsr(uRegister);
1528
1529# else
1530 __asm
1531 {
1532 mov ecx, [uRegister]
1533 rdmsr
1534 mov [u.s.Lo], eax
1535 mov [u.s.Hi], edx
1536 }
1537# endif
1538
1539 return u.u;
1540}
1541#endif
1542
1543
1544/**
1545 * Writes a machine specific register.
1546 *
1547 * @returns Register content.
1548 * @param uRegister Register to write to.
1549 * @param u64Val Value to write.
1550 */
1551#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1552DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1553#else
1554DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1555{
1556 RTUINT64U u;
1557
1558 u.u = u64Val;
1559# if RT_INLINE_ASM_GNU_STYLE
1560 __asm__ __volatile__("wrmsr\n\t"
1561 ::"a" (u.s.Lo),
1562 "d" (u.s.Hi),
1563 "c" (uRegister));
1564
1565# elif RT_INLINE_ASM_USES_INTRIN
1566 __writemsr(uRegister, u.u);
1567
1568# else
1569 __asm
1570 {
1571 mov ecx, [uRegister]
1572 mov edx, [u.s.Hi]
1573 mov eax, [u.s.Lo]
1574 wrmsr
1575 }
1576# endif
1577}
1578#endif
1579
1580
1581/**
1582 * Reads low part of a machine specific register.
1583 *
1584 * @returns Register content.
1585 * @param uRegister Register to read.
1586 */
1587#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1588DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1589#else
1590DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1591{
1592 uint32_t u32;
1593# if RT_INLINE_ASM_GNU_STYLE
1594 __asm__ __volatile__("rdmsr\n\t"
1595 : "=a" (u32)
1596 : "c" (uRegister)
1597 : "edx");
1598
1599# elif RT_INLINE_ASM_USES_INTRIN
1600 u32 = (uint32_t)__readmsr(uRegister);
1601
1602#else
1603 __asm
1604 {
1605 mov ecx, [uRegister]
1606 rdmsr
1607 mov [u32], eax
1608 }
1609# endif
1610
1611 return u32;
1612}
1613#endif
1614
1615
1616/**
1617 * Reads high part of a machine specific register.
1618 *
1619 * @returns Register content.
1620 * @param uRegister Register to read.
1621 */
1622#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1623DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1624#else
1625DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1626{
1627 uint32_t u32;
1628# if RT_INLINE_ASM_GNU_STYLE
1629 __asm__ __volatile__("rdmsr\n\t"
1630 : "=d" (u32)
1631 : "c" (uRegister)
1632 : "eax");
1633
1634# elif RT_INLINE_ASM_USES_INTRIN
1635 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1636
1637# else
1638 __asm
1639 {
1640 mov ecx, [uRegister]
1641 rdmsr
1642 mov [u32], edx
1643 }
1644# endif
1645
1646 return u32;
1647}
1648#endif
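
/* Example (illustrative sketch): reading the architectural IA32_APIC_BASE MSR (0x1b).
 * MSR access faults outside ring-0, so this is kernel-context code only.
 *
 *     uint64_t const uApicBase   = ASMRdMsr(0x1b);
 *     uint32_t const uApicBaseLo = ASMRdMsr_Low(0x1b);
 */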
1649
1650
1651/**
1652 * Gets dr0.
1653 *
1654 * @returns dr0.
1655 */
1656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1657DECLASM(RTCCUINTREG) ASMGetDR0(void);
1658#else
1659DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1660{
1661 RTCCUINTREG uDR0;
1662# if RT_INLINE_ASM_USES_INTRIN
1663 uDR0 = __readdr(0);
1664# elif RT_INLINE_ASM_GNU_STYLE
1665# ifdef RT_ARCH_AMD64
1666 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1667# else
1668 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1669# endif
1670# else
1671 __asm
1672 {
1673# ifdef RT_ARCH_AMD64
1674 mov rax, dr0
1675 mov [uDR0], rax
1676# else
1677 mov eax, dr0
1678 mov [uDR0], eax
1679# endif
1680 }
1681# endif
1682 return uDR0;
1683}
1684#endif
1685
1686
1687/**
1688 * Gets dr1.
1689 *
1690 * @returns dr1.
1691 */
1692#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1693DECLASM(RTCCUINTREG) ASMGetDR1(void);
1694#else
1695DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1696{
1697 RTCCUINTREG uDR1;
1698# if RT_INLINE_ASM_USES_INTRIN
1699 uDR1 = __readdr(1);
1700# elif RT_INLINE_ASM_GNU_STYLE
1701# ifdef RT_ARCH_AMD64
1702 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1703# else
1704 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1705# endif
1706# else
1707 __asm
1708 {
1709# ifdef RT_ARCH_AMD64
1710 mov rax, dr1
1711 mov [uDR1], rax
1712# else
1713 mov eax, dr1
1714 mov [uDR1], eax
1715# endif
1716 }
1717# endif
1718 return uDR1;
1719}
1720#endif
1721
1722
1723/**
1724 * Gets dr2.
1725 *
1726 * @returns dr2.
1727 */
1728#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1729DECLASM(RTCCUINTREG) ASMGetDR2(void);
1730#else
1731DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1732{
1733 RTCCUINTREG uDR2;
1734# if RT_INLINE_ASM_USES_INTRIN
1735 uDR2 = __readdr(2);
1736# elif RT_INLINE_ASM_GNU_STYLE
1737# ifdef RT_ARCH_AMD64
1738 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1739# else
1740 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1741# endif
1742# else
1743 __asm
1744 {
1745# ifdef RT_ARCH_AMD64
1746 mov rax, dr2
1747 mov [uDR2], rax
1748# else
1749 mov eax, dr2
1750 mov [uDR2], eax
1751# endif
1752 }
1753# endif
1754 return uDR2;
1755}
1756#endif
1757
1758
1759/**
1760 * Gets dr3.
1761 *
1762 * @returns dr3.
1763 */
1764#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1765DECLASM(RTCCUINTREG) ASMGetDR3(void);
1766#else
1767DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1768{
1769 RTCCUINTREG uDR3;
1770# if RT_INLINE_ASM_USES_INTRIN
1771 uDR3 = __readdr(3);
1772# elif RT_INLINE_ASM_GNU_STYLE
1773# ifdef RT_ARCH_AMD64
1774 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1775# else
1776 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1777# endif
1778# else
1779 __asm
1780 {
1781# ifdef RT_ARCH_AMD64
1782 mov rax, dr3
1783 mov [uDR3], rax
1784# else
1785 mov eax, dr3
1786 mov [uDR3], eax
1787# endif
1788 }
1789# endif
1790 return uDR3;
1791}
1792#endif
1793
1794
1795/**
1796 * Gets dr6.
1797 *
1798 * @returns dr6.
1799 */
1800#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1801DECLASM(RTCCUINTREG) ASMGetDR6(void);
1802#else
1803DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1804{
1805 RTCCUINTREG uDR6;
1806# if RT_INLINE_ASM_USES_INTRIN
1807 uDR6 = __readdr(6);
1808# elif RT_INLINE_ASM_GNU_STYLE
1809# ifdef RT_ARCH_AMD64
1810 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1811# else
1812 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1813# endif
1814# else
1815 __asm
1816 {
1817# ifdef RT_ARCH_AMD64
1818 mov rax, dr6
1819 mov [uDR6], rax
1820# else
1821 mov eax, dr6
1822 mov [uDR6], eax
1823# endif
1824 }
1825# endif
1826 return uDR6;
1827}
1828#endif
1829
1830
1831/**
1832 * Reads and clears DR6.
1833 *
1834 * @returns DR6.
1835 */
1836#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1837DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1838#else
1839DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1840{
1841 RTCCUINTREG uDR6;
1842# if RT_INLINE_ASM_USES_INTRIN
1843 uDR6 = __readdr(6);
1844 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 0-3, 12-15 and 63-32 are zero. */
1845# elif RT_INLINE_ASM_GNU_STYLE
1846 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 4-11 are 1's, 0-3, 12-15 and 63-32 are zero. */
1847# ifdef RT_ARCH_AMD64
1848 __asm__ __volatile__("movq %%dr6, %0\n\t"
1849 "movq %1, %%dr6\n\t"
1850 : "=r" (uDR6)
1851 : "r" (uNewValue));
1852# else
1853 __asm__ __volatile__("movl %%dr6, %0\n\t"
1854 "movl %1, %%dr6\n\t"
1855 : "=r" (uDR6)
1856 : "r" (uNewValue));
1857# endif
1858# else
1859 __asm
1860 {
1861# ifdef RT_ARCH_AMD64
1862 mov rax, dr6
1863 mov [uDR6], rax
1864 mov rcx, rax
1865 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 0-3, 12-15 and 63-32 are zero. */
1866 mov dr6, rcx
1867# else
1868 mov eax, dr6
1869 mov [uDR6], eax
1870 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 0-3 and 12-15 are zero. */
1871 mov dr6, ecx
1872# endif
1873 }
1874# endif
1875 return uDR6;
1876}
1877#endif
1878
1879
1880/**
1881 * Gets dr7.
1882 *
1883 * @returns dr7.
1884 */
1885#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1886DECLASM(RTCCUINTREG) ASMGetDR7(void);
1887#else
1888DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1889{
1890 RTCCUINTREG uDR7;
1891# if RT_INLINE_ASM_USES_INTRIN
1892 uDR7 = __readdr(7);
1893# elif RT_INLINE_ASM_GNU_STYLE
1894# ifdef RT_ARCH_AMD64
1895 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1896# else
1897 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1898# endif
1899# else
1900 __asm
1901 {
1902# ifdef RT_ARCH_AMD64
1903 mov rax, dr7
1904 mov [uDR7], rax
1905# else
1906 mov eax, dr7
1907 mov [uDR7], eax
1908# endif
1909 }
1910# endif
1911 return uDR7;
1912}
1913#endif
1914
1915
1916/**
1917 * Sets dr0.
1918 *
1919 * @param uDRVal Debug register value to write
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1923#else
1924DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1925{
1926# if RT_INLINE_ASM_USES_INTRIN
1927 __writedr(0, uDRVal);
1928# elif RT_INLINE_ASM_GNU_STYLE
1929# ifdef RT_ARCH_AMD64
1930 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1931# else
1932 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1933# endif
1934# else
1935 __asm
1936 {
1937# ifdef RT_ARCH_AMD64
1938 mov rax, [uDRVal]
1939 mov dr0, rax
1940# else
1941 mov eax, [uDRVal]
1942 mov dr0, eax
1943# endif
1944 }
1945# endif
1946}
1947#endif
1948
1949
1950/**
1951 * Sets dr1.
1952 *
1953 * @param uDRVal Debug register value to write
1954 */
1955#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1956DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1957#else
1958DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1959{
1960# if RT_INLINE_ASM_USES_INTRIN
1961 __writedr(1, uDRVal);
1962# elif RT_INLINE_ASM_GNU_STYLE
1963# ifdef RT_ARCH_AMD64
1964 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1965# else
1966 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1967# endif
1968# else
1969 __asm
1970 {
1971# ifdef RT_ARCH_AMD64
1972 mov rax, [uDRVal]
1973 mov dr1, rax
1974# else
1975 mov eax, [uDRVal]
1976 mov dr1, eax
1977# endif
1978 }
1979# endif
1980}
1981#endif
1982
1983
1984/**
1985 * Sets dr2.
1986 *
1987 * @param uDRVal Debug register value to write
1988 */
1989#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1990DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1991#else
1992DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1993{
1994# if RT_INLINE_ASM_USES_INTRIN
1995 __writedr(2, uDRVal);
1996# elif RT_INLINE_ASM_GNU_STYLE
1997# ifdef RT_ARCH_AMD64
1998 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1999# else
2000 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2001# endif
2002# else
2003 __asm
2004 {
2005# ifdef RT_ARCH_AMD64
2006 mov rax, [uDRVal]
2007 mov dr2, rax
2008# else
2009 mov eax, [uDRVal]
2010 mov dr2, eax
2011# endif
2012 }
2013# endif
2014}
2015#endif
2016
2017
2018/**
2019 * Sets dr3.
2020 *
2021 * @param uDRVal Debug register value to write
2022 */
2023#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2024DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2025#else
2026DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2027{
2028# if RT_INLINE_ASM_USES_INTRIN
2029 __writedr(3, uDRVal);
2030# elif RT_INLINE_ASM_GNU_STYLE
2031# ifdef RT_ARCH_AMD64
2032 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2033# else
2034 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2035# endif
2036# else
2037 __asm
2038 {
2039# ifdef RT_ARCH_AMD64
2040 mov rax, [uDRVal]
2041 mov dr3, rax
2042# else
2043 mov eax, [uDRVal]
2044 mov dr3, eax
2045# endif
2046 }
2047# endif
2048}
2049#endif
2050
2051
2052/**
2053 * Sets dr6.
2054 *
2055 * @param uDRVal Debug register value to write
2056 */
2057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2058DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2059#else
2060DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2061{
2062# if RT_INLINE_ASM_USES_INTRIN
2063 __writedr(6, uDRVal);
2064# elif RT_INLINE_ASM_GNU_STYLE
2065# ifdef RT_ARCH_AMD64
2066 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2067# else
2068 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2069# endif
2070# else
2071 __asm
2072 {
2073# ifdef RT_ARCH_AMD64
2074 mov rax, [uDRVal]
2075 mov dr6, rax
2076# else
2077 mov eax, [uDRVal]
2078 mov dr6, eax
2079# endif
2080 }
2081# endif
2082}
2083#endif
2084
2085
2086/**
2087 * Sets dr7.
2088 *
2089 * @param uDRVal Debug register value to write
2090 */
2091#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2092DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2093#else
2094DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2095{
2096# if RT_INLINE_ASM_USES_INTRIN
2097 __writedr(7, uDRVal);
2098# elif RT_INLINE_ASM_GNU_STYLE
2099# ifdef RT_ARCH_AMD64
2100 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2101# else
2102 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2103# endif
2104# else
2105 __asm
2106 {
2107# ifdef RT_ARCH_AMD64
2108 mov rax, [uDRVal]
2109 mov dr7, rax
2110# else
2111 mov eax, [uDRVal]
2112 mov dr7, eax
2113# endif
2114 }
2115# endif
2116}
2117#endif
2118
2119
2120/**
2121 * Compiler memory barrier.
2122 *
2123 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2124 * values or any outstanding writes when returning from this function.
2125 *
2126 * This function must be used if non-volatile data is modified by a
2127 * device or the VMM. Typical cases are port access, MMIO access,
2128 * trapping instruction, etc.
2129 */
2130#if RT_INLINE_ASM_GNU_STYLE
2131# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2132#elif RT_INLINE_ASM_USES_INTRIN
2133# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2134#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2135DECLINLINE(void) ASMCompilerBarrier(void)
2136{
2137 __asm
2138 {
2139 }
2140}
2141#endif
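
/* Example (illustrative sketch): forcing re-reads of memory that a device updates
 * behind the compiler's back. pDevStatus is a made-up structure pointer here.
 *
 *     while (!pDevStatus->fDone)
 *         ASMCompilerBarrier();   // make the compiler re-read fDone on every iteration
 */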
2142
2143
2144/**
2145 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2146 *
2147 * @param Port I/O port to write to.
2148 * @param u8 8-bit integer to write.
2149 */
2150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2151DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2152#else
2153DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2154{
2155# if RT_INLINE_ASM_GNU_STYLE
2156 __asm__ __volatile__("outb %b1, %w0\n\t"
2157 :: "Nd" (Port),
2158 "a" (u8));
2159
2160# elif RT_INLINE_ASM_USES_INTRIN
2161 __outbyte(Port, u8);
2162
2163# else
2164 __asm
2165 {
2166 mov dx, [Port]
2167 mov al, [u8]
2168 out dx, al
2169 }
2170# endif
2171}
2172#endif
2173
2174
2175/**
2176 * Reads an 8-bit unsigned integer from an I/O port, ordered.
2177 *
2178 * @returns 8-bit integer.
2179 * @param Port I/O port to read from.
2180 */
2181#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2182DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2183#else
2184DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2185{
2186 uint8_t u8;
2187# if RT_INLINE_ASM_GNU_STYLE
2188 __asm__ __volatile__("inb %w1, %b0\n\t"
2189 : "=a" (u8)
2190 : "Nd" (Port));
2191
2192# elif RT_INLINE_ASM_USES_INTRIN
2193 u8 = __inbyte(Port);
2194
2195# else
2196 __asm
2197 {
2198 mov dx, [Port]
2199 in al, dx
2200 mov [u8], al
2201 }
2202# endif
2203 return u8;
2204}
2205#endif
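
/* Example (illustrative sketch): reading the CMOS RTC seconds register via the
 * legacy index/data ports 0x70/0x71. Requires ring-0 or sufficient IOPL.
 *
 *     ASMOutU8(0x70, 0x00);                    // select CMOS register 0 (seconds)
 *     uint8_t const bSeconds = ASMInU8(0x71);
 */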
2206
2207
2208/**
2209 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2210 *
2211 * @param Port I/O port to write to.
2212 * @param u16 16-bit integer to write.
2213 */
2214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2215DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2216#else
2217DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2218{
2219# if RT_INLINE_ASM_GNU_STYLE
2220 __asm__ __volatile__("outw %w1, %w0\n\t"
2221 :: "Nd" (Port),
2222 "a" (u16));
2223
2224# elif RT_INLINE_ASM_USES_INTRIN
2225 __outword(Port, u16);
2226
2227# else
2228 __asm
2229 {
2230 mov dx, [Port]
2231 mov ax, [u16]
2232 out dx, ax
2233 }
2234# endif
2235}
2236#endif
2237
2238
2239/**
2240 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2241 *
2242 * @returns 16-bit integer.
2243 * @param Port I/O port to read from.
2244 */
2245#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2246DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2247#else
2248DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2249{
2250 uint16_t u16;
2251# if RT_INLINE_ASM_GNU_STYLE
2252 __asm__ __volatile__("inw %w1, %w0\n\t"
2253 : "=a" (u16)
2254 : "Nd" (Port));
2255
2256# elif RT_INLINE_ASM_USES_INTRIN
2257 u16 = __inword(Port);
2258
2259# else
2260 __asm
2261 {
2262 mov dx, [Port]
2263 in ax, dx
2264 mov [u16], ax
2265 }
2266# endif
2267 return u16;
2268}
2269#endif
2270
2271
2272/**
2273 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2274 *
2275 * @param Port I/O port to write to.
2276 * @param u32 32-bit integer to write.
2277 */
2278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2279DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2280#else
2281DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2282{
2283# if RT_INLINE_ASM_GNU_STYLE
2284 __asm__ __volatile__("outl %1, %w0\n\t"
2285 :: "Nd" (Port),
2286 "a" (u32));
2287
2288# elif RT_INLINE_ASM_USES_INTRIN
2289 __outdword(Port, u32);
2290
2291# else
2292 __asm
2293 {
2294 mov dx, [Port]
2295 mov eax, [u32]
2296 out dx, eax
2297 }
2298# endif
2299}
2300#endif
2301
2302
2303/**
2304 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2305 *
2306 * @returns 32-bit integer.
2307 * @param Port I/O port to read from.
2308 */
2309#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2310DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2311#else
2312DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2313{
2314 uint32_t u32;
2315# if RT_INLINE_ASM_GNU_STYLE
2316 __asm__ __volatile__("inl %w1, %0\n\t"
2317 : "=a" (u32)
2318 : "Nd" (Port));
2319
2320# elif RT_INLINE_ASM_USES_INTRIN
2321 u32 = __indword(Port);
2322
2323# else
2324 __asm
2325 {
2326 mov dx, [Port]
2327 in eax, dx
2328 mov [u32], eax
2329 }
2330# endif
2331 return u32;
2332}
2333#endif
2334
2335
2336/**
2337 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2338 *
2339 * @param Port I/O port to write to.
2340 * @param pau8 Pointer to the string buffer.
2341 * @param c The number of items to write.
2342 */
2343#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2344DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2345#else
2346DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2347{
2348# if RT_INLINE_ASM_GNU_STYLE
2349 __asm__ __volatile__("rep; outsb\n\t"
2350 : "+S" (pau8),
2351 "+c" (c)
2352 : "d" (Port));
2353
2354# elif RT_INLINE_ASM_USES_INTRIN
2355 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2356
2357# else
2358 __asm
2359 {
2360 mov dx, [Port]
2361 mov ecx, [c]
2362 mov eax, [pau8]
2363 xchg esi, eax
2364 rep outsb
2365 xchg esi, eax
2366 }
2367# endif
2368}
2369#endif
2370
2371
2372/**
2373 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2374 *
2375 * @param Port I/O port to read from.
2376 * @param pau8 Pointer to the string buffer (output).
2377 * @param c The number of items to read.
2378 */
2379#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2380DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2381#else
2382DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2383{
2384# if RT_INLINE_ASM_GNU_STYLE
2385 __asm__ __volatile__("rep; insb\n\t"
2386 : "+D" (pau8),
2387 "+c" (c)
2388 : "d" (Port));
2389
2390# elif RT_INLINE_ASM_USES_INTRIN
2391 __inbytestring(Port, pau8, (unsigned long)c);
2392
2393# else
2394 __asm
2395 {
2396 mov dx, [Port]
2397 mov ecx, [c]
2398 mov eax, [pau8]
2399 xchg edi, eax
2400 rep insb
2401 xchg edi, eax
2402 }
2403# endif
2404}
2405#endif
2406
2407
2408/**
2409 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2410 *
2411 * @param Port I/O port to write to.
2412 * @param pau16 Pointer to the string buffer.
2413 * @param c The number of items to write.
2414 */
2415#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2416DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2417#else
2418DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2419{
2420# if RT_INLINE_ASM_GNU_STYLE
2421 __asm__ __volatile__("rep; outsw\n\t"
2422 : "+S" (pau16),
2423 "+c" (c)
2424 : "d" (Port));
2425
2426# elif RT_INLINE_ASM_USES_INTRIN
2427 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2428
2429# else
2430 __asm
2431 {
2432 mov dx, [Port]
2433 mov ecx, [c]
2434 mov eax, [pau16]
2435 xchg esi, eax
2436 rep outsw
2437 xchg esi, eax
2438 }
2439# endif
2440}
2441#endif
2442
2443
2444/**
2445 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2446 *
2447 * @param Port I/O port to read from.
2448 * @param pau16 Pointer to the string buffer (output).
2449 * @param c The number of items to read.
2450 */
2451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2452DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2453#else
2454DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2455{
2456# if RT_INLINE_ASM_GNU_STYLE
2457 __asm__ __volatile__("rep; insw\n\t"
2458 : "+D" (pau16),
2459 "+c" (c)
2460 : "d" (Port));
2461
2462# elif RT_INLINE_ASM_USES_INTRIN
2463 __inwordstring(Port, pau16, (unsigned long)c);
2464
2465# else
2466 __asm
2467 {
2468 mov dx, [Port]
2469 mov ecx, [c]
2470 mov eax, [pau16]
2471 xchg edi, eax
2472 rep insw
2473 xchg edi, eax
2474 }
2475# endif
2476}
2477#endif
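
/* Example (illustrative sketch): pulling one 512-byte sector's worth of data from
 * the legacy primary ATA data port (0x1f0), assuming the controller signals DRQ.
 *
 *     uint16_t au16Sector[256];
 *     ASMInStrU16(0x1f0, au16Sector, RT_ELEMENTS(au16Sector));
 */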
2478
2479
2480/**
2481 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2482 *
2483 * @param Port I/O port to write to.
2484 * @param pau32 Pointer to the string buffer.
2485 * @param c The number of items to write.
2486 */
2487#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2488DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2489#else
2490DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2491{
2492# if RT_INLINE_ASM_GNU_STYLE
2493 __asm__ __volatile__("rep; outsl\n\t"
2494 : "+S" (pau32),
2495 "+c" (c)
2496 : "d" (Port));
2497
2498# elif RT_INLINE_ASM_USES_INTRIN
2499 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2500
2501# else
2502 __asm
2503 {
2504 mov dx, [Port]
2505 mov ecx, [c]
2506 mov eax, [pau32]
2507 xchg esi, eax
2508 rep outsd
2509 xchg esi, eax
2510 }
2511# endif
2512}
2513#endif
2514
2515
2516/**
2517 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2518 *
2519 * @param Port I/O port to read from.
2520 * @param pau32 Pointer to the string buffer (output).
2521 * @param c The number of items to read.
2522 */
2523#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2524DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2525#else
2526DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2527{
2528# if RT_INLINE_ASM_GNU_STYLE
2529 __asm__ __volatile__("rep; insl\n\t"
2530 : "+D" (pau32),
2531 "+c" (c)
2532 : "d" (Port));
2533
2534# elif RT_INLINE_ASM_USES_INTRIN
2535 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2536
2537# else
2538 __asm
2539 {
2540 mov dx, [Port]
2541 mov ecx, [c]
2542 mov eax, [pau32]
2543 xchg edi, eax
2544 rep insd
2545 xchg edi, eax
2546 }
2547# endif
2548}
2549#endif
2550
2551
2552/**
2553 * Atomically Exchange an unsigned 8-bit value, ordered.
2554 *
2555 * @returns Current *pu8 value
2556 * @param pu8 Pointer to the 8-bit variable to update.
2557 * @param u8 The 8-bit value to assign to *pu8.
2558 */
2559#if RT_INLINE_ASM_EXTERNAL
2560DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2561#else
2562DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2563{
2564# if RT_INLINE_ASM_GNU_STYLE
2565 __asm__ __volatile__("xchgb %0, %1\n\t"
2566 : "=m" (*pu8),
2567 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2568 : "1" (u8),
2569 "m" (*pu8));
2570# else
2571 __asm
2572 {
2573# ifdef RT_ARCH_AMD64
2574 mov rdx, [pu8]
2575 mov al, [u8]
2576 xchg [rdx], al
2577 mov [u8], al
2578# else
2579 mov edx, [pu8]
2580 mov al, [u8]
2581 xchg [edx], al
2582 mov [u8], al
2583# endif
2584 }
2585# endif
2586 return u8;
2587}
2588#endif
2589
2590
2591/**
2592 * Atomically Exchange a signed 8-bit value, ordered.
2593 *
2594 * @returns Current *pi8 value
2595 * @param pi8 Pointer to the 8-bit variable to update.
2596 * @param i8 The 8-bit value to assign to *pi8.
2597 */
2598DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2599{
2600 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2601}
2602
2603
2604/**
2605 * Atomically Exchange a bool value, ordered.
2606 *
2607 * @returns Current *pf value
2608 * @param pf Pointer to the boolean variable to update.
2609 * @param f The boolean value to assign to *pf.
2610 */
2611DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2612{
2613#ifdef _MSC_VER
2614 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2615#else
2616 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2617#endif
2618}
2619
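/* Usage sketch (not part of the original header): ASMAtomicXchgBool makes a
 * handy one-shot guard -- only the caller that flips the flag from false to
 * true runs the initialization. The flag and function names are hypothetical. */
#if 0 /* example */
static bool volatile g_fExampleInitDone = false;

DECLINLINE(void) exampleInitOnce(void (*pfnInit)(void))
{
    /* The old value tells us whether somebody beat us to it. */
    if (!ASMAtomicXchgBool(&g_fExampleInitDone, true))
        pfnInit();
}
#endif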
2620
2621/**
2622 * Atomically Exchange an unsigned 16-bit value, ordered.
2623 *
2624 * @returns Current *pu16 value
2625 * @param pu16 Pointer to the 16-bit variable to update.
2626 * @param u16 The 16-bit value to assign to *pu16.
2627 */
2628#if RT_INLINE_ASM_EXTERNAL
2629DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2630#else
2631DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2632{
2633# if RT_INLINE_ASM_GNU_STYLE
2634 __asm__ __volatile__("xchgw %0, %1\n\t"
2635 : "=m" (*pu16),
2636 "=r" (u16)
2637 : "1" (u16),
2638 "m" (*pu16));
2639# else
2640 __asm
2641 {
2642# ifdef RT_ARCH_AMD64
2643 mov rdx, [pu16]
2644 mov ax, [u16]
2645 xchg [rdx], ax
2646 mov [u16], ax
2647# else
2648 mov edx, [pu16]
2649 mov ax, [u16]
2650 xchg [edx], ax
2651 mov [u16], ax
2652# endif
2653 }
2654# endif
2655 return u16;
2656}
2657#endif
2658
2659
2660/**
2661 * Atomically Exchange a signed 16-bit value, ordered.
2662 *
2663 * @returns Current *pi16 value
2664 * @param pi16 Pointer to the 16-bit variable to update.
2665 * @param i16 The 16-bit value to assign to *pi16.
2666 */
2667DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2668{
2669 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2670}
2671
2672
2673/**
2674 * Atomically Exchange an unsigned 32-bit value, ordered.
2675 *
2676 * @returns Current *pu32 value
2677 * @param pu32 Pointer to the 32-bit variable to update.
2678 * @param u32 The 32-bit value to assign to *pu32.
2679 */
2680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2681DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2682#else
2683DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2684{
2685# if RT_INLINE_ASM_GNU_STYLE
2686 __asm__ __volatile__("xchgl %0, %1\n\t"
2687 : "=m" (*pu32),
2688 "=r" (u32)
2689 : "1" (u32),
2690 "m" (*pu32));
2691
2692# elif RT_INLINE_ASM_USES_INTRIN
2693 u32 = _InterlockedExchange((long *)pu32, u32);
2694
2695# else
2696 __asm
2697 {
2698# ifdef RT_ARCH_AMD64
2699 mov rdx, [pu32]
2700 mov eax, u32
2701 xchg [rdx], eax
2702 mov [u32], eax
2703# else
2704 mov edx, [pu32]
2705 mov eax, u32
2706 xchg [edx], eax
2707 mov [u32], eax
2708# endif
2709 }
2710# endif
2711 return u32;
2712}
2713#endif
2714
2715
2716/**
2717 * Atomically Exchange a signed 32-bit value, ordered.
2718 *
2719 * @returns Current *pi32 value
2720 * @param pi32 Pointer to the 32-bit variable to update.
2721 * @param i32 The 32-bit value to assign to *pi32.
2722 */
2723DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2724{
2725 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2726}
2727
2728
2729/**
2730 * Atomically Exchange an unsigned 64-bit value, ordered.
2731 *
2732 * @returns Current *pu64 value
2733 * @param pu64 Pointer to the 64-bit variable to update.
2734 * @param u64 The 64-bit value to assign to *pu64.
2735 */
2736#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2737DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2738#else
2739DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2740{
2741# if defined(RT_ARCH_AMD64)
2742# if RT_INLINE_ASM_USES_INTRIN
2743 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2744
2745# elif RT_INLINE_ASM_GNU_STYLE
2746 __asm__ __volatile__("xchgq %0, %1\n\t"
2747 : "=m" (*pu64),
2748 "=r" (u64)
2749 : "1" (u64),
2750 "m" (*pu64));
2751# else
2752 __asm
2753 {
2754 mov rdx, [pu64]
2755 mov rax, [u64]
2756 xchg [rdx], rax
2757 mov [u64], rax
2758 }
2759# endif
2760# else /* !RT_ARCH_AMD64 */
2761# if RT_INLINE_ASM_GNU_STYLE
2762# if defined(PIC) || defined(__PIC__)
2763 uint32_t u32EBX = (uint32_t)u64;
2764 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2765 "xchgl %%ebx, %3\n\t"
2766 "1:\n\t"
2767 "lock; cmpxchg8b (%5)\n\t"
2768 "jnz 1b\n\t"
2769 "movl %3, %%ebx\n\t"
2770 /*"xchgl %%esi, %5\n\t"*/
2771 : "=A" (u64),
2772 "=m" (*pu64)
2773 : "0" (*pu64),
2774 "m" ( u32EBX ),
2775 "c" ( (uint32_t)(u64 >> 32) ),
2776 "S" (pu64));
2777# else /* !PIC */
2778 __asm__ __volatile__("1:\n\t"
2779 "lock; cmpxchg8b %1\n\t"
2780 "jnz 1b\n\t"
2781 : "=A" (u64),
2782 "=m" (*pu64)
2783 : "0" (*pu64),
2784 "b" ( (uint32_t)u64 ),
2785 "c" ( (uint32_t)(u64 >> 32) ));
2786# endif
2787# else
2788 __asm
2789 {
2790 mov ebx, dword ptr [u64]
2791 mov ecx, dword ptr [u64 + 4]
2792 mov edi, pu64
2793 mov eax, dword ptr [edi]
2794 mov edx, dword ptr [edi + 4]
2795 retry:
2796 lock cmpxchg8b [edi]
2797 jnz retry
2798 mov dword ptr [u64], eax
2799 mov dword ptr [u64 + 4], edx
2800 }
2801# endif
2802# endif /* !RT_ARCH_AMD64 */
2803 return u64;
2804}
2805#endif
2806
2807
2808/**
2809 * Atomically Exchange a signed 64-bit value, ordered.
2810 *
2811 * @returns Current *pi64 value
2812 * @param pi64 Pointer to the 64-bit variable to update.
2813 * @param i64 The 64-bit value to assign to *pi64.
2814 */
2815DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2816{
2817 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2818}
2819
2820
2821#ifdef RT_ARCH_AMD64
2822/**
2823 * Atomically Exchange an unsigned 128-bit value, ordered.
2824 *
2825 * @returns Current *pu128.
2826 * @param pu128 Pointer to the 128-bit variable to update.
2827 * @param u128 The 128-bit value to assign to *pu128.
2828 *
2829 * @remark We cannot really assume that any hardware supports this. Nor do I have
2830 * GAS support for it. So, for the time being we'll BREAK the atomic
2831 * bit of this function and use two 64-bit exchanges instead.
2832 */
2833# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2834DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2835# else
2836DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2837{
2838 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2839 {
2840 /** @todo this is clumsy code */
2841 RTUINT128U u128Ret;
2842 u128Ret.u = u128;
2843 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2844 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2845 return u128Ret.u;
2846 }
2847#if 0 /* later? */
2848 else
2849 {
2850# if RT_INLINE_ASM_GNU_STYLE
2851 __asm__ __volatile__("1:\n\t"
2852 "lock; cmpxchg8b %1\n\t"
2853 "jnz 1b\n\t"
2854 : "=A" (u128),
2855 "=m" (*pu128)
2856 : "0" (*pu128),
2857 "b" ( (uint64_t)u128 ),
2858 "c" ( (uint64_t)(u128 >> 64) ));
2859# else
2860 __asm
2861 {
2862 mov rbx, dword ptr [u128]
2863 mov rcx, dword ptr [u128 + 8]
2864 mov rdi, pu128
2865 mov rax, dword ptr [rdi]
2866 mov rdx, dword ptr [rdi + 8]
2867 retry:
2868 lock cmpxchg16b [rdi]
2869 jnz retry
2870 mov dword ptr [u128], rax
2871 mov dword ptr [u128 + 8], rdx
2872 }
2873# endif
2874 }
2875 return u128;
2876#endif
2877}
2878# endif
2879#endif /* RT_ARCH_AMD64 */
2880
2881
2882/**
2883 * Atomically Exchange a pointer value, ordered.
2884 *
2885 * @returns Current *ppv value
2886 * @param ppv Pointer to the pointer variable to update.
2887 * @param pv The pointer value to assign to *ppv.
2888 */
2889DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2890{
2891#if ARCH_BITS == 32
2892 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2893#elif ARCH_BITS == 64
2894 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2895#else
2896# error "ARCH_BITS is bogus"
2897#endif
2898}
2899
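/* Usage sketch (not part of the original header): a consumer can atomically
 * take over a whole singly linked work list by exchanging the shared head
 * pointer with NULL. EXAMPLENODE and g_pExampleHead are illustrative names. */
#if 0 /* example */
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    uint32_t            uPayload;
} EXAMPLENODE;

static EXAMPLENODE * volatile g_pExampleHead = NULL;

DECLINLINE(EXAMPLENODE *) exampleTakeAllWork(void)
{
    /* After the exchange the shared head is empty and we own the old chain. */
    return (EXAMPLENODE *)ASMAtomicXchgPtr((void * volatile *)&g_pExampleHead, NULL);
}
#endif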
2900
2901/**
2902 * Atomically Exchange a raw-mode context pointer value, ordered.
2903 *
2904 * @returns Current *ppvRC value
2905 * @param ppvRC Pointer to the pointer variable to update.
2906 * @param pvRC The pointer value to assign to *ppvRC.
2907 */
2908DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2909{
2910 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2911}
2912
2913
2914/**
2915 * Atomically Exchange a ring-0 pointer value, ordered.
2916 *
2917 * @returns Current *ppvR0 value
2918 * @param ppvR0 Pointer to the pointer variable to update.
2919 * @param pvR0 The pointer value to assign to *ppvR0.
2920 */
2921DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2922{
2923#if R0_ARCH_BITS == 32
2924 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2925#elif R0_ARCH_BITS == 64
2926 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2927#else
2928# error "R0_ARCH_BITS is bogus"
2929#endif
2930}
2931
2932
2933/**
2934 * Atomically Exchange a ring-3 pointer value, ordered.
2935 *
2936 * @returns Current *ppvR3 value
2937 * @param ppvR3 Pointer to the pointer variable to update.
2938 * @param pvR3 The pointer value to assign to *ppvR3.
2939 */
2940DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2941{
2942#if R3_ARCH_BITS == 32
2943 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2944#elif R3_ARCH_BITS == 64
2945 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2946#else
2947# error "R3_ARCH_BITS is bogus"
2948#endif
2949}
2950
2951
2952/** @def ASMAtomicXchgHandle
2953 * Atomically Exchange a typical IPRT handle value, ordered.
2954 *
2955 * @param ph Pointer to the value to update.
2956 * @param hNew The new value to assign to *ph.
2957 * @param phRes Where to store the current *ph value.
2958 *
2959 * @remarks This doesn't currently work for all handles (like RTFILE).
2960 */
2961#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2962 do { \
2963 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2964 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2965 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2966 } while (0)
2967
2968
2969/**
2970 * Atomically Exchange a value whose size might differ
2971 * between platforms or compilers, ordered.
2972 *
2973 * @param pu Pointer to the variable to update.
2974 * @param uNew The value to assign to *pu.
2975 * @todo This is busted as it's missing the result argument.
2976 */
2977#define ASMAtomicXchgSize(pu, uNew) \
2978 do { \
2979 switch (sizeof(*(pu))) { \
2980 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2981 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2982 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2983 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2984 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2985 } \
2986 } while (0)
2987
2988/**
2989 * Atomically Exchange a value whose size might differ
2990 * between platforms or compilers, ordered.
2991 *
2992 * @param pu Pointer to the variable to update.
2993 * @param uNew The value to assign to *pu.
2994 * @param puRes Where to store the current *pu value.
2995 */
2996#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2997 do { \
2998 switch (sizeof(*(pu))) { \
2999 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
3000 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3001 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3002 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3003 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3004 } \
3005 } while (0)
3006
3007
3008/**
3009 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3010 *
3011 * @returns true if xchg was done.
3012 * @returns false if xchg wasn't done.
3013 *
3014 * @param pu32 Pointer to the value to update.
3015 * @param u32New The new value to assign to *pu32.
3016 * @param u32Old The old value to compare *pu32 with.
3017 */
3018#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3019DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3020#else
3021DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3022{
3023# if RT_INLINE_ASM_GNU_STYLE
3024 uint8_t u8Ret;
3025 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3026 "setz %1\n\t"
3027 : "=m" (*pu32),
3028 "=qm" (u8Ret),
3029 "=a" (u32Old)
3030 : "r" (u32New),
3031 "2" (u32Old),
3032 "m" (*pu32));
3033 return (bool)u8Ret;
3034
3035# elif RT_INLINE_ASM_USES_INTRIN
3036 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3037
3038# else
3039 uint32_t u32Ret;
3040 __asm
3041 {
3042# ifdef RT_ARCH_AMD64
3043 mov rdx, [pu32]
3044# else
3045 mov edx, [pu32]
3046# endif
3047 mov eax, [u32Old]
3048 mov ecx, [u32New]
3049# ifdef RT_ARCH_AMD64
3050 lock cmpxchg [rdx], ecx
3051# else
3052 lock cmpxchg [edx], ecx
3053# endif
3054 setz al
3055 movzx eax, al
3056 mov [u32Ret], eax
3057 }
3058 return !!u32Ret;
3059# endif
3060}
3061#endif
3062
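/* Usage sketch (not part of the original header): the 32-bit compare-exchange
 * is the building block for a minimal spinlock -- acquire by flipping the lock
 * word from 0 (free) to 1 (taken), release with an ordered store of 0. The
 * names are hypothetical and real code would add pause/backoff. */
#if 0 /* example */
DECLINLINE(void) exampleSpinlockAcquire(uint32_t volatile *pLock)
{
    /* Retry until we are the ones who changed 0 -> 1. */
    while (!ASMAtomicCmpXchgU32(pLock, 1 /* new */, 0 /* expected old */))
        /* spin */;
}

DECLINLINE(void) exampleSpinlockRelease(uint32_t volatile *pLock)
{
    ASMAtomicXchgU32(pLock, 0); /* ordered store marks the lock free again */
}
#endif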
3063
3064/**
3065 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3066 *
3067 * @returns true if xchg was done.
3068 * @returns false if xchg wasn't done.
3069 *
3070 * @param pi32 Pointer to the value to update.
3071 * @param i32New The new value to assign to *pi32.
3072 * @param i32Old The old value to compare *pi32 with.
3073 */
3074DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3075{
3076 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3077}
3078
3079
3080/**
3081 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3082 *
3083 * @returns true if xchg was done.
3084 * @returns false if xchg wasn't done.
3085 *
3086 * @param pu64 Pointer to the 64-bit variable to update.
3087 * @param u64New The 64-bit value to assign to *pu64.
3088 * @param u64Old The value to compare with.
3089 */
3090#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3091DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3092#else
3093DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3094{
3095# if RT_INLINE_ASM_USES_INTRIN
3096 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3097
3098# elif defined(RT_ARCH_AMD64)
3099# if RT_INLINE_ASM_GNU_STYLE
3100 uint8_t u8Ret;
3101 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3102 "setz %1\n\t"
3103 : "=m" (*pu64),
3104 "=qm" (u8Ret),
3105 "=a" (u64Old)
3106 : "r" (u64New),
3107 "2" (u64Old),
3108 "m" (*pu64));
3109 return (bool)u8Ret;
3110# else
3111 bool fRet;
3112 __asm
3113 {
3114        mov rdx, [pu64]
3115 mov rax, [u64Old]
3116 mov rcx, [u64New]
3117 lock cmpxchg [rdx], rcx
3118 setz al
3119 mov [fRet], al
3120 }
3121 return fRet;
3122# endif
3123# else /* !RT_ARCH_AMD64 */
3124 uint32_t u32Ret;
3125# if RT_INLINE_ASM_GNU_STYLE
3126# if defined(PIC) || defined(__PIC__)
3127 uint32_t u32EBX = (uint32_t)u64New;
3128 uint32_t u32Spill;
3129 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3130 "lock; cmpxchg8b (%6)\n\t"
3131 "setz %%al\n\t"
3132 "movl %4, %%ebx\n\t"
3133 "movzbl %%al, %%eax\n\t"
3134 : "=a" (u32Ret),
3135 "=d" (u32Spill),
3136# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3137 "+m" (*pu64)
3138# else
3139 "=m" (*pu64)
3140# endif
3141 : "A" (u64Old),
3142 "m" ( u32EBX ),
3143 "c" ( (uint32_t)(u64New >> 32) ),
3144 "S" (pu64));
3145# else /* !PIC */
3146 uint32_t u32Spill;
3147 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3148 "setz %%al\n\t"
3149 "movzbl %%al, %%eax\n\t"
3150 : "=a" (u32Ret),
3151 "=d" (u32Spill),
3152 "+m" (*pu64)
3153 : "A" (u64Old),
3154 "b" ( (uint32_t)u64New ),
3155 "c" ( (uint32_t)(u64New >> 32) ));
3156# endif
3157 return (bool)u32Ret;
3158# else
3159 __asm
3160 {
3161 mov ebx, dword ptr [u64New]
3162 mov ecx, dword ptr [u64New + 4]
3163 mov edi, [pu64]
3164 mov eax, dword ptr [u64Old]
3165 mov edx, dword ptr [u64Old + 4]
3166 lock cmpxchg8b [edi]
3167 setz al
3168 movzx eax, al
3169 mov dword ptr [u32Ret], eax
3170 }
3171 return !!u32Ret;
3172# endif
3173# endif /* !RT_ARCH_AMD64 */
3174}
3175#endif
3176
3177
3178/**
3179 * Atomically Compare and exchange a signed 64-bit value, ordered.
3180 *
3181 * @returns true if xchg was done.
3182 * @returns false if xchg wasn't done.
3183 *
3184 * @param pi64 Pointer to the 64-bit variable to update.
3185 * @param i64 The 64-bit value to assign to *pi64.
3186 * @param i64Old The value to compare with.
3187 */
3188DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3189{
3190 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3191}
3192
3193
3194/**
3195 * Atomically Compare and Exchange a pointer value, ordered.
3196 *
3197 * @returns true if xchg was done.
3198 * @returns false if xchg wasn't done.
3199 *
3200 * @param ppv Pointer to the value to update.
3201 * @param pvNew The new value to assign to *ppv.
3202 * @param pvOld The old value to compare *ppv with.
3203 */
3204DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3205{
3206#if ARCH_BITS == 32
3207 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3208#elif ARCH_BITS == 64
3209 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3210#else
3211# error "ARCH_BITS is bogus"
3212#endif
3213}
3214
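/* Usage sketch (not part of the original header): lock-free push onto a singly
 * linked stack using the pointer compare-exchange; a failed CAS simply means
 * another thread got in first and we retry with the fresh head. The item type
 * and variable names are illustrative. */
#if 0 /* example */
typedef struct EXAMPLEITEM
{
    struct EXAMPLEITEM *pNext;
} EXAMPLEITEM;

static EXAMPLEITEM * volatile g_pExampleStack = NULL;

DECLINLINE(void) exampleStackPush(EXAMPLEITEM *pItem)
{
    void *pvOld;
    do
    {
        /* Link our item in front of the current head, then try to publish it. */
        pvOld        = (void *)g_pExampleStack;
        pItem->pNext = (EXAMPLEITEM *)pvOld;
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)&g_pExampleStack, pItem, pvOld));
}
#endif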
3215
3216/** @def ASMAtomicCmpXchgHandle
3217 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3218 *
3219 * @param ph Pointer to the value to update.
3220 * @param hNew The new value to assign to *ph.
3221 * @param hOld The old value to compare *ph with.
3222 * @param fRc Where to store the result.
3223 *
3224 * @remarks This doesn't currently work for all handles (like RTFILE).
3225 */
3226#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3227 do { \
3228 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
3229 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3230 } while (0)
3231
3232
3233/** @def ASMAtomicCmpXchgSize
3234 * Atomically Compare and Exchange a value whose size might differ
3235 * between platforms or compilers, ordered.
3236 *
3237 * @param pu Pointer to the value to update.
3238 * @param uNew The new value to assign to *pu.
3239 * @param uOld The old value to compare *pu with.
3240 * @param fRc Where to store the result.
3241 */
3242#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3243 do { \
3244 switch (sizeof(*(pu))) { \
3245 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3246 break; \
3247 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3248 break; \
3249 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3250 (fRc) = false; \
3251 break; \
3252 } \
3253 } while (0)
3254
3255
3256/**
3257 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3258 * passes back old value, ordered.
3259 *
3260 * @returns true if xchg was done.
3261 * @returns false if xchg wasn't done.
3262 *
3263 * @param pu32 Pointer to the value to update.
3264 * @param u32New The new value to assign to *pu32.
3265 * @param u32Old The old value to compare *pu32 with.
3266 * @param pu32Old Pointer to store the old value at.
3267 */
3268#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3269DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3270#else
3271DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3272{
3273# if RT_INLINE_ASM_GNU_STYLE
3274 uint8_t u8Ret;
3275 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3276 "setz %1\n\t"
3277 : "=m" (*pu32),
3278 "=qm" (u8Ret),
3279 "=a" (*pu32Old)
3280 : "r" (u32New),
3281 "a" (u32Old),
3282 "m" (*pu32));
3283 return (bool)u8Ret;
3284
3285# elif RT_INLINE_ASM_USES_INTRIN
3286 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3287
3288# else
3289 uint32_t u32Ret;
3290 __asm
3291 {
3292# ifdef RT_ARCH_AMD64
3293 mov rdx, [pu32]
3294# else
3295 mov edx, [pu32]
3296# endif
3297 mov eax, [u32Old]
3298 mov ecx, [u32New]
3299# ifdef RT_ARCH_AMD64
3300 lock cmpxchg [rdx], ecx
3301 mov rdx, [pu32Old]
3302 mov [rdx], eax
3303# else
3304 lock cmpxchg [edx], ecx
3305 mov edx, [pu32Old]
3306 mov [edx], eax
3307# endif
3308 setz al
3309 movzx eax, al
3310 mov [u32Ret], eax
3311 }
3312 return !!u32Ret;
3313# endif
3314}
3315#endif
3316
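/* Usage sketch (not part of the original header): the Ex variant hands back the
 * value it actually found, so a retry loop needs no separate reload between
 * attempts. Here it is used to merge a flag bit into a shared word; the names
 * are hypothetical. */
#if 0 /* example */
DECLINLINE(uint32_t) exampleSetBitCas(uint32_t volatile *pu32, uint32_t fBit)
{
    uint32_t u32Old = *pu32; /* initial guess, corrected by the CAS feedback */
    while (!ASMAtomicCmpXchgExU32(pu32, u32Old | fBit, u32Old, &u32Old))
    { /* u32Old now holds the current value; just try again with it. */ }
    return u32Old; /* the value before our bit was merged in */
}
#endif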
3317
3318/**
3319 * Atomically Compare and Exchange a signed 32-bit value, additionally
3320 * passes back old value, ordered.
3321 *
3322 * @returns true if xchg was done.
3323 * @returns false if xchg wasn't done.
3324 *
3325 * @param pi32 Pointer to the value to update.
3326 * @param i32New The new value to assign to *pi32.
3327 * @param i32Old The old value to compare *pi32 with.
3328 * @param pi32Old Pointer to store the old value at.
3329 */
3330DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3331{
3332 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3333}
3334
3335
3336/**
3337 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3338 * passing back old value, ordered.
3339 *
3340 * @returns true if xchg was done.
3341 * @returns false if xchg wasn't done.
3342 *
3343 * @param pu64 Pointer to the 64-bit variable to update.
3344 * @param u64New The 64-bit value to assign to *pu64.
3345 * @param u64Old The value to compare with.
3346 * @param pu64Old Pointer to store the old value at.
3347 */
3348#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3349DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3350#else
3351DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3352{
3353# if RT_INLINE_ASM_USES_INTRIN
3354 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3355
3356# elif defined(RT_ARCH_AMD64)
3357# if RT_INLINE_ASM_GNU_STYLE
3358 uint8_t u8Ret;
3359 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3360 "setz %1\n\t"
3361 : "=m" (*pu64),
3362 "=qm" (u8Ret),
3363 "=a" (*pu64Old)
3364 : "r" (u64New),
3365 "a" (u64Old),
3366 "m" (*pu64));
3367 return (bool)u8Ret;
3368# else
3369 bool fRet;
3370 __asm
3371 {
3372        mov rdx, [pu64]
3373 mov rax, [u64Old]
3374 mov rcx, [u64New]
3375 lock cmpxchg [rdx], rcx
3376 mov rdx, [pu64Old]
3377 mov [rdx], rax
3378 setz al
3379 mov [fRet], al
3380 }
3381 return fRet;
3382# endif
3383# else /* !RT_ARCH_AMD64 */
3384# if RT_INLINE_ASM_GNU_STYLE
3385 uint64_t u64Ret;
3386# if defined(PIC) || defined(__PIC__)
3387 /* NB: this code uses a memory clobber description, because the clean
3388 * solution with an output value for *pu64 makes gcc run out of registers.
3389 * This will cause suboptimal code, and anyone with a better solution is
3390 * welcome to improve this. */
3391 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3392 "lock; cmpxchg8b %3\n\t"
3393 "xchgl %%ebx, %1\n\t"
3394 : "=A" (u64Ret)
3395 : "DS" ((uint32_t)u64New),
3396 "c" ((uint32_t)(u64New >> 32)),
3397 "m" (*pu64),
3398 "0" (u64Old)
3399 : "memory" );
3400# else /* !PIC */
3401 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3402 : "=A" (u64Ret),
3403 "=m" (*pu64)
3404 : "b" ((uint32_t)u64New),
3405 "c" ((uint32_t)(u64New >> 32)),
3406 "m" (*pu64),
3407 "0" (u64Old));
3408# endif
3409 *pu64Old = u64Ret;
3410 return u64Ret == u64Old;
3411# else
3412 uint32_t u32Ret;
3413 __asm
3414 {
3415 mov ebx, dword ptr [u64New]
3416 mov ecx, dword ptr [u64New + 4]
3417 mov edi, [pu64]
3418 mov eax, dword ptr [u64Old]
3419 mov edx, dword ptr [u64Old + 4]
3420 lock cmpxchg8b [edi]
3421 mov ebx, [pu64Old]
3422 mov [ebx], eax
3423 setz al
3424 movzx eax, al
3425 add ebx, 4
3426 mov [ebx], edx
3427 mov dword ptr [u32Ret], eax
3428 }
3429 return !!u32Ret;
3430# endif
3431# endif /* !RT_ARCH_AMD64 */
3432}
3433#endif
3434
3435
3436/**
3437 * Atomically Compare and exchange a signed 64-bit value, additionally
3438 * passing back old value, ordered.
3439 *
3440 * @returns true if xchg was done.
3441 * @returns false if xchg wasn't done.
3442 *
3443 * @param pi64 Pointer to the 64-bit variable to update.
3444 * @param i64 The 64-bit value to assign to *pi64.
3445 * @param i64Old The value to compare with.
3446 * @param pi64Old Pointer to store the old value at.
3447 */
3448DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3449{
3450 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3451}
3452
3453/** @def ASMAtomicCmpXchgExHandle
3454 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3455 *
3456 * @param ph Pointer to the value to update.
3457 * @param hNew The new value to assign to *ph.
3458 * @param hOld The old value to compare *ph with.
3459 * @param fRc Where to store the result.
3460 * @param phOldVal Pointer to where to store the old value.
3461 *
3462 * @remarks This doesn't currently work for all handles (like RTFILE).
3463 */
3464#if ARCH_BITS == 32
3465# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3466 do { \
3467        (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3468 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3469 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3470 } while (0)
3471#elif ARCH_BITS == 64
3472# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3473 do { \
3474        (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3475 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3476 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3477 } while (0)
3478#endif
3479
3480
3481/** @def ASMAtomicCmpXchgExSize
3482 * Atomically Compare and Exchange a value whose size might differ
3483 * between platforms or compilers. Additionally passes back old value.
3484 *
3485 * @param pu Pointer to the value to update.
3486 * @param uNew The new value to assign to *pu.
3487 * @param uOld The old value to compare *pu with.
3488 * @param fRc Where to store the result.
3489 * @param puOldVal Pointer to where to store the old value.
3490 */
3491#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3492 do { \
3493 switch (sizeof(*(pu))) { \
3494            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3495 break; \
3496            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3497 break; \
3498 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3499 (fRc) = false; \
3500                *(puOldVal) = 0; \
3501 break; \
3502 } \
3503 } while (0)
3504
3505
3506/**
3507 * Atomically Compare and Exchange a pointer value, additionally
3508 * passing back old value, ordered.
3509 *
3510 * @returns true if xchg was done.
3511 * @returns false if xchg wasn't done.
3512 *
3513 * @param ppv Pointer to the value to update.
3514 * @param pvNew The new value to assign to *ppv.
3515 * @param pvOld The old value to compare *ppv with.
3516 * @param ppvOld Pointer to store the old value at.
3517 */
3518DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3519{
3520#if ARCH_BITS == 32
3521 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3522#elif ARCH_BITS == 64
3523 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3524#else
3525# error "ARCH_BITS is bogus"
3526#endif
3527}
3528
3529
3530/**
3531 * Atomically exchanges and adds to a 32-bit value, ordered.
3532 *
3533 * @returns The old value.
3534 * @param pu32 Pointer to the value.
3535 * @param u32 Number to add.
3536 */
3537#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3538DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3539#else
3540DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3541{
3542# if RT_INLINE_ASM_USES_INTRIN
3543 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3544 return u32;
3545
3546# elif RT_INLINE_ASM_GNU_STYLE
3547 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3548 : "=r" (u32),
3549 "=m" (*pu32)
3550 : "0" (u32),
3551 "m" (*pu32)
3552 : "memory");
3553 return u32;
3554# else
3555 __asm
3556 {
3557 mov eax, [u32]
3558# ifdef RT_ARCH_AMD64
3559 mov rdx, [pu32]
3560 lock xadd [rdx], eax
3561# else
3562 mov edx, [pu32]
3563 lock xadd [edx], eax
3564# endif
3565 mov [u32], eax
3566 }
3567 return u32;
3568# endif
3569}
3570#endif
3571
3572
3573/**
3574 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3575 *
3576 * @returns The old value.
3577 * @param pi32 Pointer to the value.
3578 * @param i32 Number to add.
3579 */
3580DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3581{
3582 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3583}
3584
3585
3586/**
3587 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3588 *
3589 * @returns The old value.
3590 * @param pi32 Pointer to the value.
3591 * @param u32 Number to subtract.
3592 */
3593DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3594{
3595 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3596}
3597
3598
3599/**
3600 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3601 *
3602 * @returns The old value.
3603 * @param pi32 Pointer to the value.
3604 * @param i32 Number to subtract.
3605 */
3606DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3607{
3608 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3609}
3610
3611
3612/**
3613 * Atomically increment a 32-bit value, ordered.
3614 *
3615 * @returns The new value.
3616 * @param pu32 Pointer to the value to increment.
3617 */
3618#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3619DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3620#else
3621DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3622{
3623 uint32_t u32;
3624# if RT_INLINE_ASM_USES_INTRIN
3625 u32 = _InterlockedIncrement((long *)pu32);
3626 return u32;
3627
3628# elif RT_INLINE_ASM_GNU_STYLE
3629 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3630 : "=r" (u32),
3631 "=m" (*pu32)
3632 : "0" (1),
3633 "m" (*pu32)
3634 : "memory");
3635 return u32+1;
3636# else
3637 __asm
3638 {
3639 mov eax, 1
3640# ifdef RT_ARCH_AMD64
3641 mov rdx, [pu32]
3642 lock xadd [rdx], eax
3643# else
3644 mov edx, [pu32]
3645 lock xadd [edx], eax
3646# endif
3647 mov u32, eax
3648 }
3649 return u32+1;
3650# endif
3651}
3652#endif
3653
3654
3655/**
3656 * Atomically increment a signed 32-bit value, ordered.
3657 *
3658 * @returns The new value.
3659 * @param pi32 Pointer to the value to increment.
3660 */
3661DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3662{
3663 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3664}
3665
3666
3667/**
3668 * Atomically decrement an unsigned 32-bit value, ordered.
3669 *
3670 * @returns The new value.
3671 * @param pu32 Pointer to the value to decrement.
3672 */
3673#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3674DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3675#else
3676DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3677{
3678 uint32_t u32;
3679# if RT_INLINE_ASM_USES_INTRIN
3680 u32 = _InterlockedDecrement((long *)pu32);
3681 return u32;
3682
3683# elif RT_INLINE_ASM_GNU_STYLE
3684 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3685 : "=r" (u32),
3686 "=m" (*pu32)
3687 : "0" (-1),
3688 "m" (*pu32)
3689 : "memory");
3690 return u32-1;
3691# else
3692 __asm
3693 {
3694 mov eax, -1
3695# ifdef RT_ARCH_AMD64
3696 mov rdx, [pu32]
3697 lock xadd [rdx], eax
3698# else
3699 mov edx, [pu32]
3700 lock xadd [edx], eax
3701# endif
3702 mov u32, eax
3703 }
3704 return u32-1;
3705# endif
3706}
3707#endif
3708
3709
3710/**
3711 * Atomically decrement a signed 32-bit value, ordered.
3712 *
3713 * @returns The new value.
3714 * @param pi32 Pointer to the value to decrement.
3715 */
3716DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3717{
3718 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3719}
3720
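/* Usage sketch (not part of the original header): the increment/decrement pair
 * returns the new value, which is exactly what reference counting needs -- the
 * caller that brings the count to zero destroys the object. The structure and
 * helper names are illustrative. */
#if 0 /* example */
typedef struct EXAMPLEOBJ
{
    uint32_t volatile cRefs;
    /* ... payload ... */
} EXAMPLEOBJ;

DECLINLINE(void) exampleObjRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(void) exampleObjRelease(EXAMPLEOBJ *pObj, void (*pfnDestroy)(EXAMPLEOBJ *))
{
    /* Only the thread that drops the count to zero runs the destructor. */
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
        pfnDestroy(pObj);
}
#endif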
3721
3722/**
3723 * Atomically Or an unsigned 32-bit value, ordered.
3724 *
3725 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3726 * @param u32 The value to OR *pu32 with.
3727 */
3728#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3729DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3730#else
3731DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3732{
3733# if RT_INLINE_ASM_USES_INTRIN
3734 _InterlockedOr((long volatile *)pu32, (long)u32);
3735
3736# elif RT_INLINE_ASM_GNU_STYLE
3737 __asm__ __volatile__("lock; orl %1, %0\n\t"
3738 : "=m" (*pu32)
3739 : "ir" (u32),
3740 "m" (*pu32));
3741# else
3742 __asm
3743 {
3744 mov eax, [u32]
3745# ifdef RT_ARCH_AMD64
3746 mov rdx, [pu32]
3747 lock or [rdx], eax
3748# else
3749 mov edx, [pu32]
3750 lock or [edx], eax
3751# endif
3752 }
3753# endif
3754}
3755#endif
3756
3757
3758/**
3759 * Atomically Or a signed 32-bit value, ordered.
3760 *
3761 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3762 * @param i32 The value to OR *pi32 with.
3763 */
3764DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3765{
3766 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3767}
3768
3769
3770/**
3771 * Atomically And an unsigned 32-bit value, ordered.
3772 *
3773 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3774 * @param u32 The value to AND *pu32 with.
3775 */
3776#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3777DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3778#else
3779DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3780{
3781# if RT_INLINE_ASM_USES_INTRIN
3782 _InterlockedAnd((long volatile *)pu32, u32);
3783
3784# elif RT_INLINE_ASM_GNU_STYLE
3785 __asm__ __volatile__("lock; andl %1, %0\n\t"
3786 : "=m" (*pu32)
3787 : "ir" (u32),
3788 "m" (*pu32));
3789# else
3790 __asm
3791 {
3792 mov eax, [u32]
3793# ifdef RT_ARCH_AMD64
3794 mov rdx, [pu32]
3795 lock and [rdx], eax
3796# else
3797 mov edx, [pu32]
3798 lock and [edx], eax
3799# endif
3800 }
3801# endif
3802}
3803#endif
3804
3805
3806/**
3807 * Atomically And a signed 32-bit value, ordered.
3808 *
3809 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3810 * @param i32 The value to AND *pi32 with.
3811 */
3812DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3813{
3814 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3815}
3816
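/* Usage sketch (not part of the original header): OR and AND are the natural
 * way to set and clear flag bits in a shared status word without a lock. The
 * flag values and variable name are made up for illustration. */
#if 0 /* example */
#define EXAMPLE_STS_BUSY    RT_BIT(0)
#define EXAMPLE_STS_ERROR   RT_BIT(1)

static uint32_t volatile g_fExampleStatus = 0;

DECLINLINE(void) exampleEnterBusy(void)
{
    ASMAtomicOrU32(&g_fExampleStatus, EXAMPLE_STS_BUSY);    /* set the bit */
}

DECLINLINE(void) exampleLeaveBusy(void)
{
    ASMAtomicAndU32(&g_fExampleStatus, ~(uint32_t)EXAMPLE_STS_BUSY); /* clear the bit */
}
#endif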
3817
3818/**
3819 * Memory fence, waits for any pending writes and reads to complete.
3820 */
3821DECLINLINE(void) ASMMemoryFence(void)
3822{
3823 /** @todo use mfence? check if all cpus we care for support it. */
3824 uint32_t volatile u32;
3825 ASMAtomicXchgU32(&u32, 0);
3826}
3827
3828
3829/**
3830 * Write fence, waits for any pending writes to complete.
3831 */
3832DECLINLINE(void) ASMWriteFence(void)
3833{
3834 /** @todo use sfence? check if all cpus we care for support it. */
3835 ASMMemoryFence();
3836}
3837
3838
3839/**
3840 * Read fence, waits for any pending reads to complete.
3841 */
3842DECLINLINE(void) ASMReadFence(void)
3843{
3844 /** @todo use lfence? check if all cpus we care for support it. */
3845 ASMMemoryFence();
3846}
3847
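/* Usage sketch (not part of the original header): the classic use of the write
 * and read fences is publishing data through a ready flag -- the producer
 * fences between filling the payload and raising the flag, the consumer fences
 * between seeing the flag and touching the payload. All names are hypothetical. */
#if 0 /* example */
static uint32_t volatile g_fExampleReady = 0;
static uint32_t          g_uExamplePayload;

DECLINLINE(void) exampleProduce(uint32_t uValue)
{
    g_uExamplePayload = uValue;               /* 1) fill the data */
    ASMWriteFence();                          /* 2) make the data visible first */
    ASMAtomicWriteU32(&g_fExampleReady, 1);   /* 3) then raise the flag */
}

DECLINLINE(bool) exampleTryConsume(uint32_t *puValue)
{
    if (!ASMAtomicReadU32(&g_fExampleReady))
        return false;
    ASMReadFence();                           /* don't read the payload before the flag */
    *puValue = g_uExamplePayload;
    return true;
}
#endif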
3848
3849/**
3850 * Atomically reads an unsigned 8-bit value, ordered.
3851 *
3852 * @returns Current *pu8 value
3853 * @param pu8 Pointer to the 8-bit variable to read.
3854 */
3855DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3856{
3857 ASMMemoryFence();
3858 return *pu8; /* byte reads are atomic on x86 */
3859}
3860
3861
3862/**
3863 * Atomically reads an unsigned 8-bit value, unordered.
3864 *
3865 * @returns Current *pu8 value
3866 * @param pu8 Pointer to the 8-bit variable to read.
3867 */
3868DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3869{
3870 return *pu8; /* byte reads are atomic on x86 */
3871}
3872
3873
3874/**
3875 * Atomically reads a signed 8-bit value, ordered.
3876 *
3877 * @returns Current *pi8 value
3878 * @param pi8 Pointer to the 8-bit variable to read.
3879 */
3880DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3881{
3882 ASMMemoryFence();
3883 return *pi8; /* byte reads are atomic on x86 */
3884}
3885
3886
3887/**
3888 * Atomically reads a signed 8-bit value, unordered.
3889 *
3890 * @returns Current *pi8 value
3891 * @param pi8 Pointer to the 8-bit variable to read.
3892 */
3893DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3894{
3895 return *pi8; /* byte reads are atomic on x86 */
3896}
3897
3898
3899/**
3900 * Atomically reads an unsigned 16-bit value, ordered.
3901 *
3902 * @returns Current *pu16 value
3903 * @param pu16 Pointer to the 16-bit variable to read.
3904 */
3905DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3906{
3907 ASMMemoryFence();
3908 Assert(!((uintptr_t)pu16 & 1));
3909 return *pu16;
3910}
3911
3912
3913/**
3914 * Atomically reads an unsigned 16-bit value, unordered.
3915 *
3916 * @returns Current *pu16 value
3917 * @param pu16 Pointer to the 16-bit variable to read.
3918 */
3919DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3920{
3921 Assert(!((uintptr_t)pu16 & 1));
3922 return *pu16;
3923}
3924
3925
3926/**
3927 * Atomically reads a signed 16-bit value, ordered.
3928 *
3929 * @returns Current *pi16 value
3930 * @param pi16 Pointer to the 16-bit variable to read.
3931 */
3932DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3933{
3934 ASMMemoryFence();
3935 Assert(!((uintptr_t)pi16 & 1));
3936 return *pi16;
3937}
3938
3939
3940/**
3941 * Atomically reads a signed 16-bit value, unordered.
3942 *
3943 * @returns Current *pi16 value
3944 * @param pi16 Pointer to the 16-bit variable to read.
3945 */
3946DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3947{
3948 Assert(!((uintptr_t)pi16 & 1));
3949 return *pi16;
3950}
3951
3952
3953/**
3954 * Atomically reads an unsigned 32-bit value, ordered.
3955 *
3956 * @returns Current *pu32 value
3957 * @param pu32 Pointer to the 32-bit variable to read.
3958 */
3959DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3960{
3961 ASMMemoryFence();
3962 Assert(!((uintptr_t)pu32 & 3));
3963 return *pu32;
3964}
3965
3966
3967/**
3968 * Atomically reads an unsigned 32-bit value, unordered.
3969 *
3970 * @returns Current *pu32 value
3971 * @param pu32 Pointer to the 32-bit variable to read.
3972 */
3973DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3974{
3975 Assert(!((uintptr_t)pu32 & 3));
3976 return *pu32;
3977}
3978
3979
3980/**
3981 * Atomically reads a signed 32-bit value, ordered.
3982 *
3983 * @returns Current *pi32 value
3984 * @param pi32 Pointer to the 32-bit variable to read.
3985 */
3986DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3987{
3988 ASMMemoryFence();
3989 Assert(!((uintptr_t)pi32 & 3));
3990 return *pi32;
3991}
3992
3993
3994/**
3995 * Atomically reads a signed 32-bit value, unordered.
3996 *
3997 * @returns Current *pi32 value
3998 * @param pi32 Pointer to the 32-bit variable to read.
3999 */
4000DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4001{
4002 Assert(!((uintptr_t)pi32 & 3));
4003 return *pi32;
4004}
4005
4006
4007/**
4008 * Atomically reads an unsigned 64-bit value, ordered.
4009 *
4010 * @returns Current *pu64 value
4011 * @param pu64 Pointer to the 64-bit variable to read.
4012 * The memory pointed to must be writable.
4013 * @remark This will fault if the memory is read-only!
4014 */
4015#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4016DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4017#else
4018DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4019{
4020 uint64_t u64;
4021# ifdef RT_ARCH_AMD64
4022 Assert(!((uintptr_t)pu64 & 7));
4023/*# if RT_INLINE_ASM_GNU_STYLE
4024 __asm__ __volatile__( "mfence\n\t"
4025 "movq %1, %0\n\t"
4026 : "=r" (u64)
4027 : "m" (*pu64));
4028# else
4029 __asm
4030 {
4031 mfence
4032 mov rdx, [pu64]
4033 mov rax, [rdx]
4034 mov [u64], rax
4035 }
4036# endif*/
4037 ASMMemoryFence();
4038 u64 = *pu64;
4039# else /* !RT_ARCH_AMD64 */
4040# if RT_INLINE_ASM_GNU_STYLE
4041# if defined(PIC) || defined(__PIC__)
4042 uint32_t u32EBX = 0;
4043 Assert(!((uintptr_t)pu64 & 7));
4044 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4045 "lock; cmpxchg8b (%5)\n\t"
4046 "movl %3, %%ebx\n\t"
4047 : "=A" (u64),
4048# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4049 "+m" (*pu64)
4050# else
4051 "=m" (*pu64)
4052# endif
4053 : "0" (0),
4054 "m" (u32EBX),
4055 "c" (0),
4056 "S" (pu64));
4057# else /* !PIC */
4058 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4059 : "=A" (u64),
4060 "+m" (*pu64)
4061 : "0" (0),
4062 "b" (0),
4063 "c" (0));
4064# endif
4065# else
4066 Assert(!((uintptr_t)pu64 & 7));
4067 __asm
4068 {
4069 xor eax, eax
4070 xor edx, edx
4071 mov edi, pu64
4072 xor ecx, ecx
4073 xor ebx, ebx
4074 lock cmpxchg8b [edi]
4075 mov dword ptr [u64], eax
4076 mov dword ptr [u64 + 4], edx
4077 }
4078# endif
4079# endif /* !RT_ARCH_AMD64 */
4080 return u64;
4081}
4082#endif
4083
4084
4085/**
4086 * Atomically reads an unsigned 64-bit value, unordered.
4087 *
4088 * @returns Current *pu64 value
4089 * @param pu64 Pointer to the 64-bit variable to read.
4090 * The memory pointed to must be writable.
4091 * @remark This will fault if the memory is read-only!
4092 */
4093#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4094DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4095#else
4096DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4097{
4098 uint64_t u64;
4099# ifdef RT_ARCH_AMD64
4100 Assert(!((uintptr_t)pu64 & 7));
4101/*# if RT_INLINE_ASM_GNU_STYLE
4102 Assert(!((uintptr_t)pu64 & 7));
4103 __asm__ __volatile__("movq %1, %0\n\t"
4104 : "=r" (u64)
4105 : "m" (*pu64));
4106# else
4107 __asm
4108 {
4109 mov rdx, [pu64]
4110 mov rax, [rdx]
4111 mov [u64], rax
4112 }
4113# endif */
4114 u64 = *pu64;
4115# else /* !RT_ARCH_AMD64 */
4116# if RT_INLINE_ASM_GNU_STYLE
4117# if defined(PIC) || defined(__PIC__)
4118 uint32_t u32EBX = 0;
4119 uint32_t u32Spill;
4120 Assert(!((uintptr_t)pu64 & 7));
4121 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4122 "xor %%ecx,%%ecx\n\t"
4123 "xor %%edx,%%edx\n\t"
4124 "xchgl %%ebx, %3\n\t"
4125 "lock; cmpxchg8b (%4)\n\t"
4126 "movl %3, %%ebx\n\t"
4127 : "=A" (u64),
4128# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4129 "+m" (*pu64),
4130# else
4131 "=m" (*pu64),
4132# endif
4133 "=c" (u32Spill)
4134 : "m" (u32EBX),
4135 "S" (pu64));
4136# else /* !PIC */
4137 __asm__ __volatile__("cmpxchg8b %1\n\t"
4138 : "=A" (u64),
4139 "+m" (*pu64)
4140 : "0" (0),
4141 "b" (0),
4142 "c" (0));
4143# endif
4144# else
4145 Assert(!((uintptr_t)pu64 & 7));
4146 __asm
4147 {
4148 xor eax, eax
4149 xor edx, edx
4150 mov edi, pu64
4151 xor ecx, ecx
4152 xor ebx, ebx
4153 lock cmpxchg8b [edi]
4154 mov dword ptr [u64], eax
4155 mov dword ptr [u64 + 4], edx
4156 }
4157# endif
4158# endif /* !RT_ARCH_AMD64 */
4159 return u64;
4160}
4161#endif
4162
4163
4164/**
4165 * Atomically reads a signed 64-bit value, ordered.
4166 *
4167 * @returns Current *pi64 value
4168 * @param pi64 Pointer to the 64-bit variable to read.
4169 * The memory pointed to must be writable.
4170 * @remark This will fault if the memory is read-only!
4171 */
4172DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4173{
4174 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4175}
4176
4177
4178/**
4179 * Atomically reads a signed 64-bit value, unordered.
4180 *
4181 * @returns Current *pi64 value
4182 * @param pi64 Pointer to the 64-bit variable to read.
4183 * The memory pointed to must be writable.
4184 * @remark This will fault if the memory is read-only!
4185 */
4186DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4187{
4188 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4189}
4190
4191
4192/**
4193 * Atomically reads a pointer value, ordered.
4194 *
4195 * @returns Current *pv value
4196 * @param ppv Pointer to the pointer variable to read.
4197 */
4198DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4199{
4200#if ARCH_BITS == 32
4201 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4202#elif ARCH_BITS == 64
4203 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4204#else
4205# error "ARCH_BITS is bogus"
4206#endif
4207}
4208
4209
4210/**
4211 * Atomically reads a pointer value, unordered.
4212 *
4213 * @returns Current *pv value
4214 * @param ppv Pointer to the pointer variable to read.
4215 */
4216DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4217{
4218#if ARCH_BITS == 32
4219 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4220#elif ARCH_BITS == 64
4221 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4222#else
4223# error "ARCH_BITS is bogus"
4224#endif
4225}
4226
4227
4228/**
4229 * Atomically reads a boolean value, ordered.
4230 *
4231 * @returns Current *pf value
4232 * @param pf Pointer to the boolean variable to read.
4233 */
4234DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4235{
4236 ASMMemoryFence();
4237 return *pf; /* byte reads are atomic on x86 */
4238}
4239
4240
4241/**
4242 * Atomically reads a boolean value, unordered.
4243 *
4244 * @returns Current *pf value
4245 * @param pf Pointer to the boolean variable to read.
4246 */
4247DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4248{
4249 return *pf; /* byte reads are atomic on x86 */
4250}
4251
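/* Usage sketch (not part of the original header): a worker loop polling a
 * shared stop flag. The unordered read is good enough for the poll itself; a
 * read fence is issued once the flag is seen, before touching shared state.
 * The names are hypothetical. */
#if 0 /* example */
static bool volatile g_fExampleStop = false;

DECLINLINE(void) exampleWorkerLoop(void (*pfnDoChunk)(void))
{
    while (!ASMAtomicUoReadBool(&g_fExampleStop)) /* cheap unordered poll */
        pfnDoChunk();
    ASMReadFence(); /* order the flag read against the cleanup that follows */
}
#endif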
4252
4253/**
4254 * Atomically read a typical IPRT handle value, ordered.
4255 *
4256 * @param ph Pointer to the handle variable to read.
4257 * @param phRes Where to store the result.
4258 *
4259 * @remarks This doesn't currently work for all handles (like RTFILE).
4260 */
4261#define ASMAtomicReadHandle(ph, phRes) \
4262 do { \
4263 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
4264 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4265 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4266 } while (0)
4267
4268
4269/**
4270 * Atomically read a typical IPRT handle value, unordered.
4271 *
4272 * @param ph Pointer to the handle variable to read.
4273 * @param phRes Where to store the result.
4274 *
4275 * @remarks This doesn't currently work for all handles (like RTFILE).
4276 */
4277#define ASMAtomicUoReadHandle(ph, phRes) \
4278 do { \
4279 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4280 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4281 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4282 } while (0)
4283
4284
4285/**
4286 * Atomically read a value whose size might differ
4287 * between platforms or compilers, ordered.
4288 *
4289 * @param pu Pointer to the variable to read.
4290 * @param puRes Where to store the result.
4291 */
4292#define ASMAtomicReadSize(pu, puRes) \
4293 do { \
4294 switch (sizeof(*(pu))) { \
4295 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4296 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4297 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4298 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4299 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4300 } \
4301 } while (0)
4302
4303
4304/**
4305 * Atomically read a value whose size might differ
4306 * between platforms or compilers, unordered.
4307 *
4308 * @param pu Pointer to the variable to read.
4309 * @param puRes Where to store the result.
4310 */
4311#define ASMAtomicUoReadSize(pu, puRes) \
4312 do { \
4313 switch (sizeof(*(pu))) { \
4314 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4315 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4316 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4317 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4318            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4319 } \
4320 } while (0)
4321
4322
4323/**
4324 * Atomically writes an unsigned 8-bit value, ordered.
4325 *
4326 * @param pu8 Pointer to the 8-bit variable.
4327 * @param u8 The 8-bit value to assign to *pu8.
4328 */
4329DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4330{
4331 ASMAtomicXchgU8(pu8, u8);
4332}
4333
4334
4335/**
4336 * Atomically writes an unsigned 8-bit value, unordered.
4337 *
4338 * @param pu8 Pointer to the 8-bit variable.
4339 * @param u8 The 8-bit value to assign to *pu8.
4340 */
4341DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4342{
4343 *pu8 = u8; /* byte writes are atomic on x86 */
4344}
4345
4346
4347/**
4348 * Atomically writes a signed 8-bit value, ordered.
4349 *
4350 * @param pi8 Pointer to the 8-bit variable.
4351 * @param i8 The 8-bit value to assign to *pi8.
4352 */
4353DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4354{
4355 ASMAtomicXchgS8(pi8, i8);
4356}
4357
4358
4359/**
4360 * Atomically writes a signed 8-bit value, unordered.
4361 *
4362 * @param pi8 Pointer to the 8-bit variable.
4363 * @param i8 The 8-bit value to assign to *pi8.
4364 */
4365DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4366{
4367 *pi8 = i8; /* byte writes are atomic on x86 */
4368}
4369
4370
4371/**
4372 * Atomically writes an unsigned 16-bit value, ordered.
4373 *
4374 * @param pu16 Pointer to the 16-bit variable.
4375 * @param u16 The 16-bit value to assign to *pu16.
4376 */
4377DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4378{
4379 ASMAtomicXchgU16(pu16, u16);
4380}
4381
4382
4383/**
4384 * Atomically writes an unsigned 16-bit value, unordered.
4385 *
4386 * @param pu16 Pointer to the 16-bit variable.
4387 * @param u16 The 16-bit value to assign to *pu16.
4388 */
4389DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4390{
4391 Assert(!((uintptr_t)pu16 & 1));
4392 *pu16 = u16;
4393}
4394
4395
4396/**
4397 * Atomically writes a signed 16-bit value, ordered.
4398 *
4399 * @param pi16 Pointer to the 16-bit variable.
4400 * @param i16 The 16-bit value to assign to *pi16.
4401 */
4402DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4403{
4404 ASMAtomicXchgS16(pi16, i16);
4405}
4406
4407
4408/**
4409 * Atomically writes a signed 16-bit value, unordered.
4410 *
4411 * @param pi16 Pointer to the 16-bit variable.
4412 * @param i16 The 16-bit value to assign to *pi16.
4413 */
4414DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4415{
4416 Assert(!((uintptr_t)pi16 & 1));
4417 *pi16 = i16;
4418}
4419
4420
4421/**
4422 * Atomically writes an unsigned 32-bit value, ordered.
4423 *
4424 * @param pu32 Pointer to the 32-bit variable.
4425 * @param u32 The 32-bit value to assign to *pu32.
4426 */
4427DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4428{
4429 ASMAtomicXchgU32(pu32, u32);
4430}
4431
4432
4433/**
4434 * Atomically writes an unsigned 32-bit value, unordered.
4435 *
4436 * @param pu32 Pointer to the 32-bit variable.
4437 * @param u32 The 32-bit value to assign to *pu32.
4438 */
4439DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4440{
4441 Assert(!((uintptr_t)pu32 & 3));
4442 *pu32 = u32;
4443}
4444
4445
4446/**
4447 * Atomically writes a signed 32-bit value, ordered.
4448 *
4449 * @param pi32 Pointer to the 32-bit variable.
4450 * @param i32 The 32-bit value to assign to *pi32.
4451 */
4452DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4453{
4454 ASMAtomicXchgS32(pi32, i32);
4455}
4456
4457
4458/**
4459 * Atomically writes a signed 32-bit value, unordered.
4460 *
4461 * @param pi32 Pointer to the 32-bit variable.
4462 * @param i32 The 32-bit value to assign to *pi32.
4463 */
4464DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4465{
4466 Assert(!((uintptr_t)pi32 & 3));
4467 *pi32 = i32;
4468}
4469
4470
4471/**
4472 * Atomically writes an unsigned 64-bit value, ordered.
4473 *
4474 * @param pu64 Pointer to the 64-bit variable.
4475 * @param u64 The 64-bit value to assign to *pu64.
4476 */
4477DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4478{
4479 ASMAtomicXchgU64(pu64, u64);
4480}
4481
4482
4483/**
4484 * Atomically writes an unsigned 64-bit value, unordered.
4485 *
4486 * @param pu64 Pointer to the 64-bit variable.
4487 * @param u64 The 64-bit value to assign to *pu64.
4488 */
4489DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4490{
4491 Assert(!((uintptr_t)pu64 & 7));
4492#if ARCH_BITS == 64
4493 *pu64 = u64;
4494#else
4495 ASMAtomicXchgU64(pu64, u64);
4496#endif
4497}
4498
4499
4500/**
4501 * Atomically writes a signed 64-bit value, ordered.
4502 *
4503 * @param pi64 Pointer to the 64-bit variable.
4504 * @param i64 The 64-bit value to assign to *pi64.
4505 */
4506DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4507{
4508 ASMAtomicXchgS64(pi64, i64);
4509}
4510
4511
4512/**
4513 * Atomically writes a signed 64-bit value, unordered.
4514 *
4515 * @param pi64 Pointer to the 64-bit variable.
4516 * @param i64 The 64-bit value to assign to *pi64.
4517 */
4518DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4519{
4520 Assert(!((uintptr_t)pi64 & 7));
4521#if ARCH_BITS == 64
4522 *pi64 = i64;
4523#else
4524 ASMAtomicXchgS64(pi64, i64);
4525#endif
4526}
4527
4528
4529/**
4530 * Atomically writes a boolean value, ordered.
4531 *
4532 * @param pf Pointer to the boolean variable.
4533 * @param f The boolean value to assign to *pf.
4534 */
4535DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4536{
4537 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4538}
4539
4540
4541/**
4542 * Atomically writes a boolean value, unordered.
4543 *
4544 * @param pf Pointer to the boolean variable.
4545 * @param f The boolean value to assign to *pf.
4546 */
4547DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4548{
4549 *pf = f; /* byte writes are atomic on x86 */
4550}
4551
4552
4553/**
4554 * Atomically writes a pointer value, ordered.
4555 *
4557 * @param ppv Pointer to the pointer variable.
4558 * @param pv The pointer value to assign to *ppv.
4559 */
4560DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4561{
4562#if ARCH_BITS == 32
4563 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4564#elif ARCH_BITS == 64
4565 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4566#else
4567# error "ARCH_BITS is bogus"
4568#endif
4569}
4570
4571
4572/**
4573 * Atomically writes a pointer value, unordered.
4574 *
4576 * @param ppv Pointer to the pointer variable.
4577 * @param pv The pointer value to assign to *ppv.
4578 */
4579DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4580{
4581#if ARCH_BITS == 32
4582 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4583#elif ARCH_BITS == 64
4584 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4585#else
4586# error "ARCH_BITS is bogus"
4587#endif
4588}
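
/* Usage sketch: publishing a pointer for lock-free readers. The ordered write
 * guarantees that all stores made while initializing the object are visible
 * before the pointer itself becomes visible. The names below are hypothetical.
 *
 * @code
 *     static void * volatile g_pvSharedCfg = NULL;
 *
 *     static void PublishConfig(void *pvNewCfg)
 *     {
 *         ASMAtomicWritePtr(&g_pvSharedCfg, pvNewCfg);
 *     }
 * @endcode
 */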
4589
4590
4591/**
4592 * Atomically write a typical IPRT handle value, ordered.
4593 *
4594 * @param ph Pointer to the variable to update.
4595 * @param hNew The value to assign to *ph.
4596 *
4597 * @remarks This doesn't currently work for all handles (like RTFILE).
4598 */
4599#define ASMAtomicWriteHandle(ph, hNew) \
4600 do { \
4601 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)hNew); \
4602 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4603 } while (0)
4604
4605
4606/**
4607 * Atomically write a typical IPRT handle value, unordered.
4608 *
4609 * @param ph Pointer to the variable to update.
4610 * @param hNew The value to assign to *ph.
4611 *
4612 * @remarks This doesn't currently work for all handles (like RTFILE).
4613 */
4614#define ASMAtomicUoWriteHandle(ph, hNew) \
4615 do { \
4616 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)hNew); \
4617 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4618 } while (0)
4619
4620
4621/**
4622 * Atomically write a value whose size might differ
4623 * between platforms or compilers, ordered.
4624 *
4625 * @param pu Pointer to the variable to update.
4626 * @param uNew The value to assign to *pu.
4627 */
4628#define ASMAtomicWriteSize(pu, uNew) \
4629 do { \
4630 switch (sizeof(*(pu))) { \
4631 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4632 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4633 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4634 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4635 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4636 } \
4637 } while (0)
4638
4639/**
4640 * Atomically write a value whose size might differ
4641 * between platforms or compilers, unordered.
4642 *
4643 * @param pu Pointer to the variable to update.
4644 * @param uNew The value to assign to *pu.
4645 */
4646#define ASMAtomicUoWriteSize(pu, uNew) \
4647 do { \
4648 switch (sizeof(*(pu))) { \
4649 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4650 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4651 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4652 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4653 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4654 } \
4655 } while (0)
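
/* Usage sketch: ASMAtomicWriteSize dispatches on sizeof(*(pu)), which is
 * convenient for variables such as size_t whose width differs between 32-bit
 * and 64-bit builds. The names below are hypothetical.
 *
 * @code
 *     static volatile size_t s_cbHighWatermark;
 *
 *     static void UpdateWatermark(size_t cbNew)
 *     {
 *         ASMAtomicWriteSize(&s_cbHighWatermark, cbNew);
 *     }
 * @endcode
 */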
4656
4657
4658
4659
4660/**
4661 * Invalidate page.
4662 *
4663 * @param pv Address of the page to invalidate.
4664 */
4665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4666DECLASM(void) ASMInvalidatePage(void *pv);
4667#else
4668DECLINLINE(void) ASMInvalidatePage(void *pv)
4669{
4670# if RT_INLINE_ASM_USES_INTRIN
4671 __invlpg(pv);
4672
4673# elif RT_INLINE_ASM_GNU_STYLE
4674 __asm__ __volatile__("invlpg %0\n\t"
4675 : : "m" (*(uint8_t *)pv));
4676# else
4677 __asm
4678 {
4679# ifdef RT_ARCH_AMD64
4680 mov rax, [pv]
4681 invlpg [rax]
4682# else
4683 mov eax, [pv]
4684 invlpg [eax]
4685# endif
4686 }
4687# endif
4688}
4689#endif
4690
4691
4692/**
4693 * Write back the internal caches and invalidate them.
4694 */
4695#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4696DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4697#else
4698DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4699{
4700# if RT_INLINE_ASM_USES_INTRIN
4701 __wbinvd();
4702
4703# elif RT_INLINE_ASM_GNU_STYLE
4704 __asm__ __volatile__("wbinvd");
4705# else
4706 __asm
4707 {
4708 wbinvd
4709 }
4710# endif
4711}
4712#endif
4713
4714
4715/**
4716 * Invalidate internal and (perhaps) external caches without first
4717 * flushing dirty cache lines. Use with extreme care.
4718 */
4719#if RT_INLINE_ASM_EXTERNAL
4720DECLASM(void) ASMInvalidateInternalCaches(void);
4721#else
4722DECLINLINE(void) ASMInvalidateInternalCaches(void)
4723{
4724# if RT_INLINE_ASM_GNU_STYLE
4725 __asm__ __volatile__("invd");
4726# else
4727 __asm
4728 {
4729 invd
4730 }
4731# endif
4732}
4733#endif
4734
4735
4736#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4737# if PAGE_SIZE != 0x1000
4738# error "PAGE_SIZE is not 0x1000!"
4739# endif
4740#endif
4741
4742/**
4743 * Zeros a 4K memory page.
4744 *
4745 * @param pv Pointer to the memory block. This must be page aligned.
4746 */
4747#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4748DECLASM(void) ASMMemZeroPage(volatile void *pv);
4749# else
4750DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4751{
4752# if RT_INLINE_ASM_USES_INTRIN
4753# ifdef RT_ARCH_AMD64
4754 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4755# else
4756 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4757# endif
4758
4759# elif RT_INLINE_ASM_GNU_STYLE
4760 RTCCUINTREG uDummy;
4761# ifdef RT_ARCH_AMD64
4762 __asm__ __volatile__("rep stosq"
4763 : "=D" (pv),
4764 "=c" (uDummy)
4765 : "0" (pv),
4766 "c" (0x1000 >> 3),
4767 "a" (0)
4768 : "memory");
4769# else
4770 __asm__ __volatile__("rep stosl"
4771 : "=D" (pv),
4772 "=c" (uDummy)
4773 : "0" (pv),
4774 "c" (0x1000 >> 2),
4775 "a" (0)
4776 : "memory");
4777# endif
4778# else
4779 __asm
4780 {
4781# ifdef RT_ARCH_AMD64
4782 xor rax, rax
4783 mov ecx, 0200h
4784 mov rdi, [pv]
4785 rep stosq
4786# else
4787 xor eax, eax
4788 mov ecx, 0400h
4789 mov edi, [pv]
4790 rep stosd
4791# endif
4792 }
4793# endif
4794}
4795# endif
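
/* Usage sketch: clearing a page aligned scratch page before reuse. The
 * allocation helper is only a placeholder for whatever page aligned allocator
 * the caller actually uses.
 *
 * @code
 *     void *pvPage = AllocPageAligned();   // hypothetical, returns a 4K aligned page or NULL
 *     if (pvPage)
 *         ASMMemZeroPage(pvPage);          // pv must be page aligned
 * @endcode
 */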
4796
4797
4798/**
4799 * Zeros a memory block with a 32-bit aligned size.
4800 *
4801 * @param pv Pointer to the memory block.
4802 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4803 */
4804#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4805DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4806#else
4807DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4808{
4809# if RT_INLINE_ASM_USES_INTRIN
4810# ifdef RT_ARCH_AMD64
4811 if (!(cb & 7))
4812 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4813 else
4814# endif
4815 __stosd((unsigned long *)pv, 0, cb / 4);
4816
4817# elif RT_INLINE_ASM_GNU_STYLE
4818 __asm__ __volatile__("rep stosl"
4819 : "=D" (pv),
4820 "=c" (cb)
4821 : "0" (pv),
4822 "1" (cb >> 2),
4823 "a" (0)
4824 : "memory");
4825# else
4826 __asm
4827 {
4828 xor eax, eax
4829# ifdef RT_ARCH_AMD64
4830 mov rcx, [cb]
4831 shr rcx, 2
4832 mov rdi, [pv]
4833# else
4834 mov ecx, [cb]
4835 shr ecx, 2
4836 mov edi, [pv]
4837# endif
4838 rep stosd
4839 }
4840# endif
4841}
4842#endif
4843
4844
4845/**
4846 * Fills a memory block with a 32-bit aligned size.
4847 *
4848 * @param pv Pointer to the memory block.
4849 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4850 * @param u32 The value to fill with.
4851 */
4852#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4853DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4854#else
4855DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4856{
4857# if RT_INLINE_ASM_USES_INTRIN
4858# ifdef RT_ARCH_AMD64
4859 if (!(cb & 7))
4860 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4861 else
4862# endif
4863 __stosd((unsigned long *)pv, u32, cb / 4);
4864
4865# elif RT_INLINE_ASM_GNU_STYLE
4866 __asm__ __volatile__("rep stosl"
4867 : "=D" (pv),
4868 "=c" (cb)
4869 : "0" (pv),
4870 "1" (cb >> 2),
4871 "a" (u32)
4872 : "memory");
4873# else
4874 __asm
4875 {
4876# ifdef RT_ARCH_AMD64
4877 mov rcx, [cb]
4878 shr rcx, 2
4879 mov rdi, [pv]
4880# else
4881 mov ecx, [cb]
4882 shr ecx, 2
4883 mov edi, [pv]
4884# endif
4885 mov eax, [u32]
4886 rep stosd
4887 }
4888# endif
4889}
4890#endif
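
/* Usage sketch: both ASMMemZero32 and ASMMemFill32 require the byte count to
 * be a multiple of four, so the buffer is sized in 32-bit units here. The
 * array name is hypothetical.
 *
 * @code
 *     uint32_t au32Scratch[64];
 *
 *     ASMMemZero32(au32Scratch, sizeof(au32Scratch));                       // all zero
 *     ASMMemFill32(au32Scratch, sizeof(au32Scratch), UINT32_C(0xdeadbeef)); // pattern fill
 * @endcode
 */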
4891
4892
4893/**
4894 * Checks if a memory block is filled with the specified byte.
4895 *
4896 * This is a sort of inverted memchr.
4897 *
4898 * @returns Pointer to the byte which doesn't equal u8.
4899 * @returns NULL if all equal to u8.
4900 *
4901 * @param pv Pointer to the memory block.
4902 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4903 * @param u8 The value it's supposed to be filled with.
4904 */
4905#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4906DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4907#else
4908DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4909{
4910/** @todo rewrite this in inline assembly? */
4911 uint8_t const *pb = (uint8_t const *)pv;
4912 for (; cb; cb--, pb++)
4913 if (RT_UNLIKELY(*pb != u8))
4914 return (void *)pb;
4915 return NULL;
4916}
4917#endif
4918
4919
4920/**
4921 * Checks if a memory block is filled with the specified 32-bit value.
4922 *
4923 * This is a sort of inverted memchr.
4924 *
4925 * @returns Pointer to the first value which doesn't equal u32.
4926 * @returns NULL if all equal to u32.
4927 *
4928 * @param pv Pointer to the memory block.
4929 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4930 * @param u32 The value it's supposed to be filled with.
4931 */
4932#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4933DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4934#else
4935DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4936{
4937/** @todo rewrite this in inline assembly? */
4938 uint32_t const *pu32 = (uint32_t const *)pv;
4939 for (; cb; cb -= 4, pu32++)
4940 if (RT_UNLIKELY(*pu32 != u32))
4941 return (uint32_t *)pu32;
4942 return NULL;
4943}
4944#endif
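
/* Usage sketch: verifying that a 32-bit aligned block is completely zeroed
 * and reporting the offset of the first mismatch. pvBuf, cbBuf and the
 * logging helper are hypothetical.
 *
 * @code
 *     uint32_t *pu32Bad = ASMMemIsAllU32(pvBuf, cbBuf, 0);    // cbBuf must be a multiple of 4
 *     if (pu32Bad)
 *         LogMismatch((uintptr_t)pu32Bad - (uintptr_t)pvBuf); // hypothetical logger
 * @endcode
 */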
4945
4946
4947/**
4948 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4949 *
4950 * @returns u32F1 * u32F2.
4951 */
4952#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4953DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4954#else
4955DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4956{
4957# ifdef RT_ARCH_AMD64
4958 return (uint64_t)u32F1 * u32F2;
4959# else /* !RT_ARCH_AMD64 */
4960 uint64_t u64;
4961# if RT_INLINE_ASM_GNU_STYLE
4962 __asm__ __volatile__("mull %%edx"
4963 : "=A" (u64)
4964 : "a" (u32F2), "d" (u32F1));
4965# else
4966 __asm
4967 {
4968 mov edx, [u32F1]
4969 mov eax, [u32F2]
4970 mul edx
4971 mov dword ptr [u64], eax
4972 mov dword ptr [u64 + 4], edx
4973 }
4974# endif
4975 return u64;
4976# endif /* !RT_ARCH_AMD64 */
4977}
4978#endif
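
/* Usage sketch: computing a byte offset from a 32-bit sector number and
 * sector size without losing the high bits of the product.
 *
 * @code
 *     uint32_t iSector  = UINT32_C(0x01000000);                  // 16M sectors in
 *     uint32_t cbSector = 512;
 *     uint64_t offByte  = ASMMult2xU32RetU64(iSector, cbSector); // 0x0000000200000000 (8 GiB)
 * @endcode
 */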
4979
4980
4981/**
4982 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4983 *
4984 * @returns i32F1 * i32F2.
4985 */
4986#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4987DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4988#else
4989DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4990{
4991# ifdef RT_ARCH_AMD64
4992 return (int64_t)i32F1 * i32F2;
4993# else /* !RT_ARCH_AMD64 */
4994 int64_t i64;
4995# if RT_INLINE_ASM_GNU_STYLE
4996 __asm__ __volatile__("imull %%edx"
4997 : "=A" (i64)
4998 : "a" (i32F2), "d" (i32F1));
4999# else
5000 __asm
5001 {
5002 mov edx, [i32F1]
5003 mov eax, [i32F2]
5004 imul edx
5005 mov dword ptr [i64], eax
5006 mov dword ptr [i64 + 4], edx
5007 }
5008# endif
5009 return i64;
5010# endif /* !RT_ARCH_AMD64 */
5011}
5012#endif
5013
5014
5015/**
5016 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5017 *
5018 * @returns u64 / u32.
5019 */
5020#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5021DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5022#else
5023DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5024{
5025# ifdef RT_ARCH_AMD64
5026 return (uint32_t)(u64 / u32);
5027# else /* !RT_ARCH_AMD64 */
5028# if RT_INLINE_ASM_GNU_STYLE
5029 RTCCUINTREG uDummy;
5030 __asm__ __volatile__("divl %3"
5031 : "=a" (u32), "=d"(uDummy)
5032 : "A" (u64), "r" (u32));
5033# else
5034 __asm
5035 {
5036 mov eax, dword ptr [u64]
5037 mov edx, dword ptr [u64 + 4]
5038 mov ecx, [u32]
5039 div ecx
5040 mov [u32], eax
5041 }
5042# endif
5043 return u32;
5044# endif /* !RT_ARCH_AMD64 */
5045}
5046#endif
5047
5048
5049/**
5050 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5051 *
5052 * @returns i64 / i32.
5053 */
5054#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5055DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5056#else
5057DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5058{
5059# ifdef RT_ARCH_AMD64
5060 return (int32_t)(i64 / i32);
5061# else /* !RT_ARCH_AMD64 */
5062# if RT_INLINE_ASM_GNU_STYLE
5063 RTCCUINTREG iDummy;
5064 __asm__ __volatile__("idivl %3"
5065 : "=a" (i32), "=d"(iDummy)
5066 : "A" (i64), "r" (i32));
5067# else
5068 __asm
5069 {
5070 mov eax, dword ptr [i64]
5071 mov edx, dword ptr [i64 + 4]
5072 mov ecx, [i32]
5073 idiv ecx
5074 mov [i32], eax
5075 }
5076# endif
5077 return i32;
5078# endif /* !RT_ARCH_AMD64 */
5079}
5080#endif
5081
5082
5083/**
5084 * Performs a 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5085 * returning the remainder.
5086 *
5087 * @returns u64 % u32.
5088 *
5089 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or the division will trap.
5090 */
5091#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5092DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5093#else
5094DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5095{
5096# ifdef RT_ARCH_AMD64
5097 return (uint32_t)(u64 % u32);
5098# else /* !RT_ARCH_AMD64 */
5099# if RT_INLINE_ASM_GNU_STYLE
5100 RTCCUINTREG uDummy;
5101 __asm__ __volatile__("divl %3"
5102 : "=a" (uDummy), "=d"(u32)
5103 : "A" (u64), "r" (u32));
5104# else
5105 __asm
5106 {
5107 mov eax, dword ptr [u64]
5108 mov edx, dword ptr [u64 + 4]
5109 mov ecx, [u32]
5110 div ecx
5111 mov [u32], edx
5112 }
5113# endif
5114 return u32;
5115# endif /* !RT_ARCH_AMD64 */
5116}
5117#endif
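
/* Usage sketch: splitting a 64-bit nanosecond count into whole seconds and
 * the nanosecond remainder. Both helpers require the quotient to fit into
 * 32 bits, which holds for any timestamp below roughly 136 years.
 *
 * @code
 *     uint64_t cNs     = UINT64_C(5123456789);
 *     uint32_t cSecs   = ASMDivU64ByU32RetU32(cNs, UINT32_C(1000000000)); // 5
 *     uint32_t cNsLeft = ASMModU64ByU32RetU32(cNs, UINT32_C(1000000000)); // 123456789
 * @endcode
 */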
5118
5119
5120/**
5121 * Performs a 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5122 * returning the remainder.
5123 *
5124 * @returns i64 % i32.
5125 *
5126 * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit integer or the division will trap.
5127 */
5128#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5129DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5130#else
5131DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5132{
5133# ifdef RT_ARCH_AMD64
5134 return (int32_t)(i64 % i32);
5135# else /* !RT_ARCH_AMD64 */
5136# if RT_INLINE_ASM_GNU_STYLE
5137 RTCCUINTREG iDummy;
5138 __asm__ __volatile__("idivl %3"
5139 : "=a" (iDummy), "=d"(i32)
5140 : "A" (i64), "r" (i32));
5141# else
5142 __asm
5143 {
5144 mov eax, dword ptr [i64]
5145 mov edx, dword ptr [i64 + 4]
5146 mov ecx, [i32]
5147 idiv ecx
5148 mov [i32], edx
5149 }
5150# endif
5151 return i32;
5152# endif /* !RT_ARCH_AMD64 */
5153}
5154#endif
5155
5156
5157/**
5158 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
5159 * using a 96 bit intermediate result.
5160 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5161 * __udivdi3 and __umoddi3 even if this inline function is not used.
5162 *
5163 * @returns (u64A * u32B) / u32C.
5164 * @param u64A The 64-bit value.
5165 * @param u32B The 32-bit value to multiply by A.
5166 * @param u32C The 32-bit value to divide A*B by.
5167 */
5168#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5169DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5170#else
5171DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5172{
5173# if RT_INLINE_ASM_GNU_STYLE
5174# ifdef RT_ARCH_AMD64
5175 uint64_t u64Result, u64Spill;
5176 __asm__ __volatile__("mulq %2\n\t"
5177 "divq %3\n\t"
5178 : "=a" (u64Result),
5179 "=d" (u64Spill)
5180 : "r" ((uint64_t)u32B),
5181 "r" ((uint64_t)u32C),
5182 "0" (u64A),
5183 "1" (0));
5184 return u64Result;
5185# else
5186 uint32_t u32Dummy;
5187 uint64_t u64Result;
5188 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5189 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5190 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5191 eax = u64A.hi */
5192 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5193 edx = u32C */
5194 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5195 edx = u32B */
5196 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5197 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5198 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5199 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5200 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5201 edx = u64Hi % u32C */
5202 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5203 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5204 "divl %%ecx \n\t" /* u64Result.lo */
5205 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5206 : "=A"(u64Result), "=c"(u32Dummy),
5207 "=S"(u32Dummy), "=D"(u32Dummy)
5208 : "a"((uint32_t)u64A),
5209 "S"((uint32_t)(u64A >> 32)),
5210 "c"(u32B),
5211 "D"(u32C));
5212 return u64Result;
5213# endif
5214# else
5215 RTUINT64U u;
5216 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5217 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5218 u64Hi += (u64Lo >> 32);
5219 u.s.Hi = (uint32_t)(u64Hi / u32C);
5220 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5221 return u.u;
5222# endif
5223}
5224#endif
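
/* Usage sketch: scaling a 64-bit tick count to nanoseconds via the 96-bit
 * intermediate, avoiding overflow at the multiply stage. The 1 GHz frequency
 * is just an illustrative value; the final quotient must still fit 64 bits.
 *
 * @code
 *     uint64_t cTicks = UINT64_C(123456789012);
 *     uint32_t uHz    = UINT32_C(1000000000);   // hypothetical clock frequency
 *     uint64_t cNs    = ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHz);
 *     // With a 1 GHz clock the result equals the tick count.
 * @endcode
 */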
5225
5226
5227/**
5228 * Probes a byte pointer for read access.
5229 *
5230 * While the function will fault if the byte is not read accessible,
5231 * the idea is to do this in a safe place like before acquiring locks
5232 * and such like.
5233 *
5234 * Also, this function guarantees that an eager compiler is not going
5235 * to optimize the probing away.
5236 *
5237 * @param pvByte Pointer to the byte.
5238 */
5239#if RT_INLINE_ASM_EXTERNAL
5240DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5241#else
5242DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5243{
5244 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5245 uint8_t u8;
5246# if RT_INLINE_ASM_GNU_STYLE
5247 __asm__ __volatile__("movb (%1), %0\n\t"
5248 : "=r" (u8)
5249 : "r" (pvByte));
5250# else
5251 __asm
5252 {
5253# ifdef RT_ARCH_AMD64
5254 mov rax, [pvByte]
5255 mov al, [rax]
5256# else
5257 mov eax, [pvByte]
5258 mov al, [eax]
5259# endif
5260 mov [u8], al
5261 }
5262# endif
5263 return u8;
5264}
5265#endif
5266
5267/**
5268 * Probes a buffer for read access page by page.
5269 *
5270 * While the function will fault if the buffer is not fully read
5271 * accessible, the idea is to do this in a safe place like before
5272 * acquiring locks and such like.
5273 *
5274 * Also, this function guarantees that an eager compiler is not going
5275 * to optimize the probing away.
5276 *
5277 * @param pvBuf Pointer to the buffer.
5278 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5279 */
5280DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5281{
5282 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5283 /* the first byte */
5284 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5285 ASMProbeReadByte(pu8);
5286
5287 /* the pages in between. */
5288 while (cbBuf > /*PAGE_SIZE*/0x1000)
5289 {
5290 ASMProbeReadByte(pu8);
5291 cbBuf -= /*PAGE_SIZE*/0x1000;
5292 pu8 += /*PAGE_SIZE*/0x1000;
5293 }
5294
5295 /* the last byte */
5296 ASMProbeReadByte(pu8 + cbBuf - 1);
5297}
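
/* Usage sketch: touching the whole request buffer up front so that any page
 * fault happens before the spinlock is taken. The request type, lock and
 * worker below are hypothetical.
 *
 * @code
 *     ASMProbeReadBuffer(pReq, sizeof(*pReq));  // fault here, not while holding the lock
 *     AcquireSpinlock(&g_Lock);                 // hypothetical
 *     ProcessRequest(pReq);                     // hypothetical
 *     ReleaseSpinlock(&g_Lock);
 * @endcode
 */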
5298
5299
5300/** @def ASMBreakpoint
5301 * Debugger Breakpoint.
5302 * @remark In the gnu world we add a nop instruction after the int3 to
5303 * force gdb to remain at the int3 source line.
5304 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5305 * @internal
5306 */
5307#if RT_INLINE_ASM_GNU_STYLE
5308# ifndef __L4ENV__
5309# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5310# else
5311# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5312# endif
5313#else
5314# define ASMBreakpoint() __debugbreak()
5315#endif
5316
5317
5318
5319/** @defgroup grp_inline_bits Bit Operations
5320 * @{
5321 */
5322
5323
5324/**
5325 * Sets a bit in a bitmap.
5326 *
5327 * @param pvBitmap Pointer to the bitmap.
5328 * @param iBit The bit to set.
5329 */
5330#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5331DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5332#else
5333DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5334{
5335# if RT_INLINE_ASM_USES_INTRIN
5336 _bittestandset((long *)pvBitmap, iBit);
5337
5338# elif RT_INLINE_ASM_GNU_STYLE
5339 __asm__ __volatile__("btsl %1, %0"
5340 : "=m" (*(volatile long *)pvBitmap)
5341 : "Ir" (iBit),
5342 "m" (*(volatile long *)pvBitmap)
5343 : "memory");
5344# else
5345 __asm
5346 {
5347# ifdef RT_ARCH_AMD64
5348 mov rax, [pvBitmap]
5349 mov edx, [iBit]
5350 bts [rax], edx
5351# else
5352 mov eax, [pvBitmap]
5353 mov edx, [iBit]
5354 bts [eax], edx
5355# endif
5356 }
5357# endif
5358}
5359#endif
5360
5361
5362/**
5363 * Atomically sets a bit in a bitmap, ordered.
5364 *
5365 * @param pvBitmap Pointer to the bitmap.
5366 * @param iBit The bit to set.
5367 */
5368#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5369DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5370#else
5371DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5372{
5373# if RT_INLINE_ASM_USES_INTRIN
5374 _interlockedbittestandset((long *)pvBitmap, iBit);
5375# elif RT_INLINE_ASM_GNU_STYLE
5376 __asm__ __volatile__("lock; btsl %1, %0"
5377 : "=m" (*(volatile long *)pvBitmap)
5378 : "Ir" (iBit),
5379 "m" (*(volatile long *)pvBitmap)
5380 : "memory");
5381# else
5382 __asm
5383 {
5384# ifdef RT_ARCH_AMD64
5385 mov rax, [pvBitmap]
5386 mov edx, [iBit]
5387 lock bts [rax], edx
5388# else
5389 mov eax, [pvBitmap]
5390 mov edx, [iBit]
5391 lock bts [eax], edx
5392# endif
5393 }
5394# endif
5395}
5396#endif
5397
5398
5399/**
5400 * Clears a bit in a bitmap.
5401 *
5402 * @param pvBitmap Pointer to the bitmap.
5403 * @param iBit The bit to clear.
5404 */
5405#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5406DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5407#else
5408DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5409{
5410# if RT_INLINE_ASM_USES_INTRIN
5411 _bittestandreset((long *)pvBitmap, iBit);
5412
5413# elif RT_INLINE_ASM_GNU_STYLE
5414 __asm__ __volatile__("btrl %1, %0"
5415 : "=m" (*(volatile long *)pvBitmap)
5416 : "Ir" (iBit),
5417 "m" (*(volatile long *)pvBitmap)
5418 : "memory");
5419# else
5420 __asm
5421 {
5422# ifdef RT_ARCH_AMD64
5423 mov rax, [pvBitmap]
5424 mov edx, [iBit]
5425 btr [rax], edx
5426# else
5427 mov eax, [pvBitmap]
5428 mov edx, [iBit]
5429 btr [eax], edx
5430# endif
5431 }
5432# endif
5433}
5434#endif
5435
5436
5437/**
5438 * Atomically clears a bit in a bitmap, ordered.
5439 *
5440 * @param pvBitmap Pointer to the bitmap.
5441 * @param iBit The bit to clear.
5442 * @remark No memory barrier, take care on smp.
5443 */
5444#if RT_INLINE_ASM_EXTERNAL
5445DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5446#else
5447DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5448{
5449# if RT_INLINE_ASM_GNU_STYLE
5450 __asm__ __volatile__("lock; btrl %1, %0"
5451 : "=m" (*(volatile long *)pvBitmap)
5452 : "Ir" (iBit),
5453 "m" (*(volatile long *)pvBitmap)
5454 : "memory");
5455# else
5456 __asm
5457 {
5458# ifdef RT_ARCH_AMD64
5459 mov rax, [pvBitmap]
5460 mov edx, [iBit]
5461 lock btr [rax], edx
5462# else
5463 mov eax, [pvBitmap]
5464 mov edx, [iBit]
5465 lock btr [eax], edx
5466# endif
5467 }
5468# endif
5469}
5470#endif
5471
5472
5473/**
5474 * Toggles a bit in a bitmap.
5475 *
5476 * @param pvBitmap Pointer to the bitmap.
5477 * @param iBit The bit to toggle.
5478 */
5479#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5480DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5481#else
5482DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5483{
5484# if RT_INLINE_ASM_USES_INTRIN
5485 _bittestandcomplement((long *)pvBitmap, iBit);
5486# elif RT_INLINE_ASM_GNU_STYLE
5487 __asm__ __volatile__("btcl %1, %0"
5488 : "=m" (*(volatile long *)pvBitmap)
5489 : "Ir" (iBit),
5490 "m" (*(volatile long *)pvBitmap)
5491 : "memory");
5492# else
5493 __asm
5494 {
5495# ifdef RT_ARCH_AMD64
5496 mov rax, [pvBitmap]
5497 mov edx, [iBit]
5498 btc [rax], edx
5499# else
5500 mov eax, [pvBitmap]
5501 mov edx, [iBit]
5502 btc [eax], edx
5503# endif
5504 }
5505# endif
5506}
5507#endif
5508
5509
5510/**
5511 * Atomically toggles a bit in a bitmap, ordered.
5512 *
5513 * @param pvBitmap Pointer to the bitmap.
5514 * @param iBit The bit to toggle.
5515 */
5516#if RT_INLINE_ASM_EXTERNAL
5517DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5518#else
5519DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5520{
5521# if RT_INLINE_ASM_GNU_STYLE
5522 __asm__ __volatile__("lock; btcl %1, %0"
5523 : "=m" (*(volatile long *)pvBitmap)
5524 : "Ir" (iBit),
5525 "m" (*(volatile long *)pvBitmap)
5526 : "memory");
5527# else
5528 __asm
5529 {
5530# ifdef RT_ARCH_AMD64
5531 mov rax, [pvBitmap]
5532 mov edx, [iBit]
5533 lock btc [rax], edx
5534# else
5535 mov eax, [pvBitmap]
5536 mov edx, [iBit]
5537 lock btc [eax], edx
5538# endif
5539 }
5540# endif
5541}
5542#endif
5543
5544
5545/**
5546 * Tests and sets a bit in a bitmap.
5547 *
5548 * @returns true if the bit was set.
5549 * @returns false if the bit was clear.
5550 * @param pvBitmap Pointer to the bitmap.
5551 * @param iBit The bit to test and set.
5552 */
5553#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5554DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5555#else
5556DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5557{
5558 union { bool f; uint32_t u32; uint8_t u8; } rc;
5559# if RT_INLINE_ASM_USES_INTRIN
5560 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5561
5562# elif RT_INLINE_ASM_GNU_STYLE
5563 __asm__ __volatile__("btsl %2, %1\n\t"
5564 "setc %b0\n\t"
5565 "andl $1, %0\n\t"
5566 : "=q" (rc.u32),
5567 "=m" (*(volatile long *)pvBitmap)
5568 : "Ir" (iBit),
5569 "m" (*(volatile long *)pvBitmap)
5570 : "memory");
5571# else
5572 __asm
5573 {
5574 mov edx, [iBit]
5575# ifdef RT_ARCH_AMD64
5576 mov rax, [pvBitmap]
5577 bts [rax], edx
5578# else
5579 mov eax, [pvBitmap]
5580 bts [eax], edx
5581# endif
5582 setc al
5583 and eax, 1
5584 mov [rc.u32], eax
5585 }
5586# endif
5587 return rc.f;
5588}
5589#endif
5590
5591
5592/**
5593 * Atomically tests and sets a bit in a bitmap, ordered.
5594 *
5595 * @returns true if the bit was set.
5596 * @returns false if the bit was clear.
5597 * @param pvBitmap Pointer to the bitmap.
5598 * @param iBit The bit to set.
5599 */
5600#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5601DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5602#else
5603DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5604{
5605 union { bool f; uint32_t u32; uint8_t u8; } rc;
5606# if RT_INLINE_ASM_USES_INTRIN
5607 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5608# elif RT_INLINE_ASM_GNU_STYLE
5609 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5610 "setc %b0\n\t"
5611 "andl $1, %0\n\t"
5612 : "=q" (rc.u32),
5613 "=m" (*(volatile long *)pvBitmap)
5614 : "Ir" (iBit),
5615 "m" (*(volatile long *)pvBitmap)
5616 : "memory");
5617# else
5618 __asm
5619 {
5620 mov edx, [iBit]
5621# ifdef RT_ARCH_AMD64
5622 mov rax, [pvBitmap]
5623 lock bts [rax], edx
5624# else
5625 mov eax, [pvBitmap]
5626 lock bts [eax], edx
5627# endif
5628 setc al
5629 and eax, 1
5630 mov [rc.u32], eax
5631 }
5632# endif
5633 return rc.f;
5634}
5635#endif
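
/* Usage sketch: claiming a free slot in a shared bitmap. The thread that
 * flips the bit from clear to set (return value false) owns the slot; the
 * bitmap layout below is hypothetical.
 *
 * @code
 *     static volatile uint32_t g_bmSlots[256 / 32];   // 256 slots, initially all clear
 *
 *     static int32_t ClaimSlot(void)
 *     {
 *         int32_t iSlot;
 *         for (iSlot = 0; iSlot < 256; iSlot++)
 *             if (!ASMAtomicBitTestAndSet(&g_bmSlots[0], iSlot))
 *                 return iSlot;                       // bit was clear, now set by us
 *         return -1;                                  // no free slot
 *     }
 * @endcode
 */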
5636
5637
5638/**
5639 * Tests and clears a bit in a bitmap.
5640 *
5641 * @returns true if the bit was set.
5642 * @returns false if the bit was clear.
5643 * @param pvBitmap Pointer to the bitmap.
5644 * @param iBit The bit to test and clear.
5645 */
5646#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5647DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5648#else
5649DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5650{
5651 union { bool f; uint32_t u32; uint8_t u8; } rc;
5652# if RT_INLINE_ASM_USES_INTRIN
5653 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5654
5655# elif RT_INLINE_ASM_GNU_STYLE
5656 __asm__ __volatile__("btrl %2, %1\n\t"
5657 "setc %b0\n\t"
5658 "andl $1, %0\n\t"
5659 : "=q" (rc.u32),
5660 "=m" (*(volatile long *)pvBitmap)
5661 : "Ir" (iBit),
5662 "m" (*(volatile long *)pvBitmap)
5663 : "memory");
5664# else
5665 __asm
5666 {
5667 mov edx, [iBit]
5668# ifdef RT_ARCH_AMD64
5669 mov rax, [pvBitmap]
5670 btr [rax], edx
5671# else
5672 mov eax, [pvBitmap]
5673 btr [eax], edx
5674# endif
5675 setc al
5676 and eax, 1
5677 mov [rc.u32], eax
5678 }
5679# endif
5680 return rc.f;
5681}
5682#endif
5683
5684
5685/**
5686 * Atomically tests and clears a bit in a bitmap, ordered.
5687 *
5688 * @returns true if the bit was set.
5689 * @returns false if the bit was clear.
5690 * @param pvBitmap Pointer to the bitmap.
5691 * @param iBit The bit to test and clear.
5692 * @remark No memory barrier, take care on smp.
5693 */
5694#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5695DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5696#else
5697DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5698{
5699 union { bool f; uint32_t u32; uint8_t u8; } rc;
5700# if RT_INLINE_ASM_USES_INTRIN
5701 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5702
5703# elif RT_INLINE_ASM_GNU_STYLE
5704 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5705 "setc %b0\n\t"
5706 "andl $1, %0\n\t"
5707 : "=q" (rc.u32),
5708 "=m" (*(volatile long *)pvBitmap)
5709 : "Ir" (iBit),
5710 "m" (*(volatile long *)pvBitmap)
5711 : "memory");
5712# else
5713 __asm
5714 {
5715 mov edx, [iBit]
5716# ifdef RT_ARCH_AMD64
5717 mov rax, [pvBitmap]
5718 lock btr [rax], edx
5719# else
5720 mov eax, [pvBitmap]
5721 lock btr [eax], edx
5722# endif
5723 setc al
5724 and eax, 1
5725 mov [rc.u32], eax
5726 }
5727# endif
5728 return rc.f;
5729}
5730#endif
5731
5732
5733/**
5734 * Tests and toggles a bit in a bitmap.
5735 *
5736 * @returns true if the bit was set.
5737 * @returns false if the bit was clear.
5738 * @param pvBitmap Pointer to the bitmap.
5739 * @param iBit The bit to test and toggle.
5740 */
5741#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5742DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5743#else
5744DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5745{
5746 union { bool f; uint32_t u32; uint8_t u8; } rc;
5747# if RT_INLINE_ASM_USES_INTRIN
5748 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5749
5750# elif RT_INLINE_ASM_GNU_STYLE
5751 __asm__ __volatile__("btcl %2, %1\n\t"
5752 "setc %b0\n\t"
5753 "andl $1, %0\n\t"
5754 : "=q" (rc.u32),
5755 "=m" (*(volatile long *)pvBitmap)
5756 : "Ir" (iBit),
5757 "m" (*(volatile long *)pvBitmap)
5758 : "memory");
5759# else
5760 __asm
5761 {
5762 mov edx, [iBit]
5763# ifdef RT_ARCH_AMD64
5764 mov rax, [pvBitmap]
5765 btc [rax], edx
5766# else
5767 mov eax, [pvBitmap]
5768 btc [eax], edx
5769# endif
5770 setc al
5771 and eax, 1
5772 mov [rc.u32], eax
5773 }
5774# endif
5775 return rc.f;
5776}
5777#endif
5778
5779
5780/**
5781 * Atomically tests and toggles a bit in a bitmap, ordered.
5782 *
5783 * @returns true if the bit was set.
5784 * @returns false if the bit was clear.
5785 * @param pvBitmap Pointer to the bitmap.
5786 * @param iBit The bit to test and toggle.
5787 */
5788#if RT_INLINE_ASM_EXTERNAL
5789DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5790#else
5791DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5792{
5793 union { bool f; uint32_t u32; uint8_t u8; } rc;
5794# if RT_INLINE_ASM_GNU_STYLE
5795 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5796 "setc %b0\n\t"
5797 "andl $1, %0\n\t"
5798 : "=q" (rc.u32),
5799 "=m" (*(volatile long *)pvBitmap)
5800 : "Ir" (iBit),
5801 "m" (*(volatile long *)pvBitmap)
5802 : "memory");
5803# else
5804 __asm
5805 {
5806 mov edx, [iBit]
5807# ifdef RT_ARCH_AMD64
5808 mov rax, [pvBitmap]
5809 lock btc [rax], edx
5810# else
5811 mov eax, [pvBitmap]
5812 lock btc [eax], edx
5813# endif
5814 setc al
5815 and eax, 1
5816 mov [rc.u32], eax
5817 }
5818# endif
5819 return rc.f;
5820}
5821#endif
5822
5823
5824/**
5825 * Tests if a bit in a bitmap is set.
5826 *
5827 * @returns true if the bit is set.
5828 * @returns false if the bit is clear.
5829 * @param pvBitmap Pointer to the bitmap.
5830 * @param iBit The bit to test.
5831 */
5832#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5833DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5834#else
5835DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5836{
5837 union { bool f; uint32_t u32; uint8_t u8; } rc;
5838# if RT_INLINE_ASM_USES_INTRIN
5839 rc.u32 = _bittest((long *)pvBitmap, iBit);
5840# elif RT_INLINE_ASM_GNU_STYLE
5841
5842 __asm__ __volatile__("btl %2, %1\n\t"
5843 "setc %b0\n\t"
5844 "andl $1, %0\n\t"
5845 : "=q" (rc.u32)
5846 : "m" (*(const volatile long *)pvBitmap),
5847 "Ir" (iBit)
5848 : "memory");
5849# else
5850 __asm
5851 {
5852 mov edx, [iBit]
5853# ifdef RT_ARCH_AMD64
5854 mov rax, [pvBitmap]
5855 bt [rax], edx
5856# else
5857 mov eax, [pvBitmap]
5858 bt [eax], edx
5859# endif
5860 setc al
5861 and eax, 1
5862 mov [rc.u32], eax
5863 }
5864# endif
5865 return rc.f;
5866}
5867#endif
5868
5869
5870/**
5871 * Clears a bit range within a bitmap.
5872 *
5873 * @param pvBitmap Pointer to the bitmap.
5874 * @param iBitStart The first bit to clear.
5875 * @param iBitEnd The first bit not to clear.
5876 */
5877DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5878{
5879 if (iBitStart < iBitEnd)
5880 {
5881 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5882 int iStart = iBitStart & ~31;
5883 int iEnd = iBitEnd & ~31;
5884 if (iStart == iEnd)
5885 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5886 else
5887 {
5888 /* bits in first dword. */
5889 if (iBitStart & 31)
5890 {
5891 *pu32 &= (1 << (iBitStart & 31)) - 1;
5892 pu32++;
5893 iBitStart = iStart + 32;
5894 }
5895
5896 /* whole dword. */
5897 if (iBitStart != iEnd)
5898 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5899
5900 /* bits in last dword. */
5901 if (iBitEnd & 31)
5902 {
5903 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5904 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5905 }
5906 }
5907 }
5908}
5909
5910
5911/**
5912 * Sets a bit range within a bitmap.
5913 *
5914 * @param pvBitmap Pointer to the bitmap.
5915 * @param iBitStart The first bit to set.
5916 * @param iBitEnd The first bit not to set.
5917 */
5918DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5919{
5920 if (iBitStart < iBitEnd)
5921 {
5922 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5923 int iStart = iBitStart & ~31;
5924 int iEnd = iBitEnd & ~31;
5925 if (iStart == iEnd)
5926 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5927 else
5928 {
5929 /* bits in first dword. */
5930 if (iBitStart & 31)
5931 {
5932 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5933 pu32++;
5934 iBitStart = iStart + 32;
5935 }
5936
5937 /* whole dword. */
5938 if (iBitStart != iEnd)
5939 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5940
5941 /* bits in last dword. */
5942 if (iBitEnd & 31)
5943 {
5944 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5945 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5946 }
5947 }
5948 }
5949}
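
/* Usage sketch: marking a run of pages as allocated and freeing it again.
 * In both range helpers iBitEnd is exclusive; the bitmap is hypothetical.
 *
 * @code
 *     static uint32_t g_bmPages[1024 / 32];           // 1024 pages
 *
 *     ASMBitSetRange(g_bmPages, 10, 14);              // sets bits 10, 11, 12 and 13
 *     ASMBitClearRange(g_bmPages, 10, 14);            // clears the same four bits
 * @endcode
 */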
5950
5951
5952/**
5953 * Finds the first clear bit in a bitmap.
5954 *
5955 * @returns Index of the first zero bit.
5956 * @returns -1 if no clear bit was found.
5957 * @param pvBitmap Pointer to the bitmap.
5958 * @param cBits The number of bits in the bitmap. Multiple of 32.
5959 */
5960#if RT_INLINE_ASM_EXTERNAL
5961DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5962#else
5963DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5964{
5965 if (cBits)
5966 {
5967 int32_t iBit;
5968# if RT_INLINE_ASM_GNU_STYLE
5969 RTCCUINTREG uEAX, uECX, uEDI;
5970 cBits = RT_ALIGN_32(cBits, 32);
5971 __asm__ __volatile__("repe; scasl\n\t"
5972 "je 1f\n\t"
5973# ifdef RT_ARCH_AMD64
5974 "lea -4(%%rdi), %%rdi\n\t"
5975 "xorl (%%rdi), %%eax\n\t"
5976 "subq %5, %%rdi\n\t"
5977# else
5978 "lea -4(%%edi), %%edi\n\t"
5979 "xorl (%%edi), %%eax\n\t"
5980 "subl %5, %%edi\n\t"
5981# endif
5982 "shll $3, %%edi\n\t"
5983 "bsfl %%eax, %%edx\n\t"
5984 "addl %%edi, %%edx\n\t"
5985 "1:\t\n"
5986 : "=d" (iBit),
5987 "=&c" (uECX),
5988 "=&D" (uEDI),
5989 "=&a" (uEAX)
5990 : "0" (0xffffffff),
5991 "mr" (pvBitmap),
5992 "1" (cBits >> 5),
5993 "2" (pvBitmap),
5994 "3" (0xffffffff));
5995# else
5996 cBits = RT_ALIGN_32(cBits, 32);
5997 __asm
5998 {
5999# ifdef RT_ARCH_AMD64
6000 mov rdi, [pvBitmap]
6001 mov rbx, rdi
6002# else
6003 mov edi, [pvBitmap]
6004 mov ebx, edi
6005# endif
6006 mov edx, 0ffffffffh
6007 mov eax, edx
6008 mov ecx, [cBits]
6009 shr ecx, 5
6010 repe scasd
6011 je done
6012
6013# ifdef RT_ARCH_AMD64
6014 lea rdi, [rdi - 4]
6015 xor eax, [rdi]
6016 sub rdi, rbx
6017# else
6018 lea edi, [edi - 4]
6019 xor eax, [edi]
6020 sub edi, ebx
6021# endif
6022 shl edi, 3
6023 bsf edx, eax
6024 add edx, edi
6025 done:
6026 mov [iBit], edx
6027 }
6028# endif
6029 return iBit;
6030 }
6031 return -1;
6032}
6033#endif
6034
6035
6036/**
6037 * Finds the next clear bit in a bitmap.
6038 *
6039 * @returns Index of the first zero bit.
6040 * @returns -1 if no clear bit was found.
6041 * @param pvBitmap Pointer to the bitmap.
6042 * @param cBits The number of bits in the bitmap. Multiple of 32.
6043 * @param iBitPrev The bit returned from the last search.
6044 * The search will start at iBitPrev + 1.
6045 */
6046#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6047DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6048#else
6049DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6050{
6051 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6052 int iBit = ++iBitPrev & 31;
6053 if (iBit)
6054 {
6055 /*
6056 * Inspect the 32-bit word containing the unaligned bit.
6057 */
6058 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6059
6060# if RT_INLINE_ASM_USES_INTRIN
6061 unsigned long ulBit = 0;
6062 if (_BitScanForward(&ulBit, u32))
6063 return ulBit + iBitPrev;
6064# else
6065# if RT_INLINE_ASM_GNU_STYLE
6066 __asm__ __volatile__("bsf %1, %0\n\t"
6067 "jnz 1f\n\t"
6068 "movl $-1, %0\n\t"
6069 "1:\n\t"
6070 : "=r" (iBit)
6071 : "r" (u32));
6072# else
6073 __asm
6074 {
6075 mov edx, [u32]
6076 bsf eax, edx
6077 jnz done
6078 mov eax, 0ffffffffh
6079 done:
6080 mov [iBit], eax
6081 }
6082# endif
6083 if (iBit >= 0)
6084 return iBit + iBitPrev;
6085# endif
6086
6087 /*
6088 * Skip ahead and see if there is anything left to search.
6089 */
6090 iBitPrev |= 31;
6091 iBitPrev++;
6092 if (cBits <= (uint32_t)iBitPrev)
6093 return -1;
6094 }
6095
6096 /*
6097 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6098 */
6099 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6100 if (iBit >= 0)
6101 iBit += iBitPrev;
6102 return iBit;
6103}
6104#endif
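
/* Usage sketch: walking every clear bit in a bitmap with the First/Next pair.
 * cBits must be a multiple of 32; the bitmap and the per-bit action are
 * hypothetical.
 *
 * @code
 *     int iBit = ASMBitFirstClear(g_bmPages, 1024);
 *     while (iBit >= 0)
 *     {
 *         HandleFreePage(iBit);                            // hypothetical
 *         iBit = ASMBitNextClear(g_bmPages, 1024, iBit);
 *     }
 * @endcode
 */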
6105
6106
6107/**
6108 * Finds the first set bit in a bitmap.
6109 *
6110 * @returns Index of the first set bit.
6111 * @returns -1 if no set bit was found.
6112 * @param pvBitmap Pointer to the bitmap.
6113 * @param cBits The number of bits in the bitmap. Multiple of 32.
6114 */
6115#if RT_INLINE_ASM_EXTERNAL
6116DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6117#else
6118DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6119{
6120 if (cBits)
6121 {
6122 int32_t iBit;
6123# if RT_INLINE_ASM_GNU_STYLE
6124 RTCCUINTREG uEAX, uECX, uEDI;
6125 cBits = RT_ALIGN_32(cBits, 32);
6126 __asm__ __volatile__("repe; scasl\n\t"
6127 "je 1f\n\t"
6128# ifdef RT_ARCH_AMD64
6129 "lea -4(%%rdi), %%rdi\n\t"
6130 "movl (%%rdi), %%eax\n\t"
6131 "subq %5, %%rdi\n\t"
6132# else
6133 "lea -4(%%edi), %%edi\n\t"
6134 "movl (%%edi), %%eax\n\t"
6135 "subl %5, %%edi\n\t"
6136# endif
6137 "shll $3, %%edi\n\t"
6138 "bsfl %%eax, %%edx\n\t"
6139 "addl %%edi, %%edx\n\t"
6140 "1:\t\n"
6141 : "=d" (iBit),
6142 "=&c" (uECX),
6143 "=&D" (uEDI),
6144 "=&a" (uEAX)
6145 : "0" (0xffffffff),
6146 "mr" (pvBitmap),
6147 "1" (cBits >> 5),
6148 "2" (pvBitmap),
6149 "3" (0));
6150# else
6151 cBits = RT_ALIGN_32(cBits, 32);
6152 __asm
6153 {
6154# ifdef RT_ARCH_AMD64
6155 mov rdi, [pvBitmap]
6156 mov rbx, rdi
6157# else
6158 mov edi, [pvBitmap]
6159 mov ebx, edi
6160# endif
6161 mov edx, 0ffffffffh
6162 xor eax, eax
6163 mov ecx, [cBits]
6164 shr ecx, 5
6165 repe scasd
6166 je done
6167# ifdef RT_ARCH_AMD64
6168 lea rdi, [rdi - 4]
6169 mov eax, [rdi]
6170 sub rdi, rbx
6171# else
6172 lea edi, [edi - 4]
6173 mov eax, [edi]
6174 sub edi, ebx
6175# endif
6176 shl edi, 3
6177 bsf edx, eax
6178 add edx, edi
6179 done:
6180 mov [iBit], edx
6181 }
6182# endif
6183 return iBit;
6184 }
6185 return -1;
6186}
6187#endif
6188
6189
6190/**
6191 * Finds the next set bit in a bitmap.
6192 *
6193 * @returns Index of the next set bit.
6194 * @returns -1 if no set bit was found.
6195 * @param pvBitmap Pointer to the bitmap.
6196 * @param cBits The number of bits in the bitmap. Multiple of 32.
6197 * @param iBitPrev The bit returned from the last search.
6198 * The search will start at iBitPrev + 1.
6199 */
6200#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6201DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6202#else
6203DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6204{
6205 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6206 int iBit = ++iBitPrev & 31;
6207 if (iBit)
6208 {
6209 /*
6210 * Inspect the 32-bit word containing the unaligned bit.
6211 */
6212 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6213
6214# if RT_INLINE_ASM_USES_INTRIN
6215 unsigned long ulBit = 0;
6216 if (_BitScanForward(&ulBit, u32))
6217 return ulBit + iBitPrev;
6218# else
6219# if RT_INLINE_ASM_GNU_STYLE
6220 __asm__ __volatile__("bsf %1, %0\n\t"
6221 "jnz 1f\n\t"
6222 "movl $-1, %0\n\t"
6223 "1:\n\t"
6224 : "=r" (iBit)
6225 : "r" (u32));
6226# else
6227 __asm
6228 {
6229 mov edx, [u32]
6230 bsf eax, edx
6231 jnz done
6232 mov eax, 0ffffffffh
6233 done:
6234 mov [iBit], eax
6235 }
6236# endif
6237 if (iBit >= 0)
6238 return iBit + iBitPrev;
6239# endif
6240
6241 /*
6242 * Skip ahead and see if there is anything left to search.
6243 */
6244 iBitPrev |= 31;
6245 iBitPrev++;
6246 if (cBits <= (uint32_t)iBitPrev)
6247 return -1;
6248 }
6249
6250 /*
6251 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6252 */
6253 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6254 if (iBit >= 0)
6255 iBit += iBitPrev;
6256 return iBit;
6257}
6258#endif
6259
6260
6261/**
6262 * Finds the first bit which is set in the given 32-bit integer.
6263 * Bits are numbered from 1 (least significant) to 32.
6264 *
6265 * @returns index [1..32] of the first set bit.
6266 * @returns 0 if all bits are cleared.
6267 * @param u32 Integer to search for set bits.
6268 * @remark Similar to ffs() in BSD.
6269 */
6270DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6271{
6272# if RT_INLINE_ASM_USES_INTRIN
6273 unsigned long iBit;
6274 if (_BitScanForward(&iBit, u32))
6275 iBit++;
6276 else
6277 iBit = 0;
6278# elif RT_INLINE_ASM_GNU_STYLE
6279 uint32_t iBit;
6280 __asm__ __volatile__("bsf %1, %0\n\t"
6281 "jnz 1f\n\t"
6282 "xorl %0, %0\n\t"
6283 "jmp 2f\n"
6284 "1:\n\t"
6285 "incl %0\n"
6286 "2:\n\t"
6287 : "=r" (iBit)
6288 : "rm" (u32));
6289# else
6290 uint32_t iBit;
6291 _asm
6292 {
6293 bsf eax, [u32]
6294 jnz found
6295 xor eax, eax
6296 jmp done
6297 found:
6298 inc eax
6299 done:
6300 mov [iBit], eax
6301 }
6302# endif
6303 return iBit;
6304}
6305
6306
6307/**
6308 * Finds the first bit which is set in the given 32-bit integer.
6309 * Bits are numbered from 1 (least significant) to 32.
6310 *
6311 * @returns index [1..32] of the first set bit.
6312 * @returns 0 if all bits are cleared.
6313 * @param i32 Integer to search for set bits.
6314 * @remark Similar to ffs() in BSD.
6315 */
6316DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6317{
6318 return ASMBitFirstSetU32((uint32_t)i32);
6319}
6320
6321
6322/**
6323 * Finds the last bit which is set in the given 32-bit integer.
6324 * Bits are numbered from 1 (least significant) to 32.
6325 *
6326 * @returns index [1..32] of the last set bit.
6327 * @returns 0 if all bits are cleared.
6328 * @param u32 Integer to search for set bits.
6329 * @remark Similar to fls() in BSD.
6330 */
6331DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6332{
6333# if RT_INLINE_ASM_USES_INTRIN
6334 unsigned long iBit;
6335 if (_BitScanReverse(&iBit, u32))
6336 iBit++;
6337 else
6338 iBit = 0;
6339# elif RT_INLINE_ASM_GNU_STYLE
6340 uint32_t iBit;
6341 __asm__ __volatile__("bsrl %1, %0\n\t"
6342 "jnz 1f\n\t"
6343 "xorl %0, %0\n\t"
6344 "jmp 2f\n"
6345 "1:\n\t"
6346 "incl %0\n"
6347 "2:\n\t"
6348 : "=r" (iBit)
6349 : "rm" (u32));
6350# else
6351 uint32_t iBit;
6352 _asm
6353 {
6354 bsr eax, [u32]
6355 jnz found
6356 xor eax, eax
6357 jmp done
6358 found:
6359 inc eax
6360 done:
6361 mov [iBit], eax
6362 }
6363# endif
6364 return iBit;
6365}
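
/* Usage sketch: because the returned index is 1-based, the floor of the
 * base-2 logarithm of a non-zero value is simply the result minus one. The
 * helper name is hypothetical.
 *
 * @code
 *     static unsigned Log2Floor(uint32_t u32)   // caller guarantees u32 != 0
 *     {
 *         return ASMBitLastSetU32(u32) - 1;     // e.g. 0x8000 -> 16 - 1 = 15
 *     }
 * @endcode
 */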
6366
6367
6368/**
6369 * Finds the last bit which is set in the given 32-bit integer.
6370 * Bits are numbered from 1 (least significant) to 32.
6371 *
6372 * @returns index [1..32] of the last set bit.
6373 * @returns 0 if all bits are cleared.
6374 * @param i32 Integer to search for set bits.
6375 * @remark Similar to fls() in BSD.
6376 */
6377DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6378{
6379 return ASMBitLastSetU32((uint32_t)i32);
6380}
6381
6382/**
6383 * Reverse the byte order of the given 16-bit integer.
6384 *
6385 * @returns The value with the byte order reversed.
6386 * @param u16 16-bit integer value.
6387 */
6388DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6389{
6390#if RT_INLINE_ASM_USES_INTRIN
6391 u16 = _byteswap_ushort(u16);
6392#elif RT_INLINE_ASM_GNU_STYLE
6393 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6394#else
6395 _asm
6396 {
6397 mov ax, [u16]
6398 ror ax, 8
6399 mov [u16], ax
6400 }
6401#endif
6402 return u16;
6403}
6404
6405/**
6406 * Reverse the byte order of the given 32-bit integer.
6407 *
6408 * @returns The value with the byte order reversed.
6409 * @param u32 32-bit integer value.
6410 */
6411DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6412{
6413#if RT_INLINE_ASM_USES_INTRIN
6414 u32 = _byteswap_ulong(u32);
6415#elif RT_INLINE_ASM_GNU_STYLE
6416 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6417#else
6418 _asm
6419 {
6420 mov eax, [u32]
6421 bswap eax
6422 mov [u32], eax
6423 }
6424#endif
6425 return u32;
6426}
6427
6428
6429/**
6430 * Reverse the byte order of the given 64-bit integer.
6431 *
6432 * @returns The value with the byte order reversed.
6433 * @param u64 64-bit integer value.
6434 */
6435DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6436{
6437#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6438 u64 = _byteswap_uint64(u64);
6439#else
6440 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6441 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6442#endif
6443 return u64;
6444}
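
/* Usage sketch: producing a big-endian (network order) representation on a
 * little-endian host before writing a value to the wire. On a big-endian host
 * the swap would have to be skipped; that check is left out here for brevity.
 *
 * @code
 *     uint32_t u32Host = UINT32_C(0x12345678);
 *     uint32_t u32Wire = ASMByteSwapU32(u32Host);   // value 0x78563412, i.e. the
 *                                                   // bytes 12 34 56 78 in memory
 * @endcode
 */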
6445
6446
6447/** @} */
6448
6449
6450/** @} */
6451#endif
6452