VirtualBox

source: vbox/trunk/include/iprt/asm.h@18552

Last change on this file since 18552 was 18521, checked in by vboxsync, 16 years ago

iprt/asm.h: Finally added string I/O.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 162.5 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
 38 * Defined as 1 if we're using _MSC_VER >= 1400.
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outbytestring)
56# pragma intrinsic(__outword)
57# pragma intrinsic(__outwordstring)
58# pragma intrinsic(__outdword)
59# pragma intrinsic(__outdwordstring)
60# pragma intrinsic(__inbyte)
61# pragma intrinsic(__inbytestring)
62# pragma intrinsic(__inword)
63# pragma intrinsic(__inwordstring)
64# pragma intrinsic(__indword)
65# pragma intrinsic(__indwordstring)
66# pragma intrinsic(__invlpg)
67# pragma intrinsic(__stosd)
68# pragma intrinsic(__stosw)
69# pragma intrinsic(__stosb)
70# pragma intrinsic(__readcr0)
71# pragma intrinsic(__readcr2)
72# pragma intrinsic(__readcr3)
73# pragma intrinsic(__readcr4)
74# pragma intrinsic(__writecr0)
75# pragma intrinsic(__writecr3)
76# pragma intrinsic(__writecr4)
77# pragma intrinsic(__readdr)
78# pragma intrinsic(__writedr)
79# pragma intrinsic(_BitScanForward)
80# pragma intrinsic(_BitScanReverse)
81# pragma intrinsic(_bittest)
82# pragma intrinsic(_bittestandset)
83# pragma intrinsic(_bittestandreset)
84# pragma intrinsic(_bittestandcomplement)
85# pragma intrinsic(_byteswap_ushort)
86# pragma intrinsic(_byteswap_ulong)
87# pragma intrinsic(_interlockedbittestandset)
88# pragma intrinsic(_interlockedbittestandreset)
89# pragma intrinsic(_InterlockedAnd)
90# pragma intrinsic(_InterlockedOr)
91# pragma intrinsic(_InterlockedIncrement)
92# pragma intrinsic(_InterlockedDecrement)
93# pragma intrinsic(_InterlockedExchange)
94# pragma intrinsic(_InterlockedExchangeAdd)
95# pragma intrinsic(_InterlockedCompareExchange)
96# pragma intrinsic(_InterlockedCompareExchange64)
97# ifdef RT_ARCH_AMD64
98# pragma intrinsic(__stosq)
99# pragma intrinsic(__readcr8)
100# pragma intrinsic(__writecr8)
101# pragma intrinsic(_byteswap_uint64)
102# pragma intrinsic(_InterlockedExchange64)
103# endif
104# endif
105#endif
106#ifndef RT_INLINE_ASM_USES_INTRIN
107# define RT_INLINE_ASM_USES_INTRIN 0
108#endif
109
110
111
112/** @defgroup grp_asm ASM - Assembly Routines
113 * @ingroup grp_rt
114 *
 115 * @remarks The difference between ordered and unordered atomic operations is that
 116 * the former complete all outstanding reads and writes before continuing,
 117 * while the latter make no promises about the order. Ordered
 118 * operations do not, however, appear to make any 100% promise as to whether
 119 * the operation itself completes before any subsequent memory access.
 120 * (Please correct if wrong.)
121 *
122 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
123 * are unordered (note the Uo).
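 *
 * A minimal illustration (editor's sketch; pShared and its members are
 * hypothetical, the ASMAtomicWriteU32 / ASMAtomicUoWriteU32 pair is found
 * further down in this file):
 * @code
 * pShared->cbData = cbData;                      // plain store
 * ASMAtomicWriteU32(&pShared->fReady, 1);        // ordered: the cbData store is
 *                                                // completed before fReady is set
 * ASMAtomicUoWriteU32(&pShared->cHits, cHits);   // unordered: atomic, but no
 *                                                // ordering guarantee vs. other accesses
 * @endcode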
124 *
125 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
126 * or even optimize assembler instructions away. For instance, in the following code
127 * the second rdmsr instruction is optimized away because gcc treats that instruction
128 * as deterministic:
129 *
130 * @code
 131 * static inline uint32_t rdmsr_low(int idx)
 132 * {
 133 * uint32_t low;
 134 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
     * return low;
 135 * }
136 * ...
137 * uint32_t msr1 = rdmsr_low(1);
138 * foo(msr1);
139 * msr1 = rdmsr_low(1);
140 * bar(msr1);
141 * @endcode
142 *
143 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
144 * use the result of the first call as input parameter for bar() as well. For rdmsr this
145 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
146 * machine status information in general.
147 *
148 * @{
149 */
150
151/** @def RT_INLINE_ASM_EXTERNAL
152 * Defined as 1 if the compiler does not support inline assembly.
153 * The ASM* functions will then be implemented in an external .asm file.
154 *
 155 * @remark The Microsoft AMD64 compiler does not support inline assembly, which is
 156 * why the ASM* functions are external when targeting AMD64 with it.
157 */
158#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
159# define RT_INLINE_ASM_EXTERNAL 1
160#else
161# define RT_INLINE_ASM_EXTERNAL 0
162#endif
163
164/** @def RT_INLINE_ASM_GNU_STYLE
165 * Defined as 1 if the compiler understands GNU style inline assembly.
166 */
167#if defined(_MSC_VER)
168# define RT_INLINE_ASM_GNU_STYLE 0
169#else
170# define RT_INLINE_ASM_GNU_STYLE 1
171#endif
172
173
174/** @todo find a more proper place for this structure? */
175#pragma pack(1)
176/** IDTR */
177typedef struct RTIDTR
178{
179 /** Size of the IDT. */
180 uint16_t cbIdt;
181 /** Address of the IDT. */
182 uintptr_t pIdt;
183} RTIDTR, *PRTIDTR;
184#pragma pack()
185
186#pragma pack(1)
187/** GDTR */
188typedef struct RTGDTR
189{
190 /** Size of the GDT. */
191 uint16_t cbGdt;
192 /** Address of the GDT. */
193 uintptr_t pGdt;
194} RTGDTR, *PRTGDTR;
195#pragma pack()
196
197
198/** @def ASMReturnAddress
199 * Gets the return address of the current (or calling if you like) function or method.
200 */
201#ifdef _MSC_VER
202# ifdef __cplusplus
203extern "C"
204# endif
205void * _ReturnAddress(void);
206# pragma intrinsic(_ReturnAddress)
207# define ASMReturnAddress() _ReturnAddress()
208#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
209# define ASMReturnAddress() __builtin_return_address(0)
210#else
211# error "Unsupported compiler."
212#endif
213
214
215/**
216 * Gets the content of the IDTR CPU register.
217 * @param pIdtr Where to store the IDTR contents.
218 */
219#if RT_INLINE_ASM_EXTERNAL
220DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
221#else
222DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
223{
224# if RT_INLINE_ASM_GNU_STYLE
225 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
226# else
227 __asm
228 {
229# ifdef RT_ARCH_AMD64
230 mov rax, [pIdtr]
231 sidt [rax]
232# else
233 mov eax, [pIdtr]
234 sidt [eax]
235# endif
236 }
237# endif
238}
239#endif
240
241
242/**
243 * Sets the content of the IDTR CPU register.
244 * @param pIdtr Where to load the IDTR contents from
245 */
246#if RT_INLINE_ASM_EXTERNAL
247DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
248#else
249DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
250{
251# if RT_INLINE_ASM_GNU_STYLE
252 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
253# else
254 __asm
255 {
256# ifdef RT_ARCH_AMD64
257 mov rax, [pIdtr]
258 lidt [rax]
259# else
260 mov eax, [pIdtr]
261 lidt [eax]
262# endif
263 }
264# endif
265}
266#endif
267
268
269/**
270 * Gets the content of the GDTR CPU register.
271 * @param pGdtr Where to store the GDTR contents.
272 */
273#if RT_INLINE_ASM_EXTERNAL
274DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
275#else
276DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
277{
278# if RT_INLINE_ASM_GNU_STYLE
279 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
280# else
281 __asm
282 {
283# ifdef RT_ARCH_AMD64
284 mov rax, [pGdtr]
285 sgdt [rax]
286# else
287 mov eax, [pGdtr]
288 sgdt [eax]
289# endif
290 }
291# endif
292}
293#endif
294
295/**
296 * Get the cs register.
297 * @returns cs.
298 */
299#if RT_INLINE_ASM_EXTERNAL
300DECLASM(RTSEL) ASMGetCS(void);
301#else
302DECLINLINE(RTSEL) ASMGetCS(void)
303{
304 RTSEL SelCS;
305# if RT_INLINE_ASM_GNU_STYLE
306 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
307# else
308 __asm
309 {
310 mov ax, cs
311 mov [SelCS], ax
312 }
313# endif
314 return SelCS;
315}
316#endif
317
318
319/**
320 * Get the DS register.
321 * @returns DS.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(RTSEL) ASMGetDS(void);
325#else
326DECLINLINE(RTSEL) ASMGetDS(void)
327{
328 RTSEL SelDS;
329# if RT_INLINE_ASM_GNU_STYLE
330 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
331# else
332 __asm
333 {
334 mov ax, ds
335 mov [SelDS], ax
336 }
337# endif
338 return SelDS;
339}
340#endif
341
342
343/**
344 * Get the ES register.
345 * @returns ES.
346 */
347#if RT_INLINE_ASM_EXTERNAL
348DECLASM(RTSEL) ASMGetES(void);
349#else
350DECLINLINE(RTSEL) ASMGetES(void)
351{
352 RTSEL SelES;
353# if RT_INLINE_ASM_GNU_STYLE
354 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
355# else
356 __asm
357 {
358 mov ax, es
359 mov [SelES], ax
360 }
361# endif
362 return SelES;
363}
364#endif
365
366
367/**
368 * Get the FS register.
369 * @returns FS.
370 */
371#if RT_INLINE_ASM_EXTERNAL
372DECLASM(RTSEL) ASMGetFS(void);
373#else
374DECLINLINE(RTSEL) ASMGetFS(void)
375{
376 RTSEL SelFS;
377# if RT_INLINE_ASM_GNU_STYLE
378 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
379# else
380 __asm
381 {
382 mov ax, fs
383 mov [SelFS], ax
384 }
385# endif
386 return SelFS;
387}
388#endif
389
390
391/**
392 * Get the GS register.
393 * @returns GS.
394 */
395#if RT_INLINE_ASM_EXTERNAL
396DECLASM(RTSEL) ASMGetGS(void);
397#else
398DECLINLINE(RTSEL) ASMGetGS(void)
399{
400 RTSEL SelGS;
401# if RT_INLINE_ASM_GNU_STYLE
402 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
403# else
404 __asm
405 {
406 mov ax, gs
407 mov [SelGS], ax
408 }
409# endif
410 return SelGS;
411}
412#endif
413
414
415/**
416 * Get the SS register.
417 * @returns SS.
418 */
419#if RT_INLINE_ASM_EXTERNAL
420DECLASM(RTSEL) ASMGetSS(void);
421#else
422DECLINLINE(RTSEL) ASMGetSS(void)
423{
424 RTSEL SelSS;
425# if RT_INLINE_ASM_GNU_STYLE
426 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
427# else
428 __asm
429 {
430 mov ax, ss
431 mov [SelSS], ax
432 }
433# endif
434 return SelSS;
435}
436#endif
437
438
439/**
440 * Get the TR register.
441 * @returns TR.
442 */
443#if RT_INLINE_ASM_EXTERNAL
444DECLASM(RTSEL) ASMGetTR(void);
445#else
446DECLINLINE(RTSEL) ASMGetTR(void)
447{
448 RTSEL SelTR;
449# if RT_INLINE_ASM_GNU_STYLE
450 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
451# else
452 __asm
453 {
454 str ax
455 mov [SelTR], ax
456 }
457# endif
458 return SelTR;
459}
460#endif
461
462
463/**
464 * Get the [RE]FLAGS register.
465 * @returns [RE]FLAGS.
466 */
467#if RT_INLINE_ASM_EXTERNAL
468DECLASM(RTCCUINTREG) ASMGetFlags(void);
469#else
470DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
471{
472 RTCCUINTREG uFlags;
473# if RT_INLINE_ASM_GNU_STYLE
474# ifdef RT_ARCH_AMD64
475 __asm__ __volatile__("pushfq\n\t"
476 "popq %0\n\t"
477 : "=g" (uFlags));
478# else
479 __asm__ __volatile__("pushfl\n\t"
480 "popl %0\n\t"
481 : "=g" (uFlags));
482# endif
483# else
484 __asm
485 {
486# ifdef RT_ARCH_AMD64
487 pushfq
488 pop [uFlags]
489# else
490 pushfd
491 pop [uFlags]
492# endif
493 }
494# endif
495 return uFlags;
496}
497#endif
498
499
500/**
501 * Set the [RE]FLAGS register.
502 * @param uFlags The new [RE]FLAGS value.
503 */
504#if RT_INLINE_ASM_EXTERNAL
505DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
506#else
507DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
508{
509# if RT_INLINE_ASM_GNU_STYLE
510# ifdef RT_ARCH_AMD64
511 __asm__ __volatile__("pushq %0\n\t"
512 "popfq\n\t"
513 : : "g" (uFlags));
514# else
515 __asm__ __volatile__("pushl %0\n\t"
516 "popfl\n\t"
517 : : "g" (uFlags));
518# endif
519# else
520 __asm
521 {
522# ifdef RT_ARCH_AMD64
523 push [uFlags]
524 popfq
525# else
526 push [uFlags]
527 popfd
528# endif
529 }
530# endif
531}
532#endif
533
534
535/**
536 * Gets the content of the CPU timestamp counter register.
537 *
538 * @returns TSC.
539 */
540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
541DECLASM(uint64_t) ASMReadTSC(void);
542#else
543DECLINLINE(uint64_t) ASMReadTSC(void)
544{
545 RTUINT64U u;
546# if RT_INLINE_ASM_GNU_STYLE
547 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
548# else
549# if RT_INLINE_ASM_USES_INTRIN
550 u.u = __rdtsc();
551# else
552 __asm
553 {
554 rdtsc
555 mov [u.s.Lo], eax
556 mov [u.s.Hi], edx
557 }
558# endif
559# endif
560 return u.u;
561}
562#endif
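
/* Usage sketch (editor's illustration, not part of the original header): rough
 * cycle measurement with the TSC.  Note that rdtsc is not a serializing
 * instruction, so this only gives an estimate.
 *
 *     uint64_t const uTscStart = ASMReadTSC();
 *     // ... work to be measured ...
 *     uint64_t const cTicks    = ASMReadTSC() - uTscStart;
 */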
563
564
565/**
566 * Performs the cpuid instruction returning all registers.
567 *
568 * @param uOperator CPUID operation (eax).
569 * @param pvEAX Where to store eax.
570 * @param pvEBX Where to store ebx.
571 * @param pvECX Where to store ecx.
572 * @param pvEDX Where to store edx.
573 * @remark We're using void pointers to ease the use of special bitfield structures and such.
574 */
575#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
576DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
577#else
578DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
579{
580# if RT_INLINE_ASM_GNU_STYLE
581# ifdef RT_ARCH_AMD64
582 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
583 __asm__ ("cpuid\n\t"
584 : "=a" (uRAX),
585 "=b" (uRBX),
586 "=c" (uRCX),
587 "=d" (uRDX)
588 : "0" (uOperator));
589 *(uint32_t *)pvEAX = (uint32_t)uRAX;
590 *(uint32_t *)pvEBX = (uint32_t)uRBX;
591 *(uint32_t *)pvECX = (uint32_t)uRCX;
592 *(uint32_t *)pvEDX = (uint32_t)uRDX;
593# else
594 __asm__ ("xchgl %%ebx, %1\n\t"
595 "cpuid\n\t"
596 "xchgl %%ebx, %1\n\t"
597 : "=a" (*(uint32_t *)pvEAX),
598 "=r" (*(uint32_t *)pvEBX),
599 "=c" (*(uint32_t *)pvECX),
600 "=d" (*(uint32_t *)pvEDX)
601 : "0" (uOperator));
602# endif
603
604# elif RT_INLINE_ASM_USES_INTRIN
605 int aInfo[4];
606 __cpuid(aInfo, uOperator);
607 *(uint32_t *)pvEAX = aInfo[0];
608 *(uint32_t *)pvEBX = aInfo[1];
609 *(uint32_t *)pvECX = aInfo[2];
610 *(uint32_t *)pvEDX = aInfo[3];
611
612# else
613 uint32_t uEAX;
614 uint32_t uEBX;
615 uint32_t uECX;
616 uint32_t uEDX;
617 __asm
618 {
619 push ebx
620 mov eax, [uOperator]
621 cpuid
622 mov [uEAX], eax
623 mov [uEBX], ebx
624 mov [uECX], ecx
625 mov [uEDX], edx
626 pop ebx
627 }
628 *(uint32_t *)pvEAX = uEAX;
629 *(uint32_t *)pvEBX = uEBX;
630 *(uint32_t *)pvECX = uECX;
631 *(uint32_t *)pvEDX = uEDX;
632# endif
633}
634#endif
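
/* Usage sketch (editor's illustration): fetching the CPU vendor string with
 * ASMCpuId.  Leaf 0 returns the vendor identifier in EBX, EDX, ECX (in that
 * order), which is exactly what ASMIsIntelCpuEx() below compares against.
 *
 *     uint32_t uEAX, auVendor[4] = {0};
 *     ASMCpuId(0, &uEAX, &auVendor[0], &auVendor[2], &auVendor[1]);
 *     const char *pszVendor = (const char *)&auVendor[0];  // "GenuineIntel", "AuthenticAMD", ...
 */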
635
636
637/**
638 * Performs the cpuid instruction returning all registers.
 639 * Some cpuid leaves take ECX as an additional input (sub-leaf index), currently known for EAX=4.
640 *
641 * @param uOperator CPUID operation (eax).
 642 * @param uIdxECX The ECX (sub-leaf) index.
643 * @param pvEAX Where to store eax.
644 * @param pvEBX Where to store ebx.
645 * @param pvECX Where to store ecx.
646 * @param pvEDX Where to store edx.
647 * @remark We're using void pointers to ease the use of special bitfield structures and such.
648 */
649#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
650DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
651#else
652DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
653{
654# if RT_INLINE_ASM_GNU_STYLE
655# ifdef RT_ARCH_AMD64
656 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
657 __asm__ ("cpuid\n\t"
658 : "=a" (uRAX),
659 "=b" (uRBX),
660 "=c" (uRCX),
661 "=d" (uRDX)
662 : "0" (uOperator),
663 "2" (uIdxECX));
664 *(uint32_t *)pvEAX = (uint32_t)uRAX;
665 *(uint32_t *)pvEBX = (uint32_t)uRBX;
666 *(uint32_t *)pvECX = (uint32_t)uRCX;
667 *(uint32_t *)pvEDX = (uint32_t)uRDX;
668# else
669 __asm__ ("xchgl %%ebx, %1\n\t"
670 "cpuid\n\t"
671 "xchgl %%ebx, %1\n\t"
672 : "=a" (*(uint32_t *)pvEAX),
673 "=r" (*(uint32_t *)pvEBX),
674 "=c" (*(uint32_t *)pvECX),
675 "=d" (*(uint32_t *)pvEDX)
676 : "0" (uOperator),
677 "2" (uIdxECX));
678# endif
679
680# elif RT_INLINE_ASM_USES_INTRIN
681 int aInfo[4];
 682 /* Note: __cpuid has no sub-leaf parameter, so uIdxECX is not applied here (newer compilers provide __cpuidex for this). */
683 __cpuid(aInfo, uOperator);
684 *(uint32_t *)pvEAX = aInfo[0];
685 *(uint32_t *)pvEBX = aInfo[1];
686 *(uint32_t *)pvECX = aInfo[2];
687 *(uint32_t *)pvEDX = aInfo[3];
688
689# else
690 uint32_t uEAX;
691 uint32_t uEBX;
692 uint32_t uECX;
693 uint32_t uEDX;
694 __asm
695 {
696 push ebx
697 mov eax, [uOperator]
698 mov ecx, [uIdxECX]
699 cpuid
700 mov [uEAX], eax
701 mov [uEBX], ebx
702 mov [uECX], ecx
703 mov [uEDX], edx
704 pop ebx
705 }
706 *(uint32_t *)pvEAX = uEAX;
707 *(uint32_t *)pvEBX = uEBX;
708 *(uint32_t *)pvECX = uECX;
709 *(uint32_t *)pvEDX = uEDX;
710# endif
711}
712#endif
713
714
715/**
716 * Performs the cpuid instruction returning ecx and edx.
717 *
718 * @param uOperator CPUID operation (eax).
719 * @param pvECX Where to store ecx.
720 * @param pvEDX Where to store edx.
721 * @remark We're using void pointers to ease the use of special bitfield structures and such.
722 */
723#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
724DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
725#else
726DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
727{
728 uint32_t uEBX;
729 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
730}
731#endif
732
733
734/**
735 * Performs the cpuid instruction returning edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @returns EDX after cpuid operation.
739 */
740#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
741DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
742#else
743DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
744{
745 RTCCUINTREG xDX;
746# if RT_INLINE_ASM_GNU_STYLE
747# ifdef RT_ARCH_AMD64
748 RTCCUINTREG uSpill;
749 __asm__ ("cpuid"
750 : "=a" (uSpill),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "rbx", "rcx");
754# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
755 __asm__ ("push %%ebx\n\t"
756 "cpuid\n\t"
757 "pop %%ebx\n\t"
758 : "=a" (uOperator),
759 "=d" (xDX)
760 : "0" (uOperator)
761 : "ecx");
762# else
763 __asm__ ("cpuid"
764 : "=a" (uOperator),
765 "=d" (xDX)
766 : "0" (uOperator)
767 : "ebx", "ecx");
768# endif
769
770# elif RT_INLINE_ASM_USES_INTRIN
771 int aInfo[4];
772 __cpuid(aInfo, uOperator);
773 xDX = aInfo[3];
774
775# else
776 __asm
777 {
778 push ebx
779 mov eax, [uOperator]
780 cpuid
781 mov [xDX], edx
782 pop ebx
783 }
784# endif
785 return (uint32_t)xDX;
786}
787#endif
788
789
790/**
791 * Performs the cpuid instruction returning ecx.
792 *
793 * @param uOperator CPUID operation (eax).
794 * @returns ECX after cpuid operation.
795 */
796#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
797DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
798#else
799DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
800{
801 RTCCUINTREG xCX;
802# if RT_INLINE_ASM_GNU_STYLE
803# ifdef RT_ARCH_AMD64
804 RTCCUINTREG uSpill;
805 __asm__ ("cpuid"
806 : "=a" (uSpill),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "rbx", "rdx");
810# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
811 __asm__ ("push %%ebx\n\t"
812 "cpuid\n\t"
813 "pop %%ebx\n\t"
814 : "=a" (uOperator),
815 "=c" (xCX)
816 : "0" (uOperator)
817 : "edx");
818# else
819 __asm__ ("cpuid"
820 : "=a" (uOperator),
821 "=c" (xCX)
822 : "0" (uOperator)
823 : "ebx", "edx");
824
825# endif
826
827# elif RT_INLINE_ASM_USES_INTRIN
828 int aInfo[4];
829 __cpuid(aInfo, uOperator);
830 xCX = aInfo[2];
831
832# else
833 __asm
834 {
835 push ebx
836 mov eax, [uOperator]
837 cpuid
838 mov [xCX], ecx
839 pop ebx
840 }
841# endif
842 return (uint32_t)xCX;
843}
844#endif
845
846
847/**
848 * Checks if the current CPU supports CPUID.
849 *
850 * @returns true if CPUID is supported.
851 */
852DECLINLINE(bool) ASMHasCpuId(void)
853{
854#ifdef RT_ARCH_AMD64
855 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
856#else /* !RT_ARCH_AMD64 */
857 bool fRet = false;
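    /* Both variants below try to toggle the ID flag (EFLAGS bit 21, the 0x200000
       mask): if the change sticks after a popf/pushf round trip, CPUID is supported. */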
858# if RT_INLINE_ASM_GNU_STYLE
859 uint32_t u1;
860 uint32_t u2;
861 __asm__ ("pushf\n\t"
862 "pop %1\n\t"
863 "mov %1, %2\n\t"
864 "xorl $0x200000, %1\n\t"
865 "push %1\n\t"
866 "popf\n\t"
867 "pushf\n\t"
868 "pop %1\n\t"
869 "cmpl %1, %2\n\t"
870 "setne %0\n\t"
871 "push %2\n\t"
872 "popf\n\t"
873 : "=m" (fRet), "=r" (u1), "=r" (u2));
874# else
875 __asm
876 {
877 pushfd
878 pop eax
879 mov ebx, eax
880 xor eax, 0200000h
881 push eax
882 popfd
883 pushfd
884 pop eax
885 cmp eax, ebx
886 setne fRet
887 push ebx
888 popfd
889 }
890# endif
891 return fRet;
892#endif /* !RT_ARCH_AMD64 */
893}
894
895
896/**
897 * Gets the APIC ID of the current CPU.
898 *
899 * @returns the APIC ID.
900 */
901#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
902DECLASM(uint8_t) ASMGetApicId(void);
903#else
904DECLINLINE(uint8_t) ASMGetApicId(void)
905{
906 RTCCUINTREG xBX;
907# if RT_INLINE_ASM_GNU_STYLE
908# ifdef RT_ARCH_AMD64
909 RTCCUINTREG uSpill;
910 __asm__ ("cpuid"
911 : "=a" (uSpill),
912 "=b" (xBX)
913 : "0" (1)
914 : "rcx", "rdx");
915# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
916 RTCCUINTREG uSpill;
917 __asm__ ("mov %%ebx,%1\n\t"
918 "cpuid\n\t"
919 "xchgl %%ebx,%1\n\t"
920 : "=a" (uSpill),
921 "=r" (xBX)
922 : "0" (1)
923 : "ecx", "edx");
924# else
925 RTCCUINTREG uSpill;
926 __asm__ ("cpuid"
927 : "=a" (uSpill),
928 "=b" (xBX)
929 : "0" (1)
930 : "ecx", "edx");
931# endif
932
933# elif RT_INLINE_ASM_USES_INTRIN
934 int aInfo[4];
935 __cpuid(aInfo, 1);
936 xBX = aInfo[1];
937
938# else
939 __asm
940 {
941 push ebx
942 mov eax, 1
943 cpuid
944 mov [xBX], ebx
945 pop ebx
946 }
947# endif
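    /* CPUID leaf 1 returns the initial local APIC ID in bits 31:24 of EBX. */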
948 return (uint8_t)(xBX >> 24);
949}
950#endif
951
952
953/**
 954 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
955 *
956 * @returns true/false.
957 * @param uEBX EBX return from ASMCpuId(0)
958 * @param uECX ECX return from ASMCpuId(0)
959 * @param uEDX EDX return from ASMCpuId(0)
960 */
961DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
962{
963 return uEBX == 0x756e6547
964 && uECX == 0x6c65746e
965 && uEDX == 0x49656e69;
966}
967
968
969/**
 970 * Tests if this is a genuine Intel CPU.
971 *
972 * @returns true/false.
973 */
974DECLINLINE(bool) ASMIsIntelCpu(void)
975{
976 uint32_t uEAX, uEBX, uECX, uEDX;
977 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
978 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
979}
980
981
982/**
983 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
984 *
985 * @returns Family.
986 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
987 */
988DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
989{
990 return ((uEAX >> 8) & 0xf) == 0xf
991 ? ((uEAX >> 20) & 0x7f) + 0xf
992 : ((uEAX >> 8) & 0xf);
993}
994
995
996/**
997 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
998 *
999 * @returns Model.
1000 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1002 */
1003DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1004{
1005 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1006 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1007 : ((uEAX >> 4) & 0xf);
1008}
1009
1010
1011/**
1012 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1013 *
1014 * @returns Model.
1015 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1017 */
1018DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1019{
1020 return ((uEAX >> 8) & 0xf) == 0xf
1021 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1022 : ((uEAX >> 4) & 0xf);
1023}
1024
1025
1026/**
1027 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1028 *
1029 * @returns Model.
1030 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 1031 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1032 */
1033DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1034{
1035 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1036 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1037 : ((uEAX >> 4) & 0xf);
1038}
1039
1040
1041/**
1042 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1043 *
 1044 * @returns Stepping.
1045 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1046 */
1047DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1048{
1049 return uEAX & 0xf;
1050}
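
/* Worked example (editor's illustration, value assumed): an Intel CPU reporting
 * EAX=0x00020652 from ASMCpuId(1) decodes as follows with the helpers above:
 *
 *     ASMGetCpuFamily(0x00020652)      -> 0x06  (base family, not 0xf)
 *     ASMGetCpuModel(0x00020652, true) -> 0x25  (base model 0x5 | extended model 0x2 << 4)
 *     ASMGetCpuStepping(0x00020652)    -> 0x02
 *
 * The extended family/model fields are only folded in for family 0xf (and, for
 * the model, also family 0x6 on Intel), which is what the checks above implement.
 */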
1051
1052
1053/**
1054 * Get cr0.
1055 * @returns cr0.
1056 */
1057#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1058DECLASM(RTCCUINTREG) ASMGetCR0(void);
1059#else
1060DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1061{
1062 RTCCUINTREG uCR0;
1063# if RT_INLINE_ASM_USES_INTRIN
1064 uCR0 = __readcr0();
1065
1066# elif RT_INLINE_ASM_GNU_STYLE
1067# ifdef RT_ARCH_AMD64
1068 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1069# else
1070 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1071# endif
1072# else
1073 __asm
1074 {
1075# ifdef RT_ARCH_AMD64
1076 mov rax, cr0
1077 mov [uCR0], rax
1078# else
1079 mov eax, cr0
1080 mov [uCR0], eax
1081# endif
1082 }
1083# endif
1084 return uCR0;
1085}
1086#endif
1087
1088
1089/**
1090 * Sets the CR0 register.
1091 * @param uCR0 The new CR0 value.
1092 */
1093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1094DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1095#else
1096DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1097{
1098# if RT_INLINE_ASM_USES_INTRIN
1099 __writecr0(uCR0);
1100
1101# elif RT_INLINE_ASM_GNU_STYLE
1102# ifdef RT_ARCH_AMD64
1103 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1104# else
1105 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1106# endif
1107# else
1108 __asm
1109 {
1110# ifdef RT_ARCH_AMD64
1111 mov rax, [uCR0]
1112 mov cr0, rax
1113# else
1114 mov eax, [uCR0]
1115 mov cr0, eax
1116# endif
1117 }
1118# endif
1119}
1120#endif
1121
1122
1123/**
1124 * Get cr2.
1125 * @returns cr2.
1126 */
1127#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1128DECLASM(RTCCUINTREG) ASMGetCR2(void);
1129#else
1130DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1131{
1132 RTCCUINTREG uCR2;
1133# if RT_INLINE_ASM_USES_INTRIN
1134 uCR2 = __readcr2();
1135
1136# elif RT_INLINE_ASM_GNU_STYLE
1137# ifdef RT_ARCH_AMD64
1138 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1139# else
1140 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1141# endif
1142# else
1143 __asm
1144 {
1145# ifdef RT_ARCH_AMD64
1146 mov rax, cr2
1147 mov [uCR2], rax
1148# else
1149 mov eax, cr2
1150 mov [uCR2], eax
1151# endif
1152 }
1153# endif
1154 return uCR2;
1155}
1156#endif
1157
1158
1159/**
1160 * Sets the CR2 register.
 1161 * @param uCR2 The new CR2 value.
1162 */
1163#if RT_INLINE_ASM_EXTERNAL
1164DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1165#else
1166DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1167{
1168# if RT_INLINE_ASM_GNU_STYLE
1169# ifdef RT_ARCH_AMD64
1170 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1171# else
1172 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1173# endif
1174# else
1175 __asm
1176 {
1177# ifdef RT_ARCH_AMD64
1178 mov rax, [uCR2]
1179 mov cr2, rax
1180# else
1181 mov eax, [uCR2]
1182 mov cr2, eax
1183# endif
1184 }
1185# endif
1186}
1187#endif
1188
1189
1190/**
1191 * Get cr3.
1192 * @returns cr3.
1193 */
1194#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1195DECLASM(RTCCUINTREG) ASMGetCR3(void);
1196#else
1197DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1198{
1199 RTCCUINTREG uCR3;
1200# if RT_INLINE_ASM_USES_INTRIN
1201 uCR3 = __readcr3();
1202
1203# elif RT_INLINE_ASM_GNU_STYLE
1204# ifdef RT_ARCH_AMD64
1205 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1206# else
1207 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1208# endif
1209# else
1210 __asm
1211 {
1212# ifdef RT_ARCH_AMD64
1213 mov rax, cr3
1214 mov [uCR3], rax
1215# else
1216 mov eax, cr3
1217 mov [uCR3], eax
1218# endif
1219 }
1220# endif
1221 return uCR3;
1222}
1223#endif
1224
1225
1226/**
1227 * Sets the CR3 register.
1228 *
1229 * @param uCR3 New CR3 value.
1230 */
1231#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1232DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1233#else
1234DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1235{
1236# if RT_INLINE_ASM_USES_INTRIN
1237 __writecr3(uCR3);
1238
1239# elif RT_INLINE_ASM_GNU_STYLE
1240# ifdef RT_ARCH_AMD64
1241 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1242# else
1243 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1244# endif
1245# else
1246 __asm
1247 {
1248# ifdef RT_ARCH_AMD64
1249 mov rax, [uCR3]
1250 mov cr3, rax
1251# else
1252 mov eax, [uCR3]
1253 mov cr3, eax
1254# endif
1255 }
1256# endif
1257}
1258#endif
1259
1260
1261/**
1262 * Reloads the CR3 register.
1263 */
1264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1265DECLASM(void) ASMReloadCR3(void);
1266#else
1267DECLINLINE(void) ASMReloadCR3(void)
1268{
1269# if RT_INLINE_ASM_USES_INTRIN
1270 __writecr3(__readcr3());
1271
1272# elif RT_INLINE_ASM_GNU_STYLE
1273 RTCCUINTREG u;
1274# ifdef RT_ARCH_AMD64
1275 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1276 "movq %0, %%cr3\n\t"
1277 : "=r" (u));
1278# else
1279 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1280 "movl %0, %%cr3\n\t"
1281 : "=r" (u));
1282# endif
1283# else
1284 __asm
1285 {
1286# ifdef RT_ARCH_AMD64
1287 mov rax, cr3
1288 mov cr3, rax
1289# else
1290 mov eax, cr3
1291 mov cr3, eax
1292# endif
1293 }
1294# endif
1295}
1296#endif
1297
1298
1299/**
1300 * Get cr4.
1301 * @returns cr4.
1302 */
1303#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1304DECLASM(RTCCUINTREG) ASMGetCR4(void);
1305#else
1306DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1307{
1308 RTCCUINTREG uCR4;
1309# if RT_INLINE_ASM_USES_INTRIN
1310 uCR4 = __readcr4();
1311
1312# elif RT_INLINE_ASM_GNU_STYLE
1313# ifdef RT_ARCH_AMD64
1314 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1315# else
1316 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1317# endif
1318# else
1319 __asm
1320 {
1321# ifdef RT_ARCH_AMD64
1322 mov rax, cr4
1323 mov [uCR4], rax
1324# else
1325 push eax /* just in case */
1326 /*mov eax, cr4*/
1327 _emit 0x0f
1328 _emit 0x20
1329 _emit 0xe0
1330 mov [uCR4], eax
1331 pop eax
1332# endif
1333 }
1334# endif
1335 return uCR4;
1336}
1337#endif
1338
1339
1340/**
1341 * Sets the CR4 register.
1342 *
1343 * @param uCR4 New CR4 value.
1344 */
1345#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1346DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1347#else
1348DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1349{
1350# if RT_INLINE_ASM_USES_INTRIN
1351 __writecr4(uCR4);
1352
1353# elif RT_INLINE_ASM_GNU_STYLE
1354# ifdef RT_ARCH_AMD64
1355 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1356# else
1357 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1358# endif
1359# else
1360 __asm
1361 {
1362# ifdef RT_ARCH_AMD64
1363 mov rax, [uCR4]
1364 mov cr4, rax
1365# else
1366 mov eax, [uCR4]
1367 _emit 0x0F
1368 _emit 0x22
1369 _emit 0xE0 /* mov cr4, eax */
1370# endif
1371 }
1372# endif
1373}
1374#endif
1375
1376
1377/**
1378 * Get cr8.
1379 * @returns cr8.
1380 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1381 */
1382#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1383DECLASM(RTCCUINTREG) ASMGetCR8(void);
1384#else
1385DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1386{
1387# ifdef RT_ARCH_AMD64
1388 RTCCUINTREG uCR8;
1389# if RT_INLINE_ASM_USES_INTRIN
1390 uCR8 = __readcr8();
1391
1392# elif RT_INLINE_ASM_GNU_STYLE
1393 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1394# else
1395 __asm
1396 {
1397 mov rax, cr8
1398 mov [uCR8], rax
1399 }
1400# endif
1401 return uCR8;
1402# else /* !RT_ARCH_AMD64 */
1403 return 0;
1404# endif /* !RT_ARCH_AMD64 */
1405}
1406#endif
1407
1408
1409/**
1410 * Enables interrupts (EFLAGS.IF).
1411 */
1412#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1413DECLASM(void) ASMIntEnable(void);
1414#else
1415DECLINLINE(void) ASMIntEnable(void)
1416{
1417# if RT_INLINE_ASM_GNU_STYLE
1418 __asm("sti\n");
1419# elif RT_INLINE_ASM_USES_INTRIN
1420 _enable();
1421# else
1422 __asm sti
1423# endif
1424}
1425#endif
1426
1427
1428/**
1429 * Disables interrupts (!EFLAGS.IF).
1430 */
1431#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1432DECLASM(void) ASMIntDisable(void);
1433#else
1434DECLINLINE(void) ASMIntDisable(void)
1435{
1436# if RT_INLINE_ASM_GNU_STYLE
1437 __asm("cli\n");
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 _disable();
1440# else
1441 __asm cli
1442# endif
1443}
1444#endif
1445
1446
1447/**
1448 * Disables interrupts and returns previous xFLAGS.
1449 */
1450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1451DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1452#else
1453DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1454{
1455 RTCCUINTREG xFlags;
1456# if RT_INLINE_ASM_GNU_STYLE
1457# ifdef RT_ARCH_AMD64
1458 __asm__ __volatile__("pushfq\n\t"
1459 "cli\n\t"
1460 "popq %0\n\t"
1461 : "=rm" (xFlags));
1462# else
1463 __asm__ __volatile__("pushfl\n\t"
1464 "cli\n\t"
1465 "popl %0\n\t"
1466 : "=rm" (xFlags));
1467# endif
1468# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1469 xFlags = ASMGetFlags();
1470 _disable();
1471# else
1472 __asm {
1473 pushfd
1474 cli
1475 pop [xFlags]
1476 }
1477# endif
1478 return xFlags;
1479}
1480#endif
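
/* Typical usage sketch (editor's illustration): the save/disable/restore pattern
 * for a short section that must not be interrupted on the current CPU.
 *
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     // ... touch state that is shared with an interrupt handler ...
 *     ASMSetFlags(fSavedFlags);        // restores the previous EFLAGS.IF state
 */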
1481
1482
1483/**
1484 * Reads a machine specific register.
1485 *
1486 * @returns Register content.
1487 * @param uRegister Register to read.
1488 */
1489#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1490DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1491#else
1492DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1493{
1494 RTUINT64U u;
1495# if RT_INLINE_ASM_GNU_STYLE
1496 __asm__ __volatile__("rdmsr\n\t"
1497 : "=a" (u.s.Lo),
1498 "=d" (u.s.Hi)
1499 : "c" (uRegister));
1500
1501# elif RT_INLINE_ASM_USES_INTRIN
1502 u.u = __readmsr(uRegister);
1503
1504# else
1505 __asm
1506 {
1507 mov ecx, [uRegister]
1508 rdmsr
1509 mov [u.s.Lo], eax
1510 mov [u.s.Hi], edx
1511 }
1512# endif
1513
1514 return u.u;
1515}
1516#endif
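
/* Usage sketch (editor's illustration; MSR index and bit layout per the Intel/AMD
 * manuals): reading IA32_APIC_BASE (MSR 0x1b) and checking two of its flags.
 *
 *     uint64_t const uApicBase = ASMRdMsr(0x1b);
 *     bool     const fBsp      = !!(uApicBase & UINT64_C(0x100));  // bit 8: bootstrap processor
 *     bool     const fEnabled  = !!(uApicBase & UINT64_C(0x800));  // bit 11: APIC global enable
 */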
1517
1518
1519/**
1520 * Writes a machine specific register.
1521 *
1523 * @param uRegister Register to write to.
1524 * @param u64Val Value to write.
1525 */
1526#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1527DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1528#else
1529DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1530{
1531 RTUINT64U u;
1532
1533 u.u = u64Val;
1534# if RT_INLINE_ASM_GNU_STYLE
1535 __asm__ __volatile__("wrmsr\n\t"
1536 ::"a" (u.s.Lo),
1537 "d" (u.s.Hi),
1538 "c" (uRegister));
1539
1540# elif RT_INLINE_ASM_USES_INTRIN
1541 __writemsr(uRegister, u.u);
1542
1543# else
1544 __asm
1545 {
1546 mov ecx, [uRegister]
1547 mov edx, [u.s.Hi]
1548 mov eax, [u.s.Lo]
1549 wrmsr
1550 }
1551# endif
1552}
1553#endif
1554
1555
1556/**
1557 * Reads low part of a machine specific register.
1558 *
1559 * @returns Register content.
1560 * @param uRegister Register to read.
1561 */
1562#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1563DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1564#else
1565DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1566{
1567 uint32_t u32;
1568# if RT_INLINE_ASM_GNU_STYLE
1569 __asm__ __volatile__("rdmsr\n\t"
1570 : "=a" (u32)
1571 : "c" (uRegister)
1572 : "edx");
1573
1574# elif RT_INLINE_ASM_USES_INTRIN
1575 u32 = (uint32_t)__readmsr(uRegister);
1576
1577#else
1578 __asm
1579 {
1580 mov ecx, [uRegister]
1581 rdmsr
1582 mov [u32], eax
1583 }
1584# endif
1585
1586 return u32;
1587}
1588#endif
1589
1590
1591/**
1592 * Reads high part of a machine specific register.
1593 *
1594 * @returns Register content.
1595 * @param uRegister Register to read.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1599#else
1600DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1601{
1602 uint32_t u32;
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__("rdmsr\n\t"
1605 : "=d" (u32)
1606 : "c" (uRegister)
1607 : "eax");
1608
1609# elif RT_INLINE_ASM_USES_INTRIN
1610 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1611
1612# else
1613 __asm
1614 {
1615 mov ecx, [uRegister]
1616 rdmsr
1617 mov [u32], edx
1618 }
1619# endif
1620
1621 return u32;
1622}
1623#endif
1624
1625
1626/**
1627 * Gets dr0.
1628 *
1629 * @returns dr0.
1630 */
1631#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1632DECLASM(RTCCUINTREG) ASMGetDR0(void);
1633#else
1634DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1635{
1636 RTCCUINTREG uDR0;
1637# if RT_INLINE_ASM_USES_INTRIN
1638 uDR0 = __readdr(0);
1639# elif RT_INLINE_ASM_GNU_STYLE
1640# ifdef RT_ARCH_AMD64
1641 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1642# else
1643 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1644# endif
1645# else
1646 __asm
1647 {
1648# ifdef RT_ARCH_AMD64
1649 mov rax, dr0
1650 mov [uDR0], rax
1651# else
1652 mov eax, dr0
1653 mov [uDR0], eax
1654# endif
1655 }
1656# endif
1657 return uDR0;
1658}
1659#endif
1660
1661
1662/**
1663 * Gets dr1.
1664 *
1665 * @returns dr1.
1666 */
1667#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1668DECLASM(RTCCUINTREG) ASMGetDR1(void);
1669#else
1670DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1671{
1672 RTCCUINTREG uDR1;
1673# if RT_INLINE_ASM_USES_INTRIN
1674 uDR1 = __readdr(1);
1675# elif RT_INLINE_ASM_GNU_STYLE
1676# ifdef RT_ARCH_AMD64
1677 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1678# else
1679 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1680# endif
1681# else
1682 __asm
1683 {
1684# ifdef RT_ARCH_AMD64
1685 mov rax, dr1
1686 mov [uDR1], rax
1687# else
1688 mov eax, dr1
1689 mov [uDR1], eax
1690# endif
1691 }
1692# endif
1693 return uDR1;
1694}
1695#endif
1696
1697
1698/**
1699 * Gets dr2.
1700 *
1701 * @returns dr2.
1702 */
1703#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1704DECLASM(RTCCUINTREG) ASMGetDR2(void);
1705#else
1706DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1707{
1708 RTCCUINTREG uDR2;
1709# if RT_INLINE_ASM_USES_INTRIN
1710 uDR2 = __readdr(2);
1711# elif RT_INLINE_ASM_GNU_STYLE
1712# ifdef RT_ARCH_AMD64
1713 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1714# else
1715 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1716# endif
1717# else
1718 __asm
1719 {
1720# ifdef RT_ARCH_AMD64
1721 mov rax, dr2
1722 mov [uDR2], rax
1723# else
1724 mov eax, dr2
1725 mov [uDR2], eax
1726# endif
1727 }
1728# endif
1729 return uDR2;
1730}
1731#endif
1732
1733
1734/**
1735 * Gets dr3.
1736 *
1737 * @returns dr3.
1738 */
1739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1740DECLASM(RTCCUINTREG) ASMGetDR3(void);
1741#else
1742DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1743{
1744 RTCCUINTREG uDR3;
1745# if RT_INLINE_ASM_USES_INTRIN
1746 uDR3 = __readdr(3);
1747# elif RT_INLINE_ASM_GNU_STYLE
1748# ifdef RT_ARCH_AMD64
1749 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1750# else
1751 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1752# endif
1753# else
1754 __asm
1755 {
1756# ifdef RT_ARCH_AMD64
1757 mov rax, dr3
1758 mov [uDR3], rax
1759# else
1760 mov eax, dr3
1761 mov [uDR3], eax
1762# endif
1763 }
1764# endif
1765 return uDR3;
1766}
1767#endif
1768
1769
1770/**
1771 * Gets dr6.
1772 *
1773 * @returns dr6.
1774 */
1775#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1776DECLASM(RTCCUINTREG) ASMGetDR6(void);
1777#else
1778DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1779{
1780 RTCCUINTREG uDR6;
1781# if RT_INLINE_ASM_USES_INTRIN
1782 uDR6 = __readdr(6);
1783# elif RT_INLINE_ASM_GNU_STYLE
1784# ifdef RT_ARCH_AMD64
1785 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1786# else
1787 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1788# endif
1789# else
1790 __asm
1791 {
1792# ifdef RT_ARCH_AMD64
1793 mov rax, dr6
1794 mov [uDR6], rax
1795# else
1796 mov eax, dr6
1797 mov [uDR6], eax
1798# endif
1799 }
1800# endif
1801 return uDR6;
1802}
1803#endif
1804
1805
1806/**
1807 * Reads and clears DR6.
1808 *
1809 * @returns DR6.
1810 */
1811#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1812DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1813#else
1814DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1815{
1816 RTCCUINTREG uDR6;
1817# if RT_INLINE_ASM_USES_INTRIN
1818 uDR6 = __readdr(6);
 1819 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1820# elif RT_INLINE_ASM_GNU_STYLE
 1821 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1822# ifdef RT_ARCH_AMD64
1823 __asm__ __volatile__("movq %%dr6, %0\n\t"
1824 "movq %1, %%dr6\n\t"
1825 : "=r" (uDR6)
1826 : "r" (uNewValue));
1827# else
1828 __asm__ __volatile__("movl %%dr6, %0\n\t"
1829 "movl %1, %%dr6\n\t"
1830 : "=r" (uDR6)
1831 : "r" (uNewValue));
1832# endif
1833# else
1834 __asm
1835 {
1836# ifdef RT_ARCH_AMD64
1837 mov rax, dr6
1838 mov [uDR6], rax
1839 mov rcx, rax
 1840 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1841 mov dr6, rcx
1842# else
1843 mov eax, dr6
1844 mov [uDR6], eax
1845 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1846 mov dr6, ecx
1847# endif
1848 }
1849# endif
1850 return uDR6;
1851}
1852#endif
1853
1854
1855/**
1856 * Gets dr7.
1857 *
1858 * @returns dr7.
1859 */
1860#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1861DECLASM(RTCCUINTREG) ASMGetDR7(void);
1862#else
1863DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1864{
1865 RTCCUINTREG uDR7;
1866# if RT_INLINE_ASM_USES_INTRIN
1867 uDR7 = __readdr(7);
1868# elif RT_INLINE_ASM_GNU_STYLE
1869# ifdef RT_ARCH_AMD64
1870 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1871# else
1872 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1873# endif
1874# else
1875 __asm
1876 {
1877# ifdef RT_ARCH_AMD64
1878 mov rax, dr7
1879 mov [uDR7], rax
1880# else
1881 mov eax, dr7
1882 mov [uDR7], eax
1883# endif
1884 }
1885# endif
1886 return uDR7;
1887}
1888#endif
1889
1890
1891/**
1892 * Sets dr0.
1893 *
1894 * @param uDRVal Debug register value to write
1895 */
1896#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1897DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1898#else
1899DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1900{
1901# if RT_INLINE_ASM_USES_INTRIN
1902 __writedr(0, uDRVal);
1903# elif RT_INLINE_ASM_GNU_STYLE
1904# ifdef RT_ARCH_AMD64
1905 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1906# else
1907 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1908# endif
1909# else
1910 __asm
1911 {
1912# ifdef RT_ARCH_AMD64
1913 mov rax, [uDRVal]
1914 mov dr0, rax
1915# else
1916 mov eax, [uDRVal]
1917 mov dr0, eax
1918# endif
1919 }
1920# endif
1921}
1922#endif
1923
1924
1925/**
1926 * Sets dr1.
1927 *
1928 * @param uDRVal Debug register value to write
1929 */
1930#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1931DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1932#else
1933DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1934{
1935# if RT_INLINE_ASM_USES_INTRIN
1936 __writedr(1, uDRVal);
1937# elif RT_INLINE_ASM_GNU_STYLE
1938# ifdef RT_ARCH_AMD64
1939 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1940# else
1941 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1942# endif
1943# else
1944 __asm
1945 {
1946# ifdef RT_ARCH_AMD64
1947 mov rax, [uDRVal]
1948 mov dr1, rax
1949# else
1950 mov eax, [uDRVal]
1951 mov dr1, eax
1952# endif
1953 }
1954# endif
1955}
1956#endif
1957
1958
1959/**
1960 * Sets dr2.
1961 *
1962 * @param uDRVal Debug register value to write
1963 */
1964#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1965DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1966#else
1967DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1968{
1969# if RT_INLINE_ASM_USES_INTRIN
1970 __writedr(2, uDRVal);
1971# elif RT_INLINE_ASM_GNU_STYLE
1972# ifdef RT_ARCH_AMD64
1973 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1974# else
1975 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
1976# endif
1977# else
1978 __asm
1979 {
1980# ifdef RT_ARCH_AMD64
1981 mov rax, [uDRVal]
1982 mov dr2, rax
1983# else
1984 mov eax, [uDRVal]
1985 mov dr2, eax
1986# endif
1987 }
1988# endif
1989}
1990#endif
1991
1992
1993/**
1994 * Sets dr3.
1995 *
1996 * @param uDRVal Debug register value to write
1997 */
1998#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1999DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2000#else
2001DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2002{
2003# if RT_INLINE_ASM_USES_INTRIN
2004 __writedr(3, uDRVal);
2005# elif RT_INLINE_ASM_GNU_STYLE
2006# ifdef RT_ARCH_AMD64
2007 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2008# else
2009 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2010# endif
2011# else
2012 __asm
2013 {
2014# ifdef RT_ARCH_AMD64
2015 mov rax, [uDRVal]
2016 mov dr3, rax
2017# else
2018 mov eax, [uDRVal]
2019 mov dr3, eax
2020# endif
2021 }
2022# endif
2023}
2024#endif
2025
2026
2027/**
2028 * Sets dr6.
2029 *
2030 * @param uDRVal Debug register value to write
2031 */
2032#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2033DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2034#else
2035DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2036{
2037# if RT_INLINE_ASM_USES_INTRIN
2038 __writedr(6, uDRVal);
2039# elif RT_INLINE_ASM_GNU_STYLE
2040# ifdef RT_ARCH_AMD64
2041 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2042# else
2043 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2044# endif
2045# else
2046 __asm
2047 {
2048# ifdef RT_ARCH_AMD64
2049 mov rax, [uDRVal]
2050 mov dr6, rax
2051# else
2052 mov eax, [uDRVal]
2053 mov dr6, eax
2054# endif
2055 }
2056# endif
2057}
2058#endif
2059
2060
2061/**
2062 * Sets dr7.
2063 *
2064 * @param uDRVal Debug register value to write
2065 */
2066#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2067DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2068#else
2069DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2070{
2071# if RT_INLINE_ASM_USES_INTRIN
2072 __writedr(7, uDRVal);
2073# elif RT_INLINE_ASM_GNU_STYLE
2074# ifdef RT_ARCH_AMD64
2075 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2076# else
2077 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2078# endif
2079# else
2080 __asm
2081 {
2082# ifdef RT_ARCH_AMD64
2083 mov rax, [uDRVal]
2084 mov dr7, rax
2085# else
2086 mov eax, [uDRVal]
2087 mov dr7, eax
2088# endif
2089 }
2090# endif
2091}
2092#endif
2093
2094
2095/**
2096 * Compiler memory barrier.
2097 *
 2098 * Ensures that the compiler does not use any cached (register / temporary stack)
 2099 * memory values and that all outstanding writes are flushed before returning from this function.
 2100 *
 2101 * This function must be used if non-volatile data is modified by a
 2102 * device or the VMM. Typical cases are port access, MMIO access,
 2103 * trapping instructions, etc.
2104 */
2105#if RT_INLINE_ASM_GNU_STYLE
2106# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2107#elif RT_INLINE_ASM_USES_INTRIN
2108# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2109#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2110DECLINLINE(void) ASMCompilerBarrier(void)
2111{
2112 __asm
2113 {
2114 }
2115}
2116#endif
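
/* Illustrative sketch (editor's addition; pReq, uPort, GCPhysReq and MY_STATUS_DONE
 * are hypothetical): without the barrier the compiler may keep pReq->u32Status in a
 * register and never re-read it after the port write that hands the request to the
 * device/VMM.
 *
 *     pReq->u32Status = 0;
 *     ASMOutU32(uPort, GCPhysReq);       // device/VMM processes the request (ASMOutU32 is declared below)
 *     ASMCompilerBarrier();              // force non-volatile memory to be re-read
 *     if (pReq->u32Status == MY_STATUS_DONE)
 *         ...
 */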
2117
2118
2119/**
2120 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2121 *
2122 * @param Port I/O port to write to.
2123 * @param u8 8-bit integer to write.
2124 */
2125#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2126DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2127#else
2128DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2129{
2130# if RT_INLINE_ASM_GNU_STYLE
2131 __asm__ __volatile__("outb %b1, %w0\n\t"
2132 :: "Nd" (Port),
2133 "a" (u8));
2134
2135# elif RT_INLINE_ASM_USES_INTRIN
2136 __outbyte(Port, u8);
2137
2138# else
2139 __asm
2140 {
2141 mov dx, [Port]
2142 mov al, [u8]
2143 out dx, al
2144 }
2145# endif
2146}
2147#endif
2148
2149
2150/**
2151 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2152 *
2153 * @returns 8-bit integer.
2154 * @param Port I/O port to read from.
2155 */
2156#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2157DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2158#else
2159DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2160{
2161 uint8_t u8;
2162# if RT_INLINE_ASM_GNU_STYLE
2163 __asm__ __volatile__("inb %w1, %b0\n\t"
2164 : "=a" (u8)
2165 : "Nd" (Port));
2166
2167# elif RT_INLINE_ASM_USES_INTRIN
2168 u8 = __inbyte(Port);
2169
2170# else
2171 __asm
2172 {
2173 mov dx, [Port]
2174 in al, dx
2175 mov [u8], al
2176 }
2177# endif
2178 return u8;
2179}
2180#endif
2181
2182
2183/**
2184 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2185 *
2186 * @param Port I/O port to write to.
2187 * @param u16 16-bit integer to write.
2188 */
2189#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2190DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2191#else
2192DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2193{
2194# if RT_INLINE_ASM_GNU_STYLE
2195 __asm__ __volatile__("outw %w1, %w0\n\t"
2196 :: "Nd" (Port),
2197 "a" (u16));
2198
2199# elif RT_INLINE_ASM_USES_INTRIN
2200 __outword(Port, u16);
2201
2202# else
2203 __asm
2204 {
2205 mov dx, [Port]
2206 mov ax, [u16]
2207 out dx, ax
2208 }
2209# endif
2210}
2211#endif
2212
2213
2214/**
2215 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2216 *
2217 * @returns 16-bit integer.
2218 * @param Port I/O port to read from.
2219 */
2220#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2221DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2222#else
2223DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2224{
2225 uint16_t u16;
2226# if RT_INLINE_ASM_GNU_STYLE
2227 __asm__ __volatile__("inw %w1, %w0\n\t"
2228 : "=a" (u16)
2229 : "Nd" (Port));
2230
2231# elif RT_INLINE_ASM_USES_INTRIN
2232 u16 = __inword(Port);
2233
2234# else
2235 __asm
2236 {
2237 mov dx, [Port]
2238 in ax, dx
2239 mov [u16], ax
2240 }
2241# endif
2242 return u16;
2243}
2244#endif
2245
2246
2247/**
2248 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2249 *
2250 * @param Port I/O port to write to.
2251 * @param u32 32-bit integer to write.
2252 */
2253#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2254DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2255#else
2256DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2257{
2258# if RT_INLINE_ASM_GNU_STYLE
2259 __asm__ __volatile__("outl %1, %w0\n\t"
2260 :: "Nd" (Port),
2261 "a" (u32));
2262
2263# elif RT_INLINE_ASM_USES_INTRIN
2264 __outdword(Port, u32);
2265
2266# else
2267 __asm
2268 {
2269 mov dx, [Port]
2270 mov eax, [u32]
2271 out dx, eax
2272 }
2273# endif
2274}
2275#endif
2276
2277
2278/**
2279 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2280 *
2281 * @returns 32-bit integer.
2282 * @param Port I/O port to read from.
2283 */
2284#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2285DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2286#else
2287DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2288{
2289 uint32_t u32;
2290# if RT_INLINE_ASM_GNU_STYLE
2291 __asm__ __volatile__("inl %w1, %0\n\t"
2292 : "=a" (u32)
2293 : "Nd" (Port));
2294
2295# elif RT_INLINE_ASM_USES_INTRIN
2296 u32 = __indword(Port);
2297
2298# else
2299 __asm
2300 {
2301 mov dx, [Port]
2302 in eax, dx
2303 mov [u32], eax
2304 }
2305# endif
2306 return u32;
2307}
2308#endif
2309
2310
2311/**
2312 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2313 *
2314 * @param Port I/O port to write to.
2315 * @param pau8 Pointer to the string buffer.
2316 * @param c The number of items to write.
2317 */
2318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2319DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2320#else
2321DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2322{
2323# if RT_INLINE_ASM_GNU_STYLE
2324 __asm__ __volatile__("rep; outsb\n\t"
2325 : "+S" (pau8),
2326 "+c" (c)
2327 : "d" (Port));
2328
2329# elif RT_INLINE_ASM_USES_INTRIN
2330 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2331
2332# else
2333 __asm
2334 {
2335 mov dx, [Port]
2336 mov ecx, [c]
2337 mov eax, [pau8]
2338 xchg esi, eax
2339 rep outsb
2340 xchg esi, eax
2341 }
2342# endif
2343}
2344#endif
2345
2346
2347/**
2348 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2349 *
2350 * @param Port I/O port to read from.
2351 * @param pau8 Pointer to the string buffer (output).
2352 * @param c The number of items to read.
2353 */
2354#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2355DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2356#else
2357DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2358{
2359# if RT_INLINE_ASM_GNU_STYLE
2360 __asm__ __volatile__("rep; insb\n\t"
2361 : "+D" (pau8),
2362 "+c" (c)
2363 : "d" (Port));
2364
2365# elif RT_INLINE_ASM_USES_INTRIN
2366 __inbytestring(Port, pau8, (unsigned long)c);
2367
2368# else
2369 __asm
2370 {
2371 mov dx, [Port]
2372 mov ecx, [c]
2373 mov eax, [pau8]
2374 xchg edi, eax
2375 rep insb
2376 xchg edi, eax
2377 }
2378# endif
2379}
2380#endif
2381
2382
2383/**
2384 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2385 *
2386 * @param Port I/O port to write to.
2387 * @param pau16 Pointer to the string buffer.
2388 * @param c The number of items to write.
2389 */
2390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2391DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2392#else
2393DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2394{
2395# if RT_INLINE_ASM_GNU_STYLE
2396 __asm__ __volatile__("rep; outsw\n\t"
2397 : "+S" (pau16),
2398 "+c" (c)
2399 : "d" (Port));
2400
2401# elif RT_INLINE_ASM_USES_INTRIN
2402 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2403
2404# else
2405 __asm
2406 {
2407 mov dx, [Port]
2408 mov ecx, [c]
2409 mov eax, [pau16]
2410 xchg esi, eax
2411 rep outsw
2412 xchg esi, eax
2413 }
2414# endif
2415}
2416#endif
2417
2418
2419/**
2420 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2421 *
2422 * @param Port I/O port to read from.
2423 * @param pau16 Pointer to the string buffer (output).
2424 * @param c The number of items to read.
2425 */
2426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2427DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2428#else
2429DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2430{
2431# if RT_INLINE_ASM_GNU_STYLE
2432 __asm__ __volatile__("rep; insw\n\t"
2433 : "+D" (pau16),
2434 "+c" (c)
2435 : "d" (Port));
2436
2437# elif RT_INLINE_ASM_USES_INTRIN
2438 __inwordstring(Port, pau16, (unsigned long)c);
2439
2440# else
2441 __asm
2442 {
2443 mov dx, [Port]
2444 mov ecx, [c]
2445 mov eax, [pau16]
2446 xchg edi, eax
2447 rep insw
2448 xchg edi, eax
2449 }
2450# endif
2451}
2452#endif
2453
2454
2455/**
2456 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2457 *
2458 * @param Port I/O port to write to.
2459 * @param pau32 Pointer to the string buffer.
2460 * @param c The number of items to write.
2461 */
2462#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2463DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2464#else
2465DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2466{
2467# if RT_INLINE_ASM_GNU_STYLE
2468 __asm__ __volatile__("rep; outsl\n\t"
2469 : "+S" (pau32),
2470 "+c" (c)
2471 : "d" (Port));
2472
2473# elif RT_INLINE_ASM_USES_INTRIN
2474 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2475
2476# else
2477 __asm
2478 {
2479 mov dx, [Port]
2480 mov ecx, [c]
2481 mov eax, [pau32]
2482 xchg esi, eax
2483 rep outsd
2484 xchg esi, eax
2485 }
2486# endif
2487}
2488#endif
2489
2490
2491/**
2492 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2493 *
2494 * @param Port I/O port to read from.
2495 * @param pau32 Pointer to the string buffer (output).
2496 * @param c The number of items to read.
2497 */
2498#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2499DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2500#else
2501DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2502{
2503# if RT_INLINE_ASM_GNU_STYLE
2504 __asm__ __volatile__("rep; insl\n\t"
2505 : "+D" (pau32),
2506 "+c" (c)
2507 : "d" (Port));
2508
2509# elif RT_INLINE_ASM_USES_INTRIN
2510 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2511
2512# else
2513 __asm
2514 {
2515 mov dx, [Port]
2516 mov ecx, [c]
2517 mov eax, [pau32]
2518 xchg edi, eax
2519 rep insd
2520 xchg edi, eax
2521 }
2522# endif
2523}
2524#endif
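
/* Usage sketch (illustrative only, not part of the original header): reading one
 * 512-byte sector from an ATA data port with ASMInStrU16 and writing it back out
 * with ASMOutStrU16. The port number 0x1f0 and the buffer are assumptions made
 * for the example.
 *
 *      uint16_t au16Sector[256];
 *      ASMInStrU16(0x1f0, &au16Sector[0], RT_ELEMENTS(au16Sector));
 *      ASMOutStrU16(0x1f0, &au16Sector[0], RT_ELEMENTS(au16Sector));
 */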
2525
2526
2527/**
2528 * Atomically Exchange an unsigned 8-bit value, ordered.
2529 *
2530 * @returns Current *pu8 value
2531 * @param pu8 Pointer to the 8-bit variable to update.
2532 * @param u8 The 8-bit value to assign to *pu8.
2533 */
2534#if RT_INLINE_ASM_EXTERNAL
2535DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2536#else
2537DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2538{
2539# if RT_INLINE_ASM_GNU_STYLE
2540 __asm__ __volatile__("xchgb %0, %1\n\t"
2541 : "=m" (*pu8),
2542 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2543 : "1" (u8),
2544 "m" (*pu8));
2545# else
2546 __asm
2547 {
2548# ifdef RT_ARCH_AMD64
2549 mov rdx, [pu8]
2550 mov al, [u8]
2551 xchg [rdx], al
2552 mov [u8], al
2553# else
2554 mov edx, [pu8]
2555 mov al, [u8]
2556 xchg [edx], al
2557 mov [u8], al
2558# endif
2559 }
2560# endif
2561 return u8;
2562}
2563#endif
2564
2565
2566/**
2567 * Atomically Exchange a signed 8-bit value, ordered.
2568 *
2569 * @returns Current *pi8 value
2570 * @param pi8 Pointer to the 8-bit variable to update.
2571 * @param i8 The 8-bit value to assign to *pi8.
2572 */
2573DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2574{
2575 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2576}
2577
2578
2579/**
2580 * Atomically Exchange a bool value, ordered.
2581 *
2582 * @returns Current *pf value
2583 * @param pf Pointer to the boolean variable to update.
2584 * @param f The boolean value to assign to *pf.
2585 */
2586DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2587{
2588#ifdef _MSC_VER
2589 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2590#else
2591 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2592#endif
2593}
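
/* Usage sketch (illustrative only): ASMAtomicXchgBool returns the previous value,
 * so it can arbitrate a one-time action between concurrent callers. The flag name
 * is an assumption made for the example.
 *
 *      static bool volatile s_fDone = false;
 *      if (!ASMAtomicXchgBool(&s_fDone, true))
 *      {
 *          ... exactly one caller gets here and performs the one-time work ...
 *      }
 */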
2594
2595
2596/**
2597 * Atomically Exchange an unsigned 16-bit value, ordered.
2598 *
2599 * @returns Current *pu16 value
2600 * @param pu16 Pointer to the 16-bit variable to update.
2601 * @param u16 The 16-bit value to assign to *pu16.
2602 */
2603#if RT_INLINE_ASM_EXTERNAL
2604DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2605#else
2606DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2607{
2608# if RT_INLINE_ASM_GNU_STYLE
2609 __asm__ __volatile__("xchgw %0, %1\n\t"
2610 : "=m" (*pu16),
2611 "=r" (u16)
2612 : "1" (u16),
2613 "m" (*pu16));
2614# else
2615 __asm
2616 {
2617# ifdef RT_ARCH_AMD64
2618 mov rdx, [pu16]
2619 mov ax, [u16]
2620 xchg [rdx], ax
2621 mov [u16], ax
2622# else
2623 mov edx, [pu16]
2624 mov ax, [u16]
2625 xchg [edx], ax
2626 mov [u16], ax
2627# endif
2628 }
2629# endif
2630 return u16;
2631}
2632#endif
2633
2634
2635/**
2636 * Atomically Exchange a signed 16-bit value, ordered.
2637 *
2638 * @returns Current *pi16 value
2639 * @param pi16 Pointer to the 16-bit variable to update.
2640 * @param i16 The 16-bit value to assign to *pi16.
2641 */
2642DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2643{
2644 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2645}
2646
2647
2648/**
2649 * Atomically Exchange an unsigned 32-bit value, ordered.
2650 *
2651 * @returns Current *pu32 value
2652 * @param pu32 Pointer to the 32-bit variable to update.
2653 * @param u32 The 32-bit value to assign to *pu32.
2654 */
2655#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2656DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2657#else
2658DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2659{
2660# if RT_INLINE_ASM_GNU_STYLE
2661 __asm__ __volatile__("xchgl %0, %1\n\t"
2662 : "=m" (*pu32),
2663 "=r" (u32)
2664 : "1" (u32),
2665 "m" (*pu32));
2666
2667# elif RT_INLINE_ASM_USES_INTRIN
2668 u32 = _InterlockedExchange((long *)pu32, u32);
2669
2670# else
2671 __asm
2672 {
2673# ifdef RT_ARCH_AMD64
2674 mov rdx, [pu32]
2675 mov eax, u32
2676 xchg [rdx], eax
2677 mov [u32], eax
2678# else
2679 mov edx, [pu32]
2680 mov eax, u32
2681 xchg [edx], eax
2682 mov [u32], eax
2683# endif
2684 }
2685# endif
2686 return u32;
2687}
2688#endif
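
/* Usage sketch (illustrative only): atomically fetching and clearing a
 * pending-event mask in one step. The variable name is an assumption made for
 * the example.
 *
 *      static uint32_t volatile s_fPendingEvents = 0;
 *      uint32_t fPending = ASMAtomicXchgU32(&s_fPendingEvents, 0);
 *      ... process the events recorded in fPending ...
 */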
2689
2690
2691/**
2692 * Atomically Exchange a signed 32-bit value, ordered.
2693 *
2694 * @returns Current *pi32 value
2695 * @param pi32 Pointer to the 32-bit variable to update.
2696 * @param i32 The 32-bit value to assign to *pi32.
2697 */
2698DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2699{
2700 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2701}
2702
2703
2704/**
2705 * Atomically Exchange an unsigned 64-bit value, ordered.
2706 *
2707 * @returns Current *pu64 value
2708 * @param pu64 Pointer to the 64-bit variable to update.
2709 * @param u64 The 64-bit value to assign to *pu64.
2710 */
2711#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2712DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2713#else
2714DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2715{
2716# if defined(RT_ARCH_AMD64)
2717# if RT_INLINE_ASM_USES_INTRIN
2718 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2719
2720# elif RT_INLINE_ASM_GNU_STYLE
2721 __asm__ __volatile__("xchgq %0, %1\n\t"
2722 : "=m" (*pu64),
2723 "=r" (u64)
2724 : "1" (u64),
2725 "m" (*pu64));
2726# else
2727 __asm
2728 {
2729 mov rdx, [pu64]
2730 mov rax, [u64]
2731 xchg [rdx], rax
2732 mov [u64], rax
2733 }
2734# endif
2735# else /* !RT_ARCH_AMD64 */
2736# if RT_INLINE_ASM_GNU_STYLE
2737# if defined(PIC) || defined(__PIC__)
2738 uint32_t u32EBX = (uint32_t)u64;
2739 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2740 "xchgl %%ebx, %3\n\t"
2741 "1:\n\t"
2742 "lock; cmpxchg8b (%5)\n\t"
2743 "jnz 1b\n\t"
2744 "movl %3, %%ebx\n\t"
2745 /*"xchgl %%esi, %5\n\t"*/
2746 : "=A" (u64),
2747 "=m" (*pu64)
2748 : "0" (*pu64),
2749 "m" ( u32EBX ),
2750 "c" ( (uint32_t)(u64 >> 32) ),
2751 "S" (pu64));
2752# else /* !PIC */
2753 __asm__ __volatile__("1:\n\t"
2754 "lock; cmpxchg8b %1\n\t"
2755 "jnz 1b\n\t"
2756 : "=A" (u64),
2757 "=m" (*pu64)
2758 : "0" (*pu64),
2759 "b" ( (uint32_t)u64 ),
2760 "c" ( (uint32_t)(u64 >> 32) ));
2761# endif
2762# else
2763 __asm
2764 {
2765 mov ebx, dword ptr [u64]
2766 mov ecx, dword ptr [u64 + 4]
2767 mov edi, pu64
2768 mov eax, dword ptr [edi]
2769 mov edx, dword ptr [edi + 4]
2770 retry:
2771 lock cmpxchg8b [edi]
2772 jnz retry
2773 mov dword ptr [u64], eax
2774 mov dword ptr [u64 + 4], edx
2775 }
2776# endif
2777# endif /* !RT_ARCH_AMD64 */
2778 return u64;
2779}
2780#endif
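
/* Usage sketch (illustrative only): publishing a 64-bit timestamp that may be
 * read concurrently on 32-bit hosts, where a plain 64-bit store is not atomic.
 * The variable names and u64Now (the new timestamp) are assumptions made for
 * the example.
 *
 *      static uint64_t volatile s_u64LastSeen = 0;
 *      uint64_t u64Prev = ASMAtomicXchgU64(&s_u64LastSeen, u64Now);
 */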
2781
2782
2783/**
2784 * Atomically Exchange a signed 64-bit value, ordered.
2785 *
2786 * @returns Current *pi64 value
2787 * @param pi64 Pointer to the 64-bit variable to update.
2788 * @param i64 The 64-bit value to assign to *pi64.
2789 */
2790DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2791{
2792 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2793}
2794
2795
2796#ifdef RT_ARCH_AMD64
2797/**
2798 * Atomically Exchange an unsigned 128-bit value, ordered.
2799 *
2800 * @returns Current *pu128.
2801 * @param pu128 Pointer to the 128-bit variable to update.
2802 * @param u128 The 128-bit value to assign to *pu128.
2803 *
2804 * @remark We cannot really assume that any hardware supports this. Nor do I have
2805 * GAS support for it. So, for the time being we'll BREAK the atomic
2806 * bit of this function and use two 64-bit exchanges instead.
2807 */
2808# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2809DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2810# else
2811DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2812{
2813 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2814 {
2815 /** @todo this is clumsy code */
2816 RTUINT128U u128Ret;
2817 u128Ret.u = u128;
2818 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2819 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2820 return u128Ret.u;
2821 }
2822#if 0 /* later? */
2823 else
2824 {
2825# if RT_INLINE_ASM_GNU_STYLE
2826 __asm__ __volatile__("1:\n\t"
2827 "lock; cmpxchg8b %1\n\t"
2828 "jnz 1b\n\t"
2829 : "=A" (u128),
2830 "=m" (*pu128)
2831 : "0" (*pu128),
2832 "b" ( (uint64_t)u128 ),
2833 "c" ( (uint64_t)(u128 >> 64) ));
2834# else
2835 __asm
2836 {
2837 mov rbx, qword ptr [u128]
2838 mov rcx, qword ptr [u128 + 8]
2839 mov rdi, pu128
2840 mov rax, qword ptr [rdi]
2841 mov rdx, qword ptr [rdi + 8]
2842 retry:
2843 lock cmpxchg16b [rdi]
2844 jnz retry
2845 mov qword ptr [u128], rax
2846 mov qword ptr [u128 + 8], rdx
2847 }
2848# endif
2849 }
2850 return u128;
2851#endif
2852}
2853# endif
2854#endif /* RT_ARCH_AMD64 */
2855
2856
2857/**
2858 * Atomically Exchange a pointer value, ordered.
2859 *
2860 * @returns Current *ppv value
2861 * @param ppv Pointer to the pointer variable to update.
2862 * @param pv The pointer value to assign to *ppv.
2863 */
2864DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2865{
2866#if ARCH_BITS == 32
2867 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2868#elif ARCH_BITS == 64
2869 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2870#else
2871# error "ARCH_BITS is bogus"
2872#endif
2873}
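
/* Usage sketch (illustrative only): detaching a producer/consumer list by
 * atomically swapping the shared head pointer with NULL. The list head variable
 * is an assumption made for the example.
 *
 *      static void * volatile s_pvListHead = NULL;
 *      void *pvDetached = ASMAtomicXchgPtr(&s_pvListHead, NULL);
 *      ... walk the detached chain starting at pvDetached ...
 */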
2874
2875
2876/**
2877 * Atomically Exchange a raw-mode context pointer value, ordered.
2878 *
2879 * @returns Current *ppvRC value
2880 * @param ppvRC Pointer to the pointer variable to update.
2881 * @param pvRC The pointer value to assign to *ppvRC.
2882 */
2883DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2884{
2885 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2886}
2887
2888
2889/**
2890 * Atomically Exchange a ring-0 pointer value, ordered.
2891 *
2892 * @returns Current *ppvR0 value
2893 * @param ppvR0 Pointer to the pointer variable to update.
2894 * @param pvR0 The pointer value to assign to *ppvR0.
2895 */
2896DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2897{
2898#if R0_ARCH_BITS == 32
2899 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2900#elif R0_ARCH_BITS == 64
2901 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2902#else
2903# error "R0_ARCH_BITS is bogus"
2904#endif
2905}
2906
2907
2908/**
2909 * Atomically Exchange a ring-3 pointer value, ordered.
2910 *
2911 * @returns Current *ppvR3 value
2912 * @param ppvR3 Pointer to the pointer variable to update.
2913 * @param pvR3 The pointer value to assign to *ppvR3.
2914 */
2915DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2916{
2917#if R3_ARCH_BITS == 32
2918 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2919#elif R3_ARCH_BITS == 64
2920 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2921#else
2922# error "R3_ARCH_BITS is bogus"
2923#endif
2924}
2925
2926
2927/** @def ASMAtomicXchgHandle
2928 * Atomically Exchange a typical IPRT handle value, ordered.
2929 *
2930 * @param ph Pointer to the value to update.
2931 * @param hNew The new value to assign to *ph.
2932 * @param phRes Where to store the current *ph value.
2933 *
2934 * @remarks This doesn't currently work for all handles (like RTFILE).
2935 */
2936#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2937 do { \
2938 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2939 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2940 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2941 } while (0)
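
/* Usage sketch (illustrative only): swapping in a new event semaphore handle and
 * retrieving the one it replaces. RTSEMEVENT is used as an example of a
 * pointer-sized IPRT handle; the variable names and hEventNew are assumptions
 * made for the example.
 *
 *      static RTSEMEVENT volatile s_hEvent = NIL_RTSEMEVENT;
 *      RTSEMEVENT hOld;
 *      ASMAtomicXchgHandle(&s_hEvent, hEventNew, &hOld);
 */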
2942
2943
2944/**
2945 * Atomically Exchange a value whose size might differ
2946 * between platforms or compilers, ordered.
2947 *
2948 * @param pu Pointer to the variable to update.
2949 * @param uNew The value to assign to *pu.
2950 * @todo This is busted as it's missing the result argument.
2951 */
2952#define ASMAtomicXchgSize(pu, uNew) \
2953 do { \
2954 switch (sizeof(*(pu))) { \
2955 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2956 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2957 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2958 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2959 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2960 } \
2961 } while (0)
2962
2963/**
2964 * Atomically Exchange a value whose size might differ
2965 * between platforms or compilers, ordered.
2966 *
2967 * @param pu Pointer to the variable to update.
2968 * @param uNew The value to assign to *pu.
2969 * @param puRes Where to store the current *pu value.
2970 */
2971#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2972 do { \
2973 switch (sizeof(*(pu))) { \
2974 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2975 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2976 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2977 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2978 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2979 } \
2980 } while (0)
2981
2982
2983/**
2984 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2985 *
2986 * @returns true if xchg was done.
2987 * @returns false if xchg wasn't done.
2988 *
2989 * @param pu32 Pointer to the value to update.
2990 * @param u32New The new value to assign to *pu32.
2991 * @param u32Old The old value to compare *pu32 with.
2992 */
2993#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2994DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2995#else
2996DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2997{
2998# if RT_INLINE_ASM_GNU_STYLE
2999 uint8_t u8Ret;
3000 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3001 "setz %1\n\t"
3002 : "=m" (*pu32),
3003 "=qm" (u8Ret),
3004 "=a" (u32Old)
3005 : "r" (u32New),
3006 "2" (u32Old),
3007 "m" (*pu32));
3008 return (bool)u8Ret;
3009
3010# elif RT_INLINE_ASM_USES_INTRIN
3011 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3012
3013# else
3014 uint32_t u32Ret;
3015 __asm
3016 {
3017# ifdef RT_ARCH_AMD64
3018 mov rdx, [pu32]
3019# else
3020 mov edx, [pu32]
3021# endif
3022 mov eax, [u32Old]
3023 mov ecx, [u32New]
3024# ifdef RT_ARCH_AMD64
3025 lock cmpxchg [rdx], ecx
3026# else
3027 lock cmpxchg [edx], ecx
3028# endif
3029 setz al
3030 movzx eax, al
3031 mov [u32Ret], eax
3032 }
3033 return !!u32Ret;
3034# endif
3035}
3036#endif
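
/* Usage sketch (illustrative only): a minimal test-and-set style spinlock built
 * on ASMAtomicCmpXchgU32. The lock word and its 0 = free / 1 = held convention
 * are assumptions made for the example.
 *
 *      static uint32_t volatile s_u32Lock = 0;
 *
 *      while (!ASMAtomicCmpXchgU32(&s_u32Lock, 1, 0))
 *      {
 *          ... spin or back off ...
 *      }
 *      ... critical section ...
 *      ASMAtomicWriteU32(&s_u32Lock, 0);
 */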
3037
3038
3039/**
3040 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3041 *
3042 * @returns true if xchg was done.
3043 * @returns false if xchg wasn't done.
3044 *
3045 * @param pi32 Pointer to the value to update.
3046 * @param i32New The new value to assign to *pi32.
3047 * @param i32Old The old value to compare *pi32 with.
3048 */
3049DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3050{
3051 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3052}
3053
3054
3055/**
3056 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3057 *
3058 * @returns true if xchg was done.
3059 * @returns false if xchg wasn't done.
3060 *
3061 * @param pu64 Pointer to the 64-bit variable to update.
3062 * @param u64New The 64-bit value to assign to *pu64.
3063 * @param u64Old The value to compare with.
3064 */
3065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3066DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3067#else
3068DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3069{
3070# if RT_INLINE_ASM_USES_INTRIN
3071 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3072
3073# elif defined(RT_ARCH_AMD64)
3074# if RT_INLINE_ASM_GNU_STYLE
3075 uint8_t u8Ret;
3076 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3077 "setz %1\n\t"
3078 : "=m" (*pu64),
3079 "=qm" (u8Ret),
3080 "=a" (u64Old)
3081 : "r" (u64New),
3082 "2" (u64Old),
3083 "m" (*pu64));
3084 return (bool)u8Ret;
3085# else
3086 bool fRet;
3087 __asm
3088 {
3089 mov rdx, [pu64]
3090 mov rax, [u64Old]
3091 mov rcx, [u64New]
3092 lock cmpxchg [rdx], rcx
3093 setz al
3094 mov [fRet], al
3095 }
3096 return fRet;
3097# endif
3098# else /* !RT_ARCH_AMD64 */
3099 uint32_t u32Ret;
3100# if RT_INLINE_ASM_GNU_STYLE
3101# if defined(PIC) || defined(__PIC__)
3102 uint32_t u32EBX = (uint32_t)u64New;
3103 uint32_t u32Spill;
3104 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3105 "lock; cmpxchg8b (%6)\n\t"
3106 "setz %%al\n\t"
3107 "movl %4, %%ebx\n\t"
3108 "movzbl %%al, %%eax\n\t"
3109 : "=a" (u32Ret),
3110 "=d" (u32Spill),
3111# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3112 "+m" (*pu64)
3113# else
3114 "=m" (*pu64)
3115# endif
3116 : "A" (u64Old),
3117 "m" ( u32EBX ),
3118 "c" ( (uint32_t)(u64New >> 32) ),
3119 "S" (pu64));
3120# else /* !PIC */
3121 uint32_t u32Spill;
3122 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3123 "setz %%al\n\t"
3124 "movzbl %%al, %%eax\n\t"
3125 : "=a" (u32Ret),
3126 "=d" (u32Spill),
3127 "+m" (*pu64)
3128 : "A" (u64Old),
3129 "b" ( (uint32_t)u64New ),
3130 "c" ( (uint32_t)(u64New >> 32) ));
3131# endif
3132 return (bool)u32Ret;
3133# else
3134 __asm
3135 {
3136 mov ebx, dword ptr [u64New]
3137 mov ecx, dword ptr [u64New + 4]
3138 mov edi, [pu64]
3139 mov eax, dword ptr [u64Old]
3140 mov edx, dword ptr [u64Old + 4]
3141 lock cmpxchg8b [edi]
3142 setz al
3143 movzx eax, al
3144 mov dword ptr [u32Ret], eax
3145 }
3146 return !!u32Ret;
3147# endif
3148# endif /* !RT_ARCH_AMD64 */
3149}
3150#endif
3151
3152
3153/**
3154 * Atomically Compare and exchange a signed 64-bit value, ordered.
3155 *
3156 * @returns true if xchg was done.
3157 * @returns false if xchg wasn't done.
3158 *
3159 * @param pi64 Pointer to the 64-bit variable to update.
3160 * @param i64 The 64-bit value to assign to *pi64.
3161 * @param i64Old The value to compare with.
3162 */
3163DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3164{
3165 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3166}
3167
3168
3169/**
3170 * Atomically Compare and Exchange a pointer value, ordered.
3171 *
3172 * @returns true if xchg was done.
3173 * @returns false if xchg wasn't done.
3174 *
3175 * @param ppv Pointer to the value to update.
3176 * @param pvNew The new value to assign to *ppv.
3177 * @param pvOld The old value to compare *ppv with.
3178 */
3179DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3180{
3181#if ARCH_BITS == 32
3182 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3183#elif ARCH_BITS == 64
3184 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3185#else
3186# error "ARCH_BITS is bogus"
3187#endif
3188}
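
/* Usage sketch (illustrative only): lock-free LIFO push using a
 * compare-and-exchange retry loop on the list head. The node structure, the
 * head variable and the function name are assumptions made for the example.
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *      static EXAMPLENODE * volatile s_pHead = NULL;
 *
 *      static void examplePush(EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOld;
 *          do
 *          {
 *              pOld = (EXAMPLENODE *)ASMAtomicReadPtr((void * volatile *)&s_pHead);
 *              pNode->pNext = pOld;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)&s_pHead, pNode, pOld));
 *      }
 */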
3189
3190
3191/** @def ASMAtomicCmpXchgHandle
3192 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3193 *
3194 * @param ph Pointer to the value to update.
3195 * @param hNew The new value to assign to *ph.
3196 * @param hOld The old value to compare *ph with.
3197 * @param fRc Where to store the result.
3198 *
3199 * @remarks This doesn't currently work for all handles (like RTFILE).
3200 */
3201#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3202 do { \
3203 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
3204 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3205 } while (0)
3206
3207
3208/** @def ASMAtomicCmpXchgSize
3209 * Atomically Compare and Exchange a value whose size might differ
3210 * between platforms or compilers, ordered.
3211 *
3212 * @param pu Pointer to the value to update.
3213 * @param uNew The new value to assign to *pu.
3214 * @param uOld The old value to compare *pu with.
3215 * @param fRc Where to store the result.
3216 */
3217#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3218 do { \
3219 switch (sizeof(*(pu))) { \
3220 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3221 break; \
3222 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3223 break; \
3224 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3225 (fRc) = false; \
3226 break; \
3227 } \
3228 } while (0)
3229
3230
3231/**
3232 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3233 * passes back old value, ordered.
3234 *
3235 * @returns true if xchg was done.
3236 * @returns false if xchg wasn't done.
3237 *
3238 * @param pu32 Pointer to the value to update.
3239 * @param u32New The new value to assign to *pu32.
3240 * @param u32Old The old value to compare *pu32 with.
3241 * @param pu32Old Pointer to where to store the old value.
3242 */
3243#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3244DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3245#else
3246DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3247{
3248# if RT_INLINE_ASM_GNU_STYLE
3249 uint8_t u8Ret;
3250 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3251 "setz %1\n\t"
3252 : "=m" (*pu32),
3253 "=qm" (u8Ret),
3254 "=a" (*pu32Old)
3255 : "r" (u32New),
3256 "a" (u32Old),
3257 "m" (*pu32));
3258 return (bool)u8Ret;
3259
3260# elif RT_INLINE_ASM_USES_INTRIN
3261 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3262
3263# else
3264 uint32_t u32Ret;
3265 __asm
3266 {
3267# ifdef RT_ARCH_AMD64
3268 mov rdx, [pu32]
3269# else
3270 mov edx, [pu32]
3271# endif
3272 mov eax, [u32Old]
3273 mov ecx, [u32New]
3274# ifdef RT_ARCH_AMD64
3275 lock cmpxchg [rdx], ecx
3276 mov rdx, [pu32Old]
3277 mov [rdx], eax
3278# else
3279 lock cmpxchg [edx], ecx
3280 mov edx, [pu32Old]
3281 mov [edx], eax
3282# endif
3283 setz al
3284 movzx eax, al
3285 mov [u32Ret], eax
3286 }
3287 return !!u32Ret;
3288# endif
3289}
3290#endif
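
/* Usage sketch (illustrative only): the Ex variant hands back the value that was
 * actually found, so a retry loop needs no separate re-read. Here it maintains a
 * "high water mark"; the function name is an assumption made for the example.
 *
 *      static void exampleAtomicMaxU32(uint32_t volatile *pu32, uint32_t u32New)
 *      {
 *          uint32_t u32Old = ASMAtomicReadU32(pu32);
 *          while (u32New > u32Old)
 *              if (ASMAtomicCmpXchgExU32(pu32, u32New, u32Old, &u32Old))
 *                  break;
 *      }
 */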
3291
3292
3293/**
3294 * Atomically Compare and Exchange a signed 32-bit value, additionally
3295 * passes back old value, ordered.
3296 *
3297 * @returns true if xchg was done.
3298 * @returns false if xchg wasn't done.
3299 *
3300 * @param pi32 Pointer to the value to update.
3301 * @param i32New The new value to assign to *pi32.
3302 * @param i32Old The old value to compare *pi32 with.
3303 * @param pi32Old Pointer to where to store the old value.
3304 */
3305DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3306{
3307 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3308}
3309
3310
3311/**
3312 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3313 * passing back old value, ordered.
3314 *
3315 * @returns true if xchg was done.
3316 * @returns false if xchg wasn't done.
3317 *
3318 * @param pu64 Pointer to the 64-bit variable to update.
3319 * @param u64New The 64-bit value to assign to *pu64.
3320 * @param u64Old The value to compare with.
3321 * @param pu64Old Pointer to where to store the old value.
3322 */
3323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3324DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3325#else
3326DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3327{
3328# if RT_INLINE_ASM_USES_INTRIN
3329 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3330
3331# elif defined(RT_ARCH_AMD64)
3332# if RT_INLINE_ASM_GNU_STYLE
3333 uint8_t u8Ret;
3334 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3335 "setz %1\n\t"
3336 : "=m" (*pu64),
3337 "=qm" (u8Ret),
3338 "=a" (*pu64Old)
3339 : "r" (u64New),
3340 "a" (u64Old),
3341 "m" (*pu64));
3342 return (bool)u8Ret;
3343# else
3344 bool fRet;
3345 __asm
3346 {
3347 mov rdx, [pu64]
3348 mov rax, [u64Old]
3349 mov rcx, [u64New]
3350 lock cmpxchg [rdx], rcx
3351 mov rdx, [pu64Old]
3352 mov [rdx], rax
3353 setz al
3354 mov [fRet], al
3355 }
3356 return fRet;
3357# endif
3358# else /* !RT_ARCH_AMD64 */
3359# if RT_INLINE_ASM_GNU_STYLE
3360 uint64_t u64Ret;
3361# if defined(PIC) || defined(__PIC__)
3362 /* NB: this code uses a memory clobber description, because the clean
3363 * solution with an output value for *pu64 makes gcc run out of registers.
3364 * This will cause suboptimal code, and anyone with a better solution is
3365 * welcome to improve this. */
3366 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3367 "lock; cmpxchg8b %3\n\t"
3368 "xchgl %%ebx, %1\n\t"
3369 : "=A" (u64Ret)
3370 : "DS" ((uint32_t)u64New),
3371 "c" ((uint32_t)(u64New >> 32)),
3372 "m" (*pu64),
3373 "0" (u64Old)
3374 : "memory" );
3375# else /* !PIC */
3376 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3377 : "=A" (u64Ret),
3378 "=m" (*pu64)
3379 : "b" ((uint32_t)u64New),
3380 "c" ((uint32_t)(u64New >> 32)),
3381 "m" (*pu64),
3382 "0" (u64Old));
3383# endif
3384 *pu64Old = u64Ret;
3385 return u64Ret == u64Old;
3386# else
3387 uint32_t u32Ret;
3388 __asm
3389 {
3390 mov ebx, dword ptr [u64New]
3391 mov ecx, dword ptr [u64New + 4]
3392 mov edi, [pu64]
3393 mov eax, dword ptr [u64Old]
3394 mov edx, dword ptr [u64Old + 4]
3395 lock cmpxchg8b [edi]
3396 mov ebx, [pu64Old]
3397 mov [ebx], eax
3398 setz al
3399 movzx eax, al
3400 add ebx, 4
3401 mov [ebx], edx
3402 mov dword ptr [u32Ret], eax
3403 }
3404 return !!u32Ret;
3405# endif
3406# endif /* !RT_ARCH_AMD64 */
3407}
3408#endif
3409
3410
3411/**
3412 * Atomically Compare and exchange a signed 64-bit value, additionally
3413 * passing back old value, ordered.
3414 *
3415 * @returns true if xchg was done.
3416 * @returns false if xchg wasn't done.
3417 *
3418 * @param pi64 Pointer to the 64-bit variable to update.
3419 * @param i64 The 64-bit value to assign to *pi64.
3420 * @param i64Old The value to compare with.
3421 * @param pi64Old Pointer to where to store the old value.
3422 */
3423DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3424{
3425 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3426}
3427
3428/** @def ASMAtomicCmpXchgExHandle
3429 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3430 *
3431 * @param ph Pointer to the value to update.
3432 * @param hNew The new value to assign to *ph.
3433 * @param hOld The old value to compare *ph with.
3434 * @param fRc Where to store the result.
3435 * @param phOldVal Pointer to where to store the old value.
3436 *
3437 * @remarks This doesn't currently work for all handles (like RTFILE).
3438 */
3439#if ARCH_BITS == 32
3440# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3441 do { \
3442 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3443 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3444 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3445 } while (0)
3446#elif ARCH_BITS == 64
3447# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3448 do { \
3449 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3450 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3451 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3452 } while (0)
3453#endif
3454
3455
3456/** @def ASMAtomicCmpXchgExSize
3457 * Atomically Compare and Exchange a value whose size might differ
3458 * between platforms or compilers. Additionally passes back old value.
3459 *
3460 * @param pu Pointer to the value to update.
3461 * @param uNew The new value to assign to *pu.
3462 * @param uOld The old value to compare *pu with.
3463 * @param fRc Where to store the result.
3464 * @param puOldVal Pointer to where to store the old value.
3465 */
3466#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3467 do { \
3468 switch (sizeof(*(pu))) { \
3469 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3470 break; \
3471 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3472 break; \
3473 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3474 (fRc) = false; \
3475 *(puOldVal) = 0; \
3476 break; \
3477 } \
3478 } while (0)
3479
3480
3481/**
3482 * Atomically Compare and Exchange a pointer value, additionally
3483 * passing back old value, ordered.
3484 *
3485 * @returns true if xchg was done.
3486 * @returns false if xchg wasn't done.
3487 *
3488 * @param ppv Pointer to the value to update.
3489 * @param pvNew The new value to assign to *ppv.
3490 * @param pvOld The old value to compare *ppv with.
3491 * @param ppvOld Pointer to where to store the old value.
3492 */
3493DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3494{
3495#if ARCH_BITS == 32
3496 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3497#elif ARCH_BITS == 64
3498 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3499#else
3500# error "ARCH_BITS is bogus"
3501#endif
3502}
3503
3504
3505/**
3506 * Atomically exchanges and adds to a 32-bit value, ordered.
3507 *
3508 * @returns The old value.
3509 * @param pu32 Pointer to the value.
3510 * @param u32 Number to add.
3511 */
3512#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3513DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3514#else
3515DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3516{
3517# if RT_INLINE_ASM_USES_INTRIN
3518 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3519 return u32;
3520
3521# elif RT_INLINE_ASM_GNU_STYLE
3522 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3523 : "=r" (u32),
3524 "=m" (*pu32)
3525 : "0" (u32),
3526 "m" (*pu32)
3527 : "memory");
3528 return u32;
3529# else
3530 __asm
3531 {
3532 mov eax, [u32]
3533# ifdef RT_ARCH_AMD64
3534 mov rdx, [pu32]
3535 lock xadd [rdx], eax
3536# else
3537 mov edx, [pu32]
3538 lock xadd [edx], eax
3539# endif
3540 mov [u32], eax
3541 }
3542 return u32;
3543# endif
3544}
3545#endif
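
/* Usage sketch (illustrative only): because the old value is returned, this works
 * as a fetch-and-add, e.g. for handing out sequence numbers. The counter name is
 * an assumption made for the example.
 *
 *      static uint32_t volatile s_uNextSeqNo = 0;
 *      uint32_t uMySeqNo = ASMAtomicAddU32(&s_uNextSeqNo, 1);
 */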
3546
3547
3548/**
3549 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3550 *
3551 * @returns The old value.
3552 * @param pi32 Pointer to the value.
3553 * @param i32 Number to add.
3554 */
3555DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3556{
3557 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3558}
3559
3560
3561/**
3562 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3563 *
3564 * @returns The old value.
3565 * @param pu32 Pointer to the value.
3566 * @param u32 Number to subtract.
3567 */
3568DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
3569{
3570 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3571}
3572
3573
3574/**
3575 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3576 *
3577 * @returns The old value.
3578 * @param pi32 Pointer to the value.
3579 * @param i32 Number to subtract.
3580 */
3581DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3582{
3583 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3584}
3585
3586
3587/**
3588 * Atomically increment a 32-bit value, ordered.
3589 *
3590 * @returns The new value.
3591 * @param pu32 Pointer to the value to increment.
3592 */
3593#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3594DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3595#else
3596DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3597{
3598 uint32_t u32;
3599# if RT_INLINE_ASM_USES_INTRIN
3600 u32 = _InterlockedIncrement((long *)pu32);
3601 return u32;
3602
3603# elif RT_INLINE_ASM_GNU_STYLE
3604 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3605 : "=r" (u32),
3606 "=m" (*pu32)
3607 : "0" (1),
3608 "m" (*pu32)
3609 : "memory");
3610 return u32+1;
3611# else
3612 __asm
3613 {
3614 mov eax, 1
3615# ifdef RT_ARCH_AMD64
3616 mov rdx, [pu32]
3617 lock xadd [rdx], eax
3618# else
3619 mov edx, [pu32]
3620 lock xadd [edx], eax
3621# endif
3622 mov u32, eax
3623 }
3624 return u32+1;
3625# endif
3626}
3627#endif
3628
3629
3630/**
3631 * Atomically increment a signed 32-bit value, ordered.
3632 *
3633 * @returns The new value.
3634 * @param pi32 Pointer to the value to increment.
3635 */
3636DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3637{
3638 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3639}
3640
3641
3642/**
3643 * Atomically decrement an unsigned 32-bit value, ordered.
3644 *
3645 * @returns The new value.
3646 * @param pu32 Pointer to the value to decrement.
3647 */
3648#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3649DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3650#else
3651DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3652{
3653 uint32_t u32;
3654# if RT_INLINE_ASM_USES_INTRIN
3655 u32 = _InterlockedDecrement((long *)pu32);
3656 return u32;
3657
3658# elif RT_INLINE_ASM_GNU_STYLE
3659 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3660 : "=r" (u32),
3661 "=m" (*pu32)
3662 : "0" (-1),
3663 "m" (*pu32)
3664 : "memory");
3665 return u32-1;
3666# else
3667 __asm
3668 {
3669 mov eax, -1
3670# ifdef RT_ARCH_AMD64
3671 mov rdx, [pu32]
3672 lock xadd [rdx], eax
3673# else
3674 mov edx, [pu32]
3675 lock xadd [edx], eax
3676# endif
3677 mov u32, eax
3678 }
3679 return u32-1;
3680# endif
3681}
3682#endif
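
/* Usage sketch (illustrative only): ASMAtomicIncU32 / ASMAtomicDecU32 return the
 * new value, which is the common shape for reference counting. The object layout
 * and the destroy function are assumptions made for the example.
 *
 *      typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;
 *
 *      static void exampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      static void exampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)
 *              exampleDestroy(pObj);
 *      }
 */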
3683
3684
3685/**
3686 * Atomically decrement a signed 32-bit value, ordered.
3687 *
3688 * @returns The new value.
3689 * @param pi32 Pointer to the value to decrement.
3690 */
3691DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3692{
3693 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3694}
3695
3696
3697/**
3698 * Atomically Or an unsigned 32-bit value, ordered.
3699 *
3700 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3701 * @param u32 The value to OR *pu32 with.
3702 */
3703#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3704DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3705#else
3706DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3707{
3708# if RT_INLINE_ASM_USES_INTRIN
3709 _InterlockedOr((long volatile *)pu32, (long)u32);
3710
3711# elif RT_INLINE_ASM_GNU_STYLE
3712 __asm__ __volatile__("lock; orl %1, %0\n\t"
3713 : "=m" (*pu32)
3714 : "ir" (u32),
3715 "m" (*pu32));
3716# else
3717 __asm
3718 {
3719 mov eax, [u32]
3720# ifdef RT_ARCH_AMD64
3721 mov rdx, [pu32]
3722 lock or [rdx], eax
3723# else
3724 mov edx, [pu32]
3725 lock or [edx], eax
3726# endif
3727 }
3728# endif
3729}
3730#endif
3731
3732
3733/**
3734 * Atomically Or a signed 32-bit value, ordered.
3735 *
3736 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3737 * @param i32 The value to OR *pi32 with.
3738 */
3739DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3740{
3741 ASMAtomicOrU32((uint32_t volatile *)pi32, (uint32_t)i32);
3742}
3743
3744
3745/**
3746 * Atomically And an unsigned 32-bit value, ordered.
3747 *
3748 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3749 * @param u32 The value to AND *pu32 with.
3750 */
3751#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3752DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3753#else
3754DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3755{
3756# if RT_INLINE_ASM_USES_INTRIN
3757 _InterlockedAnd((long volatile *)pu32, u32);
3758
3759# elif RT_INLINE_ASM_GNU_STYLE
3760 __asm__ __volatile__("lock; andl %1, %0\n\t"
3761 : "=m" (*pu32)
3762 : "ir" (u32),
3763 "m" (*pu32));
3764# else
3765 __asm
3766 {
3767 mov eax, [u32]
3768# ifdef RT_ARCH_AMD64
3769 mov rdx, [pu32]
3770 lock and [rdx], eax
3771# else
3772 mov edx, [pu32]
3773 lock and [edx], eax
3774# endif
3775 }
3776# endif
3777}
3778#endif
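
/* Usage sketch (illustrative only): ASMAtomicOrU32 / ASMAtomicAndU32 are the usual
 * way to set and clear bits in a shared flag word. The flag value and variable
 * name are assumptions made for the example.
 *
 *      #define EXAMPLE_F_BUSY  UINT32_C(0x00000001)
 *      static uint32_t volatile s_fStatus = 0;
 *
 *      ASMAtomicOrU32(&s_fStatus, EXAMPLE_F_BUSY);     ... set the busy bit ...
 *      ASMAtomicAndU32(&s_fStatus, ~EXAMPLE_F_BUSY);   ... clear it again ...
 */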
3779
3780
3781/**
3782 * Atomically And a signed 32-bit value, ordered.
3783 *
3784 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3785 * @param i32 The value to AND *pi32 with.
3786 */
3787DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3788{
3789 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3790}
3791
3792
3793/**
3794 * Memory fence, waits for any pending writes and reads to complete.
3795 */
3796DECLINLINE(void) ASMMemoryFence(void)
3797{
3798 /** @todo use mfence? check if all cpus we care for support it. */
3799 uint32_t volatile u32;
3800 ASMAtomicXchgU32(&u32, 0);
3801}
3802
3803
3804/**
3805 * Write fence, waits for any pending writes to complete.
3806 */
3807DECLINLINE(void) ASMWriteFence(void)
3808{
3809 /** @todo use sfence? check if all cpus we care for support it. */
3810 ASMMemoryFence();
3811}
3812
3813
3814/**
3815 * Read fence, waits for any pending reads to complete.
3816 */
3817DECLINLINE(void) ASMReadFence(void)
3818{
3819 /** @todo use lfence? check if all cpus we care for support it. */
3820 ASMMemoryFence();
3821}
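
/* Usage sketch (illustrative only): pairing a write fence on the producer side
 * with a read fence on the consumer side when a plain data field is published
 * through a flag. The payload and flag variables are assumptions made for the
 * example.
 *
 *      producer:
 *          s_u32Payload = 42;
 *          ASMWriteFence();
 *          ASMAtomicUoWriteBool(&s_fReady, true);
 *
 *      consumer:
 *          if (ASMAtomicUoReadBool(&s_fReady))
 *          {
 *              ASMReadFence();
 *              ... s_u32Payload is now safe to read ...
 *          }
 */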
3822
3823
3824/**
3825 * Atomically reads an unsigned 8-bit value, ordered.
3826 *
3827 * @returns Current *pu8 value
3828 * @param pu8 Pointer to the 8-bit variable to read.
3829 */
3830DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3831{
3832 ASMMemoryFence();
3833 return *pu8; /* byte reads are atomic on x86 */
3834}
3835
3836
3837/**
3838 * Atomically reads an unsigned 8-bit value, unordered.
3839 *
3840 * @returns Current *pu8 value
3841 * @param pu8 Pointer to the 8-bit variable to read.
3842 */
3843DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3844{
3845 return *pu8; /* byte reads are atomic on x86 */
3846}
3847
3848
3849/**
3850 * Atomically reads a signed 8-bit value, ordered.
3851 *
3852 * @returns Current *pi8 value
3853 * @param pi8 Pointer to the 8-bit variable to read.
3854 */
3855DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3856{
3857 ASMMemoryFence();
3858 return *pi8; /* byte reads are atomic on x86 */
3859}
3860
3861
3862/**
3863 * Atomically reads a signed 8-bit value, unordered.
3864 *
3865 * @returns Current *pi8 value
3866 * @param pi8 Pointer to the 8-bit variable to read.
3867 */
3868DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3869{
3870 return *pi8; /* byte reads are atomic on x86 */
3871}
3872
3873
3874/**
3875 * Atomically reads an unsigned 16-bit value, ordered.
3876 *
3877 * @returns Current *pu16 value
3878 * @param pu16 Pointer to the 16-bit variable to read.
3879 */
3880DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3881{
3882 ASMMemoryFence();
3883 Assert(!((uintptr_t)pu16 & 1));
3884 return *pu16;
3885}
3886
3887
3888/**
3889 * Atomically reads an unsigned 16-bit value, unordered.
3890 *
3891 * @returns Current *pu16 value
3892 * @param pu16 Pointer to the 16-bit variable to read.
3893 */
3894DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3895{
3896 Assert(!((uintptr_t)pu16 & 1));
3897 return *pu16;
3898}
3899
3900
3901/**
3902 * Atomically reads a signed 16-bit value, ordered.
3903 *
3904 * @returns Current *pi16 value
3905 * @param pi16 Pointer to the 16-bit variable to read.
3906 */
3907DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3908{
3909 ASMMemoryFence();
3910 Assert(!((uintptr_t)pi16 & 1));
3911 return *pi16;
3912}
3913
3914
3915/**
3916 * Atomically reads a signed 16-bit value, unordered.
3917 *
3918 * @returns Current *pi16 value
3919 * @param pi16 Pointer to the 16-bit variable to read.
3920 */
3921DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3922{
3923 Assert(!((uintptr_t)pi16 & 1));
3924 return *pi16;
3925}
3926
3927
3928/**
3929 * Atomically reads an unsigned 32-bit value, ordered.
3930 *
3931 * @returns Current *pu32 value
3932 * @param pu32 Pointer to the 32-bit variable to read.
3933 */
3934DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3935{
3936 ASMMemoryFence();
3937 Assert(!((uintptr_t)pu32 & 3));
3938 return *pu32;
3939}
3940
3941
3942/**
3943 * Atomically reads an unsigned 32-bit value, unordered.
3944 *
3945 * @returns Current *pu32 value
3946 * @param pu32 Pointer to the 32-bit variable to read.
3947 */
3948DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3949{
3950 Assert(!((uintptr_t)pu32 & 3));
3951 return *pu32;
3952}
3953
3954
3955/**
3956 * Atomically reads a signed 32-bit value, ordered.
3957 *
3958 * @returns Current *pi32 value
3959 * @param pi32 Pointer to the 32-bit variable to read.
3960 */
3961DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3962{
3963 ASMMemoryFence();
3964 Assert(!((uintptr_t)pi32 & 3));
3965 return *pi32;
3966}
3967
3968
3969/**
3970 * Atomically reads a signed 32-bit value, unordered.
3971 *
3972 * @returns Current *pi32 value
3973 * @param pi32 Pointer to the 32-bit variable to read.
3974 */
3975DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3976{
3977 Assert(!((uintptr_t)pi32 & 3));
3978 return *pi32;
3979}
3980
3981
3982/**
3983 * Atomically reads an unsigned 64-bit value, ordered.
3984 *
3985 * @returns Current *pu64 value
3986 * @param pu64 Pointer to the 64-bit variable to read.
3987 * The memory pointed to must be writable.
3988 * @remark This will fault if the memory is read-only!
3989 */
3990#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3991DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3992#else
3993DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3994{
3995 uint64_t u64;
3996# ifdef RT_ARCH_AMD64
3997 Assert(!((uintptr_t)pu64 & 7));
3998/*# if RT_INLINE_ASM_GNU_STYLE
3999 __asm__ __volatile__( "mfence\n\t"
4000 "movq %1, %0\n\t"
4001 : "=r" (u64)
4002 : "m" (*pu64));
4003# else
4004 __asm
4005 {
4006 mfence
4007 mov rdx, [pu64]
4008 mov rax, [rdx]
4009 mov [u64], rax
4010 }
4011# endif*/
4012 ASMMemoryFence();
4013 u64 = *pu64;
4014# else /* !RT_ARCH_AMD64 */
4015# if RT_INLINE_ASM_GNU_STYLE
4016# if defined(PIC) || defined(__PIC__)
4017 uint32_t u32EBX = 0;
4018 Assert(!((uintptr_t)pu64 & 7));
4019 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4020 "lock; cmpxchg8b (%5)\n\t"
4021 "movl %3, %%ebx\n\t"
4022 : "=A" (u64),
4023# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4024 "+m" (*pu64)
4025# else
4026 "=m" (*pu64)
4027# endif
4028 : "0" (0),
4029 "m" (u32EBX),
4030 "c" (0),
4031 "S" (pu64));
4032# else /* !PIC */
4033 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4034 : "=A" (u64),
4035 "+m" (*pu64)
4036 : "0" (0),
4037 "b" (0),
4038 "c" (0));
4039# endif
4040# else
4041 Assert(!((uintptr_t)pu64 & 7));
4042 __asm
4043 {
4044 xor eax, eax
4045 xor edx, edx
4046 mov edi, pu64
4047 xor ecx, ecx
4048 xor ebx, ebx
4049 lock cmpxchg8b [edi]
4050 mov dword ptr [u64], eax
4051 mov dword ptr [u64 + 4], edx
4052 }
4053# endif
4054# endif /* !RT_ARCH_AMD64 */
4055 return u64;
4056}
4057#endif
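
/* Usage sketch (illustrative only): on 32-bit hosts a plain 64-bit load is not
 * atomic, so a shared 64-bit counter should be read like this. As noted in the
 * remark above, the variable must be writable because the 32-bit implementation
 * uses cmpxchg8b. The counter name is an assumption made for the example.
 *
 *      static uint64_t volatile s_cTotalBytes = 0;
 *      uint64_t cbSnapshot = ASMAtomicReadU64(&s_cTotalBytes);
 */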
4058
4059
4060/**
4061 * Atomically reads an unsigned 64-bit value, unordered.
4062 *
4063 * @returns Current *pu64 value
4064 * @param pu64 Pointer to the 64-bit variable to read.
4065 * The memory pointed to must be writable.
4066 * @remark This will fault if the memory is read-only!
4067 */
4068#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4069DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4070#else
4071DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4072{
4073 uint64_t u64;
4074# ifdef RT_ARCH_AMD64
4075 Assert(!((uintptr_t)pu64 & 7));
4076/*# if RT_INLINE_ASM_GNU_STYLE
4077 Assert(!((uintptr_t)pu64 & 7));
4078 __asm__ __volatile__("movq %1, %0\n\t"
4079 : "=r" (u64)
4080 : "m" (*pu64));
4081# else
4082 __asm
4083 {
4084 mov rdx, [pu64]
4085 mov rax, [rdx]
4086 mov [u64], rax
4087 }
4088# endif */
4089 u64 = *pu64;
4090# else /* !RT_ARCH_AMD64 */
4091# if RT_INLINE_ASM_GNU_STYLE
4092# if defined(PIC) || defined(__PIC__)
4093 uint32_t u32EBX = 0;
4094 uint32_t u32Spill;
4095 Assert(!((uintptr_t)pu64 & 7));
4096 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4097 "xor %%ecx,%%ecx\n\t"
4098 "xor %%edx,%%edx\n\t"
4099 "xchgl %%ebx, %3\n\t"
4100 "lock; cmpxchg8b (%4)\n\t"
4101 "movl %3, %%ebx\n\t"
4102 : "=A" (u64),
4103# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4104 "+m" (*pu64),
4105# else
4106 "=m" (*pu64),
4107# endif
4108 "=c" (u32Spill)
4109 : "m" (u32EBX),
4110 "S" (pu64));
4111# else /* !PIC */
4112 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4113 : "=A" (u64),
4114 "+m" (*pu64)
4115 : "0" (0),
4116 "b" (0),
4117 "c" (0));
4118# endif
4119# else
4120 Assert(!((uintptr_t)pu64 & 7));
4121 __asm
4122 {
4123 xor eax, eax
4124 xor edx, edx
4125 mov edi, pu64
4126 xor ecx, ecx
4127 xor ebx, ebx
4128 lock cmpxchg8b [edi]
4129 mov dword ptr [u64], eax
4130 mov dword ptr [u64 + 4], edx
4131 }
4132# endif
4133# endif /* !RT_ARCH_AMD64 */
4134 return u64;
4135}
4136#endif
4137
4138
4139/**
4140 * Atomically reads a signed 64-bit value, ordered.
4141 *
4142 * @returns Current *pi64 value
4143 * @param pi64 Pointer to the 64-bit variable to read.
4144 * The memory pointed to must be writable.
4145 * @remark This will fault if the memory is read-only!
4146 */
4147DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4148{
4149 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4150}
4151
4152
4153/**
4154 * Atomically reads a signed 64-bit value, unordered.
4155 *
4156 * @returns Current *pi64 value
4157 * @param pi64 Pointer to the 64-bit variable to read.
4158 * The memory pointed to must be writable.
4159 * @remark This will fault if the memory is read-only!
4160 */
4161DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4162{
4163 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4164}
4165
4166
4167/**
4168 * Atomically reads a pointer value, ordered.
4169 *
4170 * @returns Current *pv value
4171 * @param ppv Pointer to the pointer variable to read.
4172 */
4173DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4174{
4175#if ARCH_BITS == 32
4176 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4177#elif ARCH_BITS == 64
4178 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4179#else
4180# error "ARCH_BITS is bogus"
4181#endif
4182}
4183
4184
4185/**
4186 * Atomically reads a pointer value, unordered.
4187 *
4188 * @returns Current *pv value
4189 * @param ppv Pointer to the pointer variable to read.
4190 */
4191DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4192{
4193#if ARCH_BITS == 32
4194 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4195#elif ARCH_BITS == 64
4196 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4197#else
4198# error "ARCH_BITS is bogus"
4199#endif
4200}
4201
4202
4203/**
4204 * Atomically reads a boolean value, ordered.
4205 *
4206 * @returns Current *pf value
4207 * @param pf Pointer to the boolean variable to read.
4208 */
4209DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4210{
4211 ASMMemoryFence();
4212 return *pf; /* byte reads are atomic on x86 */
4213}
4214
4215
4216/**
4217 * Atomically reads a boolean value, unordered.
4218 *
4219 * @returns Current *pf value
4220 * @param pf Pointer to the boolean variable to read.
4221 */
4222DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4223{
4224 return *pf; /* byte reads are atomic on x86 */
4225}
4226
4227
4228/**
4229 * Atomically read a typical IPRT handle value, ordered.
4230 *
4231 * @param ph Pointer to the handle variable to read.
4232 * @param phRes Where to store the result.
4233 *
4234 * @remarks This doesn't currently work for all handles (like RTFILE).
4235 */
4236#define ASMAtomicReadHandle(ph, phRes) \
4237 do { \
4238 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
4239 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4240 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4241 } while (0)
4242
4243
4244/**
4245 * Atomically read a typical IPRT handle value, unordered.
4246 *
4247 * @param ph Pointer to the handle variable to read.
4248 * @param phRes Where to store the result.
4249 *
4250 * @remarks This doesn't currently work for all handles (like RTFILE).
4251 */
4252#define ASMAtomicUoReadHandle(ph, phRes) \
4253 do { \
4254 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4255 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4256 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4257 } while (0)
4258
4259
4260/**
4261 * Atomically read a value whose size might differ
4262 * between platforms or compilers, ordered.
4263 *
4264 * @param pu Pointer to the variable to read.
4265 * @param puRes Where to store the result.
4266 */
4267#define ASMAtomicReadSize(pu, puRes) \
4268 do { \
4269 switch (sizeof(*(pu))) { \
4270 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4271 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4272 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4273 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4274 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4275 } \
4276 } while (0)
4277
4278
4279/**
4280 * Atomically read a value whose size might differ
4281 * between platforms or compilers, unordered.
4282 *
4283 * @param pu Pointer to the variable to read.
4284 * @param puRes Where to store the result.
4285 */
4286#define ASMAtomicUoReadSize(pu, puRes) \
4287 do { \
4288 switch (sizeof(*(pu))) { \
4289 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4290 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4291 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4292 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4293 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4294 } \
4295 } while (0)
4296
4297
4298/**
4299 * Atomically writes an unsigned 8-bit value, ordered.
4300 *
4301 * @param pu8 Pointer to the 8-bit variable.
4302 * @param u8 The 8-bit value to assign to *pu8.
4303 */
4304DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4305{
4306 ASMAtomicXchgU8(pu8, u8);
4307}
4308
4309
4310/**
4311 * Atomically writes an unsigned 8-bit value, unordered.
4312 *
4313 * @param pu8 Pointer to the 8-bit variable.
4314 * @param u8 The 8-bit value to assign to *pu8.
4315 */
4316DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4317{
4318 *pu8 = u8; /* byte writes are atomic on x86 */
4319}
4320
4321
4322/**
4323 * Atomically writes a signed 8-bit value, ordered.
4324 *
4325 * @param pi8 Pointer to the 8-bit variable to update.
4326 * @param i8 The 8-bit value to assign to *pi8.
4327 */
4328DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4329{
4330 ASMAtomicXchgS8(pi8, i8);
4331}
4332
4333
4334/**
4335 * Atomically writes a signed 8-bit value, unordered.
4336 *
4337 * @param pi8 Pointer to the 8-bit variable to update.
4338 * @param i8 The 8-bit value to assign to *pi8.
4339 */
4340DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4341{
4342 *pi8 = i8; /* byte writes are atomic on x86 */
4343}
4344
4345
4346/**
4347 * Atomically writes an unsigned 16-bit value, ordered.
4348 *
4349 * @param pu16 Pointer to the 16-bit variable.
4350 * @param u16 The 16-bit value to assign to *pu16.
4351 */
4352DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4353{
4354 ASMAtomicXchgU16(pu16, u16);
4355}
4356
4357
4358/**
4359 * Atomically writes an unsigned 16-bit value, unordered.
4360 *
4361 * @param pu16 Pointer to the 16-bit variable.
4362 * @param u16 The 16-bit value to assign to *pu16.
4363 */
4364DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4365{
4366 Assert(!((uintptr_t)pu16 & 1));
4367 *pu16 = u16;
4368}
4369
4370
4371/**
4372 * Atomically writes a signed 16-bit value, ordered.
4373 *
4374 * @param pi16 Pointer to the 16-bit variable to update.
4375 * @param i16 The 16-bit value to assign to *pi16.
4376 */
4377DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4378{
4379 ASMAtomicXchgS16(pi16, i16);
4380}
4381
4382
4383/**
4384 * Atomically writes a signed 16-bit value, unordered.
4385 *
4386 * @param pi16 Pointer to the 16-bit variable to update.
4387 * @param i16 The 16-bit value to assign to *pi16.
4388 */
4389DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4390{
4391 Assert(!((uintptr_t)pi16 & 1));
4392 *pi16 = i16;
4393}
4394
4395
4396/**
4397 * Atomically writes an unsigned 32-bit value, ordered.
4398 *
4399 * @param pu32 Pointer to the 32-bit variable.
4400 * @param u32 The 32-bit value to assign to *pu32.
4401 */
4402DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4403{
4404 ASMAtomicXchgU32(pu32, u32);
4405}
4406
4407
4408/**
4409 * Atomically writes an unsigned 32-bit value, unordered.
4410 *
4411 * @param pu32 Pointer to the 32-bit variable.
4412 * @param u32 The 32-bit value to assign to *pu32.
4413 */
4414DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4415{
4416 Assert(!((uintptr_t)pu32 & 3));
4417 *pu32 = u32;
4418}
4419
4420
4421/**
4422 * Atomically writes a signed 32-bit value, ordered.
4423 *
4424 * @param pi32 Pointer to the 32-bit variable to update.
4425 * @param i32 The 32-bit value to assign to *pi32.
4426 */
4427DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4428{
4429 ASMAtomicXchgS32(pi32, i32);
4430}
4431
4432
4433/**
4434 * Atomically writes a signed 32-bit value, unordered.
4435 *
4436 * @param pi32 Pointer to the 32-bit variable to update.
4437 * @param i32 The 32-bit value to assign to *pi32.
4438 */
4439DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4440{
4441 Assert(!((uintptr_t)pi32 & 3));
4442 *pi32 = i32;
4443}
4444
4445
4446/**
4447 * Atomically writes an unsigned 64-bit value, ordered.
4448 *
4449 * @param pu64 Pointer to the 64-bit variable.
4450 * @param u64 The 64-bit value to assign to *pu64.
4451 */
4452DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4453{
4454 ASMAtomicXchgU64(pu64, u64);
4455}
4456
4457
4458/**
4459 * Atomically writes an unsigned 64-bit value, unordered.
4460 *
4461 * @param pu64 Pointer to the 64-bit variable.
4462 * @param u64 The 64-bit value to assign to *pu64.
4463 */
4464DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4465{
4466 Assert(!((uintptr_t)pu64 & 7));
4467#if ARCH_BITS == 64
4468 *pu64 = u64;
4469#else
4470 ASMAtomicXchgU64(pu64, u64);
4471#endif
4472}
4473
4474
4475/**
4476 * Atomically writes a signed 64-bit value, ordered.
4477 *
4478 * @param pi64 Pointer to the 64-bit variable.
4479 * @param i64 The 64-bit value to assign to *pi64.
4480 */
4481DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4482{
4483 ASMAtomicXchgS64(pi64, i64);
4484}
4485
4486
4487/**
4488 * Atomically writes a signed 64-bit value, unordered.
4489 *
4490 * @param pi64 Pointer to the 64-bit variable.
4491 * @param i64 The 64-bit value to assign to *pi64.
4492 */
4493DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4494{
4495 Assert(!((uintptr_t)pi64 & 7));
4496#if ARCH_BITS == 64
4497 *pi64 = i64;
4498#else
4499 ASMAtomicXchgS64(pi64, i64);
4500#endif
4501}
4502
4503
4504/**
4505 * Atomically writes a boolean value, ordered.
4506 *
4507 * @param pf Pointer to the boolean variable.
4508 * @param f The boolean value to assign to *pf.
4509 */
4510DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4511{
4512 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4513}
4514
4515
4516/**
4517 * Atomically writes a boolean value, unordered.
4518 *
4519 * @param pf Pointer to the boolean variable.
4520 * @param f The boolean value to assign to *pf.
4521 */
4522DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4523{
4524 *pf = f; /* byte writes are atomic on x86 */
4525}
4526
4527
4528/**
4529 * Atomically writes a pointer value, ordered.
4530 *
4532 * @param ppv Pointer to the pointer variable.
4533 * @param pv The pointer value to assign to *ppv.
4534 */
4535DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4536{
4537#if ARCH_BITS == 32
4538 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4539#elif ARCH_BITS == 64
4540 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4541#else
4542# error "ARCH_BITS is bogus"
4543#endif
4544}
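
/*
 * Usage sketch (illustrative; MYCFG, g_pSharedCfg and myPublishCfg are
 * hypothetical names, not part of this header): publish a fully initialized
 * structure by writing the shared pointer last with the ordered variant, so
 * readers see either NULL or a complete object.
 *
 *      typedef struct MYCFG { uint32_t cRefs; uint32_t uValue; } MYCFG;
 *      static MYCFG * volatile g_pSharedCfg;
 *
 *      static void myPublishCfg(MYCFG *pNew)
 *      {
 *          pNew->cRefs = 1;                // initialize everything first...
 *          ASMAtomicWritePtr((void * volatile *)&g_pSharedCfg, pNew); // ...then publish
 *      }
 */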
4545
4546
4547/**
4548 * Atomically writes a pointer value, unordered.
4549 *
4551 * @param ppv Pointer to the pointer variable.
4552 * @param pv The pointer value to assign to *ppv.
4553 */
4554DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4555{
4556#if ARCH_BITS == 32
4557 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4558#elif ARCH_BITS == 64
4559 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4560#else
4561# error "ARCH_BITS is bogus"
4562#endif
4563}
4564
4565
4566/**
4567 * Atomically write a typical IPRT handle value, ordered.
4568 *
4569 * @param ph Pointer to the variable to update.
4570 * @param hNew The value to assign to *ph.
4571 *
4572 * @remarks This doesn't currently work for all handles (like RTFILE).
4573 */
4574#define ASMAtomicWriteHandle(ph, hNew) \
4575 do { \
4576 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
4577 AssertCompile(sizeof(*(ph)) == sizeof(void*)); \
4578 } while (0)
4579
4580
4581/**
4582 * Atomically write a typical IPRT handle value, unordered.
4583 *
4584 * @param ph Pointer to the variable to update.
4585 * @param hNew The value to assign to *ph.
4586 *
4587 * @remarks This doesn't currently work for all handles (like RTFILE).
4588 */
4589#define ASMAtomicUoWriteHandle(ph, hNew) \
4590 do { \
4591 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
4592 AssertCompile(sizeof(*(ph)) == sizeof(void*)); \
4593 } while (0)
4594
4595
4596/**
4597 * Atomically writes a value whose size might differ
4598 * between platforms or compilers, ordered.
4599 *
4600 * @param pu Pointer to the variable to update.
4601 * @param uNew The value to assign to *pu.
4602 */
4603#define ASMAtomicWriteSize(pu, uNew) \
4604 do { \
4605 switch (sizeof(*(pu))) { \
4606 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4607 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4608 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4609 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4610 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4611 } \
4612 } while (0)
4613
4614/**
4615 * Atomically writes a value whose size might differ
4616 * between platforms or compilers, unordered.
4617 *
4618 * @param pu Pointer to the variable to update.
4619 * @param uNew The value to assign to *pu.
4620 */
4621#define ASMAtomicUoWriteSize(pu, uNew) \
4622 do { \
4623 switch (sizeof(*(pu))) { \
4624 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4625 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4626 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4627 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4628 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4629 } \
4630 } while (0)
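
/*
 * Usage sketch (illustrative; s_cbWatermark and myUpdateWatermark are
 * hypothetical): ASMAtomicWriteSize dispatches on sizeof() at compile time,
 * which is convenient for types such as size_t whose width differs between
 * 32-bit and 64-bit targets.
 *
 *      static volatile size_t s_cbWatermark;
 *
 *      static void myUpdateWatermark(size_t cbNew)
 *      {
 *          ASMAtomicWriteSize(&s_cbWatermark, cbNew);
 *      }
 */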
4631
4632
4633
4634
4635/**
4636 * Invalidate page.
4637 *
4638 * @param pv Address of the page to invalidate.
4639 */
4640#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4641DECLASM(void) ASMInvalidatePage(void *pv);
4642#else
4643DECLINLINE(void) ASMInvalidatePage(void *pv)
4644{
4645# if RT_INLINE_ASM_USES_INTRIN
4646 __invlpg(pv);
4647
4648# elif RT_INLINE_ASM_GNU_STYLE
4649 __asm__ __volatile__("invlpg %0\n\t"
4650 : : "m" (*(uint8_t *)pv));
4651# else
4652 __asm
4653 {
4654# ifdef RT_ARCH_AMD64
4655 mov rax, [pv]
4656 invlpg [rax]
4657# else
4658 mov eax, [pv]
4659 invlpg [eax]
4660# endif
4661 }
4662# endif
4663}
4664#endif
4665
4666
4667#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4668# if PAGE_SIZE != 0x1000
4669# error "PAGE_SIZE is not 0x1000!"
4670# endif
4671#endif
4672
4673/**
4674 * Zeros a 4K memory page.
4675 *
4676 * @param pv Pointer to the memory block. This must be page aligned.
4677 */
4678#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4679DECLASM(void) ASMMemZeroPage(volatile void *pv);
4680#else
4681DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4682{
4683# if RT_INLINE_ASM_USES_INTRIN
4684# ifdef RT_ARCH_AMD64
4685 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4686# else
4687 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4688# endif
4689
4690# elif RT_INLINE_ASM_GNU_STYLE
4691 RTCCUINTREG uDummy;
4692# ifdef RT_ARCH_AMD64
4693 __asm__ __volatile__ ("rep stosq"
4694 : "=D" (pv),
4695 "=c" (uDummy)
4696 : "0" (pv),
4697 "c" (0x1000 >> 3),
4698 "a" (0)
4699 : "memory");
4700# else
4701 __asm__ __volatile__ ("rep stosl"
4702 : "=D" (pv),
4703 "=c" (uDummy)
4704 : "0" (pv),
4705 "c" (0x1000 >> 2),
4706 "a" (0)
4707 : "memory");
4708# endif
4709# else
4710 __asm
4711 {
4712# ifdef RT_ARCH_AMD64
4713 xor rax, rax
4714 mov ecx, 0200h
4715 mov rdi, [pv]
4716 rep stosq
4717# else
4718 xor eax, eax
4719 mov ecx, 0400h
4720 mov edi, [pv]
4721 rep stosd
4722# endif
4723 }
4724# endif
4725}
4726#endif
4727
4728
4729/**
4730 * Zeros a memory block with a 32-bit aligned size.
4731 *
4732 * @param pv Pointer to the memory block.
4733 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4734 */
4735#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4736DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4737#else
4738DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4739{
4740# if RT_INLINE_ASM_USES_INTRIN
4741# ifdef RT_ARCH_AMD64
4742 if (!(cb & 7))
4743 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4744 else
4745# endif
4746 __stosd((unsigned long *)pv, 0, cb / 4);
4747
4748# elif RT_INLINE_ASM_GNU_STYLE
4749 __asm__ __volatile__ ("rep stosl"
4750 : "=D" (pv),
4751 "=c" (cb)
4752 : "0" (pv),
4753 "1" (cb >> 2),
4754 "a" (0)
4755 : "memory");
4756# else
4757 __asm
4758 {
4759 xor eax, eax
4760# ifdef RT_ARCH_AMD64
4761 mov rcx, [cb]
4762 shr rcx, 2
4763 mov rdi, [pv]
4764# else
4765 mov ecx, [cb]
4766 shr ecx, 2
4767 mov edi, [pv]
4768# endif
4769 rep stosd
4770 }
4771# endif
4772}
4773#endif
4774
4775
4776/**
4777 * Fills a memory block with a 32-bit aligned size.
4778 *
4779 * @param pv Pointer to the memory block.
4780 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4781 * @param u32 The value to fill with.
4782 */
4783#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4784DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4785#else
4786DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4787{
4788# if RT_INLINE_ASM_USES_INTRIN
4789# ifdef RT_ARCH_AMD64
4790 if (!(cb & 7))
4791 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4792 else
4793# endif
4794 __stosd((unsigned long *)pv, u32, cb / 4);
4795
4796# elif RT_INLINE_ASM_GNU_STYLE
4797 __asm__ __volatile__ ("rep stosl"
4798 : "=D" (pv),
4799 "=c" (cb)
4800 : "0" (pv),
4801 "1" (cb >> 2),
4802 "a" (u32)
4803 : "memory");
4804# else
4805 __asm
4806 {
4807# ifdef RT_ARCH_AMD64
4808 mov rcx, [cb]
4809 shr rcx, 2
4810 mov rdi, [pv]
4811# else
4812 mov ecx, [cb]
4813 shr ecx, 2
4814 mov edi, [pv]
4815# endif
4816 mov eax, [u32]
4817 rep stosd
4818 }
4819# endif
4820}
4821#endif
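
/*
 * Usage sketch (illustrative; s_au32Pattern is a hypothetical buffer): both
 * the buffer and the byte count passed to ASMMemFill32 must be 32-bit
 * aligned, so it is easiest to size the buffer in uint32_t units and pass
 * sizeof().
 *
 *      static uint32_t s_au32Pattern[256];
 *
 *      ASMMemFill32(s_au32Pattern, sizeof(s_au32Pattern), UINT32_C(0xdeadbeef));
 */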
4822
4823
4824/**
4825 * Checks if a memory block is filled with the specified byte.
4826 *
4827 * This is a sort of inverted memchr.
4828 *
4829 * @returns Pointer to the byte which doesn't equal u8.
4830 * @returns NULL if all equal to u8.
4831 *
4832 * @param pv Pointer to the memory block.
4833 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4834 * @param u8 The value it's supposed to be filled with.
4835 */
4836#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4837DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4838#else
4839DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4840{
4841/** @todo rewrite this in inline assembly? */
4842 uint8_t const *pb = (uint8_t const *)pv;
4843 for (; cb; cb--, pb++)
4844 if (RT_UNLIKELY(*pb != u8))
4845 return (void *)pb;
4846 return NULL;
4847}
4848#endif
4849
4850
4851/**
4852 * Checks if a memory block is filled with the specified 32-bit value.
4853 *
4854 * This is a sort of inverted memchr.
4855 *
4856 * @returns Pointer to the first value which doesn't equal u32.
4857 * @returns NULL if all equal to u32.
4858 *
4859 * @param pv Pointer to the memory block.
4860 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4861 * @param u32 The value it's supposed to be filled with.
4862 */
4863#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4864DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4865#else
4866DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4867{
4868/** @todo rewrite this in inline assembly? */
4869 uint32_t const *pu32 = (uint32_t const *)pv;
4870 for (; cb; cb -= 4, pu32++)
4871 if (RT_UNLIKELY(*pu32 != u32))
4872 return (uint32_t *)pu32;
4873 return NULL;
4874}
4875#endif
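
/*
 * Usage sketch (illustrative; myIsZeroPage and pvPage are hypothetical): a
 * non-NULL return from ASMMemIsAllU32 points at the first dword that breaks
 * the expected fill pattern.
 *
 *      static bool myIsZeroPage(const void *pvPage)
 *      {
 *          return ASMMemIsAllU32(pvPage, 0x1000, 0) == NULL;
 *      }
 */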
4876
4877
4878/**
4879 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4880 *
4881 * @returns u32F1 * u32F2.
4882 */
4883#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4884DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4885#else
4886DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4887{
4888# ifdef RT_ARCH_AMD64
4889 return (uint64_t)u32F1 * u32F2;
4890# else /* !RT_ARCH_AMD64 */
4891 uint64_t u64;
4892# if RT_INLINE_ASM_GNU_STYLE
4893 __asm__ __volatile__("mull %%edx"
4894 : "=A" (u64)
4895 : "a" (u32F2), "d" (u32F1));
4896# else
4897 __asm
4898 {
4899 mov edx, [u32F1]
4900 mov eax, [u32F2]
4901 mul edx
4902 mov dword ptr [u64], eax
4903 mov dword ptr [u64 + 4], edx
4904 }
4905# endif
4906 return u64;
4907# endif /* !RT_ARCH_AMD64 */
4908}
4909#endif
4910
4911
4912/**
4913 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4914 *
4915 * @returns i32F1 * i32F2.
4916 */
4917#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4918DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4919#else
4920DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4921{
4922# ifdef RT_ARCH_AMD64
4923 return (int64_t)i32F1 * i32F2;
4924# else /* !RT_ARCH_AMD64 */
4925 int64_t i64;
4926# if RT_INLINE_ASM_GNU_STYLE
4927 __asm__ __volatile__("imull %%edx"
4928 : "=A" (i64)
4929 : "a" (i32F2), "d" (i32F1));
4930# else
4931 __asm
4932 {
4933 mov edx, [i32F1]
4934 mov eax, [i32F2]
4935 imul edx
4936 mov dword ptr [i64], eax
4937 mov dword ptr [i64 + 4], edx
4938 }
4939# endif
4940 return i64;
4941# endif /* !RT_ARCH_AMD64 */
4942}
4943#endif
4944
4945
4946/**
4947 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4948 *
4949 * @returns u64 / u32.
4950 */
4951#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4952DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4953#else
4954DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4955{
4956# ifdef RT_ARCH_AMD64
4957 return (uint32_t)(u64 / u32);
4958# else /* !RT_ARCH_AMD64 */
4959# if RT_INLINE_ASM_GNU_STYLE
4960 RTCCUINTREG uDummy;
4961 __asm__ __volatile__("divl %3"
4962 : "=a" (u32), "=d"(uDummy)
4963 : "A" (u64), "r" (u32));
4964# else
4965 __asm
4966 {
4967 mov eax, dword ptr [u64]
4968 mov edx, dword ptr [u64 + 4]
4969 mov ecx, [u32]
4970 div ecx
4971 mov [u32], eax
4972 }
4973# endif
4974 return u32;
4975# endif /* !RT_ARCH_AMD64 */
4976}
4977#endif
4978
4979
4980/**
4981 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4982 *
4983 * @returns i64 / i32.
4984 */
4985#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4986DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4987#else
4988DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4989{
4990# ifdef RT_ARCH_AMD64
4991 return (int32_t)(i64 / i32);
4992# else /* !RT_ARCH_AMD64 */
4993# if RT_INLINE_ASM_GNU_STYLE
4994 RTCCUINTREG iDummy;
4995 __asm__ __volatile__("idivl %3"
4996 : "=a" (i32), "=d"(iDummy)
4997 : "A" (i64), "r" (i32));
4998# else
4999 __asm
5000 {
5001 mov eax, dword ptr [i64]
5002 mov edx, dword ptr [i64 + 4]
5003 mov ecx, [i32]
5004 idiv ecx
5005 mov [i32], eax
5006 }
5007# endif
5008 return i32;
5009# endif /* !RT_ARCH_AMD64 */
5010}
5011#endif
5012
5013
5014/**
5015 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5016 * returning the remainder.
5017 *
5018 * @returns u64 % u32.
5019 *
5020 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits, or the division will overflow and crash.
5021 */
5022#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5023DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5024#else
5025DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5026{
5027# ifdef RT_ARCH_AMD64
5028 return (uint32_t)(u64 % u32);
5029# else /* !RT_ARCH_AMD64 */
5030# if RT_INLINE_ASM_GNU_STYLE
5031 RTCCUINTREG uDummy;
5032 __asm__ __volatile__("divl %3"
5033 : "=a" (uDummy), "=d"(u32)
5034 : "A" (u64), "r" (u32));
5035# else
5036 __asm
5037 {
5038 mov eax, dword ptr [u64]
5039 mov edx, dword ptr [u64 + 4]
5040 mov ecx, [u32]
5041 div ecx
5042 mov [u32], edx
5043 }
5044# endif
5045 return u32;
5046# endif /* !RT_ARCH_AMD64 */
5047}
5048#endif
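
/*
 * Usage sketch (illustrative; cbTotal is a hypothetical 64-bit byte count):
 * split it into whole 4K pages and a remainder, bearing in mind that the
 * quotient must fit in 32 bits or the division traps.
 *
 *      uint32_t cPages  = ASMDivU64ByU32RetU32(cbTotal, 0x1000);
 *      uint32_t offLast = ASMModU64ByU32RetU32(cbTotal, 0x1000);
 */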
5049
5050
5051/**
5052 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5053 * returning the remainder.
5054 *
5055 * @returns i64 % i32.
5056 *
5057 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits, or the division will overflow and crash.
5058 */
5059#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5060DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5061#else
5062DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5063{
5064# ifdef RT_ARCH_AMD64
5065 return (int32_t)(i64 % i32);
5066# else /* !RT_ARCH_AMD64 */
5067# if RT_INLINE_ASM_GNU_STYLE
5068 RTCCUINTREG iDummy;
5069 __asm__ __volatile__("idivl %3"
5070 : "=a" (iDummy), "=d"(i32)
5071 : "A" (i64), "r" (i32));
5072# else
5073 __asm
5074 {
5075 mov eax, dword ptr [i64]
5076 mov edx, dword ptr [i64 + 4]
5077 mov ecx, [i32]
5078 idiv ecx
5079 mov [i32], edx
5080 }
5081# endif
5082 return i32;
5083# endif /* !RT_ARCH_AMD64 */
5084}
5085#endif
5086
5087
5088/**
5089 * Multiplies a 64-bit value by a 32-bit value and divides the result by another 32-bit value,
5090 * using a 96-bit intermediate result.
5091 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5092 * __udivdi3 and __umoddi3 even if this inline function is not used.
5093 *
5094 * @returns (u64A * u32B) / u32C.
5095 * @param u64A The 64-bit value.
5096 * @param u32B The 32-bit value to multiply A by.
5097 * @param u32C The 32-bit value to divide A*B by.
5098 */
5099#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5100DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5101#else
5102DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5103{
5104# if RT_INLINE_ASM_GNU_STYLE
5105# ifdef RT_ARCH_AMD64
5106 uint64_t u64Result, u64Spill;
5107 __asm__ __volatile__("mulq %2\n\t"
5108 "divq %3\n\t"
5109 : "=a" (u64Result),
5110 "=d" (u64Spill)
5111 : "r" ((uint64_t)u32B),
5112 "r" ((uint64_t)u32C),
5113 "0" (u64A),
5114 "1" (0));
5115 return u64Result;
5116# else
5117 uint32_t u32Dummy;
5118 uint64_t u64Result;
5119 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5120 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5121 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5122 eax = u64A.hi */
5123 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5124 edx = u32C */
5125 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5126 edx = u32B */
5127 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5128 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5129 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5130 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5131 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5132 edx = u64Hi % u32C */
5133 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5134 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5135 "divl %%ecx \n\t" /* u64Result.lo */
5136 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5137 : "=A"(u64Result), "=c"(u32Dummy),
5138 "=S"(u32Dummy), "=D"(u32Dummy)
5139 : "a"((uint32_t)u64A),
5140 "S"((uint32_t)(u64A >> 32)),
5141 "c"(u32B),
5142 "D"(u32C));
5143 return u64Result;
5144# endif
5145# else
5146 RTUINT64U u;
5147 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5148 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5149 u64Hi += (u64Lo >> 32);
5150 u.s.Hi = (uint32_t)(u64Hi / u32C);
5151 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5152 return u.u;
5153# endif
5154}
5155#endif
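
/*
 * Usage sketch (illustrative; myConvertTicks and its parameters are
 * hypothetical): rescaling a tick count from one frequency to another is the
 * typical use of the 96-bit intermediate, since cTicks * uDstHz may not fit
 * in 64 bits.
 *
 *      static uint64_t myConvertTicks(uint64_t cTicks, uint32_t uDstHz, uint32_t uSrcHz)
 *      {
 *          return ASMMultU64ByU32DivByU32(cTicks, uDstHz, uSrcHz);
 *      }
 */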
5156
5157
5158/**
5159 * Probes a byte pointer for read access.
5160 *
5161 * While the function will fault if the byte is not read accessible,
5162 * the idea is to do this in a safe place like before acquiring locks
5163 * and such like.
5164 *
5165 * Also, this function guarantees that an eager compiler is not going
5166 * to optimize the probing away.
5167 *
5168 * @param pvByte Pointer to the byte.
5169 */
5170#if RT_INLINE_ASM_EXTERNAL
5171DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5172#else
5173DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5174{
5175 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5176 uint8_t u8;
5177# if RT_INLINE_ASM_GNU_STYLE
5178 __asm__ __volatile__("movb (%1), %0\n\t"
5179 : "=r" (u8)
5180 : "r" (pvByte));
5181# else
5182 __asm
5183 {
5184# ifdef RT_ARCH_AMD64
5185 mov rax, [pvByte]
5186 mov al, [rax]
5187# else
5188 mov eax, [pvByte]
5189 mov al, [eax]
5190# endif
5191 mov [u8], al
5192 }
5193# endif
5194 return u8;
5195}
5196#endif
5197
5198/**
5199 * Probes a buffer for read access page by page.
5200 *
5201 * While the function will fault if the buffer is not fully read
5202 * accessible, the idea is to do this in a safe place like before
5203 * acquiring locks and such like.
5204 *
5205 * Also, this function guarantees that an eager compiler is not going
5206 * to optimize the probing away.
5207 *
5208 * @param pvBuf Pointer to the buffer.
5209 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5210 */
5211DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5212{
5213 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5214 /* the first byte */
5215 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5216 ASMProbeReadByte(pu8);
5217
5218 /* the pages in between. */
5219 while (cbBuf > /*PAGE_SIZE*/0x1000)
5220 {
5221 ASMProbeReadByte(pu8);
5222 cbBuf -= /*PAGE_SIZE*/0x1000;
5223 pu8 += /*PAGE_SIZE*/0x1000;
5224 }
5225
5226 /* the last byte */
5227 ASMProbeReadByte(pu8 + cbBuf - 1);
5228}
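
/*
 * Usage sketch (illustrative; pvReq and cbReq are hypothetical): touch every
 * page of a request buffer up front so that any page fault happens here, not
 * later in a context where faults are unwelcome (e.g. while holding a
 * spinlock).
 *
 *      ASMProbeReadBuffer(pvReq, cbReq);
 *      // ... acquire locks and process the request ...
 */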
5229
5230
5231/** @def ASMBreakpoint
5232 * Debugger Breakpoint.
5233 * @remark In the gnu world we add a nop instruction after the int3 to
5234 * force gdb to remain at the int3 source line.
5235 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5236 * @internal
5237 */
5238#if RT_INLINE_ASM_GNU_STYLE
5239# ifndef __L4ENV__
5240# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
5241# else
5242# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
5243# endif
5244#else
5245# define ASMBreakpoint() __debugbreak()
5246#endif
5247
5248
5249
5250/** @defgroup grp_inline_bits Bit Operations
5251 * @{
5252 */
5253
5254
5255/**
5256 * Sets a bit in a bitmap.
5257 *
5258 * @param pvBitmap Pointer to the bitmap.
5259 * @param iBit The bit to set.
5260 */
5261#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5262DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5263#else
5264DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5265{
5266# if RT_INLINE_ASM_USES_INTRIN
5267 _bittestandset((long *)pvBitmap, iBit);
5268
5269# elif RT_INLINE_ASM_GNU_STYLE
5270 __asm__ __volatile__ ("btsl %1, %0"
5271 : "=m" (*(volatile long *)pvBitmap)
5272 : "Ir" (iBit),
5273 "m" (*(volatile long *)pvBitmap)
5274 : "memory");
5275# else
5276 __asm
5277 {
5278# ifdef RT_ARCH_AMD64
5279 mov rax, [pvBitmap]
5280 mov edx, [iBit]
5281 bts [rax], edx
5282# else
5283 mov eax, [pvBitmap]
5284 mov edx, [iBit]
5285 bts [eax], edx
5286# endif
5287 }
5288# endif
5289}
5290#endif
5291
5292
5293/**
5294 * Atomically sets a bit in a bitmap, ordered.
5295 *
5296 * @param pvBitmap Pointer to the bitmap.
5297 * @param iBit The bit to set.
5298 */
5299#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5300DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5301#else
5302DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5303{
5304# if RT_INLINE_ASM_USES_INTRIN
5305 _interlockedbittestandset((long *)pvBitmap, iBit);
5306# elif RT_INLINE_ASM_GNU_STYLE
5307 __asm__ __volatile__ ("lock; btsl %1, %0"
5308 : "=m" (*(volatile long *)pvBitmap)
5309 : "Ir" (iBit),
5310 "m" (*(volatile long *)pvBitmap)
5311 : "memory");
5312# else
5313 __asm
5314 {
5315# ifdef RT_ARCH_AMD64
5316 mov rax, [pvBitmap]
5317 mov edx, [iBit]
5318 lock bts [rax], edx
5319# else
5320 mov eax, [pvBitmap]
5321 mov edx, [iBit]
5322 lock bts [eax], edx
5323# endif
5324 }
5325# endif
5326}
5327#endif
5328
5329
5330/**
5331 * Clears a bit in a bitmap.
5332 *
5333 * @param pvBitmap Pointer to the bitmap.
5334 * @param iBit The bit to clear.
5335 */
5336#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5337DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5338#else
5339DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5340{
5341# if RT_INLINE_ASM_USES_INTRIN
5342 _bittestandreset((long *)pvBitmap, iBit);
5343
5344# elif RT_INLINE_ASM_GNU_STYLE
5345 __asm__ __volatile__ ("btrl %1, %0"
5346 : "=m" (*(volatile long *)pvBitmap)
5347 : "Ir" (iBit),
5348 "m" (*(volatile long *)pvBitmap)
5349 : "memory");
5350# else
5351 __asm
5352 {
5353# ifdef RT_ARCH_AMD64
5354 mov rax, [pvBitmap]
5355 mov edx, [iBit]
5356 btr [rax], edx
5357# else
5358 mov eax, [pvBitmap]
5359 mov edx, [iBit]
5360 btr [eax], edx
5361# endif
5362 }
5363# endif
5364}
5365#endif
5366
5367
5368/**
5369 * Atomically clears a bit in a bitmap, ordered.
5370 *
5371 * @param pvBitmap Pointer to the bitmap.
5372 * @param iBit The bit to clear.
5373 * @remark No memory barrier, take care on smp.
5374 */
5375#if RT_INLINE_ASM_EXTERNAL
5376DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5377#else
5378DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5379{
5380# if RT_INLINE_ASM_GNU_STYLE
5381 __asm__ __volatile__ ("lock; btrl %1, %0"
5382 : "=m" (*(volatile long *)pvBitmap)
5383 : "Ir" (iBit),
5384 "m" (*(volatile long *)pvBitmap)
5385 : "memory");
5386# else
5387 __asm
5388 {
5389# ifdef RT_ARCH_AMD64
5390 mov rax, [pvBitmap]
5391 mov edx, [iBit]
5392 lock btr [rax], edx
5393# else
5394 mov eax, [pvBitmap]
5395 mov edx, [iBit]
5396 lock btr [eax], edx
5397# endif
5398 }
5399# endif
5400}
5401#endif
5402
5403
5404/**
5405 * Toggles a bit in a bitmap.
5406 *
5407 * @param pvBitmap Pointer to the bitmap.
5408 * @param iBit The bit to toggle.
5409 */
5410#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5411DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5412#else
5413DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5414{
5415# if RT_INLINE_ASM_USES_INTRIN
5416 _bittestandcomplement((long *)pvBitmap, iBit);
5417# elif RT_INLINE_ASM_GNU_STYLE
5418 __asm__ __volatile__ ("btcl %1, %0"
5419 : "=m" (*(volatile long *)pvBitmap)
5420 : "Ir" (iBit),
5421 "m" (*(volatile long *)pvBitmap)
5422 : "memory");
5423# else
5424 __asm
5425 {
5426# ifdef RT_ARCH_AMD64
5427 mov rax, [pvBitmap]
5428 mov edx, [iBit]
5429 btc [rax], edx
5430# else
5431 mov eax, [pvBitmap]
5432 mov edx, [iBit]
5433 btc [eax], edx
5434# endif
5435 }
5436# endif
5437}
5438#endif
5439
5440
5441/**
5442 * Atomically toggles a bit in a bitmap, ordered.
5443 *
5444 * @param pvBitmap Pointer to the bitmap.
5445 * @param iBit The bit to toggle.
5446 */
5447#if RT_INLINE_ASM_EXTERNAL
5448DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5449#else
5450DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5451{
5452# if RT_INLINE_ASM_GNU_STYLE
5453 __asm__ __volatile__ ("lock; btcl %1, %0"
5454 : "=m" (*(volatile long *)pvBitmap)
5455 : "Ir" (iBit),
5456 "m" (*(volatile long *)pvBitmap)
5457 : "memory");
5458# else
5459 __asm
5460 {
5461# ifdef RT_ARCH_AMD64
5462 mov rax, [pvBitmap]
5463 mov edx, [iBit]
5464 lock btc [rax], edx
5465# else
5466 mov eax, [pvBitmap]
5467 mov edx, [iBit]
5468 lock btc [eax], edx
5469# endif
5470 }
5471# endif
5472}
5473#endif
5474
5475
5476/**
5477 * Tests and sets a bit in a bitmap.
5478 *
5479 * @returns true if the bit was set.
5480 * @returns false if the bit was clear.
5481 * @param pvBitmap Pointer to the bitmap.
5482 * @param iBit The bit to test and set.
5483 */
5484#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5485DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5486#else
5487DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5488{
5489 union { bool f; uint32_t u32; uint8_t u8; } rc;
5490# if RT_INLINE_ASM_USES_INTRIN
5491 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5492
5493# elif RT_INLINE_ASM_GNU_STYLE
5494 __asm__ __volatile__ ("btsl %2, %1\n\t"
5495 "setc %b0\n\t"
5496 "andl $1, %0\n\t"
5497 : "=q" (rc.u32),
5498 "=m" (*(volatile long *)pvBitmap)
5499 : "Ir" (iBit),
5500 "m" (*(volatile long *)pvBitmap)
5501 : "memory");
5502# else
5503 __asm
5504 {
5505 mov edx, [iBit]
5506# ifdef RT_ARCH_AMD64
5507 mov rax, [pvBitmap]
5508 bts [rax], edx
5509# else
5510 mov eax, [pvBitmap]
5511 bts [eax], edx
5512# endif
5513 setc al
5514 and eax, 1
5515 mov [rc.u32], eax
5516 }
5517# endif
5518 return rc.f;
5519}
5520#endif
5521
5522
5523/**
5524 * Atomically tests and sets a bit in a bitmap, ordered.
5525 *
5526 * @returns true if the bit was set.
5527 * @returns false if the bit was clear.
5528 * @param pvBitmap Pointer to the bitmap.
5529 * @param iBit The bit to test and set.
5530 */
5531#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5532DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5533#else
5534DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5535{
5536 union { bool f; uint32_t u32; uint8_t u8; } rc;
5537# if RT_INLINE_ASM_USES_INTRIN
5538 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5539# elif RT_INLINE_ASM_GNU_STYLE
5540 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5541 "setc %b0\n\t"
5542 "andl $1, %0\n\t"
5543 : "=q" (rc.u32),
5544 "=m" (*(volatile long *)pvBitmap)
5545 : "Ir" (iBit),
5546 "m" (*(volatile long *)pvBitmap)
5547 : "memory");
5548# else
5549 __asm
5550 {
5551 mov edx, [iBit]
5552# ifdef RT_ARCH_AMD64
5553 mov rax, [pvBitmap]
5554 lock bts [rax], edx
5555# else
5556 mov eax, [pvBitmap]
5557 lock bts [eax], edx
5558# endif
5559 setc al
5560 and eax, 1
5561 mov [rc.u32], eax
5562 }
5563# endif
5564 return rc.f;
5565}
5566#endif
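
/*
 * Usage sketch (illustrative; g_bmSlots and myClaimSlot are hypothetical):
 * claim a slot in a small allocation bitmap; the returned old bit value tells
 * us whether another thread got there first.
 *
 *      static uint32_t volatile g_bmSlots[2];  // 64 slots
 *
 *      static bool myClaimSlot(unsigned iSlot)
 *      {
 *          return !ASMAtomicBitTestAndSet(&g_bmSlots[0], (int32_t)iSlot);
 *      }
 */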
5567
5568
5569/**
5570 * Tests and clears a bit in a bitmap.
5571 *
5572 * @returns true if the bit was set.
5573 * @returns false if the bit was clear.
5574 * @param pvBitmap Pointer to the bitmap.
5575 * @param iBit The bit to test and clear.
5576 */
5577#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5578DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5579#else
5580DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5581{
5582 union { bool f; uint32_t u32; uint8_t u8; } rc;
5583# if RT_INLINE_ASM_USES_INTRIN
5584 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5585
5586# elif RT_INLINE_ASM_GNU_STYLE
5587 __asm__ __volatile__ ("btrl %2, %1\n\t"
5588 "setc %b0\n\t"
5589 "andl $1, %0\n\t"
5590 : "=q" (rc.u32),
5591 "=m" (*(volatile long *)pvBitmap)
5592 : "Ir" (iBit),
5593 "m" (*(volatile long *)pvBitmap)
5594 : "memory");
5595# else
5596 __asm
5597 {
5598 mov edx, [iBit]
5599# ifdef RT_ARCH_AMD64
5600 mov rax, [pvBitmap]
5601 btr [rax], edx
5602# else
5603 mov eax, [pvBitmap]
5604 btr [eax], edx
5605# endif
5606 setc al
5607 and eax, 1
5608 mov [rc.u32], eax
5609 }
5610# endif
5611 return rc.f;
5612}
5613#endif
5614
5615
5616/**
5617 * Atomically tests and clears a bit in a bitmap, ordered.
5618 *
5619 * @returns true if the bit was set.
5620 * @returns false if the bit was clear.
5621 * @param pvBitmap Pointer to the bitmap.
5622 * @param iBit The bit to test and clear.
5623 * @remark No memory barrier, take care on smp.
5624 */
5625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5626DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5627#else
5628DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5629{
5630 union { bool f; uint32_t u32; uint8_t u8; } rc;
5631# if RT_INLINE_ASM_USES_INTRIN
5632 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5633
5634# elif RT_INLINE_ASM_GNU_STYLE
5635 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5636 "setc %b0\n\t"
5637 "andl $1, %0\n\t"
5638 : "=q" (rc.u32),
5639 "=m" (*(volatile long *)pvBitmap)
5640 : "Ir" (iBit),
5641 "m" (*(volatile long *)pvBitmap)
5642 : "memory");
5643# else
5644 __asm
5645 {
5646 mov edx, [iBit]
5647# ifdef RT_ARCH_AMD64
5648 mov rax, [pvBitmap]
5649 lock btr [rax], edx
5650# else
5651 mov eax, [pvBitmap]
5652 lock btr [eax], edx
5653# endif
5654 setc al
5655 and eax, 1
5656 mov [rc.u32], eax
5657 }
5658# endif
5659 return rc.f;
5660}
5661#endif
5662
5663
5664/**
5665 * Tests and toggles a bit in a bitmap.
5666 *
5667 * @returns true if the bit was set.
5668 * @returns false if the bit was clear.
5669 * @param pvBitmap Pointer to the bitmap.
5670 * @param iBit The bit to test and toggle.
5671 */
5672#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5673DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5674#else
5675DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5676{
5677 union { bool f; uint32_t u32; uint8_t u8; } rc;
5678# if RT_INLINE_ASM_USES_INTRIN
5679 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5680
5681# elif RT_INLINE_ASM_GNU_STYLE
5682 __asm__ __volatile__ ("btcl %2, %1\n\t"
5683 "setc %b0\n\t"
5684 "andl $1, %0\n\t"
5685 : "=q" (rc.u32),
5686 "=m" (*(volatile long *)pvBitmap)
5687 : "Ir" (iBit),
5688 "m" (*(volatile long *)pvBitmap)
5689 : "memory");
5690# else
5691 __asm
5692 {
5693 mov edx, [iBit]
5694# ifdef RT_ARCH_AMD64
5695 mov rax, [pvBitmap]
5696 btc [rax], edx
5697# else
5698 mov eax, [pvBitmap]
5699 btc [eax], edx
5700# endif
5701 setc al
5702 and eax, 1
5703 mov [rc.u32], eax
5704 }
5705# endif
5706 return rc.f;
5707}
5708#endif
5709
5710
5711/**
5712 * Atomically tests and toggles a bit in a bitmap, ordered.
5713 *
5714 * @returns true if the bit was set.
5715 * @returns false if the bit was clear.
5716 * @param pvBitmap Pointer to the bitmap.
5717 * @param iBit The bit to test and toggle.
5718 */
5719#if RT_INLINE_ASM_EXTERNAL
5720DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5721#else
5722DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5723{
5724 union { bool f; uint32_t u32; uint8_t u8; } rc;
5725# if RT_INLINE_ASM_GNU_STYLE
5726 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5727 "setc %b0\n\t"
5728 "andl $1, %0\n\t"
5729 : "=q" (rc.u32),
5730 "=m" (*(volatile long *)pvBitmap)
5731 : "Ir" (iBit),
5732 "m" (*(volatile long *)pvBitmap)
5733 : "memory");
5734# else
5735 __asm
5736 {
5737 mov edx, [iBit]
5738# ifdef RT_ARCH_AMD64
5739 mov rax, [pvBitmap]
5740 lock btc [rax], edx
5741# else
5742 mov eax, [pvBitmap]
5743 lock btc [eax], edx
5744# endif
5745 setc al
5746 and eax, 1
5747 mov [rc.u32], eax
5748 }
5749# endif
5750 return rc.f;
5751}
5752#endif
5753
5754
5755/**
5756 * Tests if a bit in a bitmap is set.
5757 *
5758 * @returns true if the bit is set.
5759 * @returns false if the bit is clear.
5760 * @param pvBitmap Pointer to the bitmap.
5761 * @param iBit The bit to test.
5762 */
5763#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5764DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5765#else
5766DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5767{
5768 union { bool f; uint32_t u32; uint8_t u8; } rc;
5769# if RT_INLINE_ASM_USES_INTRIN
5770 rc.u32 = _bittest((long *)pvBitmap, iBit);
5771# elif RT_INLINE_ASM_GNU_STYLE
5772
5773 __asm__ __volatile__ ("btl %2, %1\n\t"
5774 "setc %b0\n\t"
5775 "andl $1, %0\n\t"
5776 : "=q" (rc.u32)
5777 : "m" (*(const volatile long *)pvBitmap),
5778 "Ir" (iBit)
5779 : "memory");
5780# else
5781 __asm
5782 {
5783 mov edx, [iBit]
5784# ifdef RT_ARCH_AMD64
5785 mov rax, [pvBitmap]
5786 bt [rax], edx
5787# else
5788 mov eax, [pvBitmap]
5789 bt [eax], edx
5790# endif
5791 setc al
5792 and eax, 1
5793 mov [rc.u32], eax
5794 }
5795# endif
5796 return rc.f;
5797}
5798#endif
5799
5800
5801/**
5802 * Clears a bit range within a bitmap.
5803 *
5804 * @param pvBitmap Pointer to the bitmap.
5805 * @param iBitStart The first bit to clear.
5806 * @param iBitEnd The first bit not to clear.
5807 */
5808DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5809{
5810 if (iBitStart < iBitEnd)
5811 {
5812 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5813 int iStart = iBitStart & ~31;
5814 int iEnd = iBitEnd & ~31;
5815 if (iStart == iEnd)
5816 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5817 else
5818 {
5819 /* bits in first dword. */
5820 if (iBitStart & 31)
5821 {
5822 *pu32 &= (1 << (iBitStart & 31)) - 1;
5823 pu32++;
5824 iBitStart = iStart + 32;
5825 }
5826
5827 /* whole dword. */
5828 if (iBitStart != iEnd)
5829 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5830
5831 /* bits in last dword. */
5832 if (iBitEnd & 31)
5833 {
5834 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5835 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5836 }
5837 }
5838 }
5839}
5840
5841
5842/**
5843 * Sets a bit range within a bitmap.
5844 *
5845 * @param pvBitmap Pointer to the bitmap.
5846 * @param iBitStart The first bit to set.
5847 * @param iBitEnd The first bit not to set.
5848 */
5849DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5850{
5851 if (iBitStart < iBitEnd)
5852 {
5853 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5854 int iStart = iBitStart & ~31;
5855 int iEnd = iBitEnd & ~31;
5856 if (iStart == iEnd)
5857 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5858 else
5859 {
5860 /* bits in first dword. */
5861 if (iBitStart & 31)
5862 {
5863 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5864 pu32++;
5865 iBitStart = iStart + 32;
5866 }
5867
5868 /* whole dword. */
5869 if (iBitStart != iEnd)
5870 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5871
5872 /* bits in last dword. */
5873 if (iBitEnd & 31)
5874 {
5875 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5876 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5877 }
5878 }
5879 }
5880}
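
/*
 * Usage sketch (illustrative; pvPageBitmap, iFirst and cPages are
 * hypothetical): mark a run of pages as allocated and release them again
 * later. Note that the range helpers are not atomic.
 *
 *      ASMBitSetRange(pvPageBitmap, iFirst, iFirst + cPages);
 *      // ... use the pages ...
 *      ASMBitClearRange(pvPageBitmap, iFirst, iFirst + cPages);
 */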
5881
5882
5883/**
5884 * Finds the first clear bit in a bitmap.
5885 *
5886 * @returns Index of the first zero bit.
5887 * @returns -1 if no clear bit was found.
5888 * @param pvBitmap Pointer to the bitmap.
5889 * @param cBits The number of bits in the bitmap. Multiple of 32.
5890 */
5891#if RT_INLINE_ASM_EXTERNAL
5892DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5893#else
5894DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5895{
5896 if (cBits)
5897 {
5898 int32_t iBit;
5899# if RT_INLINE_ASM_GNU_STYLE
5900 RTCCUINTREG uEAX, uECX, uEDI;
5901 cBits = RT_ALIGN_32(cBits, 32);
5902 __asm__ __volatile__("repe; scasl\n\t"
5903 "je 1f\n\t"
5904# ifdef RT_ARCH_AMD64
5905 "lea -4(%%rdi), %%rdi\n\t"
5906 "xorl (%%rdi), %%eax\n\t"
5907 "subq %5, %%rdi\n\t"
5908# else
5909 "lea -4(%%edi), %%edi\n\t"
5910 "xorl (%%edi), %%eax\n\t"
5911 "subl %5, %%edi\n\t"
5912# endif
5913 "shll $3, %%edi\n\t"
5914 "bsfl %%eax, %%edx\n\t"
5915 "addl %%edi, %%edx\n\t"
5916 "1:\t\n"
5917 : "=d" (iBit),
5918 "=&c" (uECX),
5919 "=&D" (uEDI),
5920 "=&a" (uEAX)
5921 : "0" (0xffffffff),
5922 "mr" (pvBitmap),
5923 "1" (cBits >> 5),
5924 "2" (pvBitmap),
5925 "3" (0xffffffff));
5926# else
5927 cBits = RT_ALIGN_32(cBits, 32);
5928 __asm
5929 {
5930# ifdef RT_ARCH_AMD64
5931 mov rdi, [pvBitmap]
5932 mov rbx, rdi
5933# else
5934 mov edi, [pvBitmap]
5935 mov ebx, edi
5936# endif
5937 mov edx, 0ffffffffh
5938 mov eax, edx
5939 mov ecx, [cBits]
5940 shr ecx, 5
5941 repe scasd
5942 je done
5943
5944# ifdef RT_ARCH_AMD64
5945 lea rdi, [rdi - 4]
5946 xor eax, [rdi]
5947 sub rdi, rbx
5948# else
5949 lea edi, [edi - 4]
5950 xor eax, [edi]
5951 sub edi, ebx
5952# endif
5953 shl edi, 3
5954 bsf edx, eax
5955 add edx, edi
5956 done:
5957 mov [iBit], edx
5958 }
5959# endif
5960 return iBit;
5961 }
5962 return -1;
5963}
5964#endif
5965
5966
5967/**
5968 * Finds the next clear bit in a bitmap.
5969 *
5970 * @returns Index of the next clear bit.
5971 * @returns -1 if no clear bit was found.
5972 * @param pvBitmap Pointer to the bitmap.
5973 * @param cBits The number of bits in the bitmap. Multiple of 32.
5974 * @param iBitPrev The bit returned from the last search.
5975 * The search will start at iBitPrev + 1.
5976 */
5977#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5978DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5979#else
5980DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5981{
5982 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5983 int iBit = ++iBitPrev & 31;
5984 if (iBit)
5985 {
5986 /*
5987 * Inspect the 32-bit word containing the unaligned bit.
5988 */
5989 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
5990
5991# if RT_INLINE_ASM_USES_INTRIN
5992 unsigned long ulBit = 0;
5993 if (_BitScanForward(&ulBit, u32))
5994 return ulBit + iBitPrev;
5995# else
5996# if RT_INLINE_ASM_GNU_STYLE
5997 __asm__ __volatile__("bsf %1, %0\n\t"
5998 "jnz 1f\n\t"
5999 "movl $-1, %0\n\t"
6000 "1:\n\t"
6001 : "=r" (iBit)
6002 : "r" (u32));
6003# else
6004 __asm
6005 {
6006 mov edx, [u32]
6007 bsf eax, edx
6008 jnz done
6009 mov eax, 0ffffffffh
6010 done:
6011 mov [iBit], eax
6012 }
6013# endif
6014 if (iBit >= 0)
6015 return iBit + iBitPrev;
6016# endif
6017
6018 /*
6019 * Skip ahead and see if there is anything left to search.
6020 */
6021 iBitPrev |= 31;
6022 iBitPrev++;
6023 if (cBits <= (uint32_t)iBitPrev)
6024 return -1;
6025 }
6026
6027 /*
6028 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6029 */
6030 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6031 if (iBit >= 0)
6032 iBit += iBitPrev;
6033 return iBit;
6034}
6035#endif
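
/*
 * Usage sketch (illustrative; g_bmFree is a hypothetical 64-bit bitmap): find
 * a free entry and mark it used. With concurrent allocators,
 * ASMAtomicBitTestAndSet should be used to claim the bit instead.
 *
 *      static uint32_t volatile g_bmFree[2];
 *
 *      int iFree = ASMBitFirstClear(g_bmFree, 64);
 *      if (iFree >= 0)
 *          ASMBitSet(g_bmFree, iFree);
 */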
6036
6037
6038/**
6039 * Finds the first set bit in a bitmap.
6040 *
6041 * @returns Index of the first set bit.
6042 * @returns -1 if no set bit was found.
6043 * @param pvBitmap Pointer to the bitmap.
6044 * @param cBits The number of bits in the bitmap. Multiple of 32.
6045 */
6046#if RT_INLINE_ASM_EXTERNAL
6047DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6048#else
6049DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6050{
6051 if (cBits)
6052 {
6053 int32_t iBit;
6054# if RT_INLINE_ASM_GNU_STYLE
6055 RTCCUINTREG uEAX, uECX, uEDI;
6056 cBits = RT_ALIGN_32(cBits, 32);
6057 __asm__ __volatile__("repe; scasl\n\t"
6058 "je 1f\n\t"
6059# ifdef RT_ARCH_AMD64
6060 "lea -4(%%rdi), %%rdi\n\t"
6061 "movl (%%rdi), %%eax\n\t"
6062 "subq %5, %%rdi\n\t"
6063# else
6064 "lea -4(%%edi), %%edi\n\t"
6065 "movl (%%edi), %%eax\n\t"
6066 "subl %5, %%edi\n\t"
6067# endif
6068 "shll $3, %%edi\n\t"
6069 "bsfl %%eax, %%edx\n\t"
6070 "addl %%edi, %%edx\n\t"
6071 "1:\t\n"
6072 : "=d" (iBit),
6073 "=&c" (uECX),
6074 "=&D" (uEDI),
6075 "=&a" (uEAX)
6076 : "0" (0xffffffff),
6077 "mr" (pvBitmap),
6078 "1" (cBits >> 5),
6079 "2" (pvBitmap),
6080 "3" (0));
6081# else
6082 cBits = RT_ALIGN_32(cBits, 32);
6083 __asm
6084 {
6085# ifdef RT_ARCH_AMD64
6086 mov rdi, [pvBitmap]
6087 mov rbx, rdi
6088# else
6089 mov edi, [pvBitmap]
6090 mov ebx, edi
6091# endif
6092 mov edx, 0ffffffffh
6093 xor eax, eax
6094 mov ecx, [cBits]
6095 shr ecx, 5
6096 repe scasd
6097 je done
6098# ifdef RT_ARCH_AMD64
6099 lea rdi, [rdi - 4]
6100 mov eax, [rdi]
6101 sub rdi, rbx
6102# else
6103 lea edi, [edi - 4]
6104 mov eax, [edi]
6105 sub edi, ebx
6106# endif
6107 shl edi, 3
6108 bsf edx, eax
6109 add edx, edi
6110 done:
6111 mov [iBit], edx
6112 }
6113# endif
6114 return iBit;
6115 }
6116 return -1;
6117}
6118#endif
6119
6120
6121/**
6122 * Finds the next set bit in a bitmap.
6123 *
6124 * @returns Index of the next set bit.
6125 * @returns -1 if no set bit was found.
6126 * @param pvBitmap Pointer to the bitmap.
6127 * @param cBits The number of bits in the bitmap. Multiple of 32.
6128 * @param iBitPrev The bit returned from the last search.
6129 * The search will start at iBitPrev + 1.
6130 */
6131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6132DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6133#else
6134DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6135{
6136 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6137 int iBit = ++iBitPrev & 31;
6138 if (iBit)
6139 {
6140 /*
6141 * Inspect the 32-bit word containing the unaligned bit.
6142 */
6143 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6144
6145# if RT_INLINE_ASM_USES_INTRIN
6146 unsigned long ulBit = 0;
6147 if (_BitScanForward(&ulBit, u32))
6148 return ulBit + iBitPrev;
6149# else
6150# if RT_INLINE_ASM_GNU_STYLE
6151 __asm__ __volatile__("bsf %1, %0\n\t"
6152 "jnz 1f\n\t"
6153 "movl $-1, %0\n\t"
6154 "1:\n\t"
6155 : "=r" (iBit)
6156 : "r" (u32));
6157# else
6158 __asm
6159 {
6160 mov edx, [u32]
6161 bsf eax, edx
6162 jnz done
6163 mov eax, 0ffffffffh
6164 done:
6165 mov [iBit], eax
6166 }
6167# endif
6168 if (iBit >= 0)
6169 return iBit + iBitPrev;
6170# endif
6171
6172 /*
6173 * Skip ahead and see if there is anything left to search.
6174 */
6175 iBitPrev |= 31;
6176 iBitPrev++;
6177 if (cBits <= (uint32_t)iBitPrev)
6178 return -1;
6179 }
6180
6181 /*
6182 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6183 */
6184 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6185 if (iBit >= 0)
6186 iBit += iBitPrev;
6187 return iBit;
6188}
6189#endif
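
/*
 * Usage sketch (illustrative; pvBitmap, cBits and myProcessBit are
 * hypothetical): walk every set bit in a bitmap of cBits bits (cBits being a
 * multiple of 32, as required above).
 *
 *      int iBit = ASMBitFirstSet(pvBitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          myProcessBit((uint32_t)iBit);
 *          iBit = ASMBitNextSet(pvBitmap, cBits, (uint32_t)iBit);
 *      }
 */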
6190
6191
6192/**
6193 * Finds the first bit which is set in the given 32-bit integer.
6194 * Bits are numbered from 1 (least significant) to 32.
6195 *
6196 * @returns index [1..32] of the first set bit.
6197 * @returns 0 if all bits are cleared.
6198 * @param u32 Integer to search for set bits.
6199 * @remark Similar to ffs() in BSD.
6200 */
6201DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6202{
6203# if RT_INLINE_ASM_USES_INTRIN
6204 unsigned long iBit;
6205 if (_BitScanForward(&iBit, u32))
6206 iBit++;
6207 else
6208 iBit = 0;
6209# elif RT_INLINE_ASM_GNU_STYLE
6210 uint32_t iBit;
6211 __asm__ __volatile__("bsf %1, %0\n\t"
6212 "jnz 1f\n\t"
6213 "xorl %0, %0\n\t"
6214 "jmp 2f\n"
6215 "1:\n\t"
6216 "incl %0\n"
6217 "2:\n\t"
6218 : "=r" (iBit)
6219 : "rm" (u32));
6220# else
6221 uint32_t iBit;
6222 _asm
6223 {
6224 bsf eax, [u32]
6225 jnz found
6226 xor eax, eax
6227 jmp done
6228 found:
6229 inc eax
6230 done:
6231 mov [iBit], eax
6232 }
6233# endif
6234 return iBit;
6235}
6236
6237
6238/**
6239 * Finds the first bit which is set in the given 32-bit integer.
6240 * Bits are numbered from 1 (least significant) to 32.
6241 *
6242 * @returns index [1..32] of the first set bit.
6243 * @returns 0 if all bits are cleared.
6244 * @param i32 Integer to search for set bits.
6245 * @remark Similar to ffs() in BSD.
6246 */
6247DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6248{
6249 return ASMBitFirstSetU32((uint32_t)i32);
6250}
6251
6252
6253/**
6254 * Finds the last bit which is set in the given 32-bit integer.
6255 * Bits are numbered from 1 (least significant) to 32.
6256 *
6257 * @returns index [1..32] of the last set bit.
6258 * @returns 0 if all bits are cleared.
6259 * @param u32 Integer to search for set bits.
6260 * @remark Similar to fls() in BSD.
6261 */
6262DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6263{
6264# if RT_INLINE_ASM_USES_INTRIN
6265 unsigned long iBit;
6266 if (_BitScanReverse(&iBit, u32))
6267 iBit++;
6268 else
6269 iBit = 0;
6270# elif RT_INLINE_ASM_GNU_STYLE
6271 uint32_t iBit;
6272 __asm__ __volatile__("bsrl %1, %0\n\t"
6273 "jnz 1f\n\t"
6274 "xorl %0, %0\n\t"
6275 "jmp 2f\n"
6276 "1:\n\t"
6277 "incl %0\n"
6278 "2:\n\t"
6279 : "=r" (iBit)
6280 : "rm" (u32));
6281# else
6282 uint32_t iBit;
6283 _asm
6284 {
6285 bsr eax, [u32]
6286 jnz found
6287 xor eax, eax
6288 jmp done
6289 found:
6290 inc eax
6291 done:
6292 mov [iBit], eax
6293 }
6294# endif
6295 return iBit;
6296}
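
/*
 * Usage sketch (illustrative; myLog2 is hypothetical): since the bits are
 * numbered 1..32, the last set bit minus one is floor(log2(u32)) for any
 * non-zero input.
 *
 *      static unsigned myLog2(uint32_t u32)
 *      {
 *          Assert(u32);
 *          return ASMBitLastSetU32(u32) - 1;
 *      }
 */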
6297
6298
6299/**
6300 * Finds the last bit which is set in the given 32-bit integer.
6301 * Bits are numbered from 1 (least significant) to 32.
6302 *
6303 * @returns index [1..32] of the last set bit.
6304 * @returns 0 if all bits are cleared.
6305 * @param i32 Integer to search for set bits.
6306 * @remark Similar to fls() in BSD.
6307 */
6308DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6309{
6310 return ASMBitLastSetU32((uint32_t)i32);
6311}
6312
6313/**
6314 * Reverse the byte order of the given 16-bit integer.
6315 *
6316 * @returns The 16-bit value with the byte order reversed.
6317 * @param u16 16-bit integer value.
6318 */
6319DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6320{
6321#if RT_INLINE_ASM_USES_INTRIN
6322 u16 = _byteswap_ushort(u16);
6323#elif RT_INLINE_ASM_GNU_STYLE
6324 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6325#else
6326 _asm
6327 {
6328 mov ax, [u16]
6329 ror ax, 8
6330 mov [u16], ax
6331 }
6332#endif
6333 return u16;
6334}
6335
6336/**
6337 * Reverse the byte order of the given 32-bit integer.
6338 *
6339 * @returns The 32-bit value with the byte order reversed.
6340 * @param u32 32-bit integer value.
6341 */
6342DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6343{
6344#if RT_INLINE_ASM_USES_INTRIN
6345 u32 = _byteswap_ulong(u32);
6346#elif RT_INLINE_ASM_GNU_STYLE
6347 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6348#else
6349 _asm
6350 {
6351 mov eax, [u32]
6352 bswap eax
6353 mov [u32], eax
6354 }
6355#endif
6356 return u32;
6357}
6358
6359
6360/**
6361 * Reverse the byte order of the given 64-bit integer.
6362 *
6363 * @returns The 64-bit value with the byte order reversed.
6364 * @param u64 64-bit integer value.
6365 */
6366DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6367{
6368#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6369 u64 = _byteswap_uint64(u64);
6370#else
6371 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6372 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6373#endif
6374 return u64;
6375}
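
/*
 * Usage sketch (illustrative; uWire is a hypothetical big-endian value read
 * off the wire): on the little-endian x86/AMD64 targets this header is
 * written for, byte swapping converts between host order and the big-endian
 * order used by many wire formats.
 *
 *      uint32_t uHost = ASMByteSwapU32(uWire);
 */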
6376
6377
6378/** @} */
6379
6380
6381/** @} */
6382#endif
6383