VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 21943

Last change on this file was r21943, checked in by vboxsync, 16 years ago:

iprt/asm.h: typos.

1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using the intrinsics of a Microsoft compiler with _MSC_VER >= 1400.
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
47#ifdef _MSC_VER
48# if _MSC_VER >= 1400
49# define RT_INLINE_ASM_USES_INTRIN 1
50# include <intrin.h>
51 /* Emit the intrinsics at all optimization levels. */
52# pragma intrinsic(_ReadWriteBarrier)
53# pragma intrinsic(__cpuid)
54# pragma intrinsic(_enable)
55# pragma intrinsic(_disable)
56# pragma intrinsic(__rdtsc)
57# pragma intrinsic(__readmsr)
58# pragma intrinsic(__writemsr)
59# pragma intrinsic(__outbyte)
60# pragma intrinsic(__outbytestring)
61# pragma intrinsic(__outword)
62# pragma intrinsic(__outwordstring)
63# pragma intrinsic(__outdword)
64# pragma intrinsic(__outdwordstring)
65# pragma intrinsic(__inbyte)
66# pragma intrinsic(__inbytestring)
67# pragma intrinsic(__inword)
68# pragma intrinsic(__inwordstring)
69# pragma intrinsic(__indword)
70# pragma intrinsic(__indwordstring)
71# pragma intrinsic(__invlpg)
72# pragma intrinsic(__wbinvd)
73# pragma intrinsic(__stosd)
74# pragma intrinsic(__stosw)
75# pragma intrinsic(__stosb)
76# pragma intrinsic(__readcr0)
77# pragma intrinsic(__readcr2)
78# pragma intrinsic(__readcr3)
79# pragma intrinsic(__readcr4)
80# pragma intrinsic(__writecr0)
81# pragma intrinsic(__writecr3)
82# pragma intrinsic(__writecr4)
83# pragma intrinsic(__readdr)
84# pragma intrinsic(__writedr)
85# pragma intrinsic(_BitScanForward)
86# pragma intrinsic(_BitScanReverse)
87# pragma intrinsic(_bittest)
88# pragma intrinsic(_bittestandset)
89# pragma intrinsic(_bittestandreset)
90# pragma intrinsic(_bittestandcomplement)
91# pragma intrinsic(_byteswap_ushort)
92# pragma intrinsic(_byteswap_ulong)
93# pragma intrinsic(_interlockedbittestandset)
94# pragma intrinsic(_interlockedbittestandreset)
95# pragma intrinsic(_InterlockedAnd)
96# pragma intrinsic(_InterlockedOr)
97# pragma intrinsic(_InterlockedIncrement)
98# pragma intrinsic(_InterlockedDecrement)
99# pragma intrinsic(_InterlockedExchange)
100# pragma intrinsic(_InterlockedExchangeAdd)
101# pragma intrinsic(_InterlockedCompareExchange)
102# pragma intrinsic(_InterlockedCompareExchange64)
103# ifdef RT_ARCH_AMD64
104# pragma intrinsic(_mm_mfence)
105# pragma intrinsic(_mm_sfence)
106# pragma intrinsic(_mm_lfence)
107# pragma intrinsic(__stosq)
108# pragma intrinsic(__readcr8)
109# pragma intrinsic(__writecr8)
110# pragma intrinsic(_byteswap_uint64)
111# pragma intrinsic(_InterlockedExchange64)
112# endif
113# endif
114#endif
115#ifndef RT_INLINE_ASM_USES_INTRIN
116# define RT_INLINE_ASM_USES_INTRIN 0
117#endif
118
119/** @def RT_INLINE_ASM_GCC_4_3_X_X86
120 * Used to work around some 4.3.x register allocation issues in this version of
121 * the compiler. */
122#ifdef __GNUC__
123# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__))
124#endif
125#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
126# define RT_INLINE_ASM_GCC_4_3_X_X86 0
127#endif
128
129
130
131/** @defgroup grp_asm ASM - Assembly Routines
132 * @ingroup grp_rt
133 *
134 * @remarks The difference between ordered and unordered atomic operations is that
135 * the former will complete outstanding reads and writes before continuing,
136 * while the latter make no promises about the ordering. Even ordered
137 * operations do not, it seems, make a 100% promise as to whether
138 * the operation will complete before any subsequent memory access.
139 * (Please correct this if it is wrong.)
140 *
141 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
142 * are unordered (note the Uo).
143 *
144 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
145 * or even optimize assembler instructions away. For instance, in the following code
146 * the second rdmsr instruction is optimized away because gcc treats that instruction
147 * as deterministic:
148 *
149 * @code
150 * static inline uint32_t rdmsr_low(int idx)
151 * {
152 * uint32_t low;
153 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
    * return low;
154 * }
155 * ...
156 * uint32_t msr1 = rdmsr_low(1);
157 * foo(msr1);
158 * msr1 = rdmsr_low(1);
159 * bar(msr1);
160 * @endcode
161 *
162 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
163 * use the result of the first call as input parameter for bar() as well. For rdmsr this
164 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
165 * machine status information in general.
166 *
167 * @{
168 */
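/* Illustrative sketch (not part of the original API surface): the ordered
 * ASMAtomicXchgU8() declared further down used as a simple busy flag. The names
 * g_fBusy and DoWork() are made up for this example.
 * @code
 *      static volatile uint8_t g_fBusy = 0;
 *      ...
 *      if (ASMAtomicXchgU8(&g_fBusy, 1) == 0)
 *      {
 *          DoWork();                       // we won the flag
 *          ASMAtomicXchgU8(&g_fBusy, 0);   // ordered release
 *      }
 * @endcode
 */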
169
170/** @def RT_INLINE_ASM_EXTERNAL
171 * Defined as 1 if the compiler does not support inline assembly.
172 * The ASM* functions will then be implemented in an external .asm file.
173 *
174 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
175 * inline assembly in their AMD64 compiler.
176 */
177#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
178# define RT_INLINE_ASM_EXTERNAL 1
179#else
180# define RT_INLINE_ASM_EXTERNAL 0
181#endif
182
183/** @def RT_INLINE_ASM_GNU_STYLE
184 * Defined as 1 if the compiler understands GNU style inline assembly.
185 */
186#if defined(_MSC_VER)
187# define RT_INLINE_ASM_GNU_STYLE 0
188#else
189# define RT_INLINE_ASM_GNU_STYLE 1
190#endif
191
192
193/** @todo find a more proper place for this structure? */
194#pragma pack(1)
195/** IDTR */
196typedef struct RTIDTR
197{
198 /** Size of the IDT. */
199 uint16_t cbIdt;
200 /** Address of the IDT. */
201 uintptr_t pIdt;
202} RTIDTR, *PRTIDTR;
203#pragma pack()
204
205#pragma pack(1)
206/** GDTR */
207typedef struct RTGDTR
208{
209 /** Size of the GDT. */
210 uint16_t cbGdt;
211 /** Address of the GDT. */
212 uintptr_t pGdt;
213} RTGDTR, *PRTGDTR;
214#pragma pack()
215
216
217/** @def ASMReturnAddress
218 * Gets the return address of the current (or calling if you like) function or method.
219 */
220#ifdef _MSC_VER
221# ifdef __cplusplus
222extern "C"
223# endif
224void * _ReturnAddress(void);
225# pragma intrinsic(_ReturnAddress)
226# define ASMReturnAddress() _ReturnAddress()
227#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
228# define ASMReturnAddress() __builtin_return_address(0)
229#else
230# error "Unsupported compiler."
231#endif
232
233
234/**
235 * Gets the content of the IDTR CPU register.
236 * @param pIdtr Where to store the IDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
240#else
241DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 sidt [rax]
251# else
252 mov eax, [pIdtr]
253 sidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Sets the content of the IDTR CPU register.
263 * @param pIdtr Where to load the IDTR contents from
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
267#else
268DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pIdtr]
277 lidt [rax]
278# else
279 mov eax, [pIdtr]
280 lidt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287
288/**
289 * Gets the content of the GDTR CPU register.
290 * @param pGdtr Where to store the GDTR contents.
291 */
292#if RT_INLINE_ASM_EXTERNAL
293DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
294#else
295DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
296{
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
299# else
300 __asm
301 {
302# ifdef RT_ARCH_AMD64
303 mov rax, [pGdtr]
304 sgdt [rax]
305# else
306 mov eax, [pGdtr]
307 sgdt [eax]
308# endif
309 }
310# endif
311}
312#endif
313
314/**
315 * Get the cs register.
316 * @returns cs.
317 */
318#if RT_INLINE_ASM_EXTERNAL
319DECLASM(RTSEL) ASMGetCS(void);
320#else
321DECLINLINE(RTSEL) ASMGetCS(void)
322{
323 RTSEL SelCS;
324# if RT_INLINE_ASM_GNU_STYLE
325 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
326# else
327 __asm
328 {
329 mov ax, cs
330 mov [SelCS], ax
331 }
332# endif
333 return SelCS;
334}
335#endif
336
337
338/**
339 * Get the DS register.
340 * @returns DS.
341 */
342#if RT_INLINE_ASM_EXTERNAL
343DECLASM(RTSEL) ASMGetDS(void);
344#else
345DECLINLINE(RTSEL) ASMGetDS(void)
346{
347 RTSEL SelDS;
348# if RT_INLINE_ASM_GNU_STYLE
349 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
350# else
351 __asm
352 {
353 mov ax, ds
354 mov [SelDS], ax
355 }
356# endif
357 return SelDS;
358}
359#endif
360
361
362/**
363 * Get the ES register.
364 * @returns ES.
365 */
366#if RT_INLINE_ASM_EXTERNAL
367DECLASM(RTSEL) ASMGetES(void);
368#else
369DECLINLINE(RTSEL) ASMGetES(void)
370{
371 RTSEL SelES;
372# if RT_INLINE_ASM_GNU_STYLE
373 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
374# else
375 __asm
376 {
377 mov ax, es
378 mov [SelES], ax
379 }
380# endif
381 return SelES;
382}
383#endif
384
385
386/**
387 * Get the FS register.
388 * @returns FS.
389 */
390#if RT_INLINE_ASM_EXTERNAL
391DECLASM(RTSEL) ASMGetFS(void);
392#else
393DECLINLINE(RTSEL) ASMGetFS(void)
394{
395 RTSEL SelFS;
396# if RT_INLINE_ASM_GNU_STYLE
397 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
398# else
399 __asm
400 {
401 mov ax, fs
402 mov [SelFS], ax
403 }
404# endif
405 return SelFS;
406}
407#endif
408
409
410/**
411 * Get the GS register.
412 * @returns GS.
413 */
414#if RT_INLINE_ASM_EXTERNAL
415DECLASM(RTSEL) ASMGetGS(void);
416#else
417DECLINLINE(RTSEL) ASMGetGS(void)
418{
419 RTSEL SelGS;
420# if RT_INLINE_ASM_GNU_STYLE
421 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
422# else
423 __asm
424 {
425 mov ax, gs
426 mov [SelGS], ax
427 }
428# endif
429 return SelGS;
430}
431#endif
432
433
434/**
435 * Get the SS register.
436 * @returns SS.
437 */
438#if RT_INLINE_ASM_EXTERNAL
439DECLASM(RTSEL) ASMGetSS(void);
440#else
441DECLINLINE(RTSEL) ASMGetSS(void)
442{
443 RTSEL SelSS;
444# if RT_INLINE_ASM_GNU_STYLE
445 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
446# else
447 __asm
448 {
449 mov ax, ss
450 mov [SelSS], ax
451 }
452# endif
453 return SelSS;
454}
455#endif
456
457
458/**
459 * Get the TR register.
460 * @returns TR.
461 */
462#if RT_INLINE_ASM_EXTERNAL
463DECLASM(RTSEL) ASMGetTR(void);
464#else
465DECLINLINE(RTSEL) ASMGetTR(void)
466{
467 RTSEL SelTR;
468# if RT_INLINE_ASM_GNU_STYLE
469 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
470# else
471 __asm
472 {
473 str ax
474 mov [SelTR], ax
475 }
476# endif
477 return SelTR;
478}
479#endif
480
481
482/**
483 * Get the [RE]FLAGS register.
484 * @returns [RE]FLAGS.
485 */
486#if RT_INLINE_ASM_EXTERNAL
487DECLASM(RTCCUINTREG) ASMGetFlags(void);
488#else
489DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
490{
491 RTCCUINTREG uFlags;
492# if RT_INLINE_ASM_GNU_STYLE
493# ifdef RT_ARCH_AMD64
494 __asm__ __volatile__("pushfq\n\t"
495 "popq %0\n\t"
496 : "=g" (uFlags));
497# else
498 __asm__ __volatile__("pushfl\n\t"
499 "popl %0\n\t"
500 : "=g" (uFlags));
501# endif
502# else
503 __asm
504 {
505# ifdef RT_ARCH_AMD64
506 pushfq
507 pop [uFlags]
508# else
509 pushfd
510 pop [uFlags]
511# endif
512 }
513# endif
514 return uFlags;
515}
516#endif
517
518
519/**
520 * Set the [RE]FLAGS register.
521 * @param uFlags The new [RE]FLAGS value.
522 */
523#if RT_INLINE_ASM_EXTERNAL
524DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
525#else
526DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
527{
528# if RT_INLINE_ASM_GNU_STYLE
529# ifdef RT_ARCH_AMD64
530 __asm__ __volatile__("pushq %0\n\t"
531 "popfq\n\t"
532 : : "g" (uFlags));
533# else
534 __asm__ __volatile__("pushl %0\n\t"
535 "popfl\n\t"
536 : : "g" (uFlags));
537# endif
538# else
539 __asm
540 {
541# ifdef RT_ARCH_AMD64
542 push [uFlags]
543 popfq
544# else
545 push [uFlags]
546 popfd
547# endif
548 }
549# endif
550}
551#endif
552
553
554/**
555 * Gets the content of the CPU timestamp counter register.
556 *
557 * @returns TSC.
558 */
559#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
560DECLASM(uint64_t) ASMReadTSC(void);
561#else
562DECLINLINE(uint64_t) ASMReadTSC(void)
563{
564 RTUINT64U u;
565# if RT_INLINE_ASM_GNU_STYLE
566 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
567# else
568# if RT_INLINE_ASM_USES_INTRIN
569 u.u = __rdtsc();
570# else
571 __asm
572 {
573 rdtsc
574 mov [u.s.Lo], eax
575 mov [u.s.Hi], edx
576 }
577# endif
578# endif
579 return u.u;
580}
581#endif
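/* Illustrative sketch: timing a block of code with ASMReadTSC(). Raw TSC deltas
 * are only meaningful on a CPU with a constant TSC and without migration between
 * CPUs; MeasuredWork() is a made-up placeholder.
 * @code
 *      uint64_t const uStart = ASMReadTSC();
 *      MeasuredWork();
 *      uint64_t const cTicks = ASMReadTSC() - uStart;
 * @endcode
 */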
582
583
584/**
585 * Performs the cpuid instruction returning all registers.
586 *
587 * @param uOperator CPUID operation (eax).
588 * @param pvEAX Where to store eax.
589 * @param pvEBX Where to store ebx.
590 * @param pvECX Where to store ecx.
591 * @param pvEDX Where to store edx.
592 * @remark We're using void pointers to ease the use of special bitfield structures and such.
593 */
594#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
595DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
596#else
597DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
598{
599# if RT_INLINE_ASM_GNU_STYLE
600# ifdef RT_ARCH_AMD64
601 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
602 __asm__ ("cpuid\n\t"
603 : "=a" (uRAX),
604 "=b" (uRBX),
605 "=c" (uRCX),
606 "=d" (uRDX)
607 : "0" (uOperator));
608 *(uint32_t *)pvEAX = (uint32_t)uRAX;
609 *(uint32_t *)pvEBX = (uint32_t)uRBX;
610 *(uint32_t *)pvECX = (uint32_t)uRCX;
611 *(uint32_t *)pvEDX = (uint32_t)uRDX;
612# else
613 __asm__ ("xchgl %%ebx, %1\n\t"
614 "cpuid\n\t"
615 "xchgl %%ebx, %1\n\t"
616 : "=a" (*(uint32_t *)pvEAX),
617 "=r" (*(uint32_t *)pvEBX),
618 "=c" (*(uint32_t *)pvECX),
619 "=d" (*(uint32_t *)pvEDX)
620 : "0" (uOperator));
621# endif
622
623# elif RT_INLINE_ASM_USES_INTRIN
624 int aInfo[4];
625 __cpuid(aInfo, uOperator);
626 *(uint32_t *)pvEAX = aInfo[0];
627 *(uint32_t *)pvEBX = aInfo[1];
628 *(uint32_t *)pvECX = aInfo[2];
629 *(uint32_t *)pvEDX = aInfo[3];
630
631# else
632 uint32_t uEAX;
633 uint32_t uEBX;
634 uint32_t uECX;
635 uint32_t uEDX;
636 __asm
637 {
638 push ebx
639 mov eax, [uOperator]
640 cpuid
641 mov [uEAX], eax
642 mov [uEBX], ebx
643 mov [uECX], ecx
644 mov [uEDX], edx
645 pop ebx
646 }
647 *(uint32_t *)pvEAX = uEAX;
648 *(uint32_t *)pvEBX = uEBX;
649 *(uint32_t *)pvECX = uECX;
650 *(uint32_t *)pvEDX = uEDX;
651# endif
652}
653#endif
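/* Illustrative sketch: fetching the CPU vendor string via standard leaf 0. The
 * twelve vendor bytes are returned in EBX, EDX, ECX (in that order); memcpy
 * requires <string.h> and is only used for the example.
 * @code
 *      uint32_t uMaxLeaf, au32Vendor[3];
 *      char     szVendor[13];
 *      ASMCpuId(0, &uMaxLeaf, &au32Vendor[0], &au32Vendor[2], &au32Vendor[1]);
 *      memcpy(szVendor, au32Vendor, sizeof(au32Vendor));
 *      szVendor[12] = '\0';    // e.g. "GenuineIntel" or "AuthenticAMD"
 * @endcode
 */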
654
655
656/**
657 * Performs the cpuid instruction returning all registers.
658 * Some cpuid leaves take ECX as an additional input parameter (currently known to apply to EAX=4).
659 *
660 * @param uOperator CPUID operation (eax).
661 * @param uIdxECX ecx index
662 * @param pvEAX Where to store eax.
663 * @param pvEBX Where to store ebx.
664 * @param pvECX Where to store ecx.
665 * @param pvEDX Where to store edx.
666 * @remark We're using void pointers to ease the use of special bitfield structures and such.
667 */
668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
669DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
670#else
671DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
672{
673# if RT_INLINE_ASM_GNU_STYLE
674# ifdef RT_ARCH_AMD64
675 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
676 __asm__ ("cpuid\n\t"
677 : "=a" (uRAX),
678 "=b" (uRBX),
679 "=c" (uRCX),
680 "=d" (uRDX)
681 : "0" (uOperator),
682 "2" (uIdxECX));
683 *(uint32_t *)pvEAX = (uint32_t)uRAX;
684 *(uint32_t *)pvEBX = (uint32_t)uRBX;
685 *(uint32_t *)pvECX = (uint32_t)uRCX;
686 *(uint32_t *)pvEDX = (uint32_t)uRDX;
687# else
688 __asm__ ("xchgl %%ebx, %1\n\t"
689 "cpuid\n\t"
690 "xchgl %%ebx, %1\n\t"
691 : "=a" (*(uint32_t *)pvEAX),
692 "=r" (*(uint32_t *)pvEBX),
693 "=c" (*(uint32_t *)pvECX),
694 "=d" (*(uint32_t *)pvEDX)
695 : "0" (uOperator),
696 "2" (uIdxECX));
697# endif
698
699# elif RT_INLINE_ASM_USES_INTRIN
700 int aInfo[4];
701 /* Note: __cpuid() does not pass uIdxECX, so sub-leaves other than the default
       are not selected here. @todo find/use an ECX-aware intrinsic. */
702 __cpuid(aInfo, uOperator);
703 *(uint32_t *)pvEAX = aInfo[0];
704 *(uint32_t *)pvEBX = aInfo[1];
705 *(uint32_t *)pvECX = aInfo[2];
706 *(uint32_t *)pvEDX = aInfo[3];
707
708# else
709 uint32_t uEAX;
710 uint32_t uEBX;
711 uint32_t uECX;
712 uint32_t uEDX;
713 __asm
714 {
715 push ebx
716 mov eax, [uOperator]
717 mov ecx, [uIdxECX]
718 cpuid
719 mov [uEAX], eax
720 mov [uEBX], ebx
721 mov [uECX], ecx
722 mov [uEDX], edx
723 pop ebx
724 }
725 *(uint32_t *)pvEAX = uEAX;
726 *(uint32_t *)pvEBX = uEBX;
727 *(uint32_t *)pvECX = uECX;
728 *(uint32_t *)pvEDX = uEDX;
729# endif
730}
731#endif
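/* Illustrative sketch: walking the Intel deterministic cache parameter leaf
 * (EAX=4), which is the case the ECX index mentioned above exists for. A cache
 * type of 0 in EAX bits 4:0 terminates the list.
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      uint32_t iSubLeaf = 0;
 *      for (;;)
 *      {
 *          ASMCpuId_Idx_ECX(4, iSubLeaf, &uEAX, &uEBX, &uECX, &uEDX);
 *          if ((uEAX & 0x1f) == 0)         // no more cache levels
 *              break;
 *          // uEAX bits 7:5 give the cache level, bits 4:0 the cache type.
 *          iSubLeaf++;
 *      }
 * @endcode
 */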
732
733
734/**
735 * Performs the cpuid instruction returning ecx and edx.
736 *
737 * @param uOperator CPUID operation (eax).
738 * @param pvECX Where to store ecx.
739 * @param pvEDX Where to store edx.
740 * @remark We're using void pointers to ease the use of special bitfield structures and such.
741 */
742#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
743DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
744#else
745DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
746{
747 uint32_t uEBX;
748 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
749}
750#endif
751
752
753/**
754 * Performs the cpuid instruction returning edx.
755 *
756 * @param uOperator CPUID operation (eax).
757 * @returns EDX after cpuid operation.
758 */
759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
760DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
761#else
762DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
763{
764 RTCCUINTREG xDX;
765# if RT_INLINE_ASM_GNU_STYLE
766# ifdef RT_ARCH_AMD64
767 RTCCUINTREG uSpill;
768 __asm__ ("cpuid"
769 : "=a" (uSpill),
770 "=d" (xDX)
771 : "0" (uOperator)
772 : "rbx", "rcx");
773# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
774 __asm__ ("push %%ebx\n\t"
775 "cpuid\n\t"
776 "pop %%ebx\n\t"
777 : "=a" (uOperator),
778 "=d" (xDX)
779 : "0" (uOperator)
780 : "ecx");
781# else
782 __asm__ ("cpuid"
783 : "=a" (uOperator),
784 "=d" (xDX)
785 : "0" (uOperator)
786 : "ebx", "ecx");
787# endif
788
789# elif RT_INLINE_ASM_USES_INTRIN
790 int aInfo[4];
791 __cpuid(aInfo, uOperator);
792 xDX = aInfo[3];
793
794# else
795 __asm
796 {
797 push ebx
798 mov eax, [uOperator]
799 cpuid
800 mov [xDX], edx
801 pop ebx
802 }
803# endif
804 return (uint32_t)xDX;
805}
806#endif
807
808
809/**
810 * Performs the cpuid instruction returning ecx.
811 *
812 * @param uOperator CPUID operation (eax).
813 * @returns ECX after cpuid operation.
814 */
815#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
816DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
817#else
818DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
819{
820 RTCCUINTREG xCX;
821# if RT_INLINE_ASM_GNU_STYLE
822# ifdef RT_ARCH_AMD64
823 RTCCUINTREG uSpill;
824 __asm__ ("cpuid"
825 : "=a" (uSpill),
826 "=c" (xCX)
827 : "0" (uOperator)
828 : "rbx", "rdx");
829# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
830 __asm__ ("push %%ebx\n\t"
831 "cpuid\n\t"
832 "pop %%ebx\n\t"
833 : "=a" (uOperator),
834 "=c" (xCX)
835 : "0" (uOperator)
836 : "edx");
837# else
838 __asm__ ("cpuid"
839 : "=a" (uOperator),
840 "=c" (xCX)
841 : "0" (uOperator)
842 : "ebx", "edx");
843
844# endif
845
846# elif RT_INLINE_ASM_USES_INTRIN
847 int aInfo[4];
848 __cpuid(aInfo, uOperator);
849 xCX = aInfo[2];
850
851# else
852 __asm
853 {
854 push ebx
855 mov eax, [uOperator]
856 cpuid
857 mov [xCX], ecx
858 pop ebx
859 }
860# endif
861 return (uint32_t)xCX;
862}
863#endif
864
865
866/**
867 * Checks if the current CPU supports CPUID.
868 *
869 * @returns true if CPUID is supported.
870 */
871DECLINLINE(bool) ASMHasCpuId(void)
872{
873#ifdef RT_ARCH_AMD64
874 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
875#else /* !RT_ARCH_AMD64 */
876 bool fRet = false;
877# if RT_INLINE_ASM_GNU_STYLE
878 uint32_t u1;
879 uint32_t u2;
880 __asm__ ("pushf\n\t"
881 "pop %1\n\t"
882 "mov %1, %2\n\t"
883 "xorl $0x200000, %1\n\t"
884 "push %1\n\t"
885 "popf\n\t"
886 "pushf\n\t"
887 "pop %1\n\t"
888 "cmpl %1, %2\n\t"
889 "setne %0\n\t"
890 "push %2\n\t"
891 "popf\n\t"
892 : "=m" (fRet), "=r" (u1), "=r" (u2));
893# else
894 __asm
895 {
896 pushfd
897 pop eax
898 mov ebx, eax
899 xor eax, 0200000h
900 push eax
901 popfd
902 pushfd
903 pop eax
904 cmp eax, ebx
905 setne fRet
906 push ebx
907 popfd
908 }
909# endif
910 return fRet;
911#endif /* !RT_ARCH_AMD64 */
912}
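/* Illustrative sketch: on 32-bit hosts the CPUID helpers above should only be
 * used after checking ASMHasCpuId().
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      if (ASMHasCpuId())
 *          ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 * @endcode
 */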
913
914
915/**
916 * Gets the APIC ID of the current CPU.
917 *
918 * @returns the APIC ID.
919 */
920#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
921DECLASM(uint8_t) ASMGetApicId(void);
922#else
923DECLINLINE(uint8_t) ASMGetApicId(void)
924{
925 RTCCUINTREG xBX;
926# if RT_INLINE_ASM_GNU_STYLE
927# ifdef RT_ARCH_AMD64
928 RTCCUINTREG uSpill;
929 __asm__ ("cpuid"
930 : "=a" (uSpill),
931 "=b" (xBX)
932 : "0" (1)
933 : "rcx", "rdx");
934# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
935 RTCCUINTREG uSpill;
936 __asm__ ("mov %%ebx,%1\n\t"
937 "cpuid\n\t"
938 "xchgl %%ebx,%1\n\t"
939 : "=a" (uSpill),
940 "=r" (xBX)
941 : "0" (1)
942 : "ecx", "edx");
943# else
944 RTCCUINTREG uSpill;
945 __asm__ ("cpuid"
946 : "=a" (uSpill),
947 "=b" (xBX)
948 : "0" (1)
949 : "ecx", "edx");
950# endif
951
952# elif RT_INLINE_ASM_USES_INTRIN
953 int aInfo[4];
954 __cpuid(aInfo, 1);
955 xBX = aInfo[1];
956
957# else
958 __asm
959 {
960 push ebx
961 mov eax, 1
962 cpuid
963 mov [xBX], ebx
964 pop ebx
965 }
966# endif
967 return (uint8_t)(xBX >> 24);
968}
969#endif
970
971
972/**
973 * Tests whether this is a genuine Intel CPU based on the ASMCpuId(0) output.
974 *
975 * @returns true/false.
976 * @param uEBX EBX return from ASMCpuId(0)
977 * @param uECX ECX return from ASMCpuId(0)
978 * @param uEDX EDX return from ASMCpuId(0)
979 */
980DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
981{
982 return uEBX == 0x756e6547 /* 'Genu' */
983 && uECX == 0x6c65746e /* 'ntel' */
984 && uEDX == 0x49656e69; /* 'ineI' */
985}
986
987
988/**
989 * Tests if this is a genuine Intel CPU.
990 *
991 * @returns true/false.
992 * @remarks ASSUMES that cpuid is supported by the CPU.
993 */
994DECLINLINE(bool) ASMIsIntelCpu(void)
995{
996 uint32_t uEAX, uEBX, uECX, uEDX;
997 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
998 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
999}
1000
1001
1002/**
1003 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
1004 *
1005 * @returns Family.
1006 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
1007 */
1008DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
1009{
1010 return ((uEAX >> 8) & 0xf) == 0xf
1011 ? ((uEAX >> 20) & 0x7f) + 0xf
1012 : ((uEAX >> 8) & 0xf);
1013}
1014
1015
1016/**
1017 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
1018 *
1019 * @returns Model.
1020 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1022 */
1023DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
1024{
1025 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1026 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1027 : ((uEAX >> 4) & 0xf);
1028}
1029
1030
1031/**
1032 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1033 *
1034 * @returns Model.
1035 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1037 */
1038DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1039{
1040 return ((uEAX >> 8) & 0xf) == 0xf
1041 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1042 : ((uEAX >> 4) & 0xf);
1043}
1044
1045
1046/**
1047 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1048 *
1049 * @returns Model.
1050 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1051 * @param fIntel Whether it's an Intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1052 */
1053DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1054{
1055 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1056 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1057 : ((uEAX >> 4) & 0xf);
1058}
1059
1060
1061/**
1062 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1063 *
1064 * @returns Stepping.
1065 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1066 */
1067DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1068{
1069 return uEAX & 0xf;
1070}
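/* Illustrative sketch: decoding the family/model/stepping triplet from standard
 * leaf 1 using the helpers above.
 * @code
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *      bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *      ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *      uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *      uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *      uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */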
1071
1072
1073/**
1074 * Get cr0.
1075 * @returns cr0.
1076 */
1077#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1078DECLASM(RTCCUINTREG) ASMGetCR0(void);
1079#else
1080DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1081{
1082 RTCCUINTREG uCR0;
1083# if RT_INLINE_ASM_USES_INTRIN
1084 uCR0 = __readcr0();
1085
1086# elif RT_INLINE_ASM_GNU_STYLE
1087# ifdef RT_ARCH_AMD64
1088 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1089# else
1090 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1091# endif
1092# else
1093 __asm
1094 {
1095# ifdef RT_ARCH_AMD64
1096 mov rax, cr0
1097 mov [uCR0], rax
1098# else
1099 mov eax, cr0
1100 mov [uCR0], eax
1101# endif
1102 }
1103# endif
1104 return uCR0;
1105}
1106#endif
1107
1108
1109/**
1110 * Sets the CR0 register.
1111 * @param uCR0 The new CR0 value.
1112 */
1113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1114DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1115#else
1116DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1117{
1118# if RT_INLINE_ASM_USES_INTRIN
1119 __writecr0(uCR0);
1120
1121# elif RT_INLINE_ASM_GNU_STYLE
1122# ifdef RT_ARCH_AMD64
1123 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1124# else
1125 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1126# endif
1127# else
1128 __asm
1129 {
1130# ifdef RT_ARCH_AMD64
1131 mov rax, [uCR0]
1132 mov cr0, rax
1133# else
1134 mov eax, [uCR0]
1135 mov cr0, eax
1136# endif
1137 }
1138# endif
1139}
1140#endif
1141
1142
1143/**
1144 * Get cr2.
1145 * @returns cr2.
1146 */
1147#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1148DECLASM(RTCCUINTREG) ASMGetCR2(void);
1149#else
1150DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1151{
1152 RTCCUINTREG uCR2;
1153# if RT_INLINE_ASM_USES_INTRIN
1154 uCR2 = __readcr2();
1155
1156# elif RT_INLINE_ASM_GNU_STYLE
1157# ifdef RT_ARCH_AMD64
1158 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1159# else
1160 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1161# endif
1162# else
1163 __asm
1164 {
1165# ifdef RT_ARCH_AMD64
1166 mov rax, cr2
1167 mov [uCR2], rax
1168# else
1169 mov eax, cr2
1170 mov [uCR2], eax
1171# endif
1172 }
1173# endif
1174 return uCR2;
1175}
1176#endif
1177
1178
1179/**
1180 * Sets the CR2 register.
1181 * @param uCR2 The new CR2 value.
1182 */
1183#if RT_INLINE_ASM_EXTERNAL
1184DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1185#else
1186DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1187{
1188# if RT_INLINE_ASM_GNU_STYLE
1189# ifdef RT_ARCH_AMD64
1190 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1191# else
1192 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1193# endif
1194# else
1195 __asm
1196 {
1197# ifdef RT_ARCH_AMD64
1198 mov rax, [uCR2]
1199 mov cr2, rax
1200# else
1201 mov eax, [uCR2]
1202 mov cr2, eax
1203# endif
1204 }
1205# endif
1206}
1207#endif
1208
1209
1210/**
1211 * Get cr3.
1212 * @returns cr3.
1213 */
1214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1215DECLASM(RTCCUINTREG) ASMGetCR3(void);
1216#else
1217DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1218{
1219 RTCCUINTREG uCR3;
1220# if RT_INLINE_ASM_USES_INTRIN
1221 uCR3 = __readcr3();
1222
1223# elif RT_INLINE_ASM_GNU_STYLE
1224# ifdef RT_ARCH_AMD64
1225 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1226# else
1227 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1228# endif
1229# else
1230 __asm
1231 {
1232# ifdef RT_ARCH_AMD64
1233 mov rax, cr3
1234 mov [uCR3], rax
1235# else
1236 mov eax, cr3
1237 mov [uCR3], eax
1238# endif
1239 }
1240# endif
1241 return uCR3;
1242}
1243#endif
1244
1245
1246/**
1247 * Sets the CR3 register.
1248 *
1249 * @param uCR3 New CR3 value.
1250 */
1251#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1252DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1253#else
1254DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1255{
1256# if RT_INLINE_ASM_USES_INTRIN
1257 __writecr3(uCR3);
1258
1259# elif RT_INLINE_ASM_GNU_STYLE
1260# ifdef RT_ARCH_AMD64
1261 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1262# else
1263 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1264# endif
1265# else
1266 __asm
1267 {
1268# ifdef RT_ARCH_AMD64
1269 mov rax, [uCR3]
1270 mov cr3, rax
1271# else
1272 mov eax, [uCR3]
1273 mov cr3, eax
1274# endif
1275 }
1276# endif
1277}
1278#endif
1279
1280
1281/**
1282 * Reloads the CR3 register.
1283 */
1284#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1285DECLASM(void) ASMReloadCR3(void);
1286#else
1287DECLINLINE(void) ASMReloadCR3(void)
1288{
1289# if RT_INLINE_ASM_USES_INTRIN
1290 __writecr3(__readcr3());
1291
1292# elif RT_INLINE_ASM_GNU_STYLE
1293 RTCCUINTREG u;
1294# ifdef RT_ARCH_AMD64
1295 __asm__ __volatile__("movq %%cr3, %0\n\t"
1296 "movq %0, %%cr3\n\t"
1297 : "=r" (u));
1298# else
1299 __asm__ __volatile__("movl %%cr3, %0\n\t"
1300 "movl %0, %%cr3\n\t"
1301 : "=r" (u));
1302# endif
1303# else
1304 __asm
1305 {
1306# ifdef RT_ARCH_AMD64
1307 mov rax, cr3
1308 mov cr3, rax
1309# else
1310 mov eax, cr3
1311 mov cr3, eax
1312# endif
1313 }
1314# endif
1315}
1316#endif
1317
1318
1319/**
1320 * Get cr4.
1321 * @returns cr4.
1322 */
1323#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1324DECLASM(RTCCUINTREG) ASMGetCR4(void);
1325#else
1326DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1327{
1328 RTCCUINTREG uCR4;
1329# if RT_INLINE_ASM_USES_INTRIN
1330 uCR4 = __readcr4();
1331
1332# elif RT_INLINE_ASM_GNU_STYLE
1333# ifdef RT_ARCH_AMD64
1334 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1335# else
1336 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1337# endif
1338# else
1339 __asm
1340 {
1341# ifdef RT_ARCH_AMD64
1342 mov rax, cr4
1343 mov [uCR4], rax
1344# else
1345 push eax /* just in case */
1346 /*mov eax, cr4*/
1347 _emit 0x0f
1348 _emit 0x20
1349 _emit 0xe0
1350 mov [uCR4], eax
1351 pop eax
1352# endif
1353 }
1354# endif
1355 return uCR4;
1356}
1357#endif
1358
1359
1360/**
1361 * Sets the CR4 register.
1362 *
1363 * @param uCR4 New CR4 value.
1364 */
1365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1366DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1367#else
1368DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1369{
1370# if RT_INLINE_ASM_USES_INTRIN
1371 __writecr4(uCR4);
1372
1373# elif RT_INLINE_ASM_GNU_STYLE
1374# ifdef RT_ARCH_AMD64
1375 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1376# else
1377 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1378# endif
1379# else
1380 __asm
1381 {
1382# ifdef RT_ARCH_AMD64
1383 mov rax, [uCR4]
1384 mov cr4, rax
1385# else
1386 mov eax, [uCR4]
1387 _emit 0x0F
1388 _emit 0x22
1389 _emit 0xE0 /* mov cr4, eax */
1390# endif
1391 }
1392# endif
1393}
1394#endif
1395
1396
1397/**
1398 * Get cr8.
1399 * @returns cr8.
1400 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1401 */
1402#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1403DECLASM(RTCCUINTREG) ASMGetCR8(void);
1404#else
1405DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1406{
1407# ifdef RT_ARCH_AMD64
1408 RTCCUINTREG uCR8;
1409# if RT_INLINE_ASM_USES_INTRIN
1410 uCR8 = __readcr8();
1411
1412# elif RT_INLINE_ASM_GNU_STYLE
1413 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1414# else
1415 __asm
1416 {
1417 mov rax, cr8
1418 mov [uCR8], rax
1419 }
1420# endif
1421 return uCR8;
1422# else /* !RT_ARCH_AMD64 */
1423 return 0;
1424# endif /* !RT_ARCH_AMD64 */
1425}
1426#endif
1427
1428
1429/**
1430 * Enables interrupts (EFLAGS.IF).
1431 */
1432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1433DECLASM(void) ASMIntEnable(void);
1434#else
1435DECLINLINE(void) ASMIntEnable(void)
1436{
1437# if RT_INLINE_ASM_GNU_STYLE
1438 __asm("sti\n");
1439# elif RT_INLINE_ASM_USES_INTRIN
1440 _enable();
1441# else
1442 __asm sti
1443# endif
1444}
1445#endif
1446
1447
1448/**
1449 * Disables interrupts (!EFLAGS.IF).
1450 */
1451#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1452DECLASM(void) ASMIntDisable(void);
1453#else
1454DECLINLINE(void) ASMIntDisable(void)
1455{
1456# if RT_INLINE_ASM_GNU_STYLE
1457 __asm("cli\n");
1458# elif RT_INLINE_ASM_USES_INTRIN
1459 _disable();
1460# else
1461 __asm cli
1462# endif
1463}
1464#endif
1465
1466
1467/**
1468 * Disables interrupts and returns previous xFLAGS.
1469 */
1470#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1471DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1472#else
1473DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1474{
1475 RTCCUINTREG xFlags;
1476# if RT_INLINE_ASM_GNU_STYLE
1477# ifdef RT_ARCH_AMD64
1478 __asm__ __volatile__("pushfq\n\t"
1479 "cli\n\t"
1480 "popq %0\n\t"
1481 : "=rm" (xFlags));
1482# else
1483 __asm__ __volatile__("pushfl\n\t"
1484 "cli\n\t"
1485 "popl %0\n\t"
1486 : "=rm" (xFlags));
1487# endif
1488# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1489 xFlags = ASMGetFlags();
1490 _disable();
1491# else
1492 __asm {
1493 pushfd
1494 cli
1495 pop [xFlags]
1496 }
1497# endif
1498 return xFlags;
1499}
1500#endif
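/* Illustrative sketch: the usual save-disable-restore pattern around a short
 * critical section; TouchSharedState() is a made-up placeholder.
 * @code
 *      RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *      TouchSharedState();
 *      ASMSetFlags(fSavedFlags);       // restores the previous EFLAGS.IF
 * @endcode
 */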
1501
1502
1503/**
1504 * Are interrupts enabled?
1505 *
1506 * @returns true / false.
1507 */
1508DECLINLINE(RTCCUINTREG) ASMIntAreEnabled(void)
1509{
1510 RTCCUINTREG uFlags = ASMGetFlags();
1511 return uFlags & 0x200 /* X86_EFL_IF */ ? true : false;
1512}
1513
1514
1515/**
1516 * Halts the CPU until interrupted.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL
1519DECLASM(void) ASMHalt(void);
1520#else
1521DECLINLINE(void) ASMHalt(void)
1522{
1523# if RT_INLINE_ASM_GNU_STYLE
1524 __asm__ __volatile__("hlt\n\t");
1525# else
1526 __asm {
1527 hlt
1528 }
1529# endif
1530}
1531#endif
1532
1533
1534/**
1535 * The PAUSE variant of NOP for helping hyperthreaded CPUs detect spin locks.
1536 */
1537#if RT_INLINE_ASM_EXTERNAL
1538DECLASM(void) ASMNopPause(void);
1539#else
1540DECLINLINE(void) ASMNopPause(void)
1541{
1542# if RT_INLINE_ASM_GNU_STYLE
1543 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
1544# else
1545 __asm {
1546 _emit 0f3h
1547 _emit 090h
1548 }
1549# endif
1550}
1551#endif
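/* Illustrative sketch: a polite spin-wait that combines ASMNopPause() with a
 * volatile read; g_fReady is a made-up placeholder.
 * @code
 *      extern volatile bool g_fReady;
 *      while (!g_fReady)
 *          ASMNopPause();
 * @endcode
 */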
1552
1553
1554/**
1555 * Reads a machine specific register.
1556 *
1557 * @returns Register content.
1558 * @param uRegister Register to read.
1559 */
1560#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1561DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1562#else
1563DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1564{
1565 RTUINT64U u;
1566# if RT_INLINE_ASM_GNU_STYLE
1567 __asm__ __volatile__("rdmsr\n\t"
1568 : "=a" (u.s.Lo),
1569 "=d" (u.s.Hi)
1570 : "c" (uRegister));
1571
1572# elif RT_INLINE_ASM_USES_INTRIN
1573 u.u = __readmsr(uRegister);
1574
1575# else
1576 __asm
1577 {
1578 mov ecx, [uRegister]
1579 rdmsr
1580 mov [u.s.Lo], eax
1581 mov [u.s.Hi], edx
1582 }
1583# endif
1584
1585 return u.u;
1586}
1587#endif
1588
1589
1590/**
1591 * Writes a machine specific register.
1592 *
1594 * @param uRegister Register to write to.
1595 * @param u64Val Value to write.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1599#else
1600DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1601{
1602 RTUINT64U u;
1603
1604 u.u = u64Val;
1605# if RT_INLINE_ASM_GNU_STYLE
1606 __asm__ __volatile__("wrmsr\n\t"
1607 ::"a" (u.s.Lo),
1608 "d" (u.s.Hi),
1609 "c" (uRegister));
1610
1611# elif RT_INLINE_ASM_USES_INTRIN
1612 __writemsr(uRegister, u.u);
1613
1614# else
1615 __asm
1616 {
1617 mov ecx, [uRegister]
1618 mov edx, [u.s.Hi]
1619 mov eax, [u.s.Lo]
1620 wrmsr
1621 }
1622# endif
1623}
1624#endif
1625
1626
1627/**
1628 * Reads low part of a machine specific register.
1629 *
1630 * @returns Register content.
1631 * @param uRegister Register to read.
1632 */
1633#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1634DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1635#else
1636DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1637{
1638 uint32_t u32;
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("rdmsr\n\t"
1641 : "=a" (u32)
1642 : "c" (uRegister)
1643 : "edx");
1644
1645# elif RT_INLINE_ASM_USES_INTRIN
1646 u32 = (uint32_t)__readmsr(uRegister);
1647
1648#else
1649 __asm
1650 {
1651 mov ecx, [uRegister]
1652 rdmsr
1653 mov [u32], eax
1654 }
1655# endif
1656
1657 return u32;
1658}
1659#endif
1660
1661
1662/**
1663 * Reads high part of a machine specific register.
1664 *
1665 * @returns Register content.
1666 * @param uRegister Register to read.
1667 */
1668#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1669DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1670#else
1671DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1672{
1673 uint32_t u32;
1674# if RT_INLINE_ASM_GNU_STYLE
1675 __asm__ __volatile__("rdmsr\n\t"
1676 : "=d" (u32)
1677 : "c" (uRegister)
1678 : "eax");
1679
1680# elif RT_INLINE_ASM_USES_INTRIN
1681 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1682
1683# else
1684 __asm
1685 {
1686 mov ecx, [uRegister]
1687 rdmsr
1688 mov [u32], edx
1689 }
1690# endif
1691
1692 return u32;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr0.
1699 *
1700 * @returns dr0.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR0(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1706{
1707 RTCCUINTREG uDR0;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR0 = __readdr(0);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1713# else
1714 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr0
1721 mov [uDR0], rax
1722# else
1723 mov eax, dr0
1724 mov [uDR0], eax
1725# endif
1726 }
1727# endif
1728 return uDR0;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr1.
1735 *
1736 * @returns dr1.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR1(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1742{
1743 RTCCUINTREG uDR1;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR1 = __readdr(1);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1749# else
1750 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr1
1757 mov [uDR1], rax
1758# else
1759 mov eax, dr1
1760 mov [uDR1], eax
1761# endif
1762 }
1763# endif
1764 return uDR1;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr2.
1771 *
1772 * @returns dr2.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR2(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1778{
1779 RTCCUINTREG uDR2;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR2 = __readdr(2);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1785# else
1786 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr2
1793 mov [uDR2], rax
1794# else
1795 mov eax, dr2
1796 mov [uDR2], eax
1797# endif
1798 }
1799# endif
1800 return uDR2;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr3.
1807 *
1808 * @returns dr3.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR3(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1814{
1815 RTCCUINTREG uDR3;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR3 = __readdr(3);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1821# else
1822 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr3
1829 mov [uDR3], rax
1830# else
1831 mov eax, dr3
1832 mov [uDR3], eax
1833# endif
1834 }
1835# endif
1836 return uDR3;
1837}
1838#endif
1839
1840
1841/**
1842 * Gets dr6.
1843 *
1844 * @returns dr6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854# elif RT_INLINE_ASM_GNU_STYLE
1855# ifdef RT_ARCH_AMD64
1856 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1857# else
1858 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1859# endif
1860# else
1861 __asm
1862 {
1863# ifdef RT_ARCH_AMD64
1864 mov rax, dr6
1865 mov [uDR6], rax
1866# else
1867 mov eax, dr6
1868 mov [uDR6], eax
1869# endif
1870 }
1871# endif
1872 return uDR6;
1873}
1874#endif
1875
1876
1877/**
1878 * Reads and clears DR6.
1879 *
1880 * @returns DR6.
1881 */
1882#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1883DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1884#else
1885DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1886{
1887 RTCCUINTREG uDR6;
1888# if RT_INLINE_ASM_USES_INTRIN
1889 uDR6 = __readdr(6);
1890 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1891# elif RT_INLINE_ASM_GNU_STYLE
1892 RTCCUINTREG uNewValue = 0xffff0ff0U; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1893# ifdef RT_ARCH_AMD64
1894 __asm__ __volatile__("movq %%dr6, %0\n\t"
1895 "movq %1, %%dr6\n\t"
1896 : "=r" (uDR6)
1897 : "r" (uNewValue));
1898# else
1899 __asm__ __volatile__("movl %%dr6, %0\n\t"
1900 "movl %1, %%dr6\n\t"
1901 : "=r" (uDR6)
1902 : "r" (uNewValue));
1903# endif
1904# else
1905 __asm
1906 {
1907# ifdef RT_ARCH_AMD64
1908 mov rax, dr6
1909 mov [uDR6], rax
1910 mov rcx, rax
1911 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1912 mov dr6, rcx
1913# else
1914 mov eax, dr6
1915 mov [uDR6], eax
1916 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1917 mov dr6, ecx
1918# endif
1919 }
1920# endif
1921 return uDR6;
1922}
1923#endif
1924
1925
1926/**
1927 * Gets dr7.
1928 *
1929 * @returns dr7.
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(RTCCUINTREG) ASMGetDR7(void);
1933#else
1934DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1935{
1936 RTCCUINTREG uDR7;
1937# if RT_INLINE_ASM_USES_INTRIN
1938 uDR7 = __readdr(7);
1939# elif RT_INLINE_ASM_GNU_STYLE
1940# ifdef RT_ARCH_AMD64
1941 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1942# else
1943 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1944# endif
1945# else
1946 __asm
1947 {
1948# ifdef RT_ARCH_AMD64
1949 mov rax, dr7
1950 mov [uDR7], rax
1951# else
1952 mov eax, dr7
1953 mov [uDR7], eax
1954# endif
1955 }
1956# endif
1957 return uDR7;
1958}
1959#endif
1960
1961
1962/**
1963 * Sets dr0.
1964 *
1965 * @param uDRVal Debug register value to write
1966 */
1967#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1968DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1969#else
1970DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1971{
1972# if RT_INLINE_ASM_USES_INTRIN
1973 __writedr(0, uDRVal);
1974# elif RT_INLINE_ASM_GNU_STYLE
1975# ifdef RT_ARCH_AMD64
1976 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1977# else
1978 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1979# endif
1980# else
1981 __asm
1982 {
1983# ifdef RT_ARCH_AMD64
1984 mov rax, [uDRVal]
1985 mov dr0, rax
1986# else
1987 mov eax, [uDRVal]
1988 mov dr0, eax
1989# endif
1990 }
1991# endif
1992}
1993#endif
1994
1995
1996/**
1997 * Sets dr1.
1998 *
1999 * @param uDRVal Debug register value to write
2000 */
2001#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2002DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
2003#else
2004DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
2005{
2006# if RT_INLINE_ASM_USES_INTRIN
2007 __writedr(1, uDRVal);
2008# elif RT_INLINE_ASM_GNU_STYLE
2009# ifdef RT_ARCH_AMD64
2010 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
2011# else
2012 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
2013# endif
2014# else
2015 __asm
2016 {
2017# ifdef RT_ARCH_AMD64
2018 mov rax, [uDRVal]
2019 mov dr1, rax
2020# else
2021 mov eax, [uDRVal]
2022 mov dr1, eax
2023# endif
2024 }
2025# endif
2026}
2027#endif
2028
2029
2030/**
2031 * Sets dr2.
2032 *
2033 * @param uDRVal Debug register value to write
2034 */
2035#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2036DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2037#else
2038DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2039{
2040# if RT_INLINE_ASM_USES_INTRIN
2041 __writedr(2, uDRVal);
2042# elif RT_INLINE_ASM_GNU_STYLE
2043# ifdef RT_ARCH_AMD64
2044 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2045# else
2046 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2047# endif
2048# else
2049 __asm
2050 {
2051# ifdef RT_ARCH_AMD64
2052 mov rax, [uDRVal]
2053 mov dr2, rax
2054# else
2055 mov eax, [uDRVal]
2056 mov dr2, eax
2057# endif
2058 }
2059# endif
2060}
2061#endif
2062
2063
2064/**
2065 * Sets dr3.
2066 *
2067 * @param uDRVal Debug register value to write
2068 */
2069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2070DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2071#else
2072DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2073{
2074# if RT_INLINE_ASM_USES_INTRIN
2075 __writedr(3, uDRVal);
2076# elif RT_INLINE_ASM_GNU_STYLE
2077# ifdef RT_ARCH_AMD64
2078 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2079# else
2080 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2081# endif
2082# else
2083 __asm
2084 {
2085# ifdef RT_ARCH_AMD64
2086 mov rax, [uDRVal]
2087 mov dr3, rax
2088# else
2089 mov eax, [uDRVal]
2090 mov dr3, eax
2091# endif
2092 }
2093# endif
2094}
2095#endif
2096
2097
2098/**
2099 * Sets dr6.
2100 *
2101 * @param uDRVal Debug register value to write
2102 */
2103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2104DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2105#else
2106DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2107{
2108# if RT_INLINE_ASM_USES_INTRIN
2109 __writedr(6, uDRVal);
2110# elif RT_INLINE_ASM_GNU_STYLE
2111# ifdef RT_ARCH_AMD64
2112 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2113# else
2114 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2115# endif
2116# else
2117 __asm
2118 {
2119# ifdef RT_ARCH_AMD64
2120 mov rax, [uDRVal]
2121 mov dr6, rax
2122# else
2123 mov eax, [uDRVal]
2124 mov dr6, eax
2125# endif
2126 }
2127# endif
2128}
2129#endif
2130
2131
2132/**
2133 * Sets dr7.
2134 *
2135 * @param uDRVal Debug register value to write
2136 */
2137#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2138DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2139#else
2140DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2141{
2142# if RT_INLINE_ASM_USES_INTRIN
2143 __writedr(7, uDRVal);
2144# elif RT_INLINE_ASM_GNU_STYLE
2145# ifdef RT_ARCH_AMD64
2146 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2147# else
2148 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2149# endif
2150# else
2151 __asm
2152 {
2153# ifdef RT_ARCH_AMD64
2154 mov rax, [uDRVal]
2155 mov dr7, rax
2156# else
2157 mov eax, [uDRVal]
2158 mov dr7, eax
2159# endif
2160 }
2161# endif
2162}
2163#endif
2164
2165
2166/**
2167 * Compiler memory barrier.
2168 *
2169 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2170 * values or any outstanding writes when returning from this function.
2171 *
2172 * This function must be used if non-volatile data is modified by a
2173 * device or the VMM. Typical cases are port access, MMIO access,
2174 * trapping instructions, etc.
2175 */
2176#if RT_INLINE_ASM_GNU_STYLE
2177# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2178#elif RT_INLINE_ASM_USES_INTRIN
2179# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2180#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2181DECLINLINE(void) ASMCompilerBarrier(void)
2182{
2183 __asm
2184 {
2185 }
2186}
2187#endif
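/* Illustrative sketch: forcing the compiler to re-read a flag that a device or
 * trap handler may change behind its back; pfFlag is a made-up placeholder for a
 * pointer to such (non-volatile) state.
 * @code
 *      while (!*pfFlag)
 *          ASMCompilerBarrier();       // discard cached copies of *pfFlag
 * @endcode
 */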
2188
2189
2190/**
2191 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2192 *
2193 * @param Port I/O port to write to.
2194 * @param u8 8-bit integer to write.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2198#else
2199DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 __asm__ __volatile__("outb %b1, %w0\n\t"
2203 :: "Nd" (Port),
2204 "a" (u8));
2205
2206# elif RT_INLINE_ASM_USES_INTRIN
2207 __outbyte(Port, u8);
2208
2209# else
2210 __asm
2211 {
2212 mov dx, [Port]
2213 mov al, [u8]
2214 out dx, al
2215 }
2216# endif
2217}
2218#endif
2219
2220
2221/**
2222 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2223 *
2224 * @returns 8-bit integer.
2225 * @param Port I/O port to read from.
2226 */
2227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2228DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2229#else
2230DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2231{
2232 uint8_t u8;
2233# if RT_INLINE_ASM_GNU_STYLE
2234 __asm__ __volatile__("inb %w1, %b0\n\t"
2235 : "=a" (u8)
2236 : "Nd" (Port));
2237
2238# elif RT_INLINE_ASM_USES_INTRIN
2239 u8 = __inbyte(Port);
2240
2241# else
2242 __asm
2243 {
2244 mov dx, [Port]
2245 in al, dx
2246 mov [u8], al
2247 }
2248# endif
2249 return u8;
2250}
2251#endif
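/* Illustrative sketch: the classic CMOS/RTC access pattern - write the register
 * index to port 0x70 and read the value from port 0x71 (register 0x0a is RTC
 * status register A).
 * @code
 *      ASMOutU8(0x70, 0x0a);
 *      uint8_t const bStatusA = ASMInU8(0x71);
 * @endcode
 */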
2252
2253
2254/**
2255 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2256 *
2257 * @param Port I/O port to write to.
2258 * @param u16 16-bit integer to write.
2259 */
2260#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2261DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2262#else
2263DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2264{
2265# if RT_INLINE_ASM_GNU_STYLE
2266 __asm__ __volatile__("outw %w1, %w0\n\t"
2267 :: "Nd" (Port),
2268 "a" (u16));
2269
2270# elif RT_INLINE_ASM_USES_INTRIN
2271 __outword(Port, u16);
2272
2273# else
2274 __asm
2275 {
2276 mov dx, [Port]
2277 mov ax, [u16]
2278 out dx, ax
2279 }
2280# endif
2281}
2282#endif
2283
2284
2285/**
2286 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2287 *
2288 * @returns 16-bit integer.
2289 * @param Port I/O port to read from.
2290 */
2291#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2292DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2293#else
2294DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2295{
2296 uint16_t u16;
2297# if RT_INLINE_ASM_GNU_STYLE
2298 __asm__ __volatile__("inw %w1, %w0\n\t"
2299 : "=a" (u16)
2300 : "Nd" (Port));
2301
2302# elif RT_INLINE_ASM_USES_INTRIN
2303 u16 = __inword(Port);
2304
2305# else
2306 __asm
2307 {
2308 mov dx, [Port]
2309 in ax, dx
2310 mov [u16], ax
2311 }
2312# endif
2313 return u16;
2314}
2315#endif
2316
2317
2318/**
2319 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2320 *
2321 * @param Port I/O port to write to.
2322 * @param u32 32-bit integer to write.
2323 */
2324#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2325DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2326#else
2327DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2328{
2329# if RT_INLINE_ASM_GNU_STYLE
2330 __asm__ __volatile__("outl %1, %w0\n\t"
2331 :: "Nd" (Port),
2332 "a" (u32));
2333
2334# elif RT_INLINE_ASM_USES_INTRIN
2335 __outdword(Port, u32);
2336
2337# else
2338 __asm
2339 {
2340 mov dx, [Port]
2341 mov eax, [u32]
2342 out dx, eax
2343 }
2344# endif
2345}
2346#endif
2347
2348
2349/**
2350 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2351 *
2352 * @returns 32-bit integer.
2353 * @param Port I/O port to read from.
2354 */
2355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2356DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2357#else
2358DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2359{
2360 uint32_t u32;
2361# if RT_INLINE_ASM_GNU_STYLE
2362 __asm__ __volatile__("inl %w1, %0\n\t"
2363 : "=a" (u32)
2364 : "Nd" (Port));
2365
2366# elif RT_INLINE_ASM_USES_INTRIN
2367 u32 = __indword(Port);
2368
2369# else
2370 __asm
2371 {
2372 mov dx, [Port]
2373 in eax, dx
2374 mov [u32], eax
2375 }
2376# endif
2377 return u32;
2378}
2379#endif
2380
2381
2382/**
2383 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2384 *
2385 * @param Port I/O port to write to.
2386 * @param pau8 Pointer to the string buffer.
2387 * @param c The number of items to write.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; outsb\n\t"
2396 : "+S" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg esi, eax
2410 rep outsb
2411 xchg esi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2420 *
2421 * @param Port I/O port to read from.
2422 * @param pau8 Pointer to the string buffer (output).
2423 * @param c The number of items to read.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2427#else
2428DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; insb\n\t"
2432 : "+D" (pau8),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __inbytestring(Port, pau8, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau8]
2445 xchg edi, eax
2446 rep insb
2447 xchg edi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2456 *
2457 * @param Port I/O port to write to.
2458 * @param pau16 Pointer to the string buffer.
2459 * @param c The number of items to write.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; outsw\n\t"
2468 : "+S" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg esi, eax
2482 rep outsw
2483 xchg esi, eax
2484 }
2485# endif
2486}
2487#endif
2488
2489
2490/**
2491 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2492 *
2493 * @param Port I/O port to read from.
2494 * @param pau16 Pointer to the string buffer (output).
2495 * @param c The number of items to read.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2499#else
2500DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; insw\n\t"
2504 : "+D" (pau16),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __inwordstring(Port, pau16, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau16]
2517 xchg edi, eax
2518 rep insw
2519 xchg edi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2528 *
2529 * @param Port I/O port to write to.
2530 * @param pau32 Pointer to the string buffer.
2531 * @param c The number of items to write.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; outsl\n\t"
2540 : "+S" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg esi, eax
2554 rep outsd
2555 xchg esi, eax
2556 }
2557# endif
2558}
2559#endif
2560
2561
2562/**
2563 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2564 *
2565 * @param Port I/O port to read from.
2566 * @param pau32 Pointer to the string buffer (output).
2567 * @param c The number of items to read.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2570DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2571#else
2572DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("rep; insl\n\t"
2576 : "+D" (pau32),
2577 "+c" (c)
2578 : "d" (Port));
2579
2580# elif RT_INLINE_ASM_USES_INTRIN
2581 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2582
2583# else
2584 __asm
2585 {
2586 mov dx, [Port]
2587 mov ecx, [c]
2588 mov eax, [pau32]
2589 xchg edi, eax
2590 rep insd
2591 xchg edi, eax
2592 }
2593# endif
2594}
2595#endif
2596
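
/* Illustrative sketch (not part of this header): reading one 512-byte sector's
 * worth of data with the 16-bit string variant. The port (0x1f0, the legacy
 * ATA primary data port) and the buffer are made-up placeholders for whatever
 * device is actually being driven:
 *
 *      uint16_t au16Sector[256];
 *      ASMInStrU16(0x1f0, &au16Sector[0], RT_ELEMENTS(au16Sector));
 *
 * The CPU executes the REP INSW itself, so no explicit loop is required.
 */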
2597
2598/**
2599 * Atomically Exchange an unsigned 8-bit value, ordered.
2600 *
2601 * @returns Current *pu8 value
2602 * @param pu8 Pointer to the 8-bit variable to update.
2603 * @param u8 The 8-bit value to assign to *pu8.
2604 */
2605#if RT_INLINE_ASM_EXTERNAL
2606DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2607#else
2608DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2609{
2610# if RT_INLINE_ASM_GNU_STYLE
2611 __asm__ __volatile__("xchgb %0, %1\n\t"
2612 : "=m" (*pu8),
2613 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2614 : "1" (u8),
2615 "m" (*pu8));
2616# else
2617 __asm
2618 {
2619# ifdef RT_ARCH_AMD64
2620 mov rdx, [pu8]
2621 mov al, [u8]
2622 xchg [rdx], al
2623 mov [u8], al
2624# else
2625 mov edx, [pu8]
2626 mov al, [u8]
2627 xchg [edx], al
2628 mov [u8], al
2629# endif
2630 }
2631# endif
2632 return u8;
2633}
2634#endif
2635
2636
2637/**
2638 * Atomically Exchange a signed 8-bit value, ordered.
2639 *
2640 * @returns Current *pi8 value
2641 * @param pi8 Pointer to the 8-bit variable to update.
2642 * @param i8 The 8-bit value to assign to *pi8.
2643 */
2644DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2645{
2646 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2647}
2648
2649
2650/**
2651 * Atomically Exchange a bool value, ordered.
2652 *
2653 * @returns Current *pf value
2654 * @param pf Pointer to the boolean variable to update.
2655 * @param f The boolean value to assign to *pf.
2656 */
2657DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2658{
2659#ifdef _MSC_VER
2660 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2661#else
2662 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2663#endif
2664}
2665
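
/* Illustrative sketch (not part of this header): since the exchange returns
 * the previous value, it can be used to let exactly one thread claim a piece
 * of one-time work. The variable and function names are invented:
 *
 *      static bool volatile g_fInitStarted = false;
 *
 *      if (!ASMAtomicXchgBool(&g_fInitStarted, true))
 *          doTheOneTimeInit();   // only the first caller gets false back
 */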
2666
2667/**
2668 * Atomically Exchange an unsigned 16-bit value, ordered.
2669 *
2670 * @returns Current *pu16 value
2671 * @param pu16 Pointer to the 16-bit variable to update.
2672 * @param u16 The 16-bit value to assign to *pu16.
2673 */
2674#if RT_INLINE_ASM_EXTERNAL
2675DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2676#else
2677DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2678{
2679# if RT_INLINE_ASM_GNU_STYLE
2680 __asm__ __volatile__("xchgw %0, %1\n\t"
2681 : "=m" (*pu16),
2682 "=r" (u16)
2683 : "1" (u16),
2684 "m" (*pu16));
2685# else
2686 __asm
2687 {
2688# ifdef RT_ARCH_AMD64
2689 mov rdx, [pu16]
2690 mov ax, [u16]
2691 xchg [rdx], ax
2692 mov [u16], ax
2693# else
2694 mov edx, [pu16]
2695 mov ax, [u16]
2696 xchg [edx], ax
2697 mov [u16], ax
2698# endif
2699 }
2700# endif
2701 return u16;
2702}
2703#endif
2704
2705
2706/**
2707 * Atomically Exchange a signed 16-bit value, ordered.
2708 *
2709 * @returns Current *pi16 value
2710 * @param pi16 Pointer to the 16-bit variable to update.
2711 * @param i16 The 16-bit value to assign to *pi16.
2712 */
2713DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2714{
2715 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2716}
2717
2718
2719/**
2720 * Atomically Exchange an unsigned 32-bit value, ordered.
2721 *
2722 * @returns Current *pu32 value
2723 * @param pu32 Pointer to the 32-bit variable to update.
2724 * @param u32 The 32-bit value to assign to *pu32.
2725 */
2726#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2727DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2728#else
2729DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2730{
2731# if RT_INLINE_ASM_GNU_STYLE
2732 __asm__ __volatile__("xchgl %0, %1\n\t"
2733 : "=m" (*pu32),
2734 "=r" (u32)
2735 : "1" (u32),
2736 "m" (*pu32));
2737
2738# elif RT_INLINE_ASM_USES_INTRIN
2739 u32 = _InterlockedExchange((long *)pu32, u32);
2740
2741# else
2742 __asm
2743 {
2744# ifdef RT_ARCH_AMD64
2745 mov rdx, [pu32]
2746 mov eax, u32
2747 xchg [rdx], eax
2748 mov [u32], eax
2749# else
2750 mov edx, [pu32]
2751 mov eax, u32
2752 xchg [edx], eax
2753 mov [u32], eax
2754# endif
2755 }
2756# endif
2757 return u32;
2758}
2759#endif
2760
2761
2762/**
2763 * Atomically Exchange a signed 32-bit value, ordered.
2764 *
2765 * @returns Current *pi32 value
2766 * @param pi32 Pointer to the 32-bit variable to update.
2767 * @param i32 The 32-bit value to assign to *pi32.
2768 */
2769DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2770{
2771 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2772}
2773
2774
2775/**
2776 * Atomically Exchange an unsigned 64-bit value, ordered.
2777 *
2778 * @returns Current *pu64 value
2779 * @param pu64 Pointer to the 64-bit variable to update.
2780 * @param u64 The 64-bit value to assign to *pu64.
2781 */
2782#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2783DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2784#else
2785DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2786{
2787# if defined(RT_ARCH_AMD64)
2788# if RT_INLINE_ASM_USES_INTRIN
2789 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2790
2791# elif RT_INLINE_ASM_GNU_STYLE
2792 __asm__ __volatile__("xchgq %0, %1\n\t"
2793 : "=m" (*pu64),
2794 "=r" (u64)
2795 : "1" (u64),
2796 "m" (*pu64));
2797# else
2798 __asm
2799 {
2800 mov rdx, [pu64]
2801 mov rax, [u64]
2802 xchg [rdx], rax
2803 mov [u64], rax
2804 }
2805# endif
2806# else /* !RT_ARCH_AMD64 */
2807# if RT_INLINE_ASM_GNU_STYLE
2808# if defined(PIC) || defined(__PIC__)
2809 uint32_t u32EBX = (uint32_t)u64;
2810 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2811 "xchgl %%ebx, %3\n\t"
2812 "1:\n\t"
2813 "lock; cmpxchg8b (%5)\n\t"
2814 "jnz 1b\n\t"
2815 "movl %3, %%ebx\n\t"
2816 /*"xchgl %%esi, %5\n\t"*/
2817 : "=A" (u64),
2818 "=m" (*pu64)
2819 : "0" (*pu64),
2820 "m" ( u32EBX ),
2821 "c" ( (uint32_t)(u64 >> 32) ),
2822 "S" (pu64));
2823# else /* !PIC */
2824 __asm__ __volatile__("1:\n\t"
2825 "lock; cmpxchg8b %1\n\t"
2826 "jnz 1b\n\t"
2827 : "=A" (u64),
2828 "=m" (*pu64)
2829 : "0" (*pu64),
2830 "b" ( (uint32_t)u64 ),
2831 "c" ( (uint32_t)(u64 >> 32) ));
2832# endif
2833# else
2834 __asm
2835 {
2836 mov ebx, dword ptr [u64]
2837 mov ecx, dword ptr [u64 + 4]
2838 mov edi, pu64
2839 mov eax, dword ptr [edi]
2840 mov edx, dword ptr [edi + 4]
2841 retry:
2842 lock cmpxchg8b [edi]
2843 jnz retry
2844 mov dword ptr [u64], eax
2845 mov dword ptr [u64 + 4], edx
2846 }
2847# endif
2848# endif /* !RT_ARCH_AMD64 */
2849 return u64;
2850}
2851#endif
2852
2853
2854/**
2855 * Atomically Exchange a signed 64-bit value, ordered.
2856 *
2857 * @returns Current *pi64 value
2858 * @param pi64 Pointer to the 64-bit variable to update.
2859 * @param i64 The 64-bit value to assign to *pi64.
2860 */
2861DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2862{
2863 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2864}
2865
2866
2867/**
2868 * Atomically Exchange a pointer value, ordered.
2869 *
2870 * @returns Current *ppv value
2871 * @param ppv Pointer to the pointer variable to update.
2872 * @param pv The pointer value to assign to *ppv.
2873 */
2874DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2875{
2876#if ARCH_BITS == 32
2877 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2878#elif ARCH_BITS == 64
2879 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2880#else
2881# error "ARCH_BITS is bogus"
2882#endif
2883}
2884
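
/* Illustrative sketch (not part of this header): atomically taking over a
 * pending-work pointer so exactly one thread gets to process it. The names
 * are invented:
 *
 *      static void * volatile g_pvPendingWork = NULL;
 *
 *      void *pvWork = ASMAtomicXchgPtr(&g_pvPendingWork, NULL);
 *      if (pvWork)
 *          processPendingWork(pvWork);
 */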
2885
2886/**
2887 * Atomically Exchange a raw-mode context pointer value, ordered.
2888 *
2889 * @returns Current *ppvRC value
2890 * @param ppvRC Pointer to the pointer variable to update.
2891 * @param pvRC The pointer value to assign to *ppvRC.
2892 */
2893DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2894{
2895 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2896}
2897
2898
2899/**
2900 * Atomically Exchange a ring-0 pointer value, ordered.
2901 *
2902 * @returns Current *ppvR0 value
2903 * @param ppvR0 Pointer to the pointer variable to update.
2904 * @param pvR0 The pointer value to assign to *ppvR0.
2905 */
2906DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2907{
2908#if R0_ARCH_BITS == 32
2909 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2910#elif R0_ARCH_BITS == 64
2911 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2912#else
2913# error "R0_ARCH_BITS is bogus"
2914#endif
2915}
2916
2917
2918/**
2919 * Atomically Exchange a ring-3 pointer value, ordered.
2920 *
2921 * @returns Current *ppvR3 value
2922 * @param ppvR3 Pointer to the pointer variable to update.
2923 * @param pvR3 The pointer value to assign to *ppvR3.
2924 */
2925DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2926{
2927#if R3_ARCH_BITS == 32
2928 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2929#elif R3_ARCH_BITS == 64
2930 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2931#else
2932# error "R3_ARCH_BITS is bogus"
2933#endif
2934}
2935
2936
2937/** @def ASMAtomicXchgHandle
2938 * Atomically Exchange a typical IPRT handle value, ordered.
2939 *
2940 * @param ph Pointer to the value to update.
2941 * @param hNew The new value to assign to *ph.
2942 * @param phRes Where to store the current *ph value.
2943 *
2944 * @remarks This doesn't currently work for all handles (like RTFILE).
2945 */
2946#if HC_ARCH_BITS == 32
2947# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2948 do { \
2949 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2950 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2951 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2952 } while (0)
2953#elif HC_ARCH_BITS == 64
2954# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2955 do { \
2956 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2957 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2958 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2959 } while (0)
2960#else
2961# error HC_ARCH_BITS
2962#endif
2963
2964
2965/**
2966 * Atomically Exchange a value which size might differ
2967 * between platforms or compilers, ordered.
2968 *
2969 * @param pu Pointer to the variable to update.
2970 * @param uNew The value to assign to *pu.
2971 * @todo This is busted as it's missing the result argument.
2972 */
2973#define ASMAtomicXchgSize(pu, uNew) \
2974 do { \
2975 switch (sizeof(*(pu))) { \
2976 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2977 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2978 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2979 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2980 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2981 } \
2982 } while (0)
2983
2984/**
2985 * Atomically Exchange a value which size might differ
2986 * between platforms or compilers, ordered.
2987 *
2988 * @param pu Pointer to the variable to update.
2989 * @param uNew The value to assign to *pu.
2990 * @param puRes Where to store the current *pu value.
2991 */
2992#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2993 do { \
2994 switch (sizeof(*(pu))) { \
2995 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2996 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2997 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2998 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2999 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3000 } \
3001 } while (0)
3002
3003
3004/**
3005 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3006 *
3007 * @returns true if xchg was done.
3008 * @returns false if xchg wasn't done.
3009 *
3010 * @param pu32 Pointer to the value to update.
3011 * @param u32New The new value to assign to *pu32.
3012 * @param u32Old The old value to compare *pu32 with.
3013 */
3014#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3015DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3016#else
3017DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3018{
3019# if RT_INLINE_ASM_GNU_STYLE
3020 uint8_t u8Ret;
3021 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3022 "setz %1\n\t"
3023 : "=m" (*pu32),
3024 "=qm" (u8Ret),
3025 "=a" (u32Old)
3026 : "r" (u32New),
3027 "2" (u32Old),
3028 "m" (*pu32));
3029 return (bool)u8Ret;
3030
3031# elif RT_INLINE_ASM_USES_INTRIN
3032 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3033
3034# else
3035 uint32_t u32Ret;
3036 __asm
3037 {
3038# ifdef RT_ARCH_AMD64
3039 mov rdx, [pu32]
3040# else
3041 mov edx, [pu32]
3042# endif
3043 mov eax, [u32Old]
3044 mov ecx, [u32New]
3045# ifdef RT_ARCH_AMD64
3046 lock cmpxchg [rdx], ecx
3047# else
3048 lock cmpxchg [edx], ecx
3049# endif
3050 setz al
3051 movzx eax, al
3052 mov [u32Ret], eax
3053 }
3054 return !!u32Ret;
3055# endif
3056}
3057#endif
3058
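
/* Illustrative sketch (not part of this header): the typical compare-exchange
 * retry loop, here used to atomically track a maximum. The names are invented:
 *
 *      static void updateMax(uint32_t volatile *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur = ASMAtomicReadU32(pu32Max);
 *          while (   u32New > u32Cur
 *                 && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur))
 *              u32Cur = ASMAtomicReadU32(pu32Max);
 *      }
 *
 * On failure the current value is re-read and the attempt repeated; the Ex
 * variant further down hands back the conflicting value directly.
 */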
3059
3060/**
3061 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3062 *
3063 * @returns true if xchg was done.
3064 * @returns false if xchg wasn't done.
3065 *
3066 * @param pi32 Pointer to the value to update.
3067 * @param i32New The new value to assign to *pi32.
3068 * @param i32Old The old value to compare *pi32 with.
3069 */
3070DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3071{
3072 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3073}
3074
3075
3076/**
3077 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3078 *
3079 * @returns true if xchg was done.
3080 * @returns false if xchg wasn't done.
3081 *
3082 * @param pu64 Pointer to the 64-bit variable to update.
3083 * @param u64New The 64-bit value to assign to *pu64.
3084 * @param u64Old The value to compare with.
3085 */
3086#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3087 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3088DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3089#else
3090DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3091{
3092# if RT_INLINE_ASM_USES_INTRIN
3093 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3094
3095# elif defined(RT_ARCH_AMD64)
3096# if RT_INLINE_ASM_GNU_STYLE
3097 uint8_t u8Ret;
3098 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3099 "setz %1\n\t"
3100 : "=m" (*pu64),
3101 "=qm" (u8Ret),
3102 "=a" (u64Old)
3103 : "r" (u64New),
3104 "2" (u64Old),
3105 "m" (*pu64));
3106 return (bool)u8Ret;
3107# else
3108 bool fRet;
3109 __asm
3110 {
3111 mov rdx, [pu64]
3112 mov rax, [u64Old]
3113 mov rcx, [u64New]
3114 lock cmpxchg [rdx], rcx
3115 setz al
3116 mov [fRet], al
3117 }
3118 return fRet;
3119# endif
3120# else /* !RT_ARCH_AMD64 */
3121 uint32_t u32Ret;
3122# if RT_INLINE_ASM_GNU_STYLE
3123# if defined(PIC) || defined(__PIC__)
3124 uint32_t u32EBX = (uint32_t)u64New;
3125 uint32_t u32Spill;
3126 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3127 "lock; cmpxchg8b (%6)\n\t"
3128 "setz %%al\n\t"
3129 "movl %4, %%ebx\n\t"
3130 "movzbl %%al, %%eax\n\t"
3131 : "=a" (u32Ret),
3132 "=d" (u32Spill),
3133# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3134 "+m" (*pu64)
3135# else
3136 "=m" (*pu64)
3137# endif
3138 : "A" (u64Old),
3139 "m" ( u32EBX ),
3140 "c" ( (uint32_t)(u64New >> 32) ),
3141 "S" (pu64));
3142# else /* !PIC */
3143 uint32_t u32Spill;
3144 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3145 "setz %%al\n\t"
3146 "movzbl %%al, %%eax\n\t"
3147 : "=a" (u32Ret),
3148 "=d" (u32Spill),
3149 "+m" (*pu64)
3150 : "A" (u64Old),
3151 "b" ( (uint32_t)u64New ),
3152 "c" ( (uint32_t)(u64New >> 32) ));
3153# endif
3154 return (bool)u32Ret;
3155# else
3156 __asm
3157 {
3158 mov ebx, dword ptr [u64New]
3159 mov ecx, dword ptr [u64New + 4]
3160 mov edi, [pu64]
3161 mov eax, dword ptr [u64Old]
3162 mov edx, dword ptr [u64Old + 4]
3163 lock cmpxchg8b [edi]
3164 setz al
3165 movzx eax, al
3166 mov dword ptr [u32Ret], eax
3167 }
3168 return !!u32Ret;
3169# endif
3170# endif /* !RT_ARCH_AMD64 */
3171}
3172#endif
3173
3174
3175/**
3176 * Atomically Compare and exchange a signed 64-bit value, ordered.
3177 *
3178 * @returns true if xchg was done.
3179 * @returns false if xchg wasn't done.
3180 *
3181 * @param pi64 Pointer to the 64-bit variable to update.
3182 * @param i64 The 64-bit value to assign to *pi64.
3183 * @param i64Old The value to compare with.
3184 */
3185DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3186{
3187 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3188}
3189
3190
3191/**
3192 * Atomically Compare and Exchange a pointer value, ordered.
3193 *
3194 * @returns true if xchg was done.
3195 * @returns false if xchg wasn't done.
3196 *
3197 * @param ppv Pointer to the value to update.
3198 * @param pvNew The new value to assign to *ppv.
3199 * @param pvOld The old value to compare *ppv with.
3200 */
3201DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3202{
3203#if ARCH_BITS == 32
3204 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3205#elif ARCH_BITS == 64
3206 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3207#else
3208# error "ARCH_BITS is bogus"
3209#endif
3210}
3211
3212
3213/** @def ASMAtomicCmpXchgHandle
3214 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3215 *
3216 * @param ph Pointer to the value to update.
3217 * @param hNew The new value to assign to *ph.
3218 * @param hOld The old value to compare *ph with.
3219 * @param fRc Where to store the result.
3220 *
3221 * @remarks This doesn't currently work for all handles (like RTFILE).
3222 */
3223#if HC_ARCH_BITS == 32
3224# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3225 do { \
3226 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3227 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3228 } while (0)
3229#elif HC_ARCH_BITS == 64
3230# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3231 do { \
3232 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3233 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3234 } while (0)
3235#else
3236# error HC_ARCH_BITS
3237#endif
3238
3239
3240/** @def ASMAtomicCmpXchgSize
3241 * Atomically Compare and Exchange a value which size might differ
3242 * between platforms or compilers, ordered.
3243 *
3244 * @param pu Pointer to the value to update.
3245 * @param uNew The new value to assign to *pu.
3246 * @param uOld The old value to compare *pu with.
3247 * @param fRc Where to store the result.
3248 */
3249#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
3250 do { \
3251 switch (sizeof(*(pu))) { \
3252 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3253 break; \
3254 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3255 break; \
3256 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3257 (fRc) = false; \
3258 break; \
3259 } \
3260 } while (0)
3261
3262
3263/**
3264 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3265 * passes back old value, ordered.
3266 *
3267 * @returns true if xchg was done.
3268 * @returns false if xchg wasn't done.
3269 *
3270 * @param pu32 Pointer to the value to update.
3271 * @param u32New The new value to assign to *pu32.
3272 * @param u32Old The old value to compare *pu32 with.
3273 * @param pu32Old Pointer to store the old value at.
3274 */
3275#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3276DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3277#else
3278DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3279{
3280# if RT_INLINE_ASM_GNU_STYLE
3281 uint8_t u8Ret;
3282 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3283 "setz %1\n\t"
3284 : "=m" (*pu32),
3285 "=qm" (u8Ret),
3286 "=a" (*pu32Old)
3287 : "r" (u32New),
3288 "a" (u32Old),
3289 "m" (*pu32));
3290 return (bool)u8Ret;
3291
3292# elif RT_INLINE_ASM_USES_INTRIN
3293 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3294
3295# else
3296 uint32_t u32Ret;
3297 __asm
3298 {
3299# ifdef RT_ARCH_AMD64
3300 mov rdx, [pu32]
3301# else
3302 mov edx, [pu32]
3303# endif
3304 mov eax, [u32Old]
3305 mov ecx, [u32New]
3306# ifdef RT_ARCH_AMD64
3307 lock cmpxchg [rdx], ecx
3308 mov rdx, [pu32Old]
3309 mov [rdx], eax
3310# else
3311 lock cmpxchg [edx], ecx
3312 mov edx, [pu32Old]
3313 mov [edx], eax
3314# endif
3315 setz al
3316 movzx eax, al
3317 mov [u32Ret], eax
3318 }
3319 return !!u32Ret;
3320# endif
3321}
3322#endif
3323
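
/* Illustrative sketch (not part of this header): the same retry loop as for
 * ASMAtomicCmpXchgU32, but using the returned old value so the variable need
 * not be re-read after a collision. For a plain OR there is ASMAtomicOrU32
 * below; the loop merely keeps the example small. Names are invented:
 *
 *      static void orMask(uint32_t volatile *pu32, uint32_t fMask)
 *      {
 *          uint32_t u32Old = ASMAtomicUoReadU32(pu32);
 *          uint32_t u32New;
 *          do
 *              u32New = u32Old | fMask;
 *          while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32Old, &u32Old));
 *      }
 */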
3324
3325/**
3326 * Atomically Compare and Exchange a signed 32-bit value, additionally
3327 * passes back old value, ordered.
3328 *
3329 * @returns true if xchg was done.
3330 * @returns false if xchg wasn't done.
3331 *
3332 * @param pi32 Pointer to the value to update.
3333 * @param i32New The new value to assign to *pi32.
3334 * @param i32Old The old value to compare *pi32 with.
3335 * @param pi32Old Pointer to store the old value at.
3336 */
3337DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3338{
3339 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3340}
3341
3342
3343/**
3344 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3345 * passing back old value, ordered.
3346 *
3347 * @returns true if xchg was done.
3348 * @returns false if xchg wasn't done.
3349 *
3350 * @param pu64 Pointer to the 64-bit variable to update.
3351 * @param u64New The 64-bit value to assign to *pu64.
3352 * @param u64Old The value to compare with.
3353 * @param pu64Old Pointer to store the old value at.
3354 */
3355#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3356DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3357#else
3358DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3359{
3360# if RT_INLINE_ASM_USES_INTRIN
3361 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3362
3363# elif defined(RT_ARCH_AMD64)
3364# if RT_INLINE_ASM_GNU_STYLE
3365 uint8_t u8Ret;
3366 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3367 "setz %1\n\t"
3368 : "=m" (*pu64),
3369 "=qm" (u8Ret),
3370 "=a" (*pu64Old)
3371 : "r" (u64New),
3372 "a" (u64Old),
3373 "m" (*pu64));
3374 return (bool)u8Ret;
3375# else
3376 bool fRet;
3377 __asm
3378 {
3379 mov rdx, [pu64]
3380 mov rax, [u64Old]
3381 mov rcx, [u64New]
3382 lock cmpxchg [rdx], rcx
3383 mov rdx, [pu64Old]
3384 mov [rdx], rax
3385 setz al
3386 mov [fRet], al
3387 }
3388 return fRet;
3389# endif
3390# else /* !RT_ARCH_AMD64 */
3391# if RT_INLINE_ASM_GNU_STYLE
3392 uint64_t u64Ret;
3393# if defined(PIC) || defined(__PIC__)
3394 /* NB: this code uses a memory clobber description, because the clean
3395 * solution with an output value for *pu64 makes gcc run out of registers.
3396 * This will cause suboptimal code, and anyone with a better solution is
3397 * welcome to improve this. */
3398 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3399 "lock; cmpxchg8b %3\n\t"
3400 "xchgl %%ebx, %1\n\t"
3401 : "=A" (u64Ret)
3402 : "DS" ((uint32_t)u64New),
3403 "c" ((uint32_t)(u64New >> 32)),
3404 "m" (*pu64),
3405 "0" (u64Old)
3406 : "memory" );
3407# else /* !PIC */
3408 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3409 : "=A" (u64Ret),
3410 "=m" (*pu64)
3411 : "b" ((uint32_t)u64New),
3412 "c" ((uint32_t)(u64New >> 32)),
3413 "m" (*pu64),
3414 "0" (u64Old));
3415# endif
3416 *pu64Old = u64Ret;
3417 return u64Ret == u64Old;
3418# else
3419 uint32_t u32Ret;
3420 __asm
3421 {
3422 mov ebx, dword ptr [u64New]
3423 mov ecx, dword ptr [u64New + 4]
3424 mov edi, [pu64]
3425 mov eax, dword ptr [u64Old]
3426 mov edx, dword ptr [u64Old + 4]
3427 lock cmpxchg8b [edi]
3428 mov ebx, [pu64Old]
3429 mov [ebx], eax
3430 setz al
3431 movzx eax, al
3432 add ebx, 4
3433 mov [ebx], edx
3434 mov dword ptr [u32Ret], eax
3435 }
3436 return !!u32Ret;
3437# endif
3438# endif /* !RT_ARCH_AMD64 */
3439}
3440#endif
3441
3442
3443/**
3444 * Atomically Compare and exchange a signed 64-bit value, additionally
3445 * passing back old value, ordered.
3446 *
3447 * @returns true if xchg was done.
3448 * @returns false if xchg wasn't done.
3449 *
3450 * @param pi64 Pointer to the 64-bit variable to update.
3451 * @param i64 The 64-bit value to assign to *pi64.
3452 * @param i64Old The value to compare with.
3453 * @param pi64Old Pointer to store the old value at.
3454 */
3455DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3456{
3457 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3458}
3459
3460/** @def ASMAtomicCmpXchgExHandle
3461 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3462 *
3463 * @param ph Pointer to the value to update.
3464 * @param hNew The new value to assign to *ph.
3465 * @param hOld The old value to compare *ph with.
3466 * @param fRc Where to store the result.
3467 * @param phOldVal Pointer to where to store the old value.
3468 *
3469 * @remarks This doesn't currently work for all handles (like RTFILE).
3470 */
3471#if HC_ARCH_BITS == 32
3472# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3473 do { \
3474 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3475 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
3476 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3477 } while (0)
3478#elif HC_ARCH_BITS == 64
3479# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3480 do { \
3481 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3482 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3483 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3484 } while (0)
3485#else
3486# error HC_ARCH_BITS
3487#endif
3488
3489
3490/** @def ASMAtomicCmpXchgExSize
3491 * Atomically Compare and Exchange a value which size might differ
3492 * between platforms or compilers. Additionally passes back old value.
3493 *
3494 * @param pu Pointer to the value to update.
3495 * @param uNew The new value to assign to *pu.
3496 * @param uOld The old value to compare *pu with.
3497 * @param fRc Where to store the result.
3498 * @param puOldVal Pointer to where to store the old value.
3499 */
3500#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3501 do { \
3502 switch (sizeof(*(pu))) { \
3503 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3504 break; \
3505 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3506 break; \
3507 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3508 (fRc) = false; \
3509 *(puOldVal) = 0; \
3510 break; \
3511 } \
3512 } while (0)
3513
3514
3515/**
3516 * Atomically Compare and Exchange a pointer value, additionally
3517 * passing back old value, ordered.
3518 *
3519 * @returns true if xchg was done.
3520 * @returns false if xchg wasn't done.
3521 *
3522 * @param ppv Pointer to the value to update.
3523 * @param pvNew The new value to assign to *ppv.
3524 * @param pvOld The old value to compare *ppv with.
3525 * @param ppvOld Pointer to store the old value at.
3526 */
3527DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3528{
3529#if ARCH_BITS == 32
3530 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3531#elif ARCH_BITS == 64
3532 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3533#else
3534# error "ARCH_BITS is bogus"
3535#endif
3536}
3537
3538
3539/**
3540 * Atomically exchanges and adds to a 32-bit value, ordered.
3541 *
3542 * @returns The old value.
3543 * @param pu32 Pointer to the value.
3544 * @param u32 Number to add.
3545 */
3546#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3547DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3548#else
3549DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3550{
3551# if RT_INLINE_ASM_USES_INTRIN
3552 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3553 return u32;
3554
3555# elif RT_INLINE_ASM_GNU_STYLE
3556 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3557 : "=r" (u32),
3558 "=m" (*pu32)
3559 : "0" (u32),
3560 "m" (*pu32)
3561 : "memory");
3562 return u32;
3563# else
3564 __asm
3565 {
3566 mov eax, [u32]
3567# ifdef RT_ARCH_AMD64
3568 mov rdx, [pu32]
3569 lock xadd [rdx], eax
3570# else
3571 mov edx, [pu32]
3572 lock xadd [edx], eax
3573# endif
3574 mov [u32], eax
3575 }
3576 return u32;
3577# endif
3578}
3579#endif
3580
3581
3582/**
3583 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3584 *
3585 * @returns The old value.
3586 * @param pi32 Pointer to the value.
3587 * @param i32 Number to add.
3588 */
3589DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3590{
3591 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3592}
3593
3594
3595/**
3596 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3597 *
3598 * @returns The old value.
3599 * @param pu32 Pointer to the value.
3600 * @param u32 Number to subtract.
3601 */
3602DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
3603{
3604 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3605}
3606
3607
3608/**
3609 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3610 *
3611 * @returns The old value.
3612 * @param pi32 Pointer to the value.
3613 * @param i32 Number to subtract.
3614 */
3615DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3616{
3617 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3618}
3619
3620
3621/**
3622 * Atomically increment a 32-bit value, ordered.
3623 *
3624 * @returns The new value.
3625 * @param pu32 Pointer to the value to increment.
3626 */
3627#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3628DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3629#else
3630DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3631{
3632 uint32_t u32;
3633# if RT_INLINE_ASM_USES_INTRIN
3634 u32 = _InterlockedIncrement((long *)pu32);
3635 return u32;
3636
3637# elif RT_INLINE_ASM_GNU_STYLE
3638 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3639 : "=r" (u32),
3640 "=m" (*pu32)
3641 : "0" (1),
3642 "m" (*pu32)
3643 : "memory");
3644 return u32+1;
3645# else
3646 __asm
3647 {
3648 mov eax, 1
3649# ifdef RT_ARCH_AMD64
3650 mov rdx, [pu32]
3651 lock xadd [rdx], eax
3652# else
3653 mov edx, [pu32]
3654 lock xadd [edx], eax
3655# endif
3656 mov u32, eax
3657 }
3658 return u32+1;
3659# endif
3660}
3661#endif
3662
3663
3664/**
3665 * Atomically increment a signed 32-bit value, ordered.
3666 *
3667 * @returns The new value.
3668 * @param pi32 Pointer to the value to increment.
3669 */
3670DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3671{
3672 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3673}
3674
3675
3676/**
3677 * Atomically decrement an unsigned 32-bit value, ordered.
3678 *
3679 * @returns The new value.
3680 * @param pu32 Pointer to the value to decrement.
3681 */
3682#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3683DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3684#else
3685DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3686{
3687 uint32_t u32;
3688# if RT_INLINE_ASM_USES_INTRIN
3689 u32 = _InterlockedDecrement((long *)pu32);
3690 return u32;
3691
3692# elif RT_INLINE_ASM_GNU_STYLE
3693 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3694 : "=r" (u32),
3695 "=m" (*pu32)
3696 : "0" (-1),
3697 "m" (*pu32)
3698 : "memory");
3699 return u32-1;
3700# else
3701 __asm
3702 {
3703 mov eax, -1
3704# ifdef RT_ARCH_AMD64
3705 mov rdx, [pu32]
3706 lock xadd [rdx], eax
3707# else
3708 mov edx, [pu32]
3709 lock xadd [edx], eax
3710# endif
3711 mov u32, eax
3712 }
3713 return u32-1;
3714# endif
3715}
3716#endif
3717
3718
3719/**
3720 * Atomically decrement a signed 32-bit value, ordered.
3721 *
3722 * @returns The new value.
3723 * @param pi32 Pointer to the value to decrement.
3724 */
3725DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3726{
3727 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3728}
3729
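
/* Illustrative sketch (not part of this header): the usual reference counting
 * pattern built on the increment/decrement pair, which both return the new
 * value. The structure and function names are invented:
 *
 *      void myObjRetain(MYOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      void myObjRelease(MYOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)   // 0 == last reference
 *              myObjDestroy(pObj);
 *      }
 */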
3730
3731/**
3732 * Atomically Or an unsigned 32-bit value, ordered.
3733 *
3734 * @param pu32 Pointer to the variable to OR u32 with.
3735 * @param u32 The value to OR *pu32 with.
3736 */
3737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3738DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3739#else
3740DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3741{
3742# if RT_INLINE_ASM_USES_INTRIN
3743 _InterlockedOr((long volatile *)pu32, (long)u32);
3744
3745# elif RT_INLINE_ASM_GNU_STYLE
3746 __asm__ __volatile__("lock; orl %1, %0\n\t"
3747 : "=m" (*pu32)
3748 : "ir" (u32),
3749 "m" (*pu32));
3750# else
3751 __asm
3752 {
3753 mov eax, [u32]
3754# ifdef RT_ARCH_AMD64
3755 mov rdx, [pu32]
3756 lock or [rdx], eax
3757# else
3758 mov edx, [pu32]
3759 lock or [edx], eax
3760# endif
3761 }
3762# endif
3763}
3764#endif
3765
3766
3767/**
3768 * Atomically Or a signed 32-bit value, ordered.
3769 *
3770 * @param pi32 Pointer to the variable to OR i32 with.
3771 * @param i32 The value to OR *pi32 with.
3772 */
3773DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3774{
3775 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3776}
3777
3778
3779/**
3780 * Atomically And an unsigned 32-bit value, ordered.
3781 *
3782 * @param pu32 Pointer to the variable to AND u32 with.
3783 * @param u32 The value to AND *pu32 with.
3784 */
3785#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3786DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3787#else
3788DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3789{
3790# if RT_INLINE_ASM_USES_INTRIN
3791 _InterlockedAnd((long volatile *)pu32, u32);
3792
3793# elif RT_INLINE_ASM_GNU_STYLE
3794 __asm__ __volatile__("lock; andl %1, %0\n\t"
3795 : "=m" (*pu32)
3796 : "ir" (u32),
3797 "m" (*pu32));
3798# else
3799 __asm
3800 {
3801 mov eax, [u32]
3802# ifdef RT_ARCH_AMD64
3803 mov rdx, [pu32]
3804 lock and [rdx], eax
3805# else
3806 mov edx, [pu32]
3807 lock and [edx], eax
3808# endif
3809 }
3810# endif
3811}
3812#endif
3813
3814
3815/**
3816 * Atomically And a signed 32-bit value, ordered.
3817 *
3818 * @param pi32 Pointer to the variable to AND i32 with.
3819 * @param i32 The value to AND *pi32 with.
3820 */
3821DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3822{
3823 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3824}
3825
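
/* Illustrative sketch (not part of this header): setting and clearing single
 * bits in a shared flag word. The flag and structure names are invented:
 *
 *      #define MYSTATE_F_BUSY  RT_BIT(0)
 *
 *      ASMAtomicOrU32(&pState->fFlags, MYSTATE_F_BUSY);    // set the bit
 *      ...
 *      ASMAtomicAndU32(&pState->fFlags, ~MYSTATE_F_BUSY);  // clear it again
 */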
3826
3827/**
3828 * Serialize Instruction.
3829 */
3830#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3831DECLASM(void) ASMSerializeInstruction(void);
3832#else
3833DECLINLINE(void) ASMSerializeInstruction(void)
3834{
3835# if RT_INLINE_ASM_GNU_STYLE
3836 RTCCUINTREG xAX = 0;
3837# ifdef RT_ARCH_AMD64
3838 __asm__ ("cpuid"
3839 : "=a" (xAX)
3840 : "0" (xAX)
3841 : "rbx", "rcx", "rdx");
3842# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3843 __asm__ ("push %%ebx\n\t"
3844 "cpuid\n\t"
3845 "pop %%ebx\n\t"
3846 : "=a" (xAX)
3847 : "0" (xAX)
3848 : "ecx", "edx");
3849# else
3850 __asm__ ("cpuid"
3851 : "=a" (xAX)
3852 : "0" (xAX)
3853 : "ebx", "ecx", "edx");
3854# endif
3855
3856# elif RT_INLINE_ASM_USES_INTRIN
3857 int aInfo[4];
3858 __cpuid(aInfo, 0);
3859
3860# else
3861 __asm
3862 {
3863 push ebx
3864 xor eax, eax
3865 cpuid
3866 pop ebx
3867 }
3868# endif
3869}
3870#endif
3871
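
/* Illustrative sketch (not part of this header): the typical reason for a
 * serializing instruction is to keep earlier work from being reordered past a
 * timestamp read when measuring a code block (ASMReadTSC is declared elsewhere
 * in this header; the function being measured is invented):
 *
 *      ASMSerializeInstruction();
 *      uint64_t const uTscStart = ASMReadTSC();
 *      workBeingMeasured();
 *      ASMSerializeInstruction();
 *      uint64_t const cTscElapsed = ASMReadTSC() - uTscStart;
 */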
3872
3873/**
3874 * Memory load/store fence, waits for any pending writes and reads to complete.
3875 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3876 */
3877DECLINLINE(void) ASMMemoryFenceSSE2(void)
3878{
3879#if RT_INLINE_ASM_GNU_STYLE
3880 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
3881#elif RT_INLINE_ASM_USES_INTRIN
3882 _mm_mfence();
3883#else
3884 __asm
3885 {
3886 _emit 0x0f
3887 _emit 0xae
3888 _emit 0xf0
3889 }
3890#endif
3891}
3892
3893
3894/**
3895 * Memory store fence, waits for any writes to complete.
3896 * Requires the X86_CPUID_FEATURE_EDX_SSE CPUID bit set.
3897 */
3898DECLINLINE(void) ASMWriteFenceSSE(void)
3899{
3900#if RT_INLINE_ASM_GNU_STYLE
3901 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
3902#elif RT_INLINE_ASM_USES_INTRIN
3903 _mm_sfence();
3904#else
3905 __asm
3906 {
3907 _emit 0x0f
3908 _emit 0xae
3909 _emit 0xf8
3910 }
3911#endif
3912}
3913
3914
3915/**
3916 * Memory load fence, waits for any pending reads to complete.
3917 * Requires the X86_CPUID_FEATURE_EDX_SSE2 CPUID bit set.
3918 */
3919DECLINLINE(void) ASMReadFenceSSE2(void)
3920{
3921#if RT_INLINE_ASM_GNU_STYLE
3922 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
3923#elif RT_INLINE_ASM_USES_INTRIN
3924 _mm_lfence();
3925#else
3926 __asm
3927 {
3928 _emit 0x0f
3929 _emit 0xae
3930 _emit 0xe8
3931 }
3932#endif
3933}
3934
3935
3936/**
3937 * Memory fence, waits for any pending writes and reads to complete.
3938 */
3939DECLINLINE(void) ASMMemoryFence(void)
3940{
3941 /** @todo use mfence? check if all cpus we care for support it. */
3942 uint32_t volatile u32;
3943 ASMAtomicXchgU32(&u32, 0);
3944}
3945
3946
3947/**
3948 * Write fence, waits for any pending writes to complete.
3949 */
3950DECLINLINE(void) ASMWriteFence(void)
3951{
3952 /** @todo use sfence? check if all cpus we care for support it. */
3953 ASMMemoryFence();
3954}
3955
3956
3957/**
3958 * Read fence, waits for any pending reads to complete.
3959 */
3960DECLINLINE(void) ASMReadFence(void)
3961{
3962 /** @todo use lfence? check if all cpus we care for support it. */
3963 ASMMemoryFence();
3964}
3965
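
/* Illustrative sketch (not part of this header): pairing the write and read
 * fences in a simple producer/consumer hand-off, assuming the fields below
 * are volatile; the names are invented:
 *
 *      // producer
 *      pShared->u32Payload = u32Data;
 *      ASMWriteFence();
 *      pShared->fReady = true;
 *
 *      // consumer
 *      if (pShared->fReady)
 *      {
 *          ASMReadFence();
 *          u32Data = pShared->u32Payload;
 *      }
 *
 * The first fence keeps the payload store from being reordered past the flag
 * store; the second keeps the payload load from being hoisted above the flag
 * check.
 */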
3966
3967/**
3968 * Atomically reads an unsigned 8-bit value, ordered.
3969 *
3970 * @returns Current *pu8 value
3971 * @param pu8 Pointer to the 8-bit variable to read.
3972 */
3973DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3974{
3975 ASMMemoryFence();
3976 return *pu8; /* byte reads are atomic on x86 */
3977}
3978
3979
3980/**
3981 * Atomically reads an unsigned 8-bit value, unordered.
3982 *
3983 * @returns Current *pu8 value
3984 * @param pu8 Pointer to the 8-bit variable to read.
3985 */
3986DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3987{
3988 return *pu8; /* byte reads are atomic on x86 */
3989}
3990
3991
3992/**
3993 * Atomically reads a signed 8-bit value, ordered.
3994 *
3995 * @returns Current *pi8 value
3996 * @param pi8 Pointer to the 8-bit variable to read.
3997 */
3998DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3999{
4000 ASMMemoryFence();
4001 return *pi8; /* byte reads are atomic on x86 */
4002}
4003
4004
4005/**
4006 * Atomically reads a signed 8-bit value, unordered.
4007 *
4008 * @returns Current *pi8 value
4009 * @param pi8 Pointer to the 8-bit variable to read.
4010 */
4011DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
4012{
4013 return *pi8; /* byte reads are atomic on x86 */
4014}
4015
4016
4017/**
4018 * Atomically reads an unsigned 16-bit value, ordered.
4019 *
4020 * @returns Current *pu16 value
4021 * @param pu16 Pointer to the 16-bit variable to read.
4022 */
4023DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
4024{
4025 ASMMemoryFence();
4026 Assert(!((uintptr_t)pu16 & 1));
4027 return *pu16;
4028}
4029
4030
4031/**
4032 * Atomically reads an unsigned 16-bit value, unordered.
4033 *
4034 * @returns Current *pu16 value
4035 * @param pu16 Pointer to the 16-bit variable to read.
4036 */
4037DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4038{
4039 Assert(!((uintptr_t)pu16 & 1));
4040 return *pu16;
4041}
4042
4043
4044/**
4045 * Atomically reads a signed 16-bit value, ordered.
4046 *
4047 * @returns Current *pi16 value
4048 * @param pi16 Pointer to the 16-bit variable to read.
4049 */
4050DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4051{
4052 ASMMemoryFence();
4053 Assert(!((uintptr_t)pi16 & 1));
4054 return *pi16;
4055}
4056
4057
4058/**
4059 * Atomically reads a signed 16-bit value, unordered.
4060 *
4061 * @returns Current *pi16 value
4062 * @param pi16 Pointer to the 16-bit variable to read.
4063 */
4064DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4065{
4066 Assert(!((uintptr_t)pi16 & 1));
4067 return *pi16;
4068}
4069
4070
4071/**
4072 * Atomically reads an unsigned 32-bit value, ordered.
4073 *
4074 * @returns Current *pu32 value
4075 * @param pu32 Pointer to the 32-bit variable to read.
4076 */
4077DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4078{
4079 ASMMemoryFence();
4080 Assert(!((uintptr_t)pu32 & 3));
4081 return *pu32;
4082}
4083
4084
4085/**
4086 * Atomically reads an unsigned 32-bit value, unordered.
4087 *
4088 * @returns Current *pu32 value
4089 * @param pu32 Pointer to the 32-bit variable to read.
4090 */
4091DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4092{
4093 Assert(!((uintptr_t)pu32 & 3));
4094 return *pu32;
4095}
4096
4097
4098/**
4099 * Atomically reads a signed 32-bit value, ordered.
4100 *
4101 * @returns Current *pi32 value
4102 * @param pi32 Pointer to the 32-bit variable to read.
4103 */
4104DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4105{
4106 ASMMemoryFence();
4107 Assert(!((uintptr_t)pi32 & 3));
4108 return *pi32;
4109}
4110
4111
4112/**
4113 * Atomically reads a signed 32-bit value, unordered.
4114 *
4115 * @returns Current *pi32 value
4116 * @param pi32 Pointer to the 32-bit variable to read.
4117 */
4118DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4119{
4120 Assert(!((uintptr_t)pi32 & 3));
4121 return *pi32;
4122}
4123
4124
4125/**
4126 * Atomically reads an unsigned 64-bit value, ordered.
4127 *
4128 * @returns Current *pu64 value
4129 * @param pu64 Pointer to the 64-bit variable to read.
4130 * The memory pointed to must be writable.
4131 * @remark This will fault if the memory is read-only!
4132 */
4133#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4134 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4135DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4136#else
4137DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4138{
4139 uint64_t u64;
4140# ifdef RT_ARCH_AMD64
4141 Assert(!((uintptr_t)pu64 & 7));
4142/*# if RT_INLINE_ASM_GNU_STYLE
4143 __asm__ __volatile__( "mfence\n\t"
4144 "movq %1, %0\n\t"
4145 : "=r" (u64)
4146 : "m" (*pu64));
4147# else
4148 __asm
4149 {
4150 mfence
4151 mov rdx, [pu64]
4152 mov rax, [rdx]
4153 mov [u64], rax
4154 }
4155# endif*/
4156 ASMMemoryFence();
4157 u64 = *pu64;
4158# else /* !RT_ARCH_AMD64 */
4159# if RT_INLINE_ASM_GNU_STYLE
4160# if defined(PIC) || defined(__PIC__)
4161 uint32_t u32EBX = 0;
4162 Assert(!((uintptr_t)pu64 & 7));
4163 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4164 "lock; cmpxchg8b (%5)\n\t"
4165 "movl %3, %%ebx\n\t"
4166 : "=A" (u64),
4167# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4168 "+m" (*pu64)
4169# else
4170 "=m" (*pu64)
4171# endif
4172 : "0" (0),
4173 "m" (u32EBX),
4174 "c" (0),
4175 "S" (pu64));
4176# else /* !PIC */
4177 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4178 : "=A" (u64),
4179 "+m" (*pu64)
4180 : "0" (0),
4181 "b" (0),
4182 "c" (0));
4183# endif
4184# else
4185 Assert(!((uintptr_t)pu64 & 7));
4186 __asm
4187 {
4188 xor eax, eax
4189 xor edx, edx
4190 mov edi, pu64
4191 xor ecx, ecx
4192 xor ebx, ebx
4193 lock cmpxchg8b [edi]
4194 mov dword ptr [u64], eax
4195 mov dword ptr [u64 + 4], edx
4196 }
4197# endif
4198# endif /* !RT_ARCH_AMD64 */
4199 return u64;
4200}
4201#endif
4202
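
/* Illustrative sketch (not part of this header): on 32-bit hosts this read is
 * done with LOCK CMPXCHG8B, which is also why the variable must be writable.
 * Typical use is polling a 64-bit value that another CPU updates; the names
 * are invented:
 *
 *      uint64_t const u64Expire = ASMAtomicReadU64(&pTimer->u64Expire);
 *      if (u64NowNanoTS >= u64Expire)
 *          ...
 */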
4203
4204/**
4205 * Atomically reads an unsigned 64-bit value, unordered.
4206 *
4207 * @returns Current *pu64 value
4208 * @param pu64 Pointer to the 64-bit variable to read.
4209 * The memory pointed to must be writable.
4210 * @remark This will fault if the memory is read-only!
4211 */
4212#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4213DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4214#else
4215DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4216{
4217 uint64_t u64;
4218# ifdef RT_ARCH_AMD64
4219 Assert(!((uintptr_t)pu64 & 7));
4220/*# if RT_INLINE_ASM_GNU_STYLE
4221 Assert(!((uintptr_t)pu64 & 7));
4222 __asm__ __volatile__("movq %1, %0\n\t"
4223 : "=r" (u64)
4224 : "m" (*pu64));
4225# else
4226 __asm
4227 {
4228 mov rdx, [pu64]
4229 mov rax, [rdx]
4230 mov [u64], rax
4231 }
4232# endif */
4233 u64 = *pu64;
4234# else /* !RT_ARCH_AMD64 */
4235# if RT_INLINE_ASM_GNU_STYLE
4236# if defined(PIC) || defined(__PIC__)
4237 uint32_t u32EBX = 0;
4238 uint32_t u32Spill;
4239 Assert(!((uintptr_t)pu64 & 7));
4240 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4241 "xor %%ecx,%%ecx\n\t"
4242 "xor %%edx,%%edx\n\t"
4243 "xchgl %%ebx, %3\n\t"
4244 "lock; cmpxchg8b (%4)\n\t"
4245 "movl %3, %%ebx\n\t"
4246 : "=A" (u64),
4247# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4248 "+m" (*pu64),
4249# else
4250 "=m" (*pu64),
4251# endif
4252 "=c" (u32Spill)
4253 : "m" (u32EBX),
4254 "S" (pu64));
4255# else /* !PIC */
4256 __asm__ __volatile__("cmpxchg8b %1\n\t"
4257 : "=A" (u64),
4258 "+m" (*pu64)
4259 : "0" (0),
4260 "b" (0),
4261 "c" (0));
4262# endif
4263# else
4264 Assert(!((uintptr_t)pu64 & 7));
4265 __asm
4266 {
4267 xor eax, eax
4268 xor edx, edx
4269 mov edi, pu64
4270 xor ecx, ecx
4271 xor ebx, ebx
4272 lock cmpxchg8b [edi]
4273 mov dword ptr [u64], eax
4274 mov dword ptr [u64 + 4], edx
4275 }
4276# endif
4277# endif /* !RT_ARCH_AMD64 */
4278 return u64;
4279}
4280#endif
4281
4282
4283/**
4284 * Atomically reads a signed 64-bit value, ordered.
4285 *
4286 * @returns Current *pi64 value
4287 * @param pi64 Pointer to the 64-bit variable to read.
4288 * The memory pointed to must be writable.
4289 * @remark This will fault if the memory is read-only!
4290 */
4291DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4292{
4293 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4294}
4295
4296
4297/**
4298 * Atomically reads a signed 64-bit value, unordered.
4299 *
4300 * @returns Current *pi64 value
4301 * @param pi64 Pointer to the 64-bit variable to read.
4302 * The memory pointed to must be writable.
4303 * @remark This will fault if the memory is read-only!
4304 */
4305DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4306{
4307 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4308}
4309
4310
4311/**
4312 * Atomically reads a pointer value, ordered.
4313 *
4314 * @returns Current *ppv value
4315 * @param ppv Pointer to the pointer variable to read.
4316 */
4317DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4318{
4319#if ARCH_BITS == 32
4320 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4321#elif ARCH_BITS == 64
4322 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4323#else
4324# error "ARCH_BITS is bogus"
4325#endif
4326}
4327
4328
4329/**
4330 * Atomically reads a pointer value, unordered.
4331 *
4332 * @returns Current *ppv value
4333 * @param ppv Pointer to the pointer variable to read.
4334 */
4335DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4336{
4337#if ARCH_BITS == 32
4338 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4339#elif ARCH_BITS == 64
4340 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4341#else
4342# error "ARCH_BITS is bogus"
4343#endif
4344}
4345
4346
4347/**
4348 * Atomically reads a boolean value, ordered.
4349 *
4350 * @returns Current *pf value
4351 * @param pf Pointer to the boolean variable to read.
4352 */
4353DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4354{
4355 ASMMemoryFence();
4356 return *pf; /* byte reads are atomic on x86 */
4357}
4358
4359
4360/**
4361 * Atomically reads a boolean value, unordered.
4362 *
4363 * @returns Current *pf value
4364 * @param pf Pointer to the boolean variable to read.
4365 */
4366DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4367{
4368 return *pf; /* byte reads are atomic on x86 */
4369}
4370
4371
4372/**
4373 * Atomically read a typical IPRT handle value, ordered.
4374 *
4375 * @param ph Pointer to the handle variable to read.
4376 * @param phRes Where to store the result.
4377 *
4378 * @remarks This doesn't currently work for all handles (like RTFILE).
4379 */
4380#if HC_ARCH_BITS == 32
4381# define ASMAtomicReadHandle(ph, phRes) \
4382 do { \
4383 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4384 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4385 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4386 } while (0)
4387#elif HC_ARCH_BITS == 64
4388# define ASMAtomicReadHandle(ph, phRes) \
4389 do { \
4390 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4391 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4392 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4393 } while (0)
4394#else
4395# error HC_ARCH_BITS
4396#endif
4397
4398
4399/**
4400 * Atomically read a typical IPRT handle value, unordered.
4401 *
4402 * @param ph Pointer to the handle variable to read.
4403 * @param phRes Where to store the result.
4404 *
4405 * @remarks This doesn't currently work for all handles (like RTFILE).
4406 */
4407#if HC_ARCH_BITS == 32
4408# define ASMAtomicUoReadHandle(ph, phRes) \
4409 do { \
4410 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4411 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4412 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4413 } while (0)
4414#elif HC_ARCH_BITS == 64
4415# define ASMAtomicUoReadHandle(ph, phRes) \
4416 do { \
4417 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4418 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4419 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4420 } while (0)
4421#else
4422# error HC_ARCH_BITS
4423#endif
4424
4425
4426/**
4427 * Atomically read a value which size might differ
4428 * between platforms or compilers, ordered.
4429 *
4430 * @param pu Pointer to the variable to read.
4431 * @param puRes Where to store the result.
4432 */
4433#define ASMAtomicReadSize(pu, puRes) \
4434 do { \
4435 switch (sizeof(*(pu))) { \
4436 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4437 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4438 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4439 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4440 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4441 } \
4442 } while (0)
4443
4444
4445/**
4446 * Atomically read a value which size might differ
4447 * between platforms or compilers, unordered.
4448 *
4449 * @param pu Pointer to the variable to read.
4450 * @param puRes Where to store the result.
4451 */
4452#define ASMAtomicUoReadSize(pu, puRes) \
4453 do { \
4454 switch (sizeof(*(pu))) { \
4455 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4456 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4457 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4458 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4459 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4460 } \
4461 } while (0)
4462
4463
4464/**
4465 * Atomically writes an unsigned 8-bit value, ordered.
4466 *
4467 * @param pu8 Pointer to the 8-bit variable.
4468 * @param u8 The 8-bit value to assign to *pu8.
4469 */
4470DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4471{
4472 ASMAtomicXchgU8(pu8, u8);
4473}
4474
4475
4476/**
4477 * Atomically writes an unsigned 8-bit value, unordered.
4478 *
4479 * @param pu8 Pointer to the 8-bit variable.
4480 * @param u8 The 8-bit value to assign to *pu8.
4481 */
4482DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4483{
4484 *pu8 = u8; /* byte writes are atomic on x86 */
4485}
4486
4487
4488/**
4489 * Atomically writes a signed 8-bit value, ordered.
4490 *
4491 * @param pi8 Pointer to the 8-bit variable.
4492 * @param i8 The 8-bit value to assign to *pi8.
4493 */
4494DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4495{
4496 ASMAtomicXchgS8(pi8, i8);
4497}
4498
4499
4500/**
4501 * Atomically writes a signed 8-bit value, unordered.
4502 *
4503 * @param pi8 Pointer to the 8-bit variable.
4504 * @param i8 The 8-bit value to assign to *pi8.
4505 */
4506DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4507{
4508 *pi8 = i8; /* byte writes are atomic on x86 */
4509}
4510
4511
4512/**
4513 * Atomically writes an unsigned 16-bit value, ordered.
4514 *
4515 * @param pu16 Pointer to the 16-bit variable.
4516 * @param u16 The 16-bit value to assign to *pu16.
4517 */
4518DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4519{
4520 ASMAtomicXchgU16(pu16, u16);
4521}
4522
4523
4524/**
4525 * Atomically writes an unsigned 16-bit value, unordered.
4526 *
4527 * @param pu16 Pointer to the 16-bit variable.
4528 * @param u16 The 16-bit value to assign to *pu16.
4529 */
4530DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4531{
4532 Assert(!((uintptr_t)pu16 & 1));
4533 *pu16 = u16;
4534}
4535
4536
4537/**
4538 * Atomically writes a signed 16-bit value, ordered.
4539 *
4540 * @param pi16 Pointer to the 16-bit variable.
4541 * @param i16 The 16-bit value to assign to *pi16.
4542 */
4543DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4544{
4545 ASMAtomicXchgS16(pi16, i16);
4546}
4547
4548
4549/**
4550 * Atomically writes a signed 16-bit value, unordered.
4551 *
4552 * @param pi16 Pointer to the 16-bit variable.
4553 * @param i16 The 16-bit value to assign to *pi16.
4554 */
4555DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4556{
4557 Assert(!((uintptr_t)pi16 & 1));
4558 *pi16 = i16;
4559}
4560
4561
4562/**
4563 * Atomically writes an unsigned 32-bit value, ordered.
4564 *
4565 * @param pu32 Pointer to the 32-bit variable.
4566 * @param u32 The 32-bit value to assign to *pu32.
4567 */
4568DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4569{
4570 ASMAtomicXchgU32(pu32, u32);
4571}
4572
4573
4574/**
4575 * Atomically writes an unsigned 32-bit value, unordered.
4576 *
4577 * @param pu32 Pointer to the 32-bit variable.
4578 * @param u32 The 32-bit value to assign to *pu32.
4579 */
4580DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4581{
4582 Assert(!((uintptr_t)pu32 & 3));
4583 *pu32 = u32;
4584}
4585
4586
4587/**
4588 * Atomically writes a signed 32-bit value, ordered.
4589 *
4590 * @param pi32 Pointer to the 32-bit variable.
4591 * @param i32 The 32-bit value to assign to *pi32.
4592 */
4593DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4594{
4595 ASMAtomicXchgS32(pi32, i32);
4596}
4597
4598
4599/**
4600 * Atomically writes a signed 32-bit value, unordered.
4601 *
4602 * @param pi32 Pointer to the 32-bit variable.
4603 * @param i32 The 32-bit value to assign to *pi32.
4604 */
4605DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4606{
4607 Assert(!((uintptr_t)pi32 & 3));
4608 *pi32 = i32;
4609}
4610
4611
4612/**
4613 * Atomically writes an unsigned 64-bit value, ordered.
4614 *
4615 * @param pu64 Pointer to the 64-bit variable.
4616 * @param u64 The 64-bit value to assign to *pu64.
4617 */
4618DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4619{
4620 ASMAtomicXchgU64(pu64, u64);
4621}
4622
4623
4624/**
4625 * Atomically writes an unsigned 64-bit value, unordered.
4626 *
4627 * @param pu64 Pointer to the 64-bit variable.
4628 * @param u64 The 64-bit value to assign to *pu64.
4629 */
4630DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4631{
4632 Assert(!((uintptr_t)pu64 & 7));
4633#if ARCH_BITS == 64
4634 *pu64 = u64;
4635#else
4636 ASMAtomicXchgU64(pu64, u64);
4637#endif
4638}
4639
4640
4641/**
4642 * Atomically writes a signed 64-bit value, ordered.
4643 *
4644 * @param pi64 Pointer to the 64-bit variable.
4645 * @param i64 The 64-bit value to assign to *pi64.
4646 */
4647DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4648{
4649 ASMAtomicXchgS64(pi64, i64);
4650}
4651
4652
4653/**
4654 * Atomically writes a signed 64-bit value, unordered.
4655 *
4656 * @param pi64 Pointer to the 64-bit variable.
4657 * @param i64 The 64-bit value to assign to *pi64.
4658 */
4659DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4660{
4661 Assert(!((uintptr_t)pi64 & 7));
4662#if ARCH_BITS == 64
4663 *pi64 = i64;
4664#else
4665 ASMAtomicXchgS64(pi64, i64);
4666#endif
4667}
4668
4669
4670/**
4671 * Atomically writes a boolean value, ordered.
4672 *
4673 * @param pf Pointer to the boolean variable.
4674 * @param f The boolean value to assign to *pf.
4675 */
4676DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4677{
4678 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4679}
4680
4681
4682/**
4683 * Atomically writes a boolean value, unordered.
4684 *
4685 * @param pf Pointer to the boolean variable.
4686 * @param f The boolean value to assign to *pf.
4687 */
4688DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4689{
4690 *pf = f; /* byte writes are atomic on x86 */
4691}
4692
4693
4694/**
4695 * Atomically writes a pointer value, ordered.
4696 *
4698 * @param ppv Pointer to the pointer variable.
4699 * @param pv The pointer value to assign to *ppv.
4700 */
4701DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4702{
4703#if ARCH_BITS == 32
4704 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4705#elif ARCH_BITS == 64
4706 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4707#else
4708# error "ARCH_BITS is bogus"
4709#endif
4710}
4711
4712
4713/**
4714 * Atomically writes a pointer value, unordered.
4715 *
4717 * @param ppv Pointer to the pointer variable.
4718 * @param pv The pointer value to assign to *ppv.
4719 */
4720DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4721{
4722#if ARCH_BITS == 32
4723 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4724#elif ARCH_BITS == 64
4725 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4726#else
4727# error "ARCH_BITS is bogus"
4728#endif
4729}
4730
4731
4732/**
4733 * Atomically write a typical IPRT handle value, ordered.
4734 *
4735 * @param ph Pointer to the variable to update.
4736 * @param hNew The value to assign to *ph.
4737 *
4738 * @remarks This doesn't currently work for all handles (like RTFILE).
4739 */
4740#if HC_ARCH_BITS == 32
4741# define ASMAtomicWriteHandle(ph, hNew) \
4742 do { \
4743 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4744 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4745 } while (0)
4746#elif HC_ARCH_BITS == 64
4747# define ASMAtomicWriteHandle(ph, hNew) \
4748 do { \
4749 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4750 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4751 } while (0)
4752#else
4753# error HC_ARCH_BITS
4754#endif
4755
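/* Usage sketch (illustrative only; assumes a pointer-sized IPRT handle type
 * such as RTSEMEVENT so the AssertCompile size check holds; hEvtNew is a
 * hypothetical local variable):
 *      RTSEMEVENT volatile g_hEvt;
 *      ASMAtomicWriteHandle(&g_hEvt, hEvtNew);
 */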
4756
4757/**
4758 * Atomically write a typical IPRT handle value, unordered.
4759 *
4760 * @param ph Pointer to the variable to update.
4761 * @param hNew The value to assign to *ph.
4762 *
4763 * @remarks This doesn't currently work for all handles (like RTFILE).
4764 */
4765#if HC_ARCH_BITS == 32
4766# define ASMAtomicUoWriteHandle(ph, hNew) \
4767 do { \
4768 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4769 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4770 } while (0)
4771#elif HC_ARCH_BITS == 64
4772# define ASMAtomicUoWriteHandle(ph, hNew) \
4773 do { \
4774 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4775 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4776 } while (0)
4777#else
4778# error HC_ARCH_BITS
4779#endif
4780
4781
4782/**
4783 * Atomically write a value which size might differ
4784 * between platforms or compilers, ordered.
4785 *
4786 * @param pu Pointer to the variable to update.
4787 * @param uNew The value to assign to *pu.
4788 */
4789#define ASMAtomicWriteSize(pu, uNew) \
4790 do { \
4791 switch (sizeof(*(pu))) { \
4792 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4793 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4794 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4795 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4796 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4797 } \
4798 } while (0)
4799
4800/**
4801 * Atomically write a value which size might differ
4802 * between platforms or compilers, unordered.
4803 *
4804 * @param pu Pointer to the variable to update.
4805 * @param uNew The value to assign to *pu.
4806 */
4807#define ASMAtomicUoWriteSize(pu, uNew) \
4808 do { \
4809 switch (sizeof(*(pu))) { \
4810 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4811 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4812 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4813 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4814        default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4815 } \
4816 } while (0)
4817
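/* Usage sketch (illustrative only; reuses the hypothetical size_t variable
 * from the read example above; cbNew is a made-up local):
 *      size_t volatile g_cbUsed;
 *      ASMAtomicWriteSize(&g_cbUsed, (size_t)0);
 *      ASMAtomicUoWriteSize(&g_cbUsed, cbNew);
 */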
4818
4819
4820
4821/**
4822 * Invalidate page.
4823 *
4824 * @param pv Address of the page to invalidate.
4825 */
4826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4827DECLASM(void) ASMInvalidatePage(void *pv);
4828#else
4829DECLINLINE(void) ASMInvalidatePage(void *pv)
4830{
4831# if RT_INLINE_ASM_USES_INTRIN
4832 __invlpg(pv);
4833
4834# elif RT_INLINE_ASM_GNU_STYLE
4835 __asm__ __volatile__("invlpg %0\n\t"
4836 : : "m" (*(uint8_t *)pv));
4837# else
4838 __asm
4839 {
4840# ifdef RT_ARCH_AMD64
4841 mov rax, [pv]
4842 invlpg [rax]
4843# else
4844 mov eax, [pv]
4845 invlpg [eax]
4846# endif
4847 }
4848# endif
4849}
4850#endif
4851
4852
4853/**
4854 * Write back the internal caches and invalidate them.
4855 */
4856#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4857DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4858#else
4859DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4860{
4861# if RT_INLINE_ASM_USES_INTRIN
4862 __wbinvd();
4863
4864# elif RT_INLINE_ASM_GNU_STYLE
4865 __asm__ __volatile__("wbinvd");
4866# else
4867 __asm
4868 {
4869 wbinvd
4870 }
4871# endif
4872}
4873#endif
4874
4875
4876/**
4877 * Invalidate internal and (perhaps) external caches without first
4878 * flushing dirty cache lines. Use with extreme care.
4879 */
4880#if RT_INLINE_ASM_EXTERNAL
4881DECLASM(void) ASMInvalidateInternalCaches(void);
4882#else
4883DECLINLINE(void) ASMInvalidateInternalCaches(void)
4884{
4885# if RT_INLINE_ASM_GNU_STYLE
4886 __asm__ __volatile__("invd");
4887# else
4888 __asm
4889 {
4890 invd
4891 }
4892# endif
4893}
4894#endif
4895
4896
4897#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4898# if PAGE_SIZE != 0x1000
4899# error "PAGE_SIZE is not 0x1000!"
4900# endif
4901#endif
4902
4903/**
4904 * Zeros a 4K memory page.
4905 *
4906 * @param pv Pointer to the memory block. This must be page aligned.
4907 */
4908#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4909DECLASM(void) ASMMemZeroPage(volatile void *pv);
4910# else
4911DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4912{
4913# if RT_INLINE_ASM_USES_INTRIN
4914# ifdef RT_ARCH_AMD64
4915 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4916# else
4917 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4918# endif
4919
4920# elif RT_INLINE_ASM_GNU_STYLE
4921 RTCCUINTREG uDummy;
4922# ifdef RT_ARCH_AMD64
4923 __asm__ __volatile__("rep stosq"
4924 : "=D" (pv),
4925 "=c" (uDummy)
4926 : "0" (pv),
4927 "c" (0x1000 >> 3),
4928 "a" (0)
4929 : "memory");
4930# else
4931 __asm__ __volatile__("rep stosl"
4932 : "=D" (pv),
4933 "=c" (uDummy)
4934 : "0" (pv),
4935 "c" (0x1000 >> 2),
4936 "a" (0)
4937 : "memory");
4938# endif
4939# else
4940 __asm
4941 {
4942# ifdef RT_ARCH_AMD64
4943 xor rax, rax
4944 mov ecx, 0200h
4945 mov rdi, [pv]
4946 rep stosq
4947# else
4948 xor eax, eax
4949 mov ecx, 0400h
4950 mov edi, [pv]
4951 rep stosd
4952# endif
4953 }
4954# endif
4955}
4956# endif
4957
4958
4959/**
4960 * Zeros a memory block with a 32-bit aligned size.
4961 *
4962 * @param pv Pointer to the memory block.
4963 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4964 */
4965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4966DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4967#else
4968DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4969{
4970# if RT_INLINE_ASM_USES_INTRIN
4971# ifdef RT_ARCH_AMD64
4972 if (!(cb & 7))
4973 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4974 else
4975# endif
4976 __stosd((unsigned long *)pv, 0, cb / 4);
4977
4978# elif RT_INLINE_ASM_GNU_STYLE
4979 __asm__ __volatile__("rep stosl"
4980 : "=D" (pv),
4981 "=c" (cb)
4982 : "0" (pv),
4983 "1" (cb >> 2),
4984 "a" (0)
4985 : "memory");
4986# else
4987 __asm
4988 {
4989 xor eax, eax
4990# ifdef RT_ARCH_AMD64
4991 mov rcx, [cb]
4992 shr rcx, 2
4993 mov rdi, [pv]
4994# else
4995 mov ecx, [cb]
4996 shr ecx, 2
4997 mov edi, [pv]
4998# endif
4999 rep stosd
5000 }
5001# endif
5002}
5003#endif
5004
5005
5006/**
5007 * Fills a memory block with a 32-bit aligned size.
5008 *
5009 * @param pv Pointer to the memory block.
5010 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5011 * @param u32 The value to fill with.
5012 */
5013#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5014DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
5015#else
5016DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
5017{
5018# if RT_INLINE_ASM_USES_INTRIN
5019# ifdef RT_ARCH_AMD64
5020 if (!(cb & 7))
5021 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5022 else
5023# endif
5024 __stosd((unsigned long *)pv, u32, cb / 4);
5025
5026# elif RT_INLINE_ASM_GNU_STYLE
5027 __asm__ __volatile__("rep stosl"
5028 : "=D" (pv),
5029 "=c" (cb)
5030 : "0" (pv),
5031 "1" (cb >> 2),
5032 "a" (u32)
5033 : "memory");
5034# else
5035 __asm
5036 {
5037# ifdef RT_ARCH_AMD64
5038 mov rcx, [cb]
5039 shr rcx, 2
5040 mov rdi, [pv]
5041# else
5042 mov ecx, [cb]
5043 shr ecx, 2
5044 mov edi, [pv]
5045# endif
5046 mov eax, [u32]
5047 rep stosd
5048 }
5049# endif
5050}
5051#endif
5052
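/* Usage sketch (illustrative only; au32Tbl is a made-up buffer whose byte
 * count, sizeof(au32Tbl) = 256, satisfies the 32-bit alignment requirement):
 *      uint32_t au32Tbl[64];
 *      ASMMemFill32(au32Tbl, sizeof(au32Tbl), UINT32_C(0xffffffff));
 *      ASMMemZero32(au32Tbl, sizeof(au32Tbl));
 */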
5053
5054/**
5055 * Checks if a memory block is filled with the specified byte.
5056 *
5057 * This is a sort of inverted memchr.
5058 *
5059 * @returns Pointer to the byte which doesn't equal u8.
5060 * @returns NULL if all equal to u8.
5061 *
5062 * @param pv Pointer to the memory block.
5063 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5064 * @param u8 The value it's supposed to be filled with.
5065 */
5066#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5067DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5068#else
5069DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5070{
5071/** @todo rewrite this in inline assembly? */
5072 uint8_t const *pb = (uint8_t const *)pv;
5073 for (; cb; cb--, pb++)
5074 if (RT_UNLIKELY(*pb != u8))
5075 return (void *)pb;
5076 return NULL;
5077}
5078#endif
5079
5080
5081/**
5082 * Checks if a memory block is filled with the specified 32-bit value.
5083 *
5084 * This is a sort of inverted memchr.
5085 *
5086 * @returns Pointer to the first value which doesn't equal u32.
5087 * @returns NULL if all equal to u32.
5088 *
5089 * @param pv Pointer to the memory block.
5090 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5091 * @param u32 The value it's supposed to be filled with.
5092 */
5093#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5094DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5095#else
5096DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5097{
5098/** @todo rewrite this in inline assembly? */
5099 uint32_t const *pu32 = (uint32_t const *)pv;
5100 for (; cb; cb -= 4, pu32++)
5101 if (RT_UNLIKELY(*pu32 != u32))
5102 return (uint32_t *)pu32;
5103 return NULL;
5104}
5105#endif
5106
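/* Usage sketch (illustrative only; continues the hypothetical au32Tbl buffer):
 *      if (!ASMMemIsAll8(au32Tbl, sizeof(au32Tbl), 0))
 *          ...every byte is zero...
 *      uint32_t *pu32Bad = ASMMemIsAllU32(au32Tbl, sizeof(au32Tbl), UINT32_C(0xffffffff));
 *      if (pu32Bad)
 *          ...*pu32Bad is the first dword that differs...
 */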
5107
5108/**
5109 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5110 *
5111 * @returns u32F1 * u32F2.
5112 */
5113#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5114DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5115#else
5116DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5117{
5118# ifdef RT_ARCH_AMD64
5119 return (uint64_t)u32F1 * u32F2;
5120# else /* !RT_ARCH_AMD64 */
5121 uint64_t u64;
5122# if RT_INLINE_ASM_GNU_STYLE
5123 __asm__ __volatile__("mull %%edx"
5124 : "=A" (u64)
5125 : "a" (u32F2), "d" (u32F1));
5126# else
5127 __asm
5128 {
5129 mov edx, [u32F1]
5130 mov eax, [u32F2]
5131 mul edx
5132 mov dword ptr [u64], eax
5133 mov dword ptr [u64 + 4], edx
5134 }
5135# endif
5136 return u64;
5137# endif /* !RT_ARCH_AMD64 */
5138}
5139#endif
5140
5141
5142/**
5143 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5144 *
5145 * @returns i32F1 * i32F2.
5146 */
5147#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5148DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5149#else
5150DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5151{
5152# ifdef RT_ARCH_AMD64
5153 return (int64_t)i32F1 * i32F2;
5154# else /* !RT_ARCH_AMD64 */
5155 int64_t i64;
5156# if RT_INLINE_ASM_GNU_STYLE
5157 __asm__ __volatile__("imull %%edx"
5158 : "=A" (i64)
5159 : "a" (i32F2), "d" (i32F1));
5160# else
5161 __asm
5162 {
5163 mov edx, [i32F1]
5164 mov eax, [i32F2]
5165 imul edx
5166 mov dword ptr [i64], eax
5167 mov dword ptr [i64 + 4], edx
5168 }
5169# endif
5170 return i64;
5171# endif /* !RT_ARCH_AMD64 */
5172}
5173#endif
5174
5175
5176/**
5177 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5178 *
5179 * @returns u64 / u32.
5180 */
5181#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5182DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5183#else
5184DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5185{
5186# ifdef RT_ARCH_AMD64
5187 return (uint32_t)(u64 / u32);
5188# else /* !RT_ARCH_AMD64 */
5189# if RT_INLINE_ASM_GNU_STYLE
5190 RTCCUINTREG uDummy;
5191 __asm__ __volatile__("divl %3"
5192 : "=a" (u32), "=d"(uDummy)
5193 : "A" (u64), "r" (u32));
5194# else
5195 __asm
5196 {
5197 mov eax, dword ptr [u64]
5198 mov edx, dword ptr [u64 + 4]
5199 mov ecx, [u32]
5200 div ecx
5201 mov [u32], eax
5202 }
5203# endif
5204 return u32;
5205# endif /* !RT_ARCH_AMD64 */
5206}
5207#endif
5208
5209
5210/**
5211 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5212 *
5213 * @returns i64 / i32.
5214 */
5215#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5216DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5217#else
5218DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5219{
5220# ifdef RT_ARCH_AMD64
5221 return (int32_t)(i64 / i32);
5222# else /* !RT_ARCH_AMD64 */
5223# if RT_INLINE_ASM_GNU_STYLE
5224 RTCCUINTREG iDummy;
5225 __asm__ __volatile__("idivl %3"
5226 : "=a" (i32), "=d"(iDummy)
5227 : "A" (i64), "r" (i32));
5228# else
5229 __asm
5230 {
5231 mov eax, dword ptr [i64]
5232 mov edx, dword ptr [i64 + 4]
5233 mov ecx, [i32]
5234 idiv ecx
5235 mov [i32], eax
5236 }
5237# endif
5238 return i32;
5239# endif /* !RT_ARCH_AMD64 */
5240}
5241#endif
5242
5243
5244/**
5245 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5246 * returning the remainder.
5247 *
5248 * @returns u64 % u32.
5249 *
5250 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or we'll overflow and crash.
5251 */
5252#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5253DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5254#else
5255DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5256{
5257# ifdef RT_ARCH_AMD64
5258 return (uint32_t)(u64 % u32);
5259# else /* !RT_ARCH_AMD64 */
5260# if RT_INLINE_ASM_GNU_STYLE
5261 RTCCUINTREG uDummy;
5262 __asm__ __volatile__("divl %3"
5263 : "=a" (uDummy), "=d"(u32)
5264 : "A" (u64), "r" (u32));
5265# else
5266 __asm
5267 {
5268 mov eax, dword ptr [u64]
5269 mov edx, dword ptr [u64 + 4]
5270 mov ecx, [u32]
5271 div ecx
5272 mov [u32], edx
5273 }
5274# endif
5275 return u32;
5276# endif /* !RT_ARCH_AMD64 */
5277}
5278#endif
5279
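/* Worked example of the overflow remark above (illustrative only): on the
 * 32-bit inline assembly path the div instruction requires the quotient, not
 * just the remainder, to fit in 32 bits.
 *      ASMModU64ByU32RetU32(UINT64_C(0x123456789), 1000);   quotient ~4.9 million: fine
 *      ASMModU64ByU32RetU32(UINT64_C(0x100000000), 1);      quotient 2^32: raises #DE on 32-bit hosts
 */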
5280
5281/**
5282 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5283 * returning the remainder.
5284 *
5285 * @returns i64 % i32.
5286 *
5287 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits or we'll overflow and crash.
5288 */
5289#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5290DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5291#else
5292DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5293{
5294# ifdef RT_ARCH_AMD64
5295 return (int32_t)(i64 % i32);
5296# else /* !RT_ARCH_AMD64 */
5297# if RT_INLINE_ASM_GNU_STYLE
5298 RTCCUINTREG iDummy;
5299 __asm__ __volatile__("idivl %3"
5300 : "=a" (iDummy), "=d"(i32)
5301 : "A" (i64), "r" (i32));
5302# else
5303 __asm
5304 {
5305 mov eax, dword ptr [i64]
5306 mov edx, dword ptr [i64 + 4]
5307 mov ecx, [i32]
5308 idiv ecx
5309 mov [i32], edx
5310 }
5311# endif
5312 return i32;
5313# endif /* !RT_ARCH_AMD64 */
5314}
5315#endif
5316
5317
5318/**
5319 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
5320 * using a 96-bit intermediate result.
5321 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5322 * __udivdi3 and __umoddi3 even if this inline function is not used.
5323 *
5324 * @returns (u64A * u32B) / u32C.
5325 * @param u64A The 64-bit value.
5326 * @param u32B The 32-bit value to multiply by A.
5327 * @param u32C The 32-bit value to divide A*B by.
5328 */
5329#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5330DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5331#else
5332DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5333{
5334# if RT_INLINE_ASM_GNU_STYLE
5335# ifdef RT_ARCH_AMD64
5336 uint64_t u64Result, u64Spill;
5337 __asm__ __volatile__("mulq %2\n\t"
5338 "divq %3\n\t"
5339 : "=a" (u64Result),
5340 "=d" (u64Spill)
5341 : "r" ((uint64_t)u32B),
5342 "r" ((uint64_t)u32C),
5343 "0" (u64A),
5344 "1" (0));
5345 return u64Result;
5346# else
5347 uint32_t u32Dummy;
5348 uint64_t u64Result;
5349 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5350 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5351 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5352 eax = u64A.hi */
5353 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5354 edx = u32C */
5355 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5356 edx = u32B */
5357 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5358 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5359 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5360 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5361 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5362 edx = u64Hi % u32C */
5363 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5364 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5365 "divl %%ecx \n\t" /* u64Result.lo */
5366 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5367 : "=A"(u64Result), "=c"(u32Dummy),
5368 "=S"(u32Dummy), "=D"(u32Dummy)
5369 : "a"((uint32_t)u64A),
5370 "S"((uint32_t)(u64A >> 32)),
5371 "c"(u32B),
5372 "D"(u32C));
5373 return u64Result;
5374# endif
5375# else
5376 RTUINT64U u;
5377 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5378 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5379 u64Hi += (u64Lo >> 32);
5380 u.s.Hi = (uint32_t)(u64Hi / u32C);
5381 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5382 return u.u;
5383# endif
5384}
5385#endif
5386
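/* Usage sketch (illustrative only; cTicks and uTicksPerSec are hypothetical):
 * scaling a tick count to nanoseconds this way keeps the 96-bit intermediate
 * internal, whereas plain C (cTicks * 1000000000) / uTicksPerSec would
 * overflow 64 bits for large tick counts.
 *      uint64_t cNanoSecs = ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uTicksPerSec);
 */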
5387
5388/**
5389 * Probes a byte pointer for read access.
5390 *
5391 * While the function will fault if the byte is not read accessible,
5392 * the idea is to do this in a safe place like before acquiring locks
5393 * and such like.
5394 *
5395 * Also, this function guarantees that an eager compiler is not going
5396 * to optimize the probing away.
5397 *
5398 * @param pvByte Pointer to the byte.
5399 */
5400#if RT_INLINE_ASM_EXTERNAL
5401DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5402#else
5403DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5404{
5405 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5406 uint8_t u8;
5407# if RT_INLINE_ASM_GNU_STYLE
5408 __asm__ __volatile__("movb (%1), %0\n\t"
5409 : "=r" (u8)
5410 : "r" (pvByte));
5411# else
5412 __asm
5413 {
5414# ifdef RT_ARCH_AMD64
5415 mov rax, [pvByte]
5416 mov al, [rax]
5417# else
5418 mov eax, [pvByte]
5419 mov al, [eax]
5420# endif
5421 mov [u8], al
5422 }
5423# endif
5424 return u8;
5425}
5426#endif
5427
5428/**
5429 * Probes a buffer for read access page by page.
5430 *
5431 * While the function will fault if the buffer is not fully read
5432 * accessible, the idea is to do this in a safe place like before
5433 * acquiring locks and such like.
5434 *
5435 * Also, this function guarantees that an eager compiler is not going
5436 * to optimize the probing away.
5437 *
5438 * @param pvBuf Pointer to the buffer.
5439 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5440 */
5441DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5442{
5443 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5444 /* the first byte */
5445 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5446 ASMProbeReadByte(pu8);
5447
5448    /* the pages in between. */
5449 while (cbBuf > /*PAGE_SIZE*/0x1000)
5450 {
5451 ASMProbeReadByte(pu8);
5452 cbBuf -= /*PAGE_SIZE*/0x1000;
5453 pu8 += /*PAGE_SIZE*/0x1000;
5454 }
5455
5456 /* the last byte */
5457 ASMProbeReadByte(pu8 + cbBuf - 1);
5458}
5459
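/* Usage sketch (illustrative only; pvReq and cbReq are hypothetical request
 * data): take any page fault here, in a safe spot, rather than later while
 * holding a lock.
 *      ASMProbeReadBuffer(pvReq, cbReq);
 *      ...acquire locks and process the request...
 */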
5460
5461/** @def ASMBreakpoint
5462 * Debugger Breakpoint.
5463 * @remark In the gnu world we add a nop instruction after the int3 to
5464 * force gdb to remain at the int3 source line.
5465 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5466 * @internal
5467 */
5468#if RT_INLINE_ASM_GNU_STYLE
5469# ifndef __L4ENV__
5470# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5471# else
5472# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5473# endif
5474#else
5475# define ASMBreakpoint() __debugbreak()
5476#endif
5477
5478
5479
5480/** @defgroup grp_inline_bits Bit Operations
5481 * @{
5482 */
5483
5484
5485/**
5486 * Sets a bit in a bitmap.
5487 *
5488 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5489 * @param iBit The bit to set.
5490 *
5491 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5492 * However, doing so will yield better performance as well as avoiding
5493 * traps accessing the last bits in the bitmap.
5494 */
5495#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5496DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5497#else
5498DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5499{
5500# if RT_INLINE_ASM_USES_INTRIN
5501 _bittestandset((long *)pvBitmap, iBit);
5502
5503# elif RT_INLINE_ASM_GNU_STYLE
5504 __asm__ __volatile__("btsl %1, %0"
5505 : "=m" (*(volatile long *)pvBitmap)
5506 : "Ir" (iBit),
5507 "m" (*(volatile long *)pvBitmap)
5508 : "memory");
5509# else
5510 __asm
5511 {
5512# ifdef RT_ARCH_AMD64
5513 mov rax, [pvBitmap]
5514 mov edx, [iBit]
5515 bts [rax], edx
5516# else
5517 mov eax, [pvBitmap]
5518 mov edx, [iBit]
5519 bts [eax], edx
5520# endif
5521 }
5522# endif
5523}
5524#endif
5525
5526
5527/**
5528 * Atomically sets a bit in a bitmap, ordered.
5529 *
5530 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5531 * the memory access isn't atomic!
5532 * @param iBit The bit to set.
5533 */
5534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5535DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5536#else
5537DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5538{
5539 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5540# if RT_INLINE_ASM_USES_INTRIN
5541 _interlockedbittestandset((long *)pvBitmap, iBit);
5542# elif RT_INLINE_ASM_GNU_STYLE
5543 __asm__ __volatile__("lock; btsl %1, %0"
5544 : "=m" (*(volatile long *)pvBitmap)
5545 : "Ir" (iBit),
5546 "m" (*(volatile long *)pvBitmap)
5547 : "memory");
5548# else
5549 __asm
5550 {
5551# ifdef RT_ARCH_AMD64
5552 mov rax, [pvBitmap]
5553 mov edx, [iBit]
5554 lock bts [rax], edx
5555# else
5556 mov eax, [pvBitmap]
5557 mov edx, [iBit]
5558 lock bts [eax], edx
5559# endif
5560 }
5561# endif
5562}
5563#endif
5564
5565
5566/**
5567 * Clears a bit in a bitmap.
5568 *
5569 * @param pvBitmap Pointer to the bitmap.
5570 * @param iBit The bit to clear.
5571 *
5572 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5573 * However, doing so will yield better performance as well as avoiding
5574 * traps accessing the last bits in the bitmap.
5575 */
5576#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5577DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5578#else
5579DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5580{
5581# if RT_INLINE_ASM_USES_INTRIN
5582 _bittestandreset((long *)pvBitmap, iBit);
5583
5584# elif RT_INLINE_ASM_GNU_STYLE
5585 __asm__ __volatile__("btrl %1, %0"
5586 : "=m" (*(volatile long *)pvBitmap)
5587 : "Ir" (iBit),
5588 "m" (*(volatile long *)pvBitmap)
5589 : "memory");
5590# else
5591 __asm
5592 {
5593# ifdef RT_ARCH_AMD64
5594 mov rax, [pvBitmap]
5595 mov edx, [iBit]
5596 btr [rax], edx
5597# else
5598 mov eax, [pvBitmap]
5599 mov edx, [iBit]
5600 btr [eax], edx
5601# endif
5602 }
5603# endif
5604}
5605#endif
5606
5607
5608/**
5609 * Atomically clears a bit in a bitmap, ordered.
5610 *
5611 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5612 * the memory access isn't atomic!
5613 * @param iBit The bit to clear.
5614 * @remarks No memory barrier, take care on smp.
5615 */
5616#if RT_INLINE_ASM_EXTERNAL
5617DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5618#else
5619DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5620{
5621 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5622# if RT_INLINE_ASM_GNU_STYLE
5623 __asm__ __volatile__("lock; btrl %1, %0"
5624 : "=m" (*(volatile long *)pvBitmap)
5625 : "Ir" (iBit),
5626 "m" (*(volatile long *)pvBitmap)
5627 : "memory");
5628# else
5629 __asm
5630 {
5631# ifdef RT_ARCH_AMD64
5632 mov rax, [pvBitmap]
5633 mov edx, [iBit]
5634 lock btr [rax], edx
5635# else
5636 mov eax, [pvBitmap]
5637 mov edx, [iBit]
5638 lock btr [eax], edx
5639# endif
5640 }
5641# endif
5642}
5643#endif
5644
5645
5646/**
5647 * Toggles a bit in a bitmap.
5648 *
5649 * @param pvBitmap Pointer to the bitmap.
5650 * @param iBit The bit to toggle.
5651 *
5652 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5653 * However, doing so will yield better performance as well as avoiding
5654 * traps accessing the last bits in the bitmap.
5655 */
5656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5657DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5658#else
5659DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5660{
5661# if RT_INLINE_ASM_USES_INTRIN
5662 _bittestandcomplement((long *)pvBitmap, iBit);
5663# elif RT_INLINE_ASM_GNU_STYLE
5664 __asm__ __volatile__("btcl %1, %0"
5665 : "=m" (*(volatile long *)pvBitmap)
5666 : "Ir" (iBit),
5667 "m" (*(volatile long *)pvBitmap)
5668 : "memory");
5669# else
5670 __asm
5671 {
5672# ifdef RT_ARCH_AMD64
5673 mov rax, [pvBitmap]
5674 mov edx, [iBit]
5675 btc [rax], edx
5676# else
5677 mov eax, [pvBitmap]
5678 mov edx, [iBit]
5679 btc [eax], edx
5680# endif
5681 }
5682# endif
5683}
5684#endif
5685
5686
5687/**
5688 * Atomically toggles a bit in a bitmap, ordered.
5689 *
5690 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5691 * the memory access isn't atomic!
5692 * @param iBit The bit to toggle.
5693 */
5694#if RT_INLINE_ASM_EXTERNAL
5695DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5696#else
5697DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5698{
5699 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5700# if RT_INLINE_ASM_GNU_STYLE
5701 __asm__ __volatile__("lock; btcl %1, %0"
5702 : "=m" (*(volatile long *)pvBitmap)
5703 : "Ir" (iBit),
5704 "m" (*(volatile long *)pvBitmap)
5705 : "memory");
5706# else
5707 __asm
5708 {
5709# ifdef RT_ARCH_AMD64
5710 mov rax, [pvBitmap]
5711 mov edx, [iBit]
5712 lock btc [rax], edx
5713# else
5714 mov eax, [pvBitmap]
5715 mov edx, [iBit]
5716 lock btc [eax], edx
5717# endif
5718 }
5719# endif
5720}
5721#endif
5722
5723
5724/**
5725 * Tests and sets a bit in a bitmap.
5726 *
5727 * @returns true if the bit was set.
5728 * @returns false if the bit was clear.
5729 *
5730 * @param pvBitmap Pointer to the bitmap.
5731 * @param iBit The bit to test and set.
5732 *
5733 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5734 * However, doing so will yield better performance as well as avoiding
5735 * traps accessing the last bits in the bitmap.
5736 */
5737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5738DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5739#else
5740DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5741{
5742 union { bool f; uint32_t u32; uint8_t u8; } rc;
5743# if RT_INLINE_ASM_USES_INTRIN
5744 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5745
5746# elif RT_INLINE_ASM_GNU_STYLE
5747 __asm__ __volatile__("btsl %2, %1\n\t"
5748 "setc %b0\n\t"
5749 "andl $1, %0\n\t"
5750 : "=q" (rc.u32),
5751 "=m" (*(volatile long *)pvBitmap)
5752 : "Ir" (iBit),
5753 "m" (*(volatile long *)pvBitmap)
5754 : "memory");
5755# else
5756 __asm
5757 {
5758 mov edx, [iBit]
5759# ifdef RT_ARCH_AMD64
5760 mov rax, [pvBitmap]
5761 bts [rax], edx
5762# else
5763 mov eax, [pvBitmap]
5764 bts [eax], edx
5765# endif
5766 setc al
5767 and eax, 1
5768 mov [rc.u32], eax
5769 }
5770# endif
5771 return rc.f;
5772}
5773#endif
5774
5775
5776/**
5777 * Atomically tests and sets a bit in a bitmap, ordered.
5778 *
5779 * @returns true if the bit was set.
5780 * @returns false if the bit was clear.
5781 *
5782 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5783 * the memory access isn't atomic!
5784 * @param iBit The bit to test and set.
5785 */
5786#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5787DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5788#else
5789DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5790{
5791 union { bool f; uint32_t u32; uint8_t u8; } rc;
5792 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5793# if RT_INLINE_ASM_USES_INTRIN
5794 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5795# elif RT_INLINE_ASM_GNU_STYLE
5796 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5797 "setc %b0\n\t"
5798 "andl $1, %0\n\t"
5799 : "=q" (rc.u32),
5800 "=m" (*(volatile long *)pvBitmap)
5801 : "Ir" (iBit),
5802 "m" (*(volatile long *)pvBitmap)
5803 : "memory");
5804# else
5805 __asm
5806 {
5807 mov edx, [iBit]
5808# ifdef RT_ARCH_AMD64
5809 mov rax, [pvBitmap]
5810 lock bts [rax], edx
5811# else
5812 mov eax, [pvBitmap]
5813 lock bts [eax], edx
5814# endif
5815 setc al
5816 and eax, 1
5817 mov [rc.u32], eax
5818 }
5819# endif
5820 return rc.f;
5821}
5822#endif
5823
5824
5825/**
5826 * Tests and clears a bit in a bitmap.
5827 *
5828 * @returns true if the bit was set.
5829 * @returns false if the bit was clear.
5830 *
5831 * @param pvBitmap Pointer to the bitmap.
5832 * @param iBit The bit to test and clear.
5833 *
5834 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5835 * However, doing so will yield better performance as well as avoiding
5836 * traps accessing the last bits in the bitmap.
5837 */
5838#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5839DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5840#else
5841DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5842{
5843 union { bool f; uint32_t u32; uint8_t u8; } rc;
5844# if RT_INLINE_ASM_USES_INTRIN
5845 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5846
5847# elif RT_INLINE_ASM_GNU_STYLE
5848 __asm__ __volatile__("btrl %2, %1\n\t"
5849 "setc %b0\n\t"
5850 "andl $1, %0\n\t"
5851 : "=q" (rc.u32),
5852 "=m" (*(volatile long *)pvBitmap)
5853 : "Ir" (iBit),
5854 "m" (*(volatile long *)pvBitmap)
5855 : "memory");
5856# else
5857 __asm
5858 {
5859 mov edx, [iBit]
5860# ifdef RT_ARCH_AMD64
5861 mov rax, [pvBitmap]
5862 btr [rax], edx
5863# else
5864 mov eax, [pvBitmap]
5865 btr [eax], edx
5866# endif
5867 setc al
5868 and eax, 1
5869 mov [rc.u32], eax
5870 }
5871# endif
5872 return rc.f;
5873}
5874#endif
5875
5876
5877/**
5878 * Atomically tests and clears a bit in a bitmap, ordered.
5879 *
5880 * @returns true if the bit was set.
5881 * @returns false if the bit was clear.
5882 *
5883 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5884 * the memory access isn't atomic!
5885 * @param iBit The bit to test and clear.
5886 *
5887 * @remarks No memory barrier, take care on smp.
5888 */
5889#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5890DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5891#else
5892DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5893{
5894 union { bool f; uint32_t u32; uint8_t u8; } rc;
5895 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5896# if RT_INLINE_ASM_USES_INTRIN
5897 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5898
5899# elif RT_INLINE_ASM_GNU_STYLE
5900 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5901 "setc %b0\n\t"
5902 "andl $1, %0\n\t"
5903 : "=q" (rc.u32),
5904 "=m" (*(volatile long *)pvBitmap)
5905 : "Ir" (iBit),
5906 "m" (*(volatile long *)pvBitmap)
5907 : "memory");
5908# else
5909 __asm
5910 {
5911 mov edx, [iBit]
5912# ifdef RT_ARCH_AMD64
5913 mov rax, [pvBitmap]
5914 lock btr [rax], edx
5915# else
5916 mov eax, [pvBitmap]
5917 lock btr [eax], edx
5918# endif
5919 setc al
5920 and eax, 1
5921 mov [rc.u32], eax
5922 }
5923# endif
5924 return rc.f;
5925}
5926#endif
5927
5928
5929/**
5930 * Tests and toggles a bit in a bitmap.
5931 *
5932 * @returns true if the bit was set.
5933 * @returns false if the bit was clear.
5934 *
5935 * @param pvBitmap Pointer to the bitmap.
5936 * @param iBit The bit to test and toggle.
5937 *
5938 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5939 * However, doing so will yield better performance as well as avoiding
5940 * traps accessing the last bits in the bitmap.
5941 */
5942#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5943DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5944#else
5945DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5946{
5947 union { bool f; uint32_t u32; uint8_t u8; } rc;
5948# if RT_INLINE_ASM_USES_INTRIN
5949 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5950
5951# elif RT_INLINE_ASM_GNU_STYLE
5952 __asm__ __volatile__("btcl %2, %1\n\t"
5953 "setc %b0\n\t"
5954 "andl $1, %0\n\t"
5955 : "=q" (rc.u32),
5956 "=m" (*(volatile long *)pvBitmap)
5957 : "Ir" (iBit),
5958 "m" (*(volatile long *)pvBitmap)
5959 : "memory");
5960# else
5961 __asm
5962 {
5963 mov edx, [iBit]
5964# ifdef RT_ARCH_AMD64
5965 mov rax, [pvBitmap]
5966 btc [rax], edx
5967# else
5968 mov eax, [pvBitmap]
5969 btc [eax], edx
5970# endif
5971 setc al
5972 and eax, 1
5973 mov [rc.u32], eax
5974 }
5975# endif
5976 return rc.f;
5977}
5978#endif
5979
5980
5981/**
5982 * Atomically tests and toggles a bit in a bitmap, ordered.
5983 *
5984 * @returns true if the bit was set.
5985 * @returns false if the bit was clear.
5986 *
5987 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5988 * the memory access isn't atomic!
5989 * @param iBit The bit to test and toggle.
5990 */
5991#if RT_INLINE_ASM_EXTERNAL
5992DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5993#else
5994DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5995{
5996 union { bool f; uint32_t u32; uint8_t u8; } rc;
5997 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5998# if RT_INLINE_ASM_GNU_STYLE
5999 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6000 "setc %b0\n\t"
6001 "andl $1, %0\n\t"
6002 : "=q" (rc.u32),
6003 "=m" (*(volatile long *)pvBitmap)
6004 : "Ir" (iBit),
6005 "m" (*(volatile long *)pvBitmap)
6006 : "memory");
6007# else
6008 __asm
6009 {
6010 mov edx, [iBit]
6011# ifdef RT_ARCH_AMD64
6012 mov rax, [pvBitmap]
6013 lock btc [rax], edx
6014# else
6015 mov eax, [pvBitmap]
6016 lock btc [eax], edx
6017# endif
6018 setc al
6019 and eax, 1
6020 mov [rc.u32], eax
6021 }
6022# endif
6023 return rc.f;
6024}
6025#endif
6026
6027
6028/**
6029 * Tests if a bit in a bitmap is set.
6030 *
6031 * @returns true if the bit is set.
6032 * @returns false if the bit is clear.
6033 *
6034 * @param pvBitmap Pointer to the bitmap.
6035 * @param iBit The bit to test.
6036 *
6037 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6038 * However, doing so will yield better performance as well as avoiding
6039 * traps accessing the last bits in the bitmap.
6040 */
6041#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6042DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6043#else
6044DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6045{
6046 union { bool f; uint32_t u32; uint8_t u8; } rc;
6047# if RT_INLINE_ASM_USES_INTRIN
6048 rc.u32 = _bittest((long *)pvBitmap, iBit);
6049# elif RT_INLINE_ASM_GNU_STYLE
6050
6051 __asm__ __volatile__("btl %2, %1\n\t"
6052 "setc %b0\n\t"
6053 "andl $1, %0\n\t"
6054 : "=q" (rc.u32)
6055 : "m" (*(const volatile long *)pvBitmap),
6056 "Ir" (iBit)
6057 : "memory");
6058# else
6059 __asm
6060 {
6061 mov edx, [iBit]
6062# ifdef RT_ARCH_AMD64
6063 mov rax, [pvBitmap]
6064 bt [rax], edx
6065# else
6066 mov eax, [pvBitmap]
6067 bt [eax], edx
6068# endif
6069 setc al
6070 and eax, 1
6071 mov [rc.u32], eax
6072 }
6073# endif
6074 return rc.f;
6075}
6076#endif
6077
6078
6079/**
6080 * Clears a bit range within a bitmap.
6081 *
6082 * @param pvBitmap Pointer to the bitmap.
6083 * @param iBitStart The first bit to clear.
6084 * @param iBitEnd The first bit not to clear.
6085 */
6086DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6087{
6088 if (iBitStart < iBitEnd)
6089 {
6090 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6091 int iStart = iBitStart & ~31;
6092 int iEnd = iBitEnd & ~31;
6093 if (iStart == iEnd)
6094 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6095 else
6096 {
6097 /* bits in first dword. */
6098 if (iBitStart & 31)
6099 {
6100 *pu32 &= (1 << (iBitStart & 31)) - 1;
6101 pu32++;
6102 iBitStart = iStart + 32;
6103 }
6104
6105 /* whole dword. */
6106 if (iBitStart != iEnd)
6107 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6108
6109 /* bits in last dword. */
6110 if (iBitEnd & 31)
6111 {
6112 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6113 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6114 }
6115 }
6116 }
6117}
6118
6119
6120/**
6121 * Sets a bit range within a bitmap.
6122 *
6123 * @param pvBitmap Pointer to the bitmap.
6124 * @param iBitStart The first bit to set.
6125 * @param iBitEnd The first bit not to set.
6126 */
6127DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6128{
6129 if (iBitStart < iBitEnd)
6130 {
6131 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6132 int iStart = iBitStart & ~31;
6133 int iEnd = iBitEnd & ~31;
6134 if (iStart == iEnd)
6135            *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
6136 else
6137 {
6138 /* bits in first dword. */
6139 if (iBitStart & 31)
6140 {
6141 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6142 pu32++;
6143 iBitStart = iStart + 32;
6144 }
6145
6146 /* whole dword. */
6147 if (iBitStart != iEnd)
6148 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6149
6150 /* bits in last dword. */
6151 if (iBitEnd & 31)
6152 {
6153 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6154 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6155 }
6156 }
6157 }
6158}
6159
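/* Usage sketch (illustrative only; au32Bitmap is a made-up 256-bit bitmap):
 *      uint32_t au32Bitmap[256 / 32] = { 0 };
 *      ASMBitSetRange(au32Bitmap, 8, 24);      sets bits 8 thru 23
 *      ASMBitClearRange(au32Bitmap, 12, 16);   clears bits 12 thru 15 again
 */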
6160
6161/**
6162 * Finds the first clear bit in a bitmap.
6163 *
6164 * @returns Index of the first zero bit.
6165 * @returns -1 if no clear bit was found.
6166 * @param pvBitmap Pointer to the bitmap.
6167 * @param cBits The number of bits in the bitmap. Multiple of 32.
6168 */
6169#if RT_INLINE_ASM_EXTERNAL
6170DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6171#else
6172DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6173{
6174 if (cBits)
6175 {
6176 int32_t iBit;
6177# if RT_INLINE_ASM_GNU_STYLE
6178 RTCCUINTREG uEAX, uECX, uEDI;
6179 cBits = RT_ALIGN_32(cBits, 32);
6180 __asm__ __volatile__("repe; scasl\n\t"
6181 "je 1f\n\t"
6182# ifdef RT_ARCH_AMD64
6183 "lea -4(%%rdi), %%rdi\n\t"
6184 "xorl (%%rdi), %%eax\n\t"
6185 "subq %5, %%rdi\n\t"
6186# else
6187 "lea -4(%%edi), %%edi\n\t"
6188 "xorl (%%edi), %%eax\n\t"
6189 "subl %5, %%edi\n\t"
6190# endif
6191 "shll $3, %%edi\n\t"
6192 "bsfl %%eax, %%edx\n\t"
6193 "addl %%edi, %%edx\n\t"
6194 "1:\t\n"
6195 : "=d" (iBit),
6196 "=&c" (uECX),
6197 "=&D" (uEDI),
6198 "=&a" (uEAX)
6199 : "0" (0xffffffff),
6200 "mr" (pvBitmap),
6201 "1" (cBits >> 5),
6202 "2" (pvBitmap),
6203 "3" (0xffffffff));
6204# else
6205 cBits = RT_ALIGN_32(cBits, 32);
6206 __asm
6207 {
6208# ifdef RT_ARCH_AMD64
6209 mov rdi, [pvBitmap]
6210 mov rbx, rdi
6211# else
6212 mov edi, [pvBitmap]
6213 mov ebx, edi
6214# endif
6215 mov edx, 0ffffffffh
6216 mov eax, edx
6217 mov ecx, [cBits]
6218 shr ecx, 5
6219 repe scasd
6220 je done
6221
6222# ifdef RT_ARCH_AMD64
6223 lea rdi, [rdi - 4]
6224 xor eax, [rdi]
6225 sub rdi, rbx
6226# else
6227 lea edi, [edi - 4]
6228 xor eax, [edi]
6229 sub edi, ebx
6230# endif
6231 shl edi, 3
6232 bsf edx, eax
6233 add edx, edi
6234 done:
6235 mov [iBit], edx
6236 }
6237# endif
6238 return iBit;
6239 }
6240 return -1;
6241}
6242#endif
6243
6244
6245/**
6246 * Finds the next clear bit in a bitmap.
6247 *
6248 * @returns Index of the first zero bit.
6249 * @returns -1 if no clear bit was found.
6250 * @param pvBitmap Pointer to the bitmap.
6251 * @param cBits The number of bits in the bitmap. Multiple of 32.
6252 * @param iBitPrev The bit returned from the last search.
6253 * The search will start at iBitPrev + 1.
6254 */
6255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6256DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6257#else
6258DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6259{
6260 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6261 int iBit = ++iBitPrev & 31;
6262 if (iBit)
6263 {
6264 /*
6265 * Inspect the 32-bit word containing the unaligned bit.
6266 */
6267 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6268
6269# if RT_INLINE_ASM_USES_INTRIN
6270 unsigned long ulBit = 0;
6271 if (_BitScanForward(&ulBit, u32))
6272 return ulBit + iBitPrev;
6273# else
6274# if RT_INLINE_ASM_GNU_STYLE
6275 __asm__ __volatile__("bsf %1, %0\n\t"
6276 "jnz 1f\n\t"
6277 "movl $-1, %0\n\t"
6278 "1:\n\t"
6279 : "=r" (iBit)
6280 : "r" (u32));
6281# else
6282 __asm
6283 {
6284 mov edx, [u32]
6285 bsf eax, edx
6286 jnz done
6287 mov eax, 0ffffffffh
6288 done:
6289 mov [iBit], eax
6290 }
6291# endif
6292 if (iBit >= 0)
6293 return iBit + iBitPrev;
6294# endif
6295
6296 /*
6297 * Skip ahead and see if there is anything left to search.
6298 */
6299 iBitPrev |= 31;
6300 iBitPrev++;
6301 if (cBits <= (uint32_t)iBitPrev)
6302 return -1;
6303 }
6304
6305 /*
6306 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6307 */
6308 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6309 if (iBit >= 0)
6310 iBit += iBitPrev;
6311 return iBit;
6312}
6313#endif
6314
6315
6316/**
6317 * Finds the first set bit in a bitmap.
6318 *
6319 * @returns Index of the first set bit.
6320 * @returns -1 if no set bit was found.
6321 * @param pvBitmap Pointer to the bitmap.
6322 * @param cBits The number of bits in the bitmap. Multiple of 32.
6323 */
6324#if RT_INLINE_ASM_EXTERNAL
6325DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6326#else
6327DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6328{
6329 if (cBits)
6330 {
6331 int32_t iBit;
6332# if RT_INLINE_ASM_GNU_STYLE
6333 RTCCUINTREG uEAX, uECX, uEDI;
6334 cBits = RT_ALIGN_32(cBits, 32);
6335 __asm__ __volatile__("repe; scasl\n\t"
6336 "je 1f\n\t"
6337# ifdef RT_ARCH_AMD64
6338 "lea -4(%%rdi), %%rdi\n\t"
6339 "movl (%%rdi), %%eax\n\t"
6340 "subq %5, %%rdi\n\t"
6341# else
6342 "lea -4(%%edi), %%edi\n\t"
6343 "movl (%%edi), %%eax\n\t"
6344 "subl %5, %%edi\n\t"
6345# endif
6346 "shll $3, %%edi\n\t"
6347 "bsfl %%eax, %%edx\n\t"
6348 "addl %%edi, %%edx\n\t"
6349 "1:\t\n"
6350 : "=d" (iBit),
6351 "=&c" (uECX),
6352 "=&D" (uEDI),
6353 "=&a" (uEAX)
6354 : "0" (0xffffffff),
6355 "mr" (pvBitmap),
6356 "1" (cBits >> 5),
6357 "2" (pvBitmap),
6358 "3" (0));
6359# else
6360 cBits = RT_ALIGN_32(cBits, 32);
6361 __asm
6362 {
6363# ifdef RT_ARCH_AMD64
6364 mov rdi, [pvBitmap]
6365 mov rbx, rdi
6366# else
6367 mov edi, [pvBitmap]
6368 mov ebx, edi
6369# endif
6370 mov edx, 0ffffffffh
6371 xor eax, eax
6372 mov ecx, [cBits]
6373 shr ecx, 5
6374 repe scasd
6375 je done
6376# ifdef RT_ARCH_AMD64
6377 lea rdi, [rdi - 4]
6378 mov eax, [rdi]
6379 sub rdi, rbx
6380# else
6381 lea edi, [edi - 4]
6382 mov eax, [edi]
6383 sub edi, ebx
6384# endif
6385 shl edi, 3
6386 bsf edx, eax
6387 add edx, edi
6388 done:
6389 mov [iBit], edx
6390 }
6391# endif
6392 return iBit;
6393 }
6394 return -1;
6395}
6396#endif
6397
6398
6399/**
6400 * Finds the next set bit in a bitmap.
6401 *
6402 * @returns Index of the next set bit.
6403 * @returns -1 if no set bit was found.
6404 * @param pvBitmap Pointer to the bitmap.
6405 * @param cBits The number of bits in the bitmap. Multiple of 32.
6406 * @param iBitPrev The bit returned from the last search.
6407 * The search will start at iBitPrev + 1.
6408 */
6409#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6410DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6411#else
6412DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6413{
6414 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6415 int iBit = ++iBitPrev & 31;
6416 if (iBit)
6417 {
6418 /*
6419 * Inspect the 32-bit word containing the unaligned bit.
6420 */
6421 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6422
6423# if RT_INLINE_ASM_USES_INTRIN
6424 unsigned long ulBit = 0;
6425 if (_BitScanForward(&ulBit, u32))
6426 return ulBit + iBitPrev;
6427# else
6428# if RT_INLINE_ASM_GNU_STYLE
6429 __asm__ __volatile__("bsf %1, %0\n\t"
6430 "jnz 1f\n\t"
6431 "movl $-1, %0\n\t"
6432 "1:\n\t"
6433 : "=r" (iBit)
6434 : "r" (u32));
6435# else
6436 __asm
6437 {
6438 mov edx, [u32]
6439 bsf eax, edx
6440 jnz done
6441 mov eax, 0ffffffffh
6442 done:
6443 mov [iBit], eax
6444 }
6445# endif
6446 if (iBit >= 0)
6447 return iBit + iBitPrev;
6448# endif
6449
6450 /*
6451 * Skip ahead and see if there is anything left to search.
6452 */
6453 iBitPrev |= 31;
6454 iBitPrev++;
6455 if (cBits <= (uint32_t)iBitPrev)
6456 return -1;
6457 }
6458
6459 /*
6460 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6461 */
6462 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6463 if (iBit >= 0)
6464 iBit += iBitPrev;
6465 return iBit;
6466}
6467#endif
6468
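/* Usage sketch (illustrative only; walks all set bits of the hypothetical
 * 256-bit au32Bitmap from the range example above):
 *      int iBit = ASMBitFirstSet(au32Bitmap, 256);
 *      while (iBit >= 0)
 *      {
 *          ...process bit iBit...
 *          iBit = ASMBitNextSet(au32Bitmap, 256, (uint32_t)iBit);
 *      }
 */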
6469
6470/**
6471 * Finds the first bit which is set in the given 32-bit integer.
6472 * Bits are numbered from 1 (least significant) to 32.
6473 *
6474 * @returns index [1..32] of the first set bit.
6475 * @returns 0 if all bits are cleared.
6476 * @param u32 Integer to search for set bits.
6477 * @remark Similar to ffs() in BSD.
6478 */
6479DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6480{
6481# if RT_INLINE_ASM_USES_INTRIN
6482 unsigned long iBit;
6483 if (_BitScanForward(&iBit, u32))
6484 iBit++;
6485 else
6486 iBit = 0;
6487# elif RT_INLINE_ASM_GNU_STYLE
6488 uint32_t iBit;
6489 __asm__ __volatile__("bsf %1, %0\n\t"
6490 "jnz 1f\n\t"
6491 "xorl %0, %0\n\t"
6492 "jmp 2f\n"
6493 "1:\n\t"
6494 "incl %0\n"
6495 "2:\n\t"
6496 : "=r" (iBit)
6497 : "rm" (u32));
6498# else
6499 uint32_t iBit;
6500 _asm
6501 {
6502 bsf eax, [u32]
6503 jnz found
6504 xor eax, eax
6505 jmp done
6506 found:
6507 inc eax
6508 done:
6509 mov [iBit], eax
6510 }
6511# endif
6512 return iBit;
6513}
6514
6515
6516/**
6517 * Finds the first bit which is set in the given 32-bit integer.
6518 * Bits are numbered from 1 (least significant) to 32.
6519 *
6520 * @returns index [1..32] of the first set bit.
6521 * @returns 0 if all bits are cleared.
6522 * @param i32 Integer to search for set bits.
6523 * @remark Similar to ffs() in BSD.
6524 */
6525DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6526{
6527 return ASMBitFirstSetU32((uint32_t)i32);
6528}
6529
6530
6531/**
6532 * Finds the last bit which is set in the given 32-bit integer.
6533 * Bits are numbered from 1 (least significant) to 32.
6534 *
6535 * @returns index [1..32] of the last set bit.
6536 * @returns 0 if all bits are cleared.
6537 * @param u32 Integer to search for set bits.
6538 * @remark Similar to fls() in BSD.
6539 */
6540DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6541{
6542# if RT_INLINE_ASM_USES_INTRIN
6543 unsigned long iBit;
6544 if (_BitScanReverse(&iBit, u32))
6545 iBit++;
6546 else
6547 iBit = 0;
6548# elif RT_INLINE_ASM_GNU_STYLE
6549 uint32_t iBit;
6550 __asm__ __volatile__("bsrl %1, %0\n\t"
6551 "jnz 1f\n\t"
6552 "xorl %0, %0\n\t"
6553 "jmp 2f\n"
6554 "1:\n\t"
6555 "incl %0\n"
6556 "2:\n\t"
6557 : "=r" (iBit)
6558 : "rm" (u32));
6559# else
6560 uint32_t iBit;
6561 _asm
6562 {
6563 bsr eax, [u32]
6564 jnz found
6565 xor eax, eax
6566 jmp done
6567 found:
6568 inc eax
6569 done:
6570 mov [iBit], eax
6571 }
6572# endif
6573 return iBit;
6574}
6575
6576
6577/**
6578 * Finds the last bit which is set in the given 32-bit integer.
6579 * Bits are numbered from 1 (least significant) to 32.
6580 *
6581 * @returns index [1..32] of the last set bit.
6582 * @returns 0 if all bits are cleared.
6583 * @param i32 Integer to search for set bits.
6584 * @remark Similar to fls() in BSD.
6585 */
6586DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6587{
6588 return ASMBitLastSetU32((uint32_t)i32);
6589}
6590
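/* Examples of the 1-based indexing used by these helpers (illustration only):
 *      ASMBitFirstSetU32(0)          == 0      (no bits set)
 *      ASMBitFirstSetU32(0x00000001) == 1
 *      ASMBitFirstSetU32(0x80000000) == 32
 *      ASMBitLastSetU32(0x00000006)  == 3
 */
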
6591/**
6592 * Reverse the byte order of the given 16-bit integer.
6593 *
6594 * @returns The byte-swapped value.
6595 * @param u16 16-bit integer value.
6596 */
6597DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6598{
6599#if RT_INLINE_ASM_USES_INTRIN
6600 u16 = _byteswap_ushort(u16);
6601#elif RT_INLINE_ASM_GNU_STYLE
6602 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6603#else
6604 _asm
6605 {
6606 mov ax, [u16]
6607 ror ax, 8
6608 mov [u16], ax
6609 }
6610#endif
6611 return u16;
6612}
6613
6614/**
6615 * Reverse the byte order of the given 32-bit integer.
6616 *
6617 * @returns The byte-swapped value.
6618 * @param u32 32-bit integer value.
6619 */
6620DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6621{
6622#if RT_INLINE_ASM_USES_INTRIN
6623 u32 = _byteswap_ulong(u32);
6624#elif RT_INLINE_ASM_GNU_STYLE
6625 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6626#else
6627 _asm
6628 {
6629 mov eax, [u32]
6630 bswap eax
6631 mov [u32], eax
6632 }
6633#endif
6634 return u32;
6635}
6636
6637
6638/**
6639 * Reverse the byte order of the given 64-bit integer.
6640 *
6641 * @returns The byte-swapped value.
6642 * @param u64 64-bit integer value.
6643 */
6644DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6645{
6646#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6647 u64 = _byteswap_uint64(u64);
6648#else
6649 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6650 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6651#endif
6652 return u64;
6653}
6654
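/* Examples of the byte order reversal (illustration only):
 *      ASMByteSwapU16(UINT16_C(0x1234))             == UINT16_C(0x3412)
 *      ASMByteSwapU32(UINT32_C(0x12345678))         == UINT32_C(0x78563412)
 *      ASMByteSwapU64(UINT64_C(0x0123456789abcdef)) == UINT64_C(0xefcdab8967452301)
 */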
6655
6656/** @} */
6657
6658
6659/** @} */
6660#endif
6661