VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 21095

Last change on this file since 21095 was 20789, checked in by vboxsync, 16 years ago

iprt/asm.h: larger notes about alignment.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 169.6 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
 * Defined as 1 if we're compiling with _MSC_VER 1400 or later (the compiler
 * intrinsics from <intrin.h> are used then). Otherwise defined as 0.
40 */
41
/* Solaris 10 header ugliness: get rid of a macro named 'u' if one is in
 * effect, so the name can be used as an ordinary identifier below. */
#if defined(u)
# undef u
#endif
46
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
 /* Emit the intrinsics at all optimization levels. */
 /* Barriers, CPU identification, interrupt flag, TSC and MSRs. */
# pragma intrinsic(_ReadWriteBarrier, __cpuid, _enable, _disable, __rdtsc)
# pragma intrinsic(__readmsr, __writemsr)
 /* Port output. */
# pragma intrinsic(__outbyte, __outbytestring, __outword, __outwordstring)
# pragma intrinsic(__outdword, __outdwordstring)
 /* Port input. */
# pragma intrinsic(__inbyte, __inbytestring, __inword, __inwordstring)
# pragma intrinsic(__indword, __indwordstring)
 /* TLB / cache maintenance and string stores. */
# pragma intrinsic(__invlpg, __wbinvd, __stosd, __stosw, __stosb)
 /* Control and debug registers. */
# pragma intrinsic(__readcr0, __readcr2, __readcr3, __readcr4)
# pragma intrinsic(__writecr0, __writecr3, __writecr4)
# pragma intrinsic(__readdr, __writedr)
 /* Bit scanning, bit testing and byte swapping. */
# pragma intrinsic(_BitScanForward, _BitScanReverse)
# pragma intrinsic(_bittest, _bittestandset, _bittestandreset, _bittestandcomplement)
# pragma intrinsic(_byteswap_ushort, _byteswap_ulong)
 /* Interlocked operations. */
# pragma intrinsic(_interlockedbittestandset, _interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd, _InterlockedOr)
# pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange, _InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange, _InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
 /* Only requested when building for AMD64. */
#  pragma intrinsic(__stosq, __readcr8, __writecr8)
#  pragma intrinsic(_byteswap_uint64, _InterlockedExchange64)
# endif
#endif
/* Everything else: no intrinsics. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
115
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.
 *
 * Expands to a plain 1 when compiling for i386 with gcc 4.3.x and to a plain 0
 * otherwise.  The condition is evaluated here rather than inside the macro
 * body: a macro that expands to an expression containing 'defined' has
 * undefined behaviour when used in \#if and cannot be used in ordinary C
 * expressions at all.
 */
#ifdef __GNUC__
# if __GNUC__ == 4 && __GNUC_MINOR__ == 3 && defined(__i386__)
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# else
#  define RT_INLINE_ASM_GCC_4_3_X_X86 0
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
125
126
127
128/** @defgroup grp_asm ASM - Assembly Routines
129 * @ingroup grp_rt
130 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with regard to whether
 *          the operation will complete before any subsequent memory access.
 *          (please, correct if wrong.)
137 *
138 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
139 * are unordered (note the Uo).
140 *
141 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
142 * or even optimize assembler instructions away. For instance, in the following code
143 * the second rdmsr instruction is optimized away because gcc treats that instruction
144 * as deterministic:
145 *
146 * @code
 *          static inline uint64_t rdmsr_low(int idx)
 *          {
 *              uint32_t low;
 *              __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *              return low;
 *          }
152 * ...
153 * uint32_t msr1 = rdmsr_low(1);
154 * foo(msr1);
155 * msr1 = rdmsr_low(1);
156 * bar(msr1);
157 * @endcode
158 *
159 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
160 * use the result of the first call as input parameter for bar() as well. For rdmsr this
161 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
162 * machine status information in general.
163 *
164 * @{
165 */
166
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler does not support inline assembly, in which case
 * the ASM* functions are implemented in an external .asm file.  Set to 0
 * otherwise.  Currently 1 is selected for the Microsoft compiler when
 * RT_ARCH_AMD64 is defined.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
179
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler understands GNU style inline assembly and to 0
 * when it does not.  Every compiler except the Microsoft one is taken to
 * understand it.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
188
189
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR image, byte packed: a 16-bit size followed directly by the address.
 * This is the operand type of ASMGetIDTR() and ASMSetIDTR().
 */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR;
/** Pointer to an IDTR image. */
typedef RTIDTR *PRTIDTR;
#pragma pack()
201
#pragma pack(1)
/**
 * GDTR image, byte packed: a 16-bit size followed directly by the address.
 * This is the operand type of ASMGetGDTR().
 */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR;
/** Pointer to a GDTR image. */
typedef RTGDTR *PRTGDTR;
#pragma pack()
212
213
/** @def ASMReturnAddress
 * Yields the return address of the function or method it is used in, i.e. the
 * address execution continues at in the caller.
 */
#if !defined(_MSC_VER) && (defined(__GNUC__) || defined(DOXYGEN_RUNNING))
# define ASMReturnAddress() __builtin_return_address(0)
#elif defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#else
# error "Unsupported compiler."
#endif
229
230
231/**
232 * Gets the content of the IDTR CPU register.
233 * @param pIdtr Where to store the IDTR contents.
234 */
235#if RT_INLINE_ASM_EXTERNAL
236DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
237#else
238DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
239{
240# if RT_INLINE_ASM_GNU_STYLE
241 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
242# else
243 __asm
244 {
245# ifdef RT_ARCH_AMD64
246 mov rax, [pIdtr]
247 sidt [rax]
248# else
249 mov eax, [pIdtr]
250 sidt [eax]
251# endif
252 }
253# endif
254}
255#endif
256
257
258/**
259 * Sets the content of the IDTR CPU register.
260 * @param pIdtr Where to load the IDTR contents from
261 */
262#if RT_INLINE_ASM_EXTERNAL
263DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
264#else
265DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
266{
267# if RT_INLINE_ASM_GNU_STYLE
268 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
269# else
270 __asm
271 {
272# ifdef RT_ARCH_AMD64
273 mov rax, [pIdtr]
274 lidt [rax]
275# else
276 mov eax, [pIdtr]
277 lidt [eax]
278# endif
279 }
280# endif
281}
282#endif
283
284
285/**
286 * Gets the content of the GDTR CPU register.
287 * @param pGdtr Where to store the GDTR contents.
288 */
289#if RT_INLINE_ASM_EXTERNAL
290DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
291#else
292DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
293{
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
296# else
297 __asm
298 {
299# ifdef RT_ARCH_AMD64
300 mov rax, [pGdtr]
301 sgdt [rax]
302# else
303 mov eax, [pGdtr]
304 sgdt [eax]
305# endif
306 }
307# endif
308}
309#endif
310
311/**
312 * Get the cs register.
313 * @returns cs.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetCS(void);
317#else
318DECLINLINE(RTSEL) ASMGetCS(void)
319{
320 RTSEL SelCS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
323# else
324 __asm
325 {
326 mov ax, cs
327 mov [SelCS], ax
328 }
329# endif
330 return SelCS;
331}
332#endif
333
334
335/**
336 * Get the DS register.
337 * @returns DS.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetDS(void);
341#else
342DECLINLINE(RTSEL) ASMGetDS(void)
343{
344 RTSEL SelDS;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
347# else
348 __asm
349 {
350 mov ax, ds
351 mov [SelDS], ax
352 }
353# endif
354 return SelDS;
355}
356#endif
357
358
359/**
360 * Get the ES register.
361 * @returns ES.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetES(void);
365#else
366DECLINLINE(RTSEL) ASMGetES(void)
367{
368 RTSEL SelES;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
371# else
372 __asm
373 {
374 mov ax, es
375 mov [SelES], ax
376 }
377# endif
378 return SelES;
379}
380#endif
381
382
383/**
384 * Get the FS register.
385 * @returns FS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetFS(void);
389#else
390DECLINLINE(RTSEL) ASMGetFS(void)
391{
392 RTSEL SelFS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
395# else
396 __asm
397 {
398 mov ax, fs
399 mov [SelFS], ax
400 }
401# endif
402 return SelFS;
403}
404# endif
405
406
407/**
408 * Get the GS register.
409 * @returns GS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetGS(void);
413#else
414DECLINLINE(RTSEL) ASMGetGS(void)
415{
416 RTSEL SelGS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
419# else
420 __asm
421 {
422 mov ax, gs
423 mov [SelGS], ax
424 }
425# endif
426 return SelGS;
427}
428#endif
429
430
431/**
432 * Get the SS register.
433 * @returns SS.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetSS(void);
437#else
438DECLINLINE(RTSEL) ASMGetSS(void)
439{
440 RTSEL SelSS;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
443# else
444 __asm
445 {
446 mov ax, ss
447 mov [SelSS], ax
448 }
449# endif
450 return SelSS;
451}
452#endif
453
454
455/**
456 * Get the TR register.
457 * @returns TR.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTSEL) ASMGetTR(void);
461#else
462DECLINLINE(RTSEL) ASMGetTR(void)
463{
464 RTSEL SelTR;
465# if RT_INLINE_ASM_GNU_STYLE
466 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
467# else
468 __asm
469 {
470 str ax
471 mov [SelTR], ax
472 }
473# endif
474 return SelTR;
475}
476#endif
477
478
479/**
480 * Get the [RE]FLAGS register.
481 * @returns [RE]FLAGS.
482 */
483#if RT_INLINE_ASM_EXTERNAL
484DECLASM(RTCCUINTREG) ASMGetFlags(void);
485#else
486DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
487{
488 RTCCUINTREG uFlags;
489# if RT_INLINE_ASM_GNU_STYLE
490# ifdef RT_ARCH_AMD64
491 __asm__ __volatile__("pushfq\n\t"
492 "popq %0\n\t"
493 : "=g" (uFlags));
494# else
495 __asm__ __volatile__("pushfl\n\t"
496 "popl %0\n\t"
497 : "=g" (uFlags));
498# endif
499# else
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 pushfq
504 pop [uFlags]
505# else
506 pushfd
507 pop [uFlags]
508# endif
509 }
510# endif
511 return uFlags;
512}
513#endif
514
515
516/**
517 * Set the [RE]FLAGS register.
518 * @param uFlags The new [RE]FLAGS value.
519 */
520#if RT_INLINE_ASM_EXTERNAL
521DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
522#else
523DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
524{
525# if RT_INLINE_ASM_GNU_STYLE
526# ifdef RT_ARCH_AMD64
527 __asm__ __volatile__("pushq %0\n\t"
528 "popfq\n\t"
529 : : "g" (uFlags));
530# else
531 __asm__ __volatile__("pushl %0\n\t"
532 "popfl\n\t"
533 : : "g" (uFlags));
534# endif
535# else
536 __asm
537 {
538# ifdef RT_ARCH_AMD64
539 push [uFlags]
540 popfq
541# else
542 push [uFlags]
543 popfd
544# endif
545 }
546# endif
547}
548#endif
549
550
551/**
552 * Gets the content of the CPU timestamp counter register.
553 *
554 * @returns TSC.
555 */
556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
557DECLASM(uint64_t) ASMReadTSC(void);
558#else
559DECLINLINE(uint64_t) ASMReadTSC(void)
560{
561 RTUINT64U u;
562# if RT_INLINE_ASM_GNU_STYLE
563 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
564# else
565# if RT_INLINE_ASM_USES_INTRIN
566 u.u = __rdtsc();
567# else
568 __asm
569 {
570 rdtsc
571 mov [u.s.Lo], eax
572 mov [u.s.Hi], edx
573 }
574# endif
575# endif
576 return u.u;
577}
578#endif
579
580
581/**
582 * Performs the cpuid instruction returning all registers.
583 *
584 * @param uOperator CPUID operation (eax).
585 * @param pvEAX Where to store eax.
586 * @param pvEBX Where to store ebx.
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
595{
596# if RT_INLINE_ASM_GNU_STYLE
597# ifdef RT_ARCH_AMD64
598 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
599 __asm__ ("cpuid\n\t"
600 : "=a" (uRAX),
601 "=b" (uRBX),
602 "=c" (uRCX),
603 "=d" (uRDX)
604 : "0" (uOperator));
605 *(uint32_t *)pvEAX = (uint32_t)uRAX;
606 *(uint32_t *)pvEBX = (uint32_t)uRBX;
607 *(uint32_t *)pvECX = (uint32_t)uRCX;
608 *(uint32_t *)pvEDX = (uint32_t)uRDX;
609# else
610 __asm__ ("xchgl %%ebx, %1\n\t"
611 "cpuid\n\t"
612 "xchgl %%ebx, %1\n\t"
613 : "=a" (*(uint32_t *)pvEAX),
614 "=r" (*(uint32_t *)pvEBX),
615 "=c" (*(uint32_t *)pvECX),
616 "=d" (*(uint32_t *)pvEDX)
617 : "0" (uOperator));
618# endif
619
620# elif RT_INLINE_ASM_USES_INTRIN
621 int aInfo[4];
622 __cpuid(aInfo, uOperator);
623 *(uint32_t *)pvEAX = aInfo[0];
624 *(uint32_t *)pvEBX = aInfo[1];
625 *(uint32_t *)pvECX = aInfo[2];
626 *(uint32_t *)pvEDX = aInfo[3];
627
628# else
629 uint32_t uEAX;
630 uint32_t uEBX;
631 uint32_t uECX;
632 uint32_t uEDX;
633 __asm
634 {
635 push ebx
636 mov eax, [uOperator]
637 cpuid
638 mov [uEAX], eax
639 mov [uEBX], ebx
640 mov [uECX], ecx
641 mov [uEDX], edx
642 pop ebx
643 }
644 *(uint32_t *)pvEAX = uEAX;
645 *(uint32_t *)pvEBX = uEBX;
646 *(uint32_t *)pvECX = uECX;
647 *(uint32_t *)pvEDX = uEDX;
648# endif
649}
650#endif
651
652
653/**
654 * Performs the cpuid instruction returning all registers.
655 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
656 *
657 * @param uOperator CPUID operation (eax).
658 * @param uIdxECX ecx index
659 * @param pvEAX Where to store eax.
660 * @param pvEBX Where to store ebx.
661 * @param pvECX Where to store ecx.
662 * @param pvEDX Where to store edx.
663 * @remark We're using void pointers to ease the use of special bitfield structures and such.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
667#else
668DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
669{
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef RT_ARCH_AMD64
672 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
673 __asm__ ("cpuid\n\t"
674 : "=a" (uRAX),
675 "=b" (uRBX),
676 "=c" (uRCX),
677 "=d" (uRDX)
678 : "0" (uOperator),
679 "2" (uIdxECX));
680 *(uint32_t *)pvEAX = (uint32_t)uRAX;
681 *(uint32_t *)pvEBX = (uint32_t)uRBX;
682 *(uint32_t *)pvECX = (uint32_t)uRCX;
683 *(uint32_t *)pvEDX = (uint32_t)uRDX;
684# else
685 __asm__ ("xchgl %%ebx, %1\n\t"
686 "cpuid\n\t"
687 "xchgl %%ebx, %1\n\t"
688 : "=a" (*(uint32_t *)pvEAX),
689 "=r" (*(uint32_t *)pvEBX),
690 "=c" (*(uint32_t *)pvECX),
691 "=d" (*(uint32_t *)pvEDX)
692 : "0" (uOperator),
693 "2" (uIdxECX));
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 /* ??? another intrinsic ??? */
699 __cpuid(aInfo, uOperator);
700 *(uint32_t *)pvEAX = aInfo[0];
701 *(uint32_t *)pvEBX = aInfo[1];
702 *(uint32_t *)pvECX = aInfo[2];
703 *(uint32_t *)pvEDX = aInfo[3];
704
705# else
706 uint32_t uEAX;
707 uint32_t uEBX;
708 uint32_t uECX;
709 uint32_t uEDX;
710 __asm
711 {
712 push ebx
713 mov eax, [uOperator]
714 mov ecx, [uIdxECX]
715 cpuid
716 mov [uEAX], eax
717 mov [uEBX], ebx
718 mov [uECX], ecx
719 mov [uEDX], edx
720 pop ebx
721 }
722 *(uint32_t *)pvEAX = uEAX;
723 *(uint32_t *)pvEBX = uEBX;
724 *(uint32_t *)pvECX = uECX;
725 *(uint32_t *)pvEDX = uEDX;
726# endif
727}
728#endif
729
730
731/**
732 * Performs the cpuid instruction returning ecx and edx.
733 *
734 * @param uOperator CPUID operation (eax).
735 * @param pvECX Where to store ecx.
736 * @param pvEDX Where to store edx.
737 * @remark We're using void pointers to ease the use of special bitfield structures and such.
738 */
739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
740DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
741#else
742DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
743{
744 uint32_t uEBX;
745 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
746}
747#endif
748
749
750/**
751 * Performs the cpuid instruction returning edx.
752 *
753 * @param uOperator CPUID operation (eax).
754 * @returns EDX after cpuid operation.
755 */
756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
757DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
758#else
759DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
760{
761 RTCCUINTREG xDX;
762# if RT_INLINE_ASM_GNU_STYLE
763# ifdef RT_ARCH_AMD64
764 RTCCUINTREG uSpill;
765 __asm__ ("cpuid"
766 : "=a" (uSpill),
767 "=d" (xDX)
768 : "0" (uOperator)
769 : "rbx", "rcx");
770# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
771 __asm__ ("push %%ebx\n\t"
772 "cpuid\n\t"
773 "pop %%ebx\n\t"
774 : "=a" (uOperator),
775 "=d" (xDX)
776 : "0" (uOperator)
777 : "ecx");
778# else
779 __asm__ ("cpuid"
780 : "=a" (uOperator),
781 "=d" (xDX)
782 : "0" (uOperator)
783 : "ebx", "ecx");
784# endif
785
786# elif RT_INLINE_ASM_USES_INTRIN
787 int aInfo[4];
788 __cpuid(aInfo, uOperator);
789 xDX = aInfo[3];
790
791# else
792 __asm
793 {
794 push ebx
795 mov eax, [uOperator]
796 cpuid
797 mov [xDX], edx
798 pop ebx
799 }
800# endif
801 return (uint32_t)xDX;
802}
803#endif
804
805
806/**
807 * Performs the cpuid instruction returning ecx.
808 *
809 * @param uOperator CPUID operation (eax).
810 * @returns ECX after cpuid operation.
811 */
812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
813DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
814#else
815DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
816{
817 RTCCUINTREG xCX;
818# if RT_INLINE_ASM_GNU_STYLE
819# ifdef RT_ARCH_AMD64
820 RTCCUINTREG uSpill;
821 __asm__ ("cpuid"
822 : "=a" (uSpill),
823 "=c" (xCX)
824 : "0" (uOperator)
825 : "rbx", "rdx");
826# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
827 __asm__ ("push %%ebx\n\t"
828 "cpuid\n\t"
829 "pop %%ebx\n\t"
830 : "=a" (uOperator),
831 "=c" (xCX)
832 : "0" (uOperator)
833 : "edx");
834# else
835 __asm__ ("cpuid"
836 : "=a" (uOperator),
837 "=c" (xCX)
838 : "0" (uOperator)
839 : "ebx", "edx");
840
841# endif
842
843# elif RT_INLINE_ASM_USES_INTRIN
844 int aInfo[4];
845 __cpuid(aInfo, uOperator);
846 xCX = aInfo[2];
847
848# else
849 __asm
850 {
851 push ebx
852 mov eax, [uOperator]
853 cpuid
854 mov [xCX], ecx
855 pop ebx
856 }
857# endif
858 return (uint32_t)xCX;
859}
860#endif
861
862
863/**
864 * Checks if the current CPU supports CPUID.
865 *
866 * @returns true if CPUID is supported.
867 */
868DECLINLINE(bool) ASMHasCpuId(void)
869{
870#ifdef RT_ARCH_AMD64
871 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
872#else /* !RT_ARCH_AMD64 */
873 bool fRet = false;
874# if RT_INLINE_ASM_GNU_STYLE
875 uint32_t u1;
876 uint32_t u2;
877 __asm__ ("pushf\n\t"
878 "pop %1\n\t"
879 "mov %1, %2\n\t"
880 "xorl $0x200000, %1\n\t"
881 "push %1\n\t"
882 "popf\n\t"
883 "pushf\n\t"
884 "pop %1\n\t"
885 "cmpl %1, %2\n\t"
886 "setne %0\n\t"
887 "push %2\n\t"
888 "popf\n\t"
889 : "=m" (fRet), "=r" (u1), "=r" (u2));
890# else
891 __asm
892 {
893 pushfd
894 pop eax
895 mov ebx, eax
896 xor eax, 0200000h
897 push eax
898 popfd
899 pushfd
900 pop eax
901 cmp eax, ebx
902 setne fRet
903 push ebx
904 popfd
905 }
906# endif
907 return fRet;
908#endif /* !RT_ARCH_AMD64 */
909}
910
911
912/**
913 * Gets the APIC ID of the current CPU.
914 *
915 * @returns the APIC ID.
916 */
917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
918DECLASM(uint8_t) ASMGetApicId(void);
919#else
920DECLINLINE(uint8_t) ASMGetApicId(void)
921{
922 RTCCUINTREG xBX;
923# if RT_INLINE_ASM_GNU_STYLE
924# ifdef RT_ARCH_AMD64
925 RTCCUINTREG uSpill;
926 __asm__ ("cpuid"
927 : "=a" (uSpill),
928 "=b" (xBX)
929 : "0" (1)
930 : "rcx", "rdx");
931# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
932 RTCCUINTREG uSpill;
933 __asm__ ("mov %%ebx,%1\n\t"
934 "cpuid\n\t"
935 "xchgl %%ebx,%1\n\t"
936 : "=a" (uSpill),
937 "=r" (xBX)
938 : "0" (1)
939 : "ecx", "edx");
940# else
941 RTCCUINTREG uSpill;
942 __asm__ ("cpuid"
943 : "=a" (uSpill),
944 "=b" (xBX)
945 : "0" (1)
946 : "ecx", "edx");
947# endif
948
949# elif RT_INLINE_ASM_USES_INTRIN
950 int aInfo[4];
951 __cpuid(aInfo, 1);
952 xBX = aInfo[1];
953
954# else
955 __asm
956 {
957 push ebx
958 mov eax, 1
959 cpuid
960 mov [xBX], ebx
961 pop ebx
962 }
963# endif
964 return (uint8_t)(xBX >> 24);
965}
966#endif
967
968
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output, that is
 * whether the three registers spell out "GenuineIntel".
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    if (uEBX != 0x756e6547) /* 'Genu' */
        return false;
    if (uECX != 0x6c65746e) /* 'ntel' */
        return false;
    return uEDX == 0x49656e69; /* 'ineI' */
}
983
984
985/**
986 * Tests if this is an genuin Intel CPU.
987 *
988 * @returns true/false.
989 */
990DECLINLINE(bool) ASMIsIntelCpu(void)
991{
992 uint32_t uEAX, uEBX, uECX, uEDX;
993 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
994 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
995}
996
997
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family is found in bits 11:8.  When it is 0xf, the extended family
 * (bits 27:20, all eight of them) is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    /* The extended family field is 8 bits wide, don't drop the top one. */
    return ((uEAX >> 20) & 0xff) + 0xf;
}
1010
1011
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The extended model (bits 19:16) is merged in as the high nibble when the
 * base family (bits 11:8) is 0xf or 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf; /* family! */
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || uFamily == 0x6)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1025
1026
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The extended model (bits 19:16) is merged in as the high nibble only when
 * the base family (bits 11:8) is 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1040
1041
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The extended model (bits 19:16) is merged in as the high nibble when the
 * base family (bits 11:8) is 0xf, or when it is 0x6 and the CPU is an Intel one.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf; /* family! */
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || (uFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1055
1056
/**
 * Extracts the CPU stepping (bits 3:0) from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * @returns Stepping.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const fSteppingMask = 0xf;
    return uEAX & fSteppingMask;
}
1067
1068
1069/**
1070 * Get cr0.
1071 * @returns cr0.
1072 */
1073#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1074DECLASM(RTCCUINTREG) ASMGetCR0(void);
1075#else
1076DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1077{
1078 RTCCUINTREG uCR0;
1079# if RT_INLINE_ASM_USES_INTRIN
1080 uCR0 = __readcr0();
1081
1082# elif RT_INLINE_ASM_GNU_STYLE
1083# ifdef RT_ARCH_AMD64
1084 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1085# else
1086 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1087# endif
1088# else
1089 __asm
1090 {
1091# ifdef RT_ARCH_AMD64
1092 mov rax, cr0
1093 mov [uCR0], rax
1094# else
1095 mov eax, cr0
1096 mov [uCR0], eax
1097# endif
1098 }
1099# endif
1100 return uCR0;
1101}
1102#endif
1103
1104
1105/**
1106 * Sets the CR0 register.
1107 * @param uCR0 The new CR0 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1111#else
1112DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr0(uCR0);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1120# else
1121 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR0]
1128 mov cr0, rax
1129# else
1130 mov eax, [uCR0]
1131 mov cr0, eax
1132# endif
1133 }
1134# endif
1135}
1136#endif
1137
1138
1139/**
1140 * Get cr2.
1141 * @returns cr2.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(RTCCUINTREG) ASMGetCR2(void);
1145#else
1146DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1147{
1148 RTCCUINTREG uCR2;
1149# if RT_INLINE_ASM_USES_INTRIN
1150 uCR2 = __readcr2();
1151
1152# elif RT_INLINE_ASM_GNU_STYLE
1153# ifdef RT_ARCH_AMD64
1154 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1155# else
1156 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1157# endif
1158# else
1159 __asm
1160 {
1161# ifdef RT_ARCH_AMD64
1162 mov rax, cr2
1163 mov [uCR2], rax
1164# else
1165 mov eax, cr2
1166 mov [uCR2], eax
1167# endif
1168 }
1169# endif
1170 return uCR2;
1171}
1172#endif
1173
1174
1175/**
1176 * Sets the CR2 register.
1177 * @param uCR2 The new CR0 value.
1178 */
1179#if RT_INLINE_ASM_EXTERNAL
1180DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1181#else
1182DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185# ifdef RT_ARCH_AMD64
1186 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1187# else
1188 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1189# endif
1190# else
1191 __asm
1192 {
1193# ifdef RT_ARCH_AMD64
1194 mov rax, [uCR2]
1195 mov cr2, rax
1196# else
1197 mov eax, [uCR2]
1198 mov cr2, eax
1199# endif
1200 }
1201# endif
1202}
1203#endif
1204
1205
1206/**
1207 * Get cr3.
1208 * @returns cr3.
1209 */
1210#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1211DECLASM(RTCCUINTREG) ASMGetCR3(void);
1212#else
1213DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1214{
1215 RTCCUINTREG uCR3;
1216# if RT_INLINE_ASM_USES_INTRIN
1217 uCR3 = __readcr3();
1218
1219# elif RT_INLINE_ASM_GNU_STYLE
1220# ifdef RT_ARCH_AMD64
1221 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1222# else
1223 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1224# endif
1225# else
1226 __asm
1227 {
1228# ifdef RT_ARCH_AMD64
1229 mov rax, cr3
1230 mov [uCR3], rax
1231# else
1232 mov eax, cr3
1233 mov [uCR3], eax
1234# endif
1235 }
1236# endif
1237 return uCR3;
1238}
1239#endif
1240
1241
1242/**
1243 * Sets the CR3 register.
1244 *
1245 * @param uCR3 New CR3 value.
1246 */
1247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1248DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1249#else
1250DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1251{
1252# if RT_INLINE_ASM_USES_INTRIN
1253 __writecr3(uCR3);
1254
1255# elif RT_INLINE_ASM_GNU_STYLE
1256# ifdef RT_ARCH_AMD64
1257 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1258# else
1259 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1260# endif
1261# else
1262 __asm
1263 {
1264# ifdef RT_ARCH_AMD64
1265 mov rax, [uCR3]
1266 mov cr3, rax
1267# else
1268 mov eax, [uCR3]
1269 mov cr3, eax
1270# endif
1271 }
1272# endif
1273}
1274#endif
1275
1276
1277/**
1278 * Reloads the CR3 register.
1279 */
1280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1281DECLASM(void) ASMReloadCR3(void);
1282#else
1283DECLINLINE(void) ASMReloadCR3(void)
1284{
1285# if RT_INLINE_ASM_USES_INTRIN
1286 __writecr3(__readcr3());
1287
1288# elif RT_INLINE_ASM_GNU_STYLE
1289 RTCCUINTREG u;
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %%cr3, %0\n\t"
1292 "movq %0, %%cr3\n\t"
1293 : "=r" (u));
1294# else
1295 __asm__ __volatile__("movl %%cr3, %0\n\t"
1296 "movl %0, %%cr3\n\t"
1297 : "=r" (u));
1298# endif
1299# else
1300 __asm
1301 {
1302# ifdef RT_ARCH_AMD64
1303 mov rax, cr3
1304 mov cr3, rax
1305# else
1306 mov eax, cr3
1307 mov cr3, eax
1308# endif
1309 }
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Get cr4.
1317 * @returns cr4.
1318 */
1319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1320DECLASM(RTCCUINTREG) ASMGetCR4(void);
1321#else
1322DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1323{
1324 RTCCUINTREG uCR4;
1325# if RT_INLINE_ASM_USES_INTRIN
1326 uCR4 = __readcr4();
1327
1328# elif RT_INLINE_ASM_GNU_STYLE
1329# ifdef RT_ARCH_AMD64
1330 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1331# else
1332 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1333# endif
1334# else
1335 __asm
1336 {
1337# ifdef RT_ARCH_AMD64
1338 mov rax, cr4
1339 mov [uCR4], rax
1340# else
1341 push eax /* just in case */
1342 /*mov eax, cr4*/
1343 _emit 0x0f
1344 _emit 0x20
1345 _emit 0xe0
1346 mov [uCR4], eax
1347 pop eax
1348# endif
1349 }
1350# endif
1351 return uCR4;
1352}
1353#endif
1354
1355
1356/**
1357 * Sets the CR4 register.
1358 *
1359 * @param uCR4 New CR4 value.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1363#else
1364DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1365{
1366# if RT_INLINE_ASM_USES_INTRIN
1367 __writecr4(uCR4);
1368
1369# elif RT_INLINE_ASM_GNU_STYLE
1370# ifdef RT_ARCH_AMD64
1371 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1372# else
1373 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1374# endif
1375# else
1376 __asm
1377 {
1378# ifdef RT_ARCH_AMD64
1379 mov rax, [uCR4]
1380 mov cr4, rax
1381# else
1382 mov eax, [uCR4]
1383 _emit 0x0F
1384 _emit 0x22
1385 _emit 0xE0 /* mov cr4, eax */
1386# endif
1387 }
1388# endif
1389}
1390#endif
1391
1392
1393/**
1394 * Get cr8.
1395 * @returns cr8.
1396 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1399DECLASM(RTCCUINTREG) ASMGetCR8(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1402{
1403# ifdef RT_ARCH_AMD64
1404 RTCCUINTREG uCR8;
1405# if RT_INLINE_ASM_USES_INTRIN
1406 uCR8 = __readcr8();
1407
1408# elif RT_INLINE_ASM_GNU_STYLE
1409 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1410# else
1411 __asm
1412 {
1413 mov rax, cr8
1414 mov [uCR8], rax
1415 }
1416# endif
1417 return uCR8;
1418# else /* !RT_ARCH_AMD64 */
1419 return 0;
1420# endif /* !RT_ARCH_AMD64 */
1421}
1422#endif
1423
1424
1425/**
1426 * Enables interrupts (EFLAGS.IF).
1427 */
1428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1429DECLASM(void) ASMIntEnable(void);
1430#else
1431DECLINLINE(void) ASMIntEnable(void)
1432{
1433# if RT_INLINE_ASM_GNU_STYLE
1434 __asm("sti\n");
1435# elif RT_INLINE_ASM_USES_INTRIN
1436 _enable();
1437# else
1438 __asm sti
1439# endif
1440}
1441#endif
1442
1443
1444/**
1445 * Disables interrupts (!EFLAGS.IF).
1446 */
1447#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1448DECLASM(void) ASMIntDisable(void);
1449#else
1450DECLINLINE(void) ASMIntDisable(void)
1451{
1452# if RT_INLINE_ASM_GNU_STYLE
1453 __asm("cli\n");
1454# elif RT_INLINE_ASM_USES_INTRIN
1455 _disable();
1456# else
1457 __asm cli
1458# endif
1459}
1460#endif
1461
1462
1463/**
1464 * Disables interrupts and returns previous xFLAGS.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1467DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1470{
1471 RTCCUINTREG xFlags;
1472# if RT_INLINE_ASM_GNU_STYLE
1473# ifdef RT_ARCH_AMD64
1474 __asm__ __volatile__("pushfq\n\t"
1475 "cli\n\t"
1476 "popq %0\n\t"
1477 : "=rm" (xFlags));
1478# else
1479 __asm__ __volatile__("pushfl\n\t"
1480 "cli\n\t"
1481 "popl %0\n\t"
1482 : "=rm" (xFlags));
1483# endif
1484# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1485 xFlags = ASMGetFlags();
1486 _disable();
1487# else
1488 __asm {
1489 pushfd
1490 cli
1491 pop [xFlags]
1492 }
1493# endif
1494 return xFlags;
1495}
1496#endif
1497
1498
1499/**
1500 * Halts the CPU until interrupted.
1501 */
1502#if RT_INLINE_ASM_EXTERNAL
1503DECLASM(void) ASMHalt(void);
1504#else
1505DECLINLINE(void) ASMHalt(void)
1506{
1507# if RT_INLINE_ASM_GNU_STYLE
1508 __asm__ __volatile__("hlt\n\t");
1509# else
1510 __asm {
1511 hlt
1512 }
1513# endif
1514}
1515#endif
1516
1517
1518/**
1519 * Reads a machine specific register.
1520 *
1521 * @returns Register content.
1522 * @param uRegister Register to read.
1523 */
1524#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1525DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1526#else
1527DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1528{
1529 RTUINT64U u;
1530# if RT_INLINE_ASM_GNU_STYLE
1531 __asm__ __volatile__("rdmsr\n\t"
1532 : "=a" (u.s.Lo),
1533 "=d" (u.s.Hi)
1534 : "c" (uRegister));
1535
1536# elif RT_INLINE_ASM_USES_INTRIN
1537 u.u = __readmsr(uRegister);
1538
1539# else
1540 __asm
1541 {
1542 mov ecx, [uRegister]
1543 rdmsr
1544 mov [u.s.Lo], eax
1545 mov [u.s.Hi], edx
1546 }
1547# endif
1548
1549 return u.u;
1550}
1551#endif
1552
1553
1554/**
1555 * Writes a machine specific register.
1556 *
1557 * @returns Register content.
1558 * @param uRegister Register to write to.
1559 * @param u64Val Value to write.
1560 */
1561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1562DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1563#else
1564DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1565{
1566 RTUINT64U u;
1567
1568 u.u = u64Val;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 __asm__ __volatile__("wrmsr\n\t"
1571 ::"a" (u.s.Lo),
1572 "d" (u.s.Hi),
1573 "c" (uRegister));
1574
1575# elif RT_INLINE_ASM_USES_INTRIN
1576 __writemsr(uRegister, u.u);
1577
1578# else
1579 __asm
1580 {
1581 mov ecx, [uRegister]
1582 mov edx, [u.s.Hi]
1583 mov eax, [u.s.Lo]
1584 wrmsr
1585 }
1586# endif
1587}
1588#endif
1589
1590
1591/**
1592 * Reads low part of a machine specific register.
1593 *
1594 * @returns Register content.
1595 * @param uRegister Register to read.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1599#else
1600DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1601{
1602 uint32_t u32;
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__("rdmsr\n\t"
1605 : "=a" (u32)
1606 : "c" (uRegister)
1607 : "edx");
1608
1609# elif RT_INLINE_ASM_USES_INTRIN
1610 u32 = (uint32_t)__readmsr(uRegister);
1611
1612#else
1613 __asm
1614 {
1615 mov ecx, [uRegister]
1616 rdmsr
1617 mov [u32], eax
1618 }
1619# endif
1620
1621 return u32;
1622}
1623#endif
1624
1625
1626/**
1627 * Reads high part of a machine specific register.
1628 *
1629 * @returns Register content.
1630 * @param uRegister Register to read.
1631 */
1632#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1633DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1634#else
1635DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1636{
1637 uint32_t u32;
1638# if RT_INLINE_ASM_GNU_STYLE
1639 __asm__ __volatile__("rdmsr\n\t"
1640 : "=d" (u32)
1641 : "c" (uRegister)
1642 : "eax");
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1646
1647# else
1648 __asm
1649 {
1650 mov ecx, [uRegister]
1651 rdmsr
1652 mov [u32], edx
1653 }
1654# endif
1655
1656 return u32;
1657}
1658#endif
1659
1660
1661/**
1662 * Gets dr0.
1663 *
1664 * @returns dr0.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(RTCCUINTREG) ASMGetDR0(void);
1668#else
1669DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1670{
1671 RTCCUINTREG uDR0;
1672# if RT_INLINE_ASM_USES_INTRIN
1673 uDR0 = __readdr(0);
1674# elif RT_INLINE_ASM_GNU_STYLE
1675# ifdef RT_ARCH_AMD64
1676 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1677# else
1678 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1679# endif
1680# else
1681 __asm
1682 {
1683# ifdef RT_ARCH_AMD64
1684 mov rax, dr0
1685 mov [uDR0], rax
1686# else
1687 mov eax, dr0
1688 mov [uDR0], eax
1689# endif
1690 }
1691# endif
1692 return uDR0;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr1.
1699 *
1700 * @returns dr1.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR1(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1706{
1707 RTCCUINTREG uDR1;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR1 = __readdr(1);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1713# else
1714 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr1
1721 mov [uDR1], rax
1722# else
1723 mov eax, dr1
1724 mov [uDR1], eax
1725# endif
1726 }
1727# endif
1728 return uDR1;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr2.
1735 *
1736 * @returns dr2.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR2(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1742{
1743 RTCCUINTREG uDR2;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR2 = __readdr(2);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1749# else
1750 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr2
1757 mov [uDR2], rax
1758# else
1759 mov eax, dr2
1760 mov [uDR2], eax
1761# endif
1762 }
1763# endif
1764 return uDR2;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr3.
1771 *
1772 * @returns dr3.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR3(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1778{
1779 RTCCUINTREG uDR3;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR3 = __readdr(3);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1785# else
1786 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr3
1793 mov [uDR3], rax
1794# else
1795 mov eax, dr3
1796 mov [uDR3], eax
1797# endif
1798 }
1799# endif
1800 return uDR3;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr6.
1807 *
1808 * @returns dr6.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR6(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1814{
1815 RTCCUINTREG uDR6;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR6 = __readdr(6);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr6
1829 mov [uDR6], rax
1830# else
1831 mov eax, dr6
1832 mov [uDR6], eax
1833# endif
1834 }
1835# endif
1836 return uDR6;
1837}
1838#endif
1839
1840
1841/**
1842 * Reads and clears DR6.
1843 *
1844 * @returns DR6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1855# elif RT_INLINE_ASM_GNU_STYLE
1856 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1857# ifdef RT_ARCH_AMD64
1858 __asm__ __volatile__("movq %%dr6, %0\n\t"
1859 "movq %1, %%dr6\n\t"
1860 : "=r" (uDR6)
1861 : "r" (uNewValue));
1862# else
1863 __asm__ __volatile__("movl %%dr6, %0\n\t"
1864 "movl %1, %%dr6\n\t"
1865 : "=r" (uDR6)
1866 : "r" (uNewValue));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr6
1873 mov [uDR6], rax
1874 mov rcx, rax
1875 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1876 mov dr6, rcx
1877# else
1878 mov eax, dr6
1879 mov [uDR6], eax
1880 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1881 mov dr6, ecx
1882# endif
1883 }
1884# endif
1885 return uDR6;
1886}
1887#endif
1888
1889
1890/**
1891 * Gets dr7.
1892 *
1893 * @returns dr7.
1894 */
1895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1896DECLASM(RTCCUINTREG) ASMGetDR7(void);
1897#else
1898DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1899{
1900 RTCCUINTREG uDR7;
1901# if RT_INLINE_ASM_USES_INTRIN
1902 uDR7 = __readdr(7);
1903# elif RT_INLINE_ASM_GNU_STYLE
1904# ifdef RT_ARCH_AMD64
1905 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1906# else
1907 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1908# endif
1909# else
1910 __asm
1911 {
1912# ifdef RT_ARCH_AMD64
1913 mov rax, dr7
1914 mov [uDR7], rax
1915# else
1916 mov eax, dr7
1917 mov [uDR7], eax
1918# endif
1919 }
1920# endif
1921 return uDR7;
1922}
1923#endif
1924
1925
1926/**
1927 * Sets dr0.
1928 *
1929 * @param uDRVal Debug register value to write
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1933#else
1934DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1935{
1936# if RT_INLINE_ASM_USES_INTRIN
1937 __writedr(0, uDRVal);
1938# elif RT_INLINE_ASM_GNU_STYLE
1939# ifdef RT_ARCH_AMD64
1940 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1941# else
1942 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1943# endif
1944# else
1945 __asm
1946 {
1947# ifdef RT_ARCH_AMD64
1948 mov rax, [uDRVal]
1949 mov dr0, rax
1950# else
1951 mov eax, [uDRVal]
1952 mov dr0, eax
1953# endif
1954 }
1955# endif
1956}
1957#endif
1958
1959
1960/**
1961 * Sets dr1.
1962 *
1963 * @param uDRVal Debug register value to write
1964 */
1965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1966DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1967#else
1968DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1969{
1970# if RT_INLINE_ASM_USES_INTRIN
1971 __writedr(1, uDRVal);
1972# elif RT_INLINE_ASM_GNU_STYLE
1973# ifdef RT_ARCH_AMD64
1974 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1975# else
1976 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1977# endif
1978# else
1979 __asm
1980 {
1981# ifdef RT_ARCH_AMD64
1982 mov rax, [uDRVal]
1983 mov dr1, rax
1984# else
1985 mov eax, [uDRVal]
1986 mov dr1, eax
1987# endif
1988 }
1989# endif
1990}
1991#endif
1992
1993
1994/**
1995 * Sets dr2.
1996 *
1997 * @param uDRVal Debug register value to write
1998 */
1999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2000DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2001#else
2002DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2003{
2004# if RT_INLINE_ASM_USES_INTRIN
2005 __writedr(2, uDRVal);
2006# elif RT_INLINE_ASM_GNU_STYLE
2007# ifdef RT_ARCH_AMD64
2008 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2009# else
2010 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2011# endif
2012# else
2013 __asm
2014 {
2015# ifdef RT_ARCH_AMD64
2016 mov rax, [uDRVal]
2017 mov dr2, rax
2018# else
2019 mov eax, [uDRVal]
2020 mov dr2, eax
2021# endif
2022 }
2023# endif
2024}
2025#endif
2026
2027
2028/**
2029 * Sets dr3.
2030 *
2031 * @param uDRVal Debug register value to write
2032 */
2033#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2034DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2035#else
2036DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2037{
2038# if RT_INLINE_ASM_USES_INTRIN
2039 __writedr(3, uDRVal);
2040# elif RT_INLINE_ASM_GNU_STYLE
2041# ifdef RT_ARCH_AMD64
2042 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2043# else
2044 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2045# endif
2046# else
2047 __asm
2048 {
2049# ifdef RT_ARCH_AMD64
2050 mov rax, [uDRVal]
2051 mov dr3, rax
2052# else
2053 mov eax, [uDRVal]
2054 mov dr3, eax
2055# endif
2056 }
2057# endif
2058}
2059#endif
2060
2061
2062/**
2063 * Sets dr6.
2064 *
2065 * @param uDRVal Debug register value to write
2066 */
2067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2068DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2069#else
2070DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2071{
2072# if RT_INLINE_ASM_USES_INTRIN
2073 __writedr(6, uDRVal);
2074# elif RT_INLINE_ASM_GNU_STYLE
2075# ifdef RT_ARCH_AMD64
2076 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2077# else
2078 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2079# endif
2080# else
2081 __asm
2082 {
2083# ifdef RT_ARCH_AMD64
2084 mov rax, [uDRVal]
2085 mov dr6, rax
2086# else
2087 mov eax, [uDRVal]
2088 mov dr6, eax
2089# endif
2090 }
2091# endif
2092}
2093#endif
2094
2095
2096/**
2097 * Sets dr7.
2098 *
2099 * @param uDRVal Debug register value to write
2100 */
2101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2102DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2103#else
2104DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2105{
2106# if RT_INLINE_ASM_USES_INTRIN
2107 __writedr(7, uDRVal);
2108# elif RT_INLINE_ASM_GNU_STYLE
2109# ifdef RT_ARCH_AMD64
2110 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2111# else
2112 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2113# endif
2114# else
2115 __asm
2116 {
2117# ifdef RT_ARCH_AMD64
2118 mov rax, [uDRVal]
2119 mov dr7, rax
2120# else
2121 mov eax, [uDRVal]
2122 mov dr7, eax
2123# endif
2124 }
2125# endif
2126}
2127#endif
2128
2129
2130/**
2131 * Compiler memory barrier.
2132 *
2133 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2134 * values or any outstanding writes when returning from this function.
2135 *
2136 * This function must be used if non-volatile data is modified by a
2137 * device or the VMM. Typical cases are port access, MMIO access,
2138 * trapping instruction, etc.
2139 */
2140#if RT_INLINE_ASM_GNU_STYLE
2141# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2142#elif RT_INLINE_ASM_USES_INTRIN
2143# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2144#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2145DECLINLINE(void) ASMCompilerBarrier(void)
2146{
2147 __asm
2148 {
2149 }
2150}
2151#endif
2152
2153
2154/**
2155 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2156 *
2157 * @param Port I/O port to write to.
2158 * @param u8 8-bit integer to write.
2159 */
2160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2161DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2162#else
2163DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2164{
2165# if RT_INLINE_ASM_GNU_STYLE
2166 __asm__ __volatile__("outb %b1, %w0\n\t"
2167 :: "Nd" (Port),
2168 "a" (u8));
2169
2170# elif RT_INLINE_ASM_USES_INTRIN
2171 __outbyte(Port, u8);
2172
2173# else
2174 __asm
2175 {
2176 mov dx, [Port]
2177 mov al, [u8]
2178 out dx, al
2179 }
2180# endif
2181}
2182#endif
2183
2184
2185/**
2186 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2187 *
2188 * @returns 8-bit integer.
2189 * @param Port I/O port to read from.
2190 */
2191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2192DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2193#else
2194DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2195{
2196 uint8_t u8;
2197# if RT_INLINE_ASM_GNU_STYLE
2198 __asm__ __volatile__("inb %w1, %b0\n\t"
2199 : "=a" (u8)
2200 : "Nd" (Port));
2201
2202# elif RT_INLINE_ASM_USES_INTRIN
2203 u8 = __inbyte(Port);
2204
2205# else
2206 __asm
2207 {
2208 mov dx, [Port]
2209 in al, dx
2210 mov [u8], al
2211 }
2212# endif
2213 return u8;
2214}
2215#endif
2216
2217
2218/**
2219 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2220 *
2221 * @param Port I/O port to write to.
2222 * @param u16 16-bit integer to write.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2226#else
2227DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2228{
2229# if RT_INLINE_ASM_GNU_STYLE
2230 __asm__ __volatile__("outw %w1, %w0\n\t"
2231 :: "Nd" (Port),
2232 "a" (u16));
2233
2234# elif RT_INLINE_ASM_USES_INTRIN
2235 __outword(Port, u16);
2236
2237# else
2238 __asm
2239 {
2240 mov dx, [Port]
2241 mov ax, [u16]
2242 out dx, ax
2243 }
2244# endif
2245}
2246#endif
2247
2248
2249/**
2250 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2251 *
2252 * @returns 16-bit integer.
2253 * @param Port I/O port to read from.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2257#else
2258DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2259{
2260 uint16_t u16;
2261# if RT_INLINE_ASM_GNU_STYLE
2262 __asm__ __volatile__("inw %w1, %w0\n\t"
2263 : "=a" (u16)
2264 : "Nd" (Port));
2265
2266# elif RT_INLINE_ASM_USES_INTRIN
2267 u16 = __inword(Port);
2268
2269# else
2270 __asm
2271 {
2272 mov dx, [Port]
2273 in ax, dx
2274 mov [u16], ax
2275 }
2276# endif
2277 return u16;
2278}
2279#endif
2280
2281
2282/**
2283 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2284 *
2285 * @param Port I/O port to write to.
2286 * @param u32 32-bit integer to write.
2287 */
2288#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2289DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2290#else
2291DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2292{
2293# if RT_INLINE_ASM_GNU_STYLE
2294 __asm__ __volatile__("outl %1, %w0\n\t"
2295 :: "Nd" (Port),
2296 "a" (u32));
2297
2298# elif RT_INLINE_ASM_USES_INTRIN
2299 __outdword(Port, u32);
2300
2301# else
2302 __asm
2303 {
2304 mov dx, [Port]
2305 mov eax, [u32]
2306 out dx, eax
2307 }
2308# endif
2309}
2310#endif
2311
2312
2313/**
2314 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2315 *
2316 * @returns 32-bit integer.
2317 * @param Port I/O port to read from.
2318 */
2319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2320DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2321#else
2322DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2323{
2324 uint32_t u32;
2325# if RT_INLINE_ASM_GNU_STYLE
2326 __asm__ __volatile__("inl %w1, %0\n\t"
2327 : "=a" (u32)
2328 : "Nd" (Port));
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 u32 = __indword(Port);
2332
2333# else
2334 __asm
2335 {
2336 mov dx, [Port]
2337 in eax, dx
2338 mov [u32], eax
2339 }
2340# endif
2341 return u32;
2342}
2343#endif
2344
2345
2346/**
2347 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2348 *
2349 * @param Port I/O port to write to.
2350 * @param pau8 Pointer to the string buffer.
2351 * @param c The number of items to write.
2352 */
2353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2354DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2355#else
2356DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2357{
2358# if RT_INLINE_ASM_GNU_STYLE
2359 __asm__ __volatile__("rep; outsb\n\t"
2360 : "+S" (pau8),
2361 "+c" (c)
2362 : "d" (Port));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov ecx, [c]
2372 mov eax, [pau8]
2373 xchg esi, eax
2374 rep outsb
2375 xchg esi, eax
2376 }
2377# endif
2378}
2379#endif
2380
2381
2382/**
2383 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2384 *
2385 * @param Port I/O port to read from.
2386 * @param pau8 Pointer to the string buffer (output).
2387 * @param c The number of items to read.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; insb\n\t"
2396 : "+D" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __inbytestring(Port, pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg edi, eax
2410 rep insb
2411 xchg edi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2420 *
2421 * @param Port I/O port to write to.
2422 * @param pau16 Pointer to the string buffer.
2423 * @param c The number of items to write.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2427#else
2428DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; outsw\n\t"
2432 : "+S" (pau16),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau16]
2445 xchg esi, eax
2446 rep outsw
2447 xchg esi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2456 *
2457 * @param Port I/O port to read from.
2458 * @param pau16 Pointer to the string buffer (output).
2459 * @param c The number of items to read.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; insw\n\t"
2468 : "+D" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __inwordstring(Port, pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg edi, eax
2482 rep insw
2483 xchg edi, eax
2484 }
2485# endif
2486}
2487#endif
2488
2489
2490/**
2491 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2492 *
2493 * @param Port I/O port to write to.
2494 * @param pau32 Pointer to the string buffer.
2495 * @param c The number of items to write.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2499#else
2500DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; outsl\n\t"
2504 : "+S" (pau32),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau32]
2517 xchg esi, eax
2518 rep outsd
2519 xchg esi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2528 *
2529 * @param Port I/O port to read from.
2530 * @param pau32 Pointer to the string buffer (output).
2531 * @param c The number of items to read.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; insl\n\t"
2540 : "+D" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg edi, eax
2554 rep insd
2555 xchg edi, eax
2556 }
2557# endif
2558}
2559#endif
2560
2561
2562/**
2563 * Atomically Exchange an unsigned 8-bit value, ordered.
2564 *
2565 * @returns Current *pu8 value
2566 * @param pu8 Pointer to the 8-bit variable to update.
2567 * @param u8 The 8-bit value to assign to *pu8.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL
2570DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2571#else
2572DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("xchgb %0, %1\n\t"
2576 : "=m" (*pu8),
2577 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2578 : "1" (u8),
2579 "m" (*pu8));
2580# else
2581 __asm
2582 {
2583# ifdef RT_ARCH_AMD64
2584 mov rdx, [pu8]
2585 mov al, [u8]
2586 xchg [rdx], al
2587 mov [u8], al
2588# else
2589 mov edx, [pu8]
2590 mov al, [u8]
2591 xchg [edx], al
2592 mov [u8], al
2593# endif
2594 }
2595# endif
2596 return u8;
2597}
2598#endif
2599
2600
2601/**
2602 * Atomically Exchange a signed 8-bit value, ordered.
2603 *
2604 * @returns Current *pu8 value
2605 * @param pi8 Pointer to the 8-bit variable to update.
2606 * @param i8 The 8-bit value to assign to *pi8.
2607 */
2608DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2609{
2610 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2611}
2612
2613
2614/**
2615 * Atomically Exchange a bool value, ordered.
2616 *
2617 * @returns Current *pf value
2618 * @param pf Pointer to the 8-bit variable to update.
2619 * @param f The 8-bit value to assign to *pi8.
2620 */
2621DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2622{
2623#ifdef _MSC_VER
2624 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2625#else
2626 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2627#endif
2628}
2629
2630
2631/**
2632 * Atomically Exchange an unsigned 16-bit value, ordered.
2633 *
2634 * @returns Current *pu16 value
2635 * @param pu16 Pointer to the 16-bit variable to update.
2636 * @param u16 The 16-bit value to assign to *pu16.
2637 */
2638#if RT_INLINE_ASM_EXTERNAL
2639DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2640#else
2641DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2642{
2643# if RT_INLINE_ASM_GNU_STYLE
2644 __asm__ __volatile__("xchgw %0, %1\n\t"
2645 : "=m" (*pu16),
2646 "=r" (u16)
2647 : "1" (u16),
2648 "m" (*pu16));
2649# else
2650 __asm
2651 {
2652# ifdef RT_ARCH_AMD64
2653 mov rdx, [pu16]
2654 mov ax, [u16]
2655 xchg [rdx], ax
2656 mov [u16], ax
2657# else
2658 mov edx, [pu16]
2659 mov ax, [u16]
2660 xchg [edx], ax
2661 mov [u16], ax
2662# endif
2663 }
2664# endif
2665 return u16;
2666}
2667#endif
2668
2669
2670/**
2671 * Atomically Exchange a signed 16-bit value, ordered.
2672 *
2673 * @returns Current *pu16 value
2674 * @param pi16 Pointer to the 16-bit variable to update.
2675 * @param i16 The 16-bit value to assign to *pi16.
2676 */
2677DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2678{
2679 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2680}
2681
2682
2683/**
2684 * Atomically Exchange an unsigned 32-bit value, ordered.
2685 *
2686 * @returns Current *pu32 value
2687 * @param pu32 Pointer to the 32-bit variable to update.
2688 * @param u32 The 32-bit value to assign to *pu32.
2689 */
2690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2691DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2692#else
2693DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2694{
2695# if RT_INLINE_ASM_GNU_STYLE
2696 __asm__ __volatile__("xchgl %0, %1\n\t"
2697 : "=m" (*pu32),
2698 "=r" (u32)
2699 : "1" (u32),
2700 "m" (*pu32));
2701
2702# elif RT_INLINE_ASM_USES_INTRIN
2703 u32 = _InterlockedExchange((long *)pu32, u32);
2704
2705# else
2706 __asm
2707 {
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 mov eax, u32
2711 xchg [rdx], eax
2712 mov [u32], eax
2713# else
2714 mov edx, [pu32]
2715 mov eax, u32
2716 xchg [edx], eax
2717 mov [u32], eax
2718# endif
2719 }
2720# endif
2721 return u32;
2722}
2723#endif
2724
2725
2726/**
2727 * Atomically Exchange a signed 32-bit value, ordered.
2728 *
2729 * @returns Current *pu32 value
2730 * @param pi32 Pointer to the 32-bit variable to update.
2731 * @param i32 The 32-bit value to assign to *pi32.
2732 */
2733DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2734{
2735 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2736}
2737
2738
2739/**
2740 * Atomically Exchange an unsigned 64-bit value, ordered.
2741 *
2742 * @returns Current *pu64 value
2743 * @param pu64 Pointer to the 64-bit variable to update.
2744 * @param u64 The 64-bit value to assign to *pu64.
2745 */
2746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2747DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2748#else
2749DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2750{
2751# if defined(RT_ARCH_AMD64)
2752# if RT_INLINE_ASM_USES_INTRIN
2753 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2754
2755# elif RT_INLINE_ASM_GNU_STYLE
2756 __asm__ __volatile__("xchgq %0, %1\n\t"
2757 : "=m" (*pu64),
2758 "=r" (u64)
2759 : "1" (u64),
2760 "m" (*pu64));
2761# else
2762 __asm
2763 {
2764 mov rdx, [pu64]
2765 mov rax, [u64]
2766 xchg [rdx], rax
2767 mov [u64], rax
2768 }
2769# endif
2770# else /* !RT_ARCH_AMD64 */
2771# if RT_INLINE_ASM_GNU_STYLE
2772# if defined(PIC) || defined(__PIC__)
2773 uint32_t u32EBX = (uint32_t)u64;
2774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2775 "xchgl %%ebx, %3\n\t"
2776 "1:\n\t"
2777 "lock; cmpxchg8b (%5)\n\t"
2778 "jnz 1b\n\t"
2779 "movl %3, %%ebx\n\t"
2780 /*"xchgl %%esi, %5\n\t"*/
2781 : "=A" (u64),
2782 "=m" (*pu64)
2783 : "0" (*pu64),
2784 "m" ( u32EBX ),
2785 "c" ( (uint32_t)(u64 >> 32) ),
2786 "S" (pu64));
2787# else /* !PIC */
2788 __asm__ __volatile__("1:\n\t"
2789 "lock; cmpxchg8b %1\n\t"
2790 "jnz 1b\n\t"
2791 : "=A" (u64),
2792 "=m" (*pu64)
2793 : "0" (*pu64),
2794 "b" ( (uint32_t)u64 ),
2795 "c" ( (uint32_t)(u64 >> 32) ));
2796# endif
2797# else
2798 __asm
2799 {
2800 mov ebx, dword ptr [u64]
2801 mov ecx, dword ptr [u64 + 4]
2802 mov edi, pu64
2803 mov eax, dword ptr [edi]
2804 mov edx, dword ptr [edi + 4]
2805 retry:
2806 lock cmpxchg8b [edi]
2807 jnz retry
2808 mov dword ptr [u64], eax
2809 mov dword ptr [u64 + 4], edx
2810 }
2811# endif
2812# endif /* !RT_ARCH_AMD64 */
2813 return u64;
2814}
2815#endif
2816
2817
2818/**
2819 * Atomically Exchange an signed 64-bit value, ordered.
2820 *
2821 * @returns Current *pi64 value
2822 * @param pi64 Pointer to the 64-bit variable to update.
2823 * @param i64 The 64-bit value to assign to *pi64.
2824 */
2825DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2826{
2827 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2828}
2829
2830
#ifdef RT_ARCH_AMD64
/**
 * Exchange an unsigned 128-bit value.
 *
 * @returns The previous *pu128 value, assembled from two separate 64-bit
 *          exchanges (low half first, then high half).
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead. Each
 *          half is exchanged atomically, the pair is not.
 *
 * @todo    Add a genuinely atomic variant based on lock cmpxchg16b, selected
 *          when the CPU reports it (ASMCpuId_ECX(1) & RT_BIT(13)).
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    /** @todo this is clumsy code */
    RTUINT128U u128Ret;
    u128Ret.u = u128;
    /* The uintptr_t round trip strips the volatile qualifier so the halves can
       be addressed through the RTUINT128U union. */
    u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
    u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
    return u128Ret.u;
}
# endif
#endif /* RT_ARCH_AMD64 */
2890
2891
2892/**
2893 * Atomically Exchange a pointer value, ordered.
2894 *
2895 * @returns Current *ppv value
2896 * @param ppv Pointer to the pointer variable to update.
2897 * @param pv The pointer value to assign to *ppv.
2898 */
2899DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2900{
2901#if ARCH_BITS == 32
2902 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2903#elif ARCH_BITS == 64
2904 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2905#else
2906# error "ARCH_BITS is bogus"
2907#endif
2908}
2909
2910
2911/**
2912 * Atomically Exchange a raw-mode context pointer value, ordered.
2913 *
2914 * @returns Current *ppv value
2915 * @param ppvRC Pointer to the pointer variable to update.
2916 * @param pvRC The pointer value to assign to *ppv.
2917 */
2918DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2919{
2920 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2921}
2922
2923
2924/**
2925 * Atomically Exchange a ring-0 pointer value, ordered.
2926 *
2927 * @returns Current *ppv value
2928 * @param ppvR0 Pointer to the pointer variable to update.
2929 * @param pvR0 The pointer value to assign to *ppv.
2930 */
2931DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2932{
2933#if R0_ARCH_BITS == 32
2934 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2935#elif R0_ARCH_BITS == 64
2936 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2937#else
2938# error "R0_ARCH_BITS is bogus"
2939#endif
2940}
2941
2942
2943/**
2944 * Atomically Exchange a ring-3 pointer value, ordered.
2945 *
2946 * @returns Current *ppv value
2947 * @param ppvR3 Pointer to the pointer variable to update.
2948 * @param pvR3 The pointer value to assign to *ppv.
2949 */
2950DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2951{
2952#if R3_ARCH_BITS == 32
2953 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2954#elif R3_ARCH_BITS == 64
2955 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2956#else
2957# error "R3_ARCH_BITS is bogus"
2958#endif
2959}
2960
2961
2962/** @def ASMAtomicXchgHandle
2963 * Atomically Exchange a typical IPRT handle value, ordered.
2964 *
2965 * @param ph Pointer to the value to update.
2966 * @param hNew The new value to assigned to *pu.
2967 * @param phRes Where to store the current *ph value.
2968 *
2969 * @remarks This doesn't currently work for all handles (like RTFILE).
2970 */
2971#if HC_ARCH_BITS == 32
2972# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2973 do { \
2974 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2975 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2976 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2977 } while (0)
2978#elif HC_ARCH_BITS == 64
2979# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2980 do { \
2981 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2982 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2983 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2984 } while (0)
2985#else
2986# error HC_ARCH_BITS
2987#endif
2988
2989
/** @def ASMAtomicXchgSize
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * Any other size triggers an assertion and leaves *pu untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as its missing the result argument; the previous
 *          value is discarded.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, so the size_t from sizeof is narrowed explicitly. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
3008
/** @def ASMAtomicXchgSizeCorrect
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function and
 * stores the previous value through puRes using the same width. Any other
 * size triggers an assertion and leaves both *pu and *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* %d expects an int, so the size_t from sizeof is narrowed explicitly. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
                break; \
        } \
    } while (0)
3027
3028
3029/**
3030 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
3031 *
3032 * @returns true if xchg was done.
3033 * @returns false if xchg wasn't done.
3034 *
3035 * @param pu32 Pointer to the value to update.
3036 * @param u32New The new value to assigned to *pu32.
3037 * @param u32Old The old value to *pu32 compare with.
3038 */
3039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3040DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3041#else
3042DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3043{
3044# if RT_INLINE_ASM_GNU_STYLE
3045 uint8_t u8Ret;
3046 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3047 "setz %1\n\t"
3048 : "=m" (*pu32),
3049 "=qm" (u8Ret),
3050 "=a" (u32Old)
3051 : "r" (u32New),
3052 "2" (u32Old),
3053 "m" (*pu32));
3054 return (bool)u8Ret;
3055
3056# elif RT_INLINE_ASM_USES_INTRIN
3057 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3058
3059# else
3060 uint32_t u32Ret;
3061 __asm
3062 {
3063# ifdef RT_ARCH_AMD64
3064 mov rdx, [pu32]
3065# else
3066 mov edx, [pu32]
3067# endif
3068 mov eax, [u32Old]
3069 mov ecx, [u32New]
3070# ifdef RT_ARCH_AMD64
3071 lock cmpxchg [rdx], ecx
3072# else
3073 lock cmpxchg [edx], ecx
3074# endif
3075 setz al
3076 movzx eax, al
3077 mov [u32Ret], eax
3078 }
3079 return !!u32Ret;
3080# endif
3081}
3082#endif
3083
3084
3085/**
3086 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3087 *
3088 * @returns true if xchg was done.
3089 * @returns false if xchg wasn't done.
3090 *
3091 * @param pi32 Pointer to the value to update.
3092 * @param i32New The new value to assigned to *pi32.
3093 * @param i32Old The old value to *pi32 compare with.
3094 */
3095DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3096{
3097 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3098}
3099
3100
3101/**
3102 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3103 *
3104 * @returns true if xchg was done.
3105 * @returns false if xchg wasn't done.
3106 *
3107 * @param pu64 Pointer to the 64-bit variable to update.
3108 * @param u64New The 64-bit value to assign to *pu64.
3109 * @param u64Old The value to compare with.
3110 */
3111#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3112 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3113DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3114#else
3115DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3116{
3117# if RT_INLINE_ASM_USES_INTRIN
3118 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3119
3120# elif defined(RT_ARCH_AMD64)
3121# if RT_INLINE_ASM_GNU_STYLE
3122 uint8_t u8Ret;
3123 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3124 "setz %1\n\t"
3125 : "=m" (*pu64),
3126 "=qm" (u8Ret),
3127 "=a" (u64Old)
3128 : "r" (u64New),
3129 "2" (u64Old),
3130 "m" (*pu64));
3131 return (bool)u8Ret;
3132# else
3133 bool fRet;
3134 __asm
3135 {
3136 mov rdx, [pu32]
3137 mov rax, [u64Old]
3138 mov rcx, [u64New]
3139 lock cmpxchg [rdx], rcx
3140 setz al
3141 mov [fRet], al
3142 }
3143 return fRet;
3144# endif
3145# else /* !RT_ARCH_AMD64 */
3146 uint32_t u32Ret;
3147# if RT_INLINE_ASM_GNU_STYLE
3148# if defined(PIC) || defined(__PIC__)
3149 uint32_t u32EBX = (uint32_t)u64New;
3150 uint32_t u32Spill;
3151 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3152 "lock; cmpxchg8b (%6)\n\t"
3153 "setz %%al\n\t"
3154 "movl %4, %%ebx\n\t"
3155 "movzbl %%al, %%eax\n\t"
3156 : "=a" (u32Ret),
3157 "=d" (u32Spill),
3158# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3159 "+m" (*pu64)
3160# else
3161 "=m" (*pu64)
3162# endif
3163 : "A" (u64Old),
3164 "m" ( u32EBX ),
3165 "c" ( (uint32_t)(u64New >> 32) ),
3166 "S" (pu64));
3167# else /* !PIC */
3168 uint32_t u32Spill;
3169 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3170 "setz %%al\n\t"
3171 "movzbl %%al, %%eax\n\t"
3172 : "=a" (u32Ret),
3173 "=d" (u32Spill),
3174 "+m" (*pu64)
3175 : "A" (u64Old),
3176 "b" ( (uint32_t)u64New ),
3177 "c" ( (uint32_t)(u64New >> 32) ));
3178# endif
3179 return (bool)u32Ret;
3180# else
3181 __asm
3182 {
3183 mov ebx, dword ptr [u64New]
3184 mov ecx, dword ptr [u64New + 4]
3185 mov edi, [pu64]
3186 mov eax, dword ptr [u64Old]
3187 mov edx, dword ptr [u64Old + 4]
3188 lock cmpxchg8b [edi]
3189 setz al
3190 movzx eax, al
3191 mov dword ptr [u32Ret], eax
3192 }
3193 return !!u32Ret;
3194# endif
3195# endif /* !RT_ARCH_AMD64 */
3196}
3197#endif
3198
3199
3200/**
3201 * Atomically Compare and exchange a signed 64-bit value, ordered.
3202 *
3203 * @returns true if xchg was done.
3204 * @returns false if xchg wasn't done.
3205 *
3206 * @param pi64 Pointer to the 64-bit variable to update.
3207 * @param i64 The 64-bit value to assign to *pu64.
3208 * @param i64Old The value to compare with.
3209 */
3210DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3211{
3212 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3213}
3214
3215
3216/**
3217 * Atomically Compare and Exchange a pointer value, ordered.
3218 *
3219 * @returns true if xchg was done.
3220 * @returns false if xchg wasn't done.
3221 *
3222 * @param ppv Pointer to the value to update.
3223 * @param pvNew The new value to assigned to *ppv.
3224 * @param pvOld The old value to *ppv compare with.
3225 */
3226DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3227{
3228#if ARCH_BITS == 32
3229 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3230#elif ARCH_BITS == 64
3231 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3232#else
3233# error "ARCH_BITS is bogus"
3234#endif
3235}
3236
3237
3238/** @def ASMAtomicCmpXchgHandle
3239 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3240 *
3241 * @param ph Pointer to the value to update.
3242 * @param hNew The new value to assigned to *pu.
3243 * @param hOld The old value to *pu compare with.
3244 * @param fRc Where to store the result.
3245 *
3246 * @remarks This doesn't currently work for all handles (like RTFILE).
3247 */
3248#if HC_ARCH_BITS == 32
3249# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3250 do { \
3251 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3252 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3253 } while (0)
3254#elif HC_ARCH_BITS == 64
3255# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3256 do { \
3257 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3258 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3259 } while (0)
3260#else
3261# error HC_ARCH_BITS
3262#endif
3263
3264
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit compare-and-exchange
 * function. Any other size triggers an assertion and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* %d expects an int, so the size_t from sizeof is narrowed explicitly. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
3286
3287
3288/**
3289 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3290 * passes back old value, ordered.
3291 *
3292 * @returns true if xchg was done.
3293 * @returns false if xchg wasn't done.
3294 *
3295 * @param pu32 Pointer to the value to update.
3296 * @param u32New The new value to assigned to *pu32.
3297 * @param u32Old The old value to *pu32 compare with.
3298 * @param pu32Old Pointer store the old value at.
3299 */
3300#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3301DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3302#else
3303DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3304{
3305# if RT_INLINE_ASM_GNU_STYLE
3306 uint8_t u8Ret;
3307 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3308 "setz %1\n\t"
3309 : "=m" (*pu32),
3310 "=qm" (u8Ret),
3311 "=a" (*pu32Old)
3312 : "r" (u32New),
3313 "a" (u32Old),
3314 "m" (*pu32));
3315 return (bool)u8Ret;
3316
3317# elif RT_INLINE_ASM_USES_INTRIN
3318 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3319
3320# else
3321 uint32_t u32Ret;
3322 __asm
3323 {
3324# ifdef RT_ARCH_AMD64
3325 mov rdx, [pu32]
3326# else
3327 mov edx, [pu32]
3328# endif
3329 mov eax, [u32Old]
3330 mov ecx, [u32New]
3331# ifdef RT_ARCH_AMD64
3332 lock cmpxchg [rdx], ecx
3333 mov rdx, [pu32Old]
3334 mov [rdx], eax
3335# else
3336 lock cmpxchg [edx], ecx
3337 mov edx, [pu32Old]
3338 mov [edx], eax
3339# endif
3340 setz al
3341 movzx eax, al
3342 mov [u32Ret], eax
3343 }
3344 return !!u32Ret;
3345# endif
3346}
3347#endif
3348
3349
3350/**
3351 * Atomically Compare and Exchange a signed 32-bit value, additionally
3352 * passes back old value, ordered.
3353 *
3354 * @returns true if xchg was done.
3355 * @returns false if xchg wasn't done.
3356 *
3357 * @param pi32 Pointer to the value to update.
3358 * @param i32New The new value to assigned to *pi32.
3359 * @param i32Old The old value to *pi32 compare with.
3360 * @param pi32Old Pointer store the old value at.
3361 */
3362DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3363{
3364 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3365}
3366
3367
3368/**
3369 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3370 * passing back old value, ordered.
3371 *
3372 * @returns true if xchg was done.
3373 * @returns false if xchg wasn't done.
3374 *
3375 * @param pu64 Pointer to the 64-bit variable to update.
3376 * @param u64New The 64-bit value to assign to *pu64.
3377 * @param u64Old The value to compare with.
3378 * @param pu64Old Pointer store the old value at.
3379 */
3380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3381DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3382#else
3383DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3384{
3385# if RT_INLINE_ASM_USES_INTRIN
3386 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3387
3388# elif defined(RT_ARCH_AMD64)
3389# if RT_INLINE_ASM_GNU_STYLE
3390 uint8_t u8Ret;
3391 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3392 "setz %1\n\t"
3393 : "=m" (*pu64),
3394 "=qm" (u8Ret),
3395 "=a" (*pu64Old)
3396 : "r" (u64New),
3397 "a" (u64Old),
3398 "m" (*pu64));
3399 return (bool)u8Ret;
3400# else
3401 bool fRet;
3402 __asm
3403 {
3404 mov rdx, [pu32]
3405 mov rax, [u64Old]
3406 mov rcx, [u64New]
3407 lock cmpxchg [rdx], rcx
3408 mov rdx, [pu64Old]
3409 mov [rdx], rax
3410 setz al
3411 mov [fRet], al
3412 }
3413 return fRet;
3414# endif
3415# else /* !RT_ARCH_AMD64 */
3416# if RT_INLINE_ASM_GNU_STYLE
3417 uint64_t u64Ret;
3418# if defined(PIC) || defined(__PIC__)
3419 /* NB: this code uses a memory clobber description, because the clean
3420 * solution with an output value for *pu64 makes gcc run out of registers.
3421 * This will cause suboptimal code, and anyone with a better solution is
3422 * welcome to improve this. */
3423 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3424 "lock; cmpxchg8b %3\n\t"
3425 "xchgl %%ebx, %1\n\t"
3426 : "=A" (u64Ret)
3427 : "DS" ((uint32_t)u64New),
3428 "c" ((uint32_t)(u64New >> 32)),
3429 "m" (*pu64),
3430 "0" (u64Old)
3431 : "memory" );
3432# else /* !PIC */
3433 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3434 : "=A" (u64Ret),
3435 "=m" (*pu64)
3436 : "b" ((uint32_t)u64New),
3437 "c" ((uint32_t)(u64New >> 32)),
3438 "m" (*pu64),
3439 "0" (u64Old));
3440# endif
3441 *pu64Old = u64Ret;
3442 return u64Ret == u64Old;
3443# else
3444 uint32_t u32Ret;
3445 __asm
3446 {
3447 mov ebx, dword ptr [u64New]
3448 mov ecx, dword ptr [u64New + 4]
3449 mov edi, [pu64]
3450 mov eax, dword ptr [u64Old]
3451 mov edx, dword ptr [u64Old + 4]
3452 lock cmpxchg8b [edi]
3453 mov ebx, [pu64Old]
3454 mov [ebx], eax
3455 setz al
3456 movzx eax, al
3457 add ebx, 4
3458 mov [ebx], edx
3459 mov dword ptr [u32Ret], eax
3460 }
3461 return !!u32Ret;
3462# endif
3463# endif /* !RT_ARCH_AMD64 */
3464}
3465#endif
3466
3467
3468/**
3469 * Atomically Compare and exchange a signed 64-bit value, additionally
3470 * passing back old value, ordered.
3471 *
3472 * @returns true if xchg was done.
3473 * @returns false if xchg wasn't done.
3474 *
3475 * @param pi64 Pointer to the 64-bit variable to update.
3476 * @param i64 The 64-bit value to assign to *pu64.
3477 * @param i64Old The value to compare with.
3478 * @param pi64Old Pointer store the old value at.
3479 */
3480DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3481{
3482 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3483}
3484
3485/** @def ASMAtomicCmpXchgExHandle
3486 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3487 *
3488 * @param ph Pointer to the value to update.
3489 * @param hNew The new value to assigned to *pu.
3490 * @param hOld The old value to *pu compare with.
3491 * @param fRc Where to store the result.
3492 * @param phOldVal Pointer to where to store the old value.
3493 *
3494 * @remarks This doesn't currently work for all handles (like RTFILE).
3495 */
3496#if HC_ARCH_BITS == 32
3497# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3498 do { \
3499 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3500 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3501 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3502 } while (0)
3503#elif HC_ARCH_BITS == 64
3504# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3505 do { \
3506 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3507 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3508 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3509 } while (0)
3510#else
3511# error HC_ARCH_BITS
3512#endif
3513
3514
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Dispatches on sizeof(*(pu)) to the 32-bit or 64-bit function. Any other
 * size triggers an assertion, sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* %d expects an int, so the size_t from sizeof is narrowed explicitly. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
3538
3539
3540/**
3541 * Atomically Compare and Exchange a pointer value, additionally
3542 * passing back old value, ordered.
3543 *
3544 * @returns true if xchg was done.
3545 * @returns false if xchg wasn't done.
3546 *
3547 * @param ppv Pointer to the value to update.
3548 * @param pvNew The new value to assigned to *ppv.
3549 * @param pvOld The old value to *ppv compare with.
3550 * @param ppvOld Pointer store the old value at.
3551 */
3552DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3553{
3554#if ARCH_BITS == 32
3555 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3556#elif ARCH_BITS == 64
3557 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3558#else
3559# error "ARCH_BITS is bogus"
3560#endif
3561}
3562
3563
3564/**
3565 * Atomically exchanges and adds to a 32-bit value, ordered.
3566 *
3567 * @returns The old value.
3568 * @param pu32 Pointer to the value.
3569 * @param u32 Number to add.
3570 */
3571#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3572DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3573#else
3574DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3575{
3576# if RT_INLINE_ASM_USES_INTRIN
3577 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3578 return u32;
3579
3580# elif RT_INLINE_ASM_GNU_STYLE
3581 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3582 : "=r" (u32),
3583 "=m" (*pu32)
3584 : "0" (u32),
3585 "m" (*pu32)
3586 : "memory");
3587 return u32;
3588# else
3589 __asm
3590 {
3591 mov eax, [u32]
3592# ifdef RT_ARCH_AMD64
3593 mov rdx, [pu32]
3594 lock xadd [rdx], eax
3595# else
3596 mov edx, [pu32]
3597 lock xadd [edx], eax
3598# endif
3599 mov [u32], eax
3600 }
3601 return u32;
3602# endif
3603}
3604#endif
3605
3606
3607/**
3608 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3609 *
3610 * @returns The old value.
3611 * @param pi32 Pointer to the value.
3612 * @param i32 Number to add.
3613 */
3614DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3615{
3616 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3617}
3618
3619
3620/**
3621 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
3622 *
3623 * @returns The old value.
3624 * @param pu32 Pointer to the value.
3625 * @param u32 Number to subtract.
3626 */
3627DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3628{
3629 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3630}
3631
3632
3633/**
3634 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
3635 *
3636 * @returns The old value.
3637 * @param pi32 Pointer to the value.
3638 * @param i32 Number to subtract.
3639 */
3640DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3641{
3642 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3643}
3644
3645
3646/**
3647 * Atomically increment a 32-bit value, ordered.
3648 *
3649 * @returns The new value.
3650 * @param pu32 Pointer to the value to increment.
3651 */
3652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3653DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3654#else
3655DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3656{
3657 uint32_t u32;
3658# if RT_INLINE_ASM_USES_INTRIN
3659 u32 = _InterlockedIncrement((long *)pu32);
3660 return u32;
3661
3662# elif RT_INLINE_ASM_GNU_STYLE
3663 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3664 : "=r" (u32),
3665 "=m" (*pu32)
3666 : "0" (1),
3667 "m" (*pu32)
3668 : "memory");
3669 return u32+1;
3670# else
3671 __asm
3672 {
3673 mov eax, 1
3674# ifdef RT_ARCH_AMD64
3675 mov rdx, [pu32]
3676 lock xadd [rdx], eax
3677# else
3678 mov edx, [pu32]
3679 lock xadd [edx], eax
3680# endif
3681 mov u32, eax
3682 }
3683 return u32+1;
3684# endif
3685}
3686#endif
3687
3688
3689/**
3690 * Atomically increment a signed 32-bit value, ordered.
3691 *
3692 * @returns The new value.
3693 * @param pi32 Pointer to the value to increment.
3694 */
3695DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3696{
3697 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3698}
3699
3700
3701/**
3702 * Atomically decrement an unsigned 32-bit value, ordered.
3703 *
3704 * @returns The new value.
3705 * @param pu32 Pointer to the value to decrement.
3706 */
3707#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3708DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3709#else
3710DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3711{
3712 uint32_t u32;
3713# if RT_INLINE_ASM_USES_INTRIN
3714 u32 = _InterlockedDecrement((long *)pu32);
3715 return u32;
3716
3717# elif RT_INLINE_ASM_GNU_STYLE
3718 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3719 : "=r" (u32),
3720 "=m" (*pu32)
3721 : "0" (-1),
3722 "m" (*pu32)
3723 : "memory");
3724 return u32-1;
3725# else
3726 __asm
3727 {
3728 mov eax, -1
3729# ifdef RT_ARCH_AMD64
3730 mov rdx, [pu32]
3731 lock xadd [rdx], eax
3732# else
3733 mov edx, [pu32]
3734 lock xadd [edx], eax
3735# endif
3736 mov u32, eax
3737 }
3738 return u32-1;
3739# endif
3740}
3741#endif
3742
3743
3744/**
3745 * Atomically decrement a signed 32-bit value, ordered.
3746 *
3747 * @returns The new value.
3748 * @param pi32 Pointer to the value to decrement.
3749 */
3750DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3751{
3752 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3753}
3754
3755
3756/**
3757 * Atomically Or an unsigned 32-bit value, ordered.
3758 *
3759 * @param pu32 Pointer to the pointer variable to OR u32 with.
3760 * @param u32 The value to OR *pu32 with.
3761 */
3762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3763DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3764#else
3765DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3766{
3767# if RT_INLINE_ASM_USES_INTRIN
3768 _InterlockedOr((long volatile *)pu32, (long)u32);
3769
3770# elif RT_INLINE_ASM_GNU_STYLE
3771 __asm__ __volatile__("lock; orl %1, %0\n\t"
3772 : "=m" (*pu32)
3773 : "ir" (u32),
3774 "m" (*pu32));
3775# else
3776 __asm
3777 {
3778 mov eax, [u32]
3779# ifdef RT_ARCH_AMD64
3780 mov rdx, [pu32]
3781 lock or [rdx], eax
3782# else
3783 mov edx, [pu32]
3784 lock or [edx], eax
3785# endif
3786 }
3787# endif
3788}
3789#endif
3790
3791
3792/**
3793 * Atomically Or a signed 32-bit value, ordered.
3794 *
3795 * @param pi32 Pointer to the pointer variable to OR u32 with.
3796 * @param i32 The value to OR *pu32 with.
3797 */
3798DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3799{
3800 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3801}
3802
3803
3804/**
3805 * Atomically And an unsigned 32-bit value, ordered.
3806 *
3807 * @param pu32 Pointer to the pointer variable to AND u32 with.
3808 * @param u32 The value to AND *pu32 with.
3809 */
3810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3811DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3812#else
3813DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3814{
3815# if RT_INLINE_ASM_USES_INTRIN
3816 _InterlockedAnd((long volatile *)pu32, u32);
3817
3818# elif RT_INLINE_ASM_GNU_STYLE
3819 __asm__ __volatile__("lock; andl %1, %0\n\t"
3820 : "=m" (*pu32)
3821 : "ir" (u32),
3822 "m" (*pu32));
3823# else
3824 __asm
3825 {
3826 mov eax, [u32]
3827# ifdef RT_ARCH_AMD64
3828 mov rdx, [pu32]
3829 lock and [rdx], eax
3830# else
3831 mov edx, [pu32]
3832 lock and [edx], eax
3833# endif
3834 }
3835# endif
3836}
3837#endif
3838
3839
3840/**
3841 * Atomically And a signed 32-bit value, ordered.
3842 *
3843 * @param pi32 Pointer to the pointer variable to AND i32 with.
3844 * @param i32 The value to AND *pi32 with.
3845 */
3846DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3847{
3848 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3849}
3850
3851
3852/**
3853 * Serialize Instruction.
3854 */
3855#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3856DECLASM(void) ASMSerializeInstruction(void);
3857#else
3858DECLINLINE(void) ASMSerializeInstruction(void)
3859{
3860# if RT_INLINE_ASM_GNU_STYLE
3861 RTCCUINTREG xAX = 0;
3862# ifdef RT_ARCH_AMD64
3863 __asm__ ("cpuid"
3864 : "=a" (xAX)
3865 : "0" (xAX)
3866 : "rbx", "rcx", "rdx");
3867# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
3868 __asm__ ("push %%ebx\n\t"
3869 "cpuid\n\t"
3870 "pop %%ebx\n\t"
3871 : "=a" (xAX)
3872 : "0" (xAX)
3873 : "ecx", "edx");
3874# else
3875 __asm__ ("cpuid"
3876 : "=a" (xAX)
3877 : "0" (xAX)
3878 : "ebx", "ecx", "edx");
3879# endif
3880
3881# elif RT_INLINE_ASM_USES_INTRIN
3882 int aInfo[4];
3883 __cpuid(aInfo, 0);
3884
3885# else
3886 __asm
3887 {
3888 push ebx
3889 xor eax, eax
3890 cpuid
3891 pop ebx
3892 }
3893# endif
3894}
3895#endif
3896
3897
3898/**
3899 * Memory fence, waits for any pending writes and reads to complete.
3900 */
3901DECLINLINE(void) ASMMemoryFence(void)
3902{
3903 /** @todo use mfence? check if all cpus we care for support it. */
3904 uint32_t volatile u32;
3905 ASMAtomicXchgU32(&u32, 0);
3906}
3907
3908
3909/**
3910 * Write fence, waits for any pending writes to complete.
3911 */
3912DECLINLINE(void) ASMWriteFence(void)
3913{
3914 /** @todo use sfence? check if all cpus we care for support it. */
3915 ASMMemoryFence();
3916}
3917
3918
3919/**
3920 * Read fence, waits for any pending reads to complete.
3921 */
3922DECLINLINE(void) ASMReadFence(void)
3923{
3924 /** @todo use lfence? check if all cpus we care for support it. */
3925 ASMMemoryFence();
3926}
3927
3928
3929/**
3930 * Atomically reads an unsigned 8-bit value, ordered.
3931 *
3932 * @returns Current *pu8 value
3933 * @param pu8 Pointer to the 8-bit variable to read.
3934 */
3935DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3936{
3937 ASMMemoryFence();
3938 return *pu8; /* byte reads are atomic on x86 */
3939}
3940
3941
3942/**
3943 * Atomically reads an unsigned 8-bit value, unordered.
3944 *
3945 * @returns Current *pu8 value
3946 * @param pu8 Pointer to the 8-bit variable to read.
3947 */
3948DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3949{
3950 return *pu8; /* byte reads are atomic on x86 */
3951}
3952
3953
3954/**
3955 * Atomically reads a signed 8-bit value, ordered.
3956 *
3957 * @returns Current *pi8 value
3958 * @param pi8 Pointer to the 8-bit variable to read.
3959 */
3960DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3961{
3962 ASMMemoryFence();
3963 return *pi8; /* byte reads are atomic on x86 */
3964}
3965
3966
3967/**
3968 * Atomically reads a signed 8-bit value, unordered.
3969 *
3970 * @returns Current *pi8 value
3971 * @param pi8 Pointer to the 8-bit variable to read.
3972 */
3973DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3974{
3975 return *pi8; /* byte reads are atomic on x86 */
3976}
3977
3978
3979/**
3980 * Atomically reads an unsigned 16-bit value, ordered.
3981 *
3982 * @returns Current *pu16 value
3983 * @param pu16 Pointer to the 16-bit variable to read.
3984 */
3985DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3986{
3987 ASMMemoryFence();
3988 Assert(!((uintptr_t)pu16 & 1));
3989 return *pu16;
3990}
3991
3992
3993/**
3994 * Atomically reads an unsigned 16-bit value, unordered.
3995 *
3996 * @returns Current *pu16 value
3997 * @param pu16 Pointer to the 16-bit variable to read.
3998 */
3999DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
4000{
4001 Assert(!((uintptr_t)pu16 & 1));
4002 return *pu16;
4003}
4004
4005
4006/**
4007 * Atomically reads a signed 16-bit value, ordered.
4008 *
4009 * @returns Current *pi16 value
4010 * @param pi16 Pointer to the 16-bit variable to read.
4011 */
4012DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
4013{
4014 ASMMemoryFence();
4015 Assert(!((uintptr_t)pi16 & 1));
4016 return *pi16;
4017}
4018
4019
4020/**
4021 * Atomically reads a signed 16-bit value, unordered.
4022 *
4023 * @returns Current *pi16 value
4024 * @param pi16 Pointer to the 16-bit variable to read.
4025 */
4026DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
4027{
4028 Assert(!((uintptr_t)pi16 & 1));
4029 return *pi16;
4030}
4031
4032
4033/**
4034 * Atomically reads an unsigned 32-bit value, ordered.
4035 *
4036 * @returns Current *pu32 value
4037 * @param pu32 Pointer to the 32-bit variable to read.
4038 */
4039DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
4040{
4041 ASMMemoryFence();
4042 Assert(!((uintptr_t)pu32 & 3));
4043 return *pu32;
4044}
4045
4046
4047/**
4048 * Atomically reads an unsigned 32-bit value, unordered.
4049 *
4050 * @returns Current *pu32 value
4051 * @param pu32 Pointer to the 32-bit variable to read.
4052 */
4053DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4054{
4055 Assert(!((uintptr_t)pu32 & 3));
4056 return *pu32;
4057}
4058
4059
4060/**
4061 * Atomically reads a signed 32-bit value, ordered.
4062 *
4063 * @returns Current *pi32 value
4064 * @param pi32 Pointer to the 32-bit variable to read.
4065 */
4066DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4067{
4068 ASMMemoryFence();
4069 Assert(!((uintptr_t)pi32 & 3));
4070 return *pi32;
4071}
4072
4073
4074/**
4075 * Atomically reads a signed 32-bit value, unordered.
4076 *
4077 * @returns Current *pi32 value
4078 * @param pi32 Pointer to the 32-bit variable to read.
4079 */
4080DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4081{
4082 Assert(!((uintptr_t)pi32 & 3));
4083 return *pi32;
4084}
4085
4086
4087/**
4088 * Atomically reads an unsigned 64-bit value, ordered.
4089 *
4090 * @returns Current *pu64 value
4091 * @param pu64 Pointer to the 64-bit variable to read.
4092 * The memory pointed to must be writable.
4093 * @remark This will fault if the memory is read-only!
4094 */
4095#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
4096 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
4097DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
4098#else
4099DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
4100{
4101 uint64_t u64;
4102# ifdef RT_ARCH_AMD64
4103 Assert(!((uintptr_t)pu64 & 7));
4104/*# if RT_INLINE_ASM_GNU_STYLE
4105 __asm__ __volatile__( "mfence\n\t"
4106 "movq %1, %0\n\t"
4107 : "=r" (u64)
4108 : "m" (*pu64));
4109# else
4110 __asm
4111 {
4112 mfence
4113 mov rdx, [pu64]
4114 mov rax, [rdx]
4115 mov [u64], rax
4116 }
4117# endif*/
4118 ASMMemoryFence();
4119 u64 = *pu64;
4120# else /* !RT_ARCH_AMD64 */
4121# if RT_INLINE_ASM_GNU_STYLE
4122# if defined(PIC) || defined(__PIC__)
4123 uint32_t u32EBX = 0;
4124 Assert(!((uintptr_t)pu64 & 7));
4125 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
4126 "lock; cmpxchg8b (%5)\n\t"
4127 "movl %3, %%ebx\n\t"
4128 : "=A" (u64),
4129# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4130 "+m" (*pu64)
4131# else
4132 "=m" (*pu64)
4133# endif
4134 : "0" (0),
4135 "m" (u32EBX),
4136 "c" (0),
4137 "S" (pu64));
4138# else /* !PIC */
4139 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
4140 : "=A" (u64),
4141 "+m" (*pu64)
4142 : "0" (0),
4143 "b" (0),
4144 "c" (0));
4145# endif
4146# else
4147 Assert(!((uintptr_t)pu64 & 7));
4148 __asm
4149 {
4150 xor eax, eax
4151 xor edx, edx
4152 mov edi, pu64
4153 xor ecx, ecx
4154 xor ebx, ebx
4155 lock cmpxchg8b [edi]
4156 mov dword ptr [u64], eax
4157 mov dword ptr [u64 + 4], edx
4158 }
4159# endif
4160# endif /* !RT_ARCH_AMD64 */
4161 return u64;
4162}
4163#endif
4164
4165
4166/**
4167 * Atomically reads an unsigned 64-bit value, unordered.
4168 *
4169 * @returns Current *pu64 value
4170 * @param pu64 Pointer to the 64-bit variable to read.
4171 * The memory pointed to must be writable.
4172 * @remark This will fault if the memory is read-only!
4173 */
4174#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4175DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
4176#else
4177DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
4178{
4179 uint64_t u64;
4180# ifdef RT_ARCH_AMD64
4181 Assert(!((uintptr_t)pu64 & 7));
4182/*# if RT_INLINE_ASM_GNU_STYLE
4183 Assert(!((uintptr_t)pu64 & 7));
4184 __asm__ __volatile__("movq %1, %0\n\t"
4185 : "=r" (u64)
4186 : "m" (*pu64));
4187# else
4188 __asm
4189 {
4190 mov rdx, [pu64]
4191 mov rax, [rdx]
4192 mov [u64], rax
4193 }
4194# endif */
4195 u64 = *pu64;
4196# else /* !RT_ARCH_AMD64 */
4197# if RT_INLINE_ASM_GNU_STYLE
4198# if defined(PIC) || defined(__PIC__)
4199 uint32_t u32EBX = 0;
4200 uint32_t u32Spill;
4201 Assert(!((uintptr_t)pu64 & 7));
4202 __asm__ __volatile__("xor %%eax,%%eax\n\t"
4203 "xor %%ecx,%%ecx\n\t"
4204 "xor %%edx,%%edx\n\t"
4205 "xchgl %%ebx, %3\n\t"
4206 "lock; cmpxchg8b (%4)\n\t"
4207 "movl %3, %%ebx\n\t"
4208 : "=A" (u64),
4209# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
4210 "+m" (*pu64),
4211# else
4212 "=m" (*pu64),
4213# endif
4214 "=c" (u32Spill)
4215 : "m" (u32EBX),
4216 "S" (pu64));
4217# else /* !PIC */
4218 __asm__ __volatile__("cmpxchg8b %1\n\t"
4219 : "=A" (u64),
4220 "+m" (*pu64)
4221 : "0" (0),
4222 "b" (0),
4223 "c" (0));
4224# endif
4225# else
4226 Assert(!((uintptr_t)pu64 & 7));
4227 __asm
4228 {
4229 xor eax, eax
4230 xor edx, edx
4231 mov edi, pu64
4232 xor ecx, ecx
4233 xor ebx, ebx
4234 lock cmpxchg8b [edi]
4235 mov dword ptr [u64], eax
4236 mov dword ptr [u64 + 4], edx
4237 }
4238# endif
4239# endif /* !RT_ARCH_AMD64 */
4240 return u64;
4241}
4242#endif
4243
4244
4245/**
4246 * Atomically reads a signed 64-bit value, ordered.
4247 *
4248 * @returns Current *pi64 value
4249 * @param pi64 Pointer to the 64-bit variable to read.
4250 * The memory pointed to must be writable.
4251 * @remark This will fault if the memory is read-only!
4252 */
4253DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4254{
4255 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4256}
4257
4258
4259/**
4260 * Atomically reads a signed 64-bit value, unordered.
4261 *
4262 * @returns Current *pi64 value
4263 * @param pi64 Pointer to the 64-bit variable to read.
4264 * The memory pointed to must be writable.
4265 * @remark This will fault if the memory is read-only!
4266 */
4267DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4268{
4269 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4270}
4271
4272
4273/**
4274 * Atomically reads a pointer value, ordered.
4275 *
4276 * @returns Current *pv value
4277 * @param ppv Pointer to the pointer variable to read.
4278 */
4279DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4280{
4281#if ARCH_BITS == 32
4282 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4283#elif ARCH_BITS == 64
4284 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4285#else
4286# error "ARCH_BITS is bogus"
4287#endif
4288}
4289
4290
4291/**
4292 * Atomically reads a pointer value, unordered.
4293 *
4294 * @returns Current *pv value
4295 * @param ppv Pointer to the pointer variable to read.
4296 */
4297DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4298{
4299#if ARCH_BITS == 32
4300 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4301#elif ARCH_BITS == 64
4302 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4303#else
4304# error "ARCH_BITS is bogus"
4305#endif
4306}
4307
4308
4309/**
4310 * Atomically reads a boolean value, ordered.
4311 *
4312 * @returns Current *pf value
4313 * @param pf Pointer to the boolean variable to read.
4314 */
4315DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4316{
4317 ASMMemoryFence();
4318 return *pf; /* byte reads are atomic on x86 */
4319}
4320
4321
4322/**
4323 * Atomically reads a boolean value, unordered.
4324 *
4325 * @returns Current *pf value
4326 * @param pf Pointer to the boolean variable to read.
4327 */
4328DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4329{
4330 return *pf; /* byte reads are atomic on x86 */
4331}
4332
4333
4334/**
4335 * Atomically read a typical IPRT handle value, ordered.
4336 *
4337 * @param ph Pointer to the handle variable to read.
4338 * @param phRes Where to store the result.
4339 *
4340 * @remarks This doesn't currently work for all handles (like RTFILE).
4341 */
4342#if HC_ARCH_BITS == 32
4343# define ASMAtomicReadHandle(ph, phRes) \
4344 do { \
4345 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4346 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4347 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4348 } while (0)
4349#elif HC_ARCH_BITS == 64
4350# define ASMAtomicReadHandle(ph, phRes) \
4351 do { \
4352 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4353 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4354 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4355 } while (0)
4356#else
4357# error HC_ARCH_BITS
4358#endif
4359
4360
4361/**
4362 * Atomically read a typical IPRT handle value, unordered.
4363 *
4364 * @param ph Pointer to the handle variable to read.
4365 * @param phRes Where to store the result.
4366 *
4367 * @remarks This doesn't currently work for all handles (like RTFILE).
4368 */
4369#if HC_ARCH_BITS == 32
4370# define ASMAtomicUoReadHandle(ph, phRes) \
4371 do { \
4372 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4373 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4374 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4375 } while (0)
4376#elif HC_ARCH_BITS == 64
4377# define ASMAtomicUoReadHandle(ph, phRes) \
4378 do { \
4379 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4380 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4381 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4382 } while (0)
4383#else
4384# error HC_ARCH_BITS
4385#endif
4386
4387
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4405
4406
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4424
4425
4426/**
4427 * Atomically writes an unsigned 8-bit value, ordered.
4428 *
4429 * @param pu8 Pointer to the 8-bit variable.
4430 * @param u8 The 8-bit value to assign to *pu8.
4431 */
4432DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4433{
4434 ASMAtomicXchgU8(pu8, u8);
4435}
4436
4437
4438/**
4439 * Atomically writes an unsigned 8-bit value, unordered.
4440 *
4441 * @param pu8 Pointer to the 8-bit variable.
4442 * @param u8 The 8-bit value to assign to *pu8.
4443 */
4444DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4445{
4446 *pu8 = u8; /* byte writes are atomic on x86 */
4447}
4448
4449
4450/**
4451 * Atomically writes a signed 8-bit value, ordered.
4452 *
4453 * @param pi8 Pointer to the 8-bit variable to read.
4454 * @param i8 The 8-bit value to assign to *pi8.
4455 */
4456DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4457{
4458 ASMAtomicXchgS8(pi8, i8);
4459}
4460
4461
4462/**
4463 * Atomically writes a signed 8-bit value, unordered.
4464 *
4465 * @param pi8 Pointer to the 8-bit variable to read.
4466 * @param i8 The 8-bit value to assign to *pi8.
4467 */
4468DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4469{
4470 *pi8 = i8; /* byte writes are atomic on x86 */
4471}
4472
4473
4474/**
4475 * Atomically writes an unsigned 16-bit value, ordered.
4476 *
4477 * @param pu16 Pointer to the 16-bit variable.
4478 * @param u16 The 16-bit value to assign to *pu16.
4479 */
4480DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4481{
4482 ASMAtomicXchgU16(pu16, u16);
4483}
4484
4485
4486/**
4487 * Atomically writes an unsigned 16-bit value, unordered.
4488 *
4489 * @param pu16 Pointer to the 16-bit variable.
4490 * @param u16 The 16-bit value to assign to *pu16.
4491 */
4492DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4493{
4494 Assert(!((uintptr_t)pu16 & 1));
4495 *pu16 = u16;
4496}
4497
4498
4499/**
4500 * Atomically writes a signed 16-bit value, ordered.
4501 *
4502 * @param pi16 Pointer to the 16-bit variable to read.
4503 * @param i16 The 16-bit value to assign to *pi16.
4504 */
4505DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4506{
4507 ASMAtomicXchgS16(pi16, i16);
4508}
4509
4510
4511/**
4512 * Atomically writes a signed 16-bit value, unordered.
4513 *
4514 * @param pi16 Pointer to the 16-bit variable to read.
4515 * @param i16 The 16-bit value to assign to *pi16.
4516 */
4517DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4518{
4519 Assert(!((uintptr_t)pi16 & 1));
4520 *pi16 = i16;
4521}
4522
4523
4524/**
4525 * Atomically writes an unsigned 32-bit value, ordered.
4526 *
4527 * @param pu32 Pointer to the 32-bit variable.
4528 * @param u32 The 32-bit value to assign to *pu32.
4529 */
4530DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4531{
4532 ASMAtomicXchgU32(pu32, u32);
4533}
4534
4535
4536/**
4537 * Atomically writes an unsigned 32-bit value, unordered.
4538 *
4539 * @param pu32 Pointer to the 32-bit variable.
4540 * @param u32 The 32-bit value to assign to *pu32.
4541 */
4542DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4543{
4544 Assert(!((uintptr_t)pu32 & 3));
4545 *pu32 = u32;
4546}
4547
4548
4549/**
4550 * Atomically writes a signed 32-bit value, ordered.
4551 *
4552 * @param pi32 Pointer to the 32-bit variable to read.
4553 * @param i32 The 32-bit value to assign to *pi32.
4554 */
4555DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4556{
4557 ASMAtomicXchgS32(pi32, i32);
4558}
4559
4560
4561/**
4562 * Atomically writes a signed 32-bit value, unordered.
4563 *
4564 * @param pi32 Pointer to the 32-bit variable to read.
4565 * @param i32 The 32-bit value to assign to *pi32.
4566 */
4567DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4568{
4569 Assert(!((uintptr_t)pi32 & 3));
4570 *pi32 = i32;
4571}
4572
4573
4574/**
4575 * Atomically writes an unsigned 64-bit value, ordered.
4576 *
4577 * @param pu64 Pointer to the 64-bit variable.
4578 * @param u64 The 64-bit value to assign to *pu64.
4579 */
4580DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4581{
4582 ASMAtomicXchgU64(pu64, u64);
4583}
4584
4585
4586/**
4587 * Atomically writes an unsigned 64-bit value, unordered.
4588 *
4589 * @param pu64 Pointer to the 64-bit variable.
4590 * @param u64 The 64-bit value to assign to *pu64.
4591 */
4592DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4593{
4594 Assert(!((uintptr_t)pu64 & 7));
4595#if ARCH_BITS == 64
4596 *pu64 = u64;
4597#else
4598 ASMAtomicXchgU64(pu64, u64);
4599#endif
4600}
4601
4602
4603/**
4604 * Atomically writes a signed 64-bit value, ordered.
4605 *
4606 * @param pi64 Pointer to the 64-bit variable.
4607 * @param i64 The 64-bit value to assign to *pi64.
4608 */
4609DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4610{
4611 ASMAtomicXchgS64(pi64, i64);
4612}
4613
4614
4615/**
4616 * Atomically writes a signed 64-bit value, unordered.
4617 *
4618 * @param pi64 Pointer to the 64-bit variable.
4619 * @param i64 The 64-bit value to assign to *pi64.
4620 */
4621DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4622{
4623 Assert(!((uintptr_t)pi64 & 7));
4624#if ARCH_BITS == 64
4625 *pi64 = i64;
4626#else
4627 ASMAtomicXchgS64(pi64, i64);
4628#endif
4629}
4630
4631
4632/**
4633 * Atomically writes a boolean value, unordered.
4634 *
4635 * @param pf Pointer to the boolean variable.
4636 * @param f The boolean value to assign to *pf.
4637 */
4638DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4639{
4640 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4641}
4642
4643
4644/**
4645 * Atomically writes a boolean value, unordered.
4646 *
4647 * @param pf Pointer to the boolean variable.
4648 * @param f The boolean value to assign to *pf.
4649 */
4650DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4651{
4652 *pf = f; /* byte writes are atomic on x86 */
4653}
4654
4655
4656/**
4657 * Atomically writes a pointer value, ordered.
4658 *
4659 * @returns Current *pv value
4660 * @param ppv Pointer to the pointer variable.
4661 * @param pv The pointer value to assigne to *ppv.
4662 */
4663DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4664{
4665#if ARCH_BITS == 32
4666 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4667#elif ARCH_BITS == 64
4668 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4669#else
4670# error "ARCH_BITS is bogus"
4671#endif
4672}
4673
4674
4675/**
4676 * Atomically writes a pointer value, unordered.
4677 *
4678 * @returns Current *pv value
4679 * @param ppv Pointer to the pointer variable.
4680 * @param pv The pointer value to assigne to *ppv.
4681 */
4682DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4683{
4684#if ARCH_BITS == 32
4685 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4686#elif ARCH_BITS == 64
4687 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4688#else
4689# error "ARCH_BITS is bogus"
4690#endif
4691}
4692
4693
4694/**
4695 * Atomically write a typical IPRT handle value, ordered.
4696 *
4697 * @param ph Pointer to the variable to update.
4698 * @param hNew The value to assign to *ph.
4699 *
4700 * @remarks This doesn't currently work for all handles (like RTFILE).
4701 */
4702#if HC_ARCH_BITS == 32
4703# define ASMAtomicWriteHandle(ph, hNew) \
4704 do { \
4705 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4706 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4707 } while (0)
4708#elif HC_ARCH_BITS == 64
4709# define ASMAtomicWriteHandle(ph, hNew) \
4710 do { \
4711 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4712 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4713 } while (0)
4714#else
4715# error HC_ARCH_BITS
4716#endif
4717
4718
4719/**
4720 * Atomically write a typical IPRT handle value, unordered.
4721 *
4722 * @param ph Pointer to the variable to update.
4723 * @param hNew The value to assign to *ph.
4724 *
4725 * @remarks This doesn't currently work for all handles (like RTFILE).
4726 */
4727#if HC_ARCH_BITS == 32
4728# define ASMAtomicUoWriteHandle(ph, hNew) \
4729 do { \
4730 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4731 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4732 } while (0)
4733#elif HC_ARCH_BITS == 64
4734# define ASMAtomicUoWriteHandle(ph, hNew) \
4735 do { \
4736 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4737 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4738 } while (0)
4739#else
4740# error HC_ARCH_BITS
4741#endif
4742
4743
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4761
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4779
4780
4781
4782
4783/**
4784 * Invalidate page.
4785 *
4786 * @param pv Address of the page to invalidate.
4787 */
4788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4789DECLASM(void) ASMInvalidatePage(void *pv);
4790#else
4791DECLINLINE(void) ASMInvalidatePage(void *pv)
4792{
4793# if RT_INLINE_ASM_USES_INTRIN
4794 __invlpg(pv);
4795
4796# elif RT_INLINE_ASM_GNU_STYLE
4797 __asm__ __volatile__("invlpg %0\n\t"
4798 : : "m" (*(uint8_t *)pv));
4799# else
4800 __asm
4801 {
4802# ifdef RT_ARCH_AMD64
4803 mov rax, [pv]
4804 invlpg [rax]
4805# else
4806 mov eax, [pv]
4807 invlpg [eax]
4808# endif
4809 }
4810# endif
4811}
4812#endif
4813
4814
4815/**
4816 * Write back the internal caches and invalidate them.
4817 */
4818#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4819DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
4820#else
4821DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
4822{
4823# if RT_INLINE_ASM_USES_INTRIN
4824 __wbinvd();
4825
4826# elif RT_INLINE_ASM_GNU_STYLE
4827 __asm__ __volatile__("wbinvd");
4828# else
4829 __asm
4830 {
4831 wbinvd
4832 }
4833# endif
4834}
4835#endif
4836
4837
4838/**
4839 * Invalidate internal and (perhaps) external caches without first
4840 * flushing dirty cache lines. Use with extreme care.
4841 */
4842#if RT_INLINE_ASM_EXTERNAL
4843DECLASM(void) ASMInvalidateInternalCaches(void);
4844#else
4845DECLINLINE(void) ASMInvalidateInternalCaches(void)
4846{
4847# if RT_INLINE_ASM_GNU_STYLE
4848 __asm__ __volatile__("invd");
4849# else
4850 __asm
4851 {
4852 invd
4853 }
4854# endif
4855}
4856#endif
4857
4858
4859#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4860# if PAGE_SIZE != 0x1000
4861# error "PAGE_SIZE is not 0x1000!"
4862# endif
4863#endif
4864
4865/**
4866 * Zeros a 4K memory page.
4867 *
4868 * @param pv Pointer to the memory block. This must be page aligned.
4869 */
4870#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4871DECLASM(void) ASMMemZeroPage(volatile void *pv);
4872# else
4873DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4874{
4875# if RT_INLINE_ASM_USES_INTRIN
4876# ifdef RT_ARCH_AMD64
4877 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4878# else
4879 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4880# endif
4881
4882# elif RT_INLINE_ASM_GNU_STYLE
4883 RTCCUINTREG uDummy;
4884# ifdef RT_ARCH_AMD64
4885 __asm__ __volatile__("rep stosq"
4886 : "=D" (pv),
4887 "=c" (uDummy)
4888 : "0" (pv),
4889 "c" (0x1000 >> 3),
4890 "a" (0)
4891 : "memory");
4892# else
4893 __asm__ __volatile__("rep stosl"
4894 : "=D" (pv),
4895 "=c" (uDummy)
4896 : "0" (pv),
4897 "c" (0x1000 >> 2),
4898 "a" (0)
4899 : "memory");
4900# endif
4901# else
4902 __asm
4903 {
4904# ifdef RT_ARCH_AMD64
4905 xor rax, rax
4906 mov ecx, 0200h
4907 mov rdi, [pv]
4908 rep stosq
4909# else
4910 xor eax, eax
4911 mov ecx, 0400h
4912 mov edi, [pv]
4913 rep stosd
4914# endif
4915 }
4916# endif
4917}
4918# endif
4919
4920
4921/**
4922 * Zeros a memory block with a 32-bit aligned size.
4923 *
4924 * @param pv Pointer to the memory block.
4925 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4926 */
4927#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4928DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4929#else
4930DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4931{
4932# if RT_INLINE_ASM_USES_INTRIN
4933# ifdef RT_ARCH_AMD64
4934 if (!(cb & 7))
4935 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4936 else
4937# endif
4938 __stosd((unsigned long *)pv, 0, cb / 4);
4939
4940# elif RT_INLINE_ASM_GNU_STYLE
4941 __asm__ __volatile__("rep stosl"
4942 : "=D" (pv),
4943 "=c" (cb)
4944 : "0" (pv),
4945 "1" (cb >> 2),
4946 "a" (0)
4947 : "memory");
4948# else
4949 __asm
4950 {
4951 xor eax, eax
4952# ifdef RT_ARCH_AMD64
4953 mov rcx, [cb]
4954 shr rcx, 2
4955 mov rdi, [pv]
4956# else
4957 mov ecx, [cb]
4958 shr ecx, 2
4959 mov edi, [pv]
4960# endif
4961 rep stosd
4962 }
4963# endif
4964}
4965#endif
4966
4967
4968/**
4969 * Fills a memory block with a 32-bit aligned size.
4970 *
4971 * @param pv Pointer to the memory block.
4972 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4973 * @param u32 The value to fill with.
4974 */
4975#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4976DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4977#else
4978DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4979{
4980# if RT_INLINE_ASM_USES_INTRIN
4981# ifdef RT_ARCH_AMD64
4982 if (!(cb & 7))
4983 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4984 else
4985# endif
4986 __stosd((unsigned long *)pv, u32, cb / 4);
4987
4988# elif RT_INLINE_ASM_GNU_STYLE
4989 __asm__ __volatile__("rep stosl"
4990 : "=D" (pv),
4991 "=c" (cb)
4992 : "0" (pv),
4993 "1" (cb >> 2),
4994 "a" (u32)
4995 : "memory");
4996# else
4997 __asm
4998 {
4999# ifdef RT_ARCH_AMD64
5000 mov rcx, [cb]
5001 shr rcx, 2
5002 mov rdi, [pv]
5003# else
5004 mov ecx, [cb]
5005 shr ecx, 2
5006 mov edi, [pv]
5007# endif
5008 mov eax, [u32]
5009 rep stosd
5010 }
5011# endif
5012}
5013#endif
5014
5015
5016/**
5017 * Checks if a memory block is filled with the specified byte.
5018 *
5019 * This is a sort of inverted memchr.
5020 *
5021 * @returns Pointer to the byte which doesn't equal u8.
5022 * @returns NULL if all equal to u8.
5023 *
5024 * @param pv Pointer to the memory block.
5025 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5026 * @param u8 The value it's supposed to be filled with.
5027 */
5028#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5029DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
5030#else
5031DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
5032{
5033/** @todo rewrite this in inline assembly? */
5034 uint8_t const *pb = (uint8_t const *)pv;
5035 for (; cb; cb--, pb++)
5036 if (RT_UNLIKELY(*pb != u8))
5037 return (void *)pb;
5038 return NULL;
5039}
5040#endif
5041
5042
5043/**
5044 * Checks if a memory block is filled with the specified 32-bit value.
5045 *
5046 * This is a sort of inverted memchr.
5047 *
5048 * @returns Pointer to the first value which doesn't equal u32.
5049 * @returns NULL if all equal to u32.
5050 *
5051 * @param pv Pointer to the memory block.
5052 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5053 * @param u32 The value it's supposed to be filled with.
5054 */
5055#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5056DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5057#else
5058DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5059{
5060/** @todo rewrite this in inline assembly? */
5061 uint32_t const *pu32 = (uint32_t const *)pv;
5062 for (; cb; cb -= 4, pu32++)
5063 if (RT_UNLIKELY(*pu32 != u32))
5064 return (uint32_t *)pu32;
5065 return NULL;
5066}
5067#endif
5068
5069
5070/**
5071 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5072 *
5073 * @returns u32F1 * u32F2.
5074 */
5075#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5076DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5077#else
5078DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5079{
5080# ifdef RT_ARCH_AMD64
5081 return (uint64_t)u32F1 * u32F2;
5082# else /* !RT_ARCH_AMD64 */
5083 uint64_t u64;
5084# if RT_INLINE_ASM_GNU_STYLE
5085 __asm__ __volatile__("mull %%edx"
5086 : "=A" (u64)
5087 : "a" (u32F2), "d" (u32F1));
5088# else
5089 __asm
5090 {
5091 mov edx, [u32F1]
5092 mov eax, [u32F2]
5093 mul edx
5094 mov dword ptr [u64], eax
5095 mov dword ptr [u64 + 4], edx
5096 }
5097# endif
5098 return u64;
5099# endif /* !RT_ARCH_AMD64 */
5100}
5101#endif
5102
5103
5104/**
5105 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5106 *
5107 * @returns u32F1 * u32F2.
5108 */
5109#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5110DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5111#else
5112DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5113{
5114# ifdef RT_ARCH_AMD64
5115 return (int64_t)i32F1 * i32F2;
5116# else /* !RT_ARCH_AMD64 */
5117 int64_t i64;
5118# if RT_INLINE_ASM_GNU_STYLE
5119 __asm__ __volatile__("imull %%edx"
5120 : "=A" (i64)
5121 : "a" (i32F2), "d" (i32F1));
5122# else
5123 __asm
5124 {
5125 mov edx, [i32F1]
5126 mov eax, [i32F2]
5127 imul edx
5128 mov dword ptr [i64], eax
5129 mov dword ptr [i64 + 4], edx
5130 }
5131# endif
5132 return i64;
5133# endif /* !RT_ARCH_AMD64 */
5134}
5135#endif
5136
5137
5138/**
5139 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5140 *
5141 * @returns u64 / u32.
5142 */
5143#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5144DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5145#else
5146DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5147{
5148# ifdef RT_ARCH_AMD64
5149 return (uint32_t)(u64 / u32);
5150# else /* !RT_ARCH_AMD64 */
5151# if RT_INLINE_ASM_GNU_STYLE
5152 RTCCUINTREG uDummy;
5153 __asm__ __volatile__("divl %3"
5154 : "=a" (u32), "=d"(uDummy)
5155 : "A" (u64), "r" (u32));
5156# else
5157 __asm
5158 {
5159 mov eax, dword ptr [u64]
5160 mov edx, dword ptr [u64 + 4]
5161 mov ecx, [u32]
5162 div ecx
5163 mov [u32], eax
5164 }
5165# endif
5166 return u32;
5167# endif /* !RT_ARCH_AMD64 */
5168}
5169#endif
5170
5171
5172/**
5173 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5174 *
5175 * @returns u64 / u32.
5176 */
5177#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5178DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5179#else
5180DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5181{
5182# ifdef RT_ARCH_AMD64
5183 return (int32_t)(i64 / i32);
5184# else /* !RT_ARCH_AMD64 */
5185# if RT_INLINE_ASM_GNU_STYLE
5186 RTCCUINTREG iDummy;
5187 __asm__ __volatile__("idivl %3"
5188 : "=a" (i32), "=d"(iDummy)
5189 : "A" (i64), "r" (i32));
5190# else
5191 __asm
5192 {
5193 mov eax, dword ptr [i64]
5194 mov edx, dword ptr [i64 + 4]
5195 mov ecx, [i32]
5196 idiv ecx
5197 mov [i32], eax
5198 }
5199# endif
5200 return i32;
5201# endif /* !RT_ARCH_AMD64 */
5202}
5203#endif
5204
5205
5206/**
5207 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5208 * returning the rest.
5209 *
5210 * @returns u64 % u32.
5211 *
5212 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5213 */
5214#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5215DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5216#else
5217DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5218{
5219# ifdef RT_ARCH_AMD64
5220 return (uint32_t)(u64 % u32);
5221# else /* !RT_ARCH_AMD64 */
5222# if RT_INLINE_ASM_GNU_STYLE
5223 RTCCUINTREG uDummy;
5224 __asm__ __volatile__("divl %3"
5225 : "=a" (uDummy), "=d"(u32)
5226 : "A" (u64), "r" (u32));
5227# else
5228 __asm
5229 {
5230 mov eax, dword ptr [u64]
5231 mov edx, dword ptr [u64 + 4]
5232 mov ecx, [u32]
5233 div ecx
5234 mov [u32], edx
5235 }
5236# endif
5237 return u32;
5238# endif /* !RT_ARCH_AMD64 */
5239}
5240#endif
5241
5242
5243/**
5244 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5245 * returning the rest.
5246 *
5247 * @returns u64 % u32.
5248 *
5249 * @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
5250 */
5251#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5252DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5253#else
5254DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5255{
5256# ifdef RT_ARCH_AMD64
5257 return (int32_t)(i64 % i32);
5258# else /* !RT_ARCH_AMD64 */
5259# if RT_INLINE_ASM_GNU_STYLE
5260 RTCCUINTREG iDummy;
5261 __asm__ __volatile__("idivl %3"
5262 : "=a" (iDummy), "=d"(i32)
5263 : "A" (i64), "r" (i32));
5264# else
5265 __asm
5266 {
5267 mov eax, dword ptr [i64]
5268 mov edx, dword ptr [i64 + 4]
5269 mov ecx, [i32]
5270 idiv ecx
5271 mov [i32], edx
5272 }
5273# endif
5274 return i32;
5275# endif /* !RT_ARCH_AMD64 */
5276}
5277#endif
5278
5279
5280/**
5281 * Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
5282 * using a 96 bit intermediate result.
5283 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5284 * __udivdi3 and __umoddi3 even if this inline function is not used.
5285 *
5286 * @returns (u64A * u32B) / u32C.
5287 * @param u64A The 64-bit value.
5288 * @param u32B The 32-bit value to multiple by A.
5289 * @param u32C The 32-bit value to divide A*B by.
5290 */
5291#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5292DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5293#else
5294DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5295{
5296# if RT_INLINE_ASM_GNU_STYLE
5297# ifdef RT_ARCH_AMD64
5298 uint64_t u64Result, u64Spill;
5299 __asm__ __volatile__("mulq %2\n\t"
5300 "divq %3\n\t"
5301 : "=a" (u64Result),
5302 "=d" (u64Spill)
5303 : "r" ((uint64_t)u32B),
5304 "r" ((uint64_t)u32C),
5305 "0" (u64A),
5306 "1" (0));
5307 return u64Result;
5308# else
5309 uint32_t u32Dummy;
5310 uint64_t u64Result;
5311 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5312 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5313 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5314 eax = u64A.hi */
5315 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
5316 edx = u32C */
5317 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5318 edx = u32B */
5319 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5320 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5321 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5322 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5323 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5324 edx = u64Hi % u32C */
5325 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5326 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5327 "divl %%ecx \n\t" /* u64Result.lo */
5328 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5329 : "=A"(u64Result), "=c"(u32Dummy),
5330 "=S"(u32Dummy), "=D"(u32Dummy)
5331 : "a"((uint32_t)u64A),
5332 "S"((uint32_t)(u64A >> 32)),
5333 "c"(u32B),
5334 "D"(u32C));
5335 return u64Result;
5336# endif
5337# else
5338 RTUINT64U u;
5339 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5340 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5341 u64Hi += (u64Lo >> 32);
5342 u.s.Hi = (uint32_t)(u64Hi / u32C);
5343 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5344 return u.u;
5345# endif
5346}
5347#endif
5348
5349
5350/**
5351 * Probes a byte pointer for read access.
5352 *
5353 * While the function will not fault if the byte is not read accessible,
5354 * the idea is to do this in a safe place like before acquiring locks
5355 * and such like.
5356 *
5357 * Also, this functions guarantees that an eager compiler is not going
5358 * to optimize the probing away.
5359 *
5360 * @param pvByte Pointer to the byte.
5361 */
5362#if RT_INLINE_ASM_EXTERNAL
5363DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5364#else
5365DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5366{
5367 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5368 uint8_t u8;
5369# if RT_INLINE_ASM_GNU_STYLE
5370 __asm__ __volatile__("movb (%1), %0\n\t"
5371 : "=r" (u8)
5372 : "r" (pvByte));
5373# else
5374 __asm
5375 {
5376# ifdef RT_ARCH_AMD64
5377 mov rax, [pvByte]
5378 mov al, [rax]
5379# else
5380 mov eax, [pvByte]
5381 mov al, [eax]
5382# endif
5383 mov [u8], al
5384 }
5385# endif
5386 return u8;
5387}
5388#endif
5389
5390/**
5391 * Probes a buffer for read access page by page.
5392 *
5393 * While the function will fault if the buffer is not fully read
5394 * accessible, the idea is to do this in a safe place like before
5395 * acquiring locks and such like.
5396 *
5397 * Also, this functions guarantees that an eager compiler is not going
5398 * to optimize the probing away.
5399 *
5400 * @param pvBuf Pointer to the buffer.
5401 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5402 */
5403DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5404{
5405 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5406 /* the first byte */
5407 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5408 ASMProbeReadByte(pu8);
5409
5410 /* the pages in between pages. */
5411 while (cbBuf > /*PAGE_SIZE*/0x1000)
5412 {
5413 ASMProbeReadByte(pu8);
5414 cbBuf -= /*PAGE_SIZE*/0x1000;
5415 pu8 += /*PAGE_SIZE*/0x1000;
5416 }
5417
5418 /* the last byte */
5419 ASMProbeReadByte(pu8 + cbBuf - 1);
5420}
5421
5422
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark  In the gnu world a nop instruction is added after the int3 to
 *          force gdb to remain at the int3 source line.
 * @remark  The L4 kernel will try to make sense of the breakpoint, thus the
 *          jmp in the L4 variant.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# ifdef __L4ENV__
#  define ASMBreakpoint()       do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
# else
#  define ASMBreakpoint()       do { __asm__ __volatile__("int3\n\tnop"); } while (0)
# endif
#else
# define ASMBreakpoint()        __debugbreak()
#endif
5439
5440
5441
5442/** @defgroup grp_inline_bits Bit Operations
5443 * @{
5444 */
5445
5446
5447/**
5448 * Sets a bit in a bitmap.
5449 *
5450 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
5451 * @param iBit The bit to set.
5452 *
5453 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5454 * However, doing so will yield better performance as well as avoiding
5455 * traps accessing the last bits in the bitmap.
5456 */
5457#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5458DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5459#else
5460DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5461{
5462# if RT_INLINE_ASM_USES_INTRIN
5463 _bittestandset((long *)pvBitmap, iBit);
5464
5465# elif RT_INLINE_ASM_GNU_STYLE
5466 __asm__ __volatile__("btsl %1, %0"
5467 : "=m" (*(volatile long *)pvBitmap)
5468 : "Ir" (iBit),
5469 "m" (*(volatile long *)pvBitmap)
5470 : "memory");
5471# else
5472 __asm
5473 {
5474# ifdef RT_ARCH_AMD64
5475 mov rax, [pvBitmap]
5476 mov edx, [iBit]
5477 bts [rax], edx
5478# else
5479 mov eax, [pvBitmap]
5480 mov edx, [iBit]
5481 bts [eax], edx
5482# endif
5483 }
5484# endif
5485}
5486#endif
5487
5488
5489/**
5490 * Atomically sets a bit in a bitmap, ordered.
5491 *
5492 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5493 * the memory access isn't atomic!
5494 * @param iBit The bit to set.
5495 */
5496#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5497DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5498#else
5499DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5500{
5501 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5502# if RT_INLINE_ASM_USES_INTRIN
5503 _interlockedbittestandset((long *)pvBitmap, iBit);
5504# elif RT_INLINE_ASM_GNU_STYLE
5505 __asm__ __volatile__("lock; btsl %1, %0"
5506 : "=m" (*(volatile long *)pvBitmap)
5507 : "Ir" (iBit),
5508 "m" (*(volatile long *)pvBitmap)
5509 : "memory");
5510# else
5511 __asm
5512 {
5513# ifdef RT_ARCH_AMD64
5514 mov rax, [pvBitmap]
5515 mov edx, [iBit]
5516 lock bts [rax], edx
5517# else
5518 mov eax, [pvBitmap]
5519 mov edx, [iBit]
5520 lock bts [eax], edx
5521# endif
5522 }
5523# endif
5524}
5525#endif
5526
5527
5528/**
5529 * Clears a bit in a bitmap.
5530 *
5531 * @param pvBitmap Pointer to the bitmap.
5532 * @param iBit The bit to clear.
5533 *
5534 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5535 * However, doing so will yield better performance as well as avoiding
5536 * traps accessing the last bits in the bitmap.
5537 */
5538#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5539DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5540#else
5541DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5542{
5543# if RT_INLINE_ASM_USES_INTRIN
5544 _bittestandreset((long *)pvBitmap, iBit);
5545
5546# elif RT_INLINE_ASM_GNU_STYLE
5547 __asm__ __volatile__("btrl %1, %0"
5548 : "=m" (*(volatile long *)pvBitmap)
5549 : "Ir" (iBit),
5550 "m" (*(volatile long *)pvBitmap)
5551 : "memory");
5552# else
5553 __asm
5554 {
5555# ifdef RT_ARCH_AMD64
5556 mov rax, [pvBitmap]
5557 mov edx, [iBit]
5558 btr [rax], edx
5559# else
5560 mov eax, [pvBitmap]
5561 mov edx, [iBit]
5562 btr [eax], edx
5563# endif
5564 }
5565# endif
5566}
5567#endif
5568
5569
5570/**
5571 * Atomically clears a bit in a bitmap, ordered.
5572 *
5573 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5574 * the memory access isn't atomic!
5575 * @param iBit The bit to toggle set.
5576 * @remarks No memory barrier, take care on smp.
5577 */
5578#if RT_INLINE_ASM_EXTERNAL
5579DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5580#else
5581DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5582{
5583 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5584# if RT_INLINE_ASM_GNU_STYLE
5585 __asm__ __volatile__("lock; btrl %1, %0"
5586 : "=m" (*(volatile long *)pvBitmap)
5587 : "Ir" (iBit),
5588 "m" (*(volatile long *)pvBitmap)
5589 : "memory");
5590# else
5591 __asm
5592 {
5593# ifdef RT_ARCH_AMD64
5594 mov rax, [pvBitmap]
5595 mov edx, [iBit]
5596 lock btr [rax], edx
5597# else
5598 mov eax, [pvBitmap]
5599 mov edx, [iBit]
5600 lock btr [eax], edx
5601# endif
5602 }
5603# endif
5604}
5605#endif
5606
5607
5608/**
5609 * Toggles a bit in a bitmap.
5610 *
5611 * @param pvBitmap Pointer to the bitmap.
5612 * @param iBit The bit to toggle.
5613 *
5614 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5615 * However, doing so will yield better performance as well as avoiding
5616 * traps accessing the last bits in the bitmap.
5617 */
5618#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5619DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5620#else
5621DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5622{
5623# if RT_INLINE_ASM_USES_INTRIN
5624 _bittestandcomplement((long *)pvBitmap, iBit);
5625# elif RT_INLINE_ASM_GNU_STYLE
5626 __asm__ __volatile__("btcl %1, %0"
5627 : "=m" (*(volatile long *)pvBitmap)
5628 : "Ir" (iBit),
5629 "m" (*(volatile long *)pvBitmap)
5630 : "memory");
5631# else
5632 __asm
5633 {
5634# ifdef RT_ARCH_AMD64
5635 mov rax, [pvBitmap]
5636 mov edx, [iBit]
5637 btc [rax], edx
5638# else
5639 mov eax, [pvBitmap]
5640 mov edx, [iBit]
5641 btc [eax], edx
5642# endif
5643 }
5644# endif
5645}
5646#endif
5647
5648
5649/**
5650 * Atomically toggles a bit in a bitmap, ordered.
5651 *
5652 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5653 * the memory access isn't atomic!
5654 * @param iBit The bit to test and set.
5655 */
5656#if RT_INLINE_ASM_EXTERNAL
5657DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5658#else
5659DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5660{
5661 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5662# if RT_INLINE_ASM_GNU_STYLE
5663 __asm__ __volatile__("lock; btcl %1, %0"
5664 : "=m" (*(volatile long *)pvBitmap)
5665 : "Ir" (iBit),
5666 "m" (*(volatile long *)pvBitmap)
5667 : "memory");
5668# else
5669 __asm
5670 {
5671# ifdef RT_ARCH_AMD64
5672 mov rax, [pvBitmap]
5673 mov edx, [iBit]
5674 lock btc [rax], edx
5675# else
5676 mov eax, [pvBitmap]
5677 mov edx, [iBit]
5678 lock btc [eax], edx
5679# endif
5680 }
5681# endif
5682}
5683#endif
5684
5685
5686/**
5687 * Tests and sets a bit in a bitmap.
5688 *
5689 * @returns true if the bit was set.
5690 * @returns false if the bit was clear.
5691 *
5692 * @param pvBitmap Pointer to the bitmap.
5693 * @param iBit The bit to test and set.
5694 *
5695 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5696 * However, doing so will yield better performance as well as avoiding
5697 * traps accessing the last bits in the bitmap.
5698 */
5699#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5700DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5701#else
5702DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5703{
5704 union { bool f; uint32_t u32; uint8_t u8; } rc;
5705# if RT_INLINE_ASM_USES_INTRIN
5706 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5707
5708# elif RT_INLINE_ASM_GNU_STYLE
5709 __asm__ __volatile__("btsl %2, %1\n\t"
5710 "setc %b0\n\t"
5711 "andl $1, %0\n\t"
5712 : "=q" (rc.u32),
5713 "=m" (*(volatile long *)pvBitmap)
5714 : "Ir" (iBit),
5715 "m" (*(volatile long *)pvBitmap)
5716 : "memory");
5717# else
5718 __asm
5719 {
5720 mov edx, [iBit]
5721# ifdef RT_ARCH_AMD64
5722 mov rax, [pvBitmap]
5723 bts [rax], edx
5724# else
5725 mov eax, [pvBitmap]
5726 bts [eax], edx
5727# endif
5728 setc al
5729 and eax, 1
5730 mov [rc.u32], eax
5731 }
5732# endif
5733 return rc.f;
5734}
5735#endif
5736
5737
5738/**
5739 * Atomically tests and sets a bit in a bitmap, ordered.
5740 *
5741 * @returns true if the bit was set.
5742 * @returns false if the bit was clear.
5743 *
5744 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5745 * the memory access isn't atomic!
5746 * @param iBit The bit to set.
5747 */
5748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5749DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5750#else
5751DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5752{
5753 union { bool f; uint32_t u32; uint8_t u8; } rc;
5754 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5755# if RT_INLINE_ASM_USES_INTRIN
5756 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5757# elif RT_INLINE_ASM_GNU_STYLE
5758 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5759 "setc %b0\n\t"
5760 "andl $1, %0\n\t"
5761 : "=q" (rc.u32),
5762 "=m" (*(volatile long *)pvBitmap)
5763 : "Ir" (iBit),
5764 "m" (*(volatile long *)pvBitmap)
5765 : "memory");
5766# else
5767 __asm
5768 {
5769 mov edx, [iBit]
5770# ifdef RT_ARCH_AMD64
5771 mov rax, [pvBitmap]
5772 lock bts [rax], edx
5773# else
5774 mov eax, [pvBitmap]
5775 lock bts [eax], edx
5776# endif
5777 setc al
5778 and eax, 1
5779 mov [rc.u32], eax
5780 }
5781# endif
5782 return rc.f;
5783}
5784#endif
5785
5786
5787/**
5788 * Tests and clears a bit in a bitmap.
5789 *
5790 * @returns true if the bit was set.
5791 * @returns false if the bit was clear.
5792 *
5793 * @param pvBitmap Pointer to the bitmap.
5794 * @param iBit The bit to test and clear.
5795 *
5796 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5797 * However, doing so will yield better performance as well as avoiding
5798 * traps accessing the last bits in the bitmap.
5799 */
5800#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5801DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5802#else
5803DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5804{
5805 union { bool f; uint32_t u32; uint8_t u8; } rc;
5806# if RT_INLINE_ASM_USES_INTRIN
5807 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5808
5809# elif RT_INLINE_ASM_GNU_STYLE
5810 __asm__ __volatile__("btrl %2, %1\n\t"
5811 "setc %b0\n\t"
5812 "andl $1, %0\n\t"
5813 : "=q" (rc.u32),
5814 "=m" (*(volatile long *)pvBitmap)
5815 : "Ir" (iBit),
5816 "m" (*(volatile long *)pvBitmap)
5817 : "memory");
5818# else
5819 __asm
5820 {
5821 mov edx, [iBit]
5822# ifdef RT_ARCH_AMD64
5823 mov rax, [pvBitmap]
5824 btr [rax], edx
5825# else
5826 mov eax, [pvBitmap]
5827 btr [eax], edx
5828# endif
5829 setc al
5830 and eax, 1
5831 mov [rc.u32], eax
5832 }
5833# endif
5834 return rc.f;
5835}
5836#endif
5837
5838
5839/**
5840 * Atomically tests and clears a bit in a bitmap, ordered.
5841 *
5842 * @returns true if the bit was set.
5843 * @returns false if the bit was clear.
5844 *
5845 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5846 * the memory access isn't atomic!
5847 * @param iBit The bit to test and clear.
5848 *
5849 * @remarks No memory barrier, take care on smp.
5850 */
5851#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5852DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5853#else
5854DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5855{
5856 union { bool f; uint32_t u32; uint8_t u8; } rc;
5857 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5858# if RT_INLINE_ASM_USES_INTRIN
5859 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5860
5861# elif RT_INLINE_ASM_GNU_STYLE
5862 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5863 "setc %b0\n\t"
5864 "andl $1, %0\n\t"
5865 : "=q" (rc.u32),
5866 "=m" (*(volatile long *)pvBitmap)
5867 : "Ir" (iBit),
5868 "m" (*(volatile long *)pvBitmap)
5869 : "memory");
5870# else
5871 __asm
5872 {
5873 mov edx, [iBit]
5874# ifdef RT_ARCH_AMD64
5875 mov rax, [pvBitmap]
5876 lock btr [rax], edx
5877# else
5878 mov eax, [pvBitmap]
5879 lock btr [eax], edx
5880# endif
5881 setc al
5882 and eax, 1
5883 mov [rc.u32], eax
5884 }
5885# endif
5886 return rc.f;
5887}
5888#endif
5889
5890
5891/**
5892 * Tests and toggles a bit in a bitmap.
5893 *
5894 * @returns true if the bit was set.
5895 * @returns false if the bit was clear.
5896 *
5897 * @param pvBitmap Pointer to the bitmap.
5898 * @param iBit The bit to test and toggle.
5899 *
5900 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5901 * However, doing so will yield better performance as well as avoiding
5902 * traps accessing the last bits in the bitmap.
5903 */
5904#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5905DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5906#else
5907DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5908{
5909 union { bool f; uint32_t u32; uint8_t u8; } rc;
5910# if RT_INLINE_ASM_USES_INTRIN
5911 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5912
5913# elif RT_INLINE_ASM_GNU_STYLE
5914 __asm__ __volatile__("btcl %2, %1\n\t"
5915 "setc %b0\n\t"
5916 "andl $1, %0\n\t"
5917 : "=q" (rc.u32),
5918 "=m" (*(volatile long *)pvBitmap)
5919 : "Ir" (iBit),
5920 "m" (*(volatile long *)pvBitmap)
5921 : "memory");
5922# else
5923 __asm
5924 {
5925 mov edx, [iBit]
5926# ifdef RT_ARCH_AMD64
5927 mov rax, [pvBitmap]
5928 btc [rax], edx
5929# else
5930 mov eax, [pvBitmap]
5931 btc [eax], edx
5932# endif
5933 setc al
5934 and eax, 1
5935 mov [rc.u32], eax
5936 }
5937# endif
5938 return rc.f;
5939}
5940#endif
5941
5942
5943/**
5944 * Atomically tests and toggles a bit in a bitmap, ordered.
5945 *
5946 * @returns true if the bit was set.
5947 * @returns false if the bit was clear.
5948 *
5949 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
5950 * the memory access isn't atomic!
5951 * @param iBit The bit to test and toggle.
5952 */
5953#if RT_INLINE_ASM_EXTERNAL
5954DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5955#else
5956DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5957{
5958 union { bool f; uint32_t u32; uint8_t u8; } rc;
5959 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5960# if RT_INLINE_ASM_GNU_STYLE
5961 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5962 "setc %b0\n\t"
5963 "andl $1, %0\n\t"
5964 : "=q" (rc.u32),
5965 "=m" (*(volatile long *)pvBitmap)
5966 : "Ir" (iBit),
5967 "m" (*(volatile long *)pvBitmap)
5968 : "memory");
5969# else
5970 __asm
5971 {
5972 mov edx, [iBit]
5973# ifdef RT_ARCH_AMD64
5974 mov rax, [pvBitmap]
5975 lock btc [rax], edx
5976# else
5977 mov eax, [pvBitmap]
5978 lock btc [eax], edx
5979# endif
5980 setc al
5981 and eax, 1
5982 mov [rc.u32], eax
5983 }
5984# endif
5985 return rc.f;
5986}
5987#endif
5988
5989
5990/**
5991 * Tests if a bit in a bitmap is set.
5992 *
5993 * @returns true if the bit is set.
5994 * @returns false if the bit is clear.
5995 *
5996 * @param pvBitmap Pointer to the bitmap.
5997 * @param iBit The bit to test.
5998 *
5999 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6000 * However, doing so will yield better performance as well as avoiding
6001 * traps accessing the last bits in the bitmap.
6002 */
6003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6004DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
6005#else
6006DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
6007{
6008 union { bool f; uint32_t u32; uint8_t u8; } rc;
6009# if RT_INLINE_ASM_USES_INTRIN
6010 rc.u32 = _bittest((long *)pvBitmap, iBit);
6011# elif RT_INLINE_ASM_GNU_STYLE
6012
6013 __asm__ __volatile__("btl %2, %1\n\t"
6014 "setc %b0\n\t"
6015 "andl $1, %0\n\t"
6016 : "=q" (rc.u32)
6017 : "m" (*(const volatile long *)pvBitmap),
6018 "Ir" (iBit)
6019 : "memory");
6020# else
6021 __asm
6022 {
6023 mov edx, [iBit]
6024# ifdef RT_ARCH_AMD64
6025 mov rax, [pvBitmap]
6026 bt [rax], edx
6027# else
6028 mov eax, [pvBitmap]
6029 bt [eax], edx
6030# endif
6031 setc al
6032 and eax, 1
6033 mov [rc.u32], eax
6034 }
6035# endif
6036 return rc.f;
6037}
6038#endif
6039
6040
6041/**
6042 * Clears a bit range within a bitmap.
6043 *
6044 * @param pvBitmap Pointer to the bitmap.
6045 * @param iBitStart The First bit to clear.
6046 * @param iBitEnd The first bit not to clear.
6047 */
6048DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6049{
6050 if (iBitStart < iBitEnd)
6051 {
6052 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6053 int iStart = iBitStart & ~31;
6054 int iEnd = iBitEnd & ~31;
6055 if (iStart == iEnd)
6056 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
6057 else
6058 {
6059 /* bits in first dword. */
6060 if (iBitStart & 31)
6061 {
6062 *pu32 &= (1 << (iBitStart & 31)) - 1;
6063 pu32++;
6064 iBitStart = iStart + 32;
6065 }
6066
6067 /* whole dword. */
6068 if (iBitStart != iEnd)
6069 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
6070
6071 /* bits in last dword. */
6072 if (iBitEnd & 31)
6073 {
6074 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6075 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
6076 }
6077 }
6078 }
6079}
6080
6081
6082/**
6083 * Sets a bit range within a bitmap.
6084 *
6085 * @param pvBitmap Pointer to the bitmap.
6086 * @param iBitStart The First bit to set.
6087 * @param iBitEnd The first bit not to set.
6088 */
6089DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
6090{
6091 if (iBitStart < iBitEnd)
6092 {
6093 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6094 int iStart = iBitStart & ~31;
6095 int iEnd = iBitEnd & ~31;
6096 if (iStart == iEnd)
6097 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
6098 else
6099 {
6100 /* bits in first dword. */
6101 if (iBitStart & 31)
6102 {
6103 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6104 pu32++;
6105 iBitStart = iStart + 32;
6106 }
6107
6108 /* whole dword. */
6109 if (iBitStart != iEnd)
6110 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6111
6112 /* bits in last dword. */
6113 if (iBitEnd & 31)
6114 {
6115 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6116 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6117 }
6118 }
6119 }
6120}
6121
6122
6123/**
 * Finds the first clear bit in a bitmap.
 *
 * The bitmap is scanned one 32-bit word at a time for the first word that
 * is not all ones; the lowest clear bit of that word gives the result.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found (the inline variants also return
 *          -1 when cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      The inline variants pass it through
 *                      RT_ALIGN_32(cBits, 32) and scan cBits >> 5 dwords.
 */
6131#if RT_INLINE_ASM_EXTERNAL
6132DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6133#else
6134DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6135{
6136 if (cBits)
6137 {
6138 int32_t iBit;
6139# if RT_INLINE_ASM_GNU_STYLE
6140 RTCCUINTREG uEAX, uECX, uEDI;
6141 cBits = RT_ALIGN_32(cBits, 32);
     /*
      * Register setup (see the constraints): EDX (iBit) and EAX start as
      * 0xffffffff, ECX is the dword count and EDI points to the bitmap.
      * "repe scasl" steps EDI over every dword equal to EAX, i.e. all ones.
      * If the scan ends on a match, "je" skips to the end and iBit keeps
      * 0xffffffff (-1).  Otherwise EDI is moved back onto the mismatching
      * dword, the XOR turns its clear bits into set bits in EAX, and the
      * byte offset from the bitmap start (operand %5) times 8 is added to
      * the BSF result to form the bit index.
      */
6142 __asm__ __volatile__("repe; scasl\n\t"
6143 "je 1f\n\t"
6144# ifdef RT_ARCH_AMD64
6145 "lea -4(%%rdi), %%rdi\n\t"
6146 "xorl (%%rdi), %%eax\n\t"
6147 "subq %5, %%rdi\n\t"
6148# else
6149 "lea -4(%%edi), %%edi\n\t"
6150 "xorl (%%edi), %%eax\n\t"
6151 "subl %5, %%edi\n\t"
6152# endif
6153 "shll $3, %%edi\n\t"
6154 "bsfl %%eax, %%edx\n\t"
6155 "addl %%edi, %%edx\n\t"
6156 "1:\t\n"
6157 : "=d" (iBit),
6158 "=&c" (uECX),
6159 "=&D" (uEDI),
6160 "=&a" (uEAX)
6161 : "0" (0xffffffff),
6162 "mr" (pvBitmap),
6163 "1" (cBits >> 5),
6164 "2" (pvBitmap),
6165 "3" (0xffffffff));
6166# else
6167 cBits = RT_ALIGN_32(cBits, 32);
     /*
      * Same algorithm as the GNU variant: EBX/RBX keeps the bitmap start
      * so the byte offset of the mismatching dword can be computed, and
      * EDX holds -1 until a clear bit is found.
      */
6168 __asm
6169 {
6170# ifdef RT_ARCH_AMD64
6171 mov rdi, [pvBitmap]
6172 mov rbx, rdi
6173# else
6174 mov edi, [pvBitmap]
6175 mov ebx, edi
6176# endif
6177 mov edx, 0ffffffffh
6178 mov eax, edx
6179 mov ecx, [cBits]
6180 shr ecx, 5
6181 repe scasd
6182 je done
6183
6184# ifdef RT_ARCH_AMD64
6185 lea rdi, [rdi - 4]
6186 xor eax, [rdi]
6187 sub rdi, rbx
6188# else
6189 lea edi, [edi - 4]
6190 xor eax, [edi]
6191 sub edi, ebx
6192# endif
6193 shl edi, 3
6194 bsf edx, eax
6195 add edx, edi
6196 done:
6197 mov [iBit], edx
6198 }
6199# endif
6200 return iBit;
6201 }
     /* Empty bitmap: nothing to search. */
6202 return -1;
6203}
6204#endif
6205
6206
6207/**
 * Finds the next clear bit in a bitmap.
 *
 * The search starts at bit iBitPrev + 1.  If that position is not 32-bit
 * aligned, the remainder of its dword is examined first; the rest of the
 * bitmap is then handed to ASMBitFirstClear.
 *
 * @returns Index of the next zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
6217#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6218DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6219#else
6220DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6221{
6222 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
     /* iBitPrev now names the first bit to examine; iBit is its offset within its dword. */
6223 int iBit = ++iBitPrev & 31;
6224 if (iBit)
6225 {
6226 /*
      * Inspect the 32-bit word containing the unaligned bit.  The word is
      * inverted so that clear bits become set bits, and shifted right so
      * that bit 0 of u32 corresponds to bitmap bit iBitPrev.
      */
6229 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6230
6231# if RT_INLINE_ASM_USES_INTRIN
6232 unsigned long ulBit = 0;
6233 if (_BitScanForward(&ulBit, u32))
6234 return ulBit + iBitPrev;
6235# else
6236# if RT_INLINE_ASM_GNU_STYLE
     /* BSF yields the lowest set bit of u32; iBit becomes -1 when u32 is zero. */
6237 __asm__ __volatile__("bsf %1, %0\n\t"
6238 "jnz 1f\n\t"
6239 "movl $-1, %0\n\t"
6240 "1:\n\t"
6241 : "=r" (iBit)
6242 : "r" (u32));
6243# else
     /* Same as the GNU variant: lowest set bit of u32, or -1 if there is none. */
6244 __asm
6245 {
6246 mov edx, [u32]
6247 bsf eax, edx
6248 jnz done
6249 mov eax, 0ffffffffh
6250 done:
6251 mov [iBit], eax
6252 }
6253# endif
     /* The bit offset is relative to iBitPrev because of the shift above. */
6254 if (iBit >= 0)
6255 return iBit + iBitPrev;
6256# endif
6257
6258 /*
      * Skip ahead to the next dword boundary and see if there is anything
      * left to search.
      */
6261 iBitPrev |= 31;
6262 iBitPrev++;
6263 if (cBits <= (uint32_t)iBitPrev)
6264 return -1;
6265 }
6266
6267 /*
      * 32-bit aligned search, let ASMBitFirstClear do the dirty work.  Its
      * result is relative to the dword passed in, so iBitPrev is added back.
      */
6270 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6271 if (iBit >= 0)
6272 iBit += iBitPrev;
6273 return iBit;
6274}
6275#endif
6276
6277
6278/**
 * Finds the first set bit in a bitmap.
 *
 * The bitmap is scanned one 32-bit word at a time for the first non-zero
 * word; the lowest set bit of that word gives the result.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found (the inline variants also return -1
 *          when cBits is 0).
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 *                      The inline variants pass it through
 *                      RT_ALIGN_32(cBits, 32) and scan cBits >> 5 dwords.
 */
6286#if RT_INLINE_ASM_EXTERNAL
6287DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6288#else
6289DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6290{
6291 if (cBits)
6292 {
6293 int32_t iBit;
6294# if RT_INLINE_ASM_GNU_STYLE
6295 RTCCUINTREG uEAX, uECX, uEDI;
6296 cBits = RT_ALIGN_32(cBits, 32);
     /*
      * Register setup (see the constraints): EDX (iBit) starts as
      * 0xffffffff, EAX as 0, ECX is the dword count and EDI points to the
      * bitmap.  "repe scasl" steps EDI over every dword equal to EAX, i.e.
      * zero.  If the scan ends on a match, "je" skips to the end and iBit
      * keeps 0xffffffff (-1).  Otherwise EDI is moved back onto the
      * non-zero dword, which is loaded into EAX, and the byte offset from
      * the bitmap start (operand %5) times 8 is added to the BSF result.
      */
6297 __asm__ __volatile__("repe; scasl\n\t"
6298 "je 1f\n\t"
6299# ifdef RT_ARCH_AMD64
6300 "lea -4(%%rdi), %%rdi\n\t"
6301 "movl (%%rdi), %%eax\n\t"
6302 "subq %5, %%rdi\n\t"
6303# else
6304 "lea -4(%%edi), %%edi\n\t"
6305 "movl (%%edi), %%eax\n\t"
6306 "subl %5, %%edi\n\t"
6307# endif
6308 "shll $3, %%edi\n\t"
6309 "bsfl %%eax, %%edx\n\t"
6310 "addl %%edi, %%edx\n\t"
6311 "1:\t\n"
6312 : "=d" (iBit),
6313 "=&c" (uECX),
6314 "=&D" (uEDI),
6315 "=&a" (uEAX)
6316 : "0" (0xffffffff),
6317 "mr" (pvBitmap),
6318 "1" (cBits >> 5),
6319 "2" (pvBitmap),
6320 "3" (0));
6321# else
6322 cBits = RT_ALIGN_32(cBits, 32);
     /*
      * Same algorithm as the GNU variant: EBX/RBX keeps the bitmap start
      * so the byte offset of the non-zero dword can be computed, and EDX
      * holds -1 until a set bit is found.
      */
6323 __asm
6324 {
6325# ifdef RT_ARCH_AMD64
6326 mov rdi, [pvBitmap]
6327 mov rbx, rdi
6328# else
6329 mov edi, [pvBitmap]
6330 mov ebx, edi
6331# endif
6332 mov edx, 0ffffffffh
6333 xor eax, eax
6334 mov ecx, [cBits]
6335 shr ecx, 5
6336 repe scasd
6337 je done
6338# ifdef RT_ARCH_AMD64
6339 lea rdi, [rdi - 4]
6340 mov eax, [rdi]
6341 sub rdi, rbx
6342# else
6343 lea edi, [edi - 4]
6344 mov eax, [edi]
6345 sub edi, ebx
6346# endif
6347 shl edi, 3
6348 bsf edx, eax
6349 add edx, edi
6350 done:
6351 mov [iBit], edx
6352 }
6353# endif
6354 return iBit;
6355 }
     /* Empty bitmap: nothing to search. */
6356 return -1;
6357}
6358#endif
6359
6360
6361/**
 * Finds the next set bit in a bitmap.
 *
 * The search starts at bit iBitPrev + 1.  If that position is not 32-bit
 * aligned, the remainder of its dword is examined first; the rest of the
 * bitmap is then handed to ASMBitFirstSet.
 *
 * @returns Index of the next set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
6371#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6372DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6373#else
6374DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6375{
6376 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
     /* iBitPrev now names the first bit to examine; iBit is its offset within its dword. */
6377 int iBit = ++iBitPrev & 31;
6378 if (iBit)
6379 {
6380 /*
      * Inspect the 32-bit word containing the unaligned bit.  The word is
      * shifted right so that bit 0 of u32 corresponds to bitmap bit
      * iBitPrev.
      */
6383 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6384
6385# if RT_INLINE_ASM_USES_INTRIN
6386 unsigned long ulBit = 0;
6387 if (_BitScanForward(&ulBit, u32))
6388 return ulBit + iBitPrev;
6389# else
6390# if RT_INLINE_ASM_GNU_STYLE
     /* BSF yields the lowest set bit of u32; iBit becomes -1 when u32 is zero. */
6391 __asm__ __volatile__("bsf %1, %0\n\t"
6392 "jnz 1f\n\t"
6393 "movl $-1, %0\n\t"
6394 "1:\n\t"
6395 : "=r" (iBit)
6396 : "r" (u32));
6397# else
     /* Same as the GNU variant: lowest set bit of u32, or -1 if there is none. */
6398 __asm
6399 {
6400 mov edx, [u32]
6401 bsf eax, edx
6402 jnz done
6403 mov eax, 0ffffffffh
6404 done:
6405 mov [iBit], eax
6406 }
6407# endif
     /* The bit offset is relative to iBitPrev because of the shift above. */
6408 if (iBit >= 0)
6409 return iBit + iBitPrev;
6410# endif
6411
6412 /*
      * Skip ahead to the next dword boundary and see if there is anything
      * left to search.
      */
6415 iBitPrev |= 31;
6416 iBitPrev++;
6417 if (cBits <= (uint32_t)iBitPrev)
6418 return -1;
6419 }
6420
6421 /*
      * 32-bit aligned search, let ASMBitFirstSet do the dirty work.  Its
      * result is relative to the dword passed in, so iBitPrev is added back.
      */
6424 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6425 if (iBit >= 0)
6426 iBit += iBitPrev;
6427 return iBit;
6428}
6429#endif
6430
6431
6432/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * All three implementations perform a forward bit scan and convert its
 * 0-based bit index into the 1-based result, with 0 reserved for "no bit
 * set".
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to ffs() in BSD.
 */
6441DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6442{
6443# if RT_INLINE_ASM_USES_INTRIN
6444 unsigned long iBit;
     /* Found: bump the index to make it 1-based; not found: report 0. */
6445 if (_BitScanForward(&iBit, u32))
6446 iBit++;
6447 else
6448 iBit = 0;
6449# elif RT_INLINE_ASM_GNU_STYLE
6450 uint32_t iBit;
     /* "jnz" is taken when BSF found a bit (label 1 increments); otherwise the result is zeroed. */
6451 __asm__ __volatile__("bsf %1, %0\n\t"
6452 "jnz 1f\n\t"
6453 "xorl %0, %0\n\t"
6454 "jmp 2f\n"
6455 "1:\n\t"
6456 "incl %0\n"
6457 "2:\n\t"
6458 : "=r" (iBit)
6459 : "rm" (u32));
6460# else
6461 uint32_t iBit;
     /* Same flow as the GNU variant, using EAX as the scratch/result register. */
6462 _asm
6463 {
6464 bsf eax, [u32]
6465 jnz found
6466 xor eax, eax
6467 jmp done
6468 found:
6469 inc eax
6470 done:
6471 mov [iBit], eax
6472 }
6473# endif
6474 return iBit;
6475}
6476
6477
6478/**
6479 * Finds the first bit which is set in the given 32-bit integer.
6480 * Bits are numbered from 1 (least significant) to 32.
6481 *
6482 * @returns index [1..32] of the first set bit.
6483 * @returns 0 if all bits are cleared.
6484 * @param i32 Integer to search for set bits.
6485 * @remark Similar to ffs() in BSD.
6486 */
6487DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6488{
6489 return ASMBitFirstSetU32((uint32_t)i32);
6490}
6491
6492
6493/**
 * Finds the last bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * All three implementations perform a reverse bit scan and convert its
 * 0-based bit index into the 1-based result, with 0 reserved for "no bit
 * set".
 *
 * @returns index [1..32] of the last set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to fls() in BSD.
 */
6502DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6503{
6504# if RT_INLINE_ASM_USES_INTRIN
6505 unsigned long iBit;
     /* Found: bump the index to make it 1-based; not found: report 0. */
6506 if (_BitScanReverse(&iBit, u32))
6507 iBit++;
6508 else
6509 iBit = 0;
6510# elif RT_INLINE_ASM_GNU_STYLE
6511 uint32_t iBit;
     /* "jnz" is taken when BSR found a bit (label 1 increments); otherwise the result is zeroed. */
6512 __asm__ __volatile__("bsrl %1, %0\n\t"
6513 "jnz 1f\n\t"
6514 "xorl %0, %0\n\t"
6515 "jmp 2f\n"
6516 "1:\n\t"
6517 "incl %0\n"
6518 "2:\n\t"
6519 : "=r" (iBit)
6520 : "rm" (u32));
6521# else
6522 uint32_t iBit;
     /* Same flow as the GNU variant, using EAX as the scratch/result register. */
6523 _asm
6524 {
6525 bsr eax, [u32]
6526 jnz found
6527 xor eax, eax
6528 jmp done
6529 found:
6530 inc eax
6531 done:
6532 mov [iBit], eax
6533 }
6534# endif
6535 return iBit;
6536}
6537
6538
6539/**
6540 * Finds the last bit which is set in the given 32-bit integer.
6541 * Bits are numbered from 1 (least significant) to 32.
6542 *
6543 * @returns index [1..32] of the last set bit.
6544 * @returns 0 if all bits are cleared.
6545 * @param i32 Integer to search for set bits.
6546 * @remark Similar to fls() in BSD.
6547 */
6548DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6549{
6550 return ASMBitLastSetU32((uint32_t)i32);
6551}
6552
6553/**
6554 * Reverse the byte order of the given 16-bit integer.
6555 *
6556 * @returns Revert
6557 * @param u16 16-bit integer value.
6558 */
6559DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6560{
6561#if RT_INLINE_ASM_USES_INTRIN
6562 u16 = _byteswap_ushort(u16);
6563#elif RT_INLINE_ASM_GNU_STYLE
6564 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6565#else
6566 _asm
6567 {
6568 mov ax, [u16]
6569 ror ax, 8
6570 mov [u16], ax
6571 }
6572#endif
6573 return u16;
6574}
6575
6576/**
6577 * Reverse the byte order of the given 32-bit integer.
6578 *
6579 * @returns Revert
6580 * @param u32 32-bit integer value.
6581 */
6582DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6583{
6584#if RT_INLINE_ASM_USES_INTRIN
6585 u32 = _byteswap_ulong(u32);
6586#elif RT_INLINE_ASM_GNU_STYLE
6587 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6588#else
6589 _asm
6590 {
6591 mov eax, [u32]
6592 bswap eax
6593 mov [u32], eax
6594 }
6595#endif
6596 return u32;
6597}
6598
6599
6600/**
6601 * Reverse the byte order of the given 64-bit integer.
6602 *
6603 * @returns Revert
6604 * @param u64 64-bit integer value.
6605 */
6606DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6607{
6608#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6609 u64 = _byteswap_uint64(u64);
6610#else
6611 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6612 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6613#endif
6614 return u64;
6615}
6616
6617
6618/** @} */
6619
6620
6621/** @} */
6622#endif
6623
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette