VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 20554

Last change on this file since 20554 was 20242, checked in by vboxsync, 16 years ago

View HW Accel: OpenGL framebuffer

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 166.1 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400 (compiler intrinsics are used).
39 * Otherwise defined as 0.
40 */
41
42/* Solaris 10 header ugliness */
43#ifdef u
44#undef u
45#endif
46
/* Use the compiler intrinsics with Microsoft compilers reporting _MSC_VER >= 1400. */
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define RT_INLINE_ASM_USES_INTRIN 1
# include <intrin.h>
  /* Emit the intrinsics at all optimization levels. */
# pragma intrinsic(_ReadWriteBarrier)
# pragma intrinsic(__cpuid)
# pragma intrinsic(_enable)
# pragma intrinsic(_disable)
# pragma intrinsic(__rdtsc)
# pragma intrinsic(__readmsr)
# pragma intrinsic(__writemsr)
# pragma intrinsic(__outbyte)
# pragma intrinsic(__outbytestring)
# pragma intrinsic(__outword)
# pragma intrinsic(__outwordstring)
# pragma intrinsic(__outdword)
# pragma intrinsic(__outdwordstring)
# pragma intrinsic(__inbyte)
# pragma intrinsic(__inbytestring)
# pragma intrinsic(__inword)
# pragma intrinsic(__inwordstring)
# pragma intrinsic(__indword)
# pragma intrinsic(__indwordstring)
# pragma intrinsic(__invlpg)
# pragma intrinsic(__wbinvd)
# pragma intrinsic(__stosd)
# pragma intrinsic(__stosw)
# pragma intrinsic(__stosb)
# pragma intrinsic(__readcr0)
# pragma intrinsic(__readcr2)
# pragma intrinsic(__readcr3)
# pragma intrinsic(__readcr4)
# pragma intrinsic(__writecr0)
# pragma intrinsic(__writecr3)
# pragma intrinsic(__writecr4)
# pragma intrinsic(__readdr)
# pragma intrinsic(__writedr)
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_bittest)
# pragma intrinsic(_bittestandset)
# pragma intrinsic(_bittestandreset)
# pragma intrinsic(_bittestandcomplement)
# pragma intrinsic(_byteswap_ushort)
# pragma intrinsic(_byteswap_ulong)
# pragma intrinsic(_interlockedbittestandset)
# pragma intrinsic(_interlockedbittestandreset)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedIncrement)
# pragma intrinsic(_InterlockedDecrement)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedCompareExchange)
# pragma intrinsic(_InterlockedCompareExchange64)
# ifdef RT_ARCH_AMD64
   /* Intrinsics only requested for AMD64 builds. */
#  pragma intrinsic(__stosq)
#  pragma intrinsic(__readcr8)
#  pragma intrinsic(__writecr8)
#  pragma intrinsic(_byteswap_uint64)
#  pragma intrinsic(_InterlockedExchange64)
# endif
#endif
/* Everything else: no intrinsics. */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
115
/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 * Used to work around some 4.3.x register allocation issues in this version of
 * the compiler.
 *
 * Defined as 1 when building with GCC 4.3.x for 32-bit x86 (__i386__),
 * otherwise as 0.  The value is a plain integer constant so that it is safe
 * in both preprocessor conditionals and ordinary C expressions; a 'defined'
 * operator must not be produced by macro expansion. */
#if defined(__GNUC__) && defined(__i386__)
# if __GNUC__ == 4 && __GNUC_MINOR__ == 3
#  define RT_INLINE_ASM_GCC_4_3_X_X86 1
# endif
#endif
#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
# define RT_INLINE_ASM_GCC_4_3_X_X86 0
#endif
125
126
127
128/** @defgroup grp_asm ASM - Assembly Routines
129 * @ingroup grp_rt
130 *
131 * @remarks The difference between ordered and unordered atomic operations is that
132 * the former will complete outstanding reads and writes before continuing
133 * while the latter don't make any promises about the order. Ordered
134 * operations don't, it seems, make any 100% promise with regard to whether
135 * the operation will complete before any subsequent memory access.
136 * (please, correct if wrong.)
137 *
138 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
139 * are unordered (note the Uo).
140 *
141 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
142 * or even optimize assembler instructions away. For instance, in the following code
143 * the second rdmsr instruction is optimized away because gcc treats that instruction
144 * as deterministic:
145 *
146 * @code
147 * static inline uint64_t rdmsr_low(int idx)
148 * {
149 * uint32_t low;
150 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
151 * }
152 * ...
153 * uint32_t msr1 = rdmsr_low(1);
154 * foo(msr1);
155 * msr1 = rdmsr_low(1);
156 * bar(msr1);
157 * @endcode
158 *
159 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
160 * use the result of the first call as input parameter for bar() as well. For rdmsr this
161 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
162 * machine status information in general.
163 *
164 * @{
165 */
166
/** @def RT_INLINE_ASM_EXTERNAL
 * Set to 1 when the compiler has no inline assembly support, in which case
 * the ASM* functions are implemented in an external .asm file; set to 0
 * otherwise.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft
 *          skipped inline assembly in their AMD64 compiler.
 */
#if !defined(_MSC_VER) || !defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 0
#else
# define RT_INLINE_ASM_EXTERNAL 1
#endif
179
/** @def RT_INLINE_ASM_GNU_STYLE
 * Set to 1 when the compiler understands GNU style inline assembly, i.e. for
 * everything that does not define _MSC_VER; set to 0 otherwise.
 */
#ifndef _MSC_VER
# define RT_INLINE_ASM_GNU_STYLE 1
#else
# define RT_INLINE_ASM_GNU_STYLE 0
#endif
188
189
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/**
 * IDTR - byte packed, as stored/loaded by ASMGetIDTR() and ASMSetIDTR().
 */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;
#pragma pack()
201
#pragma pack(1)
/**
 * GDTR - byte packed, as stored by ASMGetGDTR().
 */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;
#pragma pack()
212
213
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or
 * method.  Maps to _ReturnAddress() for Microsoft compilers and to
 * __builtin_return_address(0) for GCC (and when doxygen is running).
 */
#if defined(_MSC_VER)
# ifdef __cplusplus
extern "C"
# endif
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif !defined(__GNUC__) && !defined(DOXYGEN_RUNNING)
# error "Unsupported compiler."
#else
# define ASMReturnAddress() __builtin_return_address(0)
#endif
229
230
231/**
232 * Gets the content of the IDTR CPU register.
233 * @param pIdtr Where to store the IDTR contents.
234 */
235#if RT_INLINE_ASM_EXTERNAL
236DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
237#else
238DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
239{
240# if RT_INLINE_ASM_GNU_STYLE
241 __asm__ __volatile__("sidt %0" : "=m" (*pIdtr));
242# else
243 __asm
244 {
245# ifdef RT_ARCH_AMD64
246 mov rax, [pIdtr]
247 sidt [rax]
248# else
249 mov eax, [pIdtr]
250 sidt [eax]
251# endif
252 }
253# endif
254}
255#endif
256
257
258/**
259 * Sets the content of the IDTR CPU register.
260 * @param pIdtr Where to load the IDTR contents from
261 */
262#if RT_INLINE_ASM_EXTERNAL
263DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
264#else
265DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
266{
267# if RT_INLINE_ASM_GNU_STYLE
268 __asm__ __volatile__("lidt %0" : : "m" (*pIdtr));
269# else
270 __asm
271 {
272# ifdef RT_ARCH_AMD64
273 mov rax, [pIdtr]
274 lidt [rax]
275# else
276 mov eax, [pIdtr]
277 lidt [eax]
278# endif
279 }
280# endif
281}
282#endif
283
284
285/**
286 * Gets the content of the GDTR CPU register.
287 * @param pGdtr Where to store the GDTR contents.
288 */
289#if RT_INLINE_ASM_EXTERNAL
290DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
291#else
292DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
293{
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("sgdt %0" : "=m" (*pGdtr));
296# else
297 __asm
298 {
299# ifdef RT_ARCH_AMD64
300 mov rax, [pGdtr]
301 sgdt [rax]
302# else
303 mov eax, [pGdtr]
304 sgdt [eax]
305# endif
306 }
307# endif
308}
309#endif
310
311/**
312 * Get the cs register.
313 * @returns cs.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetCS(void);
317#else
318DECLINLINE(RTSEL) ASMGetCS(void)
319{
320 RTSEL SelCS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
323# else
324 __asm
325 {
326 mov ax, cs
327 mov [SelCS], ax
328 }
329# endif
330 return SelCS;
331}
332#endif
333
334
335/**
336 * Get the DS register.
337 * @returns DS.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetDS(void);
341#else
342DECLINLINE(RTSEL) ASMGetDS(void)
343{
344 RTSEL SelDS;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
347# else
348 __asm
349 {
350 mov ax, ds
351 mov [SelDS], ax
352 }
353# endif
354 return SelDS;
355}
356#endif
357
358
359/**
360 * Get the ES register.
361 * @returns ES.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetES(void);
365#else
366DECLINLINE(RTSEL) ASMGetES(void)
367{
368 RTSEL SelES;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
371# else
372 __asm
373 {
374 mov ax, es
375 mov [SelES], ax
376 }
377# endif
378 return SelES;
379}
380#endif
381
382
383/**
384 * Get the FS register.
385 * @returns FS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetFS(void);
389#else
390DECLINLINE(RTSEL) ASMGetFS(void)
391{
392 RTSEL SelFS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
395# else
396 __asm
397 {
398 mov ax, fs
399 mov [SelFS], ax
400 }
401# endif
402 return SelFS;
403}
404# endif
405
406
407/**
408 * Get the GS register.
409 * @returns GS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetGS(void);
413#else
414DECLINLINE(RTSEL) ASMGetGS(void)
415{
416 RTSEL SelGS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
419# else
420 __asm
421 {
422 mov ax, gs
423 mov [SelGS], ax
424 }
425# endif
426 return SelGS;
427}
428#endif
429
430
431/**
432 * Get the SS register.
433 * @returns SS.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetSS(void);
437#else
438DECLINLINE(RTSEL) ASMGetSS(void)
439{
440 RTSEL SelSS;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
443# else
444 __asm
445 {
446 mov ax, ss
447 mov [SelSS], ax
448 }
449# endif
450 return SelSS;
451}
452#endif
453
454
455/**
456 * Get the TR register.
457 * @returns TR.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTSEL) ASMGetTR(void);
461#else
462DECLINLINE(RTSEL) ASMGetTR(void)
463{
464 RTSEL SelTR;
465# if RT_INLINE_ASM_GNU_STYLE
466 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
467# else
468 __asm
469 {
470 str ax
471 mov [SelTR], ax
472 }
473# endif
474 return SelTR;
475}
476#endif
477
478
479/**
480 * Get the [RE]FLAGS register.
481 * @returns [RE]FLAGS.
482 */
483#if RT_INLINE_ASM_EXTERNAL
484DECLASM(RTCCUINTREG) ASMGetFlags(void);
485#else
486DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
487{
488 RTCCUINTREG uFlags;
489# if RT_INLINE_ASM_GNU_STYLE
490# ifdef RT_ARCH_AMD64
491 __asm__ __volatile__("pushfq\n\t"
492 "popq %0\n\t"
493 : "=g" (uFlags));
494# else
495 __asm__ __volatile__("pushfl\n\t"
496 "popl %0\n\t"
497 : "=g" (uFlags));
498# endif
499# else
500 __asm
501 {
502# ifdef RT_ARCH_AMD64
503 pushfq
504 pop [uFlags]
505# else
506 pushfd
507 pop [uFlags]
508# endif
509 }
510# endif
511 return uFlags;
512}
513#endif
514
515
516/**
517 * Set the [RE]FLAGS register.
518 * @param uFlags The new [RE]FLAGS value.
519 */
520#if RT_INLINE_ASM_EXTERNAL
521DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
522#else
523DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
524{
525# if RT_INLINE_ASM_GNU_STYLE
526# ifdef RT_ARCH_AMD64
527 __asm__ __volatile__("pushq %0\n\t"
528 "popfq\n\t"
529 : : "g" (uFlags));
530# else
531 __asm__ __volatile__("pushl %0\n\t"
532 "popfl\n\t"
533 : : "g" (uFlags));
534# endif
535# else
536 __asm
537 {
538# ifdef RT_ARCH_AMD64
539 push [uFlags]
540 popfq
541# else
542 push [uFlags]
543 popfd
544# endif
545 }
546# endif
547}
548#endif
549
550
551/**
552 * Gets the content of the CPU timestamp counter register.
553 *
554 * @returns TSC.
555 */
556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
557DECLASM(uint64_t) ASMReadTSC(void);
558#else
559DECLINLINE(uint64_t) ASMReadTSC(void)
560{
561 RTUINT64U u;
562# if RT_INLINE_ASM_GNU_STYLE
563 __asm__ __volatile__("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
564# else
565# if RT_INLINE_ASM_USES_INTRIN
566 u.u = __rdtsc();
567# else
568 __asm
569 {
570 rdtsc
571 mov [u.s.Lo], eax
572 mov [u.s.Hi], edx
573 }
574# endif
575# endif
576 return u.u;
577}
578#endif
579
580
581/**
582 * Performs the cpuid instruction returning all registers.
583 *
584 * @param uOperator CPUID operation (eax).
585 * @param pvEAX Where to store eax.
586 * @param pvEBX Where to store ebx.
587 * @param pvECX Where to store ecx.
588 * @param pvEDX Where to store edx.
589 * @remark We're using void pointers to ease the use of special bitfield structures and such.
590 */
591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
592DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
593#else
594DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
595{
596# if RT_INLINE_ASM_GNU_STYLE
597# ifdef RT_ARCH_AMD64
598 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
599 __asm__ ("cpuid\n\t"
600 : "=a" (uRAX),
601 "=b" (uRBX),
602 "=c" (uRCX),
603 "=d" (uRDX)
604 : "0" (uOperator));
605 *(uint32_t *)pvEAX = (uint32_t)uRAX;
606 *(uint32_t *)pvEBX = (uint32_t)uRBX;
607 *(uint32_t *)pvECX = (uint32_t)uRCX;
608 *(uint32_t *)pvEDX = (uint32_t)uRDX;
609# else
610 __asm__ ("xchgl %%ebx, %1\n\t"
611 "cpuid\n\t"
612 "xchgl %%ebx, %1\n\t"
613 : "=a" (*(uint32_t *)pvEAX),
614 "=r" (*(uint32_t *)pvEBX),
615 "=c" (*(uint32_t *)pvECX),
616 "=d" (*(uint32_t *)pvEDX)
617 : "0" (uOperator));
618# endif
619
620# elif RT_INLINE_ASM_USES_INTRIN
621 int aInfo[4];
622 __cpuid(aInfo, uOperator);
623 *(uint32_t *)pvEAX = aInfo[0];
624 *(uint32_t *)pvEBX = aInfo[1];
625 *(uint32_t *)pvECX = aInfo[2];
626 *(uint32_t *)pvEDX = aInfo[3];
627
628# else
629 uint32_t uEAX;
630 uint32_t uEBX;
631 uint32_t uECX;
632 uint32_t uEDX;
633 __asm
634 {
635 push ebx
636 mov eax, [uOperator]
637 cpuid
638 mov [uEAX], eax
639 mov [uEBX], ebx
640 mov [uECX], ecx
641 mov [uEDX], edx
642 pop ebx
643 }
644 *(uint32_t *)pvEAX = uEAX;
645 *(uint32_t *)pvEBX = uEBX;
646 *(uint32_t *)pvECX = uECX;
647 *(uint32_t *)pvEDX = uEDX;
648# endif
649}
650#endif
651
652
653/**
654 * Performs the cpuid instruction returning all registers.
655 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
656 *
657 * @param uOperator CPUID operation (eax).
658 * @param uIdxECX ecx index
659 * @param pvEAX Where to store eax.
660 * @param pvEBX Where to store ebx.
661 * @param pvECX Where to store ecx.
662 * @param pvEDX Where to store edx.
663 * @remark We're using void pointers to ease the use of special bitfield structures and such.
664 */
665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
666DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
667#else
668DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
669{
670# if RT_INLINE_ASM_GNU_STYLE
671# ifdef RT_ARCH_AMD64
672 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
673 __asm__ ("cpuid\n\t"
674 : "=a" (uRAX),
675 "=b" (uRBX),
676 "=c" (uRCX),
677 "=d" (uRDX)
678 : "0" (uOperator),
679 "2" (uIdxECX));
680 *(uint32_t *)pvEAX = (uint32_t)uRAX;
681 *(uint32_t *)pvEBX = (uint32_t)uRBX;
682 *(uint32_t *)pvECX = (uint32_t)uRCX;
683 *(uint32_t *)pvEDX = (uint32_t)uRDX;
684# else
685 __asm__ ("xchgl %%ebx, %1\n\t"
686 "cpuid\n\t"
687 "xchgl %%ebx, %1\n\t"
688 : "=a" (*(uint32_t *)pvEAX),
689 "=r" (*(uint32_t *)pvEBX),
690 "=c" (*(uint32_t *)pvECX),
691 "=d" (*(uint32_t *)pvEDX)
692 : "0" (uOperator),
693 "2" (uIdxECX));
694# endif
695
696# elif RT_INLINE_ASM_USES_INTRIN
697 int aInfo[4];
698 /* ??? another intrinsic ??? */
699 __cpuid(aInfo, uOperator);
700 *(uint32_t *)pvEAX = aInfo[0];
701 *(uint32_t *)pvEBX = aInfo[1];
702 *(uint32_t *)pvECX = aInfo[2];
703 *(uint32_t *)pvEDX = aInfo[3];
704
705# else
706 uint32_t uEAX;
707 uint32_t uEBX;
708 uint32_t uECX;
709 uint32_t uEDX;
710 __asm
711 {
712 push ebx
713 mov eax, [uOperator]
714 mov ecx, [uIdxECX]
715 cpuid
716 mov [uEAX], eax
717 mov [uEBX], ebx
718 mov [uECX], ecx
719 mov [uEDX], edx
720 pop ebx
721 }
722 *(uint32_t *)pvEAX = uEAX;
723 *(uint32_t *)pvEBX = uEBX;
724 *(uint32_t *)pvECX = uECX;
725 *(uint32_t *)pvEDX = uEDX;
726# endif
727}
728#endif
729
730
731/**
732 * Performs the cpuid instruction returning ecx and edx.
733 *
734 * @param uOperator CPUID operation (eax).
735 * @param pvECX Where to store ecx.
736 * @param pvEDX Where to store edx.
737 * @remark We're using void pointers to ease the use of special bitfield structures and such.
738 */
739#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
740DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
741#else
742DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
743{
744 uint32_t uEBX;
745 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
746}
747#endif
748
749
750/**
751 * Performs the cpuid instruction returning edx.
752 *
753 * @param uOperator CPUID operation (eax).
754 * @returns EDX after cpuid operation.
755 */
756#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
757DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
758#else
759DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
760{
761 RTCCUINTREG xDX;
762# if RT_INLINE_ASM_GNU_STYLE
763# ifdef RT_ARCH_AMD64
764 RTCCUINTREG uSpill;
765 __asm__ ("cpuid"
766 : "=a" (uSpill),
767 "=d" (xDX)
768 : "0" (uOperator)
769 : "rbx", "rcx");
770# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
771 __asm__ ("push %%ebx\n\t"
772 "cpuid\n\t"
773 "pop %%ebx\n\t"
774 : "=a" (uOperator),
775 "=d" (xDX)
776 : "0" (uOperator)
777 : "ecx");
778# else
779 __asm__ ("cpuid"
780 : "=a" (uOperator),
781 "=d" (xDX)
782 : "0" (uOperator)
783 : "ebx", "ecx");
784# endif
785
786# elif RT_INLINE_ASM_USES_INTRIN
787 int aInfo[4];
788 __cpuid(aInfo, uOperator);
789 xDX = aInfo[3];
790
791# else
792 __asm
793 {
794 push ebx
795 mov eax, [uOperator]
796 cpuid
797 mov [xDX], edx
798 pop ebx
799 }
800# endif
801 return (uint32_t)xDX;
802}
803#endif
804
805
806/**
807 * Performs the cpuid instruction returning ecx.
808 *
809 * @param uOperator CPUID operation (eax).
810 * @returns ECX after cpuid operation.
811 */
812#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
813DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
814#else
815DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
816{
817 RTCCUINTREG xCX;
818# if RT_INLINE_ASM_GNU_STYLE
819# ifdef RT_ARCH_AMD64
820 RTCCUINTREG uSpill;
821 __asm__ ("cpuid"
822 : "=a" (uSpill),
823 "=c" (xCX)
824 : "0" (uOperator)
825 : "rbx", "rdx");
826# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
827 __asm__ ("push %%ebx\n\t"
828 "cpuid\n\t"
829 "pop %%ebx\n\t"
830 : "=a" (uOperator),
831 "=c" (xCX)
832 : "0" (uOperator)
833 : "edx");
834# else
835 __asm__ ("cpuid"
836 : "=a" (uOperator),
837 "=c" (xCX)
838 : "0" (uOperator)
839 : "ebx", "edx");
840
841# endif
842
843# elif RT_INLINE_ASM_USES_INTRIN
844 int aInfo[4];
845 __cpuid(aInfo, uOperator);
846 xCX = aInfo[2];
847
848# else
849 __asm
850 {
851 push ebx
852 mov eax, [uOperator]
853 cpuid
854 mov [xCX], ecx
855 pop ebx
856 }
857# endif
858 return (uint32_t)xCX;
859}
860#endif
861
862
863/**
864 * Checks if the current CPU supports CPUID.
865 *
866 * @returns true if CPUID is supported.
867 */
868DECLINLINE(bool) ASMHasCpuId(void)
869{
870#ifdef RT_ARCH_AMD64
871 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
872#else /* !RT_ARCH_AMD64 */
873 bool fRet = false;
874# if RT_INLINE_ASM_GNU_STYLE
875 uint32_t u1;
876 uint32_t u2;
877 __asm__ ("pushf\n\t"
878 "pop %1\n\t"
879 "mov %1, %2\n\t"
880 "xorl $0x200000, %1\n\t"
881 "push %1\n\t"
882 "popf\n\t"
883 "pushf\n\t"
884 "pop %1\n\t"
885 "cmpl %1, %2\n\t"
886 "setne %0\n\t"
887 "push %2\n\t"
888 "popf\n\t"
889 : "=m" (fRet), "=r" (u1), "=r" (u2));
890# else
891 __asm
892 {
893 pushfd
894 pop eax
895 mov ebx, eax
896 xor eax, 0200000h
897 push eax
898 popfd
899 pushfd
900 pop eax
901 cmp eax, ebx
902 setne fRet
903 push ebx
904 popfd
905 }
906# endif
907 return fRet;
908#endif /* !RT_ARCH_AMD64 */
909}
910
911
912/**
913 * Gets the APIC ID of the current CPU.
914 *
915 * @returns the APIC ID.
916 */
917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
918DECLASM(uint8_t) ASMGetApicId(void);
919#else
920DECLINLINE(uint8_t) ASMGetApicId(void)
921{
922 RTCCUINTREG xBX;
923# if RT_INLINE_ASM_GNU_STYLE
924# ifdef RT_ARCH_AMD64
925 RTCCUINTREG uSpill;
926 __asm__ ("cpuid"
927 : "=a" (uSpill),
928 "=b" (xBX)
929 : "0" (1)
930 : "rcx", "rdx");
931# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
932 RTCCUINTREG uSpill;
933 __asm__ ("mov %%ebx,%1\n\t"
934 "cpuid\n\t"
935 "xchgl %%ebx,%1\n\t"
936 : "=a" (uSpill),
937 "=r" (xBX)
938 : "0" (1)
939 : "ecx", "edx");
940# else
941 RTCCUINTREG uSpill;
942 __asm__ ("cpuid"
943 : "=a" (uSpill),
944 "=b" (xBX)
945 : "0" (1)
946 : "ecx", "edx");
947# endif
948
949# elif RT_INLINE_ASM_USES_INTRIN
950 int aInfo[4];
951 __cpuid(aInfo, 1);
952 xBX = aInfo[1];
953
954# else
955 __asm
956 {
957 push ebx
958 mov eax, 1
959 cpuid
960 mov [xBX], ebx
961 pop ebx
962 }
963# endif
964 return (uint8_t)(xBX >> 24);
965}
966#endif
967
968
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
 *
 * The three registers are compared against the little-endian dwords of the
 * vendor string "GenuineIntel".
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    if (uEBX != 0x756e6547)     /* 'Genu' */
        return false;
    if (uECX != 0x6c65746e)     /* 'ntel' */
        return false;
    return uEDX == 0x49656e69;  /* 'ineI' */
}
983
984
985/**
986 * Tests if this is an genuin Intel CPU.
987 *
988 * @returns true/false.
989 */
990DECLINLINE(bool) ASMIsIntelCpu(void)
991{
992 uint32_t uEAX, uEBX, uECX, uEDX;
993 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
994 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
995}
996
997
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family lives in bits 8 thru 11.  When it reads 0xf, the 8-bit
 * extended family field (bits 20 thru 27) is added on top of it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    /* The extended family field is 8 bits wide, so mask with 0xff (not 0x7f)
       in order not to lose bit 27. */
    return ((uEAX >> 20) & 0xff) + 0xf;
}
1010
1011
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The extended model bits (16 thru 19) are merged in as the high nibble when
 * the base family (bits 8 thru 11) is 0xf or 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf || uBaseFamily == 0x6)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1025
1026
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The extended model bits (16 thru 19) are merged in as the high nibble only
 * when the base family (bits 8 thru 11) is 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t uModel = (uEAX >> 4) & 0xf;
    if (((uEAX >> 8) & 0xf) == 0xf)
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1040
1041
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The extended model bits (16 thru 19) are merged in as the high nibble when
 * the base family (bits 8 thru 11) is 0xf, or when it is 0x6 on an Intel CPU.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;
    if (uBaseFamily == 0xf || (uBaseFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1055
1056
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * @returns Stepping (the low four bits of uEAX).
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const fSteppingMask = 0xf;
    return uEAX & fSteppingMask;
}
1067
1068
1069/**
1070 * Get cr0.
1071 * @returns cr0.
1072 */
1073#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1074DECLASM(RTCCUINTREG) ASMGetCR0(void);
1075#else
1076DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1077{
1078 RTCCUINTREG uCR0;
1079# if RT_INLINE_ASM_USES_INTRIN
1080 uCR0 = __readcr0();
1081
1082# elif RT_INLINE_ASM_GNU_STYLE
1083# ifdef RT_ARCH_AMD64
1084 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1085# else
1086 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1087# endif
1088# else
1089 __asm
1090 {
1091# ifdef RT_ARCH_AMD64
1092 mov rax, cr0
1093 mov [uCR0], rax
1094# else
1095 mov eax, cr0
1096 mov [uCR0], eax
1097# endif
1098 }
1099# endif
1100 return uCR0;
1101}
1102#endif
1103
1104
1105/**
1106 * Sets the CR0 register.
1107 * @param uCR0 The new CR0 value.
1108 */
1109#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1110DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1111#else
1112DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1113{
1114# if RT_INLINE_ASM_USES_INTRIN
1115 __writecr0(uCR0);
1116
1117# elif RT_INLINE_ASM_GNU_STYLE
1118# ifdef RT_ARCH_AMD64
1119 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1120# else
1121 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1122# endif
1123# else
1124 __asm
1125 {
1126# ifdef RT_ARCH_AMD64
1127 mov rax, [uCR0]
1128 mov cr0, rax
1129# else
1130 mov eax, [uCR0]
1131 mov cr0, eax
1132# endif
1133 }
1134# endif
1135}
1136#endif
1137
1138
1139/**
1140 * Get cr2.
1141 * @returns cr2.
1142 */
1143#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1144DECLASM(RTCCUINTREG) ASMGetCR2(void);
1145#else
1146DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1147{
1148 RTCCUINTREG uCR2;
1149# if RT_INLINE_ASM_USES_INTRIN
1150 uCR2 = __readcr2();
1151
1152# elif RT_INLINE_ASM_GNU_STYLE
1153# ifdef RT_ARCH_AMD64
1154 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1155# else
1156 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1157# endif
1158# else
1159 __asm
1160 {
1161# ifdef RT_ARCH_AMD64
1162 mov rax, cr2
1163 mov [uCR2], rax
1164# else
1165 mov eax, cr2
1166 mov [uCR2], eax
1167# endif
1168 }
1169# endif
1170 return uCR2;
1171}
1172#endif
1173
1174
1175/**
1176 * Sets the CR2 register.
1177 * @param uCR2 The new CR0 value.
1178 */
1179#if RT_INLINE_ASM_EXTERNAL
1180DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1181#else
1182DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1183{
1184# if RT_INLINE_ASM_GNU_STYLE
1185# ifdef RT_ARCH_AMD64
1186 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1187# else
1188 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1189# endif
1190# else
1191 __asm
1192 {
1193# ifdef RT_ARCH_AMD64
1194 mov rax, [uCR2]
1195 mov cr2, rax
1196# else
1197 mov eax, [uCR2]
1198 mov cr2, eax
1199# endif
1200 }
1201# endif
1202}
1203#endif
1204
1205
1206/**
1207 * Get cr3.
1208 * @returns cr3.
1209 */
1210#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1211DECLASM(RTCCUINTREG) ASMGetCR3(void);
1212#else
1213DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1214{
1215 RTCCUINTREG uCR3;
1216# if RT_INLINE_ASM_USES_INTRIN
1217 uCR3 = __readcr3();
1218
1219# elif RT_INLINE_ASM_GNU_STYLE
1220# ifdef RT_ARCH_AMD64
1221 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1222# else
1223 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1224# endif
1225# else
1226 __asm
1227 {
1228# ifdef RT_ARCH_AMD64
1229 mov rax, cr3
1230 mov [uCR3], rax
1231# else
1232 mov eax, cr3
1233 mov [uCR3], eax
1234# endif
1235 }
1236# endif
1237 return uCR3;
1238}
1239#endif
1240
1241
1242/**
1243 * Sets the CR3 register.
1244 *
1245 * @param uCR3 New CR3 value.
1246 */
1247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1248DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1249#else
1250DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1251{
1252# if RT_INLINE_ASM_USES_INTRIN
1253 __writecr3(uCR3);
1254
1255# elif RT_INLINE_ASM_GNU_STYLE
1256# ifdef RT_ARCH_AMD64
1257 __asm__ __volatile__("movq %0, %%cr3\n\t" : : "r" (uCR3));
1258# else
1259 __asm__ __volatile__("movl %0, %%cr3\n\t" : : "r" (uCR3));
1260# endif
1261# else
1262 __asm
1263 {
1264# ifdef RT_ARCH_AMD64
1265 mov rax, [uCR3]
1266 mov cr3, rax
1267# else
1268 mov eax, [uCR3]
1269 mov cr3, eax
1270# endif
1271 }
1272# endif
1273}
1274#endif
1275
1276
1277/**
1278 * Reloads the CR3 register.
1279 */
1280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1281DECLASM(void) ASMReloadCR3(void);
1282#else
1283DECLINLINE(void) ASMReloadCR3(void)
1284{
1285# if RT_INLINE_ASM_USES_INTRIN
1286 __writecr3(__readcr3());
1287
1288# elif RT_INLINE_ASM_GNU_STYLE
1289 RTCCUINTREG u;
1290# ifdef RT_ARCH_AMD64
1291 __asm__ __volatile__("movq %%cr3, %0\n\t"
1292 "movq %0, %%cr3\n\t"
1293 : "=r" (u));
1294# else
1295 __asm__ __volatile__("movl %%cr3, %0\n\t"
1296 "movl %0, %%cr3\n\t"
1297 : "=r" (u));
1298# endif
1299# else
1300 __asm
1301 {
1302# ifdef RT_ARCH_AMD64
1303 mov rax, cr3
1304 mov cr3, rax
1305# else
1306 mov eax, cr3
1307 mov cr3, eax
1308# endif
1309 }
1310# endif
1311}
1312#endif
1313
1314
1315/**
1316 * Get cr4.
1317 * @returns cr4.
1318 */
1319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1320DECLASM(RTCCUINTREG) ASMGetCR4(void);
1321#else
1322DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1323{
1324 RTCCUINTREG uCR4;
1325# if RT_INLINE_ASM_USES_INTRIN
1326 uCR4 = __readcr4();
1327
1328# elif RT_INLINE_ASM_GNU_STYLE
1329# ifdef RT_ARCH_AMD64
1330 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1331# else
1332 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1333# endif
1334# else
1335 __asm
1336 {
1337# ifdef RT_ARCH_AMD64
1338 mov rax, cr4
1339 mov [uCR4], rax
1340# else
1341 push eax /* just in case */
1342 /*mov eax, cr4*/
1343 _emit 0x0f
1344 _emit 0x20
1345 _emit 0xe0
1346 mov [uCR4], eax
1347 pop eax
1348# endif
1349 }
1350# endif
1351 return uCR4;
1352}
1353#endif
1354
1355
1356/**
1357 * Sets the CR4 register.
1358 *
1359 * @param uCR4 New CR4 value.
1360 */
1361#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1362DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1363#else
1364DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1365{
1366# if RT_INLINE_ASM_USES_INTRIN
1367 __writecr4(uCR4);
1368
1369# elif RT_INLINE_ASM_GNU_STYLE
1370# ifdef RT_ARCH_AMD64
1371 __asm__ __volatile__("movq %0, %%cr4\n\t" : : "r" (uCR4));
1372# else
1373 __asm__ __volatile__("movl %0, %%cr4\n\t" : : "r" (uCR4));
1374# endif
1375# else
1376 __asm
1377 {
1378# ifdef RT_ARCH_AMD64
1379 mov rax, [uCR4]
1380 mov cr4, rax
1381# else
1382 mov eax, [uCR4]
1383 _emit 0x0F
1384 _emit 0x22
1385 _emit 0xE0 /* mov cr4, eax */
1386# endif
1387 }
1388# endif
1389}
1390#endif
1391
1392
1393/**
1394 * Get cr8.
1395 * @returns cr8.
1396 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1397 */
1398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1399DECLASM(RTCCUINTREG) ASMGetCR8(void);
1400#else
1401DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1402{
1403# ifdef RT_ARCH_AMD64
1404 RTCCUINTREG uCR8;
1405# if RT_INLINE_ASM_USES_INTRIN
1406 uCR8 = __readcr8();
1407
1408# elif RT_INLINE_ASM_GNU_STYLE
1409 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1410# else
1411 __asm
1412 {
1413 mov rax, cr8
1414 mov [uCR8], rax
1415 }
1416# endif
1417 return uCR8;
1418# else /* !RT_ARCH_AMD64 */
1419 return 0;
1420# endif /* !RT_ARCH_AMD64 */
1421}
1422#endif
1423
1424
1425/**
1426 * Enables interrupts (EFLAGS.IF).
1427 */
1428#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1429DECLASM(void) ASMIntEnable(void);
1430#else
1431DECLINLINE(void) ASMIntEnable(void)
1432{
1433# if RT_INLINE_ASM_GNU_STYLE
1434 __asm("sti\n");
1435# elif RT_INLINE_ASM_USES_INTRIN
1436 _enable();
1437# else
1438 __asm sti
1439# endif
1440}
1441#endif
1442
1443
1444/**
1445 * Disables interrupts (!EFLAGS.IF).
1446 */
1447#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1448DECLASM(void) ASMIntDisable(void);
1449#else
1450DECLINLINE(void) ASMIntDisable(void)
1451{
1452# if RT_INLINE_ASM_GNU_STYLE
1453 __asm("cli\n");
1454# elif RT_INLINE_ASM_USES_INTRIN
1455 _disable();
1456# else
1457 __asm cli
1458# endif
1459}
1460#endif
1461
1462
1463/**
1464 * Disables interrupts and returns previous xFLAGS.
1465 */
1466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1467DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1468#else
1469DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1470{
1471 RTCCUINTREG xFlags;
1472# if RT_INLINE_ASM_GNU_STYLE
1473# ifdef RT_ARCH_AMD64
1474 __asm__ __volatile__("pushfq\n\t"
1475 "cli\n\t"
1476 "popq %0\n\t"
1477 : "=rm" (xFlags));
1478# else
1479 __asm__ __volatile__("pushfl\n\t"
1480 "cli\n\t"
1481 "popl %0\n\t"
1482 : "=rm" (xFlags));
1483# endif
1484# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1485 xFlags = ASMGetFlags();
1486 _disable();
1487# else
1488 __asm {
1489 pushfd
1490 cli
1491 pop [xFlags]
1492 }
1493# endif
1494 return xFlags;
1495}
1496#endif
1497
1498
1499/**
1500 * Halts the CPU until interrupted.
1501 */
1502#if RT_INLINE_ASM_EXTERNAL
1503DECLASM(void) ASMHalt(void);
1504#else
1505DECLINLINE(void) ASMHalt(void)
1506{
1507# if RT_INLINE_ASM_GNU_STYLE
1508 __asm__ __volatile__("hlt\n\t");
1509# else
1510 __asm {
1511 hlt
1512 }
1513# endif
1514}
1515#endif
1516
1517
1518/**
1519 * Reads a machine specific register.
1520 *
1521 * @returns Register content.
1522 * @param uRegister Register to read.
1523 */
1524#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1525DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1526#else
1527DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1528{
1529 RTUINT64U u;
1530# if RT_INLINE_ASM_GNU_STYLE
1531 __asm__ __volatile__("rdmsr\n\t"
1532 : "=a" (u.s.Lo),
1533 "=d" (u.s.Hi)
1534 : "c" (uRegister));
1535
1536# elif RT_INLINE_ASM_USES_INTRIN
1537 u.u = __readmsr(uRegister);
1538
1539# else
1540 __asm
1541 {
1542 mov ecx, [uRegister]
1543 rdmsr
1544 mov [u.s.Lo], eax
1545 mov [u.s.Hi], edx
1546 }
1547# endif
1548
1549 return u.u;
1550}
1551#endif
1552
1553
1554/**
1555 * Writes a machine specific register.
1556 *
1557 * @returns Register content.
1558 * @param uRegister Register to write to.
1559 * @param u64Val Value to write.
1560 */
1561#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1562DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1563#else
1564DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1565{
1566 RTUINT64U u;
1567
1568 u.u = u64Val;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 __asm__ __volatile__("wrmsr\n\t"
1571 ::"a" (u.s.Lo),
1572 "d" (u.s.Hi),
1573 "c" (uRegister));
1574
1575# elif RT_INLINE_ASM_USES_INTRIN
1576 __writemsr(uRegister, u.u);
1577
1578# else
1579 __asm
1580 {
1581 mov ecx, [uRegister]
1582 mov edx, [u.s.Hi]
1583 mov eax, [u.s.Lo]
1584 wrmsr
1585 }
1586# endif
1587}
1588#endif
1589
1590
1591/**
1592 * Reads low part of a machine specific register.
1593 *
1594 * @returns Register content.
1595 * @param uRegister Register to read.
1596 */
1597#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1598DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1599#else
1600DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1601{
1602 uint32_t u32;
1603# if RT_INLINE_ASM_GNU_STYLE
1604 __asm__ __volatile__("rdmsr\n\t"
1605 : "=a" (u32)
1606 : "c" (uRegister)
1607 : "edx");
1608
1609# elif RT_INLINE_ASM_USES_INTRIN
1610 u32 = (uint32_t)__readmsr(uRegister);
1611
1612#else
1613 __asm
1614 {
1615 mov ecx, [uRegister]
1616 rdmsr
1617 mov [u32], eax
1618 }
1619# endif
1620
1621 return u32;
1622}
1623#endif
1624
1625
1626/**
1627 * Reads high part of a machine specific register.
1628 *
1629 * @returns Register content.
1630 * @param uRegister Register to read.
1631 */
1632#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1633DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1634#else
1635DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1636{
1637 uint32_t u32;
1638# if RT_INLINE_ASM_GNU_STYLE
1639 __asm__ __volatile__("rdmsr\n\t"
1640 : "=d" (u32)
1641 : "c" (uRegister)
1642 : "eax");
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1646
1647# else
1648 __asm
1649 {
1650 mov ecx, [uRegister]
1651 rdmsr
1652 mov [u32], edx
1653 }
1654# endif
1655
1656 return u32;
1657}
1658#endif
1659
1660
1661/**
1662 * Gets dr0.
1663 *
1664 * @returns dr0.
1665 */
1666#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1667DECLASM(RTCCUINTREG) ASMGetDR0(void);
1668#else
1669DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1670{
1671 RTCCUINTREG uDR0;
1672# if RT_INLINE_ASM_USES_INTRIN
1673 uDR0 = __readdr(0);
1674# elif RT_INLINE_ASM_GNU_STYLE
1675# ifdef RT_ARCH_AMD64
1676 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1677# else
1678 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1679# endif
1680# else
1681 __asm
1682 {
1683# ifdef RT_ARCH_AMD64
1684 mov rax, dr0
1685 mov [uDR0], rax
1686# else
1687 mov eax, dr0
1688 mov [uDR0], eax
1689# endif
1690 }
1691# endif
1692 return uDR0;
1693}
1694#endif
1695
1696
1697/**
1698 * Gets dr1.
1699 *
1700 * @returns dr1.
1701 */
1702#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1703DECLASM(RTCCUINTREG) ASMGetDR1(void);
1704#else
1705DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1706{
1707 RTCCUINTREG uDR1;
1708# if RT_INLINE_ASM_USES_INTRIN
1709 uDR1 = __readdr(1);
1710# elif RT_INLINE_ASM_GNU_STYLE
1711# ifdef RT_ARCH_AMD64
1712 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1713# else
1714 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1715# endif
1716# else
1717 __asm
1718 {
1719# ifdef RT_ARCH_AMD64
1720 mov rax, dr1
1721 mov [uDR1], rax
1722# else
1723 mov eax, dr1
1724 mov [uDR1], eax
1725# endif
1726 }
1727# endif
1728 return uDR1;
1729}
1730#endif
1731
1732
1733/**
1734 * Gets dr2.
1735 *
1736 * @returns dr2.
1737 */
1738#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1739DECLASM(RTCCUINTREG) ASMGetDR2(void);
1740#else
1741DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1742{
1743 RTCCUINTREG uDR2;
1744# if RT_INLINE_ASM_USES_INTRIN
1745 uDR2 = __readdr(2);
1746# elif RT_INLINE_ASM_GNU_STYLE
1747# ifdef RT_ARCH_AMD64
1748 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1749# else
1750 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1751# endif
1752# else
1753 __asm
1754 {
1755# ifdef RT_ARCH_AMD64
1756 mov rax, dr2
1757 mov [uDR2], rax
1758# else
1759 mov eax, dr2
1760 mov [uDR2], eax
1761# endif
1762 }
1763# endif
1764 return uDR2;
1765}
1766#endif
1767
1768
1769/**
1770 * Gets dr3.
1771 *
1772 * @returns dr3.
1773 */
1774#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1775DECLASM(RTCCUINTREG) ASMGetDR3(void);
1776#else
1777DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1778{
1779 RTCCUINTREG uDR3;
1780# if RT_INLINE_ASM_USES_INTRIN
1781 uDR3 = __readdr(3);
1782# elif RT_INLINE_ASM_GNU_STYLE
1783# ifdef RT_ARCH_AMD64
1784 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1785# else
1786 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1787# endif
1788# else
1789 __asm
1790 {
1791# ifdef RT_ARCH_AMD64
1792 mov rax, dr3
1793 mov [uDR3], rax
1794# else
1795 mov eax, dr3
1796 mov [uDR3], eax
1797# endif
1798 }
1799# endif
1800 return uDR3;
1801}
1802#endif
1803
1804
1805/**
1806 * Gets dr6.
1807 *
1808 * @returns dr6.
1809 */
1810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1811DECLASM(RTCCUINTREG) ASMGetDR6(void);
1812#else
1813DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1814{
1815 RTCCUINTREG uDR6;
1816# if RT_INLINE_ASM_USES_INTRIN
1817 uDR6 = __readdr(6);
1818# elif RT_INLINE_ASM_GNU_STYLE
1819# ifdef RT_ARCH_AMD64
1820 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1823# endif
1824# else
1825 __asm
1826 {
1827# ifdef RT_ARCH_AMD64
1828 mov rax, dr6
1829 mov [uDR6], rax
1830# else
1831 mov eax, dr6
1832 mov [uDR6], eax
1833# endif
1834 }
1835# endif
1836 return uDR6;
1837}
1838#endif
1839
1840
1841/**
1842 * Reads and clears DR6.
1843 *
1844 * @returns DR6.
1845 */
1846#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1847DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1848#else
1849DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1850{
1851 RTCCUINTREG uDR6;
1852# if RT_INLINE_ASM_USES_INTRIN
1853 uDR6 = __readdr(6);
1854 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1855# elif RT_INLINE_ASM_GNU_STYLE
1856 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1857# ifdef RT_ARCH_AMD64
1858 __asm__ __volatile__("movq %%dr6, %0\n\t"
1859 "movq %1, %%dr6\n\t"
1860 : "=r" (uDR6)
1861 : "r" (uNewValue));
1862# else
1863 __asm__ __volatile__("movl %%dr6, %0\n\t"
1864 "movl %1, %%dr6\n\t"
1865 : "=r" (uDR6)
1866 : "r" (uNewValue));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr6
1873 mov [uDR6], rax
1874 mov rcx, rax
1875 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1876 mov dr6, rcx
1877# else
1878 mov eax, dr6
1879 mov [uDR6], eax
1880 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1881 mov dr6, ecx
1882# endif
1883 }
1884# endif
1885 return uDR6;
1886}
1887#endif
1888
1889
1890/**
1891 * Gets dr7.
1892 *
1893 * @returns dr7.
1894 */
1895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1896DECLASM(RTCCUINTREG) ASMGetDR7(void);
1897#else
1898DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1899{
1900 RTCCUINTREG uDR7;
1901# if RT_INLINE_ASM_USES_INTRIN
1902 uDR7 = __readdr(7);
1903# elif RT_INLINE_ASM_GNU_STYLE
1904# ifdef RT_ARCH_AMD64
1905 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1906# else
1907 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1908# endif
1909# else
1910 __asm
1911 {
1912# ifdef RT_ARCH_AMD64
1913 mov rax, dr7
1914 mov [uDR7], rax
1915# else
1916 mov eax, dr7
1917 mov [uDR7], eax
1918# endif
1919 }
1920# endif
1921 return uDR7;
1922}
1923#endif
1924
1925
1926/**
1927 * Sets dr0.
1928 *
1929 * @param uDRVal Debug register value to write
1930 */
1931#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1932DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1933#else
1934DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1935{
1936# if RT_INLINE_ASM_USES_INTRIN
1937 __writedr(0, uDRVal);
1938# elif RT_INLINE_ASM_GNU_STYLE
1939# ifdef RT_ARCH_AMD64
1940 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1941# else
1942 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1943# endif
1944# else
1945 __asm
1946 {
1947# ifdef RT_ARCH_AMD64
1948 mov rax, [uDRVal]
1949 mov dr0, rax
1950# else
1951 mov eax, [uDRVal]
1952 mov dr0, eax
1953# endif
1954 }
1955# endif
1956}
1957#endif
1958
1959
1960/**
1961 * Sets dr1.
1962 *
1963 * @param uDRVal Debug register value to write
1964 */
1965#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1966DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1967#else
1968DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1969{
1970# if RT_INLINE_ASM_USES_INTRIN
1971 __writedr(1, uDRVal);
1972# elif RT_INLINE_ASM_GNU_STYLE
1973# ifdef RT_ARCH_AMD64
1974 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1975# else
1976 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1977# endif
1978# else
1979 __asm
1980 {
1981# ifdef RT_ARCH_AMD64
1982 mov rax, [uDRVal]
1983 mov dr1, rax
1984# else
1985 mov eax, [uDRVal]
1986 mov dr1, eax
1987# endif
1988 }
1989# endif
1990}
1991#endif
1992
1993
1994/**
1995 * Sets dr2.
1996 *
1997 * @param uDRVal Debug register value to write
1998 */
1999#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2000DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
2001#else
2002DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
2003{
2004# if RT_INLINE_ASM_USES_INTRIN
2005 __writedr(2, uDRVal);
2006# elif RT_INLINE_ASM_GNU_STYLE
2007# ifdef RT_ARCH_AMD64
2008 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
2009# else
2010 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
2011# endif
2012# else
2013 __asm
2014 {
2015# ifdef RT_ARCH_AMD64
2016 mov rax, [uDRVal]
2017 mov dr2, rax
2018# else
2019 mov eax, [uDRVal]
2020 mov dr2, eax
2021# endif
2022 }
2023# endif
2024}
2025#endif
2026
2027
2028/**
2029 * Sets dr3.
2030 *
2031 * @param uDRVal Debug register value to write
2032 */
2033#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2034DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
2035#else
2036DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
2037{
2038# if RT_INLINE_ASM_USES_INTRIN
2039 __writedr(3, uDRVal);
2040# elif RT_INLINE_ASM_GNU_STYLE
2041# ifdef RT_ARCH_AMD64
2042 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2043# else
2044 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2045# endif
2046# else
2047 __asm
2048 {
2049# ifdef RT_ARCH_AMD64
2050 mov rax, [uDRVal]
2051 mov dr3, rax
2052# else
2053 mov eax, [uDRVal]
2054 mov dr3, eax
2055# endif
2056 }
2057# endif
2058}
2059#endif
2060
2061
2062/**
2063 * Sets dr6.
2064 *
2065 * @param uDRVal Debug register value to write
2066 */
2067#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2068DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2069#else
2070DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2071{
2072# if RT_INLINE_ASM_USES_INTRIN
2073 __writedr(6, uDRVal);
2074# elif RT_INLINE_ASM_GNU_STYLE
2075# ifdef RT_ARCH_AMD64
2076 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2077# else
2078 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2079# endif
2080# else
2081 __asm
2082 {
2083# ifdef RT_ARCH_AMD64
2084 mov rax, [uDRVal]
2085 mov dr6, rax
2086# else
2087 mov eax, [uDRVal]
2088 mov dr6, eax
2089# endif
2090 }
2091# endif
2092}
2093#endif
2094
2095
2096/**
2097 * Sets dr7.
2098 *
2099 * @param uDRVal Debug register value to write
2100 */
2101#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2102DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2103#else
2104DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2105{
2106# if RT_INLINE_ASM_USES_INTRIN
2107 __writedr(7, uDRVal);
2108# elif RT_INLINE_ASM_GNU_STYLE
2109# ifdef RT_ARCH_AMD64
2110 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2111# else
2112 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2113# endif
2114# else
2115 __asm
2116 {
2117# ifdef RT_ARCH_AMD64
2118 mov rax, [uDRVal]
2119 mov dr7, rax
2120# else
2121 mov eax, [uDRVal]
2122 mov dr7, eax
2123# endif
2124 }
2125# endif
2126}
2127#endif
2128
2129
2130/**
2131 * Compiler memory barrier.
2132 *
2133 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2134 * values or any outstanding writes when returning from this function.
2135 *
2136 * This function must be used if non-volatile data is modified by a
2137 * device or the VMM. Typical cases are port access, MMIO access,
2138 * trapping instruction, etc.
2139 */
2140#if RT_INLINE_ASM_GNU_STYLE
2141# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
2142#elif RT_INLINE_ASM_USES_INTRIN
2143# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2144#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2145DECLINLINE(void) ASMCompilerBarrier(void)
2146{
2147 __asm
2148 {
2149 }
2150}
2151#endif
2152
2153
2154/**
2155 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2156 *
2157 * @param Port I/O port to write to.
2158 * @param u8 8-bit integer to write.
2159 */
2160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2161DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2162#else
2163DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2164{
2165# if RT_INLINE_ASM_GNU_STYLE
2166 __asm__ __volatile__("outb %b1, %w0\n\t"
2167 :: "Nd" (Port),
2168 "a" (u8));
2169
2170# elif RT_INLINE_ASM_USES_INTRIN
2171 __outbyte(Port, u8);
2172
2173# else
2174 __asm
2175 {
2176 mov dx, [Port]
2177 mov al, [u8]
2178 out dx, al
2179 }
2180# endif
2181}
2182#endif
2183
2184
2185/**
2186 * Reads a 8-bit unsigned integer from an I/O port, ordered.
2187 *
2188 * @returns 8-bit integer.
2189 * @param Port I/O port to read from.
2190 */
2191#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2192DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2193#else
2194DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2195{
2196 uint8_t u8;
2197# if RT_INLINE_ASM_GNU_STYLE
2198 __asm__ __volatile__("inb %w1, %b0\n\t"
2199 : "=a" (u8)
2200 : "Nd" (Port));
2201
2202# elif RT_INLINE_ASM_USES_INTRIN
2203 u8 = __inbyte(Port);
2204
2205# else
2206 __asm
2207 {
2208 mov dx, [Port]
2209 in al, dx
2210 mov [u8], al
2211 }
2212# endif
2213 return u8;
2214}
2215#endif
2216
2217
2218/**
2219 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2220 *
2221 * @param Port I/O port to write to.
2222 * @param u16 16-bit integer to write.
2223 */
2224#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2225DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2226#else
2227DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2228{
2229# if RT_INLINE_ASM_GNU_STYLE
2230 __asm__ __volatile__("outw %w1, %w0\n\t"
2231 :: "Nd" (Port),
2232 "a" (u16));
2233
2234# elif RT_INLINE_ASM_USES_INTRIN
2235 __outword(Port, u16);
2236
2237# else
2238 __asm
2239 {
2240 mov dx, [Port]
2241 mov ax, [u16]
2242 out dx, ax
2243 }
2244# endif
2245}
2246#endif
2247
2248
2249/**
2250 * Reads a 16-bit unsigned integer from an I/O port, ordered.
2251 *
2252 * @returns 16-bit integer.
2253 * @param Port I/O port to read from.
2254 */
2255#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2256DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2257#else
2258DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2259{
2260 uint16_t u16;
2261# if RT_INLINE_ASM_GNU_STYLE
2262 __asm__ __volatile__("inw %w1, %w0\n\t"
2263 : "=a" (u16)
2264 : "Nd" (Port));
2265
2266# elif RT_INLINE_ASM_USES_INTRIN
2267 u16 = __inword(Port);
2268
2269# else
2270 __asm
2271 {
2272 mov dx, [Port]
2273 in ax, dx
2274 mov [u16], ax
2275 }
2276# endif
2277 return u16;
2278}
2279#endif
2280
2281
2282/**
2283 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2284 *
2285 * @param Port I/O port to write to.
2286 * @param u32 32-bit integer to write.
2287 */
2288#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2289DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2290#else
2291DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2292{
2293# if RT_INLINE_ASM_GNU_STYLE
2294 __asm__ __volatile__("outl %1, %w0\n\t"
2295 :: "Nd" (Port),
2296 "a" (u32));
2297
2298# elif RT_INLINE_ASM_USES_INTRIN
2299 __outdword(Port, u32);
2300
2301# else
2302 __asm
2303 {
2304 mov dx, [Port]
2305 mov eax, [u32]
2306 out dx, eax
2307 }
2308# endif
2309}
2310#endif
2311
2312
2313/**
2314 * Reads a 32-bit unsigned integer from an I/O port, ordered.
2315 *
2316 * @returns 32-bit integer.
2317 * @param Port I/O port to read from.
2318 */
2319#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2320DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2321#else
2322DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2323{
2324 uint32_t u32;
2325# if RT_INLINE_ASM_GNU_STYLE
2326 __asm__ __volatile__("inl %w1, %0\n\t"
2327 : "=a" (u32)
2328 : "Nd" (Port));
2329
2330# elif RT_INLINE_ASM_USES_INTRIN
2331 u32 = __indword(Port);
2332
2333# else
2334 __asm
2335 {
2336 mov dx, [Port]
2337 in eax, dx
2338 mov [u32], eax
2339 }
2340# endif
2341 return u32;
2342}
2343#endif
2344
2345
2346/**
2347 * Writes a string of 8-bit unsigned integer items to an I/O port, ordered.
2348 *
2349 * @param Port I/O port to write to.
2350 * @param pau8 Pointer to the string buffer.
2351 * @param c The number of items to write.
2352 */
2353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2354DECLASM(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c);
2355#else
2356DECLINLINE(void) ASMOutStrU8(RTIOPORT Port, uint8_t const *pau8, size_t c)
2357{
2358# if RT_INLINE_ASM_GNU_STYLE
2359 __asm__ __volatile__("rep; outsb\n\t"
2360 : "+S" (pau8),
2361 "+c" (c)
2362 : "d" (Port));
2363
2364# elif RT_INLINE_ASM_USES_INTRIN
2365 __outbytestring(Port, (unsigned char *)pau8, (unsigned long)c);
2366
2367# else
2368 __asm
2369 {
2370 mov dx, [Port]
2371 mov ecx, [c]
2372 mov eax, [pau8]
2373 xchg esi, eax
2374 rep outsb
2375 xchg esi, eax
2376 }
2377# endif
2378}
2379#endif
2380
2381
2382/**
2383 * Reads a string of 8-bit unsigned integer items from an I/O port, ordered.
2384 *
2385 * @param Port I/O port to read from.
2386 * @param pau8 Pointer to the string buffer (output).
2387 * @param c The number of items to read.
2388 */
2389#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2390DECLASM(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c);
2391#else
2392DECLINLINE(void) ASMInStrU8(RTIOPORT Port, uint8_t *pau8, size_t c)
2393{
2394# if RT_INLINE_ASM_GNU_STYLE
2395 __asm__ __volatile__("rep; insb\n\t"
2396 : "+D" (pau8),
2397 "+c" (c)
2398 : "d" (Port));
2399
2400# elif RT_INLINE_ASM_USES_INTRIN
2401 __inbytestring(Port, pau8, (unsigned long)c);
2402
2403# else
2404 __asm
2405 {
2406 mov dx, [Port]
2407 mov ecx, [c]
2408 mov eax, [pau8]
2409 xchg edi, eax
2410 rep insb
2411 xchg edi, eax
2412 }
2413# endif
2414}
2415#endif
2416
2417
2418/**
2419 * Writes a string of 16-bit unsigned integer items to an I/O port, ordered.
2420 *
2421 * @param Port I/O port to write to.
2422 * @param pau16 Pointer to the string buffer.
2423 * @param c The number of items to write.
2424 */
2425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2426DECLASM(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c);
2427#else
2428DECLINLINE(void) ASMOutStrU16(RTIOPORT Port, uint16_t const *pau16, size_t c)
2429{
2430# if RT_INLINE_ASM_GNU_STYLE
2431 __asm__ __volatile__("rep; outsw\n\t"
2432 : "+S" (pau16),
2433 "+c" (c)
2434 : "d" (Port));
2435
2436# elif RT_INLINE_ASM_USES_INTRIN
2437 __outwordstring(Port, (unsigned short *)pau16, (unsigned long)c);
2438
2439# else
2440 __asm
2441 {
2442 mov dx, [Port]
2443 mov ecx, [c]
2444 mov eax, [pau16]
2445 xchg esi, eax
2446 rep outsw
2447 xchg esi, eax
2448 }
2449# endif
2450}
2451#endif
2452
2453
2454/**
2455 * Reads a string of 16-bit unsigned integer items from an I/O port, ordered.
2456 *
2457 * @param Port I/O port to read from.
2458 * @param pau16 Pointer to the string buffer (output).
2459 * @param c The number of items to read.
2460 */
2461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2462DECLASM(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c);
2463#else
2464DECLINLINE(void) ASMInStrU16(RTIOPORT Port, uint16_t *pau16, size_t c)
2465{
2466# if RT_INLINE_ASM_GNU_STYLE
2467 __asm__ __volatile__("rep; insw\n\t"
2468 : "+D" (pau16),
2469 "+c" (c)
2470 : "d" (Port));
2471
2472# elif RT_INLINE_ASM_USES_INTRIN
2473 __inwordstring(Port, pau16, (unsigned long)c);
2474
2475# else
2476 __asm
2477 {
2478 mov dx, [Port]
2479 mov ecx, [c]
2480 mov eax, [pau16]
2481 xchg edi, eax
2482 rep insw
2483 xchg edi, eax
2484 }
2485# endif
2486}
2487#endif
2488
2489
2490/**
2491 * Writes a string of 32-bit unsigned integer items to an I/O port, ordered.
2492 *
2493 * @param Port I/O port to write to.
2494 * @param pau32 Pointer to the string buffer.
2495 * @param c The number of items to write.
2496 */
2497#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2498DECLASM(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c);
2499#else
2500DECLINLINE(void) ASMOutStrU32(RTIOPORT Port, uint32_t const *pau32, size_t c)
2501{
2502# if RT_INLINE_ASM_GNU_STYLE
2503 __asm__ __volatile__("rep; outsl\n\t"
2504 : "+S" (pau32),
2505 "+c" (c)
2506 : "d" (Port));
2507
2508# elif RT_INLINE_ASM_USES_INTRIN
2509 __outdwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2510
2511# else
2512 __asm
2513 {
2514 mov dx, [Port]
2515 mov ecx, [c]
2516 mov eax, [pau32]
2517 xchg esi, eax
2518 rep outsd
2519 xchg esi, eax
2520 }
2521# endif
2522}
2523#endif
2524
2525
2526/**
2527 * Reads a string of 32-bit unsigned integer items from an I/O port, ordered.
2528 *
2529 * @param Port I/O port to read from.
2530 * @param pau32 Pointer to the string buffer (output).
2531 * @param c The number of items to read.
2532 */
2533#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2534DECLASM(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c);
2535#else
2536DECLINLINE(void) ASMInStrU32(RTIOPORT Port, uint32_t *pau32, size_t c)
2537{
2538# if RT_INLINE_ASM_GNU_STYLE
2539 __asm__ __volatile__("rep; insl\n\t"
2540 : "+D" (pau32),
2541 "+c" (c)
2542 : "d" (Port));
2543
2544# elif RT_INLINE_ASM_USES_INTRIN
2545 __indwordstring(Port, (unsigned long *)pau32, (unsigned long)c);
2546
2547# else
2548 __asm
2549 {
2550 mov dx, [Port]
2551 mov ecx, [c]
2552 mov eax, [pau32]
2553 xchg edi, eax
2554 rep insd
2555 xchg edi, eax
2556 }
2557# endif
2558}
2559#endif
2560
2561
2562/**
2563 * Atomically Exchange an unsigned 8-bit value, ordered.
2564 *
2565 * @returns Current *pu8 value
2566 * @param pu8 Pointer to the 8-bit variable to update.
2567 * @param u8 The 8-bit value to assign to *pu8.
2568 */
2569#if RT_INLINE_ASM_EXTERNAL
2570DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2571#else
2572DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2573{
2574# if RT_INLINE_ASM_GNU_STYLE
2575 __asm__ __volatile__("xchgb %0, %1\n\t"
2576 : "=m" (*pu8),
2577 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2578 : "1" (u8),
2579 "m" (*pu8));
2580# else
2581 __asm
2582 {
2583# ifdef RT_ARCH_AMD64
2584 mov rdx, [pu8]
2585 mov al, [u8]
2586 xchg [rdx], al
2587 mov [u8], al
2588# else
2589 mov edx, [pu8]
2590 mov al, [u8]
2591 xchg [edx], al
2592 mov [u8], al
2593# endif
2594 }
2595# endif
2596 return u8;
2597}
2598#endif
2599
2600
2601/**
2602 * Atomically Exchange a signed 8-bit value, ordered.
2603 *
2604 * @returns Current *pu8 value
2605 * @param pi8 Pointer to the 8-bit variable to update.
2606 * @param i8 The 8-bit value to assign to *pi8.
2607 */
2608DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2609{
2610 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2611}
2612
2613
2614/**
2615 * Atomically Exchange a bool value, ordered.
2616 *
2617 * @returns Current *pf value
2618 * @param pf Pointer to the 8-bit variable to update.
2619 * @param f The 8-bit value to assign to *pi8.
2620 */
2621DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2622{
2623#ifdef _MSC_VER
2624 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2625#else
2626 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2627#endif
2628}
2629
2630
2631/**
2632 * Atomically Exchange an unsigned 16-bit value, ordered.
2633 *
2634 * @returns Current *pu16 value
2635 * @param pu16 Pointer to the 16-bit variable to update.
2636 * @param u16 The 16-bit value to assign to *pu16.
2637 */
2638#if RT_INLINE_ASM_EXTERNAL
2639DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2640#else
2641DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2642{
2643# if RT_INLINE_ASM_GNU_STYLE
2644 __asm__ __volatile__("xchgw %0, %1\n\t"
2645 : "=m" (*pu16),
2646 "=r" (u16)
2647 : "1" (u16),
2648 "m" (*pu16));
2649# else
2650 __asm
2651 {
2652# ifdef RT_ARCH_AMD64
2653 mov rdx, [pu16]
2654 mov ax, [u16]
2655 xchg [rdx], ax
2656 mov [u16], ax
2657# else
2658 mov edx, [pu16]
2659 mov ax, [u16]
2660 xchg [edx], ax
2661 mov [u16], ax
2662# endif
2663 }
2664# endif
2665 return u16;
2666}
2667#endif
2668
2669
2670/**
2671 * Atomically Exchange a signed 16-bit value, ordered.
2672 *
2673 * @returns Current *pu16 value
2674 * @param pi16 Pointer to the 16-bit variable to update.
2675 * @param i16 The 16-bit value to assign to *pi16.
2676 */
2677DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2678{
2679 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2680}
2681
2682
2683/**
2684 * Atomically Exchange an unsigned 32-bit value, ordered.
2685 *
2686 * @returns Current *pu32 value
2687 * @param pu32 Pointer to the 32-bit variable to update.
2688 * @param u32 The 32-bit value to assign to *pu32.
2689 */
2690#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2691DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2692#else
2693DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2694{
2695# if RT_INLINE_ASM_GNU_STYLE
2696 __asm__ __volatile__("xchgl %0, %1\n\t"
2697 : "=m" (*pu32),
2698 "=r" (u32)
2699 : "1" (u32),
2700 "m" (*pu32));
2701
2702# elif RT_INLINE_ASM_USES_INTRIN
2703 u32 = _InterlockedExchange((long *)pu32, u32);
2704
2705# else
2706 __asm
2707 {
2708# ifdef RT_ARCH_AMD64
2709 mov rdx, [pu32]
2710 mov eax, u32
2711 xchg [rdx], eax
2712 mov [u32], eax
2713# else
2714 mov edx, [pu32]
2715 mov eax, u32
2716 xchg [edx], eax
2717 mov [u32], eax
2718# endif
2719 }
2720# endif
2721 return u32;
2722}
2723#endif
2724
2725
2726/**
2727 * Atomically Exchange a signed 32-bit value, ordered.
2728 *
2729 * @returns Current *pu32 value
2730 * @param pi32 Pointer to the 32-bit variable to update.
2731 * @param i32 The 32-bit value to assign to *pi32.
2732 */
2733DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2734{
2735 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2736}
2737
2738
2739/**
2740 * Atomically exchanges an unsigned 64-bit value, ordered.
2741 *
2742 * @returns The value *pu64 held before the exchange.
2743 * @param pu64 Pointer to the 64-bit variable to update.
2744 * @param u64 The 64-bit value to assign to *pu64.
2745 */
2746#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2747DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2748#else
2749DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2750{
2751# if defined(RT_ARCH_AMD64)
 /* AMD64: a single 64-bit exchange (intrinsic or xchg instruction). */
2752# if RT_INLINE_ASM_USES_INTRIN
2753 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2754
2755# elif RT_INLINE_ASM_GNU_STYLE
2756 __asm__ __volatile__("xchgq %0, %1\n\t"
2757 : "=m" (*pu64),
2758 "=r" (u64)
2759 : "1" (u64),
2760 "m" (*pu64));
2761# else
2762 __asm
2763 {
2764 mov rdx, [pu64]
2765 mov rax, [u64]
2766 xchg [rdx], rax
2767 mov [u64], rax
2768 }
2769# endif
2770# else /* !RT_ARCH_AMD64 */
 /* 32-bit: the exchange is built from a cmpxchg8b retry loop. ECX:EBX hold
  * the new value, EDX:EAX start out as a read of *pu64, and the loop repeats
  * (jnz) until the compare-exchange succeeds. */
2771# if RT_INLINE_ASM_GNU_STYLE
2772# if defined(PIC) || defined(__PIC__)
 /* In the PIC variant EBX is not handed to the asm as an operand; the low
  * half of the new value is swapped in from u32EBX (%3) and the previous
  * EBX content is moved back from %3 after the loop. %5 is pu64 in ESI. */
2773 uint32_t u32EBX = (uint32_t)u64;
2774 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2775 "xchgl %%ebx, %3\n\t"
2776 "1:\n\t"
2777 "lock; cmpxchg8b (%5)\n\t"
2778 "jnz 1b\n\t"
2779 "movl %3, %%ebx\n\t"
2780 /*"xchgl %%esi, %5\n\t"*/
2781 : "=A" (u64),
2782 "=m" (*pu64)
2783 : "0" (*pu64),
2784 "m" ( u32EBX ),
2785 "c" ( (uint32_t)(u64 >> 32) ),
2786 "S" (pu64));
2787# else /* !PIC */
2788 __asm__ __volatile__("1:\n\t"
2789 "lock; cmpxchg8b %1\n\t"
2790 "jnz 1b\n\t"
2791 : "=A" (u64),
2792 "=m" (*pu64)
2793 : "0" (*pu64),
2794 "b" ( (uint32_t)u64 ),
2795 "c" ( (uint32_t)(u64 >> 32) ));
2796# endif
2797# else
2798 __asm
2799 {
2800 mov ebx, dword ptr [u64]
2801 mov ecx, dword ptr [u64 + 4]
2802 mov edi, pu64
2803 mov eax, dword ptr [edi]
2804 mov edx, dword ptr [edi + 4]
2805 retry:
2806 lock cmpxchg8b [edi]
2807 jnz retry
2808 mov dword ptr [u64], eax
2809 mov dword ptr [u64 + 4], edx
2810 }
2811# endif
2812# endif /* !RT_ARCH_AMD64 */
2813 return u64;
2814}
2815#endif
2816
2817
2818/**
2819 * Atomically Exchange an signed 64-bit value, ordered.
2820 *
2821 * @returns Current *pi64 value
2822 * @param pi64 Pointer to the 64-bit variable to update.
2823 * @param i64 The 64-bit value to assign to *pi64.
2824 */
2825DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2826{
2827 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2828}
2829
2830
2831#ifdef RT_ARCH_AMD64
2832/**
2833 * Exchanges an unsigned 128-bit value (see remark: NOT atomic as a whole).
2834 *
2835 * @returns The previous *pu128, assembled from the two 64-bit halves.
2836 * @param pu128 Pointer to the 128-bit variable to update.
2837 * @param u128 The 128-bit value to assign to *pu128.
2838 *
2839 * @remark We cannot really assume that any hardware supports this. Nor do I have
2840 * GAS support for it. So, for the time being we'll BREAK the atomic
2841 * bit of this function and use two 64-bit exchanges instead.
2842 */
2843# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2844DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2845# else
2846DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2847{
 /* The condition is hard-wired to true; the CPUID feature test is commented out. */
2848 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2849 {
2850 /** @todo this is clumsy code */
 /* Low half first, then high half: two independent 64-bit exchanges. */
2851 RTUINT128U u128Ret;
2852 u128Ret.u = u128;
2853 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2854 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2855 return u128Ret.u;
2856 }
 /* NOTE(review): the disabled code below still uses cmpxchg8b in the GNU
  * variant and 'dword ptr' operands with 64-bit registers in the other; it
  * would need rework before it could be enabled. */
2857#if 0 /* later? */
2858 else
2859 {
2860# if RT_INLINE_ASM_GNU_STYLE
2861 __asm__ __volatile__("1:\n\t"
2862 "lock; cmpxchg8b %1\n\t"
2863 "jnz 1b\n\t"
2864 : "=A" (u128),
2865 "=m" (*pu128)
2866 : "0" (*pu128),
2867 "b" ( (uint64_t)u128 ),
2868 "c" ( (uint64_t)(u128 >> 64) ));
2869# else
2870 __asm
2871 {
2872 mov rbx, dword ptr [u128]
2873 mov rcx, dword ptr [u128 + 8]
2874 mov rdi, pu128
2875 mov rax, dword ptr [rdi]
2876 mov rdx, dword ptr [rdi + 8]
2877 retry:
2878 lock cmpxchg16b [rdi]
2879 jnz retry
2880 mov dword ptr [u128], rax
2881 mov dword ptr [u128 + 8], rdx
2882 }
2883# endif
2884 }
2885 return u128;
2886#endif
2887}
2888# endif
2889#endif /* RT_ARCH_AMD64 */
2890
2891
2892/**
2893 * Atomically Exchange a pointer value, ordered.
2894 *
2895 * @returns Current *ppv value
2896 * @param ppv Pointer to the pointer variable to update.
2897 * @param pv The pointer value to assign to *ppv.
2898 */
2899DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2900{
2901#if ARCH_BITS == 32
2902 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2903#elif ARCH_BITS == 64
2904 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2905#else
2906# error "ARCH_BITS is bogus"
2907#endif
2908}
2909
2910
2911/**
2912 * Atomically Exchange a raw-mode context pointer value, ordered.
2913 *
2914 * @returns Current *ppv value
2915 * @param ppvRC Pointer to the pointer variable to update.
2916 * @param pvRC The pointer value to assign to *ppv.
2917 */
2918DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2919{
2920 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2921}
2922
2923
2924/**
2925 * Atomically Exchange a ring-0 pointer value, ordered.
2926 *
2927 * @returns Current *ppv value
2928 * @param ppvR0 Pointer to the pointer variable to update.
2929 * @param pvR0 The pointer value to assign to *ppv.
2930 */
2931DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2932{
2933#if R0_ARCH_BITS == 32
2934 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2935#elif R0_ARCH_BITS == 64
2936 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2937#else
2938# error "R0_ARCH_BITS is bogus"
2939#endif
2940}
2941
2942
2943/**
2944 * Atomically Exchange a ring-3 pointer value, ordered.
2945 *
2946 * @returns Current *ppv value
2947 * @param ppvR3 Pointer to the pointer variable to update.
2948 * @param pvR3 The pointer value to assign to *ppv.
2949 */
2950DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2951{
2952#if R3_ARCH_BITS == 32
2953 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2954#elif R3_ARCH_BITS == 64
2955 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2956#else
2957# error "R3_ARCH_BITS is bogus"
2958#endif
2959}
2960
2961
2962/** @def ASMAtomicXchgHandle
2963 * Atomically Exchange a typical IPRT handle value, ordered.
2964 *
2965 * @param ph Pointer to the value to update.
2966 * @param hNew The new value to assigned to *pu.
2967 * @param phRes Where to store the current *ph value.
2968 *
2969 * @remarks This doesn't currently work for all handles (like RTFILE).
2970 */
2971#if HC_ARCH_BITS == 32
2972# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2973 do { \
2974 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2975 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2976 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2977 } while (0)
2978#elif HC_ARCH_BITS == 64
2979# define ASMAtomicXchgHandle(ph, hNew, phRes) \
2980 do { \
2981 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2982 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2983 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2984 } while (0)
2985#else
2986# error HC_ARCH_BITS
2987#endif
2988
2989
/**
 * Atomically exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange
 * function; any other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as it is missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3008
/**
 * Atomically exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8-, 16-, 32- or 64-bit exchange
 * function; any other size triggers an assertion and leaves *puRes alone.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the value *pu held before the exchange.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
3027
3028
3029/**
3030 * Atomically compares and exchanges an unsigned 32-bit value, ordered.
3031 *
3032 * @returns true if xchg was done.
3033 * @returns false if xchg wasn't done.
3034 *
3035 * @param pu32 Pointer to the value to update.
3036 * @param u32New The new value to assign to *pu32.
3037 * @param u32Old The old value to compare *pu32 with.
3038 */
3039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3040DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
3041#else
3042DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
3043{
3044# if RT_INLINE_ASM_GNU_STYLE
 /* u32Old goes in and out through EAX ("=a" tied via "2"), which is why the
  * inline variant takes it non-const; the result is the zero flag captured
  * by setz into u8Ret. */
3045 uint8_t u8Ret;
3046 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3047 "setz %1\n\t"
3048 : "=m" (*pu32),
3049 "=qm" (u8Ret),
3050 "=a" (u32Old)
3051 : "r" (u32New),
3052 "2" (u32Old),
3053 "m" (*pu32));
3054 return (bool)u8Ret;
3055
3056# elif RT_INLINE_ASM_USES_INTRIN
 /* Success is detected by comparing the intrinsic's return value with u32Old. */
3057 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
3058
3059# else
 /* EAX = comparand, ECX = new value; setz/movzx turn the zero flag into 0/1. */
3060 uint32_t u32Ret;
3061 __asm
3062 {
3063# ifdef RT_ARCH_AMD64
3064 mov rdx, [pu32]
3065# else
3066 mov edx, [pu32]
3067# endif
3068 mov eax, [u32Old]
3069 mov ecx, [u32New]
3070# ifdef RT_ARCH_AMD64
3071 lock cmpxchg [rdx], ecx
3072# else
3073 lock cmpxchg [edx], ecx
3074# endif
3075 setz al
3076 movzx eax, al
3077 mov [u32Ret], eax
3078 }
3079 return !!u32Ret;
3080# endif
3081}
3082#endif
3083
3084
3085/**
3086 * Atomically Compare and Exchange a signed 32-bit value, ordered.
3087 *
3088 * @returns true if xchg was done.
3089 * @returns false if xchg wasn't done.
3090 *
3091 * @param pi32 Pointer to the value to update.
3092 * @param i32New The new value to assigned to *pi32.
3093 * @param i32Old The old value to *pi32 compare with.
3094 */
3095DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
3096{
3097 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
3098}
3099
3100
3101/**
3102 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
3103 *
3104 * @returns true if xchg was done.
3105 * @returns false if xchg wasn't done.
3106 *
3107 * @param pu64 Pointer to the 64-bit variable to update.
3108 * @param u64New The 64-bit value to assign to *pu64.
3109 * @param u64Old The value to compare with.
3110 */
3111#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
3112 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
3113DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
3114#else
3115DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
3116{
3117# if RT_INLINE_ASM_USES_INTRIN
3118 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
3119
3120# elif defined(RT_ARCH_AMD64)
3121# if RT_INLINE_ASM_GNU_STYLE
3122 uint8_t u8Ret;
3123 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3124 "setz %1\n\t"
3125 : "=m" (*pu64),
3126 "=qm" (u8Ret),
3127 "=a" (u64Old)
3128 : "r" (u64New),
3129 "2" (u64Old),
3130 "m" (*pu64));
3131 return (bool)u8Ret;
3132# else
3133 bool fRet;
3134 __asm
3135 {
3136 mov rdx, [pu32]
3137 mov rax, [u64Old]
3138 mov rcx, [u64New]
3139 lock cmpxchg [rdx], rcx
3140 setz al
3141 mov [fRet], al
3142 }
3143 return fRet;
3144# endif
3145# else /* !RT_ARCH_AMD64 */
3146 uint32_t u32Ret;
3147# if RT_INLINE_ASM_GNU_STYLE
3148# if defined(PIC) || defined(__PIC__)
3149 uint32_t u32EBX = (uint32_t)u64New;
3150 uint32_t u32Spill;
3151 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
3152 "lock; cmpxchg8b (%6)\n\t"
3153 "setz %%al\n\t"
3154 "movl %4, %%ebx\n\t"
3155 "movzbl %%al, %%eax\n\t"
3156 : "=a" (u32Ret),
3157 "=d" (u32Spill),
3158# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3159 "+m" (*pu64)
3160# else
3161 "=m" (*pu64)
3162# endif
3163 : "A" (u64Old),
3164 "m" ( u32EBX ),
3165 "c" ( (uint32_t)(u64New >> 32) ),
3166 "S" (pu64));
3167# else /* !PIC */
3168 uint32_t u32Spill;
3169 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
3170 "setz %%al\n\t"
3171 "movzbl %%al, %%eax\n\t"
3172 : "=a" (u32Ret),
3173 "=d" (u32Spill),
3174 "+m" (*pu64)
3175 : "A" (u64Old),
3176 "b" ( (uint32_t)u64New ),
3177 "c" ( (uint32_t)(u64New >> 32) ));
3178# endif
3179 return (bool)u32Ret;
3180# else
3181 __asm
3182 {
3183 mov ebx, dword ptr [u64New]
3184 mov ecx, dword ptr [u64New + 4]
3185 mov edi, [pu64]
3186 mov eax, dword ptr [u64Old]
3187 mov edx, dword ptr [u64Old + 4]
3188 lock cmpxchg8b [edi]
3189 setz al
3190 movzx eax, al
3191 mov dword ptr [u32Ret], eax
3192 }
3193 return !!u32Ret;
3194# endif
3195# endif /* !RT_ARCH_AMD64 */
3196}
3197#endif
3198
3199
3200/**
3201 * Atomically Compare and exchange a signed 64-bit value, ordered.
3202 *
3203 * @returns true if xchg was done.
3204 * @returns false if xchg wasn't done.
3205 *
3206 * @param pi64 Pointer to the 64-bit variable to update.
3207 * @param i64 The 64-bit value to assign to *pu64.
3208 * @param i64Old The value to compare with.
3209 */
3210DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
3211{
3212 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
3213}
3214
3215
3216/**
3217 * Atomically Compare and Exchange a pointer value, ordered.
3218 *
3219 * @returns true if xchg was done.
3220 * @returns false if xchg wasn't done.
3221 *
3222 * @param ppv Pointer to the value to update.
3223 * @param pvNew The new value to assigned to *ppv.
3224 * @param pvOld The old value to *ppv compare with.
3225 */
3226DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
3227{
3228#if ARCH_BITS == 32
3229 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
3230#elif ARCH_BITS == 64
3231 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
3232#else
3233# error "ARCH_BITS is bogus"
3234#endif
3235}
3236
3237
3238/** @def ASMAtomicCmpXchgHandle
3239 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3240 *
3241 * @param ph Pointer to the value to update.
3242 * @param hNew The new value to assigned to *pu.
3243 * @param hOld The old value to *pu compare with.
3244 * @param fRc Where to store the result.
3245 *
3246 * @remarks This doesn't currently work for all handles (like RTFILE).
3247 */
3248#if HC_ARCH_BITS == 32
3249# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3250 do { \
3251 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
3252 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3253 } while (0)
3254#elif HC_ARCH_BITS == 64
3255# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
3256 do { \
3257 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
3258 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3259 } while (0)
3260#else
3261# error HC_ARCH_BITS
3262#endif
3263
3264
/** @def ASMAtomicCmpXchgSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers, ordered.
 *
 * Only 4- and 8-byte operands are supported; any other size triggers an
 * assertion and sets fRc to false.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
3286
3287
3288/**
3289 * Atomically compares and exchanges an unsigned 32-bit value, additionally
3290 * passing back the old value, ordered.
3291 *
3292 * @returns true if xchg was done.
3293 * @returns false if xchg wasn't done.
3294 *
3295 * @param pu32 Pointer to the value to update.
3296 * @param u32New The new value to assign to *pu32.
3297 * @param u32Old The old value to compare *pu32 with.
3298 * @param pu32Old Where to store the old value.
3299 */
3300#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3301DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3302#else
3303DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3304{
3305# if RT_INLINE_ASM_GNU_STYLE
 /* EAX carries u32Old in ("a") and is written to *pu32Old on the way out
  * ("=a"); the boolean result is the zero flag captured by setz. */
3306 uint8_t u8Ret;
3307 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3308 "setz %1\n\t"
3309 : "=m" (*pu32),
3310 "=qm" (u8Ret),
3311 "=a" (*pu32Old)
3312 : "r" (u32New),
3313 "a" (u32Old),
3314 "m" (*pu32));
3315 return (bool)u8Ret;
3316
3317# elif RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's return value is stored to *pu32Old and compared with u32Old. */
3318 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3319
3320# else
 /* EAX is stored to *pu32Old right after the cmpxchg; setz follows those
  * two mov instructions and still reads the cmpxchg's zero flag. */
3321 uint32_t u32Ret;
3322 __asm
3323 {
3324# ifdef RT_ARCH_AMD64
3325 mov rdx, [pu32]
3326# else
3327 mov edx, [pu32]
3328# endif
3329 mov eax, [u32Old]
3330 mov ecx, [u32New]
3331# ifdef RT_ARCH_AMD64
3332 lock cmpxchg [rdx], ecx
3333 mov rdx, [pu32Old]
3334 mov [rdx], eax
3335# else
3336 lock cmpxchg [edx], ecx
3337 mov edx, [pu32Old]
3338 mov [edx], eax
3339# endif
3340 setz al
3341 movzx eax, al
3342 mov [u32Ret], eax
3343 }
3344 return !!u32Ret;
3345# endif
3346}
3347#endif
3348
3349
3350/**
3351 * Atomically Compare and Exchange a signed 32-bit value, additionally
3352 * passes back old value, ordered.
3353 *
3354 * @returns true if xchg was done.
3355 * @returns false if xchg wasn't done.
3356 *
3357 * @param pi32 Pointer to the value to update.
3358 * @param i32New The new value to assigned to *pi32.
3359 * @param i32Old The old value to *pi32 compare with.
3360 * @param pi32Old Pointer store the old value at.
3361 */
3362DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3363{
3364 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3365}
3366
3367
3368/**
3369 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3370 * passing back old value, ordered.
3371 *
3372 * @returns true if xchg was done.
3373 * @returns false if xchg wasn't done.
3374 *
3375 * @param pu64 Pointer to the 64-bit variable to update.
3376 * @param u64New The 64-bit value to assign to *pu64.
3377 * @param u64Old The value to compare with.
3378 * @param pu64Old Pointer store the old value at.
3379 */
3380#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3381DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3382#else
3383DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3384{
3385# if RT_INLINE_ASM_USES_INTRIN
3386 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3387
3388# elif defined(RT_ARCH_AMD64)
3389# if RT_INLINE_ASM_GNU_STYLE
3390 uint8_t u8Ret;
3391 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3392 "setz %1\n\t"
3393 : "=m" (*pu64),
3394 "=qm" (u8Ret),
3395 "=a" (*pu64Old)
3396 : "r" (u64New),
3397 "a" (u64Old),
3398 "m" (*pu64));
3399 return (bool)u8Ret;
3400# else
3401 bool fRet;
3402 __asm
3403 {
3404 mov rdx, [pu32]
3405 mov rax, [u64Old]
3406 mov rcx, [u64New]
3407 lock cmpxchg [rdx], rcx
3408 mov rdx, [pu64Old]
3409 mov [rdx], rax
3410 setz al
3411 mov [fRet], al
3412 }
3413 return fRet;
3414# endif
3415# else /* !RT_ARCH_AMD64 */
3416# if RT_INLINE_ASM_GNU_STYLE
3417 uint64_t u64Ret;
3418# if defined(PIC) || defined(__PIC__)
3419 /* NB: this code uses a memory clobber description, because the clean
3420 * solution with an output value for *pu64 makes gcc run out of registers.
3421 * This will cause suboptimal code, and anyone with a better solution is
3422 * welcome to improve this. */
3423 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3424 "lock; cmpxchg8b %3\n\t"
3425 "xchgl %%ebx, %1\n\t"
3426 : "=A" (u64Ret)
3427 : "DS" ((uint32_t)u64New),
3428 "c" ((uint32_t)(u64New >> 32)),
3429 "m" (*pu64),
3430 "0" (u64Old)
3431 : "memory" );
3432# else /* !PIC */
3433 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3434 : "=A" (u64Ret),
3435 "=m" (*pu64)
3436 : "b" ((uint32_t)u64New),
3437 "c" ((uint32_t)(u64New >> 32)),
3438 "m" (*pu64),
3439 "0" (u64Old));
3440# endif
3441 *pu64Old = u64Ret;
3442 return u64Ret == u64Old;
3443# else
3444 uint32_t u32Ret;
3445 __asm
3446 {
3447 mov ebx, dword ptr [u64New]
3448 mov ecx, dword ptr [u64New + 4]
3449 mov edi, [pu64]
3450 mov eax, dword ptr [u64Old]
3451 mov edx, dword ptr [u64Old + 4]
3452 lock cmpxchg8b [edi]
3453 mov ebx, [pu64Old]
3454 mov [ebx], eax
3455 setz al
3456 movzx eax, al
3457 add ebx, 4
3458 mov [ebx], edx
3459 mov dword ptr [u32Ret], eax
3460 }
3461 return !!u32Ret;
3462# endif
3463# endif /* !RT_ARCH_AMD64 */
3464}
3465#endif
3466
3467
3468/**
3469 * Atomically Compare and exchange a signed 64-bit value, additionally
3470 * passing back old value, ordered.
3471 *
3472 * @returns true if xchg was done.
3473 * @returns false if xchg wasn't done.
3474 *
3475 * @param pi64 Pointer to the 64-bit variable to update.
3476 * @param i64 The 64-bit value to assign to *pu64.
3477 * @param i64Old The value to compare with.
3478 * @param pi64Old Pointer store the old value at.
3479 */
3480DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3481{
3482 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3483}
3484
3485/** @def ASMAtomicCmpXchgExHandle
3486 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3487 *
3488 * @param ph Pointer to the value to update.
3489 * @param hNew The new value to assigned to *pu.
3490 * @param hOld The old value to *pu compare with.
3491 * @param fRc Where to store the result.
3492 * @param phOldVal Pointer to where to store the old value.
3493 *
3494 * @remarks This doesn't currently work for all handles (like RTFILE).
3495 */
3496#if HC_ARCH_BITS == 32
3497# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3498 do { \
3499 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3500 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
3501 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
3502 } while (0)
3503#elif HC_ARCH_BITS == 64
3504# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3505 do { \
3506 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3507 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3508 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
3509 } while (0)
3510#else
3511# error HC_ARCH_BITS
3512#endif
3513
3514
/** @def ASMAtomicCmpXchgExSize
 * Atomically compares and exchanges a value whose size might differ
 * between platforms or compilers. Additionally passes back the old value.
 *
 * Only 4- and 8-byte operands are supported; any other size triggers an
 * assertion, sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result.
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(void *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(void *)(puOldVal)); \
                break; \
            /* sizeof yields size_t, so cast it to match the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
3538
3539
3540/**
3541 * Atomically Compare and Exchange a pointer value, additionally
3542 * passing back old value, ordered.
3543 *
3544 * @returns true if xchg was done.
3545 * @returns false if xchg wasn't done.
3546 *
3547 * @param ppv Pointer to the value to update.
3548 * @param pvNew The new value to assigned to *ppv.
3549 * @param pvOld The old value to *ppv compare with.
3550 * @param ppvOld Pointer store the old value at.
3551 */
3552DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3553{
3554#if ARCH_BITS == 32
3555 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3556#elif ARCH_BITS == 64
3557 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3558#else
3559# error "ARCH_BITS is bogus"
3560#endif
3561}
3562
3563
3564/**
3565 * Atomically exchanges and adds to a 32-bit value, ordered.
3566 *
3567 * @returns The old value (the value *pu32 held before the addition).
3568 * @param pu32 Pointer to the value.
3569 * @param u32 Number to add.
3570 */
3571#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3572DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3573#else
3574DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3575{
3576# if RT_INLINE_ASM_USES_INTRIN
3577 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3578 return u32;
3579
3580# elif RT_INLINE_ASM_GNU_STYLE
 /* %0 is the register holding u32 (in via "0", out via "=r"); %1 is *pu32.
  * The register content after xadd is what gets returned. */
3581 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3582 : "=r" (u32),
3583 "=m" (*pu32)
3584 : "0" (u32),
3585 "m" (*pu32)
3586 : "memory");
3587 return u32;
3588# else
 /* EAX = addend going in; after the xadd it is stored back into u32. */
3589 __asm
3590 {
3591 mov eax, [u32]
3592# ifdef RT_ARCH_AMD64
3593 mov rdx, [pu32]
3594 lock xadd [rdx], eax
3595# else
3596 mov edx, [pu32]
3597 lock xadd [edx], eax
3598# endif
3599 mov [u32], eax
3600 }
3601 return u32;
3602# endif
3603}
3604#endif
3605
3606
3607/**
3608 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3609 *
3610 * @returns The old value.
3611 * @param pi32 Pointer to the value.
3612 * @param i32 Number to add.
3613 */
3614DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3615{
3616 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3617}
3618
3619
3620/**
3621 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
3622 *
3623 * @returns The old value.
3624 * @param pu32 Pointer to the value.
3625 * @param u32 Number to subtract.
3626 */
3627DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3628{
3629 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3630}
3631
3632
3633/**
3634 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
3635 *
3636 * @returns The old value.
3637 * @param pi32 Pointer to the value.
3638 * @param i32 Number to subtract.
3639 */
3640DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3641{
3642 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3643}
3644
3645
3646/**
3647 * Atomically increments a 32-bit value, ordered.
3648 *
3649 * @returns The new value.
3650 * @param pu32 Pointer to the value to increment.
3651 */
3652#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3653DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3654#else
3655DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3656{
3657 uint32_t u32;
3658# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's result is returned unchanged. */
3659 u32 = _InterlockedIncrement((long *)pu32);
3660 return u32;
3661
3662# elif RT_INLINE_ASM_GNU_STYLE
 /* xadd with an addend of 1; the register result has 1 added to it before
  * being returned, in line with the documented "new value" contract. */
3663 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3664 : "=r" (u32),
3665 "=m" (*pu32)
3666 : "0" (1),
3667 "m" (*pu32)
3668 : "memory");
3669 return u32+1;
3670# else
 /* Same scheme as the GNU variant: xadd 1, then return the result plus 1. */
3671 __asm
3672 {
3673 mov eax, 1
3674# ifdef RT_ARCH_AMD64
3675 mov rdx, [pu32]
3676 lock xadd [rdx], eax
3677# else
3678 mov edx, [pu32]
3679 lock xadd [edx], eax
3680# endif
3681 mov u32, eax
3682 }
3683 return u32+1;
3684# endif
3685}
3686#endif
3687
3688
3689/**
3690 * Atomically increment a signed 32-bit value, ordered.
3691 *
3692 * @returns The new value.
3693 * @param pi32 Pointer to the value to increment.
3694 */
3695DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3696{
3697 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3698}
3699
3700
3701/**
3702 * Atomically decrements an unsigned 32-bit value, ordered.
3703 *
3704 * @returns The new value.
3705 * @param pu32 Pointer to the value to decrement.
3706 */
3707#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3708DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3709#else
3710DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3711{
3712 uint32_t u32;
3713# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's result is returned unchanged. */
3714 u32 = _InterlockedDecrement((long *)pu32);
3715 return u32;
3716
3717# elif RT_INLINE_ASM_GNU_STYLE
 /* xadd with an addend of -1; the register result has 1 subtracted from it
  * before being returned, in line with the documented "new value" contract. */
3718 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3719 : "=r" (u32),
3720 "=m" (*pu32)
3721 : "0" (-1),
3722 "m" (*pu32)
3723 : "memory");
3724 return u32-1;
3725# else
 /* Same scheme as the GNU variant: xadd -1, then return the result minus 1. */
3726 __asm
3727 {
3728 mov eax, -1
3729# ifdef RT_ARCH_AMD64
3730 mov rdx, [pu32]
3731 lock xadd [rdx], eax
3732# else
3733 mov edx, [pu32]
3734 lock xadd [edx], eax
3735# endif
3736 mov u32, eax
3737 }
3738 return u32-1;
3739# endif
3740}
3741#endif
3742
3743
3744/**
3745 * Atomically decrement a signed 32-bit value, ordered.
3746 *
3747 * @returns The new value.
3748 * @param pi32 Pointer to the value to decrement.
3749 */
3750DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3751{
3752 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3753}
3754
3755
3756/**
3757 * Atomically ORs a value into an unsigned 32-bit variable, ordered.
3758 *
3759 * @param pu32 Pointer to the variable to OR u32 into.
3760 * @param u32 The value to OR *pu32 with.
3761 */
3762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3763DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3764#else
3765DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3766{
3767# if RT_INLINE_ASM_USES_INTRIN
 /* The intrinsic's return value is not used. */
3768 _InterlockedOr((long volatile *)pu32, (long)u32);
3769
3770# elif RT_INLINE_ASM_GNU_STYLE
 /* %0 is *pu32; %1 is u32 as an immediate or register ("ir"). */
3771 __asm__ __volatile__("lock; orl %1, %0\n\t"
3772 : "=m" (*pu32)
3773 : "ir" (u32),
3774 "m" (*pu32));
3775# else
3776 __asm
3777 {
3778 mov eax, [u32]
3779# ifdef RT_ARCH_AMD64
3780 mov rdx, [pu32]
3781 lock or [rdx], eax
3782# else
3783 mov edx, [pu32]
3784 lock or [edx], eax
3785# endif
3786 }
3787# endif
3788}
3789#endif
3790
3791
3792/**
3793 * Atomically Or a signed 32-bit value, ordered.
3794 *
3795 * @param pi32 Pointer to the pointer variable to OR u32 with.
3796 * @param i32 The value to OR *pu32 with.
3797 */
3798DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3799{
3800 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3801}
3802
3803
3804/**
3805 * Atomically And an unsigned 32-bit value, ordered.
3806 *
3807 * @param pu32 Pointer to the pointer variable to AND u32 with.
3808 * @param u32 The value to AND *pu32 with.
3809 */
3810#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3811DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3812#else
3813DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3814{
3815# if RT_INLINE_ASM_USES_INTRIN
3816 _InterlockedAnd((long volatile *)pu32, u32);
3817
3818# elif RT_INLINE_ASM_GNU_STYLE
3819 __asm__ __volatile__("lock; andl %1, %0\n\t"
3820 : "=m" (*pu32)
3821 : "ir" (u32),
3822 "m" (*pu32));
3823# else
3824 __asm
3825 {
3826 mov eax, [u32]
3827# ifdef RT_ARCH_AMD64
3828 mov rdx, [pu32]
3829 lock and [rdx], eax
3830# else
3831 mov edx, [pu32]
3832 lock and [edx], eax
3833# endif
3834 }
3835# endif
3836}
3837#endif
3838
3839
3840/**
3841 * Atomically And a signed 32-bit value, ordered.
3842 *
3843 * @param pi32 Pointer to the pointer variable to AND i32 with.
3844 * @param i32 The value to AND *pi32 with.
3845 */
3846DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3847{
3848 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3849}
3850
3851
/**
 * Memory fence, waits for any pending writes and reads to complete.
 *
 * Currently implemented by atomically exchanging a throw-away stack variable
 * through ASMAtomicXchgU32() rather than by a dedicated fence instruction
 * (see the todo below).
 */
DECLINLINE(void) ASMMemoryFence(void)
{
    /** @todo use mfence? check if all cpus we care for support it. */
    uint32_t volatile u32; /* dummy exchange target; its value is never used */
    ASMAtomicXchgU32(&u32, 0);
}
3861
3862
/**
 * Write fence, waits for any pending writes to complete.
 *
 * Currently just forwards to ASMMemoryFence(), i.e. it is no cheaper than
 * the full fence.
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
3871
3872
/**
 * Read fence, waits for any pending reads to complete.
 *
 * Currently just forwards to ASMMemoryFence(), i.e. it is no cheaper than
 * the full fence.
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    ASMMemoryFence();
}
3881
3882
3883/**
3884 * Atomically reads an unsigned 8-bit value, ordered.
3885 *
3886 * @returns Current *pu8 value
3887 * @param pu8 Pointer to the 8-bit variable to read.
3888 */
3889DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3890{
3891 ASMMemoryFence();
3892 return *pu8; /* byte reads are atomic on x86 */
3893}
3894
3895
3896/**
3897 * Atomically reads an unsigned 8-bit value, unordered.
3898 *
3899 * @returns Current *pu8 value
3900 * @param pu8 Pointer to the 8-bit variable to read.
3901 */
3902DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3903{
3904 return *pu8; /* byte reads are atomic on x86 */
3905}
3906
3907
3908/**
3909 * Atomically reads a signed 8-bit value, ordered.
3910 *
3911 * @returns Current *pi8 value
3912 * @param pi8 Pointer to the 8-bit variable to read.
3913 */
3914DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3915{
3916 ASMMemoryFence();
3917 return *pi8; /* byte reads are atomic on x86 */
3918}
3919
3920
3921/**
3922 * Atomically reads a signed 8-bit value, unordered.
3923 *
3924 * @returns Current *pi8 value
3925 * @param pi8 Pointer to the 8-bit variable to read.
3926 */
3927DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3928{
3929 return *pi8; /* byte reads are atomic on x86 */
3930}
3931
3932
3933/**
3934 * Atomically reads an unsigned 16-bit value, ordered.
3935 *
3936 * @returns Current *pu16 value
3937 * @param pu16 Pointer to the 16-bit variable to read.
3938 */
3939DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3940{
3941 ASMMemoryFence();
3942 Assert(!((uintptr_t)pu16 & 1));
3943 return *pu16;
3944}
3945
3946
3947/**
3948 * Atomically reads an unsigned 16-bit value, unordered.
3949 *
3950 * @returns Current *pu16 value
3951 * @param pu16 Pointer to the 16-bit variable to read.
3952 */
3953DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3954{
3955 Assert(!((uintptr_t)pu16 & 1));
3956 return *pu16;
3957}
3958
3959
3960/**
3961 * Atomically reads a signed 16-bit value, ordered.
3962 *
3963 * @returns Current *pi16 value
3964 * @param pi16 Pointer to the 16-bit variable to read.
3965 */
3966DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3967{
3968 ASMMemoryFence();
3969 Assert(!((uintptr_t)pi16 & 1));
3970 return *pi16;
3971}
3972
3973
3974/**
3975 * Atomically reads a signed 16-bit value, unordered.
3976 *
3977 * @returns Current *pi16 value
3978 * @param pi16 Pointer to the 16-bit variable to read.
3979 */
3980DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3981{
3982 Assert(!((uintptr_t)pi16 & 1));
3983 return *pi16;
3984}
3985
3986
3987/**
3988 * Atomically reads an unsigned 32-bit value, ordered.
3989 *
3990 * @returns Current *pu32 value
3991 * @param pu32 Pointer to the 32-bit variable to read.
3992 */
3993DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3994{
3995 ASMMemoryFence();
3996 Assert(!((uintptr_t)pu32 & 3));
3997 return *pu32;
3998}
3999
4000
4001/**
4002 * Atomically reads an unsigned 32-bit value, unordered.
4003 *
4004 * @returns Current *pu32 value
4005 * @param pu32 Pointer to the 32-bit variable to read.
4006 */
4007DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
4008{
4009 Assert(!((uintptr_t)pu32 & 3));
4010 return *pu32;
4011}
4012
4013
4014/**
4015 * Atomically reads a signed 32-bit value, ordered.
4016 *
4017 * @returns Current *pi32 value
4018 * @param pi32 Pointer to the 32-bit variable to read.
4019 */
4020DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
4021{
4022 ASMMemoryFence();
4023 Assert(!((uintptr_t)pi32 & 3));
4024 return *pi32;
4025}
4026
4027
4028/**
4029 * Atomically reads a signed 32-bit value, unordered.
4030 *
4031 * @returns Current *pi32 value
4032 * @param pi32 Pointer to the 32-bit variable to read.
4033 */
4034DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
4035{
4036 Assert(!((uintptr_t)pi32 & 3));
4037 return *pi32;
4038}
4039
4040
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain 64-bit load.  On 32-bit
 * x86 the value is fetched with a locked CMPXCHG8B using an all-zero comparand
 * and an all-zero replacement: if the memory holds zero it is rewritten with
 * zero, otherwise the current value is returned in EDX:EAX.  Either way the
 * instruction performs a write cycle, hence the writability requirement.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || (RT_INLINE_ASM_GCC_4_3_X_X86 && defined(IN_RING3) && defined(__PIC__))
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* The assembly variants below are disabled; the C fallback after them is what is compiled. */
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is swapped with a zeroed stack variable around the instruction and
       restored afterwards - presumably because EBX is reserved in PIC code and
       cannot be named as an asm operand. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX (comparand) and ECX:EBX (replacement) are all passed in as zero. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
4118
4119
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * On AMD64 this is a plain 64-bit load without any fence.  On 32-bit x86 the
 * value is fetched with CMPXCHG8B using an all-zero comparand and an all-zero
 * replacement, so the current value ends up in EDX:EAX.  Note that the PIC
 * and MSC variants use the lock prefix while the non-PIC GNU variant does not.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* The assembly variants below are disabled; the C fallback after them is what is compiled. */
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* EBX is swapped with a zeroed stack variable around the instruction and
       restored afterwards - presumably because EBX is reserved in PIC code.
       ECX is zeroed inside the asm and declared as a dummy output. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor   %%eax,%%eax\n\t"
                         "xor   %%ecx,%%ecx\n\t"
                         "xor   %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64),
#    else
                           "=m" (*pu64),
#    endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    /* EDX:EAX (comparand) and ECX:EBX (replacement) are all passed in as zero. */
    __asm__ __volatile__("cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
4197
4198
4199/**
4200 * Atomically reads a signed 64-bit value, ordered.
4201 *
4202 * @returns Current *pi64 value
4203 * @param pi64 Pointer to the 64-bit variable to read.
4204 * The memory pointed to must be writable.
4205 * @remark This will fault if the memory is read-only!
4206 */
4207DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
4208{
4209 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
4210}
4211
4212
4213/**
4214 * Atomically reads a signed 64-bit value, unordered.
4215 *
4216 * @returns Current *pi64 value
4217 * @param pi64 Pointer to the 64-bit variable to read.
4218 * The memory pointed to must be writable.
4219 * @remark This will fault if the memory is read-only!
4220 */
4221DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
4222{
4223 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
4224}
4225
4226
4227/**
4228 * Atomically reads a pointer value, ordered.
4229 *
4230 * @returns Current *pv value
4231 * @param ppv Pointer to the pointer variable to read.
4232 */
4233DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
4234{
4235#if ARCH_BITS == 32
4236 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
4237#elif ARCH_BITS == 64
4238 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
4239#else
4240# error "ARCH_BITS is bogus"
4241#endif
4242}
4243
4244
4245/**
4246 * Atomically reads a pointer value, unordered.
4247 *
4248 * @returns Current *pv value
4249 * @param ppv Pointer to the pointer variable to read.
4250 */
4251DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
4252{
4253#if ARCH_BITS == 32
4254 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
4255#elif ARCH_BITS == 64
4256 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
4257#else
4258# error "ARCH_BITS is bogus"
4259#endif
4260}
4261
4262
4263/**
4264 * Atomically reads a boolean value, ordered.
4265 *
4266 * @returns Current *pf value
4267 * @param pf Pointer to the boolean variable to read.
4268 */
4269DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
4270{
4271 ASMMemoryFence();
4272 return *pf; /* byte reads are atomic on x86 */
4273}
4274
4275
4276/**
4277 * Atomically reads a boolean value, unordered.
4278 *
4279 * @returns Current *pf value
4280 * @param pf Pointer to the boolean variable to read.
4281 */
4282DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4283{
4284 return *pf; /* byte reads are atomic on x86 */
4285}
4286
4287
4288/**
4289 * Atomically read a typical IPRT handle value, ordered.
4290 *
4291 * @param ph Pointer to the handle variable to read.
4292 * @param phRes Where to store the result.
4293 *
4294 * @remarks This doesn't currently work for all handles (like RTFILE).
4295 */
4296#if HC_ARCH_BITS == 32
4297# define ASMAtomicReadHandle(ph, phRes) \
4298 do { \
4299 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
4300 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4301 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4302 } while (0)
4303#elif HC_ARCH_BITS == 64
4304# define ASMAtomicReadHandle(ph, phRes) \
4305 do { \
4306 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
4307 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4308 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4309 } while (0)
4310#else
4311# error HC_ARCH_BITS
4312#endif
4313
4314
4315/**
4316 * Atomically read a typical IPRT handle value, unordered.
4317 *
4318 * @param ph Pointer to the handle variable to read.
4319 * @param phRes Where to store the result.
4320 *
4321 * @remarks This doesn't currently work for all handles (like RTFILE).
4322 */
4323#if HC_ARCH_BITS == 32
4324# define ASMAtomicUoReadHandle(ph, phRes) \
4325 do { \
4326 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
4327 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4328 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
4329 } while (0)
4330#elif HC_ARCH_BITS == 64
4331# define ASMAtomicUoReadHandle(ph, phRes) \
4332 do { \
4333 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
4334 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4335 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
4336 } while (0)
4337#else
4338# error HC_ARCH_BITS
4339#endif
4340
4341
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion and leaves *(puRes) untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4359
4360
/**
 * Atomically reads a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion and leaves *(puRes) untouched.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4378
4379
4380/**
4381 * Atomically writes an unsigned 8-bit value, ordered.
4382 *
4383 * @param pu8 Pointer to the 8-bit variable.
4384 * @param u8 The 8-bit value to assign to *pu8.
4385 */
4386DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4387{
4388 ASMAtomicXchgU8(pu8, u8);
4389}
4390
4391
/**
 * Atomically writes an unsigned 8-bit value, unordered.
 *
 * Plain volatile store without any fence.
 *
 * @param   pu8    Pointer to the 8-bit variable to write.
 * @param   u8     The 8-bit value to assign to *pu8.
 */
DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
{
    *pu8 = u8;      /* byte writes are atomic on x86 */
}
4402
4403
4404/**
4405 * Atomically writes a signed 8-bit value, ordered.
4406 *
4407 * @param pi8 Pointer to the 8-bit variable to read.
4408 * @param i8 The 8-bit value to assign to *pi8.
4409 */
4410DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4411{
4412 ASMAtomicXchgS8(pi8, i8);
4413}
4414
4415
/**
 * Atomically writes a signed 8-bit value, unordered.
 *
 * Plain volatile store without any fence.
 *
 * @param   pi8    Pointer to the 8-bit variable to write.
 * @param   i8     The 8-bit value to assign to *pi8.
 */
DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
{
    *pi8 = i8;      /* byte writes are atomic on x86 */
}
4426
4427
4428/**
4429 * Atomically writes an unsigned 16-bit value, ordered.
4430 *
4431 * @param pu16 Pointer to the 16-bit variable.
4432 * @param u16 The 16-bit value to assign to *pu16.
4433 */
4434DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4435{
4436 ASMAtomicXchgU16(pu16, u16);
4437}
4438
4439
/**
 * Atomically writes an unsigned 16-bit value, unordered.
 *
 * Asserts 2-byte alignment of the pointer, then performs a plain volatile
 * store without any fence.
 *
 * @param   pu16   Pointer to the 16-bit variable to write.
 * @param   u16    The 16-bit value to assign to *pu16.
 */
DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
{
    Assert(!((uintptr_t)pu16 & 1));
    *pu16 = u16;
}
4451
4452
4453/**
4454 * Atomically writes a signed 16-bit value, ordered.
4455 *
4456 * @param pi16 Pointer to the 16-bit variable to read.
4457 * @param i16 The 16-bit value to assign to *pi16.
4458 */
4459DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4460{
4461 ASMAtomicXchgS16(pi16, i16);
4462}
4463
4464
/**
 * Atomically writes a signed 16-bit value, unordered.
 *
 * Asserts 2-byte alignment of the pointer, then performs a plain volatile
 * store without any fence.
 *
 * @param   pi16   Pointer to the 16-bit variable to write.
 * @param   i16    The 16-bit value to assign to *pi16.
 */
DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
{
    Assert(!((uintptr_t)pi16 & 1));
    *pi16 = i16;
}
4476
4477
4478/**
4479 * Atomically writes an unsigned 32-bit value, ordered.
4480 *
4481 * @param pu32 Pointer to the 32-bit variable.
4482 * @param u32 The 32-bit value to assign to *pu32.
4483 */
4484DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4485{
4486 ASMAtomicXchgU32(pu32, u32);
4487}
4488
4489
/**
 * Atomically writes an unsigned 32-bit value, unordered.
 *
 * Asserts 4-byte alignment of the pointer, then performs a plain volatile
 * store without any fence.
 *
 * @param   pu32   Pointer to the 32-bit variable to write.
 * @param   u32    The 32-bit value to assign to *pu32.
 */
DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
{
    Assert(!((uintptr_t)pu32 & 3));
    *pu32 = u32;
}
4501
4502
4503/**
4504 * Atomically writes a signed 32-bit value, ordered.
4505 *
4506 * @param pi32 Pointer to the 32-bit variable to read.
4507 * @param i32 The 32-bit value to assign to *pi32.
4508 */
4509DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4510{
4511 ASMAtomicXchgS32(pi32, i32);
4512}
4513
4514
/**
 * Atomically writes a signed 32-bit value, unordered.
 *
 * Asserts 4-byte alignment of the pointer, then performs a plain volatile
 * store without any fence.
 *
 * @param   pi32   Pointer to the 32-bit variable to write.
 * @param   i32    The 32-bit value to assign to *pi32.
 */
DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
{
    Assert(!((uintptr_t)pi32 & 3));
    *pi32 = i32;
}
4526
4527
4528/**
4529 * Atomically writes an unsigned 64-bit value, ordered.
4530 *
4531 * @param pu64 Pointer to the 64-bit variable.
4532 * @param u64 The 64-bit value to assign to *pu64.
4533 */
4534DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4535{
4536 ASMAtomicXchgU64(pu64, u64);
4537}
4538
4539
4540/**
4541 * Atomically writes an unsigned 64-bit value, unordered.
4542 *
4543 * @param pu64 Pointer to the 64-bit variable.
4544 * @param u64 The 64-bit value to assign to *pu64.
4545 */
4546DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4547{
4548 Assert(!((uintptr_t)pu64 & 7));
4549#if ARCH_BITS == 64
4550 *pu64 = u64;
4551#else
4552 ASMAtomicXchgU64(pu64, u64);
4553#endif
4554}
4555
4556
4557/**
4558 * Atomically writes a signed 64-bit value, ordered.
4559 *
4560 * @param pi64 Pointer to the 64-bit variable.
4561 * @param i64 The 64-bit value to assign to *pi64.
4562 */
4563DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4564{
4565 ASMAtomicXchgS64(pi64, i64);
4566}
4567
4568
4569/**
4570 * Atomically writes a signed 64-bit value, unordered.
4571 *
4572 * @param pi64 Pointer to the 64-bit variable.
4573 * @param i64 The 64-bit value to assign to *pi64.
4574 */
4575DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4576{
4577 Assert(!((uintptr_t)pi64 & 7));
4578#if ARCH_BITS == 64
4579 *pi64 = i64;
4580#else
4581 ASMAtomicXchgS64(pi64, i64);
4582#endif
4583}
4584
4585
4586/**
4587 * Atomically writes a boolean value, unordered.
4588 *
4589 * @param pf Pointer to the boolean variable.
4590 * @param f The boolean value to assign to *pf.
4591 */
4592DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4593{
4594 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4595}
4596
4597
/**
 * Atomically writes a boolean value, unordered.
 *
 * Plain volatile store without any fence.
 *
 * @param   pf     Pointer to the boolean variable to write.
 * @param   f      The boolean value to assign to *pf.
 */
DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
{
    *pf = f;    /* byte writes are atomic on x86 */
}
4608
4609
4610/**
4611 * Atomically writes a pointer value, ordered.
4612 *
4613 * @returns Current *pv value
4614 * @param ppv Pointer to the pointer variable.
4615 * @param pv The pointer value to assigne to *ppv.
4616 */
4617DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4618{
4619#if ARCH_BITS == 32
4620 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4621#elif ARCH_BITS == 64
4622 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4623#else
4624# error "ARCH_BITS is bogus"
4625#endif
4626}
4627
4628
4629/**
4630 * Atomically writes a pointer value, unordered.
4631 *
4632 * @returns Current *pv value
4633 * @param ppv Pointer to the pointer variable.
4634 * @param pv The pointer value to assigne to *ppv.
4635 */
4636DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4637{
4638#if ARCH_BITS == 32
4639 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4640#elif ARCH_BITS == 64
4641 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4642#else
4643# error "ARCH_BITS is bogus"
4644#endif
4645}
4646
4647
4648/**
4649 * Atomically write a typical IPRT handle value, ordered.
4650 *
4651 * @param ph Pointer to the variable to update.
4652 * @param hNew The value to assign to *ph.
4653 *
4654 * @remarks This doesn't currently work for all handles (like RTFILE).
4655 */
4656#if HC_ARCH_BITS == 32
4657# define ASMAtomicWriteHandle(ph, hNew) \
4658 do { \
4659 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
4660 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4661 } while (0)
4662#elif HC_ARCH_BITS == 64
4663# define ASMAtomicWriteHandle(ph, hNew) \
4664 do { \
4665 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
4666 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4667 } while (0)
4668#else
4669# error HC_ARCH_BITS
4670#endif
4671
4672
4673/**
4674 * Atomically write a typical IPRT handle value, unordered.
4675 *
4676 * @param ph Pointer to the variable to update.
4677 * @param hNew The value to assign to *ph.
4678 *
4679 * @remarks This doesn't currently work for all handles (like RTFILE).
4680 */
4681#if HC_ARCH_BITS == 32
4682# define ASMAtomicUoWriteHandle(ph, hNew) \
4683 do { \
4684 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
4685 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
4686 } while (0)
4687#elif HC_ARCH_BITS == 64
4688# define ASMAtomicUoWriteHandle(ph, hNew) \
4689 do { \
4690 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
4691 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
4692 } while (0)
4693#else
4694# error HC_ARCH_BITS
4695#endif
4696
4697
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4715
/**
 * Atomically writes a value whose size might differ between platforms or
 * compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion and writes nothing.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4733
4734
4735
4736
/**
 * Invalidate page.
 *
 * Issues the INVLPG instruction for the given address, either through the
 * __invlpg intrinsic or through GNU/MSC style inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not used, only the
 * prototype is provided here.
 *
 * @param   pv      Address of the page to invalidate.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMInvalidatePage(void *pv);
#else
DECLINLINE(void) ASMInvalidatePage(void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
    __invlpg(pv);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The address is passed as a memory operand; there are no outputs. */
    __asm__ __volatile__("invlpg %0\n\t"
                         : : "m" (*(uint8_t *)pv));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pv]
        invlpg  [rax]
#  else
        mov     eax, [pv]
        invlpg  [eax]
#  endif
    }
# endif
}
#endif
4767
4768
/**
 * Write back the internal caches and invalidate them.
 *
 * Issues the WBINVD instruction, either through the __wbinvd intrinsic or
 * through GNU/MSC style inline assembly.  When RT_INLINE_ASM_EXTERNAL is set
 * and intrinsics are not used, only the prototype is provided here.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMWriteBackAndInvalidateCaches(void);
#else
DECLINLINE(void) ASMWriteBackAndInvalidateCaches(void)
{
# if RT_INLINE_ASM_USES_INTRIN
    __wbinvd();

# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("wbinvd");
# else
    __asm
    {
        wbinvd
    }
# endif
}
#endif
4790
4791
/**
 * Invalidate internal and (perhaps) external caches without first
 * flushing dirty cache lines. Use with extreme care.
 *
 * Issues the INVD instruction via GNU or MSC style inline assembly; no
 * intrinsic variant is used here.  When RT_INLINE_ASM_EXTERNAL is set, only
 * the prototype is provided.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMInvalidateInternalCaches(void);
#else
DECLINLINE(void) ASMInvalidateInternalCaches(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("invd");
# else
    __asm
    {
        invd
    }
# endif
}
#endif
4811
4812
/* Compile-time sanity check: if PAGE_SIZE is already defined (and NT_INCLUDED
   is not), it must be 0x1000, since ASMMemZeroPage hard-codes that size. */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
4818
/**
 * Zeros a 4K memory page.
 *
 * Stores 0x1000 bytes of zeros starting at pv using a string-store
 * instruction (8 bytes per iteration on AMD64, 4 bytes otherwise).  The page
 * size is hard-coded rather than taken from PAGE_SIZE.
 *
 * @param   pv  Pointer to the memory block. This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
#  if RT_INLINE_ASM_USES_INTRIN
#   ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
#   else
    __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
#   endif

#  elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies both the destination and count registers, so both are
       declared as outputs; uDummy only receives the final count. */
    RTCCUINTREG uDummy;
#   ifdef RT_ARCH_AMD64
    __asm__ __volatile__("rep stosq"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (0x1000 >> 3),
                           "a" (0)
                         : "memory");
#   else
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (uDummy)
                         : "0" (pv),
                           "c" (0x1000 >> 2),
                           "a" (0)
                         : "memory");
#   endif
#  else
    /* 0200h qwords resp. 0400h dwords both amount to 0x1000 bytes. */
    __asm
    {
#   ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#   else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep     stosd
#   endif
    }
#  endif
}
# endif
4873
4874
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * The inline assembly variants store cb / 4 zero dwords, so any remainder of
 * a misaligned size is silently left untouched.  The intrinsic variant uses
 * qword stores on AMD64 when cb is a multiple of 8.
 *
 * @param   pv  Pointer to the memory block.
 * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, 0, cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, 0, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies the destination and count registers, hence pv and cb
       serve as both inputs and outputs. */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (0)
                         : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
4920
4921
/**
 * Fills a memory block with a 32-bit aligned size.
 *
 * The inline assembly variants store cb / 4 copies of u32, so any remainder
 * of a misaligned size is silently left untouched.  The intrinsic variant
 * uses qword stores of the doubled pattern on AMD64 when cb is a multiple
 * of 8.
 *
 * @param   pv  Pointer to the memory block.
 * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
 * @param   u32 The value to fill with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
#else
DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, u32, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies the destination and count registers, hence pv and cb
       serve as both inputs and outputs. */
    __asm__ __volatile__("rep stosl"
                         : "=D" (pv),
                           "=c" (cb)
                         : "0" (pv),
                           "1" (cb >> 2),
                           "a" (u32)
                         : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        mov     eax, [u32]
        rep stosd
    }
# endif
}
#endif
4968
4969
4970/**
4971 * Checks if a memory block is filled with the specified byte.
4972 *
4973 * This is a sort of inverted memchr.
4974 *
4975 * @returns Pointer to the byte which doesn't equal u8.
4976 * @returns NULL if all equal to u8.
4977 *
4978 * @param pv Pointer to the memory block.
4979 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4980 * @param u8 The value it's supposed to be filled with.
4981 */
4982#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4983DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4984#else
4985DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4986{
4987/** @todo rewrite this in inline assembly? */
4988 uint8_t const *pb = (uint8_t const *)pv;
4989 for (; cb; cb--, pb++)
4990 if (RT_UNLIKELY(*pb != u8))
4991 return (void *)pb;
4992 return NULL;
4993}
4994#endif
4995
4996
4997/**
4998 * Checks if a memory block is filled with the specified 32-bit value.
4999 *
5000 * This is a sort of inverted memchr.
5001 *
5002 * @returns Pointer to the first value which doesn't equal u32.
5003 * @returns NULL if all equal to u32.
5004 *
5005 * @param pv Pointer to the memory block.
5006 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5007 * @param u32 The value it's supposed to be filled with.
5008 */
5009#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5010DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
5011#else
5012DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
5013{
5014/** @todo rewrite this in inline assembly? */
5015 uint32_t const *pu32 = (uint32_t const *)pv;
5016 for (; cb; cb -= 4, pu32++)
5017 if (RT_UNLIKELY(*pu32 != u32))
5018 return (uint32_t *)pu32;
5019 return NULL;
5020}
5021#endif
5022
5023
5024/**
5025 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
5026 *
5027 * @returns u32F1 * u32F2.
 * @param u32F1 The first factor.
 * @param u32F2 The second factor.
5028 */
5029#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5030DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
5031#else
5032DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
5033{
5034# ifdef RT_ARCH_AMD64
5035 return (uint64_t)u32F1 * u32F2; /* plain 64-bit C multiplication on AMD64 */
5036# else /* !RT_ARCH_AMD64 */
5037 uint64_t u64;
5038# if RT_INLINE_ASM_GNU_STYLE
5039 __asm__ __volatile__("mull %%edx" /* eax (u32F2) times edx (u32F1) */
5040 : "=A" (u64) /* 64-bit product comes back in the edx:eax pair */
5041 : "a" (u32F2), "d" (u32F1));
5042# else
5043 __asm
5044 {
5045 mov edx, [u32F1]
5046 mov eax, [u32F2]
5047 mul edx
5048 mov dword ptr [u64], eax
5049 mov dword ptr [u64 + 4], edx
5050 }
5051# endif
5052 return u64;
5053# endif /* !RT_ARCH_AMD64 */
5054}
5055#endif
5056
5057
5058/**
5059 * Multiplies two signed 32-bit values returning a signed 64-bit result.
5060 *
5061 * @returns i32F1 * i32F2.
 * @param i32F1 The first factor.
 * @param i32F2 The second factor.
5062 */
5063#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5064DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
5065#else
5066DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
5067{
5068# ifdef RT_ARCH_AMD64
5069 return (int64_t)i32F1 * i32F2; /* plain 64-bit C multiplication on AMD64 */
5070# else /* !RT_ARCH_AMD64 */
5071 int64_t i64;
5072# if RT_INLINE_ASM_GNU_STYLE
5073 __asm__ __volatile__("imull %%edx" /* signed: eax (i32F2) times edx (i32F1) */
5074 : "=A" (i64) /* 64-bit product comes back in the edx:eax pair */
5075 : "a" (i32F2), "d" (i32F1));
5076# else
5077 __asm
5078 {
5079 mov edx, [i32F1]
5080 mov eax, [i32F2]
5081 imul edx
5082 mov dword ptr [i64], eax
5083 mov dword ptr [i64 + 4], edx
5084 }
5085# endif
5086 return i64;
5087# endif /* !RT_ARCH_AMD64 */
5088}
5089#endif
5090
5091
5092/**
5093 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
5094 *
5095 * @returns u64 / u32.
 * @param u64 The dividend.
 * @param u32 The divisor.
 *
 * @remarks The non-AMD64 paths use a single 32-bit div instruction, so the
 *          quotient is expected to fit in 32 bits; the AMD64 path truncates
 *          the C result with a cast instead.
5096 */
5097#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5098DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
5099#else
5100DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
5101{
5102# ifdef RT_ARCH_AMD64
5103 return (uint32_t)(u64 / u32);
5104# else /* !RT_ARCH_AMD64 */
5105# if RT_INLINE_ASM_GNU_STYLE
5106 RTCCUINTREG uDummy; /* receives the remainder (edx), which is not used */
5107 __asm__ __volatile__("divl %3"
5108 : "=a" (u32), "=d"(uDummy) /* quotient overwrites the u32 parameter */
5109 : "A" (u64), "r" (u32));
5110# else
5111 __asm
5112 {
5113 mov eax, dword ptr [u64]
5114 mov edx, dword ptr [u64 + 4]
5115 mov ecx, [u32]
5116 div ecx
5117 mov [u32], eax
5118 }
5119# endif
5120 return u32;
5121# endif /* !RT_ARCH_AMD64 */
5122}
5123#endif
5124
5125
5126/**
5127 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
5128 *
5129 * @returns i64 / i32.
 * @param i64 The dividend.
 * @param i32 The divisor.
 *
 * @remarks The non-AMD64 paths use a single 32-bit idiv instruction, so the
 *          quotient is expected to fit in a signed 32-bit value; the AMD64
 *          path truncates the C result with a cast instead.
5130 */
5131#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5132DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
5133#else
5134DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
5135{
5136# ifdef RT_ARCH_AMD64
5137 return (int32_t)(i64 / i32);
5138# else /* !RT_ARCH_AMD64 */
5139# if RT_INLINE_ASM_GNU_STYLE
5140 RTCCUINTREG iDummy; /* receives the remainder (edx), which is not used */
5141 __asm__ __volatile__("idivl %3"
5142 : "=a" (i32), "=d"(iDummy) /* quotient overwrites the i32 parameter */
5143 : "A" (i64), "r" (i32));
5144# else
5145 __asm
5146 {
5147 mov eax, dword ptr [i64]
5148 mov edx, dword ptr [i64 + 4]
5149 mov ecx, [i32]
5150 idiv ecx
5151 mov [i32], eax
5152 }
5153# endif
5154 return i32;
5155# endif /* !RT_ARCH_AMD64 */
5156}
5157#endif
5158
5159
5160/**
5161 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
5162 * returning the remainder.
5163 *
5164 * @returns u64 % u32.
 * @param u64 The dividend.
 * @param u32 The divisor.
5165 *
5166 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or we'll overflow and crash.
5167 */
5168#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5169DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
5170#else
5171DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
5172{
5173# ifdef RT_ARCH_AMD64
5174 return (uint32_t)(u64 % u32);
5175# else /* !RT_ARCH_AMD64 */
5176# if RT_INLINE_ASM_GNU_STYLE
5177 RTCCUINTREG uDummy; /* receives the quotient (eax), which is not used */
5178 __asm__ __volatile__("divl %3"
5179 : "=a" (uDummy), "=d"(u32) /* remainder overwrites the u32 parameter */
5180 : "A" (u64), "r" (u32));
5181# else
5182 __asm
5183 {
5184 mov eax, dword ptr [u64]
5185 mov edx, dword ptr [u64 + 4]
5186 mov ecx, [u32]
5187 div ecx
5188 mov [u32], edx
5189 }
5190# endif
5191 return u32;
5192# endif /* !RT_ARCH_AMD64 */
5193}
5194#endif
5195
5196
5197/**
5198 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
5199 * returning the remainder.
5200 *
5201 * @returns i64 % i32.
 * @param i64 The dividend.
 * @param i32 The divisor.
5202 *
5203 * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit value or we'll overflow and crash.
5204 */
5205#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
5206DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
5207#else
5208DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
5209{
5210# ifdef RT_ARCH_AMD64
5211 return (int32_t)(i64 % i32);
5212# else /* !RT_ARCH_AMD64 */
5213# if RT_INLINE_ASM_GNU_STYLE
5214 RTCCUINTREG iDummy; /* receives the quotient (eax), which is not used */
5215 __asm__ __volatile__("idivl %3"
5216 : "=a" (iDummy), "=d"(i32) /* remainder overwrites the i32 parameter */
5217 : "A" (i64), "r" (i32));
5218# else
5219 __asm
5220 {
5221 mov eax, dword ptr [i64]
5222 mov edx, dword ptr [i64 + 4]
5223 mov ecx, [i32]
5224 idiv ecx
5225 mov [i32], edx
5226 }
5227# endif
5228 return i32;
5229# endif /* !RT_ARCH_AMD64 */
5230}
5231#endif
5232
5233
5234/**
5235 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
5236 * using a 96 bit intermediate result.
5237 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
5238 * __udivdi3 and __umoddi3 even if this inline function is not used.
5239 *
5240 * @returns (u64A * u32B) / u32C.
5241 * @param u64A The 64-bit value.
5242 * @param u32B The 32-bit value to multiply A by.
5243 * @param u32C The 32-bit value to divide A*B by.
5244 */
5245#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
5246DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
5247#else
5248DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
5249{
5250# if RT_INLINE_ASM_GNU_STYLE
5251# ifdef RT_ARCH_AMD64
5252 uint64_t u64Result, u64Spill; /* u64Spill only names the rdx output; its value is discarded */
5253 __asm__ __volatile__("mulq %2\n\t" /* rdx:rax = u64A * u32B */
5254 "divq %3\n\t" /* rax = rdx:rax / u32C */
5255 : "=a" (u64Result),
5256 "=d" (u64Spill)
5257 : "r" ((uint64_t)u32B),
5258 "r" ((uint64_t)u32C),
5259 "0" (u64A),
5260 "1" (0));
5261 return u64Result;
5262# else
5263 uint32_t u32Dummy; /* shared sink for the ecx/esi/edi outputs, which are scratch only */
5264 uint64_t u64Result;
5265 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
5266 edx = u64Lo.hi = (u64A.lo * u32B).hi */
5267 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
5268 eax = u64A.hi */
5269 "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
5270 edx = u32C */
5271 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
5272 edx = u32B */
5273 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
5274 edx = u64Hi.hi = (u64A.hi * u32B).hi */
5275 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
5276 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
5277 "divl %%ecx \n\t" /* eax = u64Hi / u32C
5278 edx = u64Hi % u32C */
5279 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
5280 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
5281 "divl %%ecx \n\t" /* u64Result.lo */
5282 "movl %%edi,%%edx \n\t" /* u64Result.hi */
5283 : "=A"(u64Result), "=c"(u32Dummy),
5284 "=S"(u32Dummy), "=D"(u32Dummy)
5285 : "a"((uint32_t)u64A),
5286 "S"((uint32_t)(u64A >> 32)),
5287 "c"(u32B),
5288 "D"(u32C));
5289 return u64Result;
5290# endif
5291# else
5292 RTUINT64U u; /* C fallback: same long-division scheme as the 32-bit assembly above */
5293 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
5294 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
5295 u64Hi += (u64Lo >> 32);
5296 u.s.Hi = (uint32_t)(u64Hi / u32C);
5297 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
5298 return u.u;
5299# endif
5300}
5301#endif
5302
5303
5304/**
5305 * Probes a byte pointer for read access.
5306 *
5307 * While the function will fault if the byte is not read accessible,
5308 * the idea is to do this in a safe place like before acquiring locks
5309 * and such like.
5310 *
5311 * Also, this function is meant to guarantee that an eager compiler is not
5312 * going to optimize the probing away (not yet verified, see the todo below).
5313 *
 * @returns The byte read from pvByte.
5314 * @param pvByte Pointer to the byte.
5315 */
5316#if RT_INLINE_ASM_EXTERNAL
5317DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
5318#else
5319DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
5320{
5321 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5322 uint8_t u8;
5323# if RT_INLINE_ASM_GNU_STYLE
5324 __asm__ __volatile__("movb (%1), %0\n\t" /* load one byte through the pointer */
5325 : "=r" (u8)
5326 : "r" (pvByte));
5327# else
5328 __asm
5329 {
5330# ifdef RT_ARCH_AMD64
5331 mov rax, [pvByte]
5332 mov al, [rax]
5333# else
5334 mov eax, [pvByte]
5335 mov al, [eax]
5336# endif
5337 mov [u8], al
5338 }
5339# endif
5340 return u8;
5341}
5342#endif
5343
5344/**
5345 * Probes a buffer for read access page by page.
5346 *
5347 * While the function will fault if the buffer is not fully read
5348 * accessible, the idea is to do this in a safe place like before
5349 * acquiring locks and such like.
5350 *
5351 * Also, this functions guarantees that an eager compiler is not going
5352 * to optimize the probing away.
5353 *
5354 * @param pvBuf Pointer to the buffer.
5355 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5356 */
5357DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
5358{
5359 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5360 /* the first byte */
5361 const uint8_t *pu8 = (const uint8_t *)pvBuf;
5362 ASMProbeReadByte(pu8);
5363
5364 /* the pages in between pages. */
5365 while (cbBuf > /*PAGE_SIZE*/0x1000)
5366 {
5367 ASMProbeReadByte(pu8);
5368 cbBuf -= /*PAGE_SIZE*/0x1000;
5369 pu8 += /*PAGE_SIZE*/0x1000;
5370 }
5371
5372 /* the last byte */
5373 ASMProbeReadByte(pu8 + cbBuf - 1);
5374}
5375
5376
5377/** @def ASMBreakpoint
5378 * Debugger breakpoint: an int3 instruction with GNU-style inline assembly, __debugbreak() otherwise.
5379 * @remark In the gnu world we add a nop instruction after the int3 to
5380 * force gdb to remain at the int3 source line.
5381 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
5382 * @internal
5383 */
5384#if RT_INLINE_ASM_GNU_STYLE
5385# ifndef __L4ENV__
5386# define ASMBreakpoint() do { __asm__ __volatile__("int3\n\tnop"); } while (0)
5387# else
5388# define ASMBreakpoint() do { __asm__ __volatile__("int3; jmp 1f; 1:"); } while (0)
5389# endif
5390#else
5391# define ASMBreakpoint() __debugbreak()
5392#endif
5393
5394
5395
5396/** @defgroup grp_inline_bits Bit Operations
5397 * @{
5398 */
5399
5400
5401/**
5402 * Sets a bit in a bitmap (no lock prefix is used; see ASMAtomicBitSet for the locked variant).
5403 *
5404 * @param pvBitmap Pointer to the bitmap.
5405 * @param iBit The bit to set. Passed to the instruction/intrinsic as-is; no range check is done here.
5406 */
5407#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5408DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5409#else
5410DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5411{
5412# if RT_INLINE_ASM_USES_INTRIN
5413 _bittestandset((long *)pvBitmap, iBit); /* the returned previous bit value is ignored */
5414
5415# elif RT_INLINE_ASM_GNU_STYLE
5416 __asm__ __volatile__("btsl %1, %0"
5417 : "=m" (*(volatile long *)pvBitmap)
5418 : "Ir" (iBit),
5419 "m" (*(volatile long *)pvBitmap)
5420 : "memory");
5421# else
5422 __asm
5423 {
5424# ifdef RT_ARCH_AMD64
5425 mov rax, [pvBitmap]
5426 mov edx, [iBit]
5427 bts [rax], edx
5428# else
5429 mov eax, [pvBitmap]
5430 mov edx, [iBit]
5431 bts [eax], edx
5432# endif
5433 }
5434# endif
5435}
5436#endif
5437
5438
5439/**
5440 * Atomically sets a bit in a bitmap, ordered.
5441 *
5442 * @param pvBitmap Pointer to the bitmap.
5443 * @param iBit The bit to set. Passed to the instruction/intrinsic as-is; no range check is done here.
5444 */
5445#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5446DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5447#else
5448DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5449{
5450# if RT_INLINE_ASM_USES_INTRIN
5451 _interlockedbittestandset((long *)pvBitmap, iBit); /* the returned previous bit value is ignored */
5452# elif RT_INLINE_ASM_GNU_STYLE
5453 __asm__ __volatile__("lock; btsl %1, %0" /* same as ASMBitSet plus the lock prefix */
5454 : "=m" (*(volatile long *)pvBitmap)
5455 : "Ir" (iBit),
5456 "m" (*(volatile long *)pvBitmap)
5457 : "memory");
5458# else
5459 __asm
5460 {
5461# ifdef RT_ARCH_AMD64
5462 mov rax, [pvBitmap]
5463 mov edx, [iBit]
5464 lock bts [rax], edx
5465# else
5466 mov eax, [pvBitmap]
5467 mov edx, [iBit]
5468 lock bts [eax], edx
5469# endif
5470 }
5471# endif
5472}
5473#endif
5474
5475
5476/**
5477 * Clears a bit in a bitmap (no lock prefix is used; see ASMAtomicBitClear for the locked variant).
5478 *
5479 * @param pvBitmap Pointer to the bitmap.
5480 * @param iBit The bit to clear. Passed to the instruction/intrinsic as-is; no range check is done here.
5481 */
5482#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5483DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5484#else
5485DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5486{
5487# if RT_INLINE_ASM_USES_INTRIN
5488 _bittestandreset((long *)pvBitmap, iBit); /* the returned previous bit value is ignored */
5489
5490# elif RT_INLINE_ASM_GNU_STYLE
5491 __asm__ __volatile__("btrl %1, %0"
5492 : "=m" (*(volatile long *)pvBitmap)
5493 : "Ir" (iBit),
5494 "m" (*(volatile long *)pvBitmap)
5495 : "memory");
5496# else
5497 __asm
5498 {
5499# ifdef RT_ARCH_AMD64
5500 mov rax, [pvBitmap]
5501 mov edx, [iBit]
5502 btr [rax], edx
5503# else
5504 mov eax, [pvBitmap]
5505 mov edx, [iBit]
5506 btr [eax], edx
5507# endif
5508 }
5509# endif
5510}
5511#endif
5512
5513
5514/**
5515 * Atomically clears a bit in a bitmap, ordered.
5516 *
5517 * @param pvBitmap Pointer to the bitmap.
5518 * @param iBit The bit to clear.
5519 * @remark NOTE(review): this remark used to read "No memory barrier, take care on smp", which looks at odds with "ordered" above and the lock prefix below - confirm.
5520 */
5521#if RT_INLINE_ASM_EXTERNAL
5522DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5523#else
5524DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5525{
5526# if RT_INLINE_ASM_GNU_STYLE
5527 __asm__ __volatile__("lock; btrl %1, %0" /* same as ASMBitClear plus the lock prefix */
5528 : "=m" (*(volatile long *)pvBitmap)
5529 : "Ir" (iBit),
5530 "m" (*(volatile long *)pvBitmap)
5531 : "memory");
5532# else
5533 __asm
5534 {
5535# ifdef RT_ARCH_AMD64
5536 mov rax, [pvBitmap]
5537 mov edx, [iBit]
5538 lock btr [rax], edx
5539# else
5540 mov eax, [pvBitmap]
5541 mov edx, [iBit]
5542 lock btr [eax], edx
5543# endif
5544 }
5545# endif
5546}
5547#endif
5548
5549
5550/**
5551 * Toggles a bit in a bitmap (no lock prefix is used; see ASMAtomicBitToggle for the locked variant).
5552 *
5553 * @param pvBitmap Pointer to the bitmap.
5554 * @param iBit The bit to toggle. Passed to the instruction/intrinsic as-is; no range check is done here.
5555 */
5556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5557DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5558#else
5559DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5560{
5561# if RT_INLINE_ASM_USES_INTRIN
5562 _bittestandcomplement((long *)pvBitmap, iBit); /* the returned previous bit value is ignored */
5563# elif RT_INLINE_ASM_GNU_STYLE
5564 __asm__ __volatile__("btcl %1, %0"
5565 : "=m" (*(volatile long *)pvBitmap)
5566 : "Ir" (iBit),
5567 "m" (*(volatile long *)pvBitmap)
5568 : "memory");
5569# else
5570 __asm
5571 {
5572# ifdef RT_ARCH_AMD64
5573 mov rax, [pvBitmap]
5574 mov edx, [iBit]
5575 btc [rax], edx
5576# else
5577 mov eax, [pvBitmap]
5578 mov edx, [iBit]
5579 btc [eax], edx
5580# endif
5581 }
5582# endif
5583}
5584#endif
5585
5586
5587/**
5588 * Atomically toggles a bit in a bitmap, ordered.
5589 *
5590 * @param pvBitmap Pointer to the bitmap.
5591 * @param iBit The bit to toggle.
5592 */
5593#if RT_INLINE_ASM_EXTERNAL
5594DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5595#else
5596DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5597{
5598# if RT_INLINE_ASM_GNU_STYLE
5599 __asm__ __volatile__("lock; btcl %1, %0" /* same as ASMBitToggle plus the lock prefix */
5600 : "=m" (*(volatile long *)pvBitmap)
5601 : "Ir" (iBit),
5602 "m" (*(volatile long *)pvBitmap)
5603 : "memory");
5604# else
5605 __asm
5606 {
5607# ifdef RT_ARCH_AMD64
5608 mov rax, [pvBitmap]
5609 mov edx, [iBit]
5610 lock btc [rax], edx
5611# else
5612 mov eax, [pvBitmap]
5613 mov edx, [iBit]
5614 lock btc [eax], edx
5615# endif
5616 }
5617# endif
5618}
5619#endif
5620
5621
5622/**
5623 * Tests and sets a bit in a bitmap (no lock prefix is used).
5624 *
5625 * @returns true if the bit was set before the call.
5626 * @returns false if the bit was clear before the call.
5627 * @param pvBitmap Pointer to the bitmap.
5628 * @param iBit The bit to test and set.
5629 */
5630#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5631DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5632#else
5633DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5634{
5635 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u8/u32 by the branches below, read back as bool */
5636# if RT_INLINE_ASM_USES_INTRIN
5637 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5638
5639# elif RT_INLINE_ASM_GNU_STYLE
5640 __asm__ __volatile__("btsl %2, %1\n\t"
5641 "setc %b0\n\t" /* low byte of the result = carry flag left by bts */
5642 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5643 : "=q" (rc.u32),
5644 "=m" (*(volatile long *)pvBitmap)
5645 : "Ir" (iBit),
5646 "m" (*(volatile long *)pvBitmap)
5647 : "memory");
5648# else
5649 __asm
5650 {
5651 mov edx, [iBit]
5652# ifdef RT_ARCH_AMD64
5653 mov rax, [pvBitmap]
5654 bts [rax], edx
5655# else
5656 mov eax, [pvBitmap]
5657 bts [eax], edx
5658# endif
5659 setc al
5660 and eax, 1
5661 mov [rc.u32], eax
5662 }
5663# endif
5664 return rc.f;
5665}
5666#endif
5667
5668
5669/**
5670 * Atomically tests and sets a bit in a bitmap, ordered.
5671 *
5672 * @returns true if the bit was set before the call.
5673 * @returns false if the bit was clear before the call.
5674 * @param pvBitmap Pointer to the bitmap.
5675 * @param iBit The bit to test and set.
5676 */
5677#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5678DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5679#else
5680DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5681{
5682 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u8/u32 by the branches below, read back as bool */
5683# if RT_INLINE_ASM_USES_INTRIN
5684 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5685# elif RT_INLINE_ASM_GNU_STYLE
5686 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5687 "setc %b0\n\t" /* low byte of the result = carry flag left by bts */
5688 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5689 : "=q" (rc.u32),
5690 "=m" (*(volatile long *)pvBitmap)
5691 : "Ir" (iBit),
5692 "m" (*(volatile long *)pvBitmap)
5693 : "memory");
5694# else
5695 __asm
5696 {
5697 mov edx, [iBit]
5698# ifdef RT_ARCH_AMD64
5699 mov rax, [pvBitmap]
5700 lock bts [rax], edx
5701# else
5702 mov eax, [pvBitmap]
5703 lock bts [eax], edx
5704# endif
5705 setc al
5706 and eax, 1
5707 mov [rc.u32], eax
5708 }
5709# endif
5710 return rc.f;
5711}
5712#endif
5713
5714
5715/**
5716 * Tests and clears a bit in a bitmap (no lock prefix is used).
5717 *
5718 * @returns true if the bit was set before the call.
5719 * @returns false if the bit was clear before the call.
5720 * @param pvBitmap Pointer to the bitmap.
5721 * @param iBit The bit to test and clear.
5722 */
5723#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5724DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5725#else
5726DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5727{
5728 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u8/u32 by the branches below, read back as bool */
5729# if RT_INLINE_ASM_USES_INTRIN
5730 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5731
5732# elif RT_INLINE_ASM_GNU_STYLE
5733 __asm__ __volatile__("btrl %2, %1\n\t"
5734 "setc %b0\n\t" /* low byte of the result = carry flag left by btr */
5735 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5736 : "=q" (rc.u32),
5737 "=m" (*(volatile long *)pvBitmap)
5738 : "Ir" (iBit),
5739 "m" (*(volatile long *)pvBitmap)
5740 : "memory");
5741# else
5742 __asm
5743 {
5744 mov edx, [iBit]
5745# ifdef RT_ARCH_AMD64
5746 mov rax, [pvBitmap]
5747 btr [rax], edx
5748# else
5749 mov eax, [pvBitmap]
5750 btr [eax], edx
5751# endif
5752 setc al
5753 and eax, 1
5754 mov [rc.u32], eax
5755 }
5756# endif
5757 return rc.f;
5758}
5759#endif
5760
5761
5762/**
5763 * Atomically tests and clears a bit in a bitmap, ordered.
5764 *
5765 * @returns true if the bit was set before the call.
5766 * @returns false if the bit was clear before the call.
5767 * @param pvBitmap Pointer to the bitmap.
5768 * @param iBit The bit to test and clear.
5769 * @remark NOTE(review): this remark used to read "No memory barrier, take care on smp", which looks at odds with "ordered" above and the lock prefix below - confirm.
5770 */
5771#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5772DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5773#else
5774DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5775{
5776 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u8/u32 by the branches below, read back as bool */
5777# if RT_INLINE_ASM_USES_INTRIN
5778 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5779
5780# elif RT_INLINE_ASM_GNU_STYLE
5781 __asm__ __volatile__("lock; btrl %2, %1\n\t"
5782 "setc %b0\n\t" /* low byte of the result = carry flag left by btr */
5783 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5784 : "=q" (rc.u32),
5785 "=m" (*(volatile long *)pvBitmap)
5786 : "Ir" (iBit),
5787 "m" (*(volatile long *)pvBitmap)
5788 : "memory");
5789# else
5790 __asm
5791 {
5792 mov edx, [iBit]
5793# ifdef RT_ARCH_AMD64
5794 mov rax, [pvBitmap]
5795 lock btr [rax], edx
5796# else
5797 mov eax, [pvBitmap]
5798 lock btr [eax], edx
5799# endif
5800 setc al
5801 and eax, 1
5802 mov [rc.u32], eax
5803 }
5804# endif
5805 return rc.f;
5806}
5807#endif
5808
5809
5810/**
5811 * Tests and toggles a bit in a bitmap.
5812 *
5813 * @returns true if the bit was set.
5814 * @returns false if the bit was clear.
5815 * @param pvBitmap Pointer to the bitmap.
5816 * @param iBit The bit to test and toggle.
5817 */
5818#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5819DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5820#else
5821DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5822{
5823 union { bool f; uint32_t u32; uint8_t u8; } rc;
5824# if RT_INLINE_ASM_USES_INTRIN
5825 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5826
5827# elif RT_INLINE_ASM_GNU_STYLE
5828 __asm__ __volatile__("btcl %2, %1\n\t"
5829 "setc %b0\n\t"
5830 "andl $1, %0\n\t"
5831 : "=q" (rc.u32),
5832 "=m" (*(volatile long *)pvBitmap)
5833 : "Ir" (iBit),
5834 "m" (*(volatile long *)pvBitmap)
5835 : "memory");
5836# else
5837 __asm
5838 {
5839 mov edx, [iBit]
5840# ifdef RT_ARCH_AMD64
5841 mov rax, [pvBitmap]
5842 btc [rax], edx
5843# else
5844 mov eax, [pvBitmap]
5845 btc [eax], edx
5846# endif
5847 setc al
5848 and eax, 1
5849 mov [rc.u32], eax
5850 }
5851# endif
5852 return rc.f;
5853}
5854#endif
5855
5856
5857/**
5858 * Atomically tests and toggles a bit in a bitmap, ordered.
5859 *
5860 * @returns true if the bit was set before the call.
5861 * @returns false if the bit was clear before the call.
5862 * @param pvBitmap Pointer to the bitmap.
5863 * @param iBit The bit to test and toggle.
5864 */
5865#if RT_INLINE_ASM_EXTERNAL
5866DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5867#else
5868DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5869{
5870 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u32 by the branches below, read back as bool */
5871# if RT_INLINE_ASM_GNU_STYLE
5872 __asm__ __volatile__("lock; btcl %2, %1\n\t"
5873 "setc %b0\n\t" /* low byte of the result = carry flag left by btc */
5874 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5875 : "=q" (rc.u32),
5876 "=m" (*(volatile long *)pvBitmap)
5877 : "Ir" (iBit),
5878 "m" (*(volatile long *)pvBitmap)
5879 : "memory");
5880# else
5881 __asm
5882 {
5883 mov edx, [iBit]
5884# ifdef RT_ARCH_AMD64
5885 mov rax, [pvBitmap]
5886 lock btc [rax], edx
5887# else
5888 mov eax, [pvBitmap]
5889 lock btc [eax], edx
5890# endif
5891 setc al
5892 and eax, 1
5893 mov [rc.u32], eax
5894 }
5895# endif
5896 return rc.f;
5897}
5898#endif
5899
5900
5901/**
5902 * Tests if a bit in a bitmap is set; the bitmap itself is not modified.
5903 *
5904 * @returns true if the bit is set.
5905 * @returns false if the bit is clear.
5906 * @param pvBitmap Pointer to the bitmap.
5907 * @param iBit The bit to test.
5908 */
5909#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5910DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5911#else
5912DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5913{
5914 union { bool f; uint32_t u32; uint8_t u8; } rc; /* written as u32 by the branches below, read back as bool */
5915# if RT_INLINE_ASM_USES_INTRIN
5916 rc.u32 = _bittest((long *)pvBitmap, iBit);
5917# elif RT_INLINE_ASM_GNU_STYLE
5918
5919 __asm__ __volatile__("btl %2, %1\n\t"
5920 "setc %b0\n\t" /* low byte of the result = carry flag left by bt */
5921 "andl $1, %0\n\t" /* clear everything but bit 0 of the result register */
5922 : "=q" (rc.u32)
5923 : "m" (*(const volatile long *)pvBitmap),
5924 "Ir" (iBit)
5925 : "memory");
5926# else
5927 __asm
5928 {
5929 mov edx, [iBit]
5930# ifdef RT_ARCH_AMD64
5931 mov rax, [pvBitmap]
5932 bt [rax], edx
5933# else
5934 mov eax, [pvBitmap]
5935 bt [eax], edx
5936# endif
5937 setc al
5938 and eax, 1
5939 mov [rc.u32], eax
5940 }
5941# endif
5942 return rc.f;
5943}
5944#endif
5945
5946
5947/**
5948 * Clears a bit range within a bitmap.
5949 *
5950 * @param pvBitmap Pointer to the bitmap.
5951 * @param iBitStart The First bit to clear.
5952 * @param iBitEnd The first bit not to clear.
5953 */
5954DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5955{
5956 if (iBitStart < iBitEnd)
5957 {
5958 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5959 int iStart = iBitStart & ~31;
5960 int iEnd = iBitEnd & ~31;
5961 if (iStart == iEnd)
5962 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5963 else
5964 {
5965 /* bits in first dword. */
5966 if (iBitStart & 31)
5967 {
5968 *pu32 &= (1 << (iBitStart & 31)) - 1;
5969 pu32++;
5970 iBitStart = iStart + 32;
5971 }
5972
5973 /* whole dword. */
5974 if (iBitStart != iEnd)
5975 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5976
5977 /* bits in last dword. */
5978 if (iBitEnd & 31)
5979 {
5980 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5981 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5982 }
5983 }
5984 }
5985}
5986
5987
5988/**
5989 * Sets a bit range within a bitmap.
5990 *
5991 * @param pvBitmap Pointer to the bitmap.
5992 * @param iBitStart The First bit to set.
5993 * @param iBitEnd The first bit not to set.
5994 */
5995DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5996{
5997 if (iBitStart < iBitEnd)
5998 {
5999 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
6000 int iStart = iBitStart & ~31;
6001 int iEnd = iBitEnd & ~31;
6002 if (iStart == iEnd)
6003 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
6004 else
6005 {
6006 /* bits in first dword. */
6007 if (iBitStart & 31)
6008 {
6009 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
6010 pu32++;
6011 iBitStart = iStart + 32;
6012 }
6013
6014 /* whole dword. */
6015 if (iBitStart != iEnd)
6016 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
6017
6018 /* bits in last dword. */
6019 if (iBitEnd & 31)
6020 {
6021 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6022 *pu32 |= (1 << (iBitEnd & 31)) - 1;
6023 }
6024 }
6025 }
6026}
6027
6028
6029/**
6030 * Finds the first clear bit in a bitmap.
6031 *
6032 * @returns Index of the first zero bit.
6033 * @returns -1 if no clear bit was found (also when cBits is 0).
6034 * @param pvBitmap Pointer to the bitmap.
6035 * @param cBits The number of bits in the bitmap. Multiple of 32 (it is passed through RT_ALIGN_32 before scanning).
6036 */
6037#if RT_INLINE_ASM_EXTERNAL
6038DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
6039#else
6040DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
6041{
6042 if (cBits)
6043 {
6044 int32_t iBit;
6045# if RT_INLINE_ASM_GNU_STYLE
6046 RTCCUINTREG uEAX, uECX, uEDI; /* only name the eax/ecx/edi outputs; their values are not used afterwards */
6047 cBits = RT_ALIGN_32(cBits, 32);
6048 __asm__ __volatile__("repe; scasl\n\t" /* skip dwords equal to eax (0xffffffff, i.e. no clear bit) */
6049 "je 1f\n\t" /* every dword matched: edx still holds the initial 0xffffffff (-1) */
6050# ifdef RT_ARCH_AMD64
6051 "lea -4(%%rdi), %%rdi\n\t" /* step back to the dword that differed */
6052 "xorl (%%rdi), %%eax\n\t" /* invert it so that clear bits become set bits */
6053 "subq %5, %%rdi\n\t" /* byte offset from the start of the bitmap */
6054# else
6055 "lea -4(%%edi), %%edi\n\t" /* step back to the dword that differed */
6056 "xorl (%%edi), %%eax\n\t" /* invert it so that clear bits become set bits */
6057 "subl %5, %%edi\n\t" /* byte offset from the start of the bitmap */
6058# endif
6059 "shll $3, %%edi\n\t" /* byte offset -> bit offset */
6060 "bsfl %%eax, %%edx\n\t" /* lowest set bit of the inverted dword */
6061 "addl %%edi, %%edx\n\t"
6062 "1:\t\n"
6063 : "=d" (iBit),
6064 "=&c" (uECX),
6065 "=&D" (uEDI),
6066 "=&a" (uEAX)
6067 : "0" (0xffffffff),
6068 "mr" (pvBitmap),
6069 "1" (cBits >> 5), /* number of dwords to scan */
6070 "2" (pvBitmap),
6071 "3" (0xffffffff));
6072# else
6073 cBits = RT_ALIGN_32(cBits, 32);
6074 __asm
6075 {
6076# ifdef RT_ARCH_AMD64
6077 mov rdi, [pvBitmap]
6078 mov rbx, rdi
6079# else
6080 mov edi, [pvBitmap]
6081 mov ebx, edi
6082# endif
6083 mov edx, 0ffffffffh
6084 mov eax, edx
6085 mov ecx, [cBits]
6086 shr ecx, 5
6087 repe scasd
6088 je done
6089
6090# ifdef RT_ARCH_AMD64
6091 lea rdi, [rdi - 4]
6092 xor eax, [rdi]
6093 sub rdi, rbx
6094# else
6095 lea edi, [edi - 4]
6096 xor eax, [edi]
6097 sub edi, ebx
6098# endif
6099 shl edi, 3
6100 bsf edx, eax
6101 add edx, edi
6102 done:
6103 mov [iBit], edx
6104 }
6105# endif
6106 return iBit;
6107 }
6108 return -1;
6109}
6110#endif
6111
6112
6113/**
6114 * Finds the next clear bit in a bitmap.
6115 *
6116 * @returns Index of the next zero bit after iBitPrev.
6117 * @returns -1 if no clear bit was found.
6118 * @param pvBitmap Pointer to the bitmap.
6119 * @param cBits The number of bits in the bitmap. Multiple of 32.
6120 * @param iBitPrev The bit returned from the last search.
6121 * The search will start at iBitPrev + 1.
6122 */
6123#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6124DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6125#else
6126DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6127{
6128 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6129 int iBit = ++iBitPrev & 31; /* iBitPrev now is the first bit to look at; iBit is its position within its dword */
6130 if (iBit)
6131 {
6132 /*
6133 * Inspect the 32-bit word containing the unaligned bit.
6134 */
6135 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit; /* inverted: clear bits become set; bits below the start are shifted out */
6136
6137# if RT_INLINE_ASM_USES_INTRIN
6138 unsigned long ulBit = 0;
6139 if (_BitScanForward(&ulBit, u32))
6140 return ulBit + iBitPrev;
6141# else
6142# if RT_INLINE_ASM_GNU_STYLE
6143 __asm__ __volatile__("bsf %1, %0\n\t"
6144 "jnz 1f\n\t"
6145 "movl $-1, %0\n\t" /* no bit set in u32: report -1 */
6146 "1:\n\t"
6147 : "=r" (iBit)
6148 : "r" (u32));
6149# else
6150 __asm
6151 {
6152 mov edx, [u32]
6153 bsf eax, edx
6154 jnz done
6155 mov eax, 0ffffffffh
6156 done:
6157 mov [iBit], eax
6158 }
6159# endif
6160 if (iBit >= 0)
6161 return iBit + iBitPrev;
6162# endif
6163
6164 /*
6165 * Skip ahead and see if there is anything left to search.
6166 */
6167 iBitPrev |= 31;
6168 iBitPrev++; /* together with the line above: round up to the next multiple of 32 */
6169 if (cBits <= (uint32_t)iBitPrev)
6170 return -1;
6171 }
6172
6173 /*
6174 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6175 */
6176 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6177 if (iBit >= 0)
6178 iBit += iBitPrev; /* result is relative to the sub-bitmap; make it absolute again */
6179 return iBit;
6180}
6181#endif
6182
6183
6184/**
6185 * Finds the first set bit in a bitmap.
6186 *
6187 * @returns Index of the first set bit.
6188 * @returns -1 if no set bit was found (also when cBits is 0).
6189 * @param pvBitmap Pointer to the bitmap.
6190 * @param cBits The number of bits in the bitmap. Multiple of 32 (it is passed through RT_ALIGN_32 before scanning).
6191 */
6192#if RT_INLINE_ASM_EXTERNAL
6193DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
6194#else
6195DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
6196{
6197 if (cBits)
6198 {
6199 int32_t iBit;
6200# if RT_INLINE_ASM_GNU_STYLE
6201 RTCCUINTREG uEAX, uECX, uEDI; /* only name the eax/ecx/edi outputs; their values are not used afterwards */
6202 cBits = RT_ALIGN_32(cBits, 32);
6203 __asm__ __volatile__("repe; scasl\n\t" /* skip dwords equal to eax (0, i.e. no set bit) */
6204 "je 1f\n\t" /* every dword matched: edx still holds the initial 0xffffffff (-1) */
6205# ifdef RT_ARCH_AMD64
6206 "lea -4(%%rdi), %%rdi\n\t" /* step back to the dword that differed */
6207 "movl (%%rdi), %%eax\n\t" /* load it for the bit scan */
6208 "subq %5, %%rdi\n\t" /* byte offset from the start of the bitmap */
6209# else
6210 "lea -4(%%edi), %%edi\n\t" /* step back to the dword that differed */
6211 "movl (%%edi), %%eax\n\t" /* load it for the bit scan */
6212 "subl %5, %%edi\n\t" /* byte offset from the start of the bitmap */
6213# endif
6214 "shll $3, %%edi\n\t" /* byte offset -> bit offset */
6215 "bsfl %%eax, %%edx\n\t" /* lowest set bit of that dword */
6216 "addl %%edi, %%edx\n\t"
6217 "1:\t\n"
6218 : "=d" (iBit),
6219 "=&c" (uECX),
6220 "=&D" (uEDI),
6221 "=&a" (uEAX)
6222 : "0" (0xffffffff),
6223 "mr" (pvBitmap),
6224 "1" (cBits >> 5), /* number of dwords to scan */
6225 "2" (pvBitmap),
6226 "3" (0));
6227# else
6228 cBits = RT_ALIGN_32(cBits, 32);
6229 __asm
6230 {
6231# ifdef RT_ARCH_AMD64
6232 mov rdi, [pvBitmap]
6233 mov rbx, rdi
6234# else
6235 mov edi, [pvBitmap]
6236 mov ebx, edi
6237# endif
6238 mov edx, 0ffffffffh
6239 xor eax, eax
6240 mov ecx, [cBits]
6241 shr ecx, 5
6242 repe scasd
6243 je done
6244# ifdef RT_ARCH_AMD64
6245 lea rdi, [rdi - 4]
6246 mov eax, [rdi]
6247 sub rdi, rbx
6248# else
6249 lea edi, [edi - 4]
6250 mov eax, [edi]
6251 sub edi, ebx
6252# endif
6253 shl edi, 3
6254 bsf edx, eax
6255 add edx, edi
6256 done:
6257 mov [iBit], edx
6258 }
6259# endif
6260 return iBit;
6261 }
6262 return -1;
6263}
6264#endif
6265
6266
6267/**
6268 * Finds the next set bit in a bitmap.
6269 *
6270 * @returns Index of the next set bit.
6271 * @returns -1 if no set bit was found.
6272 * @param pvBitmap Pointer to the bitmap.
6273 * @param cBits The number of bits in the bitmap. Multiple of 32.
6274 * @param iBitPrev The bit returned from the last search.
6275 * The search will start at iBitPrev + 1.
6276 */
6277#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
6278DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
6279#else
6280DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
6281{
6282 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
6283 int iBit = ++iBitPrev & 31;
6284 if (iBit)
6285 {
6286 /*
6287 * Inspect the 32-bit word containing the unaligned bit.
6288 */
6289 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6290
6291# if RT_INLINE_ASM_USES_INTRIN
6292 unsigned long ulBit = 0;
6293 if (_BitScanForward(&ulBit, u32))
6294 return ulBit + iBitPrev;
6295# else
6296# if RT_INLINE_ASM_GNU_STYLE
6297 __asm__ __volatile__("bsf %1, %0\n\t"
6298 "jnz 1f\n\t"
6299 "movl $-1, %0\n\t"
6300 "1:\n\t"
6301 : "=r" (iBit)
6302 : "r" (u32));
6303# else
6304 __asm
6305 {
6306 mov edx, [u32]
6307 bsf eax, edx
6308 jnz done
6309 mov eax, 0ffffffffh
6310 done:
6311 mov [iBit], eax
6312 }
6313# endif
6314 if (iBit >= 0)
6315 return iBit + iBitPrev;
6316# endif
6317
6318 /*
6319 * Skip ahead and see if there is anything left to search.
6320 */
6321 iBitPrev |= 31;
6322 iBitPrev++;
6323 if (cBits <= (uint32_t)iBitPrev)
6324 return -1;
6325 }
6326
6327 /*
6328 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6329 */
6330 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6331 if (iBit >= 0)
6332 iBit += iBitPrev;
6333 return iBit;
6334}
6335#endif
6336
6337
6338/**
6339 * Finds the first bit which is set in the given 32-bit integer.
6340 * Bits are numbered from 1 (least significant) to 32.
6341 *
6342 * @returns index [1..32] of the first set bit.
6343 * @returns 0 if all bits are cleared.
6344 * @param u32 Integer to search for set bits.
6345 * @remark Similar to ffs() in BSD.
6346 */
6347DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
6348{
6349# if RT_INLINE_ASM_USES_INTRIN
6350 unsigned long iBit;
6351 if (_BitScanForward(&iBit, u32))
6352 iBit++;
6353 else
6354 iBit = 0;
6355# elif RT_INLINE_ASM_GNU_STYLE
6356 uint32_t iBit;
6357 __asm__ __volatile__("bsf %1, %0\n\t"
6358 "jnz 1f\n\t"
6359 "xorl %0, %0\n\t"
6360 "jmp 2f\n"
6361 "1:\n\t"
6362 "incl %0\n"
6363 "2:\n\t"
6364 : "=r" (iBit)
6365 : "rm" (u32));
6366# else
6367 uint32_t iBit;
6368 _asm
6369 {
6370 bsf eax, [u32]
6371 jnz found
6372 xor eax, eax
6373 jmp done
6374 found:
6375 inc eax
6376 done:
6377 mov [iBit], eax
6378 }
6379# endif
6380 return iBit;
6381}
6382
6383
6384/**
6385 * Finds the first bit which is set in the given 32-bit integer.
6386 * Bits are numbered from 1 (least significant) to 32.
6387 *
6388 * @returns index [1..32] of the first set bit.
6389 * @returns 0 if all bits are cleared.
6390 * @param i32 Integer to search for set bits.
6391 * @remark Similar to ffs() in BSD.
6392 */
6393DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6394{
6395 return ASMBitFirstSetU32((uint32_t)i32);
6396}
6397
6398
6399/**
6400 * Finds the last bit which is set in the given 32-bit integer.
6401 * Bits are numbered from 1 (least significant) to 32.
6402 *
6403 * @returns index [1..32] of the last set bit.
6404 * @returns 0 if all bits are cleared.
6405 * @param u32 Integer to search for set bits.
6406 * @remark Similar to fls() in BSD.
6407 */
6408DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6409{
6410# if RT_INLINE_ASM_USES_INTRIN
6411 unsigned long iBit;
6412 if (_BitScanReverse(&iBit, u32))
6413 iBit++;
6414 else
6415 iBit = 0;
6416# elif RT_INLINE_ASM_GNU_STYLE
6417 uint32_t iBit;
6418 __asm__ __volatile__("bsrl %1, %0\n\t"
6419 "jnz 1f\n\t"
6420 "xorl %0, %0\n\t"
6421 "jmp 2f\n"
6422 "1:\n\t"
6423 "incl %0\n"
6424 "2:\n\t"
6425 : "=r" (iBit)
6426 : "rm" (u32));
6427# else
6428 uint32_t iBit;
6429 _asm
6430 {
6431 bsr eax, [u32]
6432 jnz found
6433 xor eax, eax
6434 jmp done
6435 found:
6436 inc eax
6437 done:
6438 mov [iBit], eax
6439 }
6440# endif
6441 return iBit;
6442}
6443
6444
6445/**
6446 * Finds the last bit which is set in the given 32-bit integer.
6447 * Bits are numbered from 1 (least significant) to 32.
6448 *
6449 * @returns index [1..32] of the last set bit.
6450 * @returns 0 if all bits are cleared.
6451 * @param i32 Integer to search for set bits.
6452 * @remark Similar to fls() in BSD.
6453 */
6454DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6455{
6456 return ASMBitLastSetU32((uint32_t)i32);
6457}
6458
6459/**
6460 * Reverse the byte order of the given 16-bit integer.
6461 *
6462 * @returns Revert
6463 * @param u16 16-bit integer value.
6464 */
6465DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6466{
6467#if RT_INLINE_ASM_USES_INTRIN
6468 u16 = _byteswap_ushort(u16);
6469#elif RT_INLINE_ASM_GNU_STYLE
6470 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6471#else
6472 _asm
6473 {
6474 mov ax, [u16]
6475 ror ax, 8
6476 mov [u16], ax
6477 }
6478#endif
6479 return u16;
6480}
6481
6482/**
6483 * Reverse the byte order of the given 32-bit integer.
6484 *
6485 * @returns Revert
6486 * @param u32 32-bit integer value.
6487 */
6488DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6489{
6490#if RT_INLINE_ASM_USES_INTRIN
6491 u32 = _byteswap_ulong(u32);
6492#elif RT_INLINE_ASM_GNU_STYLE
6493 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6494#else
6495 _asm
6496 {
6497 mov eax, [u32]
6498 bswap eax
6499 mov [u32], eax
6500 }
6501#endif
6502 return u32;
6503}
6504
6505
6506/**
6507 * Reverse the byte order of the given 64-bit integer.
6508 *
6509 * @returns Revert
6510 * @param u64 64-bit integer value.
6511 */
6512DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6513{
6514#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6515 u64 = _byteswap_uint64(u64);
6516#else
6517 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6518 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6519#endif
6520 return u64;
6521}
6522
6523
6524/** @} */
6525
6526
6527/** @} */
6528#endif
6529
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette