VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 12086

Last change on this file since 12086 was 12086, checked in by vboxsync, 16 years ago

Functions to fetch dr0-3 & 6.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 148.6 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using the compiler intrinsics (_MSC_VER >= 1400).
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing,
109 * while the latter makes no promises about the order. Ordered operations
110 * do not, it seems, make any 100% promise with regard to whether the
111 * operation will complete before any subsequent memory access.
112 * (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint32_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
127 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
143/** @def RT_INLINE_ASM_EXTERNAL
144 * Defined as 1 if the compiler does not support inline assembly.
145 * The ASM* functions will then be implemented in an external .asm file.
146 *
147 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
148 * inline assembly in their AMD64 compiler.
149 */
150#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
151# define RT_INLINE_ASM_EXTERNAL 1
152#else
153# define RT_INLINE_ASM_EXTERNAL 0
154#endif
155
156/** @def RT_INLINE_ASM_GNU_STYLE
157 * Defined as 1 if the compiler understands GNU-style inline assembly.
158 */
159#if defined(_MSC_VER)
160# define RT_INLINE_ASM_GNU_STYLE 0
161#else
162# define RT_INLINE_ASM_GNU_STYLE 1
163#endif
164
165
166/** @todo find a more proper place for this structure? */
167#pragma pack(1)
168/** IDTR */
169typedef struct RTIDTR
170{
171 /** Size of the IDT. */
172 uint16_t cbIdt;
173 /** Address of the IDT. */
174 uintptr_t pIdt;
175} RTIDTR, *PRTIDTR;
176#pragma pack()
177
178#pragma pack(1)
179/** GDTR */
180typedef struct RTGDTR
181{
182 /** Size of the GDT. */
183 uint16_t cbGdt;
184 /** Address of the GDT. */
185 uintptr_t pGdt;
186} RTGDTR, *PRTGDTR;
187#pragma pack()
188
189
190/** @def ASMReturnAddress
191 * Gets the return address of the current (or calling if you like) function or method.
192 */
193#ifdef _MSC_VER
194# ifdef __cplusplus
195extern "C"
196# endif
197void * _ReturnAddress(void);
198# pragma intrinsic(_ReturnAddress)
199# define ASMReturnAddress() _ReturnAddress()
200#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
201# define ASMReturnAddress() __builtin_return_address(0)
202#else
203# error "Unsupported compiler."
204#endif
205
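/*
 * Usage sketch (illustration only, not part of the original header): capturing
 * the caller's address for a log statement. The function name MyTracedFunction
 * and the printf call are made up for the example.
 *
 * @code
 * #include <iprt/asm.h>
 * #include <stdio.h>
 *
 * void MyTracedFunction(void)
 * {
 *     void *pvCaller = ASMReturnAddress(); // the address we will return to
 *     printf("MyTracedFunction called from %p\n", pvCaller);
 * }
 * @endcode
 */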
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
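/*
 * Usage sketch (illustration only, not part of the original header): reading
 * the base address of the current IDT, typically from ring-0 code. The helper
 * name GetIdtBase is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static uintptr_t GetIdtBase(void)
 * {
 *     RTIDTR Idtr;
 *     ASMGetIDTR(&Idtr);
 *     return Idtr.pIdt; // linear base address; Idtr.cbIdt holds the size (limit)
 * }
 * @endcode
 */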
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380#endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
555
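/*
 * Usage sketch (illustration only, not part of the original header): counting
 * the TSC ticks spent in a function call. No conversion to wall-clock time is
 * attempted and the helper name MeasureTicks is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static uint64_t MeasureTicks(void (*pfnWork)(void))
 * {
 *     uint64_t const uStart = ASMReadTSC();
 *     pfnWork();
 *     return ASMReadTSC() - uStart; // elapsed ticks, assuming a monotonic TSC
 * }
 * @endcode
 */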
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
627
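/*
 * Usage sketch (illustration only, not part of the original header): fetching
 * the 12-character CPUID vendor string (e.g. "GenuineIntel") with leaf 0. The
 * helper name GetCpuVendor is made up; the buffer must hold at least 13 bytes.
 *
 * @code
 * #include <iprt/asm.h>
 * #include <string.h>
 *
 * static void GetCpuVendor(char *pszVendor)
 * {
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *     memcpy(pszVendor + 0, &uEBX, 4); // the string comes back in EBX, EDX, ECX order
 *     memcpy(pszVendor + 4, &uEDX, 4);
 *     memcpy(pszVendor + 8, &uECX, 4);
 *     pszVendor[12] = '\0';
 * }
 * @endcode
 */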
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX ecx index
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /** @todo The __cpuid intrinsic does not take an ECX index, so uIdxECX is ignored here; this needs a different intrinsic or an external implementation. */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
886
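/*
 * Usage sketch (illustration only, not part of the original header): guarding a
 * feature query with ASMHasCpuId so it is safe on very old 32-bit CPUs. Bit 4 of
 * the leaf 1 EDX output is the TSC feature flag; the helper name HasTsc is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static bool HasTsc(void)
 * {
 *     if (!ASMHasCpuId())
 *         return false;
 *     return (ASMCpuId_EDX(1) & RT_BIT(4)) != 0; // CPUID.1:EDX[4] = TSC
 * }
 * @endcode
 */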
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
945/**
946 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
947 *
948 * @returns true/false.
949 * @param uEBX EBX return from ASMCpuId(0)
950 * @param uECX ECX return from ASMCpuId(0)
951 * @param uEDX EDX return from ASMCpuId(0)
952 */
953DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
954{
955 return uEBX == 0x756e6547 /* 'Genu' */
956 && uECX == 0x6c65746e /* 'ntel' */
957 && uEDX == 0x49656e69; /* 'ineI' */
958}
959
960
961/**
962 * Tests if this is a genuine Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX); /* leaf 0 returns the vendor string checked by ASMIsIntelCpuEx(). */
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
974/**
975 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
976 *
977 * @returns Family.
978 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
979 */
980DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
981{
982 return ((uEAX >> 8) & 0xf) == 0xf
983 ? ((uEAX >> 20) & 0x7f) + 0xf
984 : ((uEAX >> 8) & 0xf);
985}
986
987
988/**
989 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
990 *
991 * @returns Model.
992 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
994 */
995DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
996{
997 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
998 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
999 : ((uEAX >> 4) & 0xf);
1000}
1001
1002
1003/**
1004 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1005 *
1006 * @returns Model.
1007 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1009 */
1010DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1011{
1012 return ((uEAX >> 8) & 0xf) == 0xf
1013 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1014 : ((uEAX >> 4) & 0xf);
1015}
1016
1017
1018/**
1019 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1020 *
1021 * @returns Model.
1022 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1023 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1024 */
1025DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1026{
1027 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1028 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1029 : ((uEAX >> 4) & 0xf);
1030}
1031
1032
1033/**
1034 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1035 *
1036 * @returns Stepping.
1037 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1038 */
1039DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1040{
1041 return uEAX & 0xf;
1042}
1043
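/*
 * Usage sketch (illustration only, not part of the original header): decoding
 * family, model and stepping from CPUID leaf 1, using leaf 0 to decide whether
 * the Intel model rules apply. The helper name GetCpuFMS is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static void GetCpuFMS(uint32_t *puFamily, uint32_t *puModel, uint32_t *puStepping)
 * {
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     *puFamily   = ASMGetCpuFamily(uEAX);
 *     *puModel    = ASMGetCpuModel(uEAX, fIntel);
 *     *puStepping = ASMGetCpuStepping(uEAX);
 * }
 * @endcode
 */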
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR2 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
1332/**
1333 * Sets the CR4 register.
1334 *
1335 * @param uCR4 New CR4 value.
1336 */
1337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1338DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1339#else
1340DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1341{
1342# if RT_INLINE_ASM_USES_INTRIN
1343 __writecr4(uCR4);
1344
1345# elif RT_INLINE_ASM_GNU_STYLE
1346# ifdef RT_ARCH_AMD64
1347 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1348# else
1349 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1350# endif
1351# else
1352 __asm
1353 {
1354# ifdef RT_ARCH_AMD64
1355 mov rax, [uCR4]
1356 mov cr4, rax
1357# else
1358 mov eax, [uCR4]
1359 _emit 0x0F
1360 _emit 0x22
1361 _emit 0xE0 /* mov cr4, eax */
1362# endif
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
1473
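/*
 * Usage sketch (illustration only, not part of the original header): the usual
 * save / disable / restore pattern around a short critical section in ring-0
 * code. The helper name IncrementWithIntsOff is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static void IncrementWithIntsOff(volatile uint32_t *pcCounter)
 * {
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     *pcCounter += 1;            // interrupts are off here
 *     ASMSetFlags(fSavedFlags);   // restores the previous IF state
 * }
 * @endcode
 */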
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 * @returns Register content.
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
1546
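/*
 * Usage sketch (illustration only, not part of the original header): a
 * read-modify-write of a machine specific register, which must be done from
 * ring-0. The MSR index and bit number are caller supplied; the helper name
 * SetMsrBit is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static void SetMsrBit(uint32_t uMsr, unsigned iBit)
 * {
 *     uint64_t u64 = ASMRdMsr(uMsr);
 *     u64 |= (uint64_t)1 << iBit;
 *     ASMWrMsr(uMsr, u64);
 * }
 * @endcode
 */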
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569# else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* bits 16-31 and 4-11 are 1's; bits 0-3, 12-15 and 32-63 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* bits 16-31 and 4-11 are 1's; bits 0-3, 12-15 and 32-63 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* bits 16-31 and 4-11 are 1's; bits 0-3 and 12-15 are zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731/**
1732 * Gets dr0.
1733 *
1734 * @returns dr0.
1735 */
1736#if RT_INLINE_ASM_EXTERNAL
1737DECLASM(RTCCUINTREG) ASMGetDR0(void);
1738#else
1739DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1740{
1741 RTCCUINTREG uDR0;
1742# if RT_INLINE_ASM_GNU_STYLE
1743# ifdef RT_ARCH_AMD64
1744 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1745# else
1746 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1747# endif
1748# else
1749 __asm
1750 {
1751# ifdef RT_ARCH_AMD64
1752 mov rax, dr0
1753 mov [uDR0], rax
1754# else
1755 mov eax, dr0
1756 mov [uDR0], eax
1757# endif
1758 }
1759# endif
1760 return uDR0;
1761}
1762#endif
1763
1764
1765/**
1766 * Gets dr1.
1767 *
1768 * @returns dr1.
1769 */
1770#if RT_INLINE_ASM_EXTERNAL
1771DECLASM(RTCCUINTREG) ASMGetDR1(void);
1772#else
1773DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1774{
1775 RTCCUINTREG uDR1;
1776# if RT_INLINE_ASM_GNU_STYLE
1777# ifdef RT_ARCH_AMD64
1778 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1779# else
1780 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1781# endif
1782# else
1783 __asm
1784 {
1785# ifdef RT_ARCH_AMD64
1786 mov rax, dr1
1787 mov [uDR1], rax
1788# else
1789 mov eax, dr1
1790 mov [uDR1], eax
1791# endif
1792 }
1793# endif
1794 return uDR1;
1795}
1796#endif
1797
1798/**
1799 * Gets dr2.
1800 *
1801 * @returns dr2.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL
1804DECLASM(RTCCUINTREG) ASMGetDR2(void);
1805#else
1806DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1807{
1808 RTCCUINTREG uDR2;
1809# if RT_INLINE_ASM_GNU_STYLE
1810# ifdef RT_ARCH_AMD64
1811 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1812# else
1813 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1814# endif
1815# else
1816 __asm
1817 {
1818# ifdef RT_ARCH_AMD64
1819 mov rax, dr2
1820 mov [uDR2], rax
1821# else
1822 mov eax, dr2
1823 mov [uDR2], eax
1824# endif
1825 }
1826# endif
1827 return uDR2;
1828}
1829#endif
1830
1831/**
1832 * Gets dr3.
1833 *
1834 * @returns dr3.
1835 */
1836#if RT_INLINE_ASM_EXTERNAL
1837DECLASM(RTCCUINTREG) ASMGetDR3(void);
1838#else
1839DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1840{
1841 RTCCUINTREG uDR3;
1842# if RT_INLINE_ASM_GNU_STYLE
1843# ifdef RT_ARCH_AMD64
1844 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1845# else
1846 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1847# endif
1848# else
1849 __asm
1850 {
1851# ifdef RT_ARCH_AMD64
1852 mov rax, dr3
1853 mov [uDR3], rax
1854# else
1855 mov eax, dr3
1856 mov [uDR3], eax
1857# endif
1858 }
1859# endif
1860 return uDR3;
1861}
1862#endif
1863
1864/**
1865 * Compiler memory barrier.
1866 *
1867 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1868 * values or any outstanding writes when returning from this function.
1869 *
1870 * This function must be used if non-volatile data is modified by a
1871 * device or the VMM. Typical cases are port access, MMIO access,
1872 * trapping instructions, etc.
1873 */
1874#if RT_INLINE_ASM_GNU_STYLE
1875# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1876#elif RT_INLINE_ASM_USES_INTRIN
1877# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1878#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1879DECLINLINE(void) ASMCompilerBarrier(void)
1880{
1881 __asm
1882 {
1883 }
1884}
1885#endif
1886
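/*
 * Usage sketch (illustration only, not part of the original header): polling a
 * non-volatile variable that a device or the VMM updates behind the compiler's
 * back. The barrier forces the compiler to reload g_u32MailBox on every
 * iteration; it does not order CPU-level accesses. The names are made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static uint32_t g_u32MailBox; // written by a device / the VMM
 *
 * static uint32_t WaitForMailbox(void)
 * {
 *     uint32_t u32;
 *     while ((u32 = g_u32MailBox) == 0)
 *         ASMCompilerBarrier();
 *     return u32;
 * }
 * @endcode
 */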
1887
1888/**
1889 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1890 *
1891 * @param Port I/O port to write to.
1892 * @param u8 8-bit integer to write.
1893 */
1894#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1895DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1896#else
1897DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1898{
1899# if RT_INLINE_ASM_GNU_STYLE
1900 __asm__ __volatile__("outb %b1, %w0\n\t"
1901 :: "Nd" (Port),
1902 "a" (u8));
1903
1904# elif RT_INLINE_ASM_USES_INTRIN
1905 __outbyte(Port, u8);
1906
1907# else
1908 __asm
1909 {
1910 mov dx, [Port]
1911 mov al, [u8]
1912 out dx, al
1913 }
1914# endif
1915}
1916#endif
1917
1918
1919/**
1920 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1921 *
1922 * @returns 8-bit integer.
1923 * @param Port I/O port to read from.
1924 */
1925#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1926DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1927#else
1928DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1929{
1930 uint8_t u8;
1931# if RT_INLINE_ASM_GNU_STYLE
1932 __asm__ __volatile__("inb %w1, %b0\n\t"
1933 : "=a" (u8)
1934 : "Nd" (Port));
1935
1936# elif RT_INLINE_ASM_USES_INTRIN
1937 u8 = __inbyte(Port);
1938
1939# else
1940 __asm
1941 {
1942 mov dx, [Port]
1943 in al, dx
1944 mov [u8], al
1945 }
1946# endif
1947 return u8;
1948}
1949#endif
1950
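/*
 * Usage sketch (illustration only, not part of the original header): reading a
 * CMOS/RTC register on a standard PC from ring-0, by writing the register index
 * to port 0x70 and reading the data from port 0x71. The helper name CmosRead is
 * made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static uint8_t CmosRead(uint8_t iReg)
 * {
 *     ASMOutU8(0x70, iReg);    // select the CMOS register
 *     return ASMInU8(0x71);    // read its value
 * }
 * @endcode
 */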
1951
1952/**
1953 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1954 *
1955 * @param Port I/O port to write to.
1956 * @param u16 16-bit integer to write.
1957 */
1958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1959DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1960#else
1961DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1962{
1963# if RT_INLINE_ASM_GNU_STYLE
1964 __asm__ __volatile__("outw %w1, %w0\n\t"
1965 :: "Nd" (Port),
1966 "a" (u16));
1967
1968# elif RT_INLINE_ASM_USES_INTRIN
1969 __outword(Port, u16);
1970
1971# else
1972 __asm
1973 {
1974 mov dx, [Port]
1975 mov ax, [u16]
1976 out dx, ax
1977 }
1978# endif
1979}
1980#endif
1981
1982
1983/**
1984 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1985 *
1986 * @returns 16-bit integer.
1987 * @param Port I/O port to read from.
1988 */
1989#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1990DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1991#else
1992DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1993{
1994 uint16_t u16;
1995# if RT_INLINE_ASM_GNU_STYLE
1996 __asm__ __volatile__("inw %w1, %w0\n\t"
1997 : "=a" (u16)
1998 : "Nd" (Port));
1999
2000# elif RT_INLINE_ASM_USES_INTRIN
2001 u16 = __inword(Port);
2002
2003# else
2004 __asm
2005 {
2006 mov dx, [Port]
2007 in ax, dx
2008 mov [u16], ax
2009 }
2010# endif
2011 return u16;
2012}
2013#endif
2014
2015
2016/**
2017 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2018 *
2019 * @param Port I/O port to write to.
2020 * @param u32 32-bit integer to write.
2021 */
2022#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2023DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2024#else
2025DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2026{
2027# if RT_INLINE_ASM_GNU_STYLE
2028 __asm__ __volatile__("outl %1, %w0\n\t"
2029 :: "Nd" (Port),
2030 "a" (u32));
2031
2032# elif RT_INLINE_ASM_USES_INTRIN
2033 __outdword(Port, u32);
2034
2035# else
2036 __asm
2037 {
2038 mov dx, [Port]
2039 mov eax, [u32]
2040 out dx, eax
2041 }
2042# endif
2043}
2044#endif
2045
2046
2047/**
2048 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2049 *
2050 * @returns 32-bit integer.
2051 * @param Port I/O port to read from.
2052 */
2053#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2054DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2055#else
2056DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2057{
2058 uint32_t u32;
2059# if RT_INLINE_ASM_GNU_STYLE
2060 __asm__ __volatile__("inl %w1, %0\n\t"
2061 : "=a" (u32)
2062 : "Nd" (Port));
2063
2064# elif RT_INLINE_ASM_USES_INTRIN
2065 u32 = __indword(Port);
2066
2067# else
2068 __asm
2069 {
2070 mov dx, [Port]
2071 in eax, dx
2072 mov [u32], eax
2073 }
2074# endif
2075 return u32;
2076}
2077#endif
2078
2079/** @todo string i/o */
2080
2081
2082/**
2083 * Atomically Exchange an unsigned 8-bit value, ordered.
2084 *
2085 * @returns Current *pu8 value
2086 * @param pu8 Pointer to the 8-bit variable to update.
2087 * @param u8 The 8-bit value to assign to *pu8.
2088 */
2089#if RT_INLINE_ASM_EXTERNAL
2090DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2091#else
2092DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2093{
2094# if RT_INLINE_ASM_GNU_STYLE
2095 __asm__ __volatile__("xchgb %0, %1\n\t"
2096 : "=m" (*pu8),
2097 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2098 : "1" (u8));
2099# else
2100 __asm
2101 {
2102# ifdef RT_ARCH_AMD64
2103 mov rdx, [pu8]
2104 mov al, [u8]
2105 xchg [rdx], al
2106 mov [u8], al
2107# else
2108 mov edx, [pu8]
2109 mov al, [u8]
2110 xchg [edx], al
2111 mov [u8], al
2112# endif
2113 }
2114# endif
2115 return u8;
2116}
2117#endif
2118
2119
2120/**
2121 * Atomically Exchange a signed 8-bit value, ordered.
2122 *
2123 * @returns Current *pi8 value
2124 * @param pi8 Pointer to the 8-bit variable to update.
2125 * @param i8 The 8-bit value to assign to *pi8.
2126 */
2127DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2128{
2129 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2130}
2131
2132
2133/**
2134 * Atomically Exchange a bool value, ordered.
2135 *
2136 * @returns Current *pf value
2137 * @param pf Pointer to the 8-bit variable to update.
2138 * @param f The boolean value to assign to *pf.
2139 */
2140DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2141{
2142#ifdef _MSC_VER
2143 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2144#else
2145 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2146#endif
2147}
2148
2149
2150/**
2151 * Atomically Exchange an unsigned 16-bit value, ordered.
2152 *
2153 * @returns Current *pu16 value
2154 * @param pu16 Pointer to the 16-bit variable to update.
2155 * @param u16 The 16-bit value to assign to *pu16.
2156 */
2157#if RT_INLINE_ASM_EXTERNAL
2158DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2159#else
2160DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2161{
2162# if RT_INLINE_ASM_GNU_STYLE
2163 __asm__ __volatile__("xchgw %0, %1\n\t"
2164 : "=m" (*pu16),
2165 "=r" (u16)
2166 : "1" (u16));
2167# else
2168 __asm
2169 {
2170# ifdef RT_ARCH_AMD64
2171 mov rdx, [pu16]
2172 mov ax, [u16]
2173 xchg [rdx], ax
2174 mov [u16], ax
2175# else
2176 mov edx, [pu16]
2177 mov ax, [u16]
2178 xchg [edx], ax
2179 mov [u16], ax
2180# endif
2181 }
2182# endif
2183 return u16;
2184}
2185#endif
2186
2187
2188/**
2189 * Atomically Exchange a signed 16-bit value, ordered.
2190 *
2191 * @returns Current *pi16 value
2192 * @param pi16 Pointer to the 16-bit variable to update.
2193 * @param i16 The 16-bit value to assign to *pi16.
2194 */
2195DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2196{
2197 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2198}
2199
2200
2201/**
2202 * Atomically Exchange an unsigned 32-bit value, ordered.
2203 *
2204 * @returns Current *pu32 value
2205 * @param pu32 Pointer to the 32-bit variable to update.
2206 * @param u32 The 32-bit value to assign to *pu32.
2207 */
2208#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2209DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2210#else
2211DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2212{
2213# if RT_INLINE_ASM_GNU_STYLE
2214 __asm__ __volatile__("xchgl %0, %1\n\t"
2215 : "=m" (*pu32),
2216 "=r" (u32)
2217 : "1" (u32));
2218
2219# elif RT_INLINE_ASM_USES_INTRIN
2220 u32 = _InterlockedExchange((long *)pu32, u32);
2221
2222# else
2223 __asm
2224 {
2225# ifdef RT_ARCH_AMD64
2226 mov rdx, [pu32]
2227 mov eax, u32
2228 xchg [rdx], eax
2229 mov [u32], eax
2230# else
2231 mov edx, [pu32]
2232 mov eax, u32
2233 xchg [edx], eax
2234 mov [u32], eax
2235# endif
2236 }
2237# endif
2238 return u32;
2239}
2240#endif
2241
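/*
 * Usage sketch (illustration only, not part of the original header): a tiny
 * test-and-set spinlock built on the ordered 32-bit exchange, with 0 meaning
 * free and 1 meaning taken. The helper names are made up and there is no
 * back-off or deadlock protection.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static void MySpinLockAcquire(volatile uint32_t *pu32Lock)
 * {
 *     while (ASMAtomicXchgU32(pu32Lock, 1) != 0)
 *         ; // spin until the previous owner writes 0 back
 * }
 *
 * static void MySpinLockRelease(volatile uint32_t *pu32Lock)
 * {
 *     ASMAtomicXchgU32(pu32Lock, 0); // ordered store releases the lock
 * }
 * @endcode
 */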
2242
2243/**
2244 * Atomically Exchange a signed 32-bit value, ordered.
2245 *
2246 * @returns Current *pi32 value
2247 * @param pi32 Pointer to the 32-bit variable to update.
2248 * @param i32 The 32-bit value to assign to *pi32.
2249 */
2250DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2251{
2252 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2253}
2254
2255
2256/**
2257 * Atomically Exchange an unsigned 64-bit value, ordered.
2258 *
2259 * @returns Current *pu64 value
2260 * @param pu64 Pointer to the 64-bit variable to update.
2261 * @param u64 The 64-bit value to assign to *pu64.
2262 */
2263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2264DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2265#else
2266DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2267{
2268# if defined(RT_ARCH_AMD64)
2269# if RT_INLINE_ASM_USES_INTRIN
2270 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2271
2272# elif RT_INLINE_ASM_GNU_STYLE
2273 __asm__ __volatile__("xchgq %0, %1\n\t"
2274 : "=m" (*pu64),
2275 "=r" (u64)
2276 : "1" (u64));
2277# else
2278 __asm
2279 {
2280 mov rdx, [pu64]
2281 mov rax, [u64]
2282 xchg [rdx], rax
2283 mov [u64], rax
2284 }
2285# endif
2286# else /* !RT_ARCH_AMD64 */
2287# if RT_INLINE_ASM_GNU_STYLE
2288# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2289 uint32_t u32EBX = (uint32_t)u64;
2290 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2291 "xchgl %%ebx, %3\n\t"
2292 "1:\n\t"
2293 "lock; cmpxchg8b (%5)\n\t"
2294 "jnz 1b\n\t"
2295 "movl %3, %%ebx\n\t"
2296 /*"xchgl %%esi, %5\n\t"*/
2297 : "=A" (u64),
2298 "=m" (*pu64)
2299 : "0" (*pu64),
2300 "m" ( u32EBX ),
2301 "c" ( (uint32_t)(u64 >> 32) ),
2302 "S" (pu64) );
2303# else /* !PIC */
2304 __asm__ __volatile__("1:\n\t"
2305 "lock; cmpxchg8b %1\n\t"
2306 "jnz 1b\n\t"
2307 : "=A" (u64),
2308 "=m" (*pu64)
2309 : "0" (*pu64),
2310 "b" ( (uint32_t)u64 ),
2311 "c" ( (uint32_t)(u64 >> 32) ));
2312# endif
2313# else
2314 __asm
2315 {
2316 mov ebx, dword ptr [u64]
2317 mov ecx, dword ptr [u64 + 4]
2318 mov edi, pu64
2319 mov eax, dword ptr [edi]
2320 mov edx, dword ptr [edi + 4]
2321 retry:
2322 lock cmpxchg8b [edi]
2323 jnz retry
2324 mov dword ptr [u64], eax
2325 mov dword ptr [u64 + 4], edx
2326 }
2327# endif
2328# endif /* !RT_ARCH_AMD64 */
2329 return u64;
2330}
2331#endif
2332
2333
2334/**
2335 * Atomically Exchange a signed 64-bit value, ordered.
2336 *
2337 * @returns Current *pi64 value
2338 * @param pi64 Pointer to the 64-bit variable to update.
2339 * @param i64 The 64-bit value to assign to *pi64.
2340 */
2341DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2342{
2343 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2344}
2345
2346
2347#ifdef RT_ARCH_AMD64
2348/**
2349 * Atomically Exchange an unsigned 128-bit value, ordered.
2350 *
2351 * @returns Current *pu128.
2352 * @param pu128 Pointer to the 128-bit variable to update.
2353 * @param u128 The 128-bit value to assign to *pu128.
2354 *
2355 * @remark We cannot really assume that any hardware supports this. Nor do I have
2356 * GAS support for it. So, for the time being we'll BREAK the atomic
2357 * bit of this function and use two 64-bit exchanges instead.
2358 */
2359# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2360DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2361# else
2362DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2363{
2364 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2365 {
2366 /** @todo this is clumsy code */
2367 RTUINT128U u128Ret;
2368 u128Ret.u = u128;
2369 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2370 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2371 return u128Ret.u;
2372 }
2373#if 0 /* later? */
2374 else
2375 {
2376# if RT_INLINE_ASM_GNU_STYLE
2377 __asm__ __volatile__("1:\n\t"
2378 "lock; cmpxchg8b %1\n\t"
2379 "jnz 1b\n\t"
2380 : "=A" (u128),
2381 "=m" (*pu128)
2382 : "0" (*pu128),
2383 "b" ( (uint64_t)u128 ),
2384 "c" ( (uint64_t)(u128 >> 64) ));
2385# else
2386 __asm
2387 {
2388 mov rbx, dword ptr [u128]
2389 mov rcx, dword ptr [u128 + 8]
2390 mov rdi, pu128
2391 mov rax, dword ptr [rdi]
2392 mov rdx, dword ptr [rdi + 8]
2393 retry:
2394 lock cmpxchg16b [rdi]
2395 jnz retry
2396 mov dword ptr [u128], rax
2397 mov dword ptr [u128 + 8], rdx
2398 }
2399# endif
2400 }
2401 return u128;
2402#endif
2403}
2404# endif
2405#endif /* RT_ARCH_AMD64 */
2406
2407
2408/**
2409 * Atomically Exchange a pointer value, ordered.
2410 *
2411 * @returns Current *ppv value
2412 * @param ppv Pointer to the pointer variable to update.
2413 * @param pv The pointer value to assign to *ppv.
2414 */
2415DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2416{
2417#if ARCH_BITS == 32
2418 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2419#elif ARCH_BITS == 64
2420 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2421#else
2422# error "ARCH_BITS is bogus"
2423#endif
2424}
2425
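/*
 * Usage sketch (illustration only, not part of the original header): atomically
 * taking ownership of a shared buffer pointer, leaving NULL behind so only one
 * thread ends up with the buffer. The helper name TakeBuffer is made up.
 *
 * @code
 * #include <iprt/asm.h>
 *
 * static void *TakeBuffer(void * volatile *ppvBuf)
 * {
 *     return ASMAtomicXchgPtr(ppvBuf, NULL); // old value returned, NULL stored
 * }
 * @endcode
 */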
2426
2427/** @def ASMAtomicXchgHandle
2428 * Atomically Exchange a typical IPRT handle value, ordered.
2429 *
2430 * @param ph Pointer to the value to update.
2431 * @param hNew The new value to assign to *ph.
2432 * @param phRes Where to store the current *ph value.
2433 *
2434 * @remarks This doesn't currently work for all handles (like RTFILE).
2435 */
2436#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2437 do { \
2438 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (void *)(hNew)); \
2439 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2440 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2441 } while (0)
2442
2443
2444/**
2445 * Atomically Exchange a value whose size might differ
2446 * between platforms or compilers, ordered.
2447 *
2448 * @param pu Pointer to the variable to update.
2449 * @param uNew The value to assign to *pu.
2450 * @todo This is busted as it's missing the result argument.
2451 */
2452#define ASMAtomicXchgSize(pu, uNew) \
2453 do { \
2454 switch (sizeof(*(pu))) { \
2455 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2456 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2457 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2458 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2459 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2460 } \
2461 } while (0)
2462
2463/**
2464 * Atomically Exchange a value whose size might differ
2465 * between platforms or compilers, ordered.
2466 *
2467 * @param pu Pointer to the variable to update.
2468 * @param uNew The value to assign to *pu.
2469 * @param puRes Where to store the current *pu value.
2470 */
2471#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2472 do { \
2473 switch (sizeof(*(pu))) { \
2474 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2475 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2476 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2477 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2478            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2479 } \
2480 } while (0)
2481
2482
2483/**
2484 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2485 *
2486 * @returns true if xchg was done.
2487 * @returns false if xchg wasn't done.
2488 *
2489 * @param pu32 Pointer to the value to update.
2490 * @param u32New The new value to assign to *pu32.
2491 * @param u32Old The old value to compare *pu32 with.
2492 */
2493#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2494DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2495#else
2496DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2497{
2498# if RT_INLINE_ASM_GNU_STYLE
2499 uint8_t u8Ret;
2500 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2501 "setz %1\n\t"
2502 : "=m" (*pu32),
2503 "=qm" (u8Ret),
2504 "=a" (u32Old)
2505 : "r" (u32New),
2506 "2" (u32Old));
2507 return (bool)u8Ret;
2508
2509# elif RT_INLINE_ASM_USES_INTRIN
2510 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2511
2512# else
2513 uint32_t u32Ret;
2514 __asm
2515 {
2516# ifdef RT_ARCH_AMD64
2517 mov rdx, [pu32]
2518# else
2519 mov edx, [pu32]
2520# endif
2521 mov eax, [u32Old]
2522 mov ecx, [u32New]
2523# ifdef RT_ARCH_AMD64
2524 lock cmpxchg [rdx], ecx
2525# else
2526 lock cmpxchg [edx], ecx
2527# endif
2528 setz al
2529 movzx eax, al
2530 mov [u32Ret], eax
2531 }
2532 return !!u32Ret;
2533# endif
2534}
2535#endif
2536
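/* Usage sketch (editorial addition, not part of the original header): the
 * typical read-modify-compare-exchange retry loop built on
 * ASMAtomicCmpXchgU32, here implementing a capped increment. All Example*
 * names are illustrative only.
 *
 *      DECLINLINE(bool) ExampleTryIncBelowMax(volatile uint32_t *pu32, uint32_t u32Max)
 *      {
 *          uint32_t u32Cur;
 *          do
 *          {
 *              u32Cur = ASMAtomicReadU32(pu32);
 *              if (u32Cur >= u32Max)
 *                  return false;               // already at the cap, give up
 *          } while (!ASMAtomicCmpXchgU32(pu32, u32Cur + 1, u32Cur));
 *          return true;                        // we installed u32Cur + 1
 *      }
 */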
2537
2538/**
2539 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2540 *
2541 * @returns true if xchg was done.
2542 * @returns false if xchg wasn't done.
2543 *
2544 * @param pi32 Pointer to the value to update.
2545 * @param i32New The new value to assign to *pi32.
2546 * @param i32Old The old value to compare *pi32 with.
2547 */
2548DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2549{
2550 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2551}
2552
2553
2554/**
2555 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2556 *
2557 * @returns true if xchg was done.
2558 * @returns false if xchg wasn't done.
2559 *
2560 * @param pu64 Pointer to the 64-bit variable to update.
2561 * @param u64New The 64-bit value to assign to *pu64.
2562 * @param u64Old The value to compare with.
2563 */
2564#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2565DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2566#else
2567DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2568{
2569# if RT_INLINE_ASM_USES_INTRIN
2570 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2571
2572# elif defined(RT_ARCH_AMD64)
2573# if RT_INLINE_ASM_GNU_STYLE
2574 uint8_t u8Ret;
2575 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2576 "setz %1\n\t"
2577 : "=m" (*pu64),
2578 "=qm" (u8Ret),
2579 "=a" (u64Old)
2580 : "r" (u64New),
2581 "2" (u64Old));
2582 return (bool)u8Ret;
2583# else
2584 bool fRet;
2585 __asm
2586 {
2587        mov rdx, [pu64]
2588 mov rax, [u64Old]
2589 mov rcx, [u64New]
2590 lock cmpxchg [rdx], rcx
2591 setz al
2592 mov [fRet], al
2593 }
2594 return fRet;
2595# endif
2596# else /* !RT_ARCH_AMD64 */
2597 uint32_t u32Ret;
2598# if RT_INLINE_ASM_GNU_STYLE
2599# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2600 uint32_t u32EBX = (uint32_t)u64New;
2601 uint32_t u32Spill;
2602 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2603 "lock; cmpxchg8b (%6)\n\t"
2604 "setz %%al\n\t"
2605 "movl %4, %%ebx\n\t"
2606 "movzbl %%al, %%eax\n\t"
2607 : "=a" (u32Ret),
2608 "=d" (u32Spill),
2609 "=m" (*pu64)
2610 : "A" (u64Old),
2611 "m" ( u32EBX ),
2612 "c" ( (uint32_t)(u64New >> 32) ),
2613 "S" (pu64) );
2614# else /* !PIC */
2615 uint32_t u32Spill;
2616 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2617 "setz %%al\n\t"
2618 "movzbl %%al, %%eax\n\t"
2619 : "=a" (u32Ret),
2620 "=d" (u32Spill),
2621 "=m" (*pu64)
2622 : "A" (u64Old),
2623 "b" ( (uint32_t)u64New ),
2624 "c" ( (uint32_t)(u64New >> 32) ));
2625# endif
2626 return (bool)u32Ret;
2627# else
2628 __asm
2629 {
2630 mov ebx, dword ptr [u64New]
2631 mov ecx, dword ptr [u64New + 4]
2632 mov edi, [pu64]
2633 mov eax, dword ptr [u64Old]
2634 mov edx, dword ptr [u64Old + 4]
2635 lock cmpxchg8b [edi]
2636 setz al
2637 movzx eax, al
2638 mov dword ptr [u32Ret], eax
2639 }
2640 return !!u32Ret;
2641# endif
2642# endif /* !RT_ARCH_AMD64 */
2643}
2644#endif
2645
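/* Usage sketch (editorial addition, not part of the original header): keeping
 * a 64-bit high-water mark up to date with ASMAtomicCmpXchgU64; this also
 * works on 32-bit hosts where a plain 64-bit store would not be atomic. The
 * Example* name is illustrative only.
 *
 *      DECLINLINE(void) ExampleUpdateMaxU64(volatile uint64_t *pu64Max, uint64_t u64New)
 *      {
 *          uint64_t u64Cur = ASMAtomicReadU64(pu64Max);
 *          while (   u64New > u64Cur
 *                 && !ASMAtomicCmpXchgU64(pu64Max, u64New, u64Cur))
 *              u64Cur = ASMAtomicReadU64(pu64Max);     // lost the race, re-read and retry
 *      }
 */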
2646
2647/**
2648 * Atomically Compare and exchange a signed 64-bit value, ordered.
2649 *
2650 * @returns true if xchg was done.
2651 * @returns false if xchg wasn't done.
2652 *
2653 * @param pi64 Pointer to the 64-bit variable to update.
2654 * @param i64 The 64-bit value to assign to *pi64.
2655 * @param i64Old The value to compare with.
2656 */
2657DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2658{
2659 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2660}
2661
2662
2663/**
2664 * Atomically Compare and Exchange a pointer value, ordered.
2665 *
2666 * @returns true if xchg was done.
2667 * @returns false if xchg wasn't done.
2668 *
2669 * @param ppv Pointer to the value to update.
2670 * @param pvNew The new value to assign to *ppv.
2671 * @param pvOld The old value to compare *ppv with.
2672 */
2673DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2674{
2675#if ARCH_BITS == 32
2676 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2677#elif ARCH_BITS == 64
2678 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2679#else
2680# error "ARCH_BITS is bogus"
2681#endif
2682}
2683
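/* Usage sketch (editorial addition, not part of the original header): pushing
 * a node onto a lock-free singly linked list with ASMAtomicCmpXchgPtr. The
 * EXAMPLENODE type and function name are illustrative only.
 *
 *      typedef struct EXAMPLENODE
 *      {
 *          struct EXAMPLENODE *pNext;
 *      } EXAMPLENODE;
 *
 *      DECLINLINE(void) ExampleLockFreePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
 *      {
 *          void *pvHead;
 *          do
 *          {
 *              pvHead = ASMAtomicReadPtr((void * volatile *)ppHead);
 *              pNode->pNext = (EXAMPLENODE *)pvHead;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvHead));
 *      }
 */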
2684
2685/** @def ASMAtomicCmpXchgHandle
2686 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2687 *
2688 * @param ph Pointer to the value to update.
2689 * @param hNew The new value to assign to *ph.
2690 * @param hOld The old value to compare *ph with.
2691 * @param fRc Where to store the result.
2692 *
2693 * @remarks This doesn't currently work for all handles (like RTFILE).
2694 */
2695#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2696 do { \
2697 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2698 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2699 } while (0)
2700
2701
2702/** @def ASMAtomicCmpXchgSize
2703 * Atomically Compare and Exchange a value whose size might differ
2704 * between platforms or compilers, ordered.
2705 *
2706 * @param pu Pointer to the value to update.
2707 * @param uNew The new value to assign to *pu.
2708 * @param uOld The old value to compare *pu with.
2709 * @param fRc Where to store the result.
2710 */
2711#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2712 do { \
2713 switch (sizeof(*(pu))) { \
2714 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2715 break; \
2716 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2717 break; \
2718 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2719 (fRc) = false; \
2720 break; \
2721 } \
2722 } while (0)
2723
2724
2725/**
2726 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2727 * passes back old value, ordered.
2728 *
2729 * @returns true if xchg was done.
2730 * @returns false if xchg wasn't done.
2731 *
2732 * @param pu32 Pointer to the value to update.
2733 * @param u32New The new value to assign to *pu32.
2734 * @param u32Old The old value to compare *pu32 with.
2735 * @param pu32Old Pointer to where to store the old value.
2736 */
2737#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2738DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2739#else
2740DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2741{
2742# if RT_INLINE_ASM_GNU_STYLE
2743 uint8_t u8Ret;
2744 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2745 "setz %1\n\t"
2746 : "=m" (*pu32),
2747 "=qm" (u8Ret),
2748 "=a" (*pu32Old)
2749 : "r" (u32New),
2750 "a" (u32Old));
2751 return (bool)u8Ret;
2752
2753# elif RT_INLINE_ASM_USES_INTRIN
2754    return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2755
2756# else
2757 uint32_t u32Ret;
2758 __asm
2759 {
2760# ifdef RT_ARCH_AMD64
2761 mov rdx, [pu32]
2762# else
2763 mov edx, [pu32]
2764# endif
2765 mov eax, [u32Old]
2766 mov ecx, [u32New]
2767# ifdef RT_ARCH_AMD64
2768 lock cmpxchg [rdx], ecx
2769 mov rdx, [pu32Old]
2770 mov [rdx], eax
2771# else
2772 lock cmpxchg [edx], ecx
2773 mov edx, [pu32Old]
2774 mov [edx], eax
2775# endif
2776 setz al
2777 movzx eax, al
2778 mov [u32Ret], eax
2779 }
2780 return !!u32Ret;
2781# endif
2782}
2783#endif
2784
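/* Usage sketch (editorial addition, not part of the original header): the Ex
 * variant hands back the value that made the compare fail, so a retry loop
 * does not need an explicit re-read. The Example* name is illustrative only.
 *
 *      DECLINLINE(void) ExampleUpdateMaxU32(volatile uint32_t *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Old = ASMAtomicReadU32(pu32Max);
 *          while (   u32New > u32Old
 *                 && !ASMAtomicCmpXchgExU32(pu32Max, u32New, u32Old, &u32Old))
 *          {
 *              // u32Old now holds the current value; loop and try again.
 *          }
 *      }
 */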
2785
2786/**
2787 * Atomically Compare and Exchange a signed 32-bit value, additionally
2788 * passes back old value, ordered.
2789 *
2790 * @returns true if xchg was done.
2791 * @returns false if xchg wasn't done.
2792 *
2793 * @param pi32 Pointer to the value to update.
2794 * @param i32New The new value to assign to *pi32.
2795 * @param i32Old The old value to compare *pi32 with.
2796 * @param pi32Old Pointer to where to store the old value.
2797 */
2798DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2799{
2800 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2801}
2802
2803
2804/**
2805 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2806 * passing back old value, ordered.
2807 *
2808 * @returns true if xchg was done.
2809 * @returns false if xchg wasn't done.
2810 *
2811 * @param pu64 Pointer to the 64-bit variable to update.
2812 * @param u64New The 64-bit value to assign to *pu64.
2813 * @param u64Old The value to compare with.
2814 * @param pu64Old Pointer to where to store the old value.
2815 */
2816#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2817DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2818#else
2819DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2820{
2821# if RT_INLINE_ASM_USES_INTRIN
2822    return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2823
2824# elif defined(RT_ARCH_AMD64)
2825# if RT_INLINE_ASM_GNU_STYLE
2826 uint8_t u8Ret;
2827 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2828 "setz %1\n\t"
2829 : "=m" (*pu64),
2830 "=qm" (u8Ret),
2831 "=a" (*pu64Old)
2832 : "r" (u64New),
2833 "a" (u64Old));
2834 return (bool)u8Ret;
2835# else
2836 bool fRet;
2837 __asm
2838 {
2839        mov rdx, [pu64]
2840 mov rax, [u64Old]
2841 mov rcx, [u64New]
2842 lock cmpxchg [rdx], rcx
2843 mov rdx, [pu64Old]
2844 mov [rdx], rax
2845 setz al
2846 mov [fRet], al
2847 }
2848 return fRet;
2849# endif
2850# else /* !RT_ARCH_AMD64 */
2851# if RT_INLINE_ASM_GNU_STYLE
2852 uint64_t u64Ret;
2853# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2854 /* NB: this code uses a memory clobber description, because the clean
2855 * solution with an output value for *pu64 makes gcc run out of registers.
2856 * This will cause suboptimal code, and anyone with a better solution is
2857 * welcome to improve this. */
2858 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2859 "lock; cmpxchg8b %3\n\t"
2860 "xchgl %%ebx, %1\n\t"
2861 : "=A" (u64Ret)
2862 : "DS" ((uint32_t)u64New),
2863 "c" ((uint32_t)(u64New >> 32)),
2864 "m" (*pu64),
2865 "0" (u64Old)
2866 : "memory" );
2867# else /* !PIC */
2868 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2869 : "=A" (u64Ret),
2870 "=m" (*pu64)
2871 : "b" ((uint32_t)u64New),
2872 "c" ((uint32_t)(u64New >> 32)),
2873 "m" (*pu64),
2874 "0" (u64Old));
2875# endif
2876 *pu64Old = u64Ret;
2877 return u64Ret == u64Old;
2878# else
2879 uint32_t u32Ret;
2880 __asm
2881 {
2882 mov ebx, dword ptr [u64New]
2883 mov ecx, dword ptr [u64New + 4]
2884 mov edi, [pu64]
2885 mov eax, dword ptr [u64Old]
2886 mov edx, dword ptr [u64Old + 4]
2887 lock cmpxchg8b [edi]
2888 mov ebx, [pu64Old]
2889 mov [ebx], eax
2890 setz al
2891 movzx eax, al
2892 add ebx, 4
2893 mov [ebx], edx
2894 mov dword ptr [u32Ret], eax
2895 }
2896 return !!u32Ret;
2897# endif
2898# endif /* !RT_ARCH_AMD64 */
2899}
2900#endif
2901
2902
2903/**
2904 * Atomically Compare and exchange a signed 64-bit value, additionally
2905 * passing back old value, ordered.
2906 *
2907 * @returns true if xchg was done.
2908 * @returns false if xchg wasn't done.
2909 *
2910 * @param pi64 Pointer to the 64-bit variable to update.
2911 * @param i64 The 64-bit value to assign to *pi64.
2912 * @param i64Old The value to compare with.
2913 * @param pi64Old Pointer to where to store the old value.
2914 */
2915DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2916{
2917 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2918}
2919
2920/** @def ASMAtomicCmpXchgExHandle
2921 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2922 *
2923 * @param ph Pointer to the value to update.
2924 * @param hNew The new value to assign to *ph.
2925 * @param hOld The old value to compare *ph with.
2926 * @param fRc Where to store the result.
2927 * @param phOldVal Pointer to where to store the old value.
2928 *
2929 * @remarks This doesn't currently work for all handles (like RTFILE).
2930 */
2931#if ARCH_BITS == 32
2932# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2933 do { \
2934        (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
2935 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2936 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
2937 } while (0)
2938#elif ARCH_BITS == 64
2939# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
2940 do { \
2941        (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
2942 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2943 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
2944 } while (0)
2945#endif
2946
2947
2948/** @def ASMAtomicCmpXchgExSize
2949 * Atomically Compare and Exchange a value whose size might differ
2950 * between platforms or compilers. Additionally passes back old value.
2951 *
2952 * @param pu Pointer to the value to update.
2953 * @param uNew The new value to assign to *pu.
2954 * @param uOld The old value to compare *pu with.
2955 * @param fRc Where to store the result.
2956 * @param puOldVal Pointer to where to store the old value.
2957 */
2958#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
2959 do { \
2960 switch (sizeof(*(pu))) { \
2961            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
2962                break; \
2963            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
2964                break; \
2965            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2966                (fRc) = false; \
2967                *(puOldVal) = 0; \
2968 break; \
2969 } \
2970 } while (0)
2971
2972
2973/**
2974 * Atomically Compare and Exchange a pointer value, additionally
2975 * passing back old value, ordered.
2976 *
2977 * @returns true if xchg was done.
2978 * @returns false if xchg wasn't done.
2979 *
2980 * @param ppv Pointer to the value to update.
2981 * @param pvNew The new value to assign to *ppv.
2982 * @param pvOld The old value to compare *ppv with.
2983 * @param ppvOld Pointer to where to store the old value.
2984 */
2985DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2986{
2987#if ARCH_BITS == 32
2988 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2989#elif ARCH_BITS == 64
2990 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2991#else
2992# error "ARCH_BITS is bogus"
2993#endif
2994}
2995
2996
2997/**
2998 * Atomically exchanges and adds to a 32-bit value, ordered.
2999 *
3000 * @returns The old value.
3001 * @param pu32 Pointer to the value.
3002 * @param u32 Number to add.
3003 */
3004#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3005DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3006#else
3007DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3008{
3009# if RT_INLINE_ASM_USES_INTRIN
3010 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3011 return u32;
3012
3013# elif RT_INLINE_ASM_GNU_STYLE
3014 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3015 : "=r" (u32),
3016 "=m" (*pu32)
3017 : "0" (u32)
3018 : "memory");
3019 return u32;
3020# else
3021 __asm
3022 {
3023 mov eax, [u32]
3024# ifdef RT_ARCH_AMD64
3025 mov rdx, [pu32]
3026 lock xadd [rdx], eax
3027# else
3028 mov edx, [pu32]
3029 lock xadd [edx], eax
3030# endif
3031 mov [u32], eax
3032 }
3033 return u32;
3034# endif
3035}
3036#endif
3037
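/* Usage sketch (editorial addition, not part of the original header):
 * ASMAtomicAddU32 returns the value *before* the addition, which is exactly
 * what a ticket/sequence allocator needs. The Example* name is illustrative.
 *
 *      DECLINLINE(uint32_t) ExampleGrabTicket(volatile uint32_t *pu32NextTicket)
 *      {
 *          return ASMAtomicAddU32(pu32NextTicket, 1);  // the old value is our ticket
 *      }
 */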
3038
3039/**
3040 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3041 *
3042 * @returns The old value.
3043 * @param pi32 Pointer to the value.
3044 * @param i32 Number to add.
3045 */
3046DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3047{
3048 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3049}
3050
3051
3052/**
3053 * Atomically increment a 32-bit value, ordered.
3054 *
3055 * @returns The new value.
3056 * @param pu32 Pointer to the value to increment.
3057 */
3058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3059DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3060#else
3061DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3062{
3063 uint32_t u32;
3064# if RT_INLINE_ASM_USES_INTRIN
3065 u32 = _InterlockedIncrement((long *)pu32);
3066 return u32;
3067
3068# elif RT_INLINE_ASM_GNU_STYLE
3069 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3070 : "=r" (u32),
3071 "=m" (*pu32)
3072 : "0" (1)
3073 : "memory");
3074 return u32+1;
3075# else
3076 __asm
3077 {
3078 mov eax, 1
3079# ifdef RT_ARCH_AMD64
3080 mov rdx, [pu32]
3081 lock xadd [rdx], eax
3082# else
3083 mov edx, [pu32]
3084 lock xadd [edx], eax
3085# endif
3086 mov u32, eax
3087 }
3088 return u32+1;
3089# endif
3090}
3091#endif
3092
3093
3094/**
3095 * Atomically increment a signed 32-bit value, ordered.
3096 *
3097 * @returns The new value.
3098 * @param pi32 Pointer to the value to increment.
3099 */
3100DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3101{
3102 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3103}
3104
3105
3106/**
3107 * Atomically decrement an unsigned 32-bit value, ordered.
3108 *
3109 * @returns The new value.
3110 * @param pu32 Pointer to the value to decrement.
3111 */
3112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3113DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3114#else
3115DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3116{
3117 uint32_t u32;
3118# if RT_INLINE_ASM_USES_INTRIN
3119 u32 = _InterlockedDecrement((long *)pu32);
3120 return u32;
3121
3122# elif RT_INLINE_ASM_GNU_STYLE
3123 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3124 : "=r" (u32),
3125 "=m" (*pu32)
3126 : "0" (-1)
3127 : "memory");
3128 return u32-1;
3129# else
3130 __asm
3131 {
3132 mov eax, -1
3133# ifdef RT_ARCH_AMD64
3134 mov rdx, [pu32]
3135 lock xadd [rdx], eax
3136# else
3137 mov edx, [pu32]
3138 lock xadd [edx], eax
3139# endif
3140 mov u32, eax
3141 }
3142 return u32-1;
3143# endif
3144}
3145#endif
3146
3147
3148/**
3149 * Atomically decrement a signed 32-bit value, ordered.
3150 *
3151 * @returns The new value.
3152 * @param pi32 Pointer to the value to decrement.
3153 */
3154DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3155{
3156 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3157}
3158
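/* Usage sketch (editorial addition, not part of the original header): simple
 * reference counting with ASMAtomicIncU32 / ASMAtomicDecU32, which both return
 * the *new* count. The EXAMPLEOBJ type and names are illustrative only.
 *
 *      typedef struct EXAMPLEOBJ
 *      {
 *          uint32_t volatile cRefs;
 *      } EXAMPLEOBJ;
 *
 *      DECLINLINE(void) ExampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      DECLINLINE(bool) ExampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          // A new count of zero means the caller just dropped the last reference
 *          // and is now responsible for destroying the object.
 *          return ASMAtomicDecU32(&pObj->cRefs) == 0;
 *      }
 */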
3159
3160/**
3161 * Atomically Or an unsigned 32-bit value, ordered.
3162 *
3163 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3164 * @param u32 The value to OR *pu32 with.
3165 */
3166#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3167DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3168#else
3169DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3170{
3171# if RT_INLINE_ASM_USES_INTRIN
3172 _InterlockedOr((long volatile *)pu32, (long)u32);
3173
3174# elif RT_INLINE_ASM_GNU_STYLE
3175 __asm__ __volatile__("lock; orl %1, %0\n\t"
3176 : "=m" (*pu32)
3177 : "ir" (u32));
3178# else
3179 __asm
3180 {
3181 mov eax, [u32]
3182# ifdef RT_ARCH_AMD64
3183 mov rdx, [pu32]
3184 lock or [rdx], eax
3185# else
3186 mov edx, [pu32]
3187 lock or [edx], eax
3188# endif
3189 }
3190# endif
3191}
3192#endif
3193
3194
3195/**
3196 * Atomically Or a signed 32-bit value, ordered.
3197 *
3198 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3199 * @param i32 The value to OR *pi32 with.
3200 */
3201DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3202{
3203 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3204}
3205
3206
3207/**
3208 * Atomically And an unsigned 32-bit value, ordered.
3209 *
3210 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3211 * @param u32 The value to AND *pu32 with.
3212 */
3213#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3214DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3215#else
3216DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3217{
3218# if RT_INLINE_ASM_USES_INTRIN
3219 _InterlockedAnd((long volatile *)pu32, u32);
3220
3221# elif RT_INLINE_ASM_GNU_STYLE
3222 __asm__ __volatile__("lock; andl %1, %0\n\t"
3223 : "=m" (*pu32)
3224 : "ir" (u32));
3225# else
3226 __asm
3227 {
3228 mov eax, [u32]
3229# ifdef RT_ARCH_AMD64
3230 mov rdx, [pu32]
3231 lock and [rdx], eax
3232# else
3233 mov edx, [pu32]
3234 lock and [edx], eax
3235# endif
3236 }
3237# endif
3238}
3239#endif
3240
3241
3242/**
3243 * Atomically And a signed 32-bit value, ordered.
3244 *
3245 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3246 * @param i32 The value to AND *pi32 with.
3247 */
3248DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3249{
3250 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3251}
3252
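/* Usage sketch (editorial addition, not part of the original header): setting
 * and clearing flag bits with ASMAtomicOrU32 / ASMAtomicAndU32. The
 * EXAMPLE_F_BUSY flag and the names are illustrative only.
 *
 *      #define EXAMPLE_F_BUSY  UINT32_C(0x00000001)
 *
 *      DECLINLINE(void) ExampleSetBusy(uint32_t volatile *pfFlags)
 *      {
 *          ASMAtomicOrU32(pfFlags, EXAMPLE_F_BUSY);    // set the bit
 *      }
 *
 *      DECLINLINE(void) ExampleClearBusy(uint32_t volatile *pfFlags)
 *      {
 *          ASMAtomicAndU32(pfFlags, ~EXAMPLE_F_BUSY);  // clear it by ANDing with the inverted mask
 *      }
 */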
3253
3254/**
3255 * Memory fence, waits for any pending writes and reads to complete.
3256 */
3257DECLINLINE(void) ASMMemoryFence(void)
3258{
3259 /** @todo use mfence? check if all cpus we care for support it. */
3260 uint32_t volatile u32;
3261 ASMAtomicXchgU32(&u32, 0);
3262}
3263
3264
3265/**
3266 * Write fence, waits for any pending writes to complete.
3267 */
3268DECLINLINE(void) ASMWriteFence(void)
3269{
3270 /** @todo use sfence? check if all cpus we care for support it. */
3271 ASMMemoryFence();
3272}
3273
3274
3275/**
3276 * Read fence, waits for any pending reads to complete.
3277 */
3278DECLINLINE(void) ASMReadFence(void)
3279{
3280 /** @todo use lfence? check if all cpus we care for support it. */
3281 ASMMemoryFence();
3282}
3283
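/* Usage sketch (editorial addition, not part of the original header): a simple
 * single-writer/single-reader hand-off using the fences above. This is only a
 * sketch of the ordering idea; all names are illustrative.
 *
 *      DECLINLINE(void) ExamplePublish(uint32_t volatile *pu32Data, bool volatile *pfReady, uint32_t u32)
 *      {
 *          *pu32Data = u32;
 *          ASMWriteFence();            // the data must be visible before the flag
 *          *pfReady = true;
 *      }
 *
 *      DECLINLINE(bool) ExampleTryConsume(uint32_t volatile *pu32Data, bool volatile *pfReady, uint32_t *pu32)
 *      {
 *          if (!*pfReady)
 *              return false;
 *          ASMReadFence();             // don't read the data before seeing the flag
 *          *pu32 = *pu32Data;
 *          return true;
 *      }
 */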
3284
3285/**
3286 * Atomically reads an unsigned 8-bit value, ordered.
3287 *
3288 * @returns Current *pu8 value
3289 * @param pu8 Pointer to the 8-bit variable to read.
3290 */
3291DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3292{
3293 ASMMemoryFence();
3294 return *pu8; /* byte reads are atomic on x86 */
3295}
3296
3297
3298/**
3299 * Atomically reads an unsigned 8-bit value, unordered.
3300 *
3301 * @returns Current *pu8 value
3302 * @param pu8 Pointer to the 8-bit variable to read.
3303 */
3304DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3305{
3306 return *pu8; /* byte reads are atomic on x86 */
3307}
3308
3309
3310/**
3311 * Atomically reads a signed 8-bit value, ordered.
3312 *
3313 * @returns Current *pi8 value
3314 * @param pi8 Pointer to the 8-bit variable to read.
3315 */
3316DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3317{
3318 ASMMemoryFence();
3319 return *pi8; /* byte reads are atomic on x86 */
3320}
3321
3322
3323/**
3324 * Atomically reads a signed 8-bit value, unordered.
3325 *
3326 * @returns Current *pi8 value
3327 * @param pi8 Pointer to the 8-bit variable to read.
3328 */
3329DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3330{
3331 return *pi8; /* byte reads are atomic on x86 */
3332}
3333
3334
3335/**
3336 * Atomically reads an unsigned 16-bit value, ordered.
3337 *
3338 * @returns Current *pu16 value
3339 * @param pu16 Pointer to the 16-bit variable to read.
3340 */
3341DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3342{
3343 ASMMemoryFence();
3344 Assert(!((uintptr_t)pu16 & 1));
3345 return *pu16;
3346}
3347
3348
3349/**
3350 * Atomically reads an unsigned 16-bit value, unordered.
3351 *
3352 * @returns Current *pu16 value
3353 * @param pu16 Pointer to the 16-bit variable to read.
3354 */
3355DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3356{
3357 Assert(!((uintptr_t)pu16 & 1));
3358 return *pu16;
3359}
3360
3361
3362/**
3363 * Atomically reads a signed 16-bit value, ordered.
3364 *
3365 * @returns Current *pi16 value
3366 * @param pi16 Pointer to the 16-bit variable to read.
3367 */
3368DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3369{
3370 ASMMemoryFence();
3371 Assert(!((uintptr_t)pi16 & 1));
3372 return *pi16;
3373}
3374
3375
3376/**
3377 * Atomically reads a signed 16-bit value, unordered.
3378 *
3379 * @returns Current *pi16 value
3380 * @param pi16 Pointer to the 16-bit variable to read.
3381 */
3382DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3383{
3384 Assert(!((uintptr_t)pi16 & 1));
3385 return *pi16;
3386}
3387
3388
3389/**
3390 * Atomically reads an unsigned 32-bit value, ordered.
3391 *
3392 * @returns Current *pu32 value
3393 * @param pu32 Pointer to the 32-bit variable to read.
3394 */
3395DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3396{
3397 ASMMemoryFence();
3398 Assert(!((uintptr_t)pu32 & 3));
3399 return *pu32;
3400}
3401
3402
3403/**
3404 * Atomically reads an unsigned 32-bit value, unordered.
3405 *
3406 * @returns Current *pu32 value
3407 * @param pu32 Pointer to the 32-bit variable to read.
3408 */
3409DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3410{
3411 Assert(!((uintptr_t)pu32 & 3));
3412 return *pu32;
3413}
3414
3415
3416/**
3417 * Atomically reads a signed 32-bit value, ordered.
3418 *
3419 * @returns Current *pi32 value
3420 * @param pi32 Pointer to the 32-bit variable to read.
3421 */
3422DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3423{
3424 ASMMemoryFence();
3425 Assert(!((uintptr_t)pi32 & 3));
3426 return *pi32;
3427}
3428
3429
3430/**
3431 * Atomically reads a signed 32-bit value, unordered.
3432 *
3433 * @returns Current *pi32 value
3434 * @param pi32 Pointer to the 32-bit variable to read.
3435 */
3436DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3437{
3438 Assert(!((uintptr_t)pi32 & 3));
3439 return *pi32;
3440}
3441
3442
3443/**
3444 * Atomically reads an unsigned 64-bit value, ordered.
3445 *
3446 * @returns Current *pu64 value
3447 * @param pu64 Pointer to the 64-bit variable to read.
3448 * The memory pointed to must be writable.
3449 * @remark This will fault if the memory is read-only!
3450 */
3451#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3452DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3453#else
3454DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3455{
3456 uint64_t u64;
3457# ifdef RT_ARCH_AMD64
3458 Assert(!((uintptr_t)pu64 & 7));
3459/*# if RT_INLINE_ASM_GNU_STYLE
3460 __asm__ __volatile__( "mfence\n\t"
3461 "movq %1, %0\n\t"
3462 : "=r" (u64)
3463 : "m" (*pu64));
3464# else
3465 __asm
3466 {
3467 mfence
3468 mov rdx, [pu64]
3469 mov rax, [rdx]
3470 mov [u64], rax
3471 }
3472# endif*/
3473 ASMMemoryFence();
3474 u64 = *pu64;
3475# else /* !RT_ARCH_AMD64 */
3476# if RT_INLINE_ASM_GNU_STYLE
3477# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3478 uint32_t u32EBX = 0;
3479 Assert(!((uintptr_t)pu64 & 7));
3480 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3481 "lock; cmpxchg8b (%5)\n\t"
3482 "movl %3, %%ebx\n\t"
3483 : "=A" (u64),
3484 "=m" (*pu64)
3485 : "0" (0),
3486 "m" (u32EBX),
3487 "c" (0),
3488 "S" (pu64));
3489# else /* !PIC */
3490 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3491 : "=A" (u64),
3492 "=m" (*pu64)
3493 : "0" (0),
3494 "b" (0),
3495 "c" (0));
3496# endif
3497# else
3498 Assert(!((uintptr_t)pu64 & 7));
3499 __asm
3500 {
3501 xor eax, eax
3502 xor edx, edx
3503 mov edi, pu64
3504 xor ecx, ecx
3505 xor ebx, ebx
3506 lock cmpxchg8b [edi]
3507 mov dword ptr [u64], eax
3508 mov dword ptr [u64 + 4], edx
3509 }
3510# endif
3511# endif /* !RT_ARCH_AMD64 */
3512 return u64;
3513}
3514#endif
3515
3516
3517/**
3518 * Atomically reads an unsigned 64-bit value, unordered.
3519 *
3520 * @returns Current *pu64 value
3521 * @param pu64 Pointer to the 64-bit variable to read.
3522 * The memory pointed to must be writable.
3523 * @remark This will fault if the memory is read-only!
3524 */
3525#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3526DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3527#else
3528DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3529{
3530 uint64_t u64;
3531# ifdef RT_ARCH_AMD64
3532 Assert(!((uintptr_t)pu64 & 7));
3533/*# if RT_INLINE_ASM_GNU_STYLE
3534 Assert(!((uintptr_t)pu64 & 7));
3535 __asm__ __volatile__("movq %1, %0\n\t"
3536 : "=r" (u64)
3537 : "m" (*pu64));
3538# else
3539 __asm
3540 {
3541 mov rdx, [pu64]
3542 mov rax, [rdx]
3543 mov [u64], rax
3544 }
3545# endif */
3546 u64 = *pu64;
3547# else /* !RT_ARCH_AMD64 */
3548# if RT_INLINE_ASM_GNU_STYLE
3549# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3550 uint32_t u32EBX = 0;
3551 Assert(!((uintptr_t)pu64 & 7));
3552 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3553 "lock; cmpxchg8b (%5)\n\t"
3554 "movl %3, %%ebx\n\t"
3555 : "=A" (u64),
3556 "=m" (*pu64)
3557 : "0" (0),
3558 "m" (u32EBX),
3559 "c" (0),
3560 "S" (pu64));
3561# else /* !PIC */
3562 __asm__ __volatile__("cmpxchg8b %1\n\t"
3563 : "=A" (u64),
3564 "=m" (*pu64)
3565 : "0" (0),
3566 "b" (0),
3567 "c" (0));
3568# endif
3569# else
3570 Assert(!((uintptr_t)pu64 & 7));
3571 __asm
3572 {
3573 xor eax, eax
3574 xor edx, edx
3575 mov edi, pu64
3576 xor ecx, ecx
3577 xor ebx, ebx
3578 lock cmpxchg8b [edi]
3579 mov dword ptr [u64], eax
3580 mov dword ptr [u64 + 4], edx
3581 }
3582# endif
3583# endif /* !RT_ARCH_AMD64 */
3584 return u64;
3585}
3586#endif
3587
3588
3589/**
3590 * Atomically reads a signed 64-bit value, ordered.
3591 *
3592 * @returns Current *pi64 value
3593 * @param pi64 Pointer to the 64-bit variable to read.
3594 * The memory pointed to must be writable.
3595 * @remark This will fault if the memory is read-only!
3596 */
3597DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3598{
3599 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3600}
3601
3602
3603/**
3604 * Atomically reads a signed 64-bit value, unordered.
3605 *
3606 * @returns Current *pi64 value
3607 * @param pi64 Pointer to the 64-bit variable to read.
3608 * The memory pointed to must be writable.
3609 * @remark This will fault if the memory is read-only!
3610 */
3611DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3612{
3613 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3614}
3615
3616
3617/**
3618 * Atomically reads a pointer value, ordered.
3619 *
3620 * @returns Current *pv value
3621 * @param ppv Pointer to the pointer variable to read.
3622 */
3623DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3624{
3625#if ARCH_BITS == 32
3626 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3627#elif ARCH_BITS == 64
3628 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3629#else
3630# error "ARCH_BITS is bogus"
3631#endif
3632}
3633
3634
3635/**
3636 * Atomically reads a pointer value, unordered.
3637 *
3638 * @returns Current *pv value
3639 * @param ppv Pointer to the pointer variable to read.
3640 */
3641DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3642{
3643#if ARCH_BITS == 32
3644 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3645#elif ARCH_BITS == 64
3646 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3647#else
3648# error "ARCH_BITS is bogus"
3649#endif
3650}
3651
3652
3653/**
3654 * Atomically reads a boolean value, ordered.
3655 *
3656 * @returns Current *pf value
3657 * @param pf Pointer to the boolean variable to read.
3658 */
3659DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3660{
3661 ASMMemoryFence();
3662 return *pf; /* byte reads are atomic on x86 */
3663}
3664
3665
3666/**
3667 * Atomically reads a boolean value, unordered.
3668 *
3669 * @returns Current *pf value
3670 * @param pf Pointer to the boolean variable to read.
3671 */
3672DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3673{
3674 return *pf; /* byte reads are atomic on x86 */
3675}
3676
3677
3678/**
3679 * Atomically read a typical IPRT handle value, ordered.
3680 *
3681 * @param ph Pointer to the handle variable to read.
3682 * @param phRes Where to store the result.
3683 *
3684 * @remarks This doesn't currently work for all handles (like RTFILE).
3685 */
3686#define ASMAtomicReadHandle(ph, phRes) \
3687 do { \
3688 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3689 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3690 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3691 } while (0)
3692
3693
3694/**
3695 * Atomically read a typical IPRT handle value, unordered.
3696 *
3697 * @param ph Pointer to the handle variable to read.
3698 * @param phRes Where to store the result.
3699 *
3700 * @remarks This doesn't currently work for all handles (like RTFILE).
3701 */
3702#define ASMAtomicUoReadHandle(ph, phRes) \
3703 do { \
3704 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
3705 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3706 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3707 } while (0)
3708
3709
3710/**
3711 * Atomically read a value whose size might differ
3712 * between platforms or compilers, ordered.
3713 *
3714 * @param pu Pointer to the variable to update.
3715 * @param puRes Where to store the result.
3716 */
3717#define ASMAtomicReadSize(pu, puRes) \
3718 do { \
3719 switch (sizeof(*(pu))) { \
3720 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3721 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3722 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3723 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3724 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3725 } \
3726 } while (0)
3727
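/* Usage sketch (editorial addition, not part of the original header): reading
 * a variable whose width depends on the target, such as an RTCCUINTREG, with
 * ASMAtomicReadSize. The Example* name is illustrative only.
 *
 *      DECLINLINE(RTCCUINTREG) ExampleReadRegSized(RTCCUINTREG volatile *puShared)
 *      {
 *          RTCCUINTREG uValue;
 *          ASMAtomicReadSize(puShared, &uValue);   // expands to the 32-bit or 64-bit read
 *          return uValue;
 *      }
 */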
3728
3729/**
3730 * Atomically read a value whose size might differ
3731 * between platforms or compilers, unordered.
3732 *
3733 * @param pu Pointer to the variable to update.
3734 * @param puRes Where to store the result.
3735 */
3736#define ASMAtomicUoReadSize(pu, puRes) \
3737 do { \
3738 switch (sizeof(*(pu))) { \
3739 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3740 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3741 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3742 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3743            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3744 } \
3745 } while (0)
3746
3747
3748/**
3749 * Atomically writes an unsigned 8-bit value, ordered.
3750 *
3751 * @param pu8 Pointer to the 8-bit variable.
3752 * @param u8 The 8-bit value to assign to *pu8.
3753 */
3754DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3755{
3756 ASMAtomicXchgU8(pu8, u8);
3757}
3758
3759
3760/**
3761 * Atomically writes an unsigned 8-bit value, unordered.
3762 *
3763 * @param pu8 Pointer to the 8-bit variable.
3764 * @param u8 The 8-bit value to assign to *pu8.
3765 */
3766DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3767{
3768 *pu8 = u8; /* byte writes are atomic on x86 */
3769}
3770
3771
3772/**
3773 * Atomically writes a signed 8-bit value, ordered.
3774 *
3775 * @param pi8 Pointer to the 8-bit variable.
3776 * @param i8 The 8-bit value to assign to *pi8.
3777 */
3778DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3779{
3780 ASMAtomicXchgS8(pi8, i8);
3781}
3782
3783
3784/**
3785 * Atomically writes a signed 8-bit value, unordered.
3786 *
3787 * @param pi8 Pointer to the 8-bit variable.
3788 * @param i8 The 8-bit value to assign to *pi8.
3789 */
3790DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3791{
3792 *pi8 = i8; /* byte writes are atomic on x86 */
3793}
3794
3795
3796/**
3797 * Atomically writes an unsigned 16-bit value, ordered.
3798 *
3799 * @param pu16 Pointer to the 16-bit variable.
3800 * @param u16 The 16-bit value to assign to *pu16.
3801 */
3802DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3803{
3804 ASMAtomicXchgU16(pu16, u16);
3805}
3806
3807
3808/**
3809 * Atomically writes an unsigned 16-bit value, unordered.
3810 *
3811 * @param pu16 Pointer to the 16-bit variable.
3812 * @param u16 The 16-bit value to assign to *pu16.
3813 */
3814DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3815{
3816 Assert(!((uintptr_t)pu16 & 1));
3817 *pu16 = u16;
3818}
3819
3820
3821/**
3822 * Atomically writes a signed 16-bit value, ordered.
3823 *
3824 * @param pi16 Pointer to the 16-bit variable.
3825 * @param i16 The 16-bit value to assign to *pi16.
3826 */
3827DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3828{
3829 ASMAtomicXchgS16(pi16, i16);
3830}
3831
3832
3833/**
3834 * Atomically writes a signed 16-bit value, unordered.
3835 *
3836 * @param pi16 Pointer to the 16-bit variable.
3837 * @param i16 The 16-bit value to assign to *pi16.
3838 */
3839DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3840{
3841 Assert(!((uintptr_t)pi16 & 1));
3842 *pi16 = i16;
3843}
3844
3845
3846/**
3847 * Atomically writes an unsigned 32-bit value, ordered.
3848 *
3849 * @param pu32 Pointer to the 32-bit variable.
3850 * @param u32 The 32-bit value to assign to *pu32.
3851 */
3852DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3853{
3854 ASMAtomicXchgU32(pu32, u32);
3855}
3856
3857
3858/**
3859 * Atomically writes an unsigned 32-bit value, unordered.
3860 *
3861 * @param pu32 Pointer to the 32-bit variable.
3862 * @param u32 The 32-bit value to assign to *pu32.
3863 */
3864DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3865{
3866 Assert(!((uintptr_t)pu32 & 3));
3867 *pu32 = u32;
3868}
3869
3870
3871/**
3872 * Atomically writes a signed 32-bit value, ordered.
3873 *
3874 * @param pi32 Pointer to the 32-bit variable.
3875 * @param i32 The 32-bit value to assign to *pi32.
3876 */
3877DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3878{
3879 ASMAtomicXchgS32(pi32, i32);
3880}
3881
3882
3883/**
3884 * Atomically writes a signed 32-bit value, unordered.
3885 *
3886 * @param pi32 Pointer to the 32-bit variable.
3887 * @param i32 The 32-bit value to assign to *pi32.
3888 */
3889DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3890{
3891 Assert(!((uintptr_t)pi32 & 3));
3892 *pi32 = i32;
3893}
3894
3895
3896/**
3897 * Atomically writes an unsigned 64-bit value, ordered.
3898 *
3899 * @param pu64 Pointer to the 64-bit variable.
3900 * @param u64 The 64-bit value to assign to *pu64.
3901 */
3902DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3903{
3904 ASMAtomicXchgU64(pu64, u64);
3905}
3906
3907
3908/**
3909 * Atomically writes an unsigned 64-bit value, unordered.
3910 *
3911 * @param pu64 Pointer to the 64-bit variable.
3912 * @param u64 The 64-bit value to assign to *pu64.
3913 */
3914DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3915{
3916 Assert(!((uintptr_t)pu64 & 7));
3917#if ARCH_BITS == 64
3918 *pu64 = u64;
3919#else
3920 ASMAtomicXchgU64(pu64, u64);
3921#endif
3922}
3923
3924
3925/**
3926 * Atomically writes a signed 64-bit value, ordered.
3927 *
3928 * @param pi64 Pointer to the 64-bit variable.
3929 * @param i64 The 64-bit value to assign to *pi64.
3930 */
3931DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3932{
3933 ASMAtomicXchgS64(pi64, i64);
3934}
3935
3936
3937/**
3938 * Atomically writes a signed 64-bit value, unordered.
3939 *
3940 * @param pi64 Pointer to the 64-bit variable.
3941 * @param i64 The 64-bit value to assign to *pi64.
3942 */
3943DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3944{
3945 Assert(!((uintptr_t)pi64 & 7));
3946#if ARCH_BITS == 64
3947 *pi64 = i64;
3948#else
3949 ASMAtomicXchgS64(pi64, i64);
3950#endif
3951}
3952
3953
3954/**
3955 * Atomically writes a boolean value, ordered.
3956 *
3957 * @param pf Pointer to the boolean variable.
3958 * @param f The boolean value to assign to *pf.
3959 */
3960DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3961{
3962 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3963}
3964
3965
3966/**
3967 * Atomically writes a boolean value, unordered.
3968 *
3969 * @param pf Pointer to the boolean variable.
3970 * @param f The boolean value to assign to *pf.
3971 */
3972DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3973{
3974 *pf = f; /* byte writes are atomic on x86 */
3975}
3976
3977
3978/**
3979 * Atomically writes a pointer value, ordered.
3980 *
3982 * @param ppv Pointer to the pointer variable.
3983 * @param pv The pointer value to assign to *ppv.
3984 */
3985DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3986{
3987#if ARCH_BITS == 32
3988 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3989#elif ARCH_BITS == 64
3990 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3991#else
3992# error "ARCH_BITS is bogus"
3993#endif
3994}
3995
3996
3997/**
3998 * Atomically writes a pointer value, unordered.
3999 *
4001 * @param ppv Pointer to the pointer variable.
4002 * @param pv The pointer value to assign to *ppv.
4003 */
4004DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
4005{
4006#if ARCH_BITS == 32
4007 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4008#elif ARCH_BITS == 64
4009 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4010#else
4011# error "ARCH_BITS is bogus"
4012#endif
4013}
4014
4015
4016/**
4017 * Atomically write a typical IPRT handle value, ordered.
4018 *
4019 * @param ph Pointer to the variable to update.
4020 * @param hNew The value to assign to *ph.
4021 *
4022 * @remarks This doesn't currently work for all handles (like RTFILE).
4023 */
4024#define ASMAtomicWriteHandle(ph, hNew) \
4025 do { \
4026 ASMAtomicWritePtr((void * volatile *)(ph), (void *)hNew); \
4027 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4028 } while (0)
4029
4030
4031/**
4032 * Atomically write a typical IPRT handle value, unordered.
4033 *
4034 * @param ph Pointer to the variable to update.
4035 * @param hNew The value to assign to *ph.
4036 *
4037 * @remarks This doesn't currently work for all handles (like RTFILE).
4038 */
4039#define ASMAtomicUoWriteHandle(ph, hNew) \
4040 do { \
4041 ASMAtomicUoWritePtr((void * volatile *)(ph), (void *)hNew); \
4042 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4043 } while (0)
4044
4045
4046/**
4047 * Atomically write a value whose size might differ
4048 * between platforms or compilers, ordered.
4049 *
4050 * @param pu Pointer to the variable to update.
4051 * @param uNew The value to assign to *pu.
4052 */
4053#define ASMAtomicWriteSize(pu, uNew) \
4054 do { \
4055 switch (sizeof(*(pu))) { \
4056 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4057 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4058 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4059 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4060 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4061 } \
4062 } while (0)
4063
4064/**
4065 * Atomically write a value whose size might differ
4066 * between platforms or compilers, unordered.
4067 *
4068 * @param pu Pointer to the variable to update.
4069 * @param uNew The value to assign to *pu.
4070 */
4071#define ASMAtomicUoWriteSize(pu, uNew) \
4072 do { \
4073 switch (sizeof(*(pu))) { \
4074 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4075 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4076 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4077 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4078            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4079 } \
4080 } while (0)
4081
4082
4083
4084
4085/**
4086 * Invalidate page.
4087 *
4088 * @param pv Address of the page to invalidate.
4089 */
4090#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4091DECLASM(void) ASMInvalidatePage(void *pv);
4092#else
4093DECLINLINE(void) ASMInvalidatePage(void *pv)
4094{
4095# if RT_INLINE_ASM_USES_INTRIN
4096 __invlpg(pv);
4097
4098# elif RT_INLINE_ASM_GNU_STYLE
4099 __asm__ __volatile__("invlpg %0\n\t"
4100 : : "m" (*(uint8_t *)pv));
4101# else
4102 __asm
4103 {
4104# ifdef RT_ARCH_AMD64
4105 mov rax, [pv]
4106 invlpg [rax]
4107# else
4108 mov eax, [pv]
4109 invlpg [eax]
4110# endif
4111 }
4112# endif
4113}
4114#endif
4115
4116
4117#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4118# if PAGE_SIZE != 0x1000
4119# error "PAGE_SIZE is not 0x1000!"
4120# endif
4121#endif
4122
4123/**
4124 * Zeros a 4K memory page.
4125 *
4126 * @param pv Pointer to the memory block. This must be page aligned.
4127 */
4128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4129DECLASM(void) ASMMemZeroPage(volatile void *pv);
4130# else
4131DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4132{
4133# if RT_INLINE_ASM_USES_INTRIN
4134# ifdef RT_ARCH_AMD64
4135 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4136# else
4137 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4138# endif
4139
4140# elif RT_INLINE_ASM_GNU_STYLE
4141 RTCCUINTREG uDummy;
4142# ifdef RT_ARCH_AMD64
4143 __asm__ __volatile__ ("rep stosq"
4144 : "=D" (pv),
4145 "=c" (uDummy)
4146 : "0" (pv),
4147 "c" (0x1000 >> 3),
4148 "a" (0)
4149 : "memory");
4150# else
4151 __asm__ __volatile__ ("rep stosl"
4152 : "=D" (pv),
4153 "=c" (uDummy)
4154 : "0" (pv),
4155 "c" (0x1000 >> 2),
4156 "a" (0)
4157 : "memory");
4158# endif
4159# else
4160 __asm
4161 {
4162# ifdef RT_ARCH_AMD64
4163 xor rax, rax
4164 mov ecx, 0200h
4165 mov rdi, [pv]
4166 rep stosq
4167# else
4168 xor eax, eax
4169 mov ecx, 0400h
4170 mov edi, [pv]
4171 rep stosd
4172# endif
4173 }
4174# endif
4175}
4176# endif
4177
4178
4179/**
4180 * Zeros a memory block with a 32-bit aligned size.
4181 *
4182 * @param pv Pointer to the memory block.
4183 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4184 */
4185#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4186DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4187#else
4188DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4189{
4190# if RT_INLINE_ASM_USES_INTRIN
4191# ifdef RT_ARCH_AMD64
4192 if (!(cb & 7))
4193 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4194 else
4195# endif
4196 __stosd((unsigned long *)pv, 0, cb / 4);
4197
4198# elif RT_INLINE_ASM_GNU_STYLE
4199 __asm__ __volatile__ ("rep stosl"
4200 : "=D" (pv),
4201 "=c" (cb)
4202 : "0" (pv),
4203 "1" (cb >> 2),
4204 "a" (0)
4205 : "memory");
4206# else
4207 __asm
4208 {
4209 xor eax, eax
4210# ifdef RT_ARCH_AMD64
4211 mov rcx, [cb]
4212 shr rcx, 2
4213 mov rdi, [pv]
4214# else
4215 mov ecx, [cb]
4216 shr ecx, 2
4217 mov edi, [pv]
4218# endif
4219 rep stosd
4220 }
4221# endif
4222}
4223#endif
4224
4225
4226/**
4227 * Fills a memory block with a 32-bit aligned size.
4228 *
4229 * @param pv Pointer to the memory block.
4230 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4231 * @param u32 The value to fill with.
4232 */
4233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4234DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4235#else
4236DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4237{
4238# if RT_INLINE_ASM_USES_INTRIN
4239# ifdef RT_ARCH_AMD64
4240 if (!(cb & 7))
4241 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4242 else
4243# endif
4244 __stosd((unsigned long *)pv, u32, cb / 4);
4245
4246# elif RT_INLINE_ASM_GNU_STYLE
4247 __asm__ __volatile__ ("rep stosl"
4248 : "=D" (pv),
4249 "=c" (cb)
4250 : "0" (pv),
4251 "1" (cb >> 2),
4252 "a" (u32)
4253 : "memory");
4254# else
4255 __asm
4256 {
4257# ifdef RT_ARCH_AMD64
4258 mov rcx, [cb]
4259 shr rcx, 2
4260 mov rdi, [pv]
4261# else
4262 mov ecx, [cb]
4263 shr ecx, 2
4264 mov edi, [pv]
4265# endif
4266 mov eax, [u32]
4267 rep stosd
4268 }
4269# endif
4270}
4271#endif
4272
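/* Usage sketch (editorial addition, not part of the original header): clearing
 * and poisoning a structure whose size is a multiple of 4, as both functions
 * require. The EXAMPLEDESC type and names are illustrative only.
 *
 *      typedef struct EXAMPLEDESC
 *      {
 *          uint32_t au32Fields[16];
 *      } EXAMPLEDESC;
 *
 *      DECLINLINE(void) ExampleInitDesc(EXAMPLEDESC *pDesc)
 *      {
 *          ASMMemZero32(pDesc, sizeof(*pDesc));
 *      }
 *
 *      DECLINLINE(void) ExamplePoisonDesc(EXAMPLEDESC *pDesc)
 *      {
 *          ASMMemFill32(pDesc, sizeof(*pDesc), UINT32_C(0xdeadbeef));
 *      }
 */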
4273
4274/**
4275 * Checks if a memory block is filled with the specified byte.
4276 *
4277 * This is a sort of inverted memchr.
4278 *
4279 * @returns Pointer to the byte which doesn't equal u8.
4280 * @returns NULL if all equal to u8.
4281 *
4282 * @param pv Pointer to the memory block.
4283 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4284 * @param u8 The value it's supposed to be filled with.
4285 */
4286#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4287DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4288#else
4289DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4290{
4291/** @todo rewrite this in inline assembly? */
4292 uint8_t const *pb = (uint8_t const *)pv;
4293 for (; cb; cb--, pb++)
4294 if (RT_UNLIKELY(*pb != u8))
4295 return (void *)pb;
4296 return NULL;
4297}
4298#endif
4299
4300
4301/**
4302 * Checks if a memory block is filled with the specified 32-bit value.
4303 *
4304 * This is a sort of inverted memchr.
4305 *
4306 * @returns Pointer to the first value which doesn't equal u32.
4307 * @returns NULL if all equal to u32.
4308 *
4309 * @param pv Pointer to the memory block.
4310 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4311 * @param u32 The value it's supposed to be filled with.
4312 */
4313#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4314DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4315#else
4316DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4317{
4318/** @todo rewrite this in inline assembly? */
4319 uint32_t const *pu32 = (uint32_t const *)pv;
4320 for (; cb; cb -= 4, pu32++)
4321 if (RT_UNLIKELY(*pu32 != u32))
4322 return (uint32_t *)pu32;
4323 return NULL;
4324}
4325#endif
4326
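/* Usage sketch (editorial addition, not part of the original header): checking
 * whether a 4K page is all zeros. The Example* name is illustrative only.
 *
 *      DECLINLINE(bool) ExampleIsZeroPage(void const *pvPage)
 *      {
 *          // NULL means every 32-bit word in the 0x1000 byte page equals zero.
 *          return ASMMemIsAllU32(pvPage, 0x1000, 0) == NULL;
 *      }
 */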
4327
4328/**
4329 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4330 *
4331 * @returns u32F1 * u32F2.
4332 */
4333#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4334DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4335#else
4336DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4337{
4338# ifdef RT_ARCH_AMD64
4339 return (uint64_t)u32F1 * u32F2;
4340# else /* !RT_ARCH_AMD64 */
4341 uint64_t u64;
4342# if RT_INLINE_ASM_GNU_STYLE
4343 __asm__ __volatile__("mull %%edx"
4344 : "=A" (u64)
4345 : "a" (u32F2), "d" (u32F1));
4346# else
4347 __asm
4348 {
4349 mov edx, [u32F1]
4350 mov eax, [u32F2]
4351 mul edx
4352 mov dword ptr [u64], eax
4353 mov dword ptr [u64 + 4], edx
4354 }
4355# endif
4356 return u64;
4357# endif /* !RT_ARCH_AMD64 */
4358}
4359#endif
4360
4361
4362/**
4363 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4364 *
4365 * @returns i32F1 * i32F2.
4366 */
4367#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4368DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4369#else
4370DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4371{
4372# ifdef RT_ARCH_AMD64
4373 return (int64_t)i32F1 * i32F2;
4374# else /* !RT_ARCH_AMD64 */
4375 int64_t i64;
4376# if RT_INLINE_ASM_GNU_STYLE
4377 __asm__ __volatile__("imull %%edx"
4378 : "=A" (i64)
4379 : "a" (i32F2), "d" (i32F1));
4380# else
4381 __asm
4382 {
4383 mov edx, [i32F1]
4384 mov eax, [i32F2]
4385 imul edx
4386 mov dword ptr [i64], eax
4387 mov dword ptr [i64 + 4], edx
4388 }
4389# endif
4390 return i64;
4391# endif /* !RT_ARCH_AMD64 */
4392}
4393#endif
4394
4395
4396/**
4397 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4398 *
4399 * @returns u64 / u32.
4400 */
4401#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4402DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4403#else
4404DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4405{
4406# ifdef RT_ARCH_AMD64
4407 return (uint32_t)(u64 / u32);
4408# else /* !RT_ARCH_AMD64 */
4409# if RT_INLINE_ASM_GNU_STYLE
4410 RTCCUINTREG uDummy;
4411 __asm__ __volatile__("divl %3"
4412 : "=a" (u32), "=d"(uDummy)
4413 : "A" (u64), "r" (u32));
4414# else
4415 __asm
4416 {
4417 mov eax, dword ptr [u64]
4418 mov edx, dword ptr [u64 + 4]
4419 mov ecx, [u32]
4420 div ecx
4421 mov [u32], eax
4422 }
4423# endif
4424 return u32;
4425# endif /* !RT_ARCH_AMD64 */
4426}
4427#endif
4428
4429
4430/**
4431 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4432 *
4433 * @returns i64 / i32.
4434 */
4435#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4436DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4437#else
4438DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4439{
4440# ifdef RT_ARCH_AMD64
4441 return (int32_t)(i64 / i32);
4442# else /* !RT_ARCH_AMD64 */
4443# if RT_INLINE_ASM_GNU_STYLE
4444 RTCCUINTREG iDummy;
4445 __asm__ __volatile__("idivl %3"
4446 : "=a" (i32), "=d"(iDummy)
4447 : "A" (i64), "r" (i32));
4448# else
4449 __asm
4450 {
4451 mov eax, dword ptr [i64]
4452 mov edx, dword ptr [i64 + 4]
4453 mov ecx, [i32]
4454 idiv ecx
4455 mov [i32], eax
4456 }
4457# endif
4458 return i32;
4459# endif /* !RT_ARCH_AMD64 */
4460}
4461#endif
4462
4463
4464/**
4465 * Performs a 64-bit unsigned by 32-bit unsigned division, returning the
4466 * 32-bit unsigned remainder.
4467 *
4468 * @returns u64 % u32.
4469 *
4470 * @remarks The quotient (u64 / u32) must fit in 32 bits (<= UINT32_MAX) or the division will trap.
4471 */
4472#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4473DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4474#else
4475DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4476{
4477# ifdef RT_ARCH_AMD64
4478 return (uint32_t)(u64 % u32);
4479# else /* !RT_ARCH_AMD64 */
4480# if RT_INLINE_ASM_GNU_STYLE
4481 RTCCUINTREG uDummy;
4482 __asm__ __volatile__("divl %3"
4483 : "=a" (uDummy), "=d"(u32)
4484 : "A" (u64), "r" (u32));
4485# else
4486 __asm
4487 {
4488 mov eax, dword ptr [u64]
4489 mov edx, dword ptr [u64 + 4]
4490 mov ecx, [u32]
4491 div ecx
4492 mov [u32], edx
4493 }
4494# endif
4495 return u32;
4496# endif /* !RT_ARCH_AMD64 */
4497}
4498#endif
4499
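/* Usage sketch (illustration only, not part of the original header): splitting
 * a byte count into whole blocks and a remainder. On 32-bit hosts these
 * helpers compile to a single DIV, so the quotient (u64 / u32) must fit in 32
 * bits or the CPU raises #DE; the hypothetical caller below assumes
 * cbTotal / cbBlock <= UINT32_MAX. */
#if 0
static void ExampleSplit(uint64_t cbTotal, uint32_t cbBlock,
                         uint32_t *pcBlocks, uint32_t *pcbLeft)
{
    *pcBlocks = ASMDivU64ByU32RetU32(cbTotal, cbBlock);
    *pcbLeft  = ASMModU64ByU32RetU32(cbTotal, cbBlock);
}
#endif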
4500
4501/**
4502 * Performs a 64-bit signed by 32-bit signed division, returning the
4503 * 32-bit signed remainder.
4504 *
4505 * @returns i64 % i32.
4506 *
4507 * @remarks The quotient (i64 / i32) must fit in 32 bits or the division will trap.
4508 */
4509#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4510DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4511#else
4512DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4513{
4514# ifdef RT_ARCH_AMD64
4515 return (int32_t)(i64 % i32);
4516# else /* !RT_ARCH_AMD64 */
4517# if RT_INLINE_ASM_GNU_STYLE
4518 RTCCUINTREG iDummy;
4519 __asm__ __volatile__("idivl %3"
4520 : "=a" (iDummy), "=d"(i32)
4521 : "A" (i64), "r" (i32));
4522# else
4523 __asm
4524 {
4525 mov eax, dword ptr [i64]
4526 mov edx, dword ptr [i64 + 4]
4527 mov ecx, [i32]
4528 idiv ecx
4529 mov [i32], edx
4530 }
4531# endif
4532 return i32;
4533# endif /* !RT_ARCH_AMD64 */
4534}
4535#endif
4536
4537
4538/**
4539 * Multiplies a 64-bit value by a 32-bit value and divides the result by a 32-bit value,
4540 * using a 96-bit intermediate result.
4541 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4542 * __udivdi3 and __umoddi3 even if this inline function is not used.
4543 *
4544 * @returns (u64A * u32B) / u32C.
4545 * @param u64A The 64-bit value.
4546 * @param u32B The 32-bit value to multiply A by.
4547 * @param u32C The 32-bit value to divide A*B by.
4548 */
4549#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4550DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4551#else
4552DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4553{
4554# if RT_INLINE_ASM_GNU_STYLE
4555# ifdef RT_ARCH_AMD64
4556 uint64_t u64Result, u64Spill;
4557 __asm__ __volatile__("mulq %2\n\t"
4558 "divq %3\n\t"
4559 : "=a" (u64Result),
4560 "=d" (u64Spill)
4561 : "r" ((uint64_t)u32B),
4562 "r" ((uint64_t)u32C),
4563 "0" (u64A),
4564 "1" (0));
4565 return u64Result;
4566# else
4567 uint32_t u32Dummy;
4568 uint64_t u64Result;
4569 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4570 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4571 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4572 eax = u64A.hi */
4573 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4574 edx = u32C */
4575 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4576 edx = u32B */
4577 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4578 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4579 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4580 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4581 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4582 edx = u64Hi % u32C */
4583 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4584 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4585 "divl %%ecx \n\t" /* u64Result.lo */
4586 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4587 : "=A"(u64Result), "=c"(u32Dummy),
4588 "=S"(u32Dummy), "=D"(u32Dummy)
4589 : "a"((uint32_t)u64A),
4590 "S"((uint32_t)(u64A >> 32)),
4591 "c"(u32B),
4592 "D"(u32C));
4593 return u64Result;
4594# endif
4595# else
4596 RTUINT64U u;
4597 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4598 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4599 u64Hi += (u64Lo >> 32);
4600 u.s.Hi = (uint32_t)(u64Hi / u32C);
4601 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4602 return u.u;
4603# endif
4604}
4605#endif
4606
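/* Usage sketch (illustration only, not part of the original header): scaling a
 * timestamp from one clock rate to another, i.e. u64Ticks * u32DstHz / u32SrcHz.
 * The 96-bit intermediate keeps the product from overflowing for large tick
 * counts. The conversion below (ticks to nanoseconds) is hypothetical. */
#if 0
static uint64_t ExampleTicksToNano(uint64_t u64Ticks, uint32_t u32TicksPerSec)
{
    return ASMMultU64ByU32DivByU32(u64Ticks, UINT32_C(1000000000), u32TicksPerSec);
}
#endif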
4607
4608/**
4609 * Probes a byte pointer for read access.
4610 *
4611 * While the function will fault if the byte is not read accessible,
4612 * the idea is to do the probing in a safe place, e.g. before acquiring
4613 * locks and the like.
4614 *
4615 * Also, this function guarantees that an eager compiler is not going
4616 * to optimize the probing away.
4617 *
4618 * @param pvByte Pointer to the byte.
4619 */
4620#if RT_INLINE_ASM_EXTERNAL
4621DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4622#else
4623DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4624{
4625 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4626 uint8_t u8;
4627# if RT_INLINE_ASM_GNU_STYLE
4628 __asm__ __volatile__("movb (%1), %0\n\t"
4629 : "=r" (u8)
4630 : "r" (pvByte));
4631# else
4632 __asm
4633 {
4634# ifdef RT_ARCH_AMD64
4635 mov rax, [pvByte]
4636 mov al, [rax]
4637# else
4638 mov eax, [pvByte]
4639 mov al, [eax]
4640# endif
4641 mov [u8], al
4642 }
4643# endif
4644 return u8;
4645}
4646#endif
4647
4648/**
4649 * Probes a buffer for read access page by page.
4650 *
4651 * While the function will fault if the buffer is not fully read
4652 * accessible, the idea is to do the probing in a safe place, e.g. before
4653 * acquiring locks and the like.
4654 *
4655 * Also, this function guarantees that an eager compiler is not going
4656 * to optimize the probing away.
4657 *
4658 * @param pvBuf Pointer to the buffer.
4659 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4660 */
4661DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4662{
4663 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4664 /* the first byte */
4665 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4666 ASMProbeReadByte(pu8);
4667
4668    /* the whole pages in between. */
4669 while (cbBuf > /*PAGE_SIZE*/0x1000)
4670 {
4671 ASMProbeReadByte(pu8);
4672 cbBuf -= /*PAGE_SIZE*/0x1000;
4673 pu8 += /*PAGE_SIZE*/0x1000;
4674 }
4675
4676 /* the last byte */
4677 ASMProbeReadByte(pu8 + cbBuf - 1);
4678}
4679
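/* Usage sketch (illustration only, not part of the original header): touching a
 * caller-supplied request buffer up front so that any page fault happens here,
 * before any locks are taken. The request handling below is hypothetical. */
#if 0
static void ExampleHandleRequest(const void *pvReq, size_t cbReq)
{
    ASMProbeReadBuffer(pvReq, cbReq); /* faults now if any page is unreadable */
    /* ... enter critical section and process the request ... */
}
#endif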
4680
4681/** @def ASMBreakpoint
4682 * Debugger Breakpoint.
4683 * @remark In the gnu world we add a nop instruction after the int3 to
4684 * force gdb to remain at the int3 source line.
4685 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4686 * @internal
4687 */
4688#if RT_INLINE_ASM_GNU_STYLE
4689# ifndef __L4ENV__
4690# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4691# else
4692# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4693# endif
4694#else
4695# define ASMBreakpoint() __debugbreak()
4696#endif
4697
4698
4699
4700/** @defgroup grp_inline_bits Bit Operations
4701 * @{
4702 */
4703
4704
4705/**
4706 * Sets a bit in a bitmap.
4707 *
4708 * @param pvBitmap Pointer to the bitmap.
4709 * @param iBit The bit to set.
4710 */
4711#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4712DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4713#else
4714DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4715{
4716# if RT_INLINE_ASM_USES_INTRIN
4717 _bittestandset((long *)pvBitmap, iBit);
4718
4719# elif RT_INLINE_ASM_GNU_STYLE
4720 __asm__ __volatile__ ("btsl %1, %0"
4721 : "=m" (*(volatile long *)pvBitmap)
4722 : "Ir" (iBit)
4723 : "memory");
4724# else
4725 __asm
4726 {
4727# ifdef RT_ARCH_AMD64
4728 mov rax, [pvBitmap]
4729 mov edx, [iBit]
4730 bts [rax], edx
4731# else
4732 mov eax, [pvBitmap]
4733 mov edx, [iBit]
4734 bts [eax], edx
4735# endif
4736 }
4737# endif
4738}
4739#endif
4740
4741
4742/**
4743 * Atomically sets a bit in a bitmap, ordered.
4744 *
4745 * @param pvBitmap Pointer to the bitmap.
4746 * @param iBit The bit to set.
4747 */
4748#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4749DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4750#else
4751DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4752{
4753# if RT_INLINE_ASM_USES_INTRIN
4754 _interlockedbittestandset((long *)pvBitmap, iBit);
4755# elif RT_INLINE_ASM_GNU_STYLE
4756 __asm__ __volatile__ ("lock; btsl %1, %0"
4757 : "=m" (*(volatile long *)pvBitmap)
4758 : "Ir" (iBit)
4759 : "memory");
4760# else
4761 __asm
4762 {
4763# ifdef RT_ARCH_AMD64
4764 mov rax, [pvBitmap]
4765 mov edx, [iBit]
4766 lock bts [rax], edx
4767# else
4768 mov eax, [pvBitmap]
4769 mov edx, [iBit]
4770 lock bts [eax], edx
4771# endif
4772 }
4773# endif
4774}
4775#endif
4776
4777
4778/**
4779 * Clears a bit in a bitmap.
4780 *
4781 * @param pvBitmap Pointer to the bitmap.
4782 * @param iBit The bit to clear.
4783 */
4784#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4785DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4786#else
4787DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4788{
4789# if RT_INLINE_ASM_USES_INTRIN
4790 _bittestandreset((long *)pvBitmap, iBit);
4791
4792# elif RT_INLINE_ASM_GNU_STYLE
4793 __asm__ __volatile__ ("btrl %1, %0"
4794 : "=m" (*(volatile long *)pvBitmap)
4795 : "Ir" (iBit)
4796 : "memory");
4797# else
4798 __asm
4799 {
4800# ifdef RT_ARCH_AMD64
4801 mov rax, [pvBitmap]
4802 mov edx, [iBit]
4803 btr [rax], edx
4804# else
4805 mov eax, [pvBitmap]
4806 mov edx, [iBit]
4807 btr [eax], edx
4808# endif
4809 }
4810# endif
4811}
4812#endif
4813
4814
4815/**
4816 * Atomically clears a bit in a bitmap, ordered.
4817 *
4818 * @param pvBitmap Pointer to the bitmap.
4819 * @param iBit The bit to clear.
4820 * @remark No memory barrier, take care on smp.
4821 */
4822#if RT_INLINE_ASM_EXTERNAL
4823DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4824#else
4825DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4826{
4827# if RT_INLINE_ASM_GNU_STYLE
4828 __asm__ __volatile__ ("lock; btrl %1, %0"
4829 : "=m" (*(volatile long *)pvBitmap)
4830 : "Ir" (iBit)
4831 : "memory");
4832# else
4833 __asm
4834 {
4835# ifdef RT_ARCH_AMD64
4836 mov rax, [pvBitmap]
4837 mov edx, [iBit]
4838 lock btr [rax], edx
4839# else
4840 mov eax, [pvBitmap]
4841 mov edx, [iBit]
4842 lock btr [eax], edx
4843# endif
4844 }
4845# endif
4846}
4847#endif
4848
4849
4850/**
4851 * Toggles a bit in a bitmap.
4852 *
4853 * @param pvBitmap Pointer to the bitmap.
4854 * @param iBit The bit to toggle.
4855 */
4856#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4857DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4858#else
4859DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4860{
4861# if RT_INLINE_ASM_USES_INTRIN
4862 _bittestandcomplement((long *)pvBitmap, iBit);
4863# elif RT_INLINE_ASM_GNU_STYLE
4864 __asm__ __volatile__ ("btcl %1, %0"
4865 : "=m" (*(volatile long *)pvBitmap)
4866 : "Ir" (iBit)
4867 : "memory");
4868# else
4869 __asm
4870 {
4871# ifdef RT_ARCH_AMD64
4872 mov rax, [pvBitmap]
4873 mov edx, [iBit]
4874 btc [rax], edx
4875# else
4876 mov eax, [pvBitmap]
4877 mov edx, [iBit]
4878 btc [eax], edx
4879# endif
4880 }
4881# endif
4882}
4883#endif
4884
4885
4886/**
4887 * Atomically toggles a bit in a bitmap, ordered.
4888 *
4889 * @param pvBitmap Pointer to the bitmap.
4890 * @param iBit The bit to toggle.
4891 */
4892#if RT_INLINE_ASM_EXTERNAL
4893DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4894#else
4895DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4896{
4897# if RT_INLINE_ASM_GNU_STYLE
4898 __asm__ __volatile__ ("lock; btcl %1, %0"
4899 : "=m" (*(volatile long *)pvBitmap)
4900 : "Ir" (iBit)
4901 : "memory");
4902# else
4903 __asm
4904 {
4905# ifdef RT_ARCH_AMD64
4906 mov rax, [pvBitmap]
4907 mov edx, [iBit]
4908 lock btc [rax], edx
4909# else
4910 mov eax, [pvBitmap]
4911 mov edx, [iBit]
4912 lock btc [eax], edx
4913# endif
4914 }
4915# endif
4916}
4917#endif
4918
4919
4920/**
4921 * Tests and sets a bit in a bitmap.
4922 *
4923 * @returns true if the bit was set.
4924 * @returns false if the bit was clear.
4925 * @param pvBitmap Pointer to the bitmap.
4926 * @param iBit The bit to test and set.
4927 */
4928#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4929DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4930#else
4931DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4932{
4933 union { bool f; uint32_t u32; uint8_t u8; } rc;
4934# if RT_INLINE_ASM_USES_INTRIN
4935 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4936
4937# elif RT_INLINE_ASM_GNU_STYLE
4938 __asm__ __volatile__ ("btsl %2, %1\n\t"
4939 "setc %b0\n\t"
4940 "andl $1, %0\n\t"
4941 : "=q" (rc.u32),
4942 "=m" (*(volatile long *)pvBitmap)
4943 : "Ir" (iBit)
4944 : "memory");
4945# else
4946 __asm
4947 {
4948 mov edx, [iBit]
4949# ifdef RT_ARCH_AMD64
4950 mov rax, [pvBitmap]
4951 bts [rax], edx
4952# else
4953 mov eax, [pvBitmap]
4954 bts [eax], edx
4955# endif
4956 setc al
4957 and eax, 1
4958 mov [rc.u32], eax
4959 }
4960# endif
4961 return rc.f;
4962}
4963#endif
4964
4965
4966/**
4967 * Atomically tests and sets a bit in a bitmap, ordered.
4968 *
4969 * @returns true if the bit was set.
4970 * @returns false if the bit was clear.
4971 * @param pvBitmap Pointer to the bitmap.
4972 * @param iBit The bit to set.
4973 */
4974#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4975DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4976#else
4977DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4978{
4979 union { bool f; uint32_t u32; uint8_t u8; } rc;
4980# if RT_INLINE_ASM_USES_INTRIN
4981 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4982# elif RT_INLINE_ASM_GNU_STYLE
4983 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4984 "setc %b0\n\t"
4985 "andl $1, %0\n\t"
4986 : "=q" (rc.u32),
4987 "=m" (*(volatile long *)pvBitmap)
4988 : "Ir" (iBit)
4989 : "memory");
4990# else
4991 __asm
4992 {
4993 mov edx, [iBit]
4994# ifdef RT_ARCH_AMD64
4995 mov rax, [pvBitmap]
4996 lock bts [rax], edx
4997# else
4998 mov eax, [pvBitmap]
4999 lock bts [eax], edx
5000# endif
5001 setc al
5002 and eax, 1
5003 mov [rc.u32], eax
5004 }
5005# endif
5006 return rc.f;
5007}
5008#endif
5009
5010
5011/**
5012 * Tests and clears a bit in a bitmap.
5013 *
5014 * @returns true if the bit was set.
5015 * @returns false if the bit was clear.
5016 * @param pvBitmap Pointer to the bitmap.
5017 * @param iBit The bit to test and clear.
5018 */
5019#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5020DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5021#else
5022DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5023{
5024 union { bool f; uint32_t u32; uint8_t u8; } rc;
5025# if RT_INLINE_ASM_USES_INTRIN
5026 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5027
5028# elif RT_INLINE_ASM_GNU_STYLE
5029 __asm__ __volatile__ ("btrl %2, %1\n\t"
5030 "setc %b0\n\t"
5031 "andl $1, %0\n\t"
5032 : "=q" (rc.u32),
5033 "=m" (*(volatile long *)pvBitmap)
5034 : "Ir" (iBit)
5035 : "memory");
5036# else
5037 __asm
5038 {
5039 mov edx, [iBit]
5040# ifdef RT_ARCH_AMD64
5041 mov rax, [pvBitmap]
5042 btr [rax], edx
5043# else
5044 mov eax, [pvBitmap]
5045 btr [eax], edx
5046# endif
5047 setc al
5048 and eax, 1
5049 mov [rc.u32], eax
5050 }
5051# endif
5052 return rc.f;
5053}
5054#endif
5055
5056
5057/**
5058 * Atomically tests and clears a bit in a bitmap, ordered.
5059 *
5060 * @returns true if the bit was set.
5061 * @returns false if the bit was clear.
5062 * @param pvBitmap Pointer to the bitmap.
5063 * @param iBit The bit to test and clear.
5064 * @remark No memory barrier, take care on smp.
5065 */
5066#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5067DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5068#else
5069DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5070{
5071 union { bool f; uint32_t u32; uint8_t u8; } rc;
5072# if RT_INLINE_ASM_USES_INTRIN
5073 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5074
5075# elif RT_INLINE_ASM_GNU_STYLE
5076 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5077 "setc %b0\n\t"
5078 "andl $1, %0\n\t"
5079 : "=q" (rc.u32),
5080 "=m" (*(volatile long *)pvBitmap)
5081 : "Ir" (iBit)
5082 : "memory");
5083# else
5084 __asm
5085 {
5086 mov edx, [iBit]
5087# ifdef RT_ARCH_AMD64
5088 mov rax, [pvBitmap]
5089 lock btr [rax], edx
5090# else
5091 mov eax, [pvBitmap]
5092 lock btr [eax], edx
5093# endif
5094 setc al
5095 and eax, 1
5096 mov [rc.u32], eax
5097 }
5098# endif
5099 return rc.f;
5100}
5101#endif
5102
5103
5104/**
5105 * Tests and toggles a bit in a bitmap.
5106 *
5107 * @returns true if the bit was set.
5108 * @returns false if the bit was clear.
5109 * @param pvBitmap Pointer to the bitmap.
5110 * @param iBit The bit to test and toggle.
5111 */
5112#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5113DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5114#else
5115DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5116{
5117 union { bool f; uint32_t u32; uint8_t u8; } rc;
5118# if RT_INLINE_ASM_USES_INTRIN
5119 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5120
5121# elif RT_INLINE_ASM_GNU_STYLE
5122 __asm__ __volatile__ ("btcl %2, %1\n\t"
5123 "setc %b0\n\t"
5124 "andl $1, %0\n\t"
5125 : "=q" (rc.u32),
5126 "=m" (*(volatile long *)pvBitmap)
5127 : "Ir" (iBit)
5128 : "memory");
5129# else
5130 __asm
5131 {
5132 mov edx, [iBit]
5133# ifdef RT_ARCH_AMD64
5134 mov rax, [pvBitmap]
5135 btc [rax], edx
5136# else
5137 mov eax, [pvBitmap]
5138 btc [eax], edx
5139# endif
5140 setc al
5141 and eax, 1
5142 mov [rc.u32], eax
5143 }
5144# endif
5145 return rc.f;
5146}
5147#endif
5148
5149
5150/**
5151 * Atomically tests and toggles a bit in a bitmap, ordered.
5152 *
5153 * @returns true if the bit was set.
5154 * @returns false if the bit was clear.
5155 * @param pvBitmap Pointer to the bitmap.
5156 * @param iBit The bit to test and toggle.
5157 */
5158#if RT_INLINE_ASM_EXTERNAL
5159DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5160#else
5161DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5162{
5163 union { bool f; uint32_t u32; uint8_t u8; } rc;
5164# if RT_INLINE_ASM_GNU_STYLE
5165 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5166 "setc %b0\n\t"
5167 "andl $1, %0\n\t"
5168 : "=q" (rc.u32),
5169 "=m" (*(volatile long *)pvBitmap)
5170 : "Ir" (iBit)
5171 : "memory");
5172# else
5173 __asm
5174 {
5175 mov edx, [iBit]
5176# ifdef RT_ARCH_AMD64
5177 mov rax, [pvBitmap]
5178 lock btc [rax], edx
5179# else
5180 mov eax, [pvBitmap]
5181 lock btc [eax], edx
5182# endif
5183 setc al
5184 and eax, 1
5185 mov [rc.u32], eax
5186 }
5187# endif
5188 return rc.f;
5189}
5190#endif
5191
5192
5193/**
5194 * Tests if a bit in a bitmap is set.
5195 *
5196 * @returns true if the bit is set.
5197 * @returns false if the bit is clear.
5198 * @param pvBitmap Pointer to the bitmap.
5199 * @param iBit The bit to test.
5200 */
5201#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5202DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5203#else
5204DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5205{
5206 union { bool f; uint32_t u32; uint8_t u8; } rc;
5207# if RT_INLINE_ASM_USES_INTRIN
5208 rc.u32 = _bittest((long *)pvBitmap, iBit);
5209# elif RT_INLINE_ASM_GNU_STYLE
5210
5211 __asm__ __volatile__ ("btl %2, %1\n\t"
5212 "setc %b0\n\t"
5213 "andl $1, %0\n\t"
5214 : "=q" (rc.u32)
5215 : "m" (*(const volatile long *)pvBitmap),
5216 "Ir" (iBit)
5217 : "memory");
5218# else
5219 __asm
5220 {
5221 mov edx, [iBit]
5222# ifdef RT_ARCH_AMD64
5223 mov rax, [pvBitmap]
5224 bt [rax], edx
5225# else
5226 mov eax, [pvBitmap]
5227 bt [eax], edx
5228# endif
5229 setc al
5230 and eax, 1
5231 mov [rc.u32], eax
5232 }
5233# endif
5234 return rc.f;
5235}
5236#endif
5237
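/* Usage sketch (illustration only, not part of the original header): a small
 * flag bitmap driven with the plain and atomic bit operations above. The
 * bitmap must be 32-bit aligned; the size and bit numbers are hypothetical. */
#if 0
static uint32_t s_au32Flags[4]; /* 128 bits */

static void ExampleFlags(void)
{
    ASMBitSet(s_au32Flags, 10);                /* non-atomic */
    ASMAtomicBitSet(s_au32Flags, 77);          /* lock bts - SMP safe */
    if (ASMBitTest(s_au32Flags, 10))
        ASMBitClear(s_au32Flags, 10);
    ASMAtomicBitTestAndClear(s_au32Flags, 77); /* returns the previous value */
}
#endif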
5238
5239/**
5240 * Clears a bit range within a bitmap.
5241 *
5242 * @param pvBitmap Pointer to the bitmap.
5243 * @param iBitStart The first bit to clear.
5244 * @param iBitEnd The first bit not to clear.
5245 */
5246DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5247{
5248 if (iBitStart < iBitEnd)
5249 {
5250 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5251 int iStart = iBitStart & ~31;
5252 int iEnd = iBitEnd & ~31;
5253 if (iStart == iEnd)
5254 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5255 else
5256 {
5257 /* bits in first dword. */
5258 if (iBitStart & 31)
5259 {
5260 *pu32 &= (1 << (iBitStart & 31)) - 1;
5261 pu32++;
5262 iBitStart = iStart + 32;
5263 }
5264
5265 /* whole dword. */
5266 if (iBitStart != iEnd)
5267 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5268
5269 /* bits in last dword. */
5270 if (iBitEnd & 31)
5271 {
5272 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5273 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5274 }
5275 }
5276 }
5277}
5278
5279
5280/**
5281 * Sets a bit range within a bitmap.
5282 *
5283 * @param pvBitmap Pointer to the bitmap.
5284 * @param iBitStart The first bit to set.
5285 * @param iBitEnd The first bit not to set.
5286 */
5287DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5288{
5289 if (iBitStart < iBitEnd)
5290 {
5291 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5292 int iStart = iBitStart & ~31;
5293 int iEnd = iBitEnd & ~31;
5294 if (iStart == iEnd)
5295            *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5296 else
5297 {
5298 /* bits in first dword. */
5299 if (iBitStart & 31)
5300 {
5301 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5302 pu32++;
5303 iBitStart = iStart + 32;
5304 }
5305
5306 /* whole dword. */
5307 if (iBitStart != iEnd)
5308 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5309
5310 /* bits in last dword. */
5311 if (iBitEnd & 31)
5312 {
5313 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5314 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5315 }
5316 }
5317 }
5318}
5319
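/* Usage sketch (illustration only, not part of the original header): marking a
 * run of bits allocated and freeing it again. iBitEnd is exclusive, so the
 * hypothetical range [4, 36) below covers 32 bits spanning two dwords. */
#if 0
static void ExampleRange(volatile void *pvBitmap)
{
    ASMBitSetRange(pvBitmap, 4, 36);   /* sets bits 4 thru 35 */
    ASMBitClearRange(pvBitmap, 4, 36); /* clears them again */
}
#endif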
5320
5321/**
5322 * Finds the first clear bit in a bitmap.
5323 *
5324 * @returns Index of the first zero bit.
5325 * @returns -1 if no clear bit was found.
5326 * @param pvBitmap Pointer to the bitmap.
5327 * @param cBits The number of bits in the bitmap. Multiple of 32.
5328 */
5329#if RT_INLINE_ASM_EXTERNAL
5330DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5331#else
5332DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5333{
5334 if (cBits)
5335 {
5336 int32_t iBit;
5337# if RT_INLINE_ASM_GNU_STYLE
5338 RTCCUINTREG uEAX, uECX, uEDI;
5339 cBits = RT_ALIGN_32(cBits, 32);
5340 __asm__ __volatile__("repe; scasl\n\t"
5341 "je 1f\n\t"
5342# ifdef RT_ARCH_AMD64
5343 "lea -4(%%rdi), %%rdi\n\t"
5344 "xorl (%%rdi), %%eax\n\t"
5345 "subq %5, %%rdi\n\t"
5346# else
5347 "lea -4(%%edi), %%edi\n\t"
5348 "xorl (%%edi), %%eax\n\t"
5349 "subl %5, %%edi\n\t"
5350# endif
5351 "shll $3, %%edi\n\t"
5352 "bsfl %%eax, %%edx\n\t"
5353 "addl %%edi, %%edx\n\t"
5354 "1:\t\n"
5355 : "=d" (iBit),
5356 "=&c" (uECX),
5357 "=&D" (uEDI),
5358 "=&a" (uEAX)
5359 : "0" (0xffffffff),
5360 "mr" (pvBitmap),
5361 "1" (cBits >> 5),
5362 "2" (pvBitmap),
5363 "3" (0xffffffff));
5364# else
5365 cBits = RT_ALIGN_32(cBits, 32);
5366 __asm
5367 {
5368# ifdef RT_ARCH_AMD64
5369 mov rdi, [pvBitmap]
5370 mov rbx, rdi
5371# else
5372 mov edi, [pvBitmap]
5373 mov ebx, edi
5374# endif
5375 mov edx, 0ffffffffh
5376 mov eax, edx
5377 mov ecx, [cBits]
5378 shr ecx, 5
5379 repe scasd
5380 je done
5381
5382# ifdef RT_ARCH_AMD64
5383 lea rdi, [rdi - 4]
5384 xor eax, [rdi]
5385 sub rdi, rbx
5386# else
5387 lea edi, [edi - 4]
5388 xor eax, [edi]
5389 sub edi, ebx
5390# endif
5391 shl edi, 3
5392 bsf edx, eax
5393 add edx, edi
5394 done:
5395 mov [iBit], edx
5396 }
5397# endif
5398 return iBit;
5399 }
5400 return -1;
5401}
5402#endif
5403
5404
5405/**
5406 * Finds the next clear bit in a bitmap.
5407 *
5408 * @returns Index of the first zero bit.
5409 * @returns -1 if no clear bit was found.
5410 * @param pvBitmap Pointer to the bitmap.
5411 * @param cBits The number of bits in the bitmap. Multiple of 32.
5412 * @param iBitPrev The bit returned from the last search.
5413 * The search will start at iBitPrev + 1.
5414 */
5415#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5416DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5417#else
5418DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5419{
5420 int iBit = ++iBitPrev & 31;
5421 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5422 cBits -= iBitPrev & ~31;
5423 if (iBit)
5424 {
5425 /* inspect the first dword. */
5426 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5427# if RT_INLINE_ASM_USES_INTRIN
5428 unsigned long ulBit = 0;
5429 if (_BitScanForward(&ulBit, u32))
5430 return ulBit + iBitPrev;
5431 iBit = -1;
5432# else
5433# if RT_INLINE_ASM_GNU_STYLE
5434 __asm__ __volatile__("bsf %1, %0\n\t"
5435 "jnz 1f\n\t"
5436 "movl $-1, %0\n\t"
5437 "1:\n\t"
5438 : "=r" (iBit)
5439 : "r" (u32));
5440# else
5441 __asm
5442 {
5443 mov edx, [u32]
5444 bsf eax, edx
5445 jnz done
5446 mov eax, 0ffffffffh
5447 done:
5448 mov [iBit], eax
5449 }
5450# endif
5451 if (iBit >= 0)
5452 return iBit + iBitPrev;
5453# endif
5454 /* Search the rest of the bitmap, if there is anything. */
5455 if (cBits > 32)
5456 {
5457 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5458 if (iBit >= 0)
5459 return iBit + (iBitPrev & ~31) + 32;
5460 }
5461 }
5462 else
5463 {
5464 /* Search the rest of the bitmap. */
5465 iBit = ASMBitFirstClear(pvBitmap, cBits);
5466 if (iBit >= 0)
5467 return iBit + (iBitPrev & ~31);
5468 }
5469 return iBit;
5470}
5471#endif
5472
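/* Usage sketch (illustration only, not part of the original header): claiming a
 * free (clear) bit in an allocation bitmap, retrying on races. cBits must be a
 * multiple of 32; the allocator below is hypothetical. */
#if 0
static int ExampleAllocBit(volatile void *pvBitmap, uint32_t cBits)
{
    int iBit = ASMBitFirstClear(pvBitmap, cBits);
    while (iBit >= 0)
    {
        if (!ASMAtomicBitTestAndSet(pvBitmap, iBit))
            return iBit;                               /* it was clear - claimed */
        iBit = ASMBitNextClear(pvBitmap, cBits, iBit); /* raced, look further */
    }
    return -1;                                         /* bitmap exhausted */
}
#endif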
5473
5474/**
5475 * Finds the first set bit in a bitmap.
5476 *
5477 * @returns Index of the first set bit.
5478 * @returns -1 if no set bit was found.
5479 * @param pvBitmap Pointer to the bitmap.
5480 * @param cBits The number of bits in the bitmap. Multiple of 32.
5481 */
5482#if RT_INLINE_ASM_EXTERNAL
5483DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5484#else
5485DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5486{
5487 if (cBits)
5488 {
5489 int32_t iBit;
5490# if RT_INLINE_ASM_GNU_STYLE
5491 RTCCUINTREG uEAX, uECX, uEDI;
5492 cBits = RT_ALIGN_32(cBits, 32);
5493 __asm__ __volatile__("repe; scasl\n\t"
5494 "je 1f\n\t"
5495# ifdef RT_ARCH_AMD64
5496 "lea -4(%%rdi), %%rdi\n\t"
5497 "movl (%%rdi), %%eax\n\t"
5498 "subq %5, %%rdi\n\t"
5499# else
5500 "lea -4(%%edi), %%edi\n\t"
5501 "movl (%%edi), %%eax\n\t"
5502 "subl %5, %%edi\n\t"
5503# endif
5504 "shll $3, %%edi\n\t"
5505 "bsfl %%eax, %%edx\n\t"
5506 "addl %%edi, %%edx\n\t"
5507 "1:\t\n"
5508 : "=d" (iBit),
5509 "=&c" (uECX),
5510 "=&D" (uEDI),
5511 "=&a" (uEAX)
5512 : "0" (0xffffffff),
5513 "mr" (pvBitmap),
5514 "1" (cBits >> 5),
5515 "2" (pvBitmap),
5516 "3" (0));
5517# else
5518 cBits = RT_ALIGN_32(cBits, 32);
5519 __asm
5520 {
5521# ifdef RT_ARCH_AMD64
5522 mov rdi, [pvBitmap]
5523 mov rbx, rdi
5524# else
5525 mov edi, [pvBitmap]
5526 mov ebx, edi
5527# endif
5528 mov edx, 0ffffffffh
5529 xor eax, eax
5530 mov ecx, [cBits]
5531 shr ecx, 5
5532 repe scasd
5533 je done
5534# ifdef RT_ARCH_AMD64
5535 lea rdi, [rdi - 4]
5536 mov eax, [rdi]
5537 sub rdi, rbx
5538# else
5539 lea edi, [edi - 4]
5540 mov eax, [edi]
5541 sub edi, ebx
5542# endif
5543 shl edi, 3
5544 bsf edx, eax
5545 add edx, edi
5546 done:
5547 mov [iBit], edx
5548 }
5549# endif
5550 return iBit;
5551 }
5552 return -1;
5553}
5554#endif
5555
5556
5557/**
5558 * Finds the next set bit in a bitmap.
5559 *
5560 * @returns Index of the next set bit.
5561 * @returns -1 if no set bit was found.
5562 * @param pvBitmap Pointer to the bitmap.
5563 * @param cBits The number of bits in the bitmap. Multiple of 32.
5564 * @param iBitPrev The bit returned from the last search.
5565 * The search will start at iBitPrev + 1.
5566 */
5567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5568DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5569#else
5570DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5571{
5572 int iBit = ++iBitPrev & 31;
5573 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5574 cBits -= iBitPrev & ~31;
5575 if (iBit)
5576 {
5577 /* inspect the first dword. */
5578 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5579# if RT_INLINE_ASM_USES_INTRIN
5580 unsigned long ulBit = 0;
5581 if (_BitScanForward(&ulBit, u32))
5582 return ulBit + iBitPrev;
5583 iBit = -1;
5584# else
5585# if RT_INLINE_ASM_GNU_STYLE
5586 __asm__ __volatile__("bsf %1, %0\n\t"
5587 "jnz 1f\n\t"
5588 "movl $-1, %0\n\t"
5589 "1:\n\t"
5590 : "=r" (iBit)
5591 : "r" (u32));
5592# else
5593 __asm
5594 {
5595 mov edx, u32
5596 bsf eax, edx
5597 jnz done
5598 mov eax, 0ffffffffh
5599 done:
5600 mov [iBit], eax
5601 }
5602# endif
5603 if (iBit >= 0)
5604 return iBit + iBitPrev;
5605# endif
5606 /* Search the rest of the bitmap, if there is anything. */
5607 if (cBits > 32)
5608 {
5609 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5610 if (iBit >= 0)
5611 return iBit + (iBitPrev & ~31) + 32;
5612 }
5613
5614 }
5615 else
5616 {
5617 /* Search the rest of the bitmap. */
5618 iBit = ASMBitFirstSet(pvBitmap, cBits);
5619 if (iBit >= 0)
5620 return iBit + (iBitPrev & ~31);
5621 }
5622 return iBit;
5623}
5624#endif
5625
5626
5627/**
5628 * Finds the first bit which is set in the given 32-bit integer.
5629 * Bits are numbered from 1 (least significant) to 32.
5630 *
5631 * @returns index [1..32] of the first set bit.
5632 * @returns 0 if all bits are cleared.
5633 * @param u32 Integer to search for set bits.
5634 * @remark Similar to ffs() in BSD.
5635 */
5636DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5637{
5638# if RT_INLINE_ASM_USES_INTRIN
5639 unsigned long iBit;
5640 if (_BitScanForward(&iBit, u32))
5641 iBit++;
5642 else
5643 iBit = 0;
5644# elif RT_INLINE_ASM_GNU_STYLE
5645 uint32_t iBit;
5646 __asm__ __volatile__("bsf %1, %0\n\t"
5647 "jnz 1f\n\t"
5648 "xorl %0, %0\n\t"
5649 "jmp 2f\n"
5650 "1:\n\t"
5651 "incl %0\n"
5652 "2:\n\t"
5653 : "=r" (iBit)
5654 : "rm" (u32));
5655# else
5656 uint32_t iBit;
5657 _asm
5658 {
5659 bsf eax, [u32]
5660 jnz found
5661 xor eax, eax
5662 jmp done
5663 found:
5664 inc eax
5665 done:
5666 mov [iBit], eax
5667 }
5668# endif
5669 return iBit;
5670}
5671
5672
5673/**
5674 * Finds the first bit which is set in the given 32-bit integer.
5675 * Bits are numbered from 1 (least significant) to 32.
5676 *
5677 * @returns index [1..32] of the first set bit.
5678 * @returns 0 if all bits are cleared.
5679 * @param i32 Integer to search for set bits.
5680 * @remark Similar to ffs() in BSD.
5681 */
5682DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5683{
5684 return ASMBitFirstSetU32((uint32_t)i32);
5685}
5686
5687
5688/**
5689 * Finds the last bit which is set in the given 32-bit integer.
5690 * Bits are numbered from 1 (least significant) to 32.
5691 *
5692 * @returns index [1..32] of the last set bit.
5693 * @returns 0 if all bits are cleared.
5694 * @param u32 Integer to search for set bits.
5695 * @remark Similar to fls() in BSD.
5696 */
5697DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5698{
5699# if RT_INLINE_ASM_USES_INTRIN
5700 unsigned long iBit;
5701 if (_BitScanReverse(&iBit, u32))
5702 iBit++;
5703 else
5704 iBit = 0;
5705# elif RT_INLINE_ASM_GNU_STYLE
5706 uint32_t iBit;
5707 __asm__ __volatile__("bsrl %1, %0\n\t"
5708 "jnz 1f\n\t"
5709 "xorl %0, %0\n\t"
5710 "jmp 2f\n"
5711 "1:\n\t"
5712 "incl %0\n"
5713 "2:\n\t"
5714 : "=r" (iBit)
5715 : "rm" (u32));
5716# else
5717 uint32_t iBit;
5718 _asm
5719 {
5720 bsr eax, [u32]
5721 jnz found
5722 xor eax, eax
5723 jmp done
5724 found:
5725 inc eax
5726 done:
5727 mov [iBit], eax
5728 }
5729# endif
5730 return iBit;
5731}
5732
5733
5734/**
5735 * Finds the last bit which is set in the given 32-bit integer.
5736 * Bits are numbered from 1 (least significant) to 32.
5737 *
5738 * @returns index [1..32] of the last set bit.
5739 * @returns 0 if all bits are cleared.
5740 * @param i32 Integer to search for set bits.
5741 * @remark Similar to fls() in BSD.
5742 */
5743DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5744{
5745    return ASMBitLastSetU32((uint32_t)i32);
5746}
5747
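/* Usage sketch (illustration only, not part of the original header): the
 * 1-based, ffs()/fls() style indexing of ASMBitFirstSetU32 and
 * ASMBitLastSetU32. */
#if 0
static void ExampleFfsFls(void)
{
    unsigned iFirst = ASMBitFirstSetU32(UINT32_C(0x00400100)); /* 9  - bit 8 is the lowest set bit   */
    unsigned iLast  = ASMBitLastSetU32( UINT32_C(0x00400100)); /* 23 - bit 22 is the highest set bit */
    unsigned iNone  = ASMBitFirstSetU32(0);                    /* 0  - no bits set */
    (void)iFirst; (void)iLast; (void)iNone;
}
#endif
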
5748/**
5749 * Reverse the byte order of the given 16-bit integer.
5750 *
5751 * @returns The byte-swapped value.
5752 * @param u16 16-bit integer value.
5753 */
5754DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5755{
5756#if RT_INLINE_ASM_USES_INTRIN
5757 u16 = _byteswap_ushort(u16);
5758#elif RT_INLINE_ASM_GNU_STYLE
5759 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5760#else
5761 _asm
5762 {
5763 mov ax, [u16]
5764 ror ax, 8
5765 mov [u16], ax
5766 }
5767#endif
5768 return u16;
5769}
5770
5771/**
5772 * Reverse the byte order of the given 32-bit integer.
5773 *
5774 * @returns The byte-swapped value.
5775 * @param u32 32-bit integer value.
5776 */
5777DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5778{
5779#if RT_INLINE_ASM_USES_INTRIN
5780 u32 = _byteswap_ulong(u32);
5781#elif RT_INLINE_ASM_GNU_STYLE
5782 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5783#else
5784 _asm
5785 {
5786 mov eax, [u32]
5787 bswap eax
5788 mov [u32], eax
5789 }
5790#endif
5791 return u32;
5792}
5793
5794
5795/**
5796 * Reverse the byte order of the given 64-bit integer.
5797 *
5798 * @returns The byte-swapped value.
5799 * @param u64 64-bit integer value.
5800 */
5801DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5802{
5803#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5804 u64 = _byteswap_uint64(u64);
5805#else
5806 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5807 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5808#endif
5809 return u64;
5810}
5811
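/* Usage sketch (illustration only, not part of the original header): converting
 * a value read in big-endian wire order to host order on a little-endian x86
 * host. The field name is hypothetical. */
#if 0
static uint32_t ExampleWireToHost(uint32_t u32BigEndian)
{
    return ASMByteSwapU32(u32BigEndian); /* 0x12345678 <-> 0x78563412 */
}
#endif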
5812
5813/** @} */
5814
5815
5816/** @} */
5817#endif
5818