VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 12142

Last change on this file since 12142 was 12092, checked in by vboxsync, 17 years ago

Compile fixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 151.9 KB
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using the Microsoft compiler intrinsics (_MSC_VER >= 1400).
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(_BitScanForward)
72# pragma intrinsic(_BitScanReverse)
73# pragma intrinsic(_bittest)
74# pragma intrinsic(_bittestandset)
75# pragma intrinsic(_bittestandreset)
76# pragma intrinsic(_bittestandcomplement)
77# pragma intrinsic(_byteswap_ushort)
78# pragma intrinsic(_byteswap_ulong)
79# pragma intrinsic(_interlockedbittestandset)
80# pragma intrinsic(_interlockedbittestandreset)
81# pragma intrinsic(_InterlockedAnd)
82# pragma intrinsic(_InterlockedOr)
83# pragma intrinsic(_InterlockedIncrement)
84# pragma intrinsic(_InterlockedDecrement)
85# pragma intrinsic(_InterlockedExchange)
86# pragma intrinsic(_InterlockedExchangeAdd)
87# pragma intrinsic(_InterlockedCompareExchange)
88# pragma intrinsic(_InterlockedCompareExchange64)
89# ifdef RT_ARCH_AMD64
90# pragma intrinsic(__stosq)
91# pragma intrinsic(__readcr8)
92# pragma intrinsic(__writecr8)
93# pragma intrinsic(_byteswap_uint64)
94# pragma intrinsic(_InterlockedExchange64)
95# endif
96# endif
97#endif
98#ifndef RT_INLINE_ASM_USES_INTRIN
99# define RT_INLINE_ASM_USES_INTRIN 0
100#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
107 * @remarks The difference between ordered and unordered atomic operations is that
108 * the former will complete outstanding reads and writes before continuing,
109 * while the latter makes no promises about the ordering. Even ordered
110 * operations do not, it seems, make a 100% promise as to whether the
111 * operation will complete before any subsequent memory access.
112 * (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint32_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 * return low;
127 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
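
/*
 * A minimal sketch (assuming GCC-style inline assembly) of the fix implied above:
 * marking the statement __volatile__, as the ASM* routines in this file do, keeps
 * gcc from merging the two reads. rdmsr_low_volatile is just a hypothetical
 * corrected variant of the example.
 *
 * @code
 * static inline uint32_t rdmsr_low_volatile(int idx)
 * {
 *     uint32_t low;
 *     __asm__ __volatile__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
 *     return low;
 * }
 * ...
 * uint32_t msr1 = rdmsr_low_volatile(1);
 * foo(msr1);
 * msr1 = rdmsr_low_volatile(1);   // this second rdmsr is now emitted as well
 * bar(msr1);
 * @endcode
 */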
142
143/** @def RT_INLINE_ASM_EXTERNAL
144 * Defined as 1 if the compiler does not support inline assembly.
145 * The ASM* functions will then be implemented in an external .asm file.
146 *
147 * @remark At the present time it's unconfirmed whether Microsoft skipped
148 * inline assembly in their AMD64 compiler.
149 */
150#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
151# define RT_INLINE_ASM_EXTERNAL 1
152#else
153# define RT_INLINE_ASM_EXTERNAL 0
154#endif
155
156/** @def RT_INLINE_ASM_GNU_STYLE
157 * Defined as 1 if the compiler understands GNU style inline assembly.
158 */
159#if defined(_MSC_VER)
160# define RT_INLINE_ASM_GNU_STYLE 0
161#else
162# define RT_INLINE_ASM_GNU_STYLE 1
163#endif
164
165
166/** @todo find a more proper place for this structure? */
167#pragma pack(1)
168/** IDTR */
169typedef struct RTIDTR
170{
171 /** Size of the IDT. */
172 uint16_t cbIdt;
173 /** Address of the IDT. */
174 uintptr_t pIdt;
175} RTIDTR, *PRTIDTR;
176#pragma pack()
177
178#pragma pack(1)
179/** GDTR */
180typedef struct RTGDTR
181{
182 /** Size of the GDT. */
183 uint16_t cbGdt;
184 /** Address of the GDT. */
185 uintptr_t pGdt;
186} RTGDTR, *PRTGDTR;
187#pragma pack()
188
189
190/** @def ASMReturnAddress
191 * Gets the return address of the current (or calling if you like) function or method.
192 */
193#ifdef _MSC_VER
194# ifdef __cplusplus
195extern "C"
196# endif
197void * _ReturnAddress(void);
198# pragma intrinsic(_ReturnAddress)
199# define ASMReturnAddress() _ReturnAddress()
200#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
201# define ASMReturnAddress() __builtin_return_address(0)
202#else
203# error "Unsupported compiler."
204#endif
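
/*
 * A minimal usage sketch: tag a log line with its call site. LogCallSite and
 * the use of RTPrintf (iprt/stream.h) are purely illustrative.
 *
 * @code
 * void LogCallSite(const char *pszMsg)
 * {
 *     void *pvCaller = ASMReturnAddress();   // the address LogCallSite() returns to
 *     RTPrintf("%p: %s\n", pvCaller, pszMsg);
 * }
 * @endcode
 */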
205
206
207/**
208 * Gets the content of the IDTR CPU register.
209 * @param pIdtr Where to store the IDTR contents.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
213#else
214DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 sidt [rax]
224# else
225 mov eax, [pIdtr]
226 sidt [eax]
227# endif
228 }
229# endif
230}
231#endif
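
/*
 * A minimal usage sketch: capture the current IDT base and limit for
 * diagnostics (RTPrintf from iprt/stream.h is used purely for illustration).
 *
 * @code
 * RTIDTR Idtr;
 * ASMGetIDTR(&Idtr);
 * RTPrintf("IDT base=%p limit=%#x\n", (void *)Idtr.pIdt, Idtr.cbIdt);
 * @endcode
 */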
232
233
234/**
235 * Sets the content of the IDTR CPU register.
236 * @param pIdtr Where to load the IDTR contents from.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
240#else
241DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pIdtr]
250 lidt [rax]
251# else
252 mov eax, [pIdtr]
253 lidt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260
261/**
262 * Gets the content of the GDTR CPU register.
263 * @param pGdtr Where to store the GDTR contents.
264 */
265#if RT_INLINE_ASM_EXTERNAL
266DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
267#else
268DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
269{
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rax, [pGdtr]
277 sgdt [rax]
278# else
279 mov eax, [pGdtr]
280 sgdt [eax]
281# endif
282 }
283# endif
284}
285#endif
286
287/**
288 * Get the cs register.
289 * @returns cs.
290 */
291#if RT_INLINE_ASM_EXTERNAL
292DECLASM(RTSEL) ASMGetCS(void);
293#else
294DECLINLINE(RTSEL) ASMGetCS(void)
295{
296 RTSEL SelCS;
297# if RT_INLINE_ASM_GNU_STYLE
298 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
299# else
300 __asm
301 {
302 mov ax, cs
303 mov [SelCS], ax
304 }
305# endif
306 return SelCS;
307}
308#endif
309
310
311/**
312 * Get the DS register.
313 * @returns DS.
314 */
315#if RT_INLINE_ASM_EXTERNAL
316DECLASM(RTSEL) ASMGetDS(void);
317#else
318DECLINLINE(RTSEL) ASMGetDS(void)
319{
320 RTSEL SelDS;
321# if RT_INLINE_ASM_GNU_STYLE
322 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
323# else
324 __asm
325 {
326 mov ax, ds
327 mov [SelDS], ax
328 }
329# endif
330 return SelDS;
331}
332#endif
333
334
335/**
336 * Get the ES register.
337 * @returns ES.
338 */
339#if RT_INLINE_ASM_EXTERNAL
340DECLASM(RTSEL) ASMGetES(void);
341#else
342DECLINLINE(RTSEL) ASMGetES(void)
343{
344 RTSEL SelES;
345# if RT_INLINE_ASM_GNU_STYLE
346 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
347# else
348 __asm
349 {
350 mov ax, es
351 mov [SelES], ax
352 }
353# endif
354 return SelES;
355}
356#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380#endif
381
382
383/**
384 * Get the GS register.
385 * @returns GS.
386 */
387#if RT_INLINE_ASM_EXTERNAL
388DECLASM(RTSEL) ASMGetGS(void);
389#else
390DECLINLINE(RTSEL) ASMGetGS(void)
391{
392 RTSEL SelGS;
393# if RT_INLINE_ASM_GNU_STYLE
394 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
395# else
396 __asm
397 {
398 mov ax, gs
399 mov [SelGS], ax
400 }
401# endif
402 return SelGS;
403}
404#endif
405
406
407/**
408 * Get the SS register.
409 * @returns SS.
410 */
411#if RT_INLINE_ASM_EXTERNAL
412DECLASM(RTSEL) ASMGetSS(void);
413#else
414DECLINLINE(RTSEL) ASMGetSS(void)
415{
416 RTSEL SelSS;
417# if RT_INLINE_ASM_GNU_STYLE
418 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
419# else
420 __asm
421 {
422 mov ax, ss
423 mov [SelSS], ax
424 }
425# endif
426 return SelSS;
427}
428#endif
429
430
431/**
432 * Get the TR register.
433 * @returns TR.
434 */
435#if RT_INLINE_ASM_EXTERNAL
436DECLASM(RTSEL) ASMGetTR(void);
437#else
438DECLINLINE(RTSEL) ASMGetTR(void)
439{
440 RTSEL SelTR;
441# if RT_INLINE_ASM_GNU_STYLE
442 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
443# else
444 __asm
445 {
446 str ax
447 mov [SelTR], ax
448 }
449# endif
450 return SelTR;
451}
452#endif
453
454
455/**
456 * Get the [RE]FLAGS register.
457 * @returns [RE]FLAGS.
458 */
459#if RT_INLINE_ASM_EXTERNAL
460DECLASM(RTCCUINTREG) ASMGetFlags(void);
461#else
462DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
463{
464 RTCCUINTREG uFlags;
465# if RT_INLINE_ASM_GNU_STYLE
466# ifdef RT_ARCH_AMD64
467 __asm__ __volatile__("pushfq\n\t"
468 "popq %0\n\t"
469 : "=g" (uFlags));
470# else
471 __asm__ __volatile__("pushfl\n\t"
472 "popl %0\n\t"
473 : "=g" (uFlags));
474# endif
475# else
476 __asm
477 {
478# ifdef RT_ARCH_AMD64
479 pushfq
480 pop [uFlags]
481# else
482 pushfd
483 pop [uFlags]
484# endif
485 }
486# endif
487 return uFlags;
488}
489#endif
490
491
492/**
493 * Set the [RE]FLAGS register.
494 * @param uFlags The new [RE]FLAGS value.
495 */
496#if RT_INLINE_ASM_EXTERNAL
497DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
498#else
499DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
500{
501# if RT_INLINE_ASM_GNU_STYLE
502# ifdef RT_ARCH_AMD64
503 __asm__ __volatile__("pushq %0\n\t"
504 "popfq\n\t"
505 : : "g" (uFlags));
506# else
507 __asm__ __volatile__("pushl %0\n\t"
508 "popfl\n\t"
509 : : "g" (uFlags));
510# endif
511# else
512 __asm
513 {
514# ifdef RT_ARCH_AMD64
515 push [uFlags]
516 popfq
517# else
518 push [uFlags]
519 popfd
520# endif
521 }
522# endif
523}
524#endif
525
526
527/**
528 * Gets the content of the CPU timestamp counter register.
529 *
530 * @returns TSC.
531 */
532#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
533DECLASM(uint64_t) ASMReadTSC(void);
534#else
535DECLINLINE(uint64_t) ASMReadTSC(void)
536{
537 RTUINT64U u;
538# if RT_INLINE_ASM_GNU_STYLE
539 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
540# else
541# if RT_INLINE_ASM_USES_INTRIN
542 u.u = __rdtsc();
543# else
544 __asm
545 {
546 rdtsc
547 mov [u.s.Lo], eax
548 mov [u.s.Hi], edx
549 }
550# endif
551# endif
552 return u.u;
553}
554#endif
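
/*
 * A minimal sketch of using the TSC for rough cycle measurements (assumes the
 * TSC is usable for this on the CPU at hand; DoWork is hypothetical).
 *
 * @code
 * uint64_t const uStart = ASMReadTSC();
 * DoWork();
 * uint64_t const cTicksElapsed = ASMReadTSC() - uStart;
 * @endcode
 */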
555
556
557/**
558 * Performs the cpuid instruction returning all registers.
559 *
560 * @param uOperator CPUID operation (eax).
561 * @param pvEAX Where to store eax.
562 * @param pvEBX Where to store ebx.
563 * @param pvECX Where to store ecx.
564 * @param pvEDX Where to store edx.
565 * @remark We're using void pointers to ease the use of special bitfield structures and such.
566 */
567#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
568DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
569#else
570DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
571{
572# if RT_INLINE_ASM_GNU_STYLE
573# ifdef RT_ARCH_AMD64
574 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
575 __asm__ ("cpuid\n\t"
576 : "=a" (uRAX),
577 "=b" (uRBX),
578 "=c" (uRCX),
579 "=d" (uRDX)
580 : "0" (uOperator));
581 *(uint32_t *)pvEAX = (uint32_t)uRAX;
582 *(uint32_t *)pvEBX = (uint32_t)uRBX;
583 *(uint32_t *)pvECX = (uint32_t)uRCX;
584 *(uint32_t *)pvEDX = (uint32_t)uRDX;
585# else
586 __asm__ ("xchgl %%ebx, %1\n\t"
587 "cpuid\n\t"
588 "xchgl %%ebx, %1\n\t"
589 : "=a" (*(uint32_t *)pvEAX),
590 "=r" (*(uint32_t *)pvEBX),
591 "=c" (*(uint32_t *)pvECX),
592 "=d" (*(uint32_t *)pvEDX)
593 : "0" (uOperator));
594# endif
595
596# elif RT_INLINE_ASM_USES_INTRIN
597 int aInfo[4];
598 __cpuid(aInfo, uOperator);
599 *(uint32_t *)pvEAX = aInfo[0];
600 *(uint32_t *)pvEBX = aInfo[1];
601 *(uint32_t *)pvECX = aInfo[2];
602 *(uint32_t *)pvEDX = aInfo[3];
603
604# else
605 uint32_t uEAX;
606 uint32_t uEBX;
607 uint32_t uECX;
608 uint32_t uEDX;
609 __asm
610 {
611 push ebx
612 mov eax, [uOperator]
613 cpuid
614 mov [uEAX], eax
615 mov [uEBX], ebx
616 mov [uECX], ecx
617 mov [uEDX], edx
618 pop ebx
619 }
620 *(uint32_t *)pvEAX = uEAX;
621 *(uint32_t *)pvEBX = uEBX;
622 *(uint32_t *)pvECX = uECX;
623 *(uint32_t *)pvEDX = uEDX;
624# endif
625}
626#endif
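
/*
 * A minimal sketch: query standard leaf 0 and assemble the 12 character vendor
 * string, which is stored in EBX, EDX, ECX order (e.g. "GenuineIntel").
 *
 * @code
 * uint32_t uMaxStdLeaf;
 * char     szVendor[13];
 * ASMCpuId(0, &uMaxStdLeaf, &szVendor[0], &szVendor[8], &szVendor[4]);
 * szVendor[12] = '\0';
 * @endcode
 */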
627
628
629/**
630 * Performs the cpuid instruction returning all registers.
631 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
632 *
633 * @param uOperator CPUID operation (eax).
634 * @param uIdxECX The ECX index (sub-leaf) to pass to cpuid.
635 * @param pvEAX Where to store eax.
636 * @param pvEBX Where to store ebx.
637 * @param pvECX Where to store ecx.
638 * @param pvEDX Where to store edx.
639 * @remark We're using void pointers to ease the use of special bitfield structures and such.
640 */
641#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
642DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
643#else
644DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
645{
646# if RT_INLINE_ASM_GNU_STYLE
647# ifdef RT_ARCH_AMD64
648 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
649 __asm__ ("cpuid\n\t"
650 : "=a" (uRAX),
651 "=b" (uRBX),
652 "=c" (uRCX),
653 "=d" (uRDX)
654 : "0" (uOperator),
655 "2" (uIdxECX));
656 *(uint32_t *)pvEAX = (uint32_t)uRAX;
657 *(uint32_t *)pvEBX = (uint32_t)uRBX;
658 *(uint32_t *)pvECX = (uint32_t)uRCX;
659 *(uint32_t *)pvEDX = (uint32_t)uRDX;
660# else
661 __asm__ ("xchgl %%ebx, %1\n\t"
662 "cpuid\n\t"
663 "xchgl %%ebx, %1\n\t"
664 : "=a" (*(uint32_t *)pvEAX),
665 "=r" (*(uint32_t *)pvEBX),
666 "=c" (*(uint32_t *)pvECX),
667 "=d" (*(uint32_t *)pvEDX)
668 : "0" (uOperator),
669 "2" (uIdxECX));
670# endif
671
672# elif RT_INLINE_ASM_USES_INTRIN
673 int aInfo[4];
674 /** @todo Use a sub-leaf aware intrinsic here (e.g. __cpuidex on newer compilers); __cpuid ignores uIdxECX. */
675 __cpuid(aInfo, uOperator);
676 *(uint32_t *)pvEAX = aInfo[0];
677 *(uint32_t *)pvEBX = aInfo[1];
678 *(uint32_t *)pvECX = aInfo[2];
679 *(uint32_t *)pvEDX = aInfo[3];
680
681# else
682 uint32_t uEAX;
683 uint32_t uEBX;
684 uint32_t uECX;
685 uint32_t uEDX;
686 __asm
687 {
688 push ebx
689 mov eax, [uOperator]
690 mov ecx, [uIdxECX]
691 cpuid
692 mov [uEAX], eax
693 mov [uEBX], ebx
694 mov [uECX], ecx
695 mov [uEDX], edx
696 pop ebx
697 }
698 *(uint32_t *)pvEAX = uEAX;
699 *(uint32_t *)pvEBX = uEBX;
700 *(uint32_t *)pvECX = uECX;
701 *(uint32_t *)pvEDX = uEDX;
702# endif
703}
704#endif
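
/*
 * A minimal sketch of the EAX=4 case mentioned above: walk the deterministic
 * cache parameter sub-leaves until the cache type field (EAX bits 4:0) reads 0.
 *
 * @code
 * uint32_t iSub;
 * for (iSub = 0; ; iSub++)
 * {
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId_Idx_ECX(4, iSub, &uEAX, &uEBX, &uECX, &uEDX);
 *     if (!(uEAX & 0x1f))     // cache type 0 = no more caches
 *         break;
 *     // uEAX/uEBX/uECX describe the cache level, ways, sets and line size.
 * }
 * @endcode
 */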
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
726/**
727 * Performs the cpuid instruction returning edx.
728 *
729 * @param uOperator CPUID operation (eax).
730 * @returns EDX after cpuid operation.
731 */
732#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
733DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
734#else
735DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
736{
737 RTCCUINTREG xDX;
738# if RT_INLINE_ASM_GNU_STYLE
739# ifdef RT_ARCH_AMD64
740 RTCCUINTREG uSpill;
741 __asm__ ("cpuid"
742 : "=a" (uSpill),
743 "=d" (xDX)
744 : "0" (uOperator)
745 : "rbx", "rcx");
746# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
747 __asm__ ("push %%ebx\n\t"
748 "cpuid\n\t"
749 "pop %%ebx\n\t"
750 : "=a" (uOperator),
751 "=d" (xDX)
752 : "0" (uOperator)
753 : "ecx");
754# else
755 __asm__ ("cpuid"
756 : "=a" (uOperator),
757 "=d" (xDX)
758 : "0" (uOperator)
759 : "ebx", "ecx");
760# endif
761
762# elif RT_INLINE_ASM_USES_INTRIN
763 int aInfo[4];
764 __cpuid(aInfo, uOperator);
765 xDX = aInfo[3];
766
767# else
768 __asm
769 {
770 push ebx
771 mov eax, [uOperator]
772 cpuid
773 mov [xDX], edx
774 pop ebx
775 }
776# endif
777 return (uint32_t)xDX;
778}
779#endif
780
781
782/**
783 * Performs the cpuid instruction returning ecx.
784 *
785 * @param uOperator CPUID operation (eax).
786 * @returns ECX after cpuid operation.
787 */
788#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
789DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
790#else
791DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
792{
793 RTCCUINTREG xCX;
794# if RT_INLINE_ASM_GNU_STYLE
795# ifdef RT_ARCH_AMD64
796 RTCCUINTREG uSpill;
797 __asm__ ("cpuid"
798 : "=a" (uSpill),
799 "=c" (xCX)
800 : "0" (uOperator)
801 : "rbx", "rdx");
802# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
803 __asm__ ("push %%ebx\n\t"
804 "cpuid\n\t"
805 "pop %%ebx\n\t"
806 : "=a" (uOperator),
807 "=c" (xCX)
808 : "0" (uOperator)
809 : "edx");
810# else
811 __asm__ ("cpuid"
812 : "=a" (uOperator),
813 "=c" (xCX)
814 : "0" (uOperator)
815 : "ebx", "edx");
816
817# endif
818
819# elif RT_INLINE_ASM_USES_INTRIN
820 int aInfo[4];
821 __cpuid(aInfo, uOperator);
822 xCX = aInfo[2];
823
824# else
825 __asm
826 {
827 push ebx
828 mov eax, [uOperator]
829 cpuid
830 mov [xCX], ecx
831 pop ebx
832 }
833# endif
834 return (uint32_t)xCX;
835}
836#endif
837
838
839/**
840 * Checks if the current CPU supports CPUID.
841 *
842 * @returns true if CPUID is supported.
843 */
844DECLINLINE(bool) ASMHasCpuId(void)
845{
846#ifdef RT_ARCH_AMD64
847 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
848#else /* !RT_ARCH_AMD64 */
849 bool fRet = false;
850# if RT_INLINE_ASM_GNU_STYLE
851 uint32_t u1;
852 uint32_t u2;
853 __asm__ ("pushf\n\t"
854 "pop %1\n\t"
855 "mov %1, %2\n\t"
856 "xorl $0x200000, %1\n\t"
857 "push %1\n\t"
858 "popf\n\t"
859 "pushf\n\t"
860 "pop %1\n\t"
861 "cmpl %1, %2\n\t"
862 "setne %0\n\t"
863 "push %2\n\t"
864 "popf\n\t"
865 : "=m" (fRet), "=r" (u1), "=r" (u2));
866# else
867 __asm
868 {
869 pushfd
870 pop eax
871 mov ebx, eax
872 xor eax, 0200000h
873 push eax
874 popfd
875 pushfd
876 pop eax
877 cmp eax, ebx
878 setne fRet
879 push ebx
880 popfd
881 }
882# endif
883 return fRet;
884#endif /* !RT_ARCH_AMD64 */
885}
886
887
888/**
889 * Gets the APIC ID of the current CPU.
890 *
891 * @returns the APIC ID.
892 */
893#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
894DECLASM(uint8_t) ASMGetApicId(void);
895#else
896DECLINLINE(uint8_t) ASMGetApicId(void)
897{
898 RTCCUINTREG xBX;
899# if RT_INLINE_ASM_GNU_STYLE
900# ifdef RT_ARCH_AMD64
901 RTCCUINTREG uSpill;
902 __asm__ ("cpuid"
903 : "=a" (uSpill),
904 "=b" (xBX)
905 : "0" (1)
906 : "rcx", "rdx");
907# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
908 RTCCUINTREG uSpill;
909 __asm__ ("mov %%ebx,%1\n\t"
910 "cpuid\n\t"
911 "xchgl %%ebx,%1\n\t"
912 : "=a" (uSpill),
913 "=r" (xBX)
914 : "0" (1)
915 : "ecx", "edx");
916# else
917 RTCCUINTREG uSpill;
918 __asm__ ("cpuid"
919 : "=a" (uSpill),
920 "=b" (xBX)
921 : "0" (1)
922 : "ecx", "edx");
923# endif
924
925# elif RT_INLINE_ASM_USES_INTRIN
926 int aInfo[4];
927 __cpuid(aInfo, 1);
928 xBX = aInfo[1];
929
930# else
931 __asm
932 {
933 push ebx
934 mov eax, 1
935 cpuid
936 mov [xBX], ebx
937 pop ebx
938 }
939# endif
940 return (uint8_t)(xBX >> 24);
941}
942#endif
943
944
945/**
946 * Tests if this is a genuine Intel CPU based on the ASMCpuId(0) output.
947 *
948 * @returns true/false.
949 * @param uEBX EBX return from ASMCpuId(0)
950 * @param uECX ECX return from ASMCpuId(0)
951 * @param uEDX EDX return from ASMCpuId(0)
952 */
953DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
954{
955 return uEBX == 0x756e6547 /* 'Genu' */
956 && uECX == 0x6c65746e /* 'ntel' */
957 && uEDX == 0x49656e69; /* 'ineI' */
958}
959
960
961/**
962 * Tests if this is a genuine Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
974/**
975 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
976 *
977 * @returns Family.
978 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
979 */
980DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
981{
982 return ((uEAX >> 8) & 0xf) == 0xf
983 ? ((uEAX >> 20) & 0x7f) + 0xf
984 : ((uEAX >> 8) & 0xf);
985}
986
987
988/**
989 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
990 *
991 * @returns Model.
992 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
994 */
995DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
996{
997 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
998 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
999 : ((uEAX >> 4) & 0xf);
1000}
1001
1002
1003/**
1004 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1005 *
1006 * @returns Model.
1007 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1009 */
1010DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1011{
1012 return ((uEAX >> 8) & 0xf) == 0xf
1013 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1014 : ((uEAX >> 4) & 0xf);
1015}
1016
1017
1018/**
1019 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1020 *
1021 * @returns Model.
1022 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1023 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1024 */
1025DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1026{
1027 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1028 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1029 : ((uEAX >> 4) & 0xf);
1030}
1031
1032
1033/**
1034 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1035 *
1036 * @returns Stepping.
1037 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1038 */
1039DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1040{
1041 return uEAX & 0xf;
1042}
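
/*
 * A minimal sketch combining the helpers above on the standard leaf 1 output.
 *
 * @code
 * uint32_t uEAX, uEBX, uECX, uEDX;
 * ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 * bool     const fIntel    = ASMIsIntelCpu();
 * uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 * uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 * uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */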
1043
1044
1045/**
1046 * Get cr0.
1047 * @returns cr0.
1048 */
1049#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1050DECLASM(RTCCUINTREG) ASMGetCR0(void);
1051#else
1052DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1053{
1054 RTCCUINTREG uCR0;
1055# if RT_INLINE_ASM_USES_INTRIN
1056 uCR0 = __readcr0();
1057
1058# elif RT_INLINE_ASM_GNU_STYLE
1059# ifdef RT_ARCH_AMD64
1060 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1061# else
1062 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1063# endif
1064# else
1065 __asm
1066 {
1067# ifdef RT_ARCH_AMD64
1068 mov rax, cr0
1069 mov [uCR0], rax
1070# else
1071 mov eax, cr0
1072 mov [uCR0], eax
1073# endif
1074 }
1075# endif
1076 return uCR0;
1077}
1078#endif
1079
1080
1081/**
1082 * Sets the CR0 register.
1083 * @param uCR0 The new CR0 value.
1084 */
1085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1086DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1087#else
1088DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1089{
1090# if RT_INLINE_ASM_USES_INTRIN
1091 __writecr0(uCR0);
1092
1093# elif RT_INLINE_ASM_GNU_STYLE
1094# ifdef RT_ARCH_AMD64
1095 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1096# else
1097 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1098# endif
1099# else
1100 __asm
1101 {
1102# ifdef RT_ARCH_AMD64
1103 mov rax, [uCR0]
1104 mov cr0, rax
1105# else
1106 mov eax, [uCR0]
1107 mov cr0, eax
1108# endif
1109 }
1110# endif
1111}
1112#endif
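
/*
 * CR0 updates are normally read-modify-write; a minimal ring-0 sketch that sets
 * the Write Protect bit (bit 16, usually named X86_CR0_WP in the x86 headers).
 *
 * @code
 * ASMSetCR0(ASMGetCR0() | RT_BIT(16));    // CR0.WP
 * @endcode
 */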
1113
1114
1115/**
1116 * Get cr2.
1117 * @returns cr2.
1118 */
1119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1120DECLASM(RTCCUINTREG) ASMGetCR2(void);
1121#else
1122DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1123{
1124 RTCCUINTREG uCR2;
1125# if RT_INLINE_ASM_USES_INTRIN
1126 uCR2 = __readcr2();
1127
1128# elif RT_INLINE_ASM_GNU_STYLE
1129# ifdef RT_ARCH_AMD64
1130 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1131# else
1132 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1133# endif
1134# else
1135 __asm
1136 {
1137# ifdef RT_ARCH_AMD64
1138 mov rax, cr2
1139 mov [uCR2], rax
1140# else
1141 mov eax, cr2
1142 mov [uCR2], eax
1143# endif
1144 }
1145# endif
1146 return uCR2;
1147}
1148#endif
1149
1150
1151/**
1152 * Sets the CR2 register.
1153 * @param uCR2 The new CR2 value.
1154 */
1155#if RT_INLINE_ASM_EXTERNAL
1156DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1157#else
1158DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1159{
1160# if RT_INLINE_ASM_GNU_STYLE
1161# ifdef RT_ARCH_AMD64
1162 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1163# else
1164 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1165# endif
1166# else
1167 __asm
1168 {
1169# ifdef RT_ARCH_AMD64
1170 mov rax, [uCR2]
1171 mov cr2, rax
1172# else
1173 mov eax, [uCR2]
1174 mov cr2, eax
1175# endif
1176 }
1177# endif
1178}
1179#endif
1180
1181
1182/**
1183 * Get cr3.
1184 * @returns cr3.
1185 */
1186#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1187DECLASM(RTCCUINTREG) ASMGetCR3(void);
1188#else
1189DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1190{
1191 RTCCUINTREG uCR3;
1192# if RT_INLINE_ASM_USES_INTRIN
1193 uCR3 = __readcr3();
1194
1195# elif RT_INLINE_ASM_GNU_STYLE
1196# ifdef RT_ARCH_AMD64
1197 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1198# else
1199 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1200# endif
1201# else
1202 __asm
1203 {
1204# ifdef RT_ARCH_AMD64
1205 mov rax, cr3
1206 mov [uCR3], rax
1207# else
1208 mov eax, cr3
1209 mov [uCR3], eax
1210# endif
1211 }
1212# endif
1213 return uCR3;
1214}
1215#endif
1216
1217
1218/**
1219 * Sets the CR3 register.
1220 *
1221 * @param uCR3 New CR3 value.
1222 */
1223#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1224DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1225#else
1226DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1227{
1228# if RT_INLINE_ASM_USES_INTRIN
1229 __writecr3(uCR3);
1230
1231# elif RT_INLINE_ASM_GNU_STYLE
1232# ifdef RT_ARCH_AMD64
1233 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1234# else
1235 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1236# endif
1237# else
1238 __asm
1239 {
1240# ifdef RT_ARCH_AMD64
1241 mov rax, [uCR3]
1242 mov cr3, rax
1243# else
1244 mov eax, [uCR3]
1245 mov cr3, eax
1246# endif
1247 }
1248# endif
1249}
1250#endif
1251
1252
1253/**
1254 * Reloads the CR3 register.
1255 */
1256#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1257DECLASM(void) ASMReloadCR3(void);
1258#else
1259DECLINLINE(void) ASMReloadCR3(void)
1260{
1261# if RT_INLINE_ASM_USES_INTRIN
1262 __writecr3(__readcr3());
1263
1264# elif RT_INLINE_ASM_GNU_STYLE
1265 RTCCUINTREG u;
1266# ifdef RT_ARCH_AMD64
1267 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1268 "movq %0, %%cr3\n\t"
1269 : "=r" (u));
1270# else
1271 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1272 "movl %0, %%cr3\n\t"
1273 : "=r" (u));
1274# endif
1275# else
1276 __asm
1277 {
1278# ifdef RT_ARCH_AMD64
1279 mov rax, cr3
1280 mov cr3, rax
1281# else
1282 mov eax, cr3
1283 mov cr3, eax
1284# endif
1285 }
1286# endif
1287}
1288#endif
1289
1290
1291/**
1292 * Get cr4.
1293 * @returns cr4.
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(RTCCUINTREG) ASMGetCR4(void);
1297#else
1298DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1299{
1300 RTCCUINTREG uCR4;
1301# if RT_INLINE_ASM_USES_INTRIN
1302 uCR4 = __readcr4();
1303
1304# elif RT_INLINE_ASM_GNU_STYLE
1305# ifdef RT_ARCH_AMD64
1306 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1307# else
1308 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1309# endif
1310# else
1311 __asm
1312 {
1313# ifdef RT_ARCH_AMD64
1314 mov rax, cr4
1315 mov [uCR4], rax
1316# else
1317 push eax /* just in case */
1318 /*mov eax, cr4*/
1319 _emit 0x0f
1320 _emit 0x20
1321 _emit 0xe0
1322 mov [uCR4], eax
1323 pop eax
1324# endif
1325 }
1326# endif
1327 return uCR4;
1328}
1329#endif
1330
1331
1332/**
1333 * Sets the CR4 register.
1334 *
1335 * @param uCR4 New CR4 value.
1336 */
1337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1338DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1339#else
1340DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1341{
1342# if RT_INLINE_ASM_USES_INTRIN
1343 __writecr4(uCR4);
1344
1345# elif RT_INLINE_ASM_GNU_STYLE
1346# ifdef RT_ARCH_AMD64
1347 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1348# else
1349 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1350# endif
1351# else
1352 __asm
1353 {
1354# ifdef RT_ARCH_AMD64
1355 mov rax, [uCR4]
1356 mov cr4, rax
1357# else
1358 mov eax, [uCR4]
1359 _emit 0x0F
1360 _emit 0x22
1361 _emit 0xE0 /* mov cr4, eax */
1362# endif
1363 }
1364# endif
1365}
1366#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
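
/*
 * The typical pattern pairs this with ASMSetFlags() to form a short
 * interrupt-free section; a minimal sketch:
 *
 * @code
 * RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 * // ... touch data shared with an interrupt handler ...
 * ASMSetFlags(fSavedFlags);               // restores the previous EFLAGS.IF
 * @endcode
 */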
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 * @returns Register content.
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
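
/*
 * A minimal ring-0 sketch: read IA32_APIC_BASE (MSR 0x1b) and extract the
 * physical APIC base address (the low 12 bits hold flags).
 *
 * @code
 * uint64_t const uApicBase = ASMRdMsr(0x1b);
 * uint64_t const PhysApic  = uApicBase & ~(uint64_t)0xfff;
 * @endcode
 */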
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* Bits 31-16 and 11-4 are 1's; bits 15-12, 3-0 and 63-32 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are 1's; bits 15-12, 3-0 and 63-32 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* Bits 31-16 and 11-4 are 1's; bits 15-12 and 3-0 are zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731/**
1732 * Gets dr0.
1733 *
1734 * @returns dr0.
1735 */
1736#if RT_INLINE_ASM_EXTERNAL
1737DECLASM(RTCCUINTREG) ASMGetDR0(void);
1738#else
1739DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1740{
1741 RTCCUINTREG uDR0;
1742# if RT_INLINE_ASM_GNU_STYLE
1743# ifdef RT_ARCH_AMD64
1744 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1745# else
1746 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1747# endif
1748# else
1749 __asm
1750 {
1751# ifdef RT_ARCH_AMD64
1752 mov rax, dr0
1753 mov [uDR0], rax
1754# else
1755 mov eax, dr0
1756 mov [uDR0], eax
1757# endif
1758 }
1759# endif
1760 return uDR0;
1761}
1762#endif
1763
1764
1765/**
1766 * Gets dr1.
1767 *
1768 * @returns dr1.
1769 */
1770#if RT_INLINE_ASM_EXTERNAL
1771DECLASM(RTCCUINTREG) ASMGetDR1(void);
1772#else
1773DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1774{
1775 RTCCUINTREG uDR1;
1776# if RT_INLINE_ASM_GNU_STYLE
1777# ifdef RT_ARCH_AMD64
1778 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1779# else
1780 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1781# endif
1782# else
1783 __asm
1784 {
1785# ifdef RT_ARCH_AMD64
1786 mov rax, dr1
1787 mov [uDR1], rax
1788# else
1789 mov eax, dr1
1790 mov [uDR1], eax
1791# endif
1792 }
1793# endif
1794 return uDR1;
1795}
1796#endif
1797
1798/**
1799 * Gets dr2.
1800 *
1801 * @returns dr2.
1802 */
1803#if RT_INLINE_ASM_EXTERNAL
1804DECLASM(RTCCUINTREG) ASMGetDR2(void);
1805#else
1806DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1807{
1808 RTCCUINTREG uDR2;
1809# if RT_INLINE_ASM_GNU_STYLE
1810# ifdef RT_ARCH_AMD64
1811 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1812# else
1813 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1814# endif
1815# else
1816 __asm
1817 {
1818# ifdef RT_ARCH_AMD64
1819 mov rax, dr2
1820 mov [uDR2], rax
1821# else
1822 mov eax, dr2
1823 mov [uDR2], eax
1824# endif
1825 }
1826# endif
1827 return uDR2;
1828}
1829#endif
1830
1831/**
1832 * Gets dr3.
1833 *
1834 * @returns dr3.
1835 */
1836#if RT_INLINE_ASM_EXTERNAL
1837DECLASM(RTCCUINTREG) ASMGetDR3(void);
1838#else
1839DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1840{
1841 RTCCUINTREG uDR3;
1842# if RT_INLINE_ASM_GNU_STYLE
1843# ifdef RT_ARCH_AMD64
1844 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1845# else
1846 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1847# endif
1848# else
1849 __asm
1850 {
1851# ifdef RT_ARCH_AMD64
1852 mov rax, dr3
1853 mov [uDR3], rax
1854# else
1855 mov eax, dr3
1856 mov [uDR3], eax
1857# endif
1858 }
1859# endif
1860 return uDR3;
1861}
1862#endif
1863
1864/**
1865 * Sets dr0.
1866 *
1867 * @param uDRVal Debug register value to write
1868 */
1869#if RT_INLINE_ASM_EXTERNAL
1870DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1871#else
1872DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1873{
1874# if RT_INLINE_ASM_GNU_STYLE
1875# ifdef RT_ARCH_AMD64
1876 __asm__ __volatile__("movq %0, %%dr0\n\t" : "=r" (uDRVal));
1877# else
1878 __asm__ __volatile__("movl %0, %%dr0\n\t" : "=r" (uDRVal));
1879# endif
1880# else
1881 __asm
1882 {
1883# ifdef RT_ARCH_AMD64
1884 mov dr0, [uDRVal]
1885# else
1886 mov eax, [uDRVal]
1887 mov dr0, eax
1888# endif
1889 }
1890# endif
1891}
1892#endif
1893
1894/**
1895 * Sets dr1.
1896 *
1897 * @param uDRVal Debug register value to write
1898 */
1899#if RT_INLINE_ASM_EXTERNAL
1900DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1901#else
1902DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1903{
1904# if RT_INLINE_ASM_GNU_STYLE
1905# ifdef RT_ARCH_AMD64
1906 __asm__ __volatile__("movq %0, %%dr1\n\t" : "=r" (uDRVal));
1907# else
1908 __asm__ __volatile__("movl %0, %%dr1\n\t" : "=r" (uDRVal));
1909# endif
1910# else
1911 __asm
1912 {
1913# ifdef RT_ARCH_AMD64
1914 mov dr1, [uDRVal]
1915# else
1916 mov eax, [uDRVal]
1917 mov dr1, eax
1918# endif
1919 }
1920# endif
1921}
1922#endif
1923
1924/**
1925 * Sets dr2.
1926 *
1927 * @param uDRVal Debug register value to write
1928 */
1929#if RT_INLINE_ASM_EXTERNAL
1930DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1931#else
1932DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1933{
1934# if RT_INLINE_ASM_GNU_STYLE
1935# ifdef RT_ARCH_AMD64
1936 __asm__ __volatile__("movq %0, %%dr2\n\t" : "=r" (uDRVal));
1937# else
1938 __asm__ __volatile__("movl %0, %%dr2\n\t" : "=r" (uDRVal));
1939# endif
1940# else
1941 __asm
1942 {
1943# ifdef RT_ARCH_AMD64
1944 mov dr2, [uDRVal]
1945# else
1946 mov eax, [uDRVal]
1947 mov dr2, eax
1948# endif
1949 }
1950# endif
1951}
1952#endif
1953
1954/**
1955 * Sets dr3.
1956 *
1957 * @param uDRVal Debug register value to write
1958 */
1959#if RT_INLINE_ASM_EXTERNAL
1960DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
1961#else
1962DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
1963{
1964# if RT_INLINE_ASM_GNU_STYLE
1965# ifdef RT_ARCH_AMD64
1966 __asm__ __volatile__("movq %0, %%dr3\n\t" : "=r" (uDRVal));
1967# else
1968 __asm__ __volatile__("movl %0, %%dr3\n\t" : "=r" (uDRVal));
1969# endif
1970# else
1971 __asm
1972 {
1973# ifdef RT_ARCH_AMD64
1974 mov dr3, [uDRVal]
1975# else
1976 mov eax, [uDRVal]
1977 mov dr3, eax
1978# endif
1979 }
1980# endif
1981}
1982#endif
1983
1984/**
1985 * Sets dr6.
1986 *
1987 * @param uDRVal Debug register value to write
1988 */
1989#if RT_INLINE_ASM_EXTERNAL
1990DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
1991#else
1992DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
1993{
1994# if RT_INLINE_ASM_GNU_STYLE
1995# ifdef RT_ARCH_AMD64
1996 __asm__ __volatile__("movq %0, %%dr6\n\t" : "=r" (uDRVal));
1997# else
1998 __asm__ __volatile__("movl %0, %%dr6\n\t" : "=r" (uDRVal));
1999# endif
2000# else
2001 __asm
2002 {
2003# ifdef RT_ARCH_AMD64
2004 mov dr6, [uDRVal]
2005# else
2006 mov eax, [uDRVal]
2007 mov dr6, eax
2008# endif
2009 }
2010# endif
2011}
2012#endif
2013
2014/**
2015 * Sets dr7.
2016 *
2017 * @param uDRVal Debug register value to write
2018 */
2019#if RT_INLINE_ASM_EXTERNAL
2020DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2021#else
2022DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2023{
2024# if RT_INLINE_ASM_GNU_STYLE
2025# ifdef RT_ARCH_AMD64
2026 __asm__ __volatile__("movq %0, %%dr7\n\t" : "=r" (uDRVal));
2027# else
2028 __asm__ __volatile__("movl %0, %%dr7\n\t" : "=r" (uDRVal));
2029# endif
2030# else
2031 __asm
2032 {
2033# ifdef RT_ARCH_AMD64
2034 mov dr7, [uDRVal]
2035# else
2036 mov eax, [uDRVal]
2037 mov dr7, eax
2038# endif
2039 }
2040# endif
2041}
2042#endif
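
/*
 * A minimal ring-0 sketch arming DR0 as a 4 byte write watchpoint via DR7:
 * L0 is bit 0, R/W0 (bits 17:16) = 01b for writes, LEN0 (bits 19:18) = 11b for
 * 4 bytes. g_u32Watched is a hypothetical variable.
 *
 * @code
 * ASMSetDR0((uintptr_t)&g_u32Watched);
 * ASMSetDR7(RT_BIT(0) | RT_BIT(16) | RT_BIT(18) | RT_BIT(19));
 * @endcode
 */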
2043
2044/**
2045 * Compiler memory barrier.
2046 *
2047 * Ensures that the compiler does not keep any memory values cached in registers
2048 * (or temporary stack slots) and does not defer any outstanding writes past this point.
2049 *
2050 * This function must be used if non-volatile data is modified by a
2051 * device or the VMM. Typical cases are port access, MMIO access,
2052 * trapping instructions, etc.
2053 */
2054#if RT_INLINE_ASM_GNU_STYLE
2055# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2056#elif RT_INLINE_ASM_USES_INTRIN
2057# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2058#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2059DECLINLINE(void) ASMCompilerBarrier(void)
2060{
2061 __asm
2062 {
2063 }
2064}
2065#endif
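
/*
 * A minimal sketch of the MMIO/port case described above: make sure the flag
 * store is emitted before the (hypothetical) device doorbell is poked.
 *
 * @code
 * pSharedState->fRequestPending = true;   // hypothetical shared state
 * ASMCompilerBarrier();                   // don't let the store be deferred
 * ASMOutU8(uDoorbellPort, 1);             // hypothetical I/O mapped doorbell
 * @endcode
 */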
2066
2067
2068/**
2069 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2070 *
2071 * @param Port I/O port to write to.
2072 * @param u8 8-bit integer to write.
2073 */
2074#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2075DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2076#else
2077DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2078{
2079# if RT_INLINE_ASM_GNU_STYLE
2080 __asm__ __volatile__("outb %b1, %w0\n\t"
2081 :: "Nd" (Port),
2082 "a" (u8));
2083
2084# elif RT_INLINE_ASM_USES_INTRIN
2085 __outbyte(Port, u8);
2086
2087# else
2088 __asm
2089 {
2090 mov dx, [Port]
2091 mov al, [u8]
2092 out dx, al
2093 }
2094# endif
2095}
2096#endif
2097
2098
2099/**
2100 * Gets an 8-bit unsigned integer from an I/O port, ordered.
2101 *
2102 * @returns 8-bit integer.
2103 * @param Port I/O port to read from.
2104 */
2105#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2106DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2107#else
2108DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2109{
2110 uint8_t u8;
2111# if RT_INLINE_ASM_GNU_STYLE
2112 __asm__ __volatile__("inb %w1, %b0\n\t"
2113 : "=a" (u8)
2114 : "Nd" (Port));
2115
2116# elif RT_INLINE_ASM_USES_INTRIN
2117 u8 = __inbyte(Port);
2118
2119# else
2120 __asm
2121 {
2122 mov dx, [Port]
2123 in al, dx
2124 mov [u8], al
2125 }
2126# endif
2127 return u8;
2128}
2129#endif
2130
2131
2132/**
2133 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2134 *
2135 * @param Port I/O port to write to.
2136 * @param u16 16-bit integer to write.
2137 */
2138#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2139DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2140#else
2141DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2142{
2143# if RT_INLINE_ASM_GNU_STYLE
2144 __asm__ __volatile__("outw %w1, %w0\n\t"
2145 :: "Nd" (Port),
2146 "a" (u16));
2147
2148# elif RT_INLINE_ASM_USES_INTRIN
2149 __outword(Port, u16);
2150
2151# else
2152 __asm
2153 {
2154 mov dx, [Port]
2155 mov ax, [u16]
2156 out dx, ax
2157 }
2158# endif
2159}
2160#endif
2161
2162
2163/**
2164 * Gets a 16-bit unsigned integer from an I/O port, ordered.
2165 *
2166 * @returns 16-bit integer.
2167 * @param Port I/O port to read from.
2168 */
2169#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2170DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2171#else
2172DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2173{
2174 uint16_t u16;
2175# if RT_INLINE_ASM_GNU_STYLE
2176 __asm__ __volatile__("inw %w1, %w0\n\t"
2177 : "=a" (u16)
2178 : "Nd" (Port));
2179
2180# elif RT_INLINE_ASM_USES_INTRIN
2181 u16 = __inword(Port);
2182
2183# else
2184 __asm
2185 {
2186 mov dx, [Port]
2187 in ax, dx
2188 mov [u16], ax
2189 }
2190# endif
2191 return u16;
2192}
2193#endif
2194
2195
2196/**
2197 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2198 *
2199 * @param Port I/O port to write to.
2200 * @param u32 32-bit integer to write.
2201 */
2202#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2203DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2204#else
2205DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2206{
2207# if RT_INLINE_ASM_GNU_STYLE
2208 __asm__ __volatile__("outl %1, %w0\n\t"
2209 :: "Nd" (Port),
2210 "a" (u32));
2211
2212# elif RT_INLINE_ASM_USES_INTRIN
2213 __outdword(Port, u32);
2214
2215# else
2216 __asm
2217 {
2218 mov dx, [Port]
2219 mov eax, [u32]
2220 out dx, eax
2221 }
2222# endif
2223}
2224#endif
2225
2226
2227/**
2228 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2229 *
2230 * @returns 32-bit integer.
2231 * @param Port I/O port to read from.
2232 */
2233#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2234DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2235#else
2236DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2237{
2238 uint32_t u32;
2239# if RT_INLINE_ASM_GNU_STYLE
2240 __asm__ __volatile__("inl %w1, %0\n\t"
2241 : "=a" (u32)
2242 : "Nd" (Port));
2243
2244# elif RT_INLINE_ASM_USES_INTRIN
2245 u32 = __indword(Port);
2246
2247# else
2248 __asm
2249 {
2250 mov dx, [Port]
2251 in eax, dx
2252 mov [u32], eax
2253 }
2254# endif
2255 return u32;
2256}
2257#endif
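
/*
 * A minimal sketch of a classic port I/O sequence: read a CMOS/RTC register by
 * writing the index to port 0x70 and reading the data from port 0x71.
 *
 * @code
 * ASMOutU8(0x70, 0x0a);                   // select RTC status register A
 * uint8_t const bStatusA = ASMInU8(0x71);
 * @endcode
 */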
2258
2259/** @todo string i/o */
2260
2261
2262/**
2263 * Atomically Exchange an unsigned 8-bit value, ordered.
2264 *
2265 * @returns Current *pu8 value
2266 * @param pu8 Pointer to the 8-bit variable to update.
2267 * @param u8 The 8-bit value to assign to *pu8.
2268 */
2269#if RT_INLINE_ASM_EXTERNAL
2270DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2271#else
2272DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2273{
2274# if RT_INLINE_ASM_GNU_STYLE
2275 __asm__ __volatile__("xchgb %0, %1\n\t"
2276 : "=m" (*pu8),
2277 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2278 : "1" (u8));
2279# else
2280 __asm
2281 {
2282# ifdef RT_ARCH_AMD64
2283 mov rdx, [pu8]
2284 mov al, [u8]
2285 xchg [rdx], al
2286 mov [u8], al
2287# else
2288 mov edx, [pu8]
2289 mov al, [u8]
2290 xchg [edx], al
2291 mov [u8], al
2292# endif
2293 }
2294# endif
2295 return u8;
2296}
2297#endif
2298
2299
2300/**
2301 * Atomically Exchange a signed 8-bit value, ordered.
2302 *
2303 * @returns Current *pi8 value
2304 * @param pi8 Pointer to the 8-bit variable to update.
2305 * @param i8 The 8-bit value to assign to *pi8.
2306 */
2307DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2308{
2309 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2310}
2311
2312
2313/**
2314 * Atomically Exchange a bool value, ordered.
2315 *
2316 * @returns Current *pf value
2317 * @param pf Pointer to the 8-bit variable to update.
2318 * @param f The boolean value to assign to *pf.
2319 */
2320DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2321{
2322#ifdef _MSC_VER
2323 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2324#else
2325 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2326#endif
2327}
2328
2329
2330/**
2331 * Atomically Exchange an unsigned 16-bit value, ordered.
2332 *
2333 * @returns Current *pu16 value
2334 * @param pu16 Pointer to the 16-bit variable to update.
2335 * @param u16 The 16-bit value to assign to *pu16.
2336 */
2337#if RT_INLINE_ASM_EXTERNAL
2338DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2339#else
2340DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2341{
2342# if RT_INLINE_ASM_GNU_STYLE
2343 __asm__ __volatile__("xchgw %0, %1\n\t"
2344 : "=m" (*pu16),
2345 "=r" (u16)
2346 : "1" (u16));
2347# else
2348 __asm
2349 {
2350# ifdef RT_ARCH_AMD64
2351 mov rdx, [pu16]
2352 mov ax, [u16]
2353 xchg [rdx], ax
2354 mov [u16], ax
2355# else
2356 mov edx, [pu16]
2357 mov ax, [u16]
2358 xchg [edx], ax
2359 mov [u16], ax
2360# endif
2361 }
2362# endif
2363 return u16;
2364}
2365#endif
2366
2367
2368/**
2369 * Atomically Exchange a signed 16-bit value, ordered.
2370 *
2371 * @returns Current *pi16 value
2372 * @param pi16 Pointer to the 16-bit variable to update.
2373 * @param i16 The 16-bit value to assign to *pi16.
2374 */
2375DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2376{
2377 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2378}
2379
2380
2381/**
2382 * Atomically Exchange an unsigned 32-bit value, ordered.
2383 *
2384 * @returns Current *pu32 value
2385 * @param pu32 Pointer to the 32-bit variable to update.
2386 * @param u32 The 32-bit value to assign to *pu32.
2387 */
2388#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2389DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2390#else
2391DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2392{
2393# if RT_INLINE_ASM_GNU_STYLE
2394 __asm__ __volatile__("xchgl %0, %1\n\t"
2395 : "=m" (*pu32),
2396 "=r" (u32)
2397 : "1" (u32));
2398
2399# elif RT_INLINE_ASM_USES_INTRIN
2400 u32 = _InterlockedExchange((long *)pu32, u32);
2401
2402# else
2403 __asm
2404 {
2405# ifdef RT_ARCH_AMD64
2406 mov rdx, [pu32]
2407 mov eax, u32
2408 xchg [rdx], eax
2409 mov [u32], eax
2410# else
2411 mov edx, [pu32]
2412 mov eax, u32
2413 xchg [edx], eax
2414 mov [u32], eax
2415# endif
2416 }
2417# endif
2418 return u32;
2419}
2420#endif
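
/*
 * A minimal sketch of the classic exchange based spinlock, relying on the
 * ordered semantics described at the top of this file. g_fLocked is a
 * hypothetical lock word.
 *
 * @code
 * static volatile uint32_t g_fLocked = 0;
 *
 * void Lock(void)
 * {
 *     while (ASMAtomicXchgU32(&g_fLocked, 1) != 0)
 *         ;                               // spin
 * }
 *
 * void Unlock(void)
 * {
 *     ASMAtomicXchgU32(&g_fLocked, 0);
 * }
 * @endcode
 */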
2421
2422
2423/**
2424 * Atomically Exchange a signed 32-bit value, ordered.
2425 *
2426 * @returns Current *pi32 value
2427 * @param pi32 Pointer to the 32-bit variable to update.
2428 * @param i32 The 32-bit value to assign to *pi32.
2429 */
2430DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2431{
2432 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2433}
2434
2435
2436/**
2437 * Atomically Exchange an unsigned 64-bit value, ordered.
2438 *
2439 * @returns Current *pu64 value
2440 * @param pu64 Pointer to the 64-bit variable to update.
2441 * @param u64 The 64-bit value to assign to *pu64.
2442 */
2443#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2445#else
2446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2447{
2448# if defined(RT_ARCH_AMD64)
2449# if RT_INLINE_ASM_USES_INTRIN
2450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2451
2452# elif RT_INLINE_ASM_GNU_STYLE
2453 __asm__ __volatile__("xchgq %0, %1\n\t"
2454 : "=m" (*pu64),
2455 "=r" (u64)
2456 : "1" (u64));
2457# else
2458 __asm
2459 {
2460 mov rdx, [pu64]
2461 mov rax, [u64]
2462 xchg [rdx], rax
2463 mov [u64], rax
2464 }
2465# endif
2466# else /* !RT_ARCH_AMD64 */
2467# if RT_INLINE_ASM_GNU_STYLE
2468# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2469 uint32_t u32EBX = (uint32_t)u64;
2470 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2471 "xchgl %%ebx, %3\n\t"
2472 "1:\n\t"
2473 "lock; cmpxchg8b (%5)\n\t"
2474 "jnz 1b\n\t"
2475 "movl %3, %%ebx\n\t"
2476 /*"xchgl %%esi, %5\n\t"*/
2477 : "=A" (u64),
2478 "=m" (*pu64)
2479 : "0" (*pu64),
2480 "m" ( u32EBX ),
2481 "c" ( (uint32_t)(u64 >> 32) ),
2482 "S" (pu64) );
2483# else /* !PIC */
2484 __asm__ __volatile__("1:\n\t"
2485 "lock; cmpxchg8b %1\n\t"
2486 "jnz 1b\n\t"
2487 : "=A" (u64),
2488 "=m" (*pu64)
2489 : "0" (*pu64),
2490 "b" ( (uint32_t)u64 ),
2491 "c" ( (uint32_t)(u64 >> 32) ));
2492# endif
2493# else
2494 __asm
2495 {
2496 mov ebx, dword ptr [u64]
2497 mov ecx, dword ptr [u64 + 4]
2498 mov edi, pu64
2499 mov eax, dword ptr [edi]
2500 mov edx, dword ptr [edi + 4]
2501 retry:
2502 lock cmpxchg8b [edi]
2503 jnz retry
2504 mov dword ptr [u64], eax
2505 mov dword ptr [u64 + 4], edx
2506 }
2507# endif
2508# endif /* !RT_ARCH_AMD64 */
2509 return u64;
2510}
2511#endif
2512
2513
2514/**
2515 * Atomically Exchange a signed 64-bit value, ordered.
2516 *
2517 * @returns Current *pi64 value
2518 * @param pi64 Pointer to the 64-bit variable to update.
2519 * @param i64 The 64-bit value to assign to *pi64.
2520 */
2521DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2522{
2523 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2524}
2525
2526
2527#ifdef RT_ARCH_AMD64
2528/**
2529 * Atomically Exchange an unsigned 128-bit value, ordered.
2530 *
2531 * @returns Current *pu128.
2532 * @param pu128 Pointer to the 128-bit variable to update.
2533 * @param u128 The 128-bit value to assign to *pu128.
2534 *
2535 * @remark We cannot really assume that any hardware supports this. Nor do I have
2536 * GAS support for it. So, for the time being we'll BREAK the atomic
2537 * bit of this function and use two 64-bit exchanges instead.
2538 */
2539# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2540DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2541# else
2542DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2543{
2544 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2545 {
2546 /** @todo this is clumsy code */
2547 RTUINT128U u128Ret;
2548 u128Ret.u = u128;
2549 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2550 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2551 return u128Ret.u;
2552 }
2553#if 0 /* later? */
2554 else
2555 {
2556# if RT_INLINE_ASM_GNU_STYLE
2557 __asm__ __volatile__("1:\n\t"
2558 "lock; cmpxchg8b %1\n\t"
2559 "jnz 1b\n\t"
2560 : "=A" (u128),
2561 "=m" (*pu128)
2562 : "0" (*pu128),
2563 "b" ( (uint64_t)u128 ),
2564 "c" ( (uint64_t)(u128 >> 64) ));
2565# else
2566 __asm
2567 {
2568 mov rbx, dword ptr [u128]
2569 mov rcx, dword ptr [u128 + 8]
2570 mov rdi, pu128
2571 mov rax, dword ptr [rdi]
2572 mov rdx, dword ptr [rdi + 8]
2573 retry:
2574 lock cmpxchg16b [rdi]
2575 jnz retry
2576 mov dword ptr [u128], rax
2577 mov dword ptr [u128 + 8], rdx
2578 }
2579# endif
2580 }
2581 return u128;
2582#endif
2583}
2584# endif
2585#endif /* RT_ARCH_AMD64 */
2586
2587
2588/**
2589 * Atomically Exchange a pointer value, ordered.
2590 *
2591 * @returns Current *ppv value
2592 * @param ppv Pointer to the pointer variable to update.
2593 * @param pv The pointer value to assign to *ppv.
2594 */
2595DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2596{
2597#if ARCH_BITS == 32
2598 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2599#elif ARCH_BITS == 64
2600 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2601#else
2602# error "ARCH_BITS is bogus"
2603#endif
2604}
2605
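/* Usage sketch (illustrative only; the Example* names are not part of IPRT):
 * hand a pending pointer over to exactly one consumer by swapping NULL into
 * the shared slot with ASMAtomicXchgPtr. */
typedef struct ExampleWorkItem { uint32_t uPayload; } ExampleWorkItem;

DECLINLINE(ExampleWorkItem *) ExampleGrabPendingWork(ExampleWorkItem * volatile *ppPending)
{
    /* The previous pointer is returned and the slot is left NULL, so no two
       callers can end up owning the same work item. */
    return (ExampleWorkItem *)ASMAtomicXchgPtr((void * volatile *)ppPending, NULL);
}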
2606
2607/** @def ASMAtomicXchgHandle
2608 * Atomically Exchange a typical IPRT handle value, ordered.
2609 *
2610 * @param ph Pointer to the value to update.
2611 * @param hNew The new value to assign to *ph.
2612 * @param phRes Where to store the current *ph value.
2613 *
2614 * @remarks This doesn't currently work for all handles (like RTFILE).
2615 */
2616#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2617 do { \
2618 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (void *)(hNew)); \
2619 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2620 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2621 } while (0)
2622
2623
2624/**
2625 * Atomically Exchange a value whose size might differ
2626 * between platforms or compilers, ordered.
2627 *
2628 * @param pu Pointer to the variable to update.
2629 * @param uNew The value to assign to *pu.
2630 * @todo This is busted as it's missing the result argument.
2631 */
2632#define ASMAtomicXchgSize(pu, uNew) \
2633 do { \
2634 switch (sizeof(*(pu))) { \
2635 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2636 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2637 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2638 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2639 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2640 } \
2641 } while (0)
2642
2643/**
2644 * Atomically Exchange a value whose size might differ
2645 * between platforms or compilers, ordered.
2646 *
2647 * @param pu Pointer to the variable to update.
2648 * @param uNew The value to assign to *pu.
2649 * @param puRes Where to store the current *pu value.
2650 */
2651#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2652 do { \
2653 switch (sizeof(*(pu))) { \
2654 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2655 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2656 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2657 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2658            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2659 } \
2660 } while (0)
2661
2662
2663/**
2664 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2665 *
2666 * @returns true if xchg was done.
2667 * @returns false if xchg wasn't done.
2668 *
2669 * @param pu32 Pointer to the value to update.
2670 * @param u32New The new value to assign to *pu32.
2671 * @param u32Old The old value to compare *pu32 with.
2672 */
2673#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2674DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2675#else
2676DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2677{
2678# if RT_INLINE_ASM_GNU_STYLE
2679 uint8_t u8Ret;
2680 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2681 "setz %1\n\t"
2682 : "=m" (*pu32),
2683 "=qm" (u8Ret),
2684 "=a" (u32Old)
2685 : "r" (u32New),
2686 "2" (u32Old));
2687 return (bool)u8Ret;
2688
2689# elif RT_INLINE_ASM_USES_INTRIN
2690 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2691
2692# else
2693 uint32_t u32Ret;
2694 __asm
2695 {
2696# ifdef RT_ARCH_AMD64
2697 mov rdx, [pu32]
2698# else
2699 mov edx, [pu32]
2700# endif
2701 mov eax, [u32Old]
2702 mov ecx, [u32New]
2703# ifdef RT_ARCH_AMD64
2704 lock cmpxchg [rdx], ecx
2705# else
2706 lock cmpxchg [edx], ecx
2707# endif
2708 setz al
2709 movzx eax, al
2710 mov [u32Ret], eax
2711 }
2712 return !!u32Ret;
2713# endif
2714}
2715#endif
2716
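/* Usage sketch (illustrative only; the lock-word convention of 0 = free and
 * 1 = taken and the Example* name are assumptions): a minimal try-lock built
 * on ASMAtomicCmpXchgU32. */
DECLINLINE(bool) ExampleTryAcquireLock(uint32_t volatile *pu32Lock)
{
    /* Only the caller that changes the word from 0 to 1 gets the lock. */
    return ASMAtomicCmpXchgU32(pu32Lock, 1 /* new */, 0 /* expected old */);
}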
2717
2718/**
2719 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2720 *
2721 * @returns true if xchg was done.
2722 * @returns false if xchg wasn't done.
2723 *
2724 * @param pi32 Pointer to the value to update.
2725 * @param i32New The new value to assign to *pi32.
2726 * @param i32Old The old value to compare *pi32 with.
2727 */
2728DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2729{
2730 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2731}
2732
2733
2734/**
2735 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2736 *
2737 * @returns true if xchg was done.
2738 * @returns false if xchg wasn't done.
2739 *
2740 * @param pu64 Pointer to the 64-bit variable to update.
2741 * @param u64New The 64-bit value to assign to *pu64.
2742 * @param u64Old The value to compare with.
2743 */
2744#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2745DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2746#else
2747DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2748{
2749# if RT_INLINE_ASM_USES_INTRIN
2750 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2751
2752# elif defined(RT_ARCH_AMD64)
2753# if RT_INLINE_ASM_GNU_STYLE
2754 uint8_t u8Ret;
2755 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2756 "setz %1\n\t"
2757 : "=m" (*pu64),
2758 "=qm" (u8Ret),
2759 "=a" (u64Old)
2760 : "r" (u64New),
2761 "2" (u64Old));
2762 return (bool)u8Ret;
2763# else
2764 bool fRet;
2765 __asm
2766 {
2767        mov     rdx, [pu64]
2768 mov rax, [u64Old]
2769 mov rcx, [u64New]
2770 lock cmpxchg [rdx], rcx
2771 setz al
2772 mov [fRet], al
2773 }
2774 return fRet;
2775# endif
2776# else /* !RT_ARCH_AMD64 */
2777 uint32_t u32Ret;
2778# if RT_INLINE_ASM_GNU_STYLE
2779# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2780 uint32_t u32EBX = (uint32_t)u64New;
2781 uint32_t u32Spill;
2782 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2783 "lock; cmpxchg8b (%6)\n\t"
2784 "setz %%al\n\t"
2785 "movl %4, %%ebx\n\t"
2786 "movzbl %%al, %%eax\n\t"
2787 : "=a" (u32Ret),
2788 "=d" (u32Spill),
2789 "=m" (*pu64)
2790 : "A" (u64Old),
2791 "m" ( u32EBX ),
2792 "c" ( (uint32_t)(u64New >> 32) ),
2793 "S" (pu64) );
2794# else /* !PIC */
2795 uint32_t u32Spill;
2796 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2797 "setz %%al\n\t"
2798 "movzbl %%al, %%eax\n\t"
2799 : "=a" (u32Ret),
2800 "=d" (u32Spill),
2801 "=m" (*pu64)
2802 : "A" (u64Old),
2803 "b" ( (uint32_t)u64New ),
2804 "c" ( (uint32_t)(u64New >> 32) ));
2805# endif
2806 return (bool)u32Ret;
2807# else
2808 __asm
2809 {
2810 mov ebx, dword ptr [u64New]
2811 mov ecx, dword ptr [u64New + 4]
2812 mov edi, [pu64]
2813 mov eax, dword ptr [u64Old]
2814 mov edx, dword ptr [u64Old + 4]
2815 lock cmpxchg8b [edi]
2816 setz al
2817 movzx eax, al
2818 mov dword ptr [u32Ret], eax
2819 }
2820 return !!u32Ret;
2821# endif
2822# endif /* !RT_ARCH_AMD64 */
2823}
2824#endif
2825
2826
2827/**
2828 * Atomically Compare and exchange a signed 64-bit value, ordered.
2829 *
2830 * @returns true if xchg was done.
2831 * @returns false if xchg wasn't done.
2832 *
2833 * @param pi64 Pointer to the 64-bit variable to update.
2834 * @param i64 The 64-bit value to assign to *pi64.
2835 * @param i64Old The value to compare with.
2836 */
2837DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2838{
2839 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2840}
2841
2842
2843/**
2844 * Atomically Compare and Exchange a pointer value, ordered.
2845 *
2846 * @returns true if xchg was done.
2847 * @returns false if xchg wasn't done.
2848 *
2849 * @param ppv Pointer to the value to update.
2850 * @param pvNew The new value to assign to *ppv.
2851 * @param pvOld The old value to compare *ppv with.
2852 */
2853DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2854{
2855#if ARCH_BITS == 32
2856 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2857#elif ARCH_BITS == 64
2858 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2859#else
2860# error "ARCH_BITS is bogus"
2861#endif
2862}
2863
2864
2865/** @def ASMAtomicCmpXchgHandle
2866 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2867 *
2868 * @param ph Pointer to the value to update.
2869 * @param hNew The new value to assign to *ph.
2870 * @param hOld The old value to compare *ph with.
2871 * @param fRc Where to store the result.
2872 *
2873 * @remarks This doesn't currently work for all handles (like RTFILE).
2874 */
2875#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2876 do { \
2877 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2878 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2879 } while (0)
2880
2881
2882/** @def ASMAtomicCmpXchgSize
2883 * Atomically Compare and Exchange a value whose size might differ
2884 * between platforms or compilers, ordered.
2885 *
2886 * @param pu Pointer to the value to update.
2887 * @param uNew The new value to assign to *pu.
2888 * @param uOld The old value to compare *pu with.
2889 * @param fRc Where to store the result.
2890 */
2891#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2892 do { \
2893 switch (sizeof(*(pu))) { \
2894 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2895 break; \
2896 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2897 break; \
2898 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2899 (fRc) = false; \
2900 break; \
2901 } \
2902 } while (0)
2903
2904
2905/**
2906 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2907 * passes back old value, ordered.
2908 *
2909 * @returns true if xchg was done.
2910 * @returns false if xchg wasn't done.
2911 *
2912 * @param pu32 Pointer to the value to update.
2913 * @param u32New The new value to assign to *pu32.
2914 * @param u32Old The old value to compare *pu32 with.
2915 * @param pu32Old Pointer to store the old value at.
2916 */
2917#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2918DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2919#else
2920DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2921{
2922# if RT_INLINE_ASM_GNU_STYLE
2923 uint8_t u8Ret;
2924 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2925 "setz %1\n\t"
2926 : "=m" (*pu32),
2927 "=qm" (u8Ret),
2928 "=a" (*pu32Old)
2929 : "r" (u32New),
2930 "a" (u32Old));
2931 return (bool)u8Ret;
2932
2933# elif RT_INLINE_ASM_USES_INTRIN
2934 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2935
2936# else
2937 uint32_t u32Ret;
2938 __asm
2939 {
2940# ifdef RT_ARCH_AMD64
2941 mov rdx, [pu32]
2942# else
2943 mov edx, [pu32]
2944# endif
2945 mov eax, [u32Old]
2946 mov ecx, [u32New]
2947# ifdef RT_ARCH_AMD64
2948 lock cmpxchg [rdx], ecx
2949 mov rdx, [pu32Old]
2950 mov [rdx], eax
2951# else
2952 lock cmpxchg [edx], ecx
2953 mov edx, [pu32Old]
2954 mov [edx], eax
2955# endif
2956 setz al
2957 movzx eax, al
2958 mov [u32Ret], eax
2959 }
2960 return !!u32Ret;
2961# endif
2962}
2963#endif
2964
2965
2966/**
2967 * Atomically Compare and Exchange a signed 32-bit value, additionally
2968 * passes back old value, ordered.
2969 *
2970 * @returns true if xchg was done.
2971 * @returns false if xchg wasn't done.
2972 *
2973 * @param pi32 Pointer to the value to update.
2974 * @param i32New The new value to assign to *pi32.
2975 * @param i32Old The old value to compare *pi32 with.
2976 * @param pi32Old Pointer to store the old value at.
2977 */
2978DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2979{
2980 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2981}
2982
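/* Usage sketch (illustrative only; the Example* name is an assumption): keep
 * a running 32-bit maximum without taking a lock.  ASMAtomicCmpXchgExU32
 * hands back the value that was actually in memory, so a failed attempt never
 * requires a separate re-read of *pu32Max. */
DECLINLINE(void) ExampleAtomicKeepMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
{
    uint32_t u32Old = *pu32Max; /* initial guess; the compare-exchange validates it */
    while (   u32New > u32Old
           && !ASMAtomicCmpXchgExU32(pu32Max, u32New, u32Old, &u32Old))
        /* u32Old now holds the current value; loop to re-check and retry. */;
}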
2983
2984/**
2985 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2986 * passing back old value, ordered.
2987 *
2988 * @returns true if xchg was done.
2989 * @returns false if xchg wasn't done.
2990 *
2991 * @param pu64 Pointer to the 64-bit variable to update.
2992 * @param u64New The 64-bit value to assign to *pu64.
2993 * @param u64Old The value to compare with.
2994 * @param pu64Old Pointer to store the old value at.
2995 */
2996#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2997DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2998#else
2999DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3000{
3001# if RT_INLINE_ASM_USES_INTRIN
3002 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3003
3004# elif defined(RT_ARCH_AMD64)
3005# if RT_INLINE_ASM_GNU_STYLE
3006 uint8_t u8Ret;
3007 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3008 "setz %1\n\t"
3009 : "=m" (*pu64),
3010 "=qm" (u8Ret),
3011 "=a" (*pu64Old)
3012 : "r" (u64New),
3013 "a" (u64Old));
3014 return (bool)u8Ret;
3015# else
3016 bool fRet;
3017 __asm
3018 {
3019        mov     rdx, [pu64]
3020 mov rax, [u64Old]
3021 mov rcx, [u64New]
3022 lock cmpxchg [rdx], rcx
3023 mov rdx, [pu64Old]
3024 mov [rdx], rax
3025 setz al
3026 mov [fRet], al
3027 }
3028 return fRet;
3029# endif
3030# else /* !RT_ARCH_AMD64 */
3031# if RT_INLINE_ASM_GNU_STYLE
3032 uint64_t u64Ret;
3033# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3034 /* NB: this code uses a memory clobber description, because the clean
3035 * solution with an output value for *pu64 makes gcc run out of registers.
3036 * This will cause suboptimal code, and anyone with a better solution is
3037 * welcome to improve this. */
3038 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3039 "lock; cmpxchg8b %3\n\t"
3040 "xchgl %%ebx, %1\n\t"
3041 : "=A" (u64Ret)
3042 : "DS" ((uint32_t)u64New),
3043 "c" ((uint32_t)(u64New >> 32)),
3044 "m" (*pu64),
3045 "0" (u64Old)
3046 : "memory" );
3047# else /* !PIC */
3048 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3049 : "=A" (u64Ret),
3050 "=m" (*pu64)
3051 : "b" ((uint32_t)u64New),
3052 "c" ((uint32_t)(u64New >> 32)),
3053 "m" (*pu64),
3054 "0" (u64Old));
3055# endif
3056 *pu64Old = u64Ret;
3057 return u64Ret == u64Old;
3058# else
3059 uint32_t u32Ret;
3060 __asm
3061 {
3062 mov ebx, dword ptr [u64New]
3063 mov ecx, dword ptr [u64New + 4]
3064 mov edi, [pu64]
3065 mov eax, dword ptr [u64Old]
3066 mov edx, dword ptr [u64Old + 4]
3067 lock cmpxchg8b [edi]
3068 mov ebx, [pu64Old]
3069 mov [ebx], eax
3070 setz al
3071 movzx eax, al
3072 add ebx, 4
3073 mov [ebx], edx
3074 mov dword ptr [u32Ret], eax
3075 }
3076 return !!u32Ret;
3077# endif
3078# endif /* !RT_ARCH_AMD64 */
3079}
3080#endif
3081
3082
3083/**
3084 * Atomically Compare and exchange a signed 64-bit value, additionally
3085 * passing back old value, ordered.
3086 *
3087 * @returns true if xchg was done.
3088 * @returns false if xchg wasn't done.
3089 *
3090 * @param pi64 Pointer to the 64-bit variable to update.
3091 * @param i64 The 64-bit value to assign to *pi64.
3092 * @param i64Old The value to compare with.
3093 * @param pi64Old Pointer to store the old value at.
3094 */
3095DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3096{
3097 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3098}
3099
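/* Usage sketch (illustrative only; the Example* name is an assumption, and it
 * assumes no dedicated 64-bit add helper is available): a 64-bit counter can
 * be advanced with a compare-exchange loop, since ASMAtomicCmpXchgExU64
 * refreshes u64Old on every failed attempt. */
DECLINLINE(uint64_t) ExampleAtomicAddU64(uint64_t volatile *pu64, uint64_t u64Delta)
{
    uint64_t u64Old = *pu64; /* initial guess; may be torn on 32-bit hosts, the CAS corrects it */
    while (!ASMAtomicCmpXchgExU64(pu64, u64Old + u64Delta, u64Old, &u64Old))
        /* u64Old has been refreshed with the current value, try again. */;
    return u64Old + u64Delta; /* the new value */
}
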
3100/** @def ASMAtomicCmpXchgExHandle
3101 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3102 *
3103 * @param ph Pointer to the value to update.
3104 * @param hNew The new value to assign to *ph.
3105 * @param hOld The old value to compare *ph with.
3106 * @param fRc Where to store the result.
3107 * @param phOldVal Pointer to where to store the old value.
3108 *
3109 * @remarks This doesn't currently work for all handles (like RTFILE).
3110 */
3111#if ARCH_BITS == 32
3112# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3113 do { \
3114        (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3115 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3116 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3117 } while (0)
3118#elif ARCH_BITS == 64
3119# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3120 do { \
3121        (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3122 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3123 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3124 } while (0)
3125#endif
3126
3127
3128/** @def ASMAtomicCmpXchgExSize
3129 * Atomically Compare and Exchange a value whose size might differ
3130 * between platforms or compilers. Additionally passes back old value.
3131 *
3132 * @param pu Pointer to the value to update.
3133 * @param uNew The new value to assign to *pu.
3134 * @param uOld The old value to compare *pu with.
3135 * @param fRc Where to store the result.
3136 * @param puOldVal Pointer to where to store the old value.
3137 */
3138#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3139 do { \
3140 switch (sizeof(*(pu))) { \
3141            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3142                break; \
3143            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3144                break; \
3145            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3146                (fRc) = false; \
3147                *(puOldVal) = 0; \
3148 break; \
3149 } \
3150 } while (0)
3151
3152
3153/**
3154 * Atomically Compare and Exchange a pointer value, additionally
3155 * passing back old value, ordered.
3156 *
3157 * @returns true if xchg was done.
3158 * @returns false if xchg wasn't done.
3159 *
3160 * @param ppv Pointer to the value to update.
3161 * @param pvNew The new value to assign to *ppv.
3162 * @param pvOld The old value to compare *ppv with.
3163 * @param ppvOld Pointer to store the old value at.
3164 */
3165DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
3166{
3167#if ARCH_BITS == 32
3168 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3169#elif ARCH_BITS == 64
3170 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3171#else
3172# error "ARCH_BITS is bogus"
3173#endif
3174}
3175
3176
3177/**
3178 * Atomically exchanges and adds to a 32-bit value, ordered.
3179 *
3180 * @returns The old value.
3181 * @param pu32 Pointer to the value.
3182 * @param u32 Number to add.
3183 */
3184#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3185DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3186#else
3187DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3188{
3189# if RT_INLINE_ASM_USES_INTRIN
3190 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3191 return u32;
3192
3193# elif RT_INLINE_ASM_GNU_STYLE
3194 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3195 : "=r" (u32),
3196 "=m" (*pu32)
3197 : "0" (u32)
3198 : "memory");
3199 return u32;
3200# else
3201 __asm
3202 {
3203 mov eax, [u32]
3204# ifdef RT_ARCH_AMD64
3205 mov rdx, [pu32]
3206 lock xadd [rdx], eax
3207# else
3208 mov edx, [pu32]
3209 lock xadd [edx], eax
3210# endif
3211 mov [u32], eax
3212 }
3213 return u32;
3214# endif
3215}
3216#endif
3217
3218
3219/**
3220 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3221 *
3222 * @returns The old value.
3223 * @param pi32 Pointer to the value.
3224 * @param i32 Number to add.
3225 */
3226DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3227{
3228 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3229}
3230
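/* Usage sketch (illustrative only; the Example* name is an assumption):
 * ASMAtomicAddU32 returns the value the counter had before the addition,
 * which makes it a natural way to hand out unique, consecutive indexes
 * (ring-buffer slots, ticket numbers and the like). */
DECLINLINE(uint32_t) ExampleAllocIndex(uint32_t volatile *pu32Next)
{
    /* Each caller receives a distinct previous value. */
    return ASMAtomicAddU32(pu32Next, 1);
}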
3231
3232/**
3233 * Atomically increment a 32-bit value, ordered.
3234 *
3235 * @returns The new value.
3236 * @param pu32 Pointer to the value to increment.
3237 */
3238#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3239DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3240#else
3241DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3242{
3243 uint32_t u32;
3244# if RT_INLINE_ASM_USES_INTRIN
3245 u32 = _InterlockedIncrement((long *)pu32);
3246 return u32;
3247
3248# elif RT_INLINE_ASM_GNU_STYLE
3249 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3250 : "=r" (u32),
3251 "=m" (*pu32)
3252 : "0" (1)
3253 : "memory");
3254 return u32+1;
3255# else
3256 __asm
3257 {
3258 mov eax, 1
3259# ifdef RT_ARCH_AMD64
3260 mov rdx, [pu32]
3261 lock xadd [rdx], eax
3262# else
3263 mov edx, [pu32]
3264 lock xadd [edx], eax
3265# endif
3266 mov u32, eax
3267 }
3268 return u32+1;
3269# endif
3270}
3271#endif
3272
3273
3274/**
3275 * Atomically increment a signed 32-bit value, ordered.
3276 *
3277 * @returns The new value.
3278 * @param pi32 Pointer to the value to increment.
3279 */
3280DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3281{
3282 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3283}
3284
3285
3286/**
3287 * Atomically decrement an unsigned 32-bit value, ordered.
3288 *
3289 * @returns The new value.
3290 * @param pu32 Pointer to the value to decrement.
3291 */
3292#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3293DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3294#else
3295DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3296{
3297 uint32_t u32;
3298# if RT_INLINE_ASM_USES_INTRIN
3299 u32 = _InterlockedDecrement((long *)pu32);
3300 return u32;
3301
3302# elif RT_INLINE_ASM_GNU_STYLE
3303 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3304 : "=r" (u32),
3305 "=m" (*pu32)
3306 : "0" (-1)
3307 : "memory");
3308 return u32-1;
3309# else
3310 __asm
3311 {
3312 mov eax, -1
3313# ifdef RT_ARCH_AMD64
3314 mov rdx, [pu32]
3315 lock xadd [rdx], eax
3316# else
3317 mov edx, [pu32]
3318 lock xadd [edx], eax
3319# endif
3320 mov u32, eax
3321 }
3322 return u32-1;
3323# endif
3324}
3325#endif
3326
3327
3328/**
3329 * Atomically decrement a signed 32-bit value, ordered.
3330 *
3331 * @returns The new value.
3332 * @param pi32 Pointer to the value to decrement.
3333 */
3334DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3335{
3336 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3337}
3338
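/* Usage sketch (illustrative only; the ExampleRefObj type and helper names
 * are assumptions): a minimal reference count on top of ASMAtomicIncU32 and
 * ASMAtomicDecU32, both of which return the new value. */
typedef struct ExampleRefObj { uint32_t volatile cRefs; } ExampleRefObj;

DECLINLINE(void) ExampleRetain(ExampleRefObj *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

DECLINLINE(bool) ExampleRelease(ExampleRefObj *pObj)
{
    /* The caller that drops the count to zero is the one that must free pObj. */
    return ASMAtomicDecU32(&pObj->cRefs) == 0;
}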
3339
3340/**
3341 * Atomically Or an unsigned 32-bit value, ordered.
3342 *
3343 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3344 * @param u32 The value to OR *pu32 with.
3345 */
3346#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3347DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3348#else
3349DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3350{
3351# if RT_INLINE_ASM_USES_INTRIN
3352 _InterlockedOr((long volatile *)pu32, (long)u32);
3353
3354# elif RT_INLINE_ASM_GNU_STYLE
3355 __asm__ __volatile__("lock; orl %1, %0\n\t"
3356 : "=m" (*pu32)
3357 : "ir" (u32));
3358# else
3359 __asm
3360 {
3361 mov eax, [u32]
3362# ifdef RT_ARCH_AMD64
3363 mov rdx, [pu32]
3364 lock or [rdx], eax
3365# else
3366 mov edx, [pu32]
3367 lock or [edx], eax
3368# endif
3369 }
3370# endif
3371}
3372#endif
3373
3374
3375/**
3376 * Atomically Or a signed 32-bit value, ordered.
3377 *
3378 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3379 * @param i32 The value to OR *pi32 with.
3380 */
3381DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3382{
3383 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3384}
3385
3386
3387/**
3388 * Atomically And an unsigned 32-bit value, ordered.
3389 *
3390 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3391 * @param u32 The value to AND *pu32 with.
3392 */
3393#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3394DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3395#else
3396DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3397{
3398# if RT_INLINE_ASM_USES_INTRIN
3399 _InterlockedAnd((long volatile *)pu32, u32);
3400
3401# elif RT_INLINE_ASM_GNU_STYLE
3402 __asm__ __volatile__("lock; andl %1, %0\n\t"
3403 : "=m" (*pu32)
3404 : "ir" (u32));
3405# else
3406 __asm
3407 {
3408 mov eax, [u32]
3409# ifdef RT_ARCH_AMD64
3410 mov rdx, [pu32]
3411 lock and [rdx], eax
3412# else
3413 mov edx, [pu32]
3414 lock and [edx], eax
3415# endif
3416 }
3417# endif
3418}
3419#endif
3420
3421
3422/**
3423 * Atomically And a signed 32-bit value, ordered.
3424 *
3425 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3426 * @param i32 The value to AND *pi32 with.
3427 */
3428DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3429{
3430 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3431}
3432
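/* Usage sketch (illustrative only; the Example* names are assumptions):
 * setting and clearing bits in a shared 32-bit flag word.  ASMAtomicOrU32
 * sets bits, ASMAtomicAndU32 with the complement clears them; neither
 * returns the previous value. */
DECLINLINE(void) ExampleSetFlags(uint32_t volatile *pfFlags, uint32_t fMask)
{
    ASMAtomicOrU32(pfFlags, fMask);
}

DECLINLINE(void) ExampleClearFlags(uint32_t volatile *pfFlags, uint32_t fMask)
{
    ASMAtomicAndU32(pfFlags, ~fMask);
}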
3433
3434/**
3435 * Memory fence, waits for any pending writes and reads to complete.
3436 */
3437DECLINLINE(void) ASMMemoryFence(void)
3438{
3439 /** @todo use mfence? check if all cpus we care for support it. */
3440 uint32_t volatile u32;
3441 ASMAtomicXchgU32(&u32, 0);
3442}
3443
3444
3445/**
3446 * Write fence, waits for any pending writes to complete.
3447 */
3448DECLINLINE(void) ASMWriteFence(void)
3449{
3450 /** @todo use sfence? check if all cpus we care for support it. */
3451 ASMMemoryFence();
3452}
3453
3454
3455/**
3456 * Read fence, waits for any pending reads to complete.
3457 */
3458DECLINLINE(void) ASMReadFence(void)
3459{
3460 /** @todo use lfence? check if all cpus we care for support it. */
3461 ASMMemoryFence();
3462}
3463
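/* Usage sketch (illustrative only; the Example* name and the payload/flag
 * pairing are assumptions): the classic producer pattern the write fence is
 * meant for, filling in the payload first, fencing, then raising the flag so
 * a consumer never observes the flag without the payload. */
DECLINLINE(void) ExamplePublishU32(uint32_t volatile *pu32Payload, uint32_t volatile *pfReady, uint32_t u32Value)
{
    *pu32Payload = u32Value;   /* plain store of the data */
    ASMWriteFence();           /* order the payload store before the flag store */
    *pfReady = 1;              /* signal that the payload is valid */
}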
3464
3465/**
3466 * Atomically reads an unsigned 8-bit value, ordered.
3467 *
3468 * @returns Current *pu8 value
3469 * @param pu8 Pointer to the 8-bit variable to read.
3470 */
3471DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3472{
3473 ASMMemoryFence();
3474 return *pu8; /* byte reads are atomic on x86 */
3475}
3476
3477
3478/**
3479 * Atomically reads an unsigned 8-bit value, unordered.
3480 *
3481 * @returns Current *pu8 value
3482 * @param pu8 Pointer to the 8-bit variable to read.
3483 */
3484DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3485{
3486 return *pu8; /* byte reads are atomic on x86 */
3487}
3488
3489
3490/**
3491 * Atomically reads a signed 8-bit value, ordered.
3492 *
3493 * @returns Current *pi8 value
3494 * @param pi8 Pointer to the 8-bit variable to read.
3495 */
3496DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3497{
3498 ASMMemoryFence();
3499 return *pi8; /* byte reads are atomic on x86 */
3500}
3501
3502
3503/**
3504 * Atomically reads a signed 8-bit value, unordered.
3505 *
3506 * @returns Current *pi8 value
3507 * @param pi8 Pointer to the 8-bit variable to read.
3508 */
3509DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3510{
3511 return *pi8; /* byte reads are atomic on x86 */
3512}
3513
3514
3515/**
3516 * Atomically reads an unsigned 16-bit value, ordered.
3517 *
3518 * @returns Current *pu16 value
3519 * @param pu16 Pointer to the 16-bit variable to read.
3520 */
3521DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3522{
3523 ASMMemoryFence();
3524 Assert(!((uintptr_t)pu16 & 1));
3525 return *pu16;
3526}
3527
3528
3529/**
3530 * Atomically reads an unsigned 16-bit value, unordered.
3531 *
3532 * @returns Current *pu16 value
3533 * @param pu16 Pointer to the 16-bit variable to read.
3534 */
3535DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3536{
3537 Assert(!((uintptr_t)pu16 & 1));
3538 return *pu16;
3539}
3540
3541
3542/**
3543 * Atomically reads a signed 16-bit value, ordered.
3544 *
3545 * @returns Current *pi16 value
3546 * @param pi16 Pointer to the 16-bit variable to read.
3547 */
3548DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3549{
3550 ASMMemoryFence();
3551 Assert(!((uintptr_t)pi16 & 1));
3552 return *pi16;
3553}
3554
3555
3556/**
3557 * Atomically reads a signed 16-bit value, unordered.
3558 *
3559 * @returns Current *pi16 value
3560 * @param pi16 Pointer to the 16-bit variable to read.
3561 */
3562DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3563{
3564 Assert(!((uintptr_t)pi16 & 1));
3565 return *pi16;
3566}
3567
3568
3569/**
3570 * Atomically reads an unsigned 32-bit value, ordered.
3571 *
3572 * @returns Current *pu32 value
3573 * @param pu32 Pointer to the 32-bit variable to read.
3574 */
3575DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3576{
3577 ASMMemoryFence();
3578 Assert(!((uintptr_t)pu32 & 3));
3579 return *pu32;
3580}
3581
3582
3583/**
3584 * Atomically reads an unsigned 32-bit value, unordered.
3585 *
3586 * @returns Current *pu32 value
3587 * @param pu32 Pointer to the 32-bit variable to read.
3588 */
3589DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3590{
3591 Assert(!((uintptr_t)pu32 & 3));
3592 return *pu32;
3593}
3594
3595
3596/**
3597 * Atomically reads a signed 32-bit value, ordered.
3598 *
3599 * @returns Current *pi32 value
3600 * @param pi32 Pointer to the 32-bit variable to read.
3601 */
3602DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3603{
3604 ASMMemoryFence();
3605 Assert(!((uintptr_t)pi32 & 3));
3606 return *pi32;
3607}
3608
3609
3610/**
3611 * Atomically reads a signed 32-bit value, unordered.
3612 *
3613 * @returns Current *pi32 value
3614 * @param pi32 Pointer to the 32-bit variable to read.
3615 */
3616DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3617{
3618 Assert(!((uintptr_t)pi32 & 3));
3619 return *pi32;
3620}
3621
3622
3623/**
3624 * Atomically reads an unsigned 64-bit value, ordered.
3625 *
3626 * @returns Current *pu64 value
3627 * @param pu64 Pointer to the 64-bit variable to read.
3628 * The memory pointed to must be writable.
3629 * @remark This will fault if the memory is read-only!
3630 */
3631#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3632DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3633#else
3634DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3635{
3636 uint64_t u64;
3637# ifdef RT_ARCH_AMD64
3638 Assert(!((uintptr_t)pu64 & 7));
3639/*# if RT_INLINE_ASM_GNU_STYLE
3640 __asm__ __volatile__( "mfence\n\t"
3641 "movq %1, %0\n\t"
3642 : "=r" (u64)
3643 : "m" (*pu64));
3644# else
3645 __asm
3646 {
3647 mfence
3648 mov rdx, [pu64]
3649 mov rax, [rdx]
3650 mov [u64], rax
3651 }
3652# endif*/
3653 ASMMemoryFence();
3654 u64 = *pu64;
3655# else /* !RT_ARCH_AMD64 */
3656# if RT_INLINE_ASM_GNU_STYLE
3657# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3658 uint32_t u32EBX = 0;
3659 Assert(!((uintptr_t)pu64 & 7));
3660 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3661 "lock; cmpxchg8b (%5)\n\t"
3662 "movl %3, %%ebx\n\t"
3663 : "=A" (u64),
3664 "=m" (*pu64)
3665 : "0" (0),
3666 "m" (u32EBX),
3667 "c" (0),
3668 "S" (pu64));
3669# else /* !PIC */
3670 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3671 : "=A" (u64),
3672 "=m" (*pu64)
3673 : "0" (0),
3674 "b" (0),
3675 "c" (0));
3676# endif
3677# else
3678 Assert(!((uintptr_t)pu64 & 7));
3679 __asm
3680 {
3681 xor eax, eax
3682 xor edx, edx
3683 mov edi, pu64
3684 xor ecx, ecx
3685 xor ebx, ebx
3686 lock cmpxchg8b [edi]
3687 mov dword ptr [u64], eax
3688 mov dword ptr [u64 + 4], edx
3689 }
3690# endif
3691# endif /* !RT_ARCH_AMD64 */
3692 return u64;
3693}
3694#endif
3695
3696
3697/**
3698 * Atomically reads an unsigned 64-bit value, unordered.
3699 *
3700 * @returns Current *pu64 value
3701 * @param pu64 Pointer to the 64-bit variable to read.
3702 * The memory pointed to must be writable.
3703 * @remark This will fault if the memory is read-only!
3704 */
3705#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3706DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3707#else
3708DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3709{
3710 uint64_t u64;
3711# ifdef RT_ARCH_AMD64
3712 Assert(!((uintptr_t)pu64 & 7));
3713/*# if RT_INLINE_ASM_GNU_STYLE
3714 Assert(!((uintptr_t)pu64 & 7));
3715 __asm__ __volatile__("movq %1, %0\n\t"
3716 : "=r" (u64)
3717 : "m" (*pu64));
3718# else
3719 __asm
3720 {
3721 mov rdx, [pu64]
3722 mov rax, [rdx]
3723 mov [u64], rax
3724 }
3725# endif */
3726 u64 = *pu64;
3727# else /* !RT_ARCH_AMD64 */
3728# if RT_INLINE_ASM_GNU_STYLE
3729# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3730 uint32_t u32EBX = 0;
3731 Assert(!((uintptr_t)pu64 & 7));
3732 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3733 "lock; cmpxchg8b (%5)\n\t"
3734 "movl %3, %%ebx\n\t"
3735 : "=A" (u64),
3736 "=m" (*pu64)
3737 : "0" (0),
3738 "m" (u32EBX),
3739 "c" (0),
3740 "S" (pu64));
3741# else /* !PIC */
3742 __asm__ __volatile__("cmpxchg8b %1\n\t"
3743 : "=A" (u64),
3744 "=m" (*pu64)
3745 : "0" (0),
3746 "b" (0),
3747 "c" (0));
3748# endif
3749# else
3750 Assert(!((uintptr_t)pu64 & 7));
3751 __asm
3752 {
3753 xor eax, eax
3754 xor edx, edx
3755 mov edi, pu64
3756 xor ecx, ecx
3757 xor ebx, ebx
3758 lock cmpxchg8b [edi]
3759 mov dword ptr [u64], eax
3760 mov dword ptr [u64 + 4], edx
3761 }
3762# endif
3763# endif /* !RT_ARCH_AMD64 */
3764 return u64;
3765}
3766#endif
3767
3768
3769/**
3770 * Atomically reads a signed 64-bit value, ordered.
3771 *
3772 * @returns Current *pi64 value
3773 * @param pi64 Pointer to the 64-bit variable to read.
3774 * The memory pointed to must be writable.
3775 * @remark This will fault if the memory is read-only!
3776 */
3777DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3778{
3779 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3780}
3781
3782
3783/**
3784 * Atomically reads a signed 64-bit value, unordered.
3785 *
3786 * @returns Current *pi64 value
3787 * @param pi64 Pointer to the 64-bit variable to read.
3788 * The memory pointed to must be writable.
3789 * @remark This will fault if the memory is read-only!
3790 */
3791DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3792{
3793 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3794}
3795
3796
3797/**
3798 * Atomically reads a pointer value, ordered.
3799 *
3800 * @returns Current *pv value
3801 * @param ppv Pointer to the pointer variable to read.
3802 */
3803DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3804{
3805#if ARCH_BITS == 32
3806 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3807#elif ARCH_BITS == 64
3808 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3809#else
3810# error "ARCH_BITS is bogus"
3811#endif
3812}
3813
3814
3815/**
3816 * Atomically reads a pointer value, unordered.
3817 *
3818 * @returns Current *pv value
3819 * @param ppv Pointer to the pointer variable to read.
3820 */
3821DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3822{
3823#if ARCH_BITS == 32
3824 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3825#elif ARCH_BITS == 64
3826 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3827#else
3828# error "ARCH_BITS is bogus"
3829#endif
3830}
3831
3832
3833/**
3834 * Atomically reads a boolean value, ordered.
3835 *
3836 * @returns Current *pf value
3837 * @param pf Pointer to the boolean variable to read.
3838 */
3839DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3840{
3841 ASMMemoryFence();
3842 return *pf; /* byte reads are atomic on x86 */
3843}
3844
3845
3846/**
3847 * Atomically reads a boolean value, unordered.
3848 *
3849 * @returns Current *pf value
3850 * @param pf Pointer to the boolean variable to read.
3851 */
3852DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3853{
3854 return *pf; /* byte reads are atomic on x86 */
3855}
3856
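/* Usage sketch (illustrative only; the Example* name is an assumption): the
 * consumer side of the producer sketch shown after the fences above.
 * Spinning on an ordered read keeps the flag check and the payload read in
 * program order. */
DECLINLINE(uint32_t) ExampleConsumeU32(uint32_t volatile *pfReady, uint32_t volatile *pu32Payload)
{
    while (!ASMAtomicReadU32(pfReady))
        /* busy wait; real code would pause or yield here */;
    return ASMAtomicReadU32(pu32Payload);
}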
3857
3858/**
3859 * Atomically read a typical IPRT handle value, ordered.
3860 *
3861 * @param ph Pointer to the handle variable to read.
3862 * @param phRes Where to store the result.
3863 *
3864 * @remarks This doesn't currently work for all handles (like RTFILE).
3865 */
3866#define ASMAtomicReadHandle(ph, phRes) \
3867 do { \
3868 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3869 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3870 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3871 } while (0)
3872
3873
3874/**
3875 * Atomically read a typical IPRT handle value, unordered.
3876 *
3877 * @param ph Pointer to the handle variable to read.
3878 * @param phRes Where to store the result.
3879 *
3880 * @remarks This doesn't currently work for all handles (like RTFILE).
3881 */
3882#define ASMAtomicUoReadHandle(ph, phRes) \
3883 do { \
3884 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
3885 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3886 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3887 } while (0)
3888
3889
3890/**
3891 * Atomically read a value whose size might differ
3892 * between platforms or compilers, ordered.
3893 *
3894 * @param pu Pointer to the variable to update.
3895 * @param puRes Where to store the result.
3896 */
3897#define ASMAtomicReadSize(pu, puRes) \
3898 do { \
3899 switch (sizeof(*(pu))) { \
3900 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3901 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3902 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3903 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3904 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3905 } \
3906 } while (0)
3907
3908
3909/**
3910 * Atomically read a value whose size might differ
3911 * between platforms or compilers, unordered.
3912 *
3913 * @param pu Pointer to the variable to update.
3914 * @param puRes Where to store the result.
3915 */
3916#define ASMAtomicUoReadSize(pu, puRes) \
3917 do { \
3918 switch (sizeof(*(pu))) { \
3919 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3920 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3921 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3922 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3923            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3924 } \
3925 } while (0)
3926
3927
3928/**
3929 * Atomically writes an unsigned 8-bit value, ordered.
3930 *
3931 * @param pu8 Pointer to the 8-bit variable.
3932 * @param u8 The 8-bit value to assign to *pu8.
3933 */
3934DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3935{
3936 ASMAtomicXchgU8(pu8, u8);
3937}
3938
3939
3940/**
3941 * Atomically writes an unsigned 8-bit value, unordered.
3942 *
3943 * @param pu8 Pointer to the 8-bit variable.
3944 * @param u8 The 8-bit value to assign to *pu8.
3945 */
3946DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3947{
3948 *pu8 = u8; /* byte writes are atomic on x86 */
3949}
3950
3951
3952/**
3953 * Atomically writes a signed 8-bit value, ordered.
3954 *
3955 * @param pi8 Pointer to the 8-bit variable to read.
3956 * @param i8 The 8-bit value to assign to *pi8.
3957 */
3958DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3959{
3960 ASMAtomicXchgS8(pi8, i8);
3961}
3962
3963
3964/**
3965 * Atomically writes a signed 8-bit value, unordered.
3966 *
3967 * @param pi8 Pointer to the 8-bit variable to read.
3968 * @param i8 The 8-bit value to assign to *pi8.
3969 */
3970DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3971{
3972 *pi8 = i8; /* byte writes are atomic on x86 */
3973}
3974
3975
3976/**
3977 * Atomically writes an unsigned 16-bit value, ordered.
3978 *
3979 * @param pu16 Pointer to the 16-bit variable.
3980 * @param u16 The 16-bit value to assign to *pu16.
3981 */
3982DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3983{
3984 ASMAtomicXchgU16(pu16, u16);
3985}
3986
3987
3988/**
3989 * Atomically writes an unsigned 16-bit value, unordered.
3990 *
3991 * @param pu16 Pointer to the 16-bit variable.
3992 * @param u16 The 16-bit value to assign to *pu16.
3993 */
3994DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3995{
3996 Assert(!((uintptr_t)pu16 & 1));
3997 *pu16 = u16;
3998}
3999
4000
4001/**
4002 * Atomically writes a signed 16-bit value, ordered.
4003 *
4004 * @param pi16 Pointer to the 16-bit variable to read.
4005 * @param i16 The 16-bit value to assign to *pi16.
4006 */
4007DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4008{
4009 ASMAtomicXchgS16(pi16, i16);
4010}
4011
4012
4013/**
4014 * Atomically writes a signed 16-bit value, unordered.
4015 *
4016 * @param pi16 Pointer to the 16-bit variable to read.
4017 * @param i16 The 16-bit value to assign to *pi16.
4018 */
4019DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4020{
4021 Assert(!((uintptr_t)pi16 & 1));
4022 *pi16 = i16;
4023}
4024
4025
4026/**
4027 * Atomically writes an unsigned 32-bit value, ordered.
4028 *
4029 * @param pu32 Pointer to the 32-bit variable.
4030 * @param u32 The 32-bit value to assign to *pu32.
4031 */
4032DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4033{
4034 ASMAtomicXchgU32(pu32, u32);
4035}
4036
4037
4038/**
4039 * Atomically writes an unsigned 32-bit value, unordered.
4040 *
4041 * @param pu32 Pointer to the 32-bit variable.
4042 * @param u32 The 32-bit value to assign to *pu32.
4043 */
4044DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4045{
4046 Assert(!((uintptr_t)pu32 & 3));
4047 *pu32 = u32;
4048}
4049
4050
4051/**
4052 * Atomically writes a signed 32-bit value, ordered.
4053 *
4054 * @param pi32 Pointer to the 32-bit variable to read.
4055 * @param i32 The 32-bit value to assign to *pi32.
4056 */
4057DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4058{
4059 ASMAtomicXchgS32(pi32, i32);
4060}
4061
4062
4063/**
4064 * Atomically writes a signed 32-bit value, unordered.
4065 *
4066 * @param pi32 Pointer to the 32-bit variable to read.
4067 * @param i32 The 32-bit value to assign to *pi32.
4068 */
4069DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4070{
4071 Assert(!((uintptr_t)pi32 & 3));
4072 *pi32 = i32;
4073}
4074
4075
4076/**
4077 * Atomically writes an unsigned 64-bit value, ordered.
4078 *
4079 * @param pu64 Pointer to the 64-bit variable.
4080 * @param u64 The 64-bit value to assign to *pu64.
4081 */
4082DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4083{
4084 ASMAtomicXchgU64(pu64, u64);
4085}
4086
4087
4088/**
4089 * Atomically writes an unsigned 64-bit value, unordered.
4090 *
4091 * @param pu64 Pointer to the 64-bit variable.
4092 * @param u64 The 64-bit value to assign to *pu64.
4093 */
4094DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4095{
4096 Assert(!((uintptr_t)pu64 & 7));
4097#if ARCH_BITS == 64
4098 *pu64 = u64;
4099#else
4100 ASMAtomicXchgU64(pu64, u64);
4101#endif
4102}
4103
4104
4105/**
4106 * Atomically writes a signed 64-bit value, ordered.
4107 *
4108 * @param pi64 Pointer to the 64-bit variable.
4109 * @param i64 The 64-bit value to assign to *pi64.
4110 */
4111DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4112{
4113 ASMAtomicXchgS64(pi64, i64);
4114}
4115
4116
4117/**
4118 * Atomically writes a signed 64-bit value, unordered.
4119 *
4120 * @param pi64 Pointer to the 64-bit variable.
4121 * @param i64 The 64-bit value to assign to *pi64.
4122 */
4123DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4124{
4125 Assert(!((uintptr_t)pi64 & 7));
4126#if ARCH_BITS == 64
4127 *pi64 = i64;
4128#else
4129 ASMAtomicXchgS64(pi64, i64);
4130#endif
4131}
4132
4133
4134/**
4135 * Atomically writes a boolean value, ordered.
4136 *
4137 * @param pf Pointer to the boolean variable.
4138 * @param f The boolean value to assign to *pf.
4139 */
4140DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4141{
4142 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4143}
4144
4145
4146/**
4147 * Atomically writes a boolean value, unordered.
4148 *
4149 * @param pf Pointer to the boolean variable.
4150 * @param f The boolean value to assign to *pf.
4151 */
4152DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4153{
4154 *pf = f; /* byte writes are atomic on x86 */
4155}
4156
4157
4158/**
4159 * Atomically writes a pointer value, ordered.
4160 *
4162 * @param ppv Pointer to the pointer variable.
4163 * @param pv The pointer value to assign to *ppv.
4164 */
4165DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
4166{
4167#if ARCH_BITS == 32
4168 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4169#elif ARCH_BITS == 64
4170 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4171#else
4172# error "ARCH_BITS is bogus"
4173#endif
4174}
4175
4176
4177/**
4178 * Atomically writes a pointer value, unordered.
4179 *
4181 * @param ppv Pointer to the pointer variable.
4182 * @param pv The pointer value to assign to *ppv.
4183 */
4184DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
4185{
4186#if ARCH_BITS == 32
4187 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4188#elif ARCH_BITS == 64
4189 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4190#else
4191# error "ARCH_BITS is bogus"
4192#endif
4193}
4194
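/* Usage sketch (illustrative only; the ExampleCfg type and helper names are
 * assumptions): publishing a fully initialised object through a shared
 * pointer with ASMAtomicWritePtr and picking it up with ASMAtomicReadPtr. */
typedef struct ExampleCfg { uint32_t cMilliesTimeout; } ExampleCfg;

DECLINLINE(void) ExamplePublishCfg(ExampleCfg * volatile *ppCfg, ExampleCfg *pCfgNew)
{
    /* pCfgNew must be fully written before this call; the ordered write makes
       sure readers that see the pointer also see its contents. */
    ASMAtomicWritePtr((void * volatile *)ppCfg, pCfgNew);
}

DECLINLINE(ExampleCfg *) ExampleGetCfg(ExampleCfg * volatile *ppCfg)
{
    return (ExampleCfg *)ASMAtomicReadPtr((void * volatile *)ppCfg);
}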
4195
4196/**
4197 * Atomically write a typical IPRT handle value, ordered.
4198 *
4199 * @param ph Pointer to the variable to update.
4200 * @param hNew The value to assign to *ph.
4201 *
4202 * @remarks This doesn't currently work for all handles (like RTFILE).
4203 */
4204#define ASMAtomicWriteHandle(ph, hNew) \
4205 do { \
4206 ASMAtomicWritePtr((void * volatile *)(ph), (void *)hNew); \
4207 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4208 } while (0)
4209
4210
4211/**
4212 * Atomically write a typical IPRT handle value, unordered.
4213 *
4214 * @param ph Pointer to the variable to update.
4215 * @param hNew The value to assign to *ph.
4216 *
4217 * @remarks This doesn't currently work for all handles (like RTFILE).
4218 */
4219#define ASMAtomicUoWriteHandle(ph, hNew) \
4220 do { \
4221 ASMAtomicUoWritePtr((void * volatile *)(ph), (void *)hNew); \
4222 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4223 } while (0)
4224
4225
4226/**
4227 * Atomically write a value whose size might differ
4228 * between platforms or compilers, ordered.
4229 *
4230 * @param pu Pointer to the variable to update.
4231 * @param uNew The value to assign to *pu.
4232 */
4233#define ASMAtomicWriteSize(pu, uNew) \
4234 do { \
4235 switch (sizeof(*(pu))) { \
4236 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4237 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4238 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4239 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4240 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4241 } \
4242 } while (0)
4243
4244/**
4245 * Atomically write a value whose size might differ
4246 * between platforms or compilers, unordered.
4247 *
4248 * @param pu Pointer to the variable to update.
4249 * @param uNew The value to assign to *pu.
4250 */
4251#define ASMAtomicUoWriteSize(pu, uNew) \
4252 do { \
4253 switch (sizeof(*(pu))) { \
4254 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4255 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4256 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4257 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4258            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4259 } \
4260 } while (0)
4261
4262
4263
4264
4265/**
4266 * Invalidate page.
4267 *
4268 * @param pv Address of the page to invalidate.
4269 */
4270#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4271DECLASM(void) ASMInvalidatePage(void *pv);
4272#else
4273DECLINLINE(void) ASMInvalidatePage(void *pv)
4274{
4275# if RT_INLINE_ASM_USES_INTRIN
4276 __invlpg(pv);
4277
4278# elif RT_INLINE_ASM_GNU_STYLE
4279 __asm__ __volatile__("invlpg %0\n\t"
4280 : : "m" (*(uint8_t *)pv));
4281# else
4282 __asm
4283 {
4284# ifdef RT_ARCH_AMD64
4285 mov rax, [pv]
4286 invlpg [rax]
4287# else
4288 mov eax, [pv]
4289 invlpg [eax]
4290# endif
4291 }
4292# endif
4293}
4294#endif
4295
4296
4297#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4298# if PAGE_SIZE != 0x1000
4299# error "PAGE_SIZE is not 0x1000!"
4300# endif
4301#endif
4302
4303/**
4304 * Zeros a 4K memory page.
4305 *
4306 * @param pv Pointer to the memory block. This must be page aligned.
4307 */
4308#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4309DECLASM(void) ASMMemZeroPage(volatile void *pv);
4310# else
4311DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4312{
4313# if RT_INLINE_ASM_USES_INTRIN
4314# ifdef RT_ARCH_AMD64
4315 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4316# else
4317 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4318# endif
4319
4320# elif RT_INLINE_ASM_GNU_STYLE
4321 RTCCUINTREG uDummy;
4322# ifdef RT_ARCH_AMD64
4323 __asm__ __volatile__ ("rep stosq"
4324 : "=D" (pv),
4325 "=c" (uDummy)
4326 : "0" (pv),
4327 "c" (0x1000 >> 3),
4328 "a" (0)
4329 : "memory");
4330# else
4331 __asm__ __volatile__ ("rep stosl"
4332 : "=D" (pv),
4333 "=c" (uDummy)
4334 : "0" (pv),
4335 "c" (0x1000 >> 2),
4336 "a" (0)
4337 : "memory");
4338# endif
4339# else
4340 __asm
4341 {
4342# ifdef RT_ARCH_AMD64
4343 xor rax, rax
4344 mov ecx, 0200h
4345 mov rdi, [pv]
4346 rep stosq
4347# else
4348 xor eax, eax
4349 mov ecx, 0400h
4350 mov edi, [pv]
4351 rep stosd
4352# endif
4353 }
4354# endif
4355}
4356# endif
4357
4358
4359/**
4360 * Zeros a memory block with a 32-bit aligned size.
4361 *
4362 * @param pv Pointer to the memory block.
4363 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4364 */
4365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4366DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4367#else
4368DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4369{
4370# if RT_INLINE_ASM_USES_INTRIN
4371# ifdef RT_ARCH_AMD64
4372 if (!(cb & 7))
4373 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4374 else
4375# endif
4376 __stosd((unsigned long *)pv, 0, cb / 4);
4377
4378# elif RT_INLINE_ASM_GNU_STYLE
4379 __asm__ __volatile__ ("rep stosl"
4380 : "=D" (pv),
4381 "=c" (cb)
4382 : "0" (pv),
4383 "1" (cb >> 2),
4384 "a" (0)
4385 : "memory");
4386# else
4387 __asm
4388 {
4389 xor eax, eax
4390# ifdef RT_ARCH_AMD64
4391 mov rcx, [cb]
4392 shr rcx, 2
4393 mov rdi, [pv]
4394# else
4395 mov ecx, [cb]
4396 shr ecx, 2
4397 mov edi, [pv]
4398# endif
4399 rep stosd
4400 }
4401# endif
4402}
4403#endif
4404
4405
4406/**
4407 * Fills a memory block with a 32-bit aligned size.
4408 *
4409 * @param pv Pointer to the memory block.
4410 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4411 * @param u32 The value to fill with.
4412 */
4413#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4414DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4415#else
4416DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4417{
4418# if RT_INLINE_ASM_USES_INTRIN
4419# ifdef RT_ARCH_AMD64
4420 if (!(cb & 7))
4421 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4422 else
4423# endif
4424 __stosd((unsigned long *)pv, u32, cb / 4);
4425
4426# elif RT_INLINE_ASM_GNU_STYLE
4427 __asm__ __volatile__ ("rep stosl"
4428 : "=D" (pv),
4429 "=c" (cb)
4430 : "0" (pv),
4431 "1" (cb >> 2),
4432 "a" (u32)
4433 : "memory");
4434# else
4435 __asm
4436 {
4437# ifdef RT_ARCH_AMD64
4438 mov rcx, [cb]
4439 shr rcx, 2
4440 mov rdi, [pv]
4441# else
4442 mov ecx, [cb]
4443 shr ecx, 2
4444 mov edi, [pv]
4445# endif
4446 mov eax, [u32]
4447 rep stosd
4448 }
4449# endif
4450}
4451#endif
4452
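/* Usage sketch (illustrative only; the Example* names are assumptions): both
 * helpers require a byte count that is a multiple of 4, so working in whole
 * uint32_t elements keeps that invariant trivially satisfied. */
DECLINLINE(void) ExampleZeroU32Array(uint32_t *pau32, size_t cElements)
{
    /* cElements * 4 is always 32-bit aligned, satisfying the cb requirement. */
    ASMMemZero32(pau32, cElements * sizeof(uint32_t));
}

DECLINLINE(void) ExampleFillU32Array(uint32_t *pau32, size_t cElements, uint32_t u32Pattern)
{
    ASMMemFill32(pau32, cElements * sizeof(uint32_t), u32Pattern);
}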
4453
4454/**
4455 * Checks if a memory block is filled with the specified byte.
4456 *
4457 * This is a sort of inverted memchr.
4458 *
4459 * @returns Pointer to the byte which doesn't equal u8.
4460 * @returns NULL if all equal to u8.
4461 *
4462 * @param pv Pointer to the memory block.
4463 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4464 * @param u8 The value it's supposed to be filled with.
4465 */
4466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4467DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4468#else
4469DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4470{
4471/** @todo rewrite this in inline assembly? */
4472 uint8_t const *pb = (uint8_t const *)pv;
4473 for (; cb; cb--, pb++)
4474 if (RT_UNLIKELY(*pb != u8))
4475 return (void *)pb;
4476 return NULL;
4477}
4478#endif
4479
4480
4481/**
4482 * Checks if a memory block is filled with the specified 32-bit value.
4483 *
4484 * This is a sort of inverted memchr.
4485 *
4486 * @returns Pointer to the first value which doesn't equal u32.
4487 * @returns NULL if all equal to u32.
4488 *
4489 * @param pv Pointer to the memory block.
4490 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4491 * @param u32 The value it's supposed to be filled with.
4492 */
4493#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4494DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4495#else
4496DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4497{
4498/** @todo rewrite this in inline assembly? */
4499 uint32_t const *pu32 = (uint32_t const *)pv;
4500 for (; cb; cb -= 4, pu32++)
4501 if (RT_UNLIKELY(*pu32 != u32))
4502 return (uint32_t *)pu32;
4503 return NULL;
4504}
4505#endif
4506
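/* Illustrative sketch only (hypothetical helper name): verifying that a table
   really was wiped.  ASMMemIsAllU32 returns NULL when every 32-bit word
   matches and a pointer to the first mismatch otherwise. */
#if 0
DECLINLINE(bool) ExampleIsTableZeroed(uint32_t const *pau32Table, size_t cbTable)
{
    /* cbTable must be a multiple of 4 for the 32-bit scan. */
    return ASMMemIsAllU32(pau32Table, cbTable, 0) == NULL;
}
#endif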
4507
4508/**
4509 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4510 *
4511 * @returns u32F1 * u32F2.
4512 */
4513#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4514DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4515#else
4516DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4517{
4518# ifdef RT_ARCH_AMD64
4519 return (uint64_t)u32F1 * u32F2;
4520# else /* !RT_ARCH_AMD64 */
4521 uint64_t u64;
4522# if RT_INLINE_ASM_GNU_STYLE
4523 __asm__ __volatile__("mull %%edx"
4524 : "=A" (u64)
4525 : "a" (u32F2), "d" (u32F1));
4526# else
4527 __asm
4528 {
4529 mov edx, [u32F1]
4530 mov eax, [u32F2]
4531 mul edx
4532 mov dword ptr [u64], eax
4533 mov dword ptr [u64 + 4], edx
4534 }
4535# endif
4536 return u64;
4537# endif /* !RT_ARCH_AMD64 */
4538}
4539#endif
4540
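/* Illustrative sketch only (hypothetical helper name): the full 64-bit product
   avoids the silent truncation a plain 32-bit multiplication would suffer. */
#if 0
DECLINLINE(uint64_t) ExampleSectorToByteOffset(uint32_t iSector, uint32_t cbSector)
{
    /* E.g. iSector = 0x00400000, cbSector = 512 gives 0x80000000 (2 GiB). */
    return ASMMult2xU32RetU64(iSector, cbSector);
}
#endif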
4541
4542/**
4543 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4544 *
4545 * @returns i32F1 * i32F2.
4546 */
4547#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4548DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4549#else
4550DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4551{
4552# ifdef RT_ARCH_AMD64
4553 return (int64_t)i32F1 * i32F2;
4554# else /* !RT_ARCH_AMD64 */
4555 int64_t i64;
4556# if RT_INLINE_ASM_GNU_STYLE
4557 __asm__ __volatile__("imull %%edx"
4558 : "=A" (i64)
4559 : "a" (i32F2), "d" (i32F1));
4560# else
4561 __asm
4562 {
4563 mov edx, [i32F1]
4564 mov eax, [i32F2]
4565 imul edx
4566 mov dword ptr [i64], eax
4567 mov dword ptr [i64 + 4], edx
4568 }
4569# endif
4570 return i64;
4571# endif /* !RT_ARCH_AMD64 */
4572}
4573#endif
4574
4575
4576/**
4577 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4578 *
4579 * @returns u64 / u32.
4580 */
4581#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4582DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4583#else
4584DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4585{
4586# ifdef RT_ARCH_AMD64
4587 return (uint32_t)(u64 / u32);
4588# else /* !RT_ARCH_AMD64 */
4589# if RT_INLINE_ASM_GNU_STYLE
4590 RTCCUINTREG uDummy;
4591 __asm__ __volatile__("divl %3"
4592 : "=a" (u32), "=d"(uDummy)
4593 : "A" (u64), "r" (u32));
4594# else
4595 __asm
4596 {
4597 mov eax, dword ptr [u64]
4598 mov edx, dword ptr [u64 + 4]
4599 mov ecx, [u32]
4600 div ecx
4601 mov [u32], eax
4602 }
4603# endif
4604 return u32;
4605# endif /* !RT_ARCH_AMD64 */
4606}
4607#endif
4608
4609
4610/**
4611 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4612 *
4613 * @returns i64 / i32.
4614 */
4615#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4616DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4617#else
4618DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4619{
4620# ifdef RT_ARCH_AMD64
4621 return (int32_t)(i64 / i32);
4622# else /* !RT_ARCH_AMD64 */
4623# if RT_INLINE_ASM_GNU_STYLE
4624 RTCCUINTREG iDummy;
4625 __asm__ __volatile__("idivl %3"
4626 : "=a" (i32), "=d"(iDummy)
4627 : "A" (i64), "r" (i32));
4628# else
4629 __asm
4630 {
4631 mov eax, dword ptr [i64]
4632 mov edx, dword ptr [i64 + 4]
4633 mov ecx, [i32]
4634 idiv ecx
4635 mov [i32], eax
4636 }
4637# endif
4638 return i32;
4639# endif /* !RT_ARCH_AMD64 */
4640}
4641#endif
4642
4643
4644/**
4645 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
4646 * returning the remainder.
4647 *
4648 * @returns u64 % u32.
4649 *
4650 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or we'll overflow and crash.
4651 */
4652#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4653DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4654#else
4655DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4656{
4657# ifdef RT_ARCH_AMD64
4658 return (uint32_t)(u64 % u32);
4659# else /* !RT_ARCH_AMD64 */
4660# if RT_INLINE_ASM_GNU_STYLE
4661 RTCCUINTREG uDummy;
4662 __asm__ __volatile__("divl %3"
4663 : "=a" (uDummy), "=d"(u32)
4664 : "A" (u64), "r" (u32));
4665# else
4666 __asm
4667 {
4668 mov eax, dword ptr [u64]
4669 mov edx, dword ptr [u64 + 4]
4670 mov ecx, [u32]
4671 div ecx
4672 mov [u32], edx
4673 }
4674# endif
4675 return u32;
4676# endif /* !RT_ARCH_AMD64 */
4677}
4678#endif
4679
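/* Illustrative sketch only (hypothetical helper name): splitting a byte count
   into whole mebibytes and a remainder.  Per the remarks above the caller must
   know the quotient fits in 32 bits, i.e. cb stays below 4 PiB here. */
#if 0
DECLINLINE(void) ExampleSplitSize(uint64_t cb, uint32_t *pcMBs, uint32_t *pcbRest)
{
    *pcMBs   = ASMDivU64ByU32RetU32(cb, UINT32_C(0x100000)); /* cb / 1 MiB */
    *pcbRest = ASMModU64ByU32RetU32(cb, UINT32_C(0x100000)); /* cb % 1 MiB */
}
#endif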
4680
4681/**
4682 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
4683 * returning the remainder.
4684 *
4685 * @returns i64 % i32.
4686 *
4687 * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit integer or we'll overflow and crash.
4688 */
4689#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4690DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4691#else
4692DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4693{
4694# ifdef RT_ARCH_AMD64
4695 return (int32_t)(i64 % i32);
4696# else /* !RT_ARCH_AMD64 */
4697# if RT_INLINE_ASM_GNU_STYLE
4698 RTCCUINTREG iDummy;
4699 __asm__ __volatile__("idivl %3"
4700 : "=a" (iDummy), "=d"(i32)
4701 : "A" (i64), "r" (i32));
4702# else
4703 __asm
4704 {
4705 mov eax, dword ptr [i64]
4706 mov edx, dword ptr [i64 + 4]
4707 mov ecx, [i32]
4708 idiv ecx
4709 mov [i32], edx
4710 }
4711# endif
4712 return i32;
4713# endif /* !RT_ARCH_AMD64 */
4714}
4715#endif
4716
4717
4718/**
4719 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a 32-bit integer,
4720 * using a 96-bit intermediate result.
4721 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4722 * __udivdi3 and __umoddi3 even if this inline function is not used.
4723 *
4724 * @returns (u64A * u32B) / u32C.
4725 * @param u64A The 64-bit value.
4726 * @param u32B The 32-bit value to multiply A by.
4727 * @param u32C The 32-bit value to divide A*B by.
4728 */
4729#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4730DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4731#else
4732DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4733{
4734# if RT_INLINE_ASM_GNU_STYLE
4735# ifdef RT_ARCH_AMD64
4736 uint64_t u64Result, u64Spill;
4737 __asm__ __volatile__("mulq %2\n\t"
4738 "divq %3\n\t"
4739 : "=a" (u64Result),
4740 "=d" (u64Spill)
4741 : "r" ((uint64_t)u32B),
4742 "r" ((uint64_t)u32C),
4743 "0" (u64A),
4744 "1" (0));
4745 return u64Result;
4746# else
4747 uint32_t u32Dummy;
4748 uint64_t u64Result;
4749 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4750 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4751 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4752 eax = u64A.hi */
4753 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4754 edx = u32C */
4755 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4756 edx = u32B */
4757 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4758 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4759 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4760 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4761 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4762 edx = u64Hi % u32C */
4763 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4764 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4765 "divl %%ecx \n\t" /* u64Result.lo */
4766 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4767 : "=A"(u64Result), "=c"(u32Dummy),
4768 "=S"(u32Dummy), "=D"(u32Dummy)
4769 : "a"((uint32_t)u64A),
4770 "S"((uint32_t)(u64A >> 32)),
4771 "c"(u32B),
4772 "D"(u32C));
4773 return u64Result;
4774# endif
4775# else
4776 RTUINT64U u;
4777 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4778 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4779 u64Hi += (u64Lo >> 32);
4780 u.s.Hi = (uint32_t)(u64Hi / u32C);
4781 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4782 return u.u;
4783# endif
4784}
4785#endif
4786
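/* Illustrative sketch only (hypothetical helper name): the classic use of the
   96-bit intermediate is rescaling a tick count, where ticks * 1e9 would
   overflow plain 64-bit arithmetic long before this helper does. */
#if 0
DECLINLINE(uint64_t) ExampleTicksToNanoseconds(uint64_t cTicks, uint32_t uHz)
{
    /* (cTicks * 1000000000) / uHz with the product kept in 96 bits. */
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000), uHz);
}
#endif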
4787
4788/**
4789 * Probes a byte pointer for read access.
4790 *
4791 * While the function will fault if the byte is not read accessible,
4792 * the idea is to do this in a safe place like before acquiring locks
4793 * and such like.
4794 *
4795 * Also, this function guarantees that an eager compiler is not going
4796 * to optimize the probing away.
4797 *
4798 * @param pvByte Pointer to the byte.
4799 */
4800#if RT_INLINE_ASM_EXTERNAL
4801DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4802#else
4803DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4804{
4805 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4806 uint8_t u8;
4807# if RT_INLINE_ASM_GNU_STYLE
4808 __asm__ __volatile__("movb (%1), %0\n\t"
4809 : "=r" (u8)
4810 : "r" (pvByte));
4811# else
4812 __asm
4813 {
4814# ifdef RT_ARCH_AMD64
4815 mov rax, [pvByte]
4816 mov al, [rax]
4817# else
4818 mov eax, [pvByte]
4819 mov al, [eax]
4820# endif
4821 mov [u8], al
4822 }
4823# endif
4824 return u8;
4825}
4826#endif
4827
4828/**
4829 * Probes a buffer for read access page by page.
4830 *
4831 * While the function will fault if the buffer is not fully read
4832 * accessible, the idea is to do this in a safe place like before
4833 * acquiring locks and such like.
4834 *
4835 * Also, this function guarantees that an eager compiler is not going
4836 * to optimize the probing away.
4837 *
4838 * @param pvBuf Pointer to the buffer.
4839 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4840 */
4841DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4842{
4843 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4844 /* the first byte */
4845 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4846 ASMProbeReadByte(pu8);
4847
4848 /* the pages in between. */
4849 while (cbBuf > /*PAGE_SIZE*/0x1000)
4850 {
4851 ASMProbeReadByte(pu8);
4852 cbBuf -= /*PAGE_SIZE*/0x1000;
4853 pu8 += /*PAGE_SIZE*/0x1000;
4854 }
4855
4856 /* the last byte */
4857 ASMProbeReadByte(pu8 + cbBuf - 1);
4858}
4859
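/* Illustrative sketch only (hypothetical helper name): probing a caller
   supplied buffer up front means any page fault happens here, in a safe
   context, rather than after locks have been taken. */
#if 0
DECLINLINE(void) ExampleProbeBeforeLocking(const void *pvUserBuf, size_t cbUserBuf)
{
    /* cbUserBuf must be at least 1; an inaccessible page faults right here. */
    ASMProbeReadBuffer(pvUserBuf, cbUserBuf);
    /* ... now take the lock and copy from pvUserBuf ... */
}
#endif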
4860
4861/** @def ASMBreakpoint
4862 * Debugger Breakpoint.
4863 * @remark In the gnu world we add a nop instruction after the int3 to
4864 * force gdb to remain at the int3 source line.
4865 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4866 * @internal
4867 */
4868#if RT_INLINE_ASM_GNU_STYLE
4869# ifndef __L4ENV__
4870# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4871# else
4872# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4873# endif
4874#else
4875# define ASMBreakpoint() __debugbreak()
4876#endif
4877
4878
4879
4880/** @defgroup grp_inline_bits Bit Operations
4881 * @{
4882 */
4883
4884
4885/**
4886 * Sets a bit in a bitmap.
4887 *
4888 * @param pvBitmap Pointer to the bitmap.
4889 * @param iBit The bit to set.
4890 */
4891#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4892DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4893#else
4894DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4895{
4896# if RT_INLINE_ASM_USES_INTRIN
4897 _bittestandset((long *)pvBitmap, iBit);
4898
4899# elif RT_INLINE_ASM_GNU_STYLE
4900 __asm__ __volatile__ ("btsl %1, %0"
4901 : "=m" (*(volatile long *)pvBitmap)
4902 : "Ir" (iBit)
4903 : "memory");
4904# else
4905 __asm
4906 {
4907# ifdef RT_ARCH_AMD64
4908 mov rax, [pvBitmap]
4909 mov edx, [iBit]
4910 bts [rax], edx
4911# else
4912 mov eax, [pvBitmap]
4913 mov edx, [iBit]
4914 bts [eax], edx
4915# endif
4916 }
4917# endif
4918}
4919#endif
4920
4921
4922/**
4923 * Atomically sets a bit in a bitmap, ordered.
4924 *
4925 * @param pvBitmap Pointer to the bitmap.
4926 * @param iBit The bit to set.
4927 */
4928#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4929DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4930#else
4931DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4932{
4933# if RT_INLINE_ASM_USES_INTRIN
4934 _interlockedbittestandset((long *)pvBitmap, iBit);
4935# elif RT_INLINE_ASM_GNU_STYLE
4936 __asm__ __volatile__ ("lock; btsl %1, %0"
4937 : "=m" (*(volatile long *)pvBitmap)
4938 : "Ir" (iBit)
4939 : "memory");
4940# else
4941 __asm
4942 {
4943# ifdef RT_ARCH_AMD64
4944 mov rax, [pvBitmap]
4945 mov edx, [iBit]
4946 lock bts [rax], edx
4947# else
4948 mov eax, [pvBitmap]
4949 mov edx, [iBit]
4950 lock bts [eax], edx
4951# endif
4952 }
4953# endif
4954}
4955#endif
4956
4957
4958/**
4959 * Clears a bit in a bitmap.
4960 *
4961 * @param pvBitmap Pointer to the bitmap.
4962 * @param iBit The bit to clear.
4963 */
4964#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4965DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4966#else
4967DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4968{
4969# if RT_INLINE_ASM_USES_INTRIN
4970 _bittestandreset((long *)pvBitmap, iBit);
4971
4972# elif RT_INLINE_ASM_GNU_STYLE
4973 __asm__ __volatile__ ("btrl %1, %0"
4974 : "=m" (*(volatile long *)pvBitmap)
4975 : "Ir" (iBit)
4976 : "memory");
4977# else
4978 __asm
4979 {
4980# ifdef RT_ARCH_AMD64
4981 mov rax, [pvBitmap]
4982 mov edx, [iBit]
4983 btr [rax], edx
4984# else
4985 mov eax, [pvBitmap]
4986 mov edx, [iBit]
4987 btr [eax], edx
4988# endif
4989 }
4990# endif
4991}
4992#endif
4993
4994
4995/**
4996 * Atomically clears a bit in a bitmap, ordered.
4997 *
4998 * @param pvBitmap Pointer to the bitmap.
4999 * @param iBit The bit to clear.
5000 * @remark No memory barrier, take care on smp.
5001 */
5002#if RT_INLINE_ASM_EXTERNAL
5003DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5004#else
5005DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5006{
5007# if RT_INLINE_ASM_GNU_STYLE
5008 __asm__ __volatile__ ("lock; btrl %1, %0"
5009 : "=m" (*(volatile long *)pvBitmap)
5010 : "Ir" (iBit)
5011 : "memory");
5012# else
5013 __asm
5014 {
5015# ifdef RT_ARCH_AMD64
5016 mov rax, [pvBitmap]
5017 mov edx, [iBit]
5018 lock btr [rax], edx
5019# else
5020 mov eax, [pvBitmap]
5021 mov edx, [iBit]
5022 lock btr [eax], edx
5023# endif
5024 }
5025# endif
5026}
5027#endif
5028
5029
5030/**
5031 * Toggles a bit in a bitmap.
5032 *
5033 * @param pvBitmap Pointer to the bitmap.
5034 * @param iBit The bit to toggle.
5035 */
5036#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5037DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5038#else
5039DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5040{
5041# if RT_INLINE_ASM_USES_INTRIN
5042 _bittestandcomplement((long *)pvBitmap, iBit);
5043# elif RT_INLINE_ASM_GNU_STYLE
5044 __asm__ __volatile__ ("btcl %1, %0"
5045 : "=m" (*(volatile long *)pvBitmap)
5046 : "Ir" (iBit)
5047 : "memory");
5048# else
5049 __asm
5050 {
5051# ifdef RT_ARCH_AMD64
5052 mov rax, [pvBitmap]
5053 mov edx, [iBit]
5054 btc [rax], edx
5055# else
5056 mov eax, [pvBitmap]
5057 mov edx, [iBit]
5058 btc [eax], edx
5059# endif
5060 }
5061# endif
5062}
5063#endif
5064
5065
5066/**
5067 * Atomically toggles a bit in a bitmap, ordered.
5068 *
5069 * @param pvBitmap Pointer to the bitmap.
5070 * @param iBit The bit to toggle.
5071 */
5072#if RT_INLINE_ASM_EXTERNAL
5073DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5074#else
5075DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5076{
5077# if RT_INLINE_ASM_GNU_STYLE
5078 __asm__ __volatile__ ("lock; btcl %1, %0"
5079 : "=m" (*(volatile long *)pvBitmap)
5080 : "Ir" (iBit)
5081 : "memory");
5082# else
5083 __asm
5084 {
5085# ifdef RT_ARCH_AMD64
5086 mov rax, [pvBitmap]
5087 mov edx, [iBit]
5088 lock btc [rax], edx
5089# else
5090 mov eax, [pvBitmap]
5091 mov edx, [iBit]
5092 lock btc [eax], edx
5093# endif
5094 }
5095# endif
5096}
5097#endif
5098
5099
5100/**
5101 * Tests and sets a bit in a bitmap.
5102 *
5103 * @returns true if the bit was set.
5104 * @returns false if the bit was clear.
5105 * @param pvBitmap Pointer to the bitmap.
5106 * @param iBit The bit to test and set.
5107 */
5108#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5109DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5110#else
5111DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5112{
5113 union { bool f; uint32_t u32; uint8_t u8; } rc;
5114# if RT_INLINE_ASM_USES_INTRIN
5115 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5116
5117# elif RT_INLINE_ASM_GNU_STYLE
5118 __asm__ __volatile__ ("btsl %2, %1\n\t"
5119 "setc %b0\n\t"
5120 "andl $1, %0\n\t"
5121 : "=q" (rc.u32),
5122 "=m" (*(volatile long *)pvBitmap)
5123 : "Ir" (iBit)
5124 : "memory");
5125# else
5126 __asm
5127 {
5128 mov edx, [iBit]
5129# ifdef RT_ARCH_AMD64
5130 mov rax, [pvBitmap]
5131 bts [rax], edx
5132# else
5133 mov eax, [pvBitmap]
5134 bts [eax], edx
5135# endif
5136 setc al
5137 and eax, 1
5138 mov [rc.u32], eax
5139 }
5140# endif
5141 return rc.f;
5142}
5143#endif
5144
5145
5146/**
5147 * Atomically tests and sets a bit in a bitmap, ordered.
5148 *
5149 * @returns true if the bit was set.
5150 * @returns false if the bit was clear.
5151 * @param pvBitmap Pointer to the bitmap.
5152 * @param iBit The bit to test and set.
5153 */
5154#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5155DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5156#else
5157DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5158{
5159 union { bool f; uint32_t u32; uint8_t u8; } rc;
5160# if RT_INLINE_ASM_USES_INTRIN
5161 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5162# elif RT_INLINE_ASM_GNU_STYLE
5163 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5164 "setc %b0\n\t"
5165 "andl $1, %0\n\t"
5166 : "=q" (rc.u32),
5167 "=m" (*(volatile long *)pvBitmap)
5168 : "Ir" (iBit)
5169 : "memory");
5170# else
5171 __asm
5172 {
5173 mov edx, [iBit]
5174# ifdef RT_ARCH_AMD64
5175 mov rax, [pvBitmap]
5176 lock bts [rax], edx
5177# else
5178 mov eax, [pvBitmap]
5179 lock bts [eax], edx
5180# endif
5181 setc al
5182 and eax, 1
5183 mov [rc.u32], eax
5184 }
5185# endif
5186 return rc.f;
5187}
5188#endif
5189
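/* Illustrative sketch only (hypothetical helper name): because the function
   above returns the previous bit value, several threads can race to claim
   slots in a shared allocation bitmap without any extra locking. */
#if 0
DECLINLINE(int32_t) ExampleClaimSlot(volatile void *pvBitmap, int32_t cSlots)
{
    int32_t iSlot;
    for (iSlot = 0; iSlot < cSlots; iSlot++)
        if (!ASMAtomicBitTestAndSet(pvBitmap, iSlot))
            return iSlot;   /* the bit was clear before, so the slot is ours */
    return -1;              /* all slots taken */
}
#endif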
5190
5191/**
5192 * Tests and clears a bit in a bitmap.
5193 *
5194 * @returns true if the bit was set.
5195 * @returns false if the bit was clear.
5196 * @param pvBitmap Pointer to the bitmap.
5197 * @param iBit The bit to test and clear.
5198 */
5199#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5200DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5201#else
5202DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5203{
5204 union { bool f; uint32_t u32; uint8_t u8; } rc;
5205# if RT_INLINE_ASM_USES_INTRIN
5206 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5207
5208# elif RT_INLINE_ASM_GNU_STYLE
5209 __asm__ __volatile__ ("btrl %2, %1\n\t"
5210 "setc %b0\n\t"
5211 "andl $1, %0\n\t"
5212 : "=q" (rc.u32),
5213 "=m" (*(volatile long *)pvBitmap)
5214 : "Ir" (iBit)
5215 : "memory");
5216# else
5217 __asm
5218 {
5219 mov edx, [iBit]
5220# ifdef RT_ARCH_AMD64
5221 mov rax, [pvBitmap]
5222 btr [rax], edx
5223# else
5224 mov eax, [pvBitmap]
5225 btr [eax], edx
5226# endif
5227 setc al
5228 and eax, 1
5229 mov [rc.u32], eax
5230 }
5231# endif
5232 return rc.f;
5233}
5234#endif
5235
5236
5237/**
5238 * Atomically tests and clears a bit in a bitmap, ordered.
5239 *
5240 * @returns true if the bit was set.
5241 * @returns false if the bit was clear.
5242 * @param pvBitmap Pointer to the bitmap.
5243 * @param iBit The bit to test and clear.
5244 * @remark No memory barrier, take care on smp.
5245 */
5246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5247DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5248#else
5249DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5250{
5251 union { bool f; uint32_t u32; uint8_t u8; } rc;
5252# if RT_INLINE_ASM_USES_INTRIN
5253 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5254
5255# elif RT_INLINE_ASM_GNU_STYLE
5256 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5257 "setc %b0\n\t"
5258 "andl $1, %0\n\t"
5259 : "=q" (rc.u32),
5260 "=m" (*(volatile long *)pvBitmap)
5261 : "Ir" (iBit)
5262 : "memory");
5263# else
5264 __asm
5265 {
5266 mov edx, [iBit]
5267# ifdef RT_ARCH_AMD64
5268 mov rax, [pvBitmap]
5269 lock btr [rax], edx
5270# else
5271 mov eax, [pvBitmap]
5272 lock btr [eax], edx
5273# endif
5274 setc al
5275 and eax, 1
5276 mov [rc.u32], eax
5277 }
5278# endif
5279 return rc.f;
5280}
5281#endif
5282
5283
5284/**
5285 * Tests and toggles a bit in a bitmap.
5286 *
5287 * @returns true if the bit was set.
5288 * @returns false if the bit was clear.
5289 * @param pvBitmap Pointer to the bitmap.
5290 * @param iBit The bit to test and toggle.
5291 */
5292#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5293DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5294#else
5295DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5296{
5297 union { bool f; uint32_t u32; uint8_t u8; } rc;
5298# if RT_INLINE_ASM_USES_INTRIN
5299 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5300
5301# elif RT_INLINE_ASM_GNU_STYLE
5302 __asm__ __volatile__ ("btcl %2, %1\n\t"
5303 "setc %b0\n\t"
5304 "andl $1, %0\n\t"
5305 : "=q" (rc.u32),
5306 "=m" (*(volatile long *)pvBitmap)
5307 : "Ir" (iBit)
5308 : "memory");
5309# else
5310 __asm
5311 {
5312 mov edx, [iBit]
5313# ifdef RT_ARCH_AMD64
5314 mov rax, [pvBitmap]
5315 btc [rax], edx
5316# else
5317 mov eax, [pvBitmap]
5318 btc [eax], edx
5319# endif
5320 setc al
5321 and eax, 1
5322 mov [rc.u32], eax
5323 }
5324# endif
5325 return rc.f;
5326}
5327#endif
5328
5329
5330/**
5331 * Atomically tests and toggles a bit in a bitmap, ordered.
5332 *
5333 * @returns true if the bit was set.
5334 * @returns false if the bit was clear.
5335 * @param pvBitmap Pointer to the bitmap.
5336 * @param iBit The bit to test and toggle.
5337 */
5338#if RT_INLINE_ASM_EXTERNAL
5339DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5340#else
5341DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5342{
5343 union { bool f; uint32_t u32; uint8_t u8; } rc;
5344# if RT_INLINE_ASM_GNU_STYLE
5345 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5346 "setc %b0\n\t"
5347 "andl $1, %0\n\t"
5348 : "=q" (rc.u32),
5349 "=m" (*(volatile long *)pvBitmap)
5350 : "Ir" (iBit)
5351 : "memory");
5352# else
5353 __asm
5354 {
5355 mov edx, [iBit]
5356# ifdef RT_ARCH_AMD64
5357 mov rax, [pvBitmap]
5358 lock btc [rax], edx
5359# else
5360 mov eax, [pvBitmap]
5361 lock btc [eax], edx
5362# endif
5363 setc al
5364 and eax, 1
5365 mov [rc.u32], eax
5366 }
5367# endif
5368 return rc.f;
5369}
5370#endif
5371
5372
5373/**
5374 * Tests if a bit in a bitmap is set.
5375 *
5376 * @returns true if the bit is set.
5377 * @returns false if the bit is clear.
5378 * @param pvBitmap Pointer to the bitmap.
5379 * @param iBit The bit to test.
5380 */
5381#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5382DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5383#else
5384DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5385{
5386 union { bool f; uint32_t u32; uint8_t u8; } rc;
5387# if RT_INLINE_ASM_USES_INTRIN
5388 rc.u32 = _bittest((long *)pvBitmap, iBit);
5389# elif RT_INLINE_ASM_GNU_STYLE
5390
5391 __asm__ __volatile__ ("btl %2, %1\n\t"
5392 "setc %b0\n\t"
5393 "andl $1, %0\n\t"
5394 : "=q" (rc.u32)
5395 : "m" (*(const volatile long *)pvBitmap),
5396 "Ir" (iBit)
5397 : "memory");
5398# else
5399 __asm
5400 {
5401 mov edx, [iBit]
5402# ifdef RT_ARCH_AMD64
5403 mov rax, [pvBitmap]
5404 bt [rax], edx
5405# else
5406 mov eax, [pvBitmap]
5407 bt [eax], edx
5408# endif
5409 setc al
5410 and eax, 1
5411 mov [rc.u32], eax
5412 }
5413# endif
5414 return rc.f;
5415}
5416#endif
5417
5418
5419/**
5420 * Clears a bit range within a bitmap.
5421 *
5422 * @param pvBitmap Pointer to the bitmap.
5423 * @param iBitStart The first bit to clear.
5424 * @param iBitEnd The first bit not to clear.
5425 */
5426DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5427{
5428 if (iBitStart < iBitEnd)
5429 {
5430 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5431 int iStart = iBitStart & ~31;
5432 int iEnd = iBitEnd & ~31;
5433 if (iStart == iEnd)
5434 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5435 else
5436 {
5437 /* bits in first dword. */
5438 if (iBitStart & 31)
5439 {
5440 *pu32 &= (1 << (iBitStart & 31)) - 1;
5441 pu32++;
5442 iBitStart = iStart + 32;
5443 }
5444
5445 /* whole dword. */
5446 if (iBitStart != iEnd)
5447 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5448
5449 /* bits in last dword. */
5450 if (iBitEnd & 31)
5451 {
5452 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5453 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5454 }
5455 }
5456 }
5457}
5458
5459
5460/**
5461 * Sets a bit range within a bitmap.
5462 *
5463 * @param pvBitmap Pointer to the bitmap.
5464 * @param iBitStart The first bit to set.
5465 * @param iBitEnd The first bit not to set.
5466 */
5467DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5468{
5469 if (iBitStart < iBitEnd)
5470 {
5471 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5472 int iStart = iBitStart & ~31;
5473 int iEnd = iBitEnd & ~31;
5474 if (iStart == iEnd)
5475 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
5476 else
5477 {
5478 /* bits in first dword. */
5479 if (iBitStart & 31)
5480 {
5481 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5482 pu32++;
5483 iBitStart = iStart + 32;
5484 }
5485
5486 /* whole dword. */
5487 if (iBitStart != iEnd)
5488 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5489
5490 /* bits in last dword. */
5491 if (iBitEnd & 31)
5492 {
5493 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5494 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5495 }
5496 }
5497 }
5498}
5499
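/* Illustrative sketch only (hypothetical helper name): the range helpers above
   take a half-open interval [iBitStart, iBitEnd), which maps naturally onto
   marking a run of pages as used or free in an allocation bitmap. */
#if 0
DECLINLINE(void) ExampleMarkPageRun(volatile void *pvBitmap, int32_t iFirstPage, int32_t cPages, bool fInUse)
{
    if (fInUse)
        ASMBitSetRange(pvBitmap, iFirstPage, iFirstPage + cPages);
    else
        ASMBitClearRange(pvBitmap, iFirstPage, iFirstPage + cPages);
}
#endif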
5500
5501/**
5502 * Finds the first clear bit in a bitmap.
5503 *
5504 * @returns Index of the first zero bit.
5505 * @returns -1 if no clear bit was found.
5506 * @param pvBitmap Pointer to the bitmap.
5507 * @param cBits The number of bits in the bitmap. Multiple of 32.
5508 */
5509#if RT_INLINE_ASM_EXTERNAL
5510DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5511#else
5512DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5513{
5514 if (cBits)
5515 {
5516 int32_t iBit;
5517# if RT_INLINE_ASM_GNU_STYLE
5518 RTCCUINTREG uEAX, uECX, uEDI;
5519 cBits = RT_ALIGN_32(cBits, 32);
5520 __asm__ __volatile__("repe; scasl\n\t"
5521 "je 1f\n\t"
5522# ifdef RT_ARCH_AMD64
5523 "lea -4(%%rdi), %%rdi\n\t"
5524 "xorl (%%rdi), %%eax\n\t"
5525 "subq %5, %%rdi\n\t"
5526# else
5527 "lea -4(%%edi), %%edi\n\t"
5528 "xorl (%%edi), %%eax\n\t"
5529 "subl %5, %%edi\n\t"
5530# endif
5531 "shll $3, %%edi\n\t"
5532 "bsfl %%eax, %%edx\n\t"
5533 "addl %%edi, %%edx\n\t"
5534 "1:\t\n"
5535 : "=d" (iBit),
5536 "=&c" (uECX),
5537 "=&D" (uEDI),
5538 "=&a" (uEAX)
5539 : "0" (0xffffffff),
5540 "mr" (pvBitmap),
5541 "1" (cBits >> 5),
5542 "2" (pvBitmap),
5543 "3" (0xffffffff));
5544# else
5545 cBits = RT_ALIGN_32(cBits, 32);
5546 __asm
5547 {
5548# ifdef RT_ARCH_AMD64
5549 mov rdi, [pvBitmap]
5550 mov rbx, rdi
5551# else
5552 mov edi, [pvBitmap]
5553 mov ebx, edi
5554# endif
5555 mov edx, 0ffffffffh
5556 mov eax, edx
5557 mov ecx, [cBits]
5558 shr ecx, 5
5559 repe scasd
5560 je done
5561
5562# ifdef RT_ARCH_AMD64
5563 lea rdi, [rdi - 4]
5564 xor eax, [rdi]
5565 sub rdi, rbx
5566# else
5567 lea edi, [edi - 4]
5568 xor eax, [edi]
5569 sub edi, ebx
5570# endif
5571 shl edi, 3
5572 bsf edx, eax
5573 add edx, edi
5574 done:
5575 mov [iBit], edx
5576 }
5577# endif
5578 return iBit;
5579 }
5580 return -1;
5581}
5582#endif
5583
5584
5585/**
5586 * Finds the next clear bit in a bitmap.
5587 *
5588 * @returns Index of the next clear bit.
5589 * @returns -1 if no clear bit was found.
5590 * @param pvBitmap Pointer to the bitmap.
5591 * @param cBits The number of bits in the bitmap. Multiple of 32.
5592 * @param iBitPrev The bit returned from the last search.
5593 * The search will start at iBitPrev + 1.
5594 */
5595#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5596DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5597#else
5598DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5599{
5600 int iBit = ++iBitPrev & 31;
5601 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5602 cBits -= iBitPrev & ~31;
5603 if (iBit)
5604 {
5605 /* inspect the first dword. */
5606 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5607# if RT_INLINE_ASM_USES_INTRIN
5608 unsigned long ulBit = 0;
5609 if (_BitScanForward(&ulBit, u32))
5610 return ulBit + iBitPrev;
5611 iBit = -1;
5612# else
5613# if RT_INLINE_ASM_GNU_STYLE
5614 __asm__ __volatile__("bsf %1, %0\n\t"
5615 "jnz 1f\n\t"
5616 "movl $-1, %0\n\t"
5617 "1:\n\t"
5618 : "=r" (iBit)
5619 : "r" (u32));
5620# else
5621 __asm
5622 {
5623 mov edx, [u32]
5624 bsf eax, edx
5625 jnz done
5626 mov eax, 0ffffffffh
5627 done:
5628 mov [iBit], eax
5629 }
5630# endif
5631 if (iBit >= 0)
5632 return iBit + iBitPrev;
5633# endif
5634 /* Search the rest of the bitmap, if there is anything. */
5635 if (cBits > 32)
5636 {
5637 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5638 if (iBit >= 0)
5639 return iBit + (iBitPrev & ~31) + 32;
5640 }
5641 }
5642 else
5643 {
5644 /* Search the rest of the bitmap. */
5645 iBit = ASMBitFirstClear(pvBitmap, cBits);
5646 if (iBit >= 0)
5647 return iBit + (iBitPrev & ~31);
5648 }
5649 return iBit;
5650}
5651#endif
5652
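/* Illustrative sketch only (hypothetical helper name): walking all clear
   (free) bits with ASMBitFirstClear and ASMBitNextClear.  Both return -1 when
   nothing further is found, and cBits must be a multiple of 32. */
#if 0
DECLINLINE(uint32_t) ExampleCountFreeBits(const volatile void *pvBitmap, uint32_t cBits)
{
    uint32_t cFree = 0;
    int iBit = ASMBitFirstClear(pvBitmap, cBits);
    while (iBit >= 0)
    {
        cFree++;
        iBit = ASMBitNextClear(pvBitmap, cBits, iBit);
    }
    return cFree;
}
#endif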
5653
5654/**
5655 * Finds the first set bit in a bitmap.
5656 *
5657 * @returns Index of the first set bit.
5658 * @returns -1 if no set bit was found.
5659 * @param pvBitmap Pointer to the bitmap.
5660 * @param cBits The number of bits in the bitmap. Multiple of 32.
5661 */
5662#if RT_INLINE_ASM_EXTERNAL
5663DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5664#else
5665DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5666{
5667 if (cBits)
5668 {
5669 int32_t iBit;
5670# if RT_INLINE_ASM_GNU_STYLE
5671 RTCCUINTREG uEAX, uECX, uEDI;
5672 cBits = RT_ALIGN_32(cBits, 32);
5673 __asm__ __volatile__("repe; scasl\n\t"
5674 "je 1f\n\t"
5675# ifdef RT_ARCH_AMD64
5676 "lea -4(%%rdi), %%rdi\n\t"
5677 "movl (%%rdi), %%eax\n\t"
5678 "subq %5, %%rdi\n\t"
5679# else
5680 "lea -4(%%edi), %%edi\n\t"
5681 "movl (%%edi), %%eax\n\t"
5682 "subl %5, %%edi\n\t"
5683# endif
5684 "shll $3, %%edi\n\t"
5685 "bsfl %%eax, %%edx\n\t"
5686 "addl %%edi, %%edx\n\t"
5687 "1:\t\n"
5688 : "=d" (iBit),
5689 "=&c" (uECX),
5690 "=&D" (uEDI),
5691 "=&a" (uEAX)
5692 : "0" (0xffffffff),
5693 "mr" (pvBitmap),
5694 "1" (cBits >> 5),
5695 "2" (pvBitmap),
5696 "3" (0));
5697# else
5698 cBits = RT_ALIGN_32(cBits, 32);
5699 __asm
5700 {
5701# ifdef RT_ARCH_AMD64
5702 mov rdi, [pvBitmap]
5703 mov rbx, rdi
5704# else
5705 mov edi, [pvBitmap]
5706 mov ebx, edi
5707# endif
5708 mov edx, 0ffffffffh
5709 xor eax, eax
5710 mov ecx, [cBits]
5711 shr ecx, 5
5712 repe scasd
5713 je done
5714# ifdef RT_ARCH_AMD64
5715 lea rdi, [rdi - 4]
5716 mov eax, [rdi]
5717 sub rdi, rbx
5718# else
5719 lea edi, [edi - 4]
5720 mov eax, [edi]
5721 sub edi, ebx
5722# endif
5723 shl edi, 3
5724 bsf edx, eax
5725 add edx, edi
5726 done:
5727 mov [iBit], edx
5728 }
5729# endif
5730 return iBit;
5731 }
5732 return -1;
5733}
5734#endif
5735
5736
5737/**
5738 * Finds the next set bit in a bitmap.
5739 *
5740 * @returns Index of the next set bit.
5741 * @returns -1 if no set bit was found.
5742 * @param pvBitmap Pointer to the bitmap.
5743 * @param cBits The number of bits in the bitmap. Multiple of 32.
5744 * @param iBitPrev The bit returned from the last search.
5745 * The search will start at iBitPrev + 1.
5746 */
5747#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5748DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5749#else
5750DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5751{
5752 int iBit = ++iBitPrev & 31;
5753 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5754 cBits -= iBitPrev & ~31;
5755 if (iBit)
5756 {
5757 /* inspect the first dword. */
5758 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5759# if RT_INLINE_ASM_USES_INTRIN
5760 unsigned long ulBit = 0;
5761 if (_BitScanForward(&ulBit, u32))
5762 return ulBit + iBitPrev;
5763 iBit = -1;
5764# else
5765# if RT_INLINE_ASM_GNU_STYLE
5766 __asm__ __volatile__("bsf %1, %0\n\t"
5767 "jnz 1f\n\t"
5768 "movl $-1, %0\n\t"
5769 "1:\n\t"
5770 : "=r" (iBit)
5771 : "r" (u32));
5772# else
5773 __asm
5774 {
5775 mov edx, u32
5776 bsf eax, edx
5777 jnz done
5778 mov eax, 0ffffffffh
5779 done:
5780 mov [iBit], eax
5781 }
5782# endif
5783 if (iBit >= 0)
5784 return iBit + iBitPrev;
5785# endif
5786 /* Search the rest of the bitmap, if there is anything. */
5787 if (cBits > 32)
5788 {
5789 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5790 if (iBit >= 0)
5791 return iBit + (iBitPrev & ~31) + 32;
5792 }
5793
5794 }
5795 else
5796 {
5797 /* Search the rest of the bitmap. */
5798 iBit = ASMBitFirstSet(pvBitmap, cBits);
5799 if (iBit >= 0)
5800 return iBit + (iBitPrev & ~31);
5801 }
5802 return iBit;
5803}
5804#endif
5805
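/* Illustrative sketch only (hypothetical helper name): draining a pending-work
   bitmap by pairing the set-bit scanners with ASMAtomicBitTestAndClear, so
   only the thread that actually clears a bit hands it to the callback. */
#if 0
DECLINLINE(void) ExampleServicePending(volatile void *pvPending, uint32_t cBits, void (*pfnService)(int32_t iBit))
{
    int iBit = ASMBitFirstSet(pvPending, cBits);
    while (iBit >= 0)
    {
        if (ASMAtomicBitTestAndClear(pvPending, iBit))
            pfnService(iBit);
        iBit = ASMBitNextSet(pvPending, cBits, iBit);
    }
}
#endif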
5806
5807/**
5808 * Finds the first bit which is set in the given 32-bit integer.
5809 * Bits are numbered from 1 (least significant) to 32.
5810 *
5811 * @returns index [1..32] of the first set bit.
5812 * @returns 0 if all bits are cleared.
5813 * @param u32 Integer to search for set bits.
5814 * @remark Similar to ffs() in BSD.
5815 */
5816DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5817{
5818# if RT_INLINE_ASM_USES_INTRIN
5819 unsigned long iBit;
5820 if (_BitScanForward(&iBit, u32))
5821 iBit++;
5822 else
5823 iBit = 0;
5824# elif RT_INLINE_ASM_GNU_STYLE
5825 uint32_t iBit;
5826 __asm__ __volatile__("bsf %1, %0\n\t"
5827 "jnz 1f\n\t"
5828 "xorl %0, %0\n\t"
5829 "jmp 2f\n"
5830 "1:\n\t"
5831 "incl %0\n"
5832 "2:\n\t"
5833 : "=r" (iBit)
5834 : "rm" (u32));
5835# else
5836 uint32_t iBit;
5837 _asm
5838 {
5839 bsf eax, [u32]
5840 jnz found
5841 xor eax, eax
5842 jmp done
5843 found:
5844 inc eax
5845 done:
5846 mov [iBit], eax
5847 }
5848# endif
5849 return iBit;
5850}
5851
5852
5853/**
5854 * Finds the first bit which is set in the given 32-bit integer.
5855 * Bits are numbered from 1 (least significant) to 32.
5856 *
5857 * @returns index [1..32] of the first set bit.
5858 * @returns 0 if all bits are cleared.
5859 * @param i32 Integer to search for set bits.
5860 * @remark Similar to ffs() in BSD.
5861 */
5862DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5863{
5864 return ASMBitFirstSetU32((uint32_t)i32);
5865}
5866
5867
5868/**
5869 * Finds the last bit which is set in the given 32-bit integer.
5870 * Bits are numbered from 1 (least significant) to 32.
5871 *
5872 * @returns index [1..32] of the last set bit.
5873 * @returns 0 if all bits are cleared.
5874 * @param u32 Integer to search for set bits.
5875 * @remark Similar to fls() in BSD.
5876 */
5877DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5878{
5879# if RT_INLINE_ASM_USES_INTRIN
5880 unsigned long iBit;
5881 if (_BitScanReverse(&iBit, u32))
5882 iBit++;
5883 else
5884 iBit = 0;
5885# elif RT_INLINE_ASM_GNU_STYLE
5886 uint32_t iBit;
5887 __asm__ __volatile__("bsrl %1, %0\n\t"
5888 "jnz 1f\n\t"
5889 "xorl %0, %0\n\t"
5890 "jmp 2f\n"
5891 "1:\n\t"
5892 "incl %0\n"
5893 "2:\n\t"
5894 : "=r" (iBit)
5895 : "rm" (u32));
5896# else
5897 uint32_t iBit;
5898 _asm
5899 {
5900 bsr eax, [u32]
5901 jnz found
5902 xor eax, eax
5903 jmp done
5904 found:
5905 inc eax
5906 done:
5907 mov [iBit], eax
5908 }
5909# endif
5910 return iBit;
5911}
5912
5913
5914/**
5915 * Finds the last bit which is set in the given 32-bit integer.
5916 * Bits are numbered from 1 (least significant) to 32.
5917 *
5918 * @returns index [1..32] of the last set bit.
5919 * @returns 0 if all bits are cleared.
5920 * @param i32 Integer to search for set bits.
5921 * @remark Similar to fls() in BSD.
5922 */
5923DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5924{
5925 return ASMBitLastSetU32((uint32_t)i32);
5926}
5927
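/* Illustrative sketches only (hypothetical helper names): since the scanners
   above number bits from 1, subtracting one yields the usual zero-based index,
   i.e. floor(log2()) for the last set bit of a non-zero value. */
#if 0
DECLINLINE(unsigned) ExampleLog2(uint32_t u32)
{
    return ASMBitLastSetU32(u32) - 1;   /* u32 must not be zero */
}

DECLINLINE(unsigned) ExampleAlignmentShift(uint32_t u32)
{
    return ASMBitFirstSetU32(u32) - 1;  /* u32 must not be zero */
}
#endif
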
5928/**
5929 * Reverse the byte order of the given 16-bit integer.
5930 *
5931 * @returns u16 with the byte order reversed.
5932 * @param u16 16-bit integer value.
5933 */
5934DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5935{
5936#if RT_INLINE_ASM_USES_INTRIN
5937 u16 = _byteswap_ushort(u16);
5938#elif RT_INLINE_ASM_GNU_STYLE
5939 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5940#else
5941 _asm
5942 {
5943 mov ax, [u16]
5944 ror ax, 8
5945 mov [u16], ax
5946 }
5947#endif
5948 return u16;
5949}
5950
5951/**
5952 * Reverse the byte order of the given 32-bit integer.
5953 *
5954 * @returns u32 with the byte order reversed.
5955 * @param u32 32-bit integer value.
5956 */
5957DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5958{
5959#if RT_INLINE_ASM_USES_INTRIN
5960 u32 = _byteswap_ulong(u32);
5961#elif RT_INLINE_ASM_GNU_STYLE
5962 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5963#else
5964 _asm
5965 {
5966 mov eax, [u32]
5967 bswap eax
5968 mov [u32], eax
5969 }
5970#endif
5971 return u32;
5972}
5973
5974
5975/**
5976 * Reverse the byte order of the given 64-bit integer.
5977 *
5978 * @returns u64 with the byte order reversed.
5979 * @param u64 64-bit integer value.
5980 */
5981DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5982{
5983#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5984 u64 = _byteswap_uint64(u64);
5985#else
5986 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5987 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5988#endif
5989 return u64;
5990}
5991
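/* Illustrative sketch only (hypothetical helper name): on the little-endian
   x86 targets of this header a byte swap is what turns a big-endian (network
   order) field into host order. */
#if 0
DECLINLINE(uint32_t) ExampleBigEndianToHostU32(uint32_t u32BigEndian)
{
    return ASMByteSwapU32(u32BigEndian);
}
#endif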
5992
5993/** @} */
5994
5995
5996/** @} */
5997#endif
5998