VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 14094

Last change on this file since 14094 was 14073, checked in by vboxsync, 17 years ago

asm.h: ASMAtomicXchgR3Ptr, ASMAtomicXchgR0Ptr, ASMAtomicXchgRCPtr.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 156.6 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using _MSC_VER >= 1400 (Visual C++ 2005 or later).
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(__readdr)
72# pragma intrinsic(__writedr)
73# pragma intrinsic(_BitScanForward)
74# pragma intrinsic(_BitScanReverse)
75# pragma intrinsic(_bittest)
76# pragma intrinsic(_bittestandset)
77# pragma intrinsic(_bittestandreset)
78# pragma intrinsic(_bittestandcomplement)
79# pragma intrinsic(_byteswap_ushort)
80# pragma intrinsic(_byteswap_ulong)
81# pragma intrinsic(_interlockedbittestandset)
82# pragma intrinsic(_interlockedbittestandreset)
83# pragma intrinsic(_InterlockedAnd)
84# pragma intrinsic(_InterlockedOr)
85# pragma intrinsic(_InterlockedIncrement)
86# pragma intrinsic(_InterlockedDecrement)
87# pragma intrinsic(_InterlockedExchange)
88# pragma intrinsic(_InterlockedExchangeAdd)
89# pragma intrinsic(_InterlockedCompareExchange)
90# pragma intrinsic(_InterlockedCompareExchange64)
91# ifdef RT_ARCH_AMD64
92# pragma intrinsic(__stosq)
93# pragma intrinsic(__readcr8)
94# pragma intrinsic(__writecr8)
95# pragma intrinsic(_byteswap_uint64)
96# pragma intrinsic(_InterlockedExchange64)
97# endif
98# endif
99#endif
100#ifndef RT_INLINE_ASM_USES_INTRIN
101# define RT_INLINE_ASM_USES_INTRIN 0
102#endif
103
104
105
106/** @defgroup grp_asm ASM - Assembly Routines
107 * @ingroup grp_rt
108 *
109 * @remarks The difference between ordered and unordered atomic operations is that
110 * the former complete all outstanding reads and writes before continuing,
111 * while the latter make no promises about the order. Ordered
112 * operations do not, it seems, make any 100% promise as to whether
113 * the operation itself completes before any subsequent memory access.
114 * (Please correct if wrong.)
115 *
116 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
117 * are unordered (note the Uo).
118 *
119 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
120 * or even optimize assembler instructions away. For instance, in the following code
121 * the second rdmsr instruction is optimized away because gcc treats that instruction
122 * as deterministic:
123 *
124 * @code
125 * static inline uint32_t rdmsr_low(int idx)
126 * {
127 * uint32_t low;
128 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
129 * }
130 * ...
131 * uint32_t msr1 = rdmsr_low(1);
132 * foo(msr1);
133 * msr1 = rdmsr_low(1);
134 * bar(msr1);
135 * @endcode
136 *
137 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
138 * use the result of the first call as input parameter for bar() as well. For rdmsr this
139 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
144
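/*
 * Illustrative sketch of the ordering guarantee described above (not part of
 * the original header): a producer publishing data to another thread with the
 * ordered ASMAtomicXchgU32 defined further down in this file.
 *
 *      static uint32_t           s_uPayload;
 *      static volatile uint32_t  s_fReady;
 *
 *      s_uPayload = 42;                     // plain store
 *      ASMAtomicXchgU32(&s_fReady, 1);      // ordered: the payload store is
 *                                           // visible before s_fReady flips.
 *
 * An unordered (Uo) operation would give no such guarantee; a consumer could
 * then see s_fReady set while still reading a stale s_uPayload.
 */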
145/** @def RT_INLINE_ASM_EXTERNAL
146 * Defined as 1 if the compiler does not support inline assembly.
147 * The ASM* functions will then be implemented in an external .asm file.
148 *
149 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
150 * inline assembly in their AMD64 compiler.
151 */
152#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
153# define RT_INLINE_ASM_EXTERNAL 1
154#else
155# define RT_INLINE_ASM_EXTERNAL 0
156#endif
157
158/** @def RT_INLINE_ASM_GNU_STYLE
159 * Defined as 1 if the compiler understands GNU style inline assembly.
160 */
161#if defined(_MSC_VER)
162# define RT_INLINE_ASM_GNU_STYLE 0
163#else
164# define RT_INLINE_ASM_GNU_STYLE 1
165#endif
166
167
168/** @todo find a more proper place for this structure? */
169#pragma pack(1)
170/** IDTR */
171typedef struct RTIDTR
172{
173 /** Size of the IDT. */
174 uint16_t cbIdt;
175 /** Address of the IDT. */
176 uintptr_t pIdt;
177} RTIDTR, *PRTIDTR;
178#pragma pack()
179
180#pragma pack(1)
181/** GDTR */
182typedef struct RTGDTR
183{
184 /** Size of the GDT. */
185 uint16_t cbGdt;
186 /** Address of the GDT. */
187 uintptr_t pGdt;
188} RTGDTR, *PRTGDTR;
189#pragma pack()
190
191
192/** @def ASMReturnAddress
193 * Gets the return address of the current (or calling if you like) function or method.
194 */
195#ifdef _MSC_VER
196# ifdef __cplusplus
197extern "C"
198# endif
199void * _ReturnAddress(void);
200# pragma intrinsic(_ReturnAddress)
201# define ASMReturnAddress() _ReturnAddress()
202#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
203# define ASMReturnAddress() __builtin_return_address(0)
204#else
205# error "Unsupported compiler."
206#endif
207
208
209/**
210 * Gets the content of the IDTR CPU register.
211 * @param pIdtr Where to store the IDTR contents.
212 */
213#if RT_INLINE_ASM_EXTERNAL
214DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
215#else
216DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
217{
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
220# else
221 __asm
222 {
223# ifdef RT_ARCH_AMD64
224 mov rax, [pIdtr]
225 sidt [rax]
226# else
227 mov eax, [pIdtr]
228 sidt [eax]
229# endif
230 }
231# endif
232}
233#endif
234
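/*
 * Usage sketch (illustrative only): capturing the IDT base and limit, assuming
 * a context where inspecting the IDTR is meaningful (typically ring-0 code).
 *
 *      RTIDTR Idtr;
 *      ASMGetIDTR(&Idtr);
 *      // Idtr.pIdt is the linear base address, Idtr.cbIdt the limit.
 */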
235
236/**
237 * Sets the content of the IDTR CPU register.
238 * @param pIdtr Where to load the IDTR contents from
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
242#else
243DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
247# else
248 __asm
249 {
250# ifdef RT_ARCH_AMD64
251 mov rax, [pIdtr]
252 lidt [rax]
253# else
254 mov eax, [pIdtr]
255 lidt [eax]
256# endif
257 }
258# endif
259}
260#endif
261
262
263/**
264 * Gets the content of the GDTR CPU register.
265 * @param pGdtr Where to store the GDTR contents.
266 */
267#if RT_INLINE_ASM_EXTERNAL
268DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
269#else
270DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
271{
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
274# else
275 __asm
276 {
277# ifdef RT_ARCH_AMD64
278 mov rax, [pGdtr]
279 sgdt [rax]
280# else
281 mov eax, [pGdtr]
282 sgdt [eax]
283# endif
284 }
285# endif
286}
287#endif
288
289/**
290 * Get the cs register.
291 * @returns cs.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetCS(void);
295#else
296DECLINLINE(RTSEL) ASMGetCS(void)
297{
298 RTSEL SelCS;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
301# else
302 __asm
303 {
304 mov ax, cs
305 mov [SelCS], ax
306 }
307# endif
308 return SelCS;
309}
310#endif
311
312
313/**
314 * Get the DS register.
315 * @returns DS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetDS(void);
319#else
320DECLINLINE(RTSEL) ASMGetDS(void)
321{
322 RTSEL SelDS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
325# else
326 __asm
327 {
328 mov ax, ds
329 mov [SelDS], ax
330 }
331# endif
332 return SelDS;
333}
334#endif
335
336
337/**
338 * Get the ES register.
339 * @returns ES.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetES(void);
343#else
344DECLINLINE(RTSEL) ASMGetES(void)
345{
346 RTSEL SelES;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
349# else
350 __asm
351 {
352 mov ax, es
353 mov [SelES], ax
354 }
355# endif
356 return SelES;
357}
358#endif
359
360
361/**
362 * Get the FS register.
363 * @returns FS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetFS(void);
367#else
368DECLINLINE(RTSEL) ASMGetFS(void)
369{
370 RTSEL SelFS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
373# else
374 __asm
375 {
376 mov ax, fs
377 mov [SelFS], ax
378 }
379# endif
380 return SelFS;
381}
382#endif
383
384
385/**
386 * Get the GS register.
387 * @returns GS.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetGS(void);
391#else
392DECLINLINE(RTSEL) ASMGetGS(void)
393{
394 RTSEL SelGS;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
397# else
398 __asm
399 {
400 mov ax, gs
401 mov [SelGS], ax
402 }
403# endif
404 return SelGS;
405}
406#endif
407
408
409/**
410 * Get the SS register.
411 * @returns SS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTSEL) ASMGetSS(void);
415#else
416DECLINLINE(RTSEL) ASMGetSS(void)
417{
418 RTSEL SelSS;
419# if RT_INLINE_ASM_GNU_STYLE
420 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
421# else
422 __asm
423 {
424 mov ax, ss
425 mov [SelSS], ax
426 }
427# endif
428 return SelSS;
429}
430#endif
431
432
433/**
434 * Get the TR register.
435 * @returns TR.
436 */
437#if RT_INLINE_ASM_EXTERNAL
438DECLASM(RTSEL) ASMGetTR(void);
439#else
440DECLINLINE(RTSEL) ASMGetTR(void)
441{
442 RTSEL SelTR;
443# if RT_INLINE_ASM_GNU_STYLE
444 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
445# else
446 __asm
447 {
448 str ax
449 mov [SelTR], ax
450 }
451# endif
452 return SelTR;
453}
454#endif
455
456
457/**
458 * Get the [RE]FLAGS register.
459 * @returns [RE]FLAGS.
460 */
461#if RT_INLINE_ASM_EXTERNAL
462DECLASM(RTCCUINTREG) ASMGetFlags(void);
463#else
464DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
465{
466 RTCCUINTREG uFlags;
467# if RT_INLINE_ASM_GNU_STYLE
468# ifdef RT_ARCH_AMD64
469 __asm__ __volatile__("pushfq\n\t"
470 "popq %0\n\t"
471 : "=g" (uFlags));
472# else
473 __asm__ __volatile__("pushfl\n\t"
474 "popl %0\n\t"
475 : "=g" (uFlags));
476# endif
477# else
478 __asm
479 {
480# ifdef RT_ARCH_AMD64
481 pushfq
482 pop [uFlags]
483# else
484 pushfd
485 pop [uFlags]
486# endif
487 }
488# endif
489 return uFlags;
490}
491#endif
492
493
494/**
495 * Set the [RE]FLAGS register.
496 * @param uFlags The new [RE]FLAGS value.
497 */
498#if RT_INLINE_ASM_EXTERNAL
499DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
500#else
501DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
502{
503# if RT_INLINE_ASM_GNU_STYLE
504# ifdef RT_ARCH_AMD64
505 __asm__ __volatile__("pushq %0\n\t"
506 "popfq\n\t"
507 : : "g" (uFlags));
508# else
509 __asm__ __volatile__("pushl %0\n\t"
510 "popfl\n\t"
511 : : "g" (uFlags));
512# endif
513# else
514 __asm
515 {
516# ifdef RT_ARCH_AMD64
517 push [uFlags]
518 popfq
519# else
520 push [uFlags]
521 popfd
522# endif
523 }
524# endif
525}
526#endif
527
528
529/**
530 * Gets the content of the CPU timestamp counter register.
531 *
532 * @returns TSC.
533 */
534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
535DECLASM(uint64_t) ASMReadTSC(void);
536#else
537DECLINLINE(uint64_t) ASMReadTSC(void)
538{
539 RTUINT64U u;
540# if RT_INLINE_ASM_GNU_STYLE
541 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
542# else
543# if RT_INLINE_ASM_USES_INTRIN
544 u.u = __rdtsc();
545# else
546 __asm
547 {
548 rdtsc
549 mov [u.s.Lo], eax
550 mov [u.s.Hi], edx
551 }
552# endif
553# endif
554 return u.u;
555}
556#endif
557
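/*
 * Usage sketch (illustrative only): rough cycle counting around a piece of
 * work (DoSomeWork is a placeholder). The TSC may differ between CPUs and the
 * CPU may reorder around RDTSC, so treat the result as approximate.
 *
 *      uint64_t const uStart = ASMReadTSC();
 *      DoSomeWork();
 *      uint64_t const cTicks = ASMReadTSC() - uStart;
 */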
558
559/**
560 * Performs the cpuid instruction returning all registers.
561 *
562 * @param uOperator CPUID operation (eax).
563 * @param pvEAX Where to store eax.
564 * @param pvEBX Where to store ebx.
565 * @param pvECX Where to store ecx.
566 * @param pvEDX Where to store edx.
567 * @remark We're using void pointers to ease the use of special bitfield structures and such.
568 */
569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
570DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
571#else
572DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
573{
574# if RT_INLINE_ASM_GNU_STYLE
575# ifdef RT_ARCH_AMD64
576 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
577 __asm__ ("cpuid\n\t"
578 : "=a" (uRAX),
579 "=b" (uRBX),
580 "=c" (uRCX),
581 "=d" (uRDX)
582 : "0" (uOperator));
583 *(uint32_t *)pvEAX = (uint32_t)uRAX;
584 *(uint32_t *)pvEBX = (uint32_t)uRBX;
585 *(uint32_t *)pvECX = (uint32_t)uRCX;
586 *(uint32_t *)pvEDX = (uint32_t)uRDX;
587# else
588 __asm__ ("xchgl %%ebx, %1\n\t"
589 "cpuid\n\t"
590 "xchgl %%ebx, %1\n\t"
591 : "=a" (*(uint32_t *)pvEAX),
592 "=r" (*(uint32_t *)pvEBX),
593 "=c" (*(uint32_t *)pvECX),
594 "=d" (*(uint32_t *)pvEDX)
595 : "0" (uOperator));
596# endif
597
598# elif RT_INLINE_ASM_USES_INTRIN
599 int aInfo[4];
600 __cpuid(aInfo, uOperator);
601 *(uint32_t *)pvEAX = aInfo[0];
602 *(uint32_t *)pvEBX = aInfo[1];
603 *(uint32_t *)pvECX = aInfo[2];
604 *(uint32_t *)pvEDX = aInfo[3];
605
606# else
607 uint32_t uEAX;
608 uint32_t uEBX;
609 uint32_t uECX;
610 uint32_t uEDX;
611 __asm
612 {
613 push ebx
614 mov eax, [uOperator]
615 cpuid
616 mov [uEAX], eax
617 mov [uEBX], ebx
618 mov [uECX], ecx
619 mov [uEDX], edx
620 pop ebx
621 }
622 *(uint32_t *)pvEAX = uEAX;
623 *(uint32_t *)pvEBX = uEBX;
624 *(uint32_t *)pvECX = uECX;
625 *(uint32_t *)pvEDX = uEDX;
626# endif
627}
628#endif
629
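/*
 * Usage sketch (illustrative only): reading the CPU vendor string via standard
 * leaf 0. The 12 vendor characters are returned in EBX, EDX, ECX (in that order).
 *
 *      uint32_t uMaxLeaf, uEbx, uEcx, uEdx;
 *      char     szVendor[13];
 *      ASMCpuId(0, &uMaxLeaf, &uEbx, &uEcx, &uEdx);
 *      memcpy(&szVendor[0], &uEbx, 4);
 *      memcpy(&szVendor[4], &uEdx, 4);
 *      memcpy(&szVendor[8], &uEcx, 4);
 *      szVendor[12] = '\0';
 */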
630
631/**
632 * Performs the cpuid instruction returning all registers.
633 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
634 *
635 * @param uOperator CPUID operation (eax).
636 * @param uIdxECX ecx index
637 * @param pvEAX Where to store eax.
638 * @param pvEBX Where to store ebx.
639 * @param pvECX Where to store ecx.
640 * @param pvEDX Where to store edx.
641 * @remark We're using void pointers to ease the use of special bitfield structures and such.
642 */
643#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
644DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
645#else
646DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
647{
648# if RT_INLINE_ASM_GNU_STYLE
649# ifdef RT_ARCH_AMD64
650 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
651 __asm__ ("cpuid\n\t"
652 : "=a" (uRAX),
653 "=b" (uRBX),
654 "=c" (uRCX),
655 "=d" (uRDX)
656 : "0" (uOperator),
657 "2" (uIdxECX));
658 *(uint32_t *)pvEAX = (uint32_t)uRAX;
659 *(uint32_t *)pvEBX = (uint32_t)uRBX;
660 *(uint32_t *)pvECX = (uint32_t)uRCX;
661 *(uint32_t *)pvEDX = (uint32_t)uRDX;
662# else
663 __asm__ ("xchgl %%ebx, %1\n\t"
664 "cpuid\n\t"
665 "xchgl %%ebx, %1\n\t"
666 : "=a" (*(uint32_t *)pvEAX),
667 "=r" (*(uint32_t *)pvEBX),
668 "=c" (*(uint32_t *)pvECX),
669 "=d" (*(uint32_t *)pvEDX)
670 : "0" (uOperator),
671 "2" (uIdxECX));
672# endif
673
674# elif RT_INLINE_ASM_USES_INTRIN
675 int aInfo[4];
676 /** @todo Is there an intrinsic that takes the ECX index? __cpuid ignores uIdxECX here. */
677 __cpuid(aInfo, uOperator);
678 *(uint32_t *)pvEAX = aInfo[0];
679 *(uint32_t *)pvEBX = aInfo[1];
680 *(uint32_t *)pvECX = aInfo[2];
681 *(uint32_t *)pvEDX = aInfo[3];
682
683# else
684 uint32_t uEAX;
685 uint32_t uEBX;
686 uint32_t uECX;
687 uint32_t uEDX;
688 __asm
689 {
690 push ebx
691 mov eax, [uOperator]
692 mov ecx, [uIdxECX]
693 cpuid
694 mov [uEAX], eax
695 mov [uEBX], ebx
696 mov [uECX], ecx
697 mov [uEDX], edx
698 pop ebx
699 }
700 *(uint32_t *)pvEAX = uEAX;
701 *(uint32_t *)pvEBX = uEBX;
702 *(uint32_t *)pvECX = uECX;
703 *(uint32_t *)pvEDX = uEDX;
704# endif
705}
706#endif
707
708
709/**
710 * Performs the cpuid instruction returning ecx and edx.
711 *
712 * @param uOperator CPUID operation (eax).
713 * @param pvECX Where to store ecx.
714 * @param pvEDX Where to store edx.
715 * @remark We're using void pointers to ease the use of special bitfield structures and such.
716 */
717#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
718DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
719#else
720DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
721{
722 uint32_t uEBX;
723 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
724}
725#endif
726
727
728/**
729 * Performs the cpuid instruction returning edx.
730 *
731 * @param uOperator CPUID operation (eax).
732 * @returns EDX after cpuid operation.
733 */
734#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
735DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
736#else
737DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
738{
739 RTCCUINTREG xDX;
740# if RT_INLINE_ASM_GNU_STYLE
741# ifdef RT_ARCH_AMD64
742 RTCCUINTREG uSpill;
743 __asm__ ("cpuid"
744 : "=a" (uSpill),
745 "=d" (xDX)
746 : "0" (uOperator)
747 : "rbx", "rcx");
748# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
749 __asm__ ("push %%ebx\n\t"
750 "cpuid\n\t"
751 "pop %%ebx\n\t"
752 : "=a" (uOperator),
753 "=d" (xDX)
754 : "0" (uOperator)
755 : "ecx");
756# else
757 __asm__ ("cpuid"
758 : "=a" (uOperator),
759 "=d" (xDX)
760 : "0" (uOperator)
761 : "ebx", "ecx");
762# endif
763
764# elif RT_INLINE_ASM_USES_INTRIN
765 int aInfo[4];
766 __cpuid(aInfo, uOperator);
767 xDX = aInfo[3];
768
769# else
770 __asm
771 {
772 push ebx
773 mov eax, [uOperator]
774 cpuid
775 mov [xDX], edx
776 pop ebx
777 }
778# endif
779 return (uint32_t)xDX;
780}
781#endif
782
783
784/**
785 * Performs the cpuid instruction returning ecx.
786 *
787 * @param uOperator CPUID operation (eax).
788 * @returns ECX after cpuid operation.
789 */
790#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
791DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
792#else
793DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
794{
795 RTCCUINTREG xCX;
796# if RT_INLINE_ASM_GNU_STYLE
797# ifdef RT_ARCH_AMD64
798 RTCCUINTREG uSpill;
799 __asm__ ("cpuid"
800 : "=a" (uSpill),
801 "=c" (xCX)
802 : "0" (uOperator)
803 : "rbx", "rdx");
804# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
805 __asm__ ("push %%ebx\n\t"
806 "cpuid\n\t"
807 "pop %%ebx\n\t"
808 : "=a" (uOperator),
809 "=c" (xCX)
810 : "0" (uOperator)
811 : "edx");
812# else
813 __asm__ ("cpuid"
814 : "=a" (uOperator),
815 "=c" (xCX)
816 : "0" (uOperator)
817 : "ebx", "edx");
818
819# endif
820
821# elif RT_INLINE_ASM_USES_INTRIN
822 int aInfo[4];
823 __cpuid(aInfo, uOperator);
824 xCX = aInfo[2];
825
826# else
827 __asm
828 {
829 push ebx
830 mov eax, [uOperator]
831 cpuid
832 mov [xCX], ecx
833 pop ebx
834 }
835# endif
836 return (uint32_t)xCX;
837}
838#endif
839
840
841/**
842 * Checks if the current CPU supports CPUID.
843 *
844 * @returns true if CPUID is supported.
845 */
846DECLINLINE(bool) ASMHasCpuId(void)
847{
848#ifdef RT_ARCH_AMD64
849 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
850#else /* !RT_ARCH_AMD64 */
851 bool fRet = false;
852# if RT_INLINE_ASM_GNU_STYLE
853 uint32_t u1;
854 uint32_t u2;
855 __asm__ ("pushf\n\t"
856 "pop %1\n\t"
857 "mov %1, %2\n\t"
858 "xorl $0x200000, %1\n\t"
859 "push %1\n\t"
860 "popf\n\t"
861 "pushf\n\t"
862 "pop %1\n\t"
863 "cmpl %1, %2\n\t"
864 "setne %0\n\t"
865 "push %2\n\t"
866 "popf\n\t"
867 : "=m" (fRet), "=r" (u1), "=r" (u2));
868# else
869 __asm
870 {
871 pushfd
872 pop eax
873 mov ebx, eax
874 xor eax, 0200000h
875 push eax
876 popfd
877 pushfd
878 pop eax
879 cmp eax, ebx
880 setne fRet
881 push ebx
882 popfd
883 }
884# endif
885 return fRet;
886#endif /* !RT_ARCH_AMD64 */
887}
888
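/*
 * Usage sketch (illustrative only): guarding feature detection on old 32-bit
 * CPUs; here checking the TSC feature flag (leaf 1, EDX bit 4).
 *
 *      bool fHasTsc = false;
 *      if (ASMHasCpuId())
 *          fHasTsc = (ASMCpuId_EDX(1) & RT_BIT(4)) != 0;
 */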
889
890/**
891 * Gets the APIC ID of the current CPU.
892 *
893 * @returns the APIC ID.
894 */
895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
896DECLASM(uint8_t) ASMGetApicId(void);
897#else
898DECLINLINE(uint8_t) ASMGetApicId(void)
899{
900 RTCCUINTREG xBX;
901# if RT_INLINE_ASM_GNU_STYLE
902# ifdef RT_ARCH_AMD64
903 RTCCUINTREG uSpill;
904 __asm__ ("cpuid"
905 : "=a" (uSpill),
906 "=b" (xBX)
907 : "0" (1)
908 : "rcx", "rdx");
909# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
910 RTCCUINTREG uSpill;
911 __asm__ ("mov %%ebx,%1\n\t"
912 "cpuid\n\t"
913 "xchgl %%ebx,%1\n\t"
914 : "=a" (uSpill),
915 "=r" (xBX)
916 : "0" (1)
917 : "ecx", "edx");
918# else
919 RTCCUINTREG uSpill;
920 __asm__ ("cpuid"
921 : "=a" (uSpill),
922 "=b" (xBX)
923 : "0" (1)
924 : "ecx", "edx");
925# endif
926
927# elif RT_INLINE_ASM_USES_INTRIN
928 int aInfo[4];
929 __cpuid(aInfo, 1);
930 xBX = aInfo[1];
931
932# else
933 __asm
934 {
935 push ebx
936 mov eax, 1
937 cpuid
938 mov [xBX], ebx
939 pop ebx
940 }
941# endif
942 return (uint8_t)(xBX >> 24);
943}
944#endif
945
946
947/**
948 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
949 *
950 * @returns true/false.
951 * @param uEBX EBX return from ASMCpuId(0)
952 * @param uECX ECX return from ASMCpuId(0)
953 * @param uEDX EDX return from ASMCpuId(0)
954 */
955DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
956{
957 return uEBX == 0x756e6547 /* 'Genu' */
958 && uECX == 0x6c65746e /* 'ntel' */
959 && uEDX == 0x49656e69; /* 'ineI' */
960}
961
962
963/**
964 * Tests if this is a genuine Intel CPU.
965 *
966 * @returns true/false.
967 */
968DECLINLINE(bool) ASMIsIntelCpu(void)
969{
970 uint32_t uEAX, uEBX, uECX, uEDX;
971 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX); /* leaf 0 returns the vendor string ASMIsIntelCpuEx expects */
972 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
973}
974
975
976/**
977 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
978 *
979 * @returns Family.
980 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
981 */
982DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
983{
984 return ((uEAX >> 8) & 0xf) == 0xf
985 ? ((uEAX >> 20) & 0x7f) + 0xf
986 : ((uEAX >> 8) & 0xf);
987}
988
989
990/**
991 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
992 *
993 * @returns Model.
994 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
996 */
997DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
998{
999 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1000 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1001 : ((uEAX >> 4) & 0xf);
1002}
1003
1004
1005/**
1006 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1007 *
1008 * @returns Model.
1009 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1011 */
1012DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1013{
1014 return ((uEAX >> 8) & 0xf) == 0xf
1015 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1016 : ((uEAX >> 4) & 0xf);
1017}
1018
1019
1020/**
1021 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1022 *
1023 * @returns Model.
1024 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1025 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1026 */
1027DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1028{
1029 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1030 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1031 : ((uEAX >> 4) & 0xf);
1032}
1033
1034
1035/**
1036 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1037 *
1038 * @returns Stepping.
1039 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1040 */
1041DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1042{
1043 return uEAX & 0xf;
1044}
1045
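/*
 * Usage sketch (illustrative only): decoding the processor signature from
 * leaf 1 with the helpers above.
 *
 *      uint32_t uEAX, uEBX, uECX, uEDX;
 *      ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *      uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *      uint32_t const uModel    = ASMGetCpuModel(uEAX, ASMIsIntelCpu());
 *      uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 */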
1046
1047/**
1048 * Get cr0.
1049 * @returns cr0.
1050 */
1051#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1052DECLASM(RTCCUINTREG) ASMGetCR0(void);
1053#else
1054DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1055{
1056 RTCCUINTREG uCR0;
1057# if RT_INLINE_ASM_USES_INTRIN
1058 uCR0 = __readcr0();
1059
1060# elif RT_INLINE_ASM_GNU_STYLE
1061# ifdef RT_ARCH_AMD64
1062 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1063# else
1064 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1065# endif
1066# else
1067 __asm
1068 {
1069# ifdef RT_ARCH_AMD64
1070 mov rax, cr0
1071 mov [uCR0], rax
1072# else
1073 mov eax, cr0
1074 mov [uCR0], eax
1075# endif
1076 }
1077# endif
1078 return uCR0;
1079}
1080#endif
1081
1082
1083/**
1084 * Sets the CR0 register.
1085 * @param uCR0 The new CR0 value.
1086 */
1087#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1088DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1089#else
1090DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1091{
1092# if RT_INLINE_ASM_USES_INTRIN
1093 __writecr0(uCR0);
1094
1095# elif RT_INLINE_ASM_GNU_STYLE
1096# ifdef RT_ARCH_AMD64
1097 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1098# else
1099 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1100# endif
1101# else
1102 __asm
1103 {
1104# ifdef RT_ARCH_AMD64
1105 mov rax, [uCR0]
1106 mov cr0, rax
1107# else
1108 mov eax, [uCR0]
1109 mov cr0, eax
1110# endif
1111 }
1112# endif
1113}
1114#endif
1115
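/*
 * Usage sketch (illustrative only): temporarily clearing CR0.WP (bit 16) so
 * ring-0 code can patch an otherwise read-only page; interrupts should be
 * disabled across the window.
 *
 *      RTCCUINTREG const uCr0 = ASMGetCR0();
 *      ASMSetCR0(uCr0 & ~(RTCCUINTREG)RT_BIT(16));    // clear write protect
 *      // ... modify the read-only page ...
 *      ASMSetCR0(uCr0);                               // restore previous CR0
 */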
1116
1117/**
1118 * Get cr2.
1119 * @returns cr2.
1120 */
1121#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1122DECLASM(RTCCUINTREG) ASMGetCR2(void);
1123#else
1124DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1125{
1126 RTCCUINTREG uCR2;
1127# if RT_INLINE_ASM_USES_INTRIN
1128 uCR2 = __readcr2();
1129
1130# elif RT_INLINE_ASM_GNU_STYLE
1131# ifdef RT_ARCH_AMD64
1132 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1133# else
1134 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1135# endif
1136# else
1137 __asm
1138 {
1139# ifdef RT_ARCH_AMD64
1140 mov rax, cr2
1141 mov [uCR2], rax
1142# else
1143 mov eax, cr2
1144 mov [uCR2], eax
1145# endif
1146 }
1147# endif
1148 return uCR2;
1149}
1150#endif
1151
1152
1153/**
1154 * Sets the CR2 register.
1155 * @param uCR2 The new CR2 value.
1156 */
1157#if RT_INLINE_ASM_EXTERNAL
1158DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1159#else
1160DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1161{
1162# if RT_INLINE_ASM_GNU_STYLE
1163# ifdef RT_ARCH_AMD64
1164 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1165# else
1166 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1167# endif
1168# else
1169 __asm
1170 {
1171# ifdef RT_ARCH_AMD64
1172 mov rax, [uCR2]
1173 mov cr2, rax
1174# else
1175 mov eax, [uCR2]
1176 mov cr2, eax
1177# endif
1178 }
1179# endif
1180}
1181#endif
1182
1183
1184/**
1185 * Get cr3.
1186 * @returns cr3.
1187 */
1188#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1189DECLASM(RTCCUINTREG) ASMGetCR3(void);
1190#else
1191DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1192{
1193 RTCCUINTREG uCR3;
1194# if RT_INLINE_ASM_USES_INTRIN
1195 uCR3 = __readcr3();
1196
1197# elif RT_INLINE_ASM_GNU_STYLE
1198# ifdef RT_ARCH_AMD64
1199 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1200# else
1201 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1202# endif
1203# else
1204 __asm
1205 {
1206# ifdef RT_ARCH_AMD64
1207 mov rax, cr3
1208 mov [uCR3], rax
1209# else
1210 mov eax, cr3
1211 mov [uCR3], eax
1212# endif
1213 }
1214# endif
1215 return uCR3;
1216}
1217#endif
1218
1219
1220/**
1221 * Sets the CR3 register.
1222 *
1223 * @param uCR3 New CR3 value.
1224 */
1225#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1226DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1227#else
1228DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1229{
1230# if RT_INLINE_ASM_USES_INTRIN
1231 __writecr3(uCR3);
1232
1233# elif RT_INLINE_ASM_GNU_STYLE
1234# ifdef RT_ARCH_AMD64
1235 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1236# else
1237 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1238# endif
1239# else
1240 __asm
1241 {
1242# ifdef RT_ARCH_AMD64
1243 mov rax, [uCR3]
1244 mov cr3, rax
1245# else
1246 mov eax, [uCR3]
1247 mov cr3, eax
1248# endif
1249 }
1250# endif
1251}
1252#endif
1253
1254
1255/**
1256 * Reloads the CR3 register.
1257 */
1258#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1259DECLASM(void) ASMReloadCR3(void);
1260#else
1261DECLINLINE(void) ASMReloadCR3(void)
1262{
1263# if RT_INLINE_ASM_USES_INTRIN
1264 __writecr3(__readcr3());
1265
1266# elif RT_INLINE_ASM_GNU_STYLE
1267 RTCCUINTREG u;
1268# ifdef RT_ARCH_AMD64
1269 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1270 "movq %0, %%cr3\n\t"
1271 : "=r" (u));
1272# else
1273 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1274 "movl %0, %%cr3\n\t"
1275 : "=r" (u));
1276# endif
1277# else
1278 __asm
1279 {
1280# ifdef RT_ARCH_AMD64
1281 mov rax, cr3
1282 mov cr3, rax
1283# else
1284 mov eax, cr3
1285 mov cr3, eax
1286# endif
1287 }
1288# endif
1289}
1290#endif
1291
1292
1293/**
1294 * Get cr4.
1295 * @returns cr4.
1296 */
1297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1298DECLASM(RTCCUINTREG) ASMGetCR4(void);
1299#else
1300DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1301{
1302 RTCCUINTREG uCR4;
1303# if RT_INLINE_ASM_USES_INTRIN
1304 uCR4 = __readcr4();
1305
1306# elif RT_INLINE_ASM_GNU_STYLE
1307# ifdef RT_ARCH_AMD64
1308 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1309# else
1310 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1311# endif
1312# else
1313 __asm
1314 {
1315# ifdef RT_ARCH_AMD64
1316 mov rax, cr4
1317 mov [uCR4], rax
1318# else
1319 push eax /* just in case */
1320 /*mov eax, cr4*/
1321 _emit 0x0f
1322 _emit 0x20
1323 _emit 0xe0
1324 mov [uCR4], eax
1325 pop eax
1326# endif
1327 }
1328# endif
1329 return uCR4;
1330}
1331#endif
1332
1333
1334/**
1335 * Sets the CR4 register.
1336 *
1337 * @param uCR4 New CR4 value.
1338 */
1339#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1340DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1341#else
1342DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1343{
1344# if RT_INLINE_ASM_USES_INTRIN
1345 __writecr4(uCR4);
1346
1347# elif RT_INLINE_ASM_GNU_STYLE
1348# ifdef RT_ARCH_AMD64
1349 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1350# else
1351 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1352# endif
1353# else
1354 __asm
1355 {
1356# ifdef RT_ARCH_AMD64
1357 mov rax, [uCR4]
1358 mov cr4, rax
1359# else
1360 mov eax, [uCR4]
1361 _emit 0x0F
1362 _emit 0x22
1363 _emit 0xE0 /* mov cr4, eax */
1364# endif
1365 }
1366# endif
1367}
1368#endif
1369
1370
1371/**
1372 * Get cr8.
1373 * @returns cr8.
1374 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1375 */
1376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1377DECLASM(RTCCUINTREG) ASMGetCR8(void);
1378#else
1379DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1380{
1381# ifdef RT_ARCH_AMD64
1382 RTCCUINTREG uCR8;
1383# if RT_INLINE_ASM_USES_INTRIN
1384 uCR8 = __readcr8();
1385
1386# elif RT_INLINE_ASM_GNU_STYLE
1387 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1388# else
1389 __asm
1390 {
1391 mov rax, cr8
1392 mov [uCR8], rax
1393 }
1394# endif
1395 return uCR8;
1396# else /* !RT_ARCH_AMD64 */
1397 return 0;
1398# endif /* !RT_ARCH_AMD64 */
1399}
1400#endif
1401
1402
1403/**
1404 * Enables interrupts (EFLAGS.IF).
1405 */
1406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1407DECLASM(void) ASMIntEnable(void);
1408#else
1409DECLINLINE(void) ASMIntEnable(void)
1410{
1411# if RT_INLINE_ASM_GNU_STYLE
1412 __asm("sti\n");
1413# elif RT_INLINE_ASM_USES_INTRIN
1414 _enable();
1415# else
1416 __asm sti
1417# endif
1418}
1419#endif
1420
1421
1422/**
1423 * Disables interrupts (!EFLAGS.IF).
1424 */
1425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1426DECLASM(void) ASMIntDisable(void);
1427#else
1428DECLINLINE(void) ASMIntDisable(void)
1429{
1430# if RT_INLINE_ASM_GNU_STYLE
1431 __asm("cli\n");
1432# elif RT_INLINE_ASM_USES_INTRIN
1433 _disable();
1434# else
1435 __asm cli
1436# endif
1437}
1438#endif
1439
1440
1441/**
1442 * Disables interrupts and returns previous xFLAGS.
1443 */
1444#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1445DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1446#else
1447DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1448{
1449 RTCCUINTREG xFlags;
1450# if RT_INLINE_ASM_GNU_STYLE
1451# ifdef RT_ARCH_AMD64
1452 __asm__ __volatile__("pushfq\n\t"
1453 "cli\n\t"
1454 "popq %0\n\t"
1455 : "=rm" (xFlags));
1456# else
1457 __asm__ __volatile__("pushfl\n\t"
1458 "cli\n\t"
1459 "popl %0\n\t"
1460 : "=rm" (xFlags));
1461# endif
1462# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1463 xFlags = ASMGetFlags();
1464 _disable();
1465# else
1466 __asm {
1467 pushfd
1468 cli
1469 pop [xFlags]
1470 }
1471# endif
1472 return xFlags;
1473}
1474#endif
1475
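/*
 * Usage sketch (illustrative only): a short interrupt-free window that
 * restores whatever interrupt state the caller had.
 *
 *      RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *      // ... touch per-CPU state ...
 *      ASMSetFlags(fSavedFlags);
 */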
1476
1477/**
1478 * Reads a machine specific register.
1479 *
1480 * @returns Register content.
1481 * @param uRegister Register to read.
1482 */
1483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1484DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1485#else
1486DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1487{
1488 RTUINT64U u;
1489# if RT_INLINE_ASM_GNU_STYLE
1490 __asm__ __volatile__("rdmsr\n\t"
1491 : "=a" (u.s.Lo),
1492 "=d" (u.s.Hi)
1493 : "c" (uRegister));
1494
1495# elif RT_INLINE_ASM_USES_INTRIN
1496 u.u = __readmsr(uRegister);
1497
1498# else
1499 __asm
1500 {
1501 mov ecx, [uRegister]
1502 rdmsr
1503 mov [u.s.Lo], eax
1504 mov [u.s.Hi], edx
1505 }
1506# endif
1507
1508 return u.u;
1509}
1510#endif
1511
1512
1513/**
1514 * Writes a machine specific register.
1515 *
1516 * @returns Register content.
1517 * @param uRegister Register to write to.
1518 * @param u64Val Value to write.
1519 */
1520#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1521DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1522#else
1523DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1524{
1525 RTUINT64U u;
1526
1527 u.u = u64Val;
1528# if RT_INLINE_ASM_GNU_STYLE
1529 __asm__ __volatile__("wrmsr\n\t"
1530 ::"a" (u.s.Lo),
1531 "d" (u.s.Hi),
1532 "c" (uRegister));
1533
1534# elif RT_INLINE_ASM_USES_INTRIN
1535 __writemsr(uRegister, u.u);
1536
1537# else
1538 __asm
1539 {
1540 mov ecx, [uRegister]
1541 mov edx, [u.s.Hi]
1542 mov eax, [u.s.Lo]
1543 wrmsr
1544 }
1545# endif
1546}
1547#endif
1548
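/*
 * Usage sketch (illustrative only): read-modify-write of an MSR, using the
 * architectural IA32_APIC_BASE (0x1b) as an example. Only do this in ring-0
 * and only for MSRs known to exist on the current CPU.
 *
 *      uint64_t u64 = ASMRdMsr(0x1b);
 *      u64 |= RT_BIT(11);                 // APIC global enable.
 *      ASMWrMsr(0x1b, u64);
 */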
1549
1550/**
1551 * Reads low part of a machine specific register.
1552 *
1553 * @returns Register content.
1554 * @param uRegister Register to read.
1555 */
1556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1557DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1558#else
1559DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1560{
1561 uint32_t u32;
1562# if RT_INLINE_ASM_GNU_STYLE
1563 __asm__ __volatile__("rdmsr\n\t"
1564 : "=a" (u32)
1565 : "c" (uRegister)
1566 : "edx");
1567
1568# elif RT_INLINE_ASM_USES_INTRIN
1569 u32 = (uint32_t)__readmsr(uRegister);
1570
1571#else
1572 __asm
1573 {
1574 mov ecx, [uRegister]
1575 rdmsr
1576 mov [u32], eax
1577 }
1578# endif
1579
1580 return u32;
1581}
1582#endif
1583
1584
1585/**
1586 * Reads high part of a machine specific register.
1587 *
1588 * @returns Register content.
1589 * @param uRegister Register to read.
1590 */
1591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1592DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1593#else
1594DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1595{
1596 uint32_t u32;
1597# if RT_INLINE_ASM_GNU_STYLE
1598 __asm__ __volatile__("rdmsr\n\t"
1599 : "=d" (u32)
1600 : "c" (uRegister)
1601 : "eax");
1602
1603# elif RT_INLINE_ASM_USES_INTRIN
1604 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1605
1606# else
1607 __asm
1608 {
1609 mov ecx, [uRegister]
1610 rdmsr
1611 mov [u32], edx
1612 }
1613# endif
1614
1615 return u32;
1616}
1617#endif
1618
1619
1620/**
1621 * Gets dr0.
1622 *
1623 * @returns dr0.
1624 */
1625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1626DECLASM(RTCCUINTREG) ASMGetDR0(void);
1627#else
1628DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1629{
1630 RTCCUINTREG uDR0;
1631# if RT_INLINE_ASM_USES_INTRIN
1632 uDR0 = __readdr(0);
1633# elif RT_INLINE_ASM_GNU_STYLE
1634# ifdef RT_ARCH_AMD64
1635 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1636# else
1637 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1638# endif
1639# else
1640 __asm
1641 {
1642# ifdef RT_ARCH_AMD64
1643 mov rax, dr0
1644 mov [uDR0], rax
1645# else
1646 mov eax, dr0
1647 mov [uDR0], eax
1648# endif
1649 }
1650# endif
1651 return uDR0;
1652}
1653#endif
1654
1655
1656/**
1657 * Gets dr1.
1658 *
1659 * @returns dr1.
1660 */
1661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1662DECLASM(RTCCUINTREG) ASMGetDR1(void);
1663#else
1664DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1665{
1666 RTCCUINTREG uDR1;
1667# if RT_INLINE_ASM_USES_INTRIN
1668 uDR1 = __readdr(1);
1669# elif RT_INLINE_ASM_GNU_STYLE
1670# ifdef RT_ARCH_AMD64
1671 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1672# else
1673 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1674# endif
1675# else
1676 __asm
1677 {
1678# ifdef RT_ARCH_AMD64
1679 mov rax, dr1
1680 mov [uDR1], rax
1681# else
1682 mov eax, dr1
1683 mov [uDR1], eax
1684# endif
1685 }
1686# endif
1687 return uDR1;
1688}
1689#endif
1690
1691
1692/**
1693 * Gets dr2.
1694 *
1695 * @returns dr2.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(RTCCUINTREG) ASMGetDR2(void);
1699#else
1700DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1701{
1702 RTCCUINTREG uDR2;
1703# if RT_INLINE_ASM_USES_INTRIN
1704 uDR2 = __readdr(2);
1705# elif RT_INLINE_ASM_GNU_STYLE
1706# ifdef RT_ARCH_AMD64
1707 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1708# else
1709 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1710# endif
1711# else
1712 __asm
1713 {
1714# ifdef RT_ARCH_AMD64
1715 mov rax, dr2
1716 mov [uDR2], rax
1717# else
1718 mov eax, dr2
1719 mov [uDR2], eax
1720# endif
1721 }
1722# endif
1723 return uDR2;
1724}
1725#endif
1726
1727
1728/**
1729 * Gets dr3.
1730 *
1731 * @returns dr3.
1732 */
1733#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1734DECLASM(RTCCUINTREG) ASMGetDR3(void);
1735#else
1736DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1737{
1738 RTCCUINTREG uDR3;
1739# if RT_INLINE_ASM_USES_INTRIN
1740 uDR3 = __readdr(3);
1741# elif RT_INLINE_ASM_GNU_STYLE
1742# ifdef RT_ARCH_AMD64
1743 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1744# else
1745 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1746# endif
1747# else
1748 __asm
1749 {
1750# ifdef RT_ARCH_AMD64
1751 mov rax, dr3
1752 mov [uDR3], rax
1753# else
1754 mov eax, dr3
1755 mov [uDR3], eax
1756# endif
1757 }
1758# endif
1759 return uDR3;
1760}
1761#endif
1762
1763
1764/**
1765 * Gets dr6.
1766 *
1767 * @returns dr6.
1768 */
1769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1770DECLASM(RTCCUINTREG) ASMGetDR6(void);
1771#else
1772DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1773{
1774 RTCCUINTREG uDR6;
1775# if RT_INLINE_ASM_USES_INTRIN
1776 uDR6 = __readdr(6);
1777# elif RT_INLINE_ASM_GNU_STYLE
1778# ifdef RT_ARCH_AMD64
1779 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1780# else
1781 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1782# endif
1783# else
1784 __asm
1785 {
1786# ifdef RT_ARCH_AMD64
1787 mov rax, dr6
1788 mov [uDR6], rax
1789# else
1790 mov eax, dr6
1791 mov [uDR6], eax
1792# endif
1793 }
1794# endif
1795 return uDR6;
1796}
1797#endif
1798
1799
1800/**
1801 * Reads and clears DR6.
1802 *
1803 * @returns DR6.
1804 */
1805#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1806DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1807#else
1808DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1809{
1810 RTCCUINTREG uDR6;
1811# if RT_INLINE_ASM_USES_INTRIN
1812 uDR6 = __readdr(6);
1813 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1814# elif RT_INLINE_ASM_GNU_STYLE
1815 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1816# ifdef RT_ARCH_AMD64
1817 __asm__ __volatile__("movq %%dr6, %0\n\t"
1818 "movq %1, %%dr6\n\t"
1819 : "=r" (uDR6)
1820 : "r" (uNewValue));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t"
1823 "movl %1, %%dr6\n\t"
1824 : "=r" (uDR6)
1825 : "r" (uNewValue));
1826# endif
1827# else
1828 __asm
1829 {
1830# ifdef RT_ARCH_AMD64
1831 mov rax, dr6
1832 mov [uDR6], rax
1833 mov rcx, rax
1834 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-32 are zero. */
1835 mov dr6, rcx
1836# else
1837 mov eax, dr6
1838 mov [uDR6], eax
1839 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1840 mov dr6, ecx
1841# endif
1842 }
1843# endif
1844 return uDR6;
1845}
1846#endif
1847
1848
1849/**
1850 * Gets dr7.
1851 *
1852 * @returns dr7.
1853 */
1854#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1855DECLASM(RTCCUINTREG) ASMGetDR7(void);
1856#else
1857DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1858{
1859 RTCCUINTREG uDR7;
1860# if RT_INLINE_ASM_USES_INTRIN
1861 uDR7 = __readdr(7);
1862# elif RT_INLINE_ASM_GNU_STYLE
1863# ifdef RT_ARCH_AMD64
1864 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1865# else
1866 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr7
1873 mov [uDR7], rax
1874# else
1875 mov eax, dr7
1876 mov [uDR7], eax
1877# endif
1878 }
1879# endif
1880 return uDR7;
1881}
1882#endif
1883
1884
1885/**
1886 * Sets dr0.
1887 *
1888 * @param uDRVal Debug register value to write
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1892#else
1893DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1894{
1895# if RT_INLINE_ASM_USES_INTRIN
1896 __writedr(0, uDRVal);
1897# elif RT_INLINE_ASM_GNU_STYLE
1898# ifdef RT_ARCH_AMD64
1899 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1900# else
1901 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1902# endif
1903# else
1904 __asm
1905 {
1906# ifdef RT_ARCH_AMD64
1907 mov rax, [uDRVal]
1908 mov dr0, rax
1909# else
1910 mov eax, [uDRVal]
1911 mov dr0, eax
1912# endif
1913 }
1914# endif
1915}
1916#endif
1917
1918
1919/**
1920 * Sets dr1.
1921 *
1922 * @param uDRVal Debug register value to write
1923 */
1924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1925DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1926#else
1927DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1928{
1929# if RT_INLINE_ASM_USES_INTRIN
1930 __writedr(1, uDRVal);
1931# elif RT_INLINE_ASM_GNU_STYLE
1932# ifdef RT_ARCH_AMD64
1933 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1934# else
1935 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1936# endif
1937# else
1938 __asm
1939 {
1940# ifdef RT_ARCH_AMD64
1941 mov rax, [uDRVal]
1942 mov dr1, rax
1943# else
1944 mov eax, [uDRVal]
1945 mov dr1, eax
1946# endif
1947 }
1948# endif
1949}
1950#endif
1951
1952
1953/**
1954 * Sets dr2.
1955 *
1956 * @param uDRVal Debug register value to write
1957 */
1958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1959DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1960#else
1961DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1962{
1963# if RT_INLINE_ASM_USES_INTRIN
1964 __writedr(2, uDRVal);
1965# elif RT_INLINE_ASM_GNU_STYLE
1966# ifdef RT_ARCH_AMD64
1967 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1968# else
1969 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
1970# endif
1971# else
1972 __asm
1973 {
1974# ifdef RT_ARCH_AMD64
1975 mov rax, [uDRVal]
1976 mov dr2, rax
1977# else
1978 mov eax, [uDRVal]
1979 mov dr2, eax
1980# endif
1981 }
1982# endif
1983}
1984#endif
1985
1986
1987/**
1988 * Sets dr3.
1989 *
1990 * @param uDRVal Debug register value to write
1991 */
1992#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1993DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
1994#else
1995DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
1996{
1997# if RT_INLINE_ASM_USES_INTRIN
1998 __writedr(3, uDRVal);
1999# elif RT_INLINE_ASM_GNU_STYLE
2000# ifdef RT_ARCH_AMD64
2001 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2002# else
2003 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2004# endif
2005# else
2006 __asm
2007 {
2008# ifdef RT_ARCH_AMD64
2009 mov rax, [uDRVal]
2010 mov dr3, rax
2011# else
2012 mov eax, [uDRVal]
2013 mov dr3, eax
2014# endif
2015 }
2016# endif
2017}
2018#endif
2019
2020
2021/**
2022 * Sets dr6.
2023 *
2024 * @param uDRVal Debug register value to write
2025 */
2026#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2027DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2028#else
2029DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2030{
2031# if RT_INLINE_ASM_USES_INTRIN
2032 __writedr(6, uDRVal);
2033# elif RT_INLINE_ASM_GNU_STYLE
2034# ifdef RT_ARCH_AMD64
2035 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2036# else
2037 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2038# endif
2039# else
2040 __asm
2041 {
2042# ifdef RT_ARCH_AMD64
2043 mov rax, [uDRVal]
2044 mov dr6, rax
2045# else
2046 mov eax, [uDRVal]
2047 mov dr6, eax
2048# endif
2049 }
2050# endif
2051}
2052#endif
2053
2054
2055/**
2056 * Sets dr7.
2057 *
2058 * @param uDRVal Debug register value to write
2059 */
2060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2061DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2062#else
2063DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2064{
2065# if RT_INLINE_ASM_USES_INTRIN
2066 __writedr(7, uDRVal);
2067# elif RT_INLINE_ASM_GNU_STYLE
2068# ifdef RT_ARCH_AMD64
2069 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2070# else
2071 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2072# endif
2073# else
2074 __asm
2075 {
2076# ifdef RT_ARCH_AMD64
2077 mov rax, [uDRVal]
2078 mov dr7, rax
2079# else
2080 mov eax, [uDRVal]
2081 mov dr7, eax
2082# endif
2083 }
2084# endif
2085}
2086#endif
2087
2088
2089/**
2090 * Compiler memory barrier.
2091 *
2092 * Ensures that the compiler does not carry any cached (register/temporary stack)
2093 * memory values or postponed writes across the point where it is used.
2094 *
2095 * This function must be used if non-volatile data is modified by a
2096 * device or the VMM. Typical cases are port access, MMIO access,
2097 * trapping instructions, etc.
2098 */
2099#if RT_INLINE_ASM_GNU_STYLE
2100# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2101#elif RT_INLINE_ASM_USES_INTRIN
2102# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2103#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2104DECLINLINE(void) ASMCompilerBarrier(void)
2105{
2106 __asm
2107 {
2108 }
2109}
2110#endif
2111
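/*
 * Usage sketch (illustrative only): forcing a non-volatile flag set by an
 * interrupt handler to be re-read from memory on every poll (this is a
 * compiler barrier only; it does not order the CPU).
 *
 *      extern uint8_t g_fDone;            // hypothetical, deliberately not volatile
 *      while (!g_fDone)
 *          ASMCompilerBarrier();          // forget cached copies, re-read next time
 */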
2112
2113/**
2114 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2115 *
2116 * @param Port I/O port to write to.
2117 * @param u8 8-bit integer to write.
2118 */
2119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2120DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2121#else
2122DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2123{
2124# if RT_INLINE_ASM_GNU_STYLE
2125 __asm__ __volatile__("outb %b1, %w0\n\t"
2126 :: "Nd" (Port),
2127 "a" (u8));
2128
2129# elif RT_INLINE_ASM_USES_INTRIN
2130 __outbyte(Port, u8);
2131
2132# else
2133 __asm
2134 {
2135 mov dx, [Port]
2136 mov al, [u8]
2137 out dx, al
2138 }
2139# endif
2140}
2141#endif
2142
2143
2144/**
2145 * Gets a 8-bit unsigned integer from an I/O port, ordered.
2146 *
2147 * @returns 8-bit integer.
2148 * @param Port I/O port to read from.
2149 */
2150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2151DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2152#else
2153DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2154{
2155 uint8_t u8;
2156# if RT_INLINE_ASM_GNU_STYLE
2157 __asm__ __volatile__("inb %w1, %b0\n\t"
2158 : "=a" (u8)
2159 : "Nd" (Port));
2160
2161# elif RT_INLINE_ASM_USES_INTRIN
2162 u8 = __inbyte(Port);
2163
2164# else
2165 __asm
2166 {
2167 mov dx, [Port]
2168 in al, dx
2169 mov [u8], al
2170 }
2171# endif
2172 return u8;
2173}
2174#endif
2175
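/*
 * Usage sketch (illustrative only): reading a CMOS register through the
 * classic PC RTC index/data ports (0x70/0x71); production code also has to
 * care about the NMI mask in bit 7 of the index port.
 *
 *      ASMOutU8(0x70, 0x0a);              // select CMOS status register A
 *      uint8_t bStatusA = ASMInU8(0x71);
 */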
2176
2177/**
2178 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2179 *
2180 * @param Port I/O port to write to.
2181 * @param u16 16-bit integer to write.
2182 */
2183#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2184DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2185#else
2186DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2187{
2188# if RT_INLINE_ASM_GNU_STYLE
2189 __asm__ __volatile__("outw %w1, %w0\n\t"
2190 :: "Nd" (Port),
2191 "a" (u16));
2192
2193# elif RT_INLINE_ASM_USES_INTRIN
2194 __outword(Port, u16);
2195
2196# else
2197 __asm
2198 {
2199 mov dx, [Port]
2200 mov ax, [u16]
2201 out dx, ax
2202 }
2203# endif
2204}
2205#endif
2206
2207
2208/**
2209 * Gets a 16-bit unsigned integer from an I/O port, ordered.
2210 *
2211 * @returns 16-bit integer.
2212 * @param Port I/O port to read from.
2213 */
2214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2215DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2216#else
2217DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2218{
2219 uint16_t u16;
2220# if RT_INLINE_ASM_GNU_STYLE
2221 __asm__ __volatile__("inw %w1, %w0\n\t"
2222 : "=a" (u16)
2223 : "Nd" (Port));
2224
2225# elif RT_INLINE_ASM_USES_INTRIN
2226 u16 = __inword(Port);
2227
2228# else
2229 __asm
2230 {
2231 mov dx, [Port]
2232 in ax, dx
2233 mov [u16], ax
2234 }
2235# endif
2236 return u16;
2237}
2238#endif
2239
2240
2241/**
2242 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2243 *
2244 * @param Port I/O port to write to.
2245 * @param u32 32-bit integer to write.
2246 */
2247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2248DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2249#else
2250DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2251{
2252# if RT_INLINE_ASM_GNU_STYLE
2253 __asm__ __volatile__("outl %1, %w0\n\t"
2254 :: "Nd" (Port),
2255 "a" (u32));
2256
2257# elif RT_INLINE_ASM_USES_INTRIN
2258 __outdword(Port, u32);
2259
2260# else
2261 __asm
2262 {
2263 mov dx, [Port]
2264 mov eax, [u32]
2265 out dx, eax
2266 }
2267# endif
2268}
2269#endif
2270
2271
2272/**
2273 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2274 *
2275 * @returns 32-bit integer.
2276 * @param Port I/O port to read from.
2277 */
2278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2279DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2280#else
2281DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2282{
2283 uint32_t u32;
2284# if RT_INLINE_ASM_GNU_STYLE
2285 __asm__ __volatile__("inl %w1, %0\n\t"
2286 : "=a" (u32)
2287 : "Nd" (Port));
2288
2289# elif RT_INLINE_ASM_USES_INTRIN
2290 u32 = __indword(Port);
2291
2292# else
2293 __asm
2294 {
2295 mov dx, [Port]
2296 in eax, dx
2297 mov [u32], eax
2298 }
2299# endif
2300 return u32;
2301}
2302#endif
2303
2304/** @todo string i/o */
2305
2306
2307/**
2308 * Atomically Exchange an unsigned 8-bit value, ordered.
2309 *
2310 * @returns Current *pu8 value
2311 * @param pu8 Pointer to the 8-bit variable to update.
2312 * @param u8 The 8-bit value to assign to *pu8.
2313 */
2314#if RT_INLINE_ASM_EXTERNAL
2315DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2316#else
2317DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2318{
2319# if RT_INLINE_ASM_GNU_STYLE
2320 __asm__ __volatile__("xchgb %0, %1\n\t"
2321 : "=m" (*pu8),
2322 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2323 : "1" (u8),
2324 "m" (*pu8));
2325# else
2326 __asm
2327 {
2328# ifdef RT_ARCH_AMD64
2329 mov rdx, [pu8]
2330 mov al, [u8]
2331 xchg [rdx], al
2332 mov [u8], al
2333# else
2334 mov edx, [pu8]
2335 mov al, [u8]
2336 xchg [edx], al
2337 mov [u8], al
2338# endif
2339 }
2340# endif
2341 return u8;
2342}
2343#endif
2344
2345
2346/**
2347 * Atomically Exchange a signed 8-bit value, ordered.
2348 *
2349 * @returns Current *pi8 value
2350 * @param pi8 Pointer to the 8-bit variable to update.
2351 * @param i8 The 8-bit value to assign to *pi8.
2352 */
2353DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2354{
2355 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2356}
2357
2358
2359/**
2360 * Atomically Exchange a bool value, ordered.
2361 *
2362 * @returns Current *pf value
2363 * @param pf Pointer to the 8-bit variable to update.
2364 * @param f The 8-bit value to assign to *pf.
2365 */
2366DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2367{
2368#ifdef _MSC_VER
2369 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2370#else
2371 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2372#endif
2373}
2374
2375
2376/**
2377 * Atomically Exchange an unsigned 16-bit value, ordered.
2378 *
2379 * @returns Current *pu16 value
2380 * @param pu16 Pointer to the 16-bit variable to update.
2381 * @param u16 The 16-bit value to assign to *pu16.
2382 */
2383#if RT_INLINE_ASM_EXTERNAL
2384DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2385#else
2386DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2387{
2388# if RT_INLINE_ASM_GNU_STYLE
2389 __asm__ __volatile__("xchgw %0, %1\n\t"
2390 : "=m" (*pu16),
2391 "=r" (u16)
2392 : "1" (u16),
2393 "m" (*pu16));
2394# else
2395 __asm
2396 {
2397# ifdef RT_ARCH_AMD64
2398 mov rdx, [pu16]
2399 mov ax, [u16]
2400 xchg [rdx], ax
2401 mov [u16], ax
2402# else
2403 mov edx, [pu16]
2404 mov ax, [u16]
2405 xchg [edx], ax
2406 mov [u16], ax
2407# endif
2408 }
2409# endif
2410 return u16;
2411}
2412#endif
2413
2414
2415/**
2416 * Atomically Exchange a signed 16-bit value, ordered.
2417 *
2418 * @returns Current *pi16 value
2419 * @param pi16 Pointer to the 16-bit variable to update.
2420 * @param i16 The 16-bit value to assign to *pi16.
2421 */
2422DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2423{
2424 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2425}
2426
2427
2428/**
2429 * Atomically Exchange an unsigned 32-bit value, ordered.
2430 *
2431 * @returns Current *pu32 value
2432 * @param pu32 Pointer to the 32-bit variable to update.
2433 * @param u32 The 32-bit value to assign to *pu32.
2434 */
2435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2436DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2437#else
2438DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2439{
2440# if RT_INLINE_ASM_GNU_STYLE
2441 __asm__ __volatile__("xchgl %0, %1\n\t"
2442 : "=m" (*pu32),
2443 "=r" (u32)
2444 : "1" (u32),
2445 "m" (*pu32));
2446
2447# elif RT_INLINE_ASM_USES_INTRIN
2448 u32 = _InterlockedExchange((long *)pu32, u32);
2449
2450# else
2451 __asm
2452 {
2453# ifdef RT_ARCH_AMD64
2454 mov rdx, [pu32]
2455 mov eax, u32
2456 xchg [rdx], eax
2457 mov [u32], eax
2458# else
2459 mov edx, [pu32]
2460 mov eax, u32
2461 xchg [edx], eax
2462 mov [u32], eax
2463# endif
2464 }
2465# endif
2466 return u32;
2467}
2468#endif
2469
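/*
 * Usage sketch (illustrative only): claiming one-shot initialization with an
 * atomic exchange; only the first thread to flip the flag does the work
 * (DoOneTimeInit is a placeholder).
 *
 *      static volatile uint32_t s_fInitialized = 0;
 *      if (ASMAtomicXchgU32(&s_fInitialized, 1) == 0)
 *          DoOneTimeInit();
 */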
2470
2471/**
2472 * Atomically Exchange a signed 32-bit value, ordered.
2473 *
2474 * @returns Current *pi32 value
2475 * @param pi32 Pointer to the 32-bit variable to update.
2476 * @param i32 The 32-bit value to assign to *pi32.
2477 */
2478DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2479{
2480 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2481}
2482
2483
2484/**
2485 * Atomically Exchange an unsigned 64-bit value, ordered.
2486 *
2487 * @returns Current *pu64 value
2488 * @param pu64 Pointer to the 64-bit variable to update.
2489 * @param u64 The 64-bit value to assign to *pu64.
2490 */
2491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2492DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2493#else
2494DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2495{
2496# if defined(RT_ARCH_AMD64)
2497# if RT_INLINE_ASM_USES_INTRIN
2498 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2499
2500# elif RT_INLINE_ASM_GNU_STYLE
2501 __asm__ __volatile__("xchgq %0, %1\n\t"
2502 : "=m" (*pu64),
2503 "=r" (u64)
2504 : "1" (u64),
2505 "m" (*pu64));
2506# else
2507 __asm
2508 {
2509 mov rdx, [pu64]
2510 mov rax, [u64]
2511 xchg [rdx], rax
2512 mov [u64], rax
2513 }
2514# endif
2515# else /* !RT_ARCH_AMD64 */
2516# if RT_INLINE_ASM_GNU_STYLE
2517# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2518 uint32_t u32EBX = (uint32_t)u64;
2519 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2520 "xchgl %%ebx, %3\n\t"
2521 "1:\n\t"
2522 "lock; cmpxchg8b (%5)\n\t"
2523 "jnz 1b\n\t"
2524 "movl %3, %%ebx\n\t"
2525 /*"xchgl %%esi, %5\n\t"*/
2526 : "=A" (u64),
2527 "=m" (*pu64)
2528 : "0" (*pu64),
2529 "m" ( u32EBX ),
2530 "c" ( (uint32_t)(u64 >> 32) ),
2531 "S" (pu64));
2532# else /* !PIC */
2533 __asm__ __volatile__("1:\n\t"
2534 "lock; cmpxchg8b %1\n\t"
2535 "jnz 1b\n\t"
2536 : "=A" (u64),
2537 "=m" (*pu64)
2538 : "0" (*pu64),
2539 "b" ( (uint32_t)u64 ),
2540 "c" ( (uint32_t)(u64 >> 32) ));
2541# endif
2542# else
2543 __asm
2544 {
2545 mov ebx, dword ptr [u64]
2546 mov ecx, dword ptr [u64 + 4]
2547 mov edi, pu64
2548 mov eax, dword ptr [edi]
2549 mov edx, dword ptr [edi + 4]
2550 retry:
2551 lock cmpxchg8b [edi]
2552 jnz retry
2553 mov dword ptr [u64], eax
2554 mov dword ptr [u64 + 4], edx
2555 }
2556# endif
2557# endif /* !RT_ARCH_AMD64 */
2558 return u64;
2559}
2560#endif
2561
2562
2563/**
2564 * Atomically Exchange a signed 64-bit value, ordered.
2565 *
2566 * @returns Current *pi64 value
2567 * @param pi64 Pointer to the 64-bit variable to update.
2568 * @param i64 The 64-bit value to assign to *pi64.
2569 */
2570DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2571{
2572 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2573}
2574
2575
2576#ifdef RT_ARCH_AMD64
2577/**
2578 * Atomically Exchange an unsigned 128-bit value, ordered.
2579 *
2580 * @returns Current *pu128.
2581 * @param pu128 Pointer to the 128-bit variable to update.
2582 * @param u128 The 128-bit value to assign to *pu128.
2583 *
2584 * @remark We cannot really assume that any hardware supports this. Nor do I have
2585 * GAS support for it. So, for the time being we'll BREAK the atomic
2586 * bit of this function and use two 64-bit exchanges instead.
2587 */
2588# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2589DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2590# else
2591DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2592{
2593 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2594 {
2595 /** @todo this is clumsy code */
2596 RTUINT128U u128Ret;
2597 u128Ret.u = u128;
2598 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2599 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2600 return u128Ret.u;
2601 }
2602#if 0 /* later? */
2603 else
2604 {
2605# if RT_INLINE_ASM_GNU_STYLE
2606 __asm__ __volatile__("1:\n\t"
2607 "lock; cmpxchg8b %1\n\t"
2608 "jnz 1b\n\t"
2609 : "=A" (u128),
2610 "=m" (*pu128)
2611 : "0" (*pu128),
2612 "b" ( (uint64_t)u128 ),
2613 "c" ( (uint64_t)(u128 >> 64) ));
2614# else
2615 __asm
2616 {
2617 mov rbx, qword ptr [u128]
2618 mov rcx, qword ptr [u128 + 8]
2619 mov rdi, pu128
2620 mov rax, qword ptr [rdi]
2621 mov rdx, qword ptr [rdi + 8]
2622 retry:
2623 lock cmpxchg16b [rdi]
2624 jnz retry
2625 mov qword ptr [u128], rax
2626 mov qword ptr [u128 + 8], rdx
2627 }
2628# endif
2629 }
2630 return u128;
2631#endif
2632}
2633# endif
2634#endif /* RT_ARCH_AMD64 */
2635
2636
2637/**
2638 * Atomically Exchange a pointer value, ordered.
2639 *
2640 * @returns Current *ppv value
2641 * @param ppv Pointer to the pointer variable to update.
2642 * @param pv The pointer value to assign to *ppv.
2643 */
2644DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2645{
2646#if ARCH_BITS == 32
2647 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2648#elif ARCH_BITS == 64
2649 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2650#else
2651# error "ARCH_BITS is bogus"
2652#endif
2653}
2654
2655
2656/**
2657 * Atomically Exchange a raw-mode context pointer value, ordered.
2658 *
2659 * @returns Current *ppvRC value
2660 * @param ppvRC Pointer to the pointer variable to update.
2661 * @param pvRC The pointer value to assign to *ppvRC.
2662 */
2663DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2664{
2665 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2666}
2667
2668
2669/**
2670 * Atomically Exchange a ring-0 pointer value, ordered.
2671 *
2672 * @returns Current *ppvR0 value
2673 * @param ppvR0 Pointer to the pointer variable to update.
2674 * @param pvR0 The pointer value to assign to *ppvR0.
2675 */
2676DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2677{
2678#if R0_ARCH_BITS == 32
2679 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2680#elif R0_ARCH_BITS == 64
2681 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2682#else
2683# error "R0_ARCH_BITS is bogus"
2684#endif
2685}
2686
2687
2688/**
2689 * Atomically Exchange a ring-3 pointer value, ordered.
2690 *
2691 * @returns Current *ppvR3 value
2692 * @param ppvR3 Pointer to the pointer variable to update.
2693 * @param pvR3 The pointer value to assign to *ppvR3.
2694 */
2695DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2696{
2697#if R3_ARCH_BITS == 32
2698 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2699#elif R3_ARCH_BITS == 64
2700 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2701#else
2702# error "R3_ARCH_BITS is bogus"
2703#endif
2704}
2705
2706
2707/** @def ASMAtomicXchgHandle
2708 * Atomically Exchange a typical IPRT handle value, ordered.
2709 *
2710 * @param ph Pointer to the value to update.
2711 * @param hNew The new value to assign to *ph.
2712 * @param phRes Where to store the current *ph value.
2713 *
2714 * @remarks This doesn't currently work for all handles (like RTFILE).
2715 */
2716#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2717 do { \
2718 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2719 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2720 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2721 } while (0)
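
/* Editorial usage sketch, not part of the original header: how the macro's output
 * parameter is typically used. The event-semaphore handle is only an illustration and
 * assumes iprt/semaphore.h; any pointer-sized IPRT handle works the same way. */
#if 0 /* example only */
static void ExampleReplaceEvent(RTSEMEVENT volatile *phEvent)
{
    RTSEMEVENT hOld;
    ASMAtomicXchgHandle(phEvent, NIL_RTSEMEVENT, &hOld);
    if (hOld != NIL_RTSEMEVENT)
        RTSemEventDestroy(hOld);        /* we own whatever was there before */
}
#endif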
2722
2723
2724/**
2725 * Atomically Exchange a value whose size might differ
2726 * between platforms or compilers, ordered.
2727 *
2728 * @param pu Pointer to the variable to update.
2729 * @param uNew The value to assign to *pu.
2730 * @todo This is busted as it's missing the result argument.
2731 */
2732#define ASMAtomicXchgSize(pu, uNew) \
2733 do { \
2734 switch (sizeof(*(pu))) { \
2735 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2736 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2737 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2738 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2739 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2740 } \
2741 } while (0)
2742
2743/**
2744 * Atomically Exchange a value whose size might differ
2745 * between platforms or compilers, ordered.
2746 *
2747 * @param pu Pointer to the variable to update.
2748 * @param uNew The value to assign to *pu.
2749 * @param puRes Where to store the current *pu value.
2750 */
2751#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2752 do { \
2753 switch (sizeof(*(pu))) { \
2754 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2755 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2756 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2757 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2758 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2759 } \
2760 } while (0)
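
/* Editorial usage sketch, not part of the original header: unlike ASMAtomicXchgSize,
 * this variant hands the previous value back through puRes. The size_t state word
 * g_uState is hypothetical. */
#if 0 /* example only */
static size_t volatile g_uState;    /* hypothetical shared state word */

static bool ExampleClaim(void)
{
    size_t uOld;
    ASMAtomicXchgSizeCorrect(&g_uState, (size_t)1, &uOld);
    return uOld == 0;               /* true if we performed the 0 -> 1 transition */
}
#endif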
2761
2762
2763/**
2764 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2765 *
2766 * @returns true if xchg was done.
2767 * @returns false if xchg wasn't done.
2768 *
2769 * @param pu32 Pointer to the value to update.
2770 * @param u32New The new value to assign to *pu32.
2771 * @param u32Old The old value to compare *pu32 with.
2772 */
2773#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2774DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2775#else
2776DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2777{
2778# if RT_INLINE_ASM_GNU_STYLE
2779 uint8_t u8Ret;
2780 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2781 "setz %1\n\t"
2782 : "=m" (*pu32),
2783 "=qm" (u8Ret),
2784 "=a" (u32Old)
2785 : "r" (u32New),
2786 "2" (u32Old),
2787 "m" (*pu32));
2788 return (bool)u8Ret;
2789
2790# elif RT_INLINE_ASM_USES_INTRIN
2791 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2792
2793# else
2794 uint32_t u32Ret;
2795 __asm
2796 {
2797# ifdef RT_ARCH_AMD64
2798 mov rdx, [pu32]
2799# else
2800 mov edx, [pu32]
2801# endif
2802 mov eax, [u32Old]
2803 mov ecx, [u32New]
2804# ifdef RT_ARCH_AMD64
2805 lock cmpxchg [rdx], ecx
2806# else
2807 lock cmpxchg [edx], ecx
2808# endif
2809 setz al
2810 movzx eax, al
2811 mov [u32Ret], eax
2812 }
2813 return !!u32Ret;
2814# endif
2815}
2816#endif
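
/* Editorial usage sketch, not part of the original header: the typical lock-free update
 * loop built on compare-and-exchange. The counter g_cUsers and the cap are hypothetical;
 * the loop simply re-reads and retries until the CAS succeeds or the precondition fails. */
#if 0 /* example only */
static uint32_t volatile g_cUsers;  /* hypothetical shared counter */

static bool ExampleTryAddUser(uint32_t cMax)
{
    for (;;)
    {
        uint32_t cCur = ASMAtomicReadU32(&g_cUsers);
        if (cCur >= cMax)
            return false;                               /* no room, give up */
        if (ASMAtomicCmpXchgU32(&g_cUsers, cCur + 1, cCur))
            return true;                                /* we won the race */
        /* somebody else updated g_cUsers; loop and retry with the fresh value */
    }
}
#endif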
2817
2818
2819/**
2820 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2821 *
2822 * @returns true if xchg was done.
2823 * @returns false if xchg wasn't done.
2824 *
2825 * @param pi32 Pointer to the value to update.
2826 * @param i32New The new value to assign to *pi32.
2827 * @param i32Old The old value to compare *pi32 with.
2828 */
2829DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2830{
2831 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2832}
2833
2834
2835/**
2836 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2837 *
2838 * @returns true if xchg was done.
2839 * @returns false if xchg wasn't done.
2840 *
2841 * @param pu64 Pointer to the 64-bit variable to update.
2842 * @param u64New The 64-bit value to assign to *pu64.
2843 * @param u64Old The value to compare with.
2844 */
2845#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2847#else
2848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2849{
2850# if RT_INLINE_ASM_USES_INTRIN
2851 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2852
2853# elif defined(RT_ARCH_AMD64)
2854# if RT_INLINE_ASM_GNU_STYLE
2855 uint8_t u8Ret;
2856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2857 "setz %1\n\t"
2858 : "=m" (*pu64),
2859 "=qm" (u8Ret),
2860 "=a" (u64Old)
2861 : "r" (u64New),
2862 "2" (u64Old),
2863 "m" (*pu64));
2864 return (bool)u8Ret;
2865# else
2866 bool fRet;
2867 __asm
2868 {
2869 mov rdx, [pu64]
2870 mov rax, [u64Old]
2871 mov rcx, [u64New]
2872 lock cmpxchg [rdx], rcx
2873 setz al
2874 mov [fRet], al
2875 }
2876 return fRet;
2877# endif
2878# else /* !RT_ARCH_AMD64 */
2879 uint32_t u32Ret;
2880# if RT_INLINE_ASM_GNU_STYLE
2881# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2882 uint32_t u32EBX = (uint32_t)u64New;
2883 uint32_t u32Spill;
2884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2885 "lock; cmpxchg8b (%6)\n\t"
2886 "setz %%al\n\t"
2887 "movl %4, %%ebx\n\t"
2888 "movzbl %%al, %%eax\n\t"
2889 : "=a" (u32Ret),
2890 "=d" (u32Spill),
2891# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
2892 "+m" (*pu64)
2893# else
2894 "=m" (*pu64)
2895# endif
2896 : "A" (u64Old),
2897 "m" ( u32EBX ),
2898 "c" ( (uint32_t)(u64New >> 32) ),
2899 "S" (pu64));
2900# else /* !PIC */
2901 uint32_t u32Spill;
2902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2903 "setz %%al\n\t"
2904 "movzbl %%al, %%eax\n\t"
2905 : "=a" (u32Ret),
2906 "=d" (u32Spill),
2907 "+m" (*pu64)
2908 : "A" (u64Old),
2909 "b" ( (uint32_t)u64New ),
2910 "c" ( (uint32_t)(u64New >> 32) ));
2911# endif
2912 return (bool)u32Ret;
2913# else
2914 __asm
2915 {
2916 mov ebx, dword ptr [u64New]
2917 mov ecx, dword ptr [u64New + 4]
2918 mov edi, [pu64]
2919 mov eax, dword ptr [u64Old]
2920 mov edx, dword ptr [u64Old + 4]
2921 lock cmpxchg8b [edi]
2922 setz al
2923 movzx eax, al
2924 mov dword ptr [u32Ret], eax
2925 }
2926 return !!u32Ret;
2927# endif
2928# endif /* !RT_ARCH_AMD64 */
2929}
2930#endif
2931
2932
2933/**
2934 * Atomically Compare and exchange a signed 64-bit value, ordered.
2935 *
2936 * @returns true if xchg was done.
2937 * @returns false if xchg wasn't done.
2938 *
2939 * @param pi64 Pointer to the 64-bit variable to update.
2940 * @param i64 The 64-bit value to assign to *pi64.
2941 * @param i64Old The value to compare with.
2942 */
2943DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2944{
2945 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2946}
2947
2948
2949/**
2950 * Atomically Compare and Exchange a pointer value, ordered.
2951 *
2952 * @returns true if xchg was done.
2953 * @returns false if xchg wasn't done.
2954 *
2955 * @param ppv Pointer to the value to update.
2956 * @param pvNew The new value to assign to *ppv.
2957 * @param pvOld The old value to compare *ppv with.
2958 */
2959DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
2960{
2961#if ARCH_BITS == 32
2962 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2963#elif ARCH_BITS == 64
2964 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2965#else
2966# error "ARCH_BITS is bogus"
2967#endif
2968}
2969
2970
2971/** @def ASMAtomicCmpXchgHandle
2972 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2973 *
2974 * @param ph Pointer to the value to update.
2975 * @param hNew The new value to assign to *ph.
2976 * @param hOld The old value to compare *ph with.
2977 * @param fRc Where to store the result.
2978 *
2979 * @remarks This doesn't currently work for all handles (like RTFILE).
2980 */
2981#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2982 do { \
2983 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2984 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2985 } while (0)
2986
2987
2988/** @def ASMAtomicCmpXchgSize
2989 * Atomically Compare and Exchange a value whose size might differ
2990 * between platforms or compilers, ordered.
2991 *
2992 * @param pu Pointer to the value to update.
2993 * @param uNew The new value to assign to *pu.
2994 * @param uOld The old value to compare *pu with.
2995 * @param fRc Where to store the result.
2996 */
2997#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2998 do { \
2999 switch (sizeof(*(pu))) { \
3000 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
3001 break; \
3002 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
3003 break; \
3004 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
3005 (fRc) = false; \
3006 break; \
3007 } \
3008 } while (0)
3009
3010
3011/**
3012 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
3013 * passes back old value, ordered.
3014 *
3015 * @returns true if xchg was done.
3016 * @returns false if xchg wasn't done.
3017 *
3018 * @param pu32 Pointer to the value to update.
3019 * @param u32New The new value to assign to *pu32.
3020 * @param u32Old The old value to compare *pu32 with.
3021 * @param pu32Old Pointer to store the old value at.
3022 */
3023#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3024DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
3025#else
3026DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
3027{
3028# if RT_INLINE_ASM_GNU_STYLE
3029 uint8_t u8Ret;
3030 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
3031 "setz %1\n\t"
3032 : "=m" (*pu32),
3033 "=qm" (u8Ret),
3034 "=a" (*pu32Old)
3035 : "r" (u32New),
3036 "a" (u32Old),
3037 "m" (*pu32));
3038 return (bool)u8Ret;
3039
3040# elif RT_INLINE_ASM_USES_INTRIN
3041 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
3042
3043# else
3044 uint32_t u32Ret;
3045 __asm
3046 {
3047# ifdef RT_ARCH_AMD64
3048 mov rdx, [pu32]
3049# else
3050 mov edx, [pu32]
3051# endif
3052 mov eax, [u32Old]
3053 mov ecx, [u32New]
3054# ifdef RT_ARCH_AMD64
3055 lock cmpxchg [rdx], ecx
3056 mov rdx, [pu32Old]
3057 mov [rdx], eax
3058# else
3059 lock cmpxchg [edx], ecx
3060 mov edx, [pu32Old]
3061 mov [edx], eax
3062# endif
3063 setz al
3064 movzx eax, al
3065 mov [u32Ret], eax
3066 }
3067 return !!u32Ret;
3068# endif
3069}
3070#endif
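
/* Editorial usage sketch, not part of the original header: the "Ex" variant hands back
 * the value it found, so a retry loop does not need a separate re-read. The flag word
 * g_fFlags and the bit are hypothetical. (ASMAtomicOrU32 further down does this
 * particular job more directly; the point here is the returned old value.) */
#if 0 /* example only */
static uint32_t volatile g_fFlags;  /* hypothetical shared flag word */

static void ExampleSetBit0(void)
{
    uint32_t fCur = ASMAtomicReadU32(&g_fFlags);
    uint32_t fOld;
    while (!ASMAtomicCmpXchgExU32(&g_fFlags, fCur | RT_BIT(0), fCur, &fOld))
        fCur = fOld;    /* lost the race; fOld already holds the fresh value */
}
#endif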
3071
3072
3073/**
3074 * Atomically Compare and Exchange a signed 32-bit value, additionally
3075 * passes back old value, ordered.
3076 *
3077 * @returns true if xchg was done.
3078 * @returns false if xchg wasn't done.
3079 *
3080 * @param pi32 Pointer to the value to update.
3081 * @param i32New The new value to assign to *pi32.
3082 * @param i32Old The old value to compare *pi32 with.
3083 * @param pi32Old Pointer to store the old value at.
3084 */
3085DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3086{
3087 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3088}
3089
3090
3091/**
3092 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3093 * passing back old value, ordered.
3094 *
3095 * @returns true if xchg was done.
3096 * @returns false if xchg wasn't done.
3097 *
3098 * @param pu64 Pointer to the 64-bit variable to update.
3099 * @param u64New The 64-bit value to assign to *pu64.
3100 * @param u64Old The value to compare with.
3101 * @param pu64Old Pointer to store the old value at.
3102 */
3103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3104DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3105#else
3106DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3107{
3108# if RT_INLINE_ASM_USES_INTRIN
3109 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3110
3111# elif defined(RT_ARCH_AMD64)
3112# if RT_INLINE_ASM_GNU_STYLE
3113 uint8_t u8Ret;
3114 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3115 "setz %1\n\t"
3116 : "=m" (*pu64),
3117 "=qm" (u8Ret),
3118 "=a" (*pu64Old)
3119 : "r" (u64New),
3120 "a" (u64Old),
3121 "m" (*pu64));
3122 return (bool)u8Ret;
3123# else
3124 bool fRet;
3125 __asm
3126 {
3127 mov rdx, [pu64]
3128 mov rax, [u64Old]
3129 mov rcx, [u64New]
3130 lock cmpxchg [rdx], rcx
3131 mov rdx, [pu64Old]
3132 mov [rdx], rax
3133 setz al
3134 mov [fRet], al
3135 }
3136 return fRet;
3137# endif
3138# else /* !RT_ARCH_AMD64 */
3139# if RT_INLINE_ASM_GNU_STYLE
3140 uint64_t u64Ret;
3141# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3142 /* NB: this code uses a memory clobber description, because the clean
3143 * solution with an output value for *pu64 makes gcc run out of registers.
3144 * This will cause suboptimal code, and anyone with a better solution is
3145 * welcome to improve this. */
3146 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3147 "lock; cmpxchg8b %3\n\t"
3148 "xchgl %%ebx, %1\n\t"
3149 : "=A" (u64Ret)
3150 : "DS" ((uint32_t)u64New),
3151 "c" ((uint32_t)(u64New >> 32)),
3152 "m" (*pu64),
3153 "0" (u64Old)
3154 : "memory" );
3155# else /* !PIC */
3156 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3157 : "=A" (u64Ret),
3158 "=m" (*pu64)
3159 : "b" ((uint32_t)u64New),
3160 "c" ((uint32_t)(u64New >> 32)),
3161 "m" (*pu64),
3162 "0" (u64Old));
3163# endif
3164 *pu64Old = u64Ret;
3165 return u64Ret == u64Old;
3166# else
3167 uint32_t u32Ret;
3168 __asm
3169 {
3170 mov ebx, dword ptr [u64New]
3171 mov ecx, dword ptr [u64New + 4]
3172 mov edi, [pu64]
3173 mov eax, dword ptr [u64Old]
3174 mov edx, dword ptr [u64Old + 4]
3175 lock cmpxchg8b [edi]
3176 mov ebx, [pu64Old]
3177 mov [ebx], eax
3178 setz al
3179 movzx eax, al
3180 add ebx, 4
3181 mov [ebx], edx
3182 mov dword ptr [u32Ret], eax
3183 }
3184 return !!u32Ret;
3185# endif
3186# endif /* !RT_ARCH_AMD64 */
3187}
3188#endif
3189
3190
3191/**
3192 * Atomically Compare and exchange a signed 64-bit value, additionally
3193 * passing back old value, ordered.
3194 *
3195 * @returns true if xchg was done.
3196 * @returns false if xchg wasn't done.
3197 *
3198 * @param pi64 Pointer to the 64-bit variable to update.
3199 * @param i64 The 64-bit value to assign to *pi64.
3200 * @param i64Old The value to compare with.
3201 * @param pi64Old Pointer to store the old value at.
3202 */
3203DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3204{
3205 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3206}
3207
3208/** @def ASMAtomicCmpXchgExHandle
3209 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3210 *
3211 * @param ph Pointer to the value to update.
3212 * @param hNew The new value to assign to *ph.
3213 * @param hOld The old value to compare *ph with.
3214 * @param fRc Where to store the result.
3215 * @param phOldVal Pointer to where to store the old value.
3216 *
3217 * @remarks This doesn't currently work for all handles (like RTFILE).
3218 */
3219#if ARCH_BITS == 32
3220# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3221 do { \
3222 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3223 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3224 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3225 } while (0)
3226#elif ARCH_BITS == 64
3227# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3228 do { \
3229 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3230 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3231 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3232 } while (0)
3233#endif
3234
3235
3236/** @def ASMAtomicCmpXchgExSize
3237 * Atomically Compare and Exchange a value whose size might differ
3238 * between platforms or compilers. Additionally passes back old value.
3239 *
3240 * @param pu Pointer to the value to update.
3241 * @param uNew The new value to assign to *pu.
3242 * @param uOld The old value to compare *pu with.
3243 * @param fRc Where to store the result.
3244 * @param puOldVal Pointer to where to store the old value.
3245 */
3246#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3247 do { \
3248 switch (sizeof(*(pu))) { \
3249 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3250 break; \
3251 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3252 break; \
3253 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3254 (fRc) = false; \
3255 *(puOldVal) = 0; \
3256 break; \
3257 } \
3258 } while (0)
3259
3260
3261/**
3262 * Atomically Compare and Exchange a pointer value, additionally
3263 * passing back old value, ordered.
3264 *
3265 * @returns true if xchg was done.
3266 * @returns false if xchg wasn't done.
3267 *
3268 * @param ppv Pointer to the value to update.
3269 * @param pvNew The new value to assign to *ppv.
3270 * @param pvOld The old value to compare *ppv with.
3271 * @param ppvOld Pointer to store the old value at.
3272 */
3273DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3274{
3275#if ARCH_BITS == 32
3276 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3277#elif ARCH_BITS == 64
3278 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3279#else
3280# error "ARCH_BITS is bogus"
3281#endif
3282}
3283
3284
3285/**
3286 * Atomically exchanges and adds to a 32-bit value, ordered.
3287 *
3288 * @returns The old value.
3289 * @param pu32 Pointer to the value.
3290 * @param u32 Number to add.
3291 */
3292#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3293DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3294#else
3295DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3296{
3297# if RT_INLINE_ASM_USES_INTRIN
3298 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3299 return u32;
3300
3301# elif RT_INLINE_ASM_GNU_STYLE
3302 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3303 : "=r" (u32),
3304 "=m" (*pu32)
3305 : "0" (u32),
3306 "m" (*pu32)
3307 : "memory");
3308 return u32;
3309# else
3310 __asm
3311 {
3312 mov eax, [u32]
3313# ifdef RT_ARCH_AMD64
3314 mov rdx, [pu32]
3315 lock xadd [rdx], eax
3316# else
3317 mov edx, [pu32]
3318 lock xadd [edx], eax
3319# endif
3320 mov [u32], eax
3321 }
3322 return u32;
3323# endif
3324}
3325#endif
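
/* Editorial usage sketch, not part of the original header: the returned pre-add value is
 * what makes xadd useful, e.g. for claiming the next slot in a ring. The ring variables
 * are hypothetical. */
#if 0 /* example only */
static uint32_t volatile g_iWrite;          /* hypothetical producer index */
static uint32_t          g_au32Ring[64];    /* hypothetical ring buffer */

static void ExampleProduce(uint32_t u32Data)
{
    uint32_t iSlot = ASMAtomicAddU32(&g_iWrite, 1) % RT_ELEMENTS(g_au32Ring);
    g_au32Ring[iSlot] = u32Data;
}
#endif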
3326
3327
3328/**
3329 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3330 *
3331 * @returns The old value.
3332 * @param pi32 Pointer to the value.
3333 * @param i32 Number to add.
3334 */
3335DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3336{
3337 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3338}
3339
3340
3341/**
3342 * Atomically increment a 32-bit value, ordered.
3343 *
3344 * @returns The new value.
3345 * @param pu32 Pointer to the value to increment.
3346 */
3347#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3348DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3349#else
3350DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3351{
3352 uint32_t u32;
3353# if RT_INLINE_ASM_USES_INTRIN
3354 u32 = _InterlockedIncrement((long *)pu32);
3355 return u32;
3356
3357# elif RT_INLINE_ASM_GNU_STYLE
3358 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3359 : "=r" (u32),
3360 "=m" (*pu32)
3361 : "0" (1),
3362 "m" (*pu32)
3363 : "memory");
3364 return u32+1;
3365# else
3366 __asm
3367 {
3368 mov eax, 1
3369# ifdef RT_ARCH_AMD64
3370 mov rdx, [pu32]
3371 lock xadd [rdx], eax
3372# else
3373 mov edx, [pu32]
3374 lock xadd [edx], eax
3375# endif
3376 mov u32, eax
3377 }
3378 return u32+1;
3379# endif
3380}
3381#endif
3382
3383
3384/**
3385 * Atomically increment a signed 32-bit value, ordered.
3386 *
3387 * @returns The new value.
3388 * @param pi32 Pointer to the value to increment.
3389 */
3390DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3391{
3392 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3393}
3394
3395
3396/**
3397 * Atomically decrement an unsigned 32-bit value, ordered.
3398 *
3399 * @returns The new value.
3400 * @param pu32 Pointer to the value to decrement.
3401 */
3402#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3403DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3404#else
3405DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3406{
3407 uint32_t u32;
3408# if RT_INLINE_ASM_USES_INTRIN
3409 u32 = _InterlockedDecrement((long *)pu32);
3410 return u32;
3411
3412# elif RT_INLINE_ASM_GNU_STYLE
3413 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3414 : "=r" (u32),
3415 "=m" (*pu32)
3416 : "0" (-1),
3417 "m" (*pu32)
3418 : "memory");
3419 return u32-1;
3420# else
3421 __asm
3422 {
3423 mov eax, -1
3424# ifdef RT_ARCH_AMD64
3425 mov rdx, [pu32]
3426 lock xadd [rdx], eax
3427# else
3428 mov edx, [pu32]
3429 lock xadd [edx], eax
3430# endif
3431 mov u32, eax
3432 }
3433 return u32-1;
3434# endif
3435}
3436#endif
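
/* Editorial usage sketch, not part of the original header: the usual reference-counting
 * pattern built on the increment/decrement pair; both return the new count. The object
 * type and destructor are hypothetical. */
#if 0 /* example only */
typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; /* ... */ } EXAMPLEOBJ;
static void exampleObjDestroy(EXAMPLEOBJ *pObj);    /* hypothetical destructor */

static void ExampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

static void ExampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
        exampleObjDestroy(pObj);    /* we dropped the last reference */
}
#endif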
3437
3438
3439/**
3440 * Atomically decrement a signed 32-bit value, ordered.
3441 *
3442 * @returns The new value.
3443 * @param pi32 Pointer to the value to decrement.
3444 */
3445DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3446{
3447 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3448}
3449
3450
3451/**
3452 * Atomically Or an unsigned 32-bit value, ordered.
3453 *
3454 * @param pu32 Pointer to the variable to OR u32 with.
3455 * @param u32 The value to OR *pu32 with.
3456 */
3457#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3458DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3459#else
3460DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3461{
3462# if RT_INLINE_ASM_USES_INTRIN
3463 _InterlockedOr((long volatile *)pu32, (long)u32);
3464
3465# elif RT_INLINE_ASM_GNU_STYLE
3466 __asm__ __volatile__("lock; orl %1, %0\n\t"
3467 : "=m" (*pu32)
3468 : "ir" (u32),
3469 "m" (*pu32));
3470# else
3471 __asm
3472 {
3473 mov eax, [u32]
3474# ifdef RT_ARCH_AMD64
3475 mov rdx, [pu32]
3476 lock or [rdx], eax
3477# else
3478 mov edx, [pu32]
3479 lock or [edx], eax
3480# endif
3481 }
3482# endif
3483}
3484#endif
3485
3486
3487/**
3488 * Atomically Or a signed 32-bit value, ordered.
3489 *
3490 * @param pi32 Pointer to the variable to OR i32 with.
3491 * @param i32 The value to OR *pi32 with.
3492 */
3493DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3494{
3495 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3496}
3497
3498
3499/**
3500 * Atomically And an unsigned 32-bit value, ordered.
3501 *
3502 * @param pu32 Pointer to the variable to AND u32 with.
3503 * @param u32 The value to AND *pu32 with.
3504 */
3505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3506DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3507#else
3508DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3509{
3510# if RT_INLINE_ASM_USES_INTRIN
3511 _InterlockedAnd((long volatile *)pu32, u32);
3512
3513# elif RT_INLINE_ASM_GNU_STYLE
3514 __asm__ __volatile__("lock; andl %1, %0\n\t"
3515 : "=m" (*pu32)
3516 : "ir" (u32),
3517 "m" (*pu32));
3518# else
3519 __asm
3520 {
3521 mov eax, [u32]
3522# ifdef RT_ARCH_AMD64
3523 mov rdx, [pu32]
3524 lock and [rdx], eax
3525# else
3526 mov edx, [pu32]
3527 lock and [edx], eax
3528# endif
3529 }
3530# endif
3531}
3532#endif
3533
3534
3535/**
3536 * Atomically And a signed 32-bit value, ordered.
3537 *
3538 * @param pi32 Pointer to the variable to AND i32 with.
3539 * @param i32 The value to AND *pi32 with.
3540 */
3541DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3542{
3543 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3544}
3545
3546
3547/**
3548 * Memory fence, waits for any pending writes and reads to complete.
3549 */
3550DECLINLINE(void) ASMMemoryFence(void)
3551{
3552 /** @todo use mfence? check if all cpus we care for support it. */
3553 uint32_t volatile u32;
3554 ASMAtomicXchgU32(&u32, 0);
3555}
3556
3557
3558/**
3559 * Write fence, waits for any pending writes to complete.
3560 */
3561DECLINLINE(void) ASMWriteFence(void)
3562{
3563 /** @todo use sfence? check if all cpus we care for support it. */
3564 ASMMemoryFence();
3565}
3566
3567
3568/**
3569 * Read fence, waits for any pending reads to complete.
3570 */
3571DECLINLINE(void) ASMReadFence(void)
3572{
3573 /** @todo use lfence? check if all cpus we care for support it. */
3574 ASMMemoryFence();
3575}
3576
3577
3578/**
3579 * Atomically reads an unsigned 8-bit value, ordered.
3580 *
3581 * @returns Current *pu8 value
3582 * @param pu8 Pointer to the 8-bit variable to read.
3583 */
3584DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3585{
3586 ASMMemoryFence();
3587 return *pu8; /* byte reads are atomic on x86 */
3588}
3589
3590
3591/**
3592 * Atomically reads an unsigned 8-bit value, unordered.
3593 *
3594 * @returns Current *pu8 value
3595 * @param pu8 Pointer to the 8-bit variable to read.
3596 */
3597DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3598{
3599 return *pu8; /* byte reads are atomic on x86 */
3600}
3601
3602
3603/**
3604 * Atomically reads a signed 8-bit value, ordered.
3605 *
3606 * @returns Current *pi8 value
3607 * @param pi8 Pointer to the 8-bit variable to read.
3608 */
3609DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3610{
3611 ASMMemoryFence();
3612 return *pi8; /* byte reads are atomic on x86 */
3613}
3614
3615
3616/**
3617 * Atomically reads a signed 8-bit value, unordered.
3618 *
3619 * @returns Current *pi8 value
3620 * @param pi8 Pointer to the 8-bit variable to read.
3621 */
3622DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3623{
3624 return *pi8; /* byte reads are atomic on x86 */
3625}
3626
3627
3628/**
3629 * Atomically reads an unsigned 16-bit value, ordered.
3630 *
3631 * @returns Current *pu16 value
3632 * @param pu16 Pointer to the 16-bit variable to read.
3633 */
3634DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3635{
3636 ASMMemoryFence();
3637 Assert(!((uintptr_t)pu16 & 1));
3638 return *pu16;
3639}
3640
3641
3642/**
3643 * Atomically reads an unsigned 16-bit value, unordered.
3644 *
3645 * @returns Current *pu16 value
3646 * @param pu16 Pointer to the 16-bit variable to read.
3647 */
3648DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3649{
3650 Assert(!((uintptr_t)pu16 & 1));
3651 return *pu16;
3652}
3653
3654
3655/**
3656 * Atomically reads a signed 16-bit value, ordered.
3657 *
3658 * @returns Current *pi16 value
3659 * @param pi16 Pointer to the 16-bit variable to read.
3660 */
3661DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3662{
3663 ASMMemoryFence();
3664 Assert(!((uintptr_t)pi16 & 1));
3665 return *pi16;
3666}
3667
3668
3669/**
3670 * Atomically reads a signed 16-bit value, unordered.
3671 *
3672 * @returns Current *pi16 value
3673 * @param pi16 Pointer to the 16-bit variable to read.
3674 */
3675DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3676{
3677 Assert(!((uintptr_t)pi16 & 1));
3678 return *pi16;
3679}
3680
3681
3682/**
3683 * Atomically reads an unsigned 32-bit value, ordered.
3684 *
3685 * @returns Current *pu32 value
3686 * @param pu32 Pointer to the 32-bit variable to read.
3687 */
3688DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3689{
3690 ASMMemoryFence();
3691 Assert(!((uintptr_t)pu32 & 3));
3692 return *pu32;
3693}
3694
3695
3696/**
3697 * Atomically reads an unsigned 32-bit value, unordered.
3698 *
3699 * @returns Current *pu32 value
3700 * @param pu32 Pointer to the 32-bit variable to read.
3701 */
3702DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3703{
3704 Assert(!((uintptr_t)pu32 & 3));
3705 return *pu32;
3706}
3707
3708
3709/**
3710 * Atomically reads a signed 32-bit value, ordered.
3711 *
3712 * @returns Current *pi32 value
3713 * @param pi32 Pointer to the 32-bit variable to read.
3714 */
3715DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3716{
3717 ASMMemoryFence();
3718 Assert(!((uintptr_t)pi32 & 3));
3719 return *pi32;
3720}
3721
3722
3723/**
3724 * Atomically reads a signed 32-bit value, unordered.
3725 *
3726 * @returns Current *pi32 value
3727 * @param pi32 Pointer to the 32-bit variable to read.
3728 */
3729DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3730{
3731 Assert(!((uintptr_t)pi32 & 3));
3732 return *pi32;
3733}
3734
3735
3736/**
3737 * Atomically reads an unsigned 64-bit value, ordered.
3738 *
3739 * @returns Current *pu64 value
3740 * @param pu64 Pointer to the 64-bit variable to read.
3741 * The memory pointed to must be writable.
3742 * @remark This will fault if the memory is read-only!
3743 */
3744#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3745DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3746#else
3747DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3748{
3749 uint64_t u64;
3750# ifdef RT_ARCH_AMD64
3751 Assert(!((uintptr_t)pu64 & 7));
3752/*# if RT_INLINE_ASM_GNU_STYLE
3753 __asm__ __volatile__( "mfence\n\t"
3754 "movq %1, %0\n\t"
3755 : "=r" (u64)
3756 : "m" (*pu64));
3757# else
3758 __asm
3759 {
3760 mfence
3761 mov rdx, [pu64]
3762 mov rax, [rdx]
3763 mov [u64], rax
3764 }
3765# endif*/
3766 ASMMemoryFence();
3767 u64 = *pu64;
3768# else /* !RT_ARCH_AMD64 */
3769# if RT_INLINE_ASM_GNU_STYLE
3770# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3771 uint32_t u32EBX = 0;
3772 Assert(!((uintptr_t)pu64 & 7));
3773 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3774 "lock; cmpxchg8b (%5)\n\t"
3775 "movl %3, %%ebx\n\t"
3776 : "=A" (u64),
3777# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3778 "+m" (*pu64)
3779# else
3780 "=m" (*pu64)
3781# endif
3782 : "0" (0),
3783 "m" (u32EBX),
3784 "c" (0),
3785 "S" (pu64));
3786# else /* !PIC */
3787 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3788 : "=A" (u64),
3789 "+m" (*pu64)
3790 : "0" (0),
3791 "b" (0),
3792 "c" (0));
3793# endif
3794# else
3795 Assert(!((uintptr_t)pu64 & 7));
3796 __asm
3797 {
3798 xor eax, eax
3799 xor edx, edx
3800 mov edi, pu64
3801 xor ecx, ecx
3802 xor ebx, ebx
3803 lock cmpxchg8b [edi]
3804 mov dword ptr [u64], eax
3805 mov dword ptr [u64 + 4], edx
3806 }
3807# endif
3808# endif /* !RT_ARCH_AMD64 */
3809 return u64;
3810}
3811#endif
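
/* Editorial note and usage sketch, not part of the original header: on 32-bit hosts the
 * read above is done with lock cmpxchg8b, which writes back the value it just read -
 * hence the remark that the memory must be writable. The counter name is hypothetical. */
#if 0 /* example only */
static uint64_t volatile g_cTotalBytes; /* hypothetical counter updated by another thread */

static uint64_t ExampleSnapshotTotal(void)
{
    return ASMAtomicReadU64(&g_cTotalBytes);
}
#endif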
3812
3813
3814/**
3815 * Atomically reads an unsigned 64-bit value, unordered.
3816 *
3817 * @returns Current *pu64 value
3818 * @param pu64 Pointer to the 64-bit variable to read.
3819 * The memory pointed to must be writable.
3820 * @remark This will fault if the memory is read-only!
3821 */
3822#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3823DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3824#else
3825DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3826{
3827 uint64_t u64;
3828# ifdef RT_ARCH_AMD64
3829 Assert(!((uintptr_t)pu64 & 7));
3830/*# if RT_INLINE_ASM_GNU_STYLE
3831 Assert(!((uintptr_t)pu64 & 7));
3832 __asm__ __volatile__("movq %1, %0\n\t"
3833 : "=r" (u64)
3834 : "m" (*pu64));
3835# else
3836 __asm
3837 {
3838 mov rdx, [pu64]
3839 mov rax, [rdx]
3840 mov [u64], rax
3841 }
3842# endif */
3843 u64 = *pu64;
3844# else /* !RT_ARCH_AMD64 */
3845# if RT_INLINE_ASM_GNU_STYLE
3846# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3847 uint32_t u32EBX = 0;
3848 uint32_t u32Spill;
3849 Assert(!((uintptr_t)pu64 & 7));
3850 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3851 "xor %%ecx,%%ecx\n\t"
3852 "xor %%edx,%%edx\n\t"
3853 "xchgl %%ebx, %3\n\t"
3854 "lock; cmpxchg8b (%4)\n\t"
3855 "movl %3, %%ebx\n\t"
3856 : "=A" (u64),
3857# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3858 "+m" (*pu64),
3859# else
3860 "=m" (*pu64),
3861# endif
3862 "=c" (u32Spill)
3863 : "m" (u32EBX),
3864 "S" (pu64));
3865# else /* !PIC */
3866 __asm__ __volatile__("cmpxchg8b %1\n\t"
3867 : "=A" (u64),
3868 "+m" (*pu64)
3869 : "0" (0),
3870 "b" (0),
3871 "c" (0));
3872# endif
3873# else
3874 Assert(!((uintptr_t)pu64 & 7));
3875 __asm
3876 {
3877 xor eax, eax
3878 xor edx, edx
3879 mov edi, pu64
3880 xor ecx, ecx
3881 xor ebx, ebx
3882 lock cmpxchg8b [edi]
3883 mov dword ptr [u64], eax
3884 mov dword ptr [u64 + 4], edx
3885 }
3886# endif
3887# endif /* !RT_ARCH_AMD64 */
3888 return u64;
3889}
3890#endif
3891
3892
3893/**
3894 * Atomically reads a signed 64-bit value, ordered.
3895 *
3896 * @returns Current *pi64 value
3897 * @param pi64 Pointer to the 64-bit variable to read.
3898 * The memory pointed to must be writable.
3899 * @remark This will fault if the memory is read-only!
3900 */
3901DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3902{
3903 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3904}
3905
3906
3907/**
3908 * Atomically reads a signed 64-bit value, unordered.
3909 *
3910 * @returns Current *pi64 value
3911 * @param pi64 Pointer to the 64-bit variable to read.
3912 * The memory pointed to must be writable.
3913 * @remark This will fault if the memory is read-only!
3914 */
3915DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3916{
3917 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3918}
3919
3920
3921/**
3922 * Atomically reads a pointer value, ordered.
3923 *
3924 * @returns Current *pv value
3925 * @param ppv Pointer to the pointer variable to read.
3926 */
3927DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3928{
3929#if ARCH_BITS == 32
3930 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3931#elif ARCH_BITS == 64
3932 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3933#else
3934# error "ARCH_BITS is bogus"
3935#endif
3936}
3937
3938
3939/**
3940 * Atomically reads a pointer value, unordered.
3941 *
3942 * @returns Current *pv value
3943 * @param ppv Pointer to the pointer variable to read.
3944 */
3945DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3946{
3947#if ARCH_BITS == 32
3948 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3949#elif ARCH_BITS == 64
3950 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3951#else
3952# error "ARCH_BITS is bogus"
3953#endif
3954}
3955
3956
3957/**
3958 * Atomically reads a boolean value, ordered.
3959 *
3960 * @returns Current *pf value
3961 * @param pf Pointer to the boolean variable to read.
3962 */
3963DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3964{
3965 ASMMemoryFence();
3966 return *pf; /* byte reads are atomic on x86 */
3967}
3968
3969
3970/**
3971 * Atomically reads a boolean value, unordered.
3972 *
3973 * @returns Current *pf value
3974 * @param pf Pointer to the boolean variable to read.
3975 */
3976DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3977{
3978 return *pf; /* byte reads are atomic on x86 */
3979}
3980
3981
3982/**
3983 * Atomically read a typical IPRT handle value, ordered.
3984 *
3985 * @param ph Pointer to the handle variable to read.
3986 * @param phRes Where to store the result.
3987 *
3988 * @remarks This doesn't currently work for all handles (like RTFILE).
3989 */
3990#define ASMAtomicReadHandle(ph, phRes) \
3991 do { \
3992 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3993 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3994 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3995 } while (0)
3996
3997
3998/**
3999 * Atomically read a typical IPRT handle value, unordered.
4000 *
4001 * @param ph Pointer to the handle variable to read.
4002 * @param phRes Where to store the result.
4003 *
4004 * @remarks This doesn't currently work for all handles (like RTFILE).
4005 */
4006#define ASMAtomicUoReadHandle(ph, phRes) \
4007 do { \
4008 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
4009 AssertCompile(sizeof(*ph) == sizeof(void *)); \
4010 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
4011 } while (0)
4012
4013
4014/**
4015 * Atomically read a value whose size might differ
4016 * between platforms or compilers, ordered.
4017 *
4018 * @param pu Pointer to the variable to update.
4019 * @param puRes Where to store the result.
4020 */
4021#define ASMAtomicReadSize(pu, puRes) \
4022 do { \
4023 switch (sizeof(*(pu))) { \
4024 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4025 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
4026 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
4027 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
4028 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4029 } \
4030 } while (0)
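
/* Editorial usage sketch, not part of the original header: reading a variable whose
 * width depends on the target, here a pointer-sized RTHCUINTPTR. The variable name is
 * hypothetical. */
#if 0 /* example only */
static RTHCUINTPTR volatile g_uCookie;  /* hypothetical pointer-sized shared value */

static RTHCUINTPTR ExampleReadCookie(void)
{
    RTHCUINTPTR uCur;
    ASMAtomicReadSize(&g_uCookie, &uCur);
    return uCur;
}
#endif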
4031
4032
4033/**
4034 * Atomically read a value whose size might differ
4035 * between platforms or compilers, unordered.
4036 *
4037 * @param pu Pointer to the variable to update.
4038 * @param puRes Where to store the result.
4039 */
4040#define ASMAtomicUoReadSize(pu, puRes) \
4041 do { \
4042 switch (sizeof(*(pu))) { \
4043 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
4044 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
4045 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
4046 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
4047 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
4048 } \
4049 } while (0)
4050
4051
4052/**
4053 * Atomically writes an unsigned 8-bit value, ordered.
4054 *
4055 * @param pu8 Pointer to the 8-bit variable.
4056 * @param u8 The 8-bit value to assign to *pu8.
4057 */
4058DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4059{
4060 ASMAtomicXchgU8(pu8, u8);
4061}
4062
4063
4064/**
4065 * Atomically writes an unsigned 8-bit value, unordered.
4066 *
4067 * @param pu8 Pointer to the 8-bit variable.
4068 * @param u8 The 8-bit value to assign to *pu8.
4069 */
4070DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4071{
4072 *pu8 = u8; /* byte writes are atomic on x86 */
4073}
4074
4075
4076/**
4077 * Atomically writes a signed 8-bit value, ordered.
4078 *
4079 * @param pi8 Pointer to the 8-bit variable.
4080 * @param i8 The 8-bit value to assign to *pi8.
4081 */
4082DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4083{
4084 ASMAtomicXchgS8(pi8, i8);
4085}
4086
4087
4088/**
4089 * Atomically writes a signed 8-bit value, unordered.
4090 *
4091 * @param pi8 Pointer to the 8-bit variable.
4092 * @param i8 The 8-bit value to assign to *pi8.
4093 */
4094DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4095{
4096 *pi8 = i8; /* byte writes are atomic on x86 */
4097}
4098
4099
4100/**
4101 * Atomically writes an unsigned 16-bit value, ordered.
4102 *
4103 * @param pu16 Pointer to the 16-bit variable.
4104 * @param u16 The 16-bit value to assign to *pu16.
4105 */
4106DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4107{
4108 ASMAtomicXchgU16(pu16, u16);
4109}
4110
4111
4112/**
4113 * Atomically writes an unsigned 16-bit value, unordered.
4114 *
4115 * @param pu16 Pointer to the 16-bit variable.
4116 * @param u16 The 16-bit value to assign to *pu16.
4117 */
4118DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4119{
4120 Assert(!((uintptr_t)pu16 & 1));
4121 *pu16 = u16;
4122}
4123
4124
4125/**
4126 * Atomically writes a signed 16-bit value, ordered.
4127 *
4128 * @param pi16 Pointer to the 16-bit variable.
4129 * @param i16 The 16-bit value to assign to *pi16.
4130 */
4131DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4132{
4133 ASMAtomicXchgS16(pi16, i16);
4134}
4135
4136
4137/**
4138 * Atomically writes a signed 16-bit value, unordered.
4139 *
4140 * @param pi16 Pointer to the 16-bit variable.
4141 * @param i16 The 16-bit value to assign to *pi16.
4142 */
4143DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4144{
4145 Assert(!((uintptr_t)pi16 & 1));
4146 *pi16 = i16;
4147}
4148
4149
4150/**
4151 * Atomically writes an unsigned 32-bit value, ordered.
4152 *
4153 * @param pu32 Pointer to the 32-bit variable.
4154 * @param u32 The 32-bit value to assign to *pu32.
4155 */
4156DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4157{
4158 ASMAtomicXchgU32(pu32, u32);
4159}
4160
4161
4162/**
4163 * Atomically writes an unsigned 32-bit value, unordered.
4164 *
4165 * @param pu32 Pointer to the 32-bit variable.
4166 * @param u32 The 32-bit value to assign to *pu32.
4167 */
4168DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4169{
4170 Assert(!((uintptr_t)pu32 & 3));
4171 *pu32 = u32;
4172}
4173
4174
4175/**
4176 * Atomically writes a signed 32-bit value, ordered.
4177 *
4178 * @param pi32 Pointer to the 32-bit variable.
4179 * @param i32 The 32-bit value to assign to *pi32.
4180 */
4181DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4182{
4183 ASMAtomicXchgS32(pi32, i32);
4184}
4185
4186
4187/**
4188 * Atomically writes a signed 32-bit value, unordered.
4189 *
4190 * @param pi32 Pointer to the 32-bit variable.
4191 * @param i32 The 32-bit value to assign to *pi32.
4192 */
4193DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4194{
4195 Assert(!((uintptr_t)pi32 & 3));
4196 *pi32 = i32;
4197}
4198
4199
4200/**
4201 * Atomically writes an unsigned 64-bit value, ordered.
4202 *
4203 * @param pu64 Pointer to the 64-bit variable.
4204 * @param u64 The 64-bit value to assign to *pu64.
4205 */
4206DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4207{
4208 ASMAtomicXchgU64(pu64, u64);
4209}
4210
4211
4212/**
4213 * Atomically writes an unsigned 64-bit value, unordered.
4214 *
4215 * @param pu64 Pointer to the 64-bit variable.
4216 * @param u64 The 64-bit value to assign to *pu64.
4217 */
4218DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4219{
4220 Assert(!((uintptr_t)pu64 & 7));
4221#if ARCH_BITS == 64
4222 *pu64 = u64;
4223#else
4224 ASMAtomicXchgU64(pu64, u64);
4225#endif
4226}
4227
4228
4229/**
4230 * Atomically writes a signed 64-bit value, ordered.
4231 *
4232 * @param pi64 Pointer to the 64-bit variable.
4233 * @param i64 The 64-bit value to assign to *pi64.
4234 */
4235DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4236{
4237 ASMAtomicXchgS64(pi64, i64);
4238}
4239
4240
4241/**
4242 * Atomically writes a signed 64-bit value, unordered.
4243 *
4244 * @param pi64 Pointer to the 64-bit variable.
4245 * @param i64 The 64-bit value to assign to *pi64.
4246 */
4247DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4248{
4249 Assert(!((uintptr_t)pi64 & 7));
4250#if ARCH_BITS == 64
4251 *pi64 = i64;
4252#else
4253 ASMAtomicXchgS64(pi64, i64);
4254#endif
4255}
4256
4257
4258/**
4259 * Atomically writes a boolean value, ordered.
4260 *
4261 * @param pf Pointer to the boolean variable.
4262 * @param f The boolean value to assign to *pf.
4263 */
4264DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4265{
4266 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4267}
4268
4269
4270/**
4271 * Atomically writes a boolean value, unordered.
4272 *
4273 * @param pf Pointer to the boolean variable.
4274 * @param f The boolean value to assign to *pf.
4275 */
4276DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4277{
4278 *pf = f; /* byte writes are atomic on x86 */
4279}
4280
4281
4282/**
4283 * Atomically writes a pointer value, ordered.
4284 *
4285 *
4286 * @param ppv Pointer to the pointer variable.
4287 * @param pv The pointer value to assign to *ppv.
4288 */
4289DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4290{
4291#if ARCH_BITS == 32
4292 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4293#elif ARCH_BITS == 64
4294 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4295#else
4296# error "ARCH_BITS is bogus"
4297#endif
4298}
4299
4300
4301/**
4302 * Atomically writes a pointer value, unordered.
4303 *
4304 *
4305 * @param ppv Pointer to the pointer variable.
4306 * @param pv The pointer value to assign to *ppv.
4307 */
4308DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4309{
4310#if ARCH_BITS == 32
4311 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4312#elif ARCH_BITS == 64
4313 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4314#else
4315# error "ARCH_BITS is bogus"
4316#endif
4317}
4318
4319
4320/**
4321 * Atomically write a typical IPRT handle value, ordered.
4322 *
4323 * @param ph Pointer to the variable to update.
4324 * @param hNew The value to assign to *ph.
4325 *
4326 * @remarks This doesn't currently work for all handles (like RTFILE).
4327 */
4328#define ASMAtomicWriteHandle(ph, hNew) \
4329 do { \
4330 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)hNew); \
4331 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4332 } while (0)
4333
4334
4335/**
4336 * Atomically write a typical IPRT handle value, unordered.
4337 *
4338 * @param ph Pointer to the variable to update.
4339 * @param hNew The value to assign to *ph.
4340 *
4341 * @remarks This doesn't currently work for all handles (like RTFILE).
4342 */
4343#define ASMAtomicUoWriteHandle(ph, hNew) \
4344 do { \
4345 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)hNew); \
4346 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4347 } while (0)
4348
4349
4350/**
4351 * Atomically write a value whose size might differ
4352 * between platforms or compilers, ordered.
4353 *
4354 * @param pu Pointer to the variable to update.
4355 * @param uNew The value to assign to *pu.
4356 */
4357#define ASMAtomicWriteSize(pu, uNew) \
4358 do { \
4359 switch (sizeof(*(pu))) { \
4360 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4361 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4362 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4363 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4364 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4365 } \
4366 } while (0)
4367
4368/**
4369 * Atomically write a value whose size might differ
4370 * between platforms or compilers, unordered.
4371 *
4372 * @param pu Pointer to the variable to update.
4373 * @param uNew The value to assign to *pu.
4374 */
4375#define ASMAtomicUoWriteSize(pu, uNew) \
4376 do { \
4377 switch (sizeof(*(pu))) { \
4378 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4379 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4380 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4381 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4382 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4383 } \
4384 } while (0)
4385
4386
4387
4388
4389/**
4390 * Invalidate page.
4391 *
4392 * @param pv Address of the page to invalidate.
4393 */
4394#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4395DECLASM(void) ASMInvalidatePage(void *pv);
4396#else
4397DECLINLINE(void) ASMInvalidatePage(void *pv)
4398{
4399# if RT_INLINE_ASM_USES_INTRIN
4400 __invlpg(pv);
4401
4402# elif RT_INLINE_ASM_GNU_STYLE
4403 __asm__ __volatile__("invlpg %0\n\t"
4404 : : "m" (*(uint8_t *)pv));
4405# else
4406 __asm
4407 {
4408# ifdef RT_ARCH_AMD64
4409 mov rax, [pv]
4410 invlpg [rax]
4411# else
4412 mov eax, [pv]
4413 invlpg [eax]
4414# endif
4415 }
4416# endif
4417}
4418#endif
4419
4420
4421#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4422# if PAGE_SIZE != 0x1000
4423# error "PAGE_SIZE is not 0x1000!"
4424# endif
4425#endif
4426
4427/**
4428 * Zeros a 4K memory page.
4429 *
4430 * @param pv Pointer to the memory block. This must be page aligned.
4431 */
4432#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4433DECLASM(void) ASMMemZeroPage(volatile void *pv);
4434# else
4435DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4436{
4437# if RT_INLINE_ASM_USES_INTRIN
4438# ifdef RT_ARCH_AMD64
4439 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4440# else
4441 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4442# endif
4443
4444# elif RT_INLINE_ASM_GNU_STYLE
4445 RTCCUINTREG uDummy;
4446# ifdef RT_ARCH_AMD64
4447 __asm__ __volatile__ ("rep stosq"
4448 : "=D" (pv),
4449 "=c" (uDummy)
4450 : "0" (pv),
4451 "c" (0x1000 >> 3),
4452 "a" (0)
4453 : "memory");
4454# else
4455 __asm__ __volatile__ ("rep stosl"
4456 : "=D" (pv),
4457 "=c" (uDummy)
4458 : "0" (pv),
4459 "c" (0x1000 >> 2),
4460 "a" (0)
4461 : "memory");
4462# endif
4463# else
4464 __asm
4465 {
4466# ifdef RT_ARCH_AMD64
4467 xor rax, rax
4468 mov ecx, 0200h
4469 mov rdi, [pv]
4470 rep stosq
4471# else
4472 xor eax, eax
4473 mov ecx, 0400h
4474 mov edi, [pv]
4475 rep stosd
4476# endif
4477 }
4478# endif
4479}
4480# endif
4481
4482
4483/**
4484 * Zeros a memory block with a 32-bit aligned size.
4485 *
4486 * @param pv Pointer to the memory block.
4487 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4488 */
4489#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4490DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4491#else
4492DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4493{
4494# if RT_INLINE_ASM_USES_INTRIN
4495# ifdef RT_ARCH_AMD64
4496 if (!(cb & 7))
4497 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4498 else
4499# endif
4500 __stosd((unsigned long *)pv, 0, cb / 4);
4501
4502# elif RT_INLINE_ASM_GNU_STYLE
4503 __asm__ __volatile__ ("rep stosl"
4504 : "=D" (pv),
4505 "=c" (cb)
4506 : "0" (pv),
4507 "1" (cb >> 2),
4508 "a" (0)
4509 : "memory");
4510# else
4511 __asm
4512 {
4513 xor eax, eax
4514# ifdef RT_ARCH_AMD64
4515 mov rcx, [cb]
4516 shr rcx, 2
4517 mov rdi, [pv]
4518# else
4519 mov ecx, [cb]
4520 shr ecx, 2
4521 mov edi, [pv]
4522# endif
4523 rep stosd
4524 }
4525# endif
4526}
4527#endif
4528
4529
4530/**
4531 * Fills a memory block with a 32-bit aligned size.
4532 *
4533 * @param pv Pointer to the memory block.
4534 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4535 * @param u32 The value to fill with.
4536 */
4537#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4538DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4539#else
4540DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4541{
4542# if RT_INLINE_ASM_USES_INTRIN
4543# ifdef RT_ARCH_AMD64
4544 if (!(cb & 7))
4545 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4546 else
4547# endif
4548 __stosd((unsigned long *)pv, u32, cb / 4);
4549
4550# elif RT_INLINE_ASM_GNU_STYLE
4551 __asm__ __volatile__ ("rep stosl"
4552 : "=D" (pv),
4553 "=c" (cb)
4554 : "0" (pv),
4555 "1" (cb >> 2),
4556 "a" (u32)
4557 : "memory");
4558# else
4559 __asm
4560 {
4561# ifdef RT_ARCH_AMD64
4562 mov rcx, [cb]
4563 shr rcx, 2
4564 mov rdi, [pv]
4565# else
4566 mov ecx, [cb]
4567 shr ecx, 2
4568 mov edi, [pv]
4569# endif
4570 mov eax, [u32]
4571 rep stosd
4572 }
4573# endif
4574}
4575#endif
4576
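/* A small sketch using the two block helpers above (the table type is made
 * up). Both require the byte count to be a multiple of four, which any array
 * of uint32_t entries trivially satisfies:
 *
 *     typedef struct EXAMPLETABLE
 *     {
 *         uint32_t au32Entries[64];
 *     } EXAMPLETABLE;
 *
 *     static void ExampleResetTable(EXAMPLETABLE *pTable)
 *     {
 *         ASMMemZero32(pTable, sizeof(*pTable));
 *     }
 *
 *     static void ExampleMarkAllInvalid(EXAMPLETABLE *pTable)
 *     {
 *         ASMMemFill32(pTable->au32Entries, sizeof(pTable->au32Entries), UINT32_MAX);
 *     }
 */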
4577
4578/**
4579 * Checks if a memory block is filled with the specified byte.
4580 *
4581 * This is a sort of inverted memchr.
4582 *
4583 * @returns Pointer to the byte which doesn't equal u8.
4584 * @returns NULL if all equal to u8.
4585 *
4586 * @param pv Pointer to the memory block.
4587 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4588 * @param u8 The value it's supposed to be filled with.
4589 */
4590#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4591DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4592#else
4593DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4594{
4595/** @todo rewrite this in inline assembly? */
4596 uint8_t const *pb = (uint8_t const *)pv;
4597 for (; cb; cb--, pb++)
4598 if (RT_UNLIKELY(*pb != u8))
4599 return (void *)pb;
4600 return NULL;
4601}
4602#endif
4603
4604
4605/**
4606 * Checks if a memory block is filled with the specified 32-bit value.
4607 *
4608 * This is a sort of inverted memchr.
4609 *
4610 * @returns Pointer to the first value which doesn't equal u32.
4611 * @returns NULL if all equal to u32.
4612 *
4613 * @param pv Pointer to the memory block.
4614 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4615 * @param u32 The value it's supposed to be filled with.
4616 */
4617#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4618DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4619#else
4620DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4621{
4622/** @todo rewrite this in inline assembly? */
4623 uint32_t const *pu32 = (uint32_t const *)pv;
4624 for (; cb; cb -= 4, pu32++)
4625 if (RT_UNLIKELY(*pu32 != u32))
4626 return (uint32_t *)pu32;
4627 return NULL;
4628}
4629#endif
4630
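/* Typical use of the two scanners is validation, e.g. checking that a page
 * really is all zeros before handing it out again (sketch; a 4K page size is
 * assumed, as elsewhere in this file):
 *
 *     static bool ExampleIsZeroPage(void const *pvPage)
 *     {
 *         return ASMMemIsAllU32(pvPage, 0x1000, 0) == NULL;
 *     }
 */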
4631
4632/**
4633 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4634 *
4635 * @returns u32F1 * u32F2.
4636 */
4637#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4638DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4639#else
4640DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4641{
4642# ifdef RT_ARCH_AMD64
4643 return (uint64_t)u32F1 * u32F2;
4644# else /* !RT_ARCH_AMD64 */
4645 uint64_t u64;
4646# if RT_INLINE_ASM_GNU_STYLE
4647 __asm__ __volatile__("mull %%edx"
4648 : "=A" (u64)
4649 : "a" (u32F2), "d" (u32F1));
4650# else
4651 __asm
4652 {
4653 mov edx, [u32F1]
4654 mov eax, [u32F2]
4655 mul edx
4656 mov dword ptr [u64], eax
4657 mov dword ptr [u64 + 4], edx
4658 }
4659# endif
4660 return u64;
4661# endif /* !RT_ARCH_AMD64 */
4662}
4663#endif
4664
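/* The point of this helper is that the multiplication is done with a 64-bit
 * result even on 32-bit hosts, so a byte offset computed from two 32-bit
 * quantities cannot silently wrap (the names below are illustrative):
 *
 *     static uint64_t ExampleSectorToByteOffset(uint32_t iSector, uint32_t cbSector)
 *     {
 *         return ASMMult2xU32RetU64(iSector, cbSector);  // == (uint64_t)iSector * cbSector
 *     }
 */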
4665
4666/**
4667 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4668 *
4669 * @returns i32F1 * i32F2.
4670 */
4671#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4672DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4673#else
4674DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4675{
4676# ifdef RT_ARCH_AMD64
4677 return (int64_t)i32F1 * i32F2;
4678# else /* !RT_ARCH_AMD64 */
4679 int64_t i64;
4680# if RT_INLINE_ASM_GNU_STYLE
4681 __asm__ __volatile__("imull %%edx"
4682 : "=A" (i64)
4683 : "a" (i32F2), "d" (i32F1));
4684# else
4685 __asm
4686 {
4687 mov edx, [i32F1]
4688 mov eax, [i32F2]
4689 imul edx
4690 mov dword ptr [i64], eax
4691 mov dword ptr [i64 + 4], edx
4692 }
4693# endif
4694 return i64;
4695# endif /* !RT_ARCH_AMD64 */
4696}
4697#endif
4698
4699
4700/**
4701 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4702 *
4703 * @returns u64 / u32.
4704 */
4705#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4706DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4707#else
4708DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4709{
4710# ifdef RT_ARCH_AMD64
4711 return (uint32_t)(u64 / u32);
4712# else /* !RT_ARCH_AMD64 */
4713# if RT_INLINE_ASM_GNU_STYLE
4714 RTCCUINTREG uDummy;
4715 __asm__ __volatile__("divl %3"
4716 : "=a" (u32), "=d"(uDummy)
4717 : "A" (u64), "r" (u32));
4718# else
4719 __asm
4720 {
4721 mov eax, dword ptr [u64]
4722 mov edx, dword ptr [u64 + 4]
4723 mov ecx, [u32]
4724 div ecx
4725 mov [u32], eax
4726 }
4727# endif
4728 return u32;
4729# endif /* !RT_ARCH_AMD64 */
4730}
4731#endif
4732
4733
4734/**
4735 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4736 *
4737 * @returns i64 / i32.
4738 */
4739#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4740DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4741#else
4742DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4743{
4744# ifdef RT_ARCH_AMD64
4745 return (int32_t)(i64 / i32);
4746# else /* !RT_ARCH_AMD64 */
4747# if RT_INLINE_ASM_GNU_STYLE
4748 RTCCUINTREG iDummy;
4749 __asm__ __volatile__("idivl %3"
4750 : "=a" (i32), "=d"(iDummy)
4751 : "A" (i64), "r" (i32));
4752# else
4753 __asm
4754 {
4755 mov eax, dword ptr [i64]
4756 mov edx, dword ptr [i64 + 4]
4757 mov ecx, [i32]
4758 idiv ecx
4759 mov [i32], eax
4760 }
4761# endif
4762 return i32;
4763# endif /* !RT_ARCH_AMD64 */
4764}
4765#endif
4766
4767
4768/**
4769 * Performs a 64-bit unsigned by 32-bit unsigned division and returns the
4770 * 32-bit unsigned remainder.
4771 *
4772 * @returns u64 % u32.
4773 *
4774 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or the division will overflow and crash.
4775 */
4776#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4777DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4778#else
4779DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4780{
4781# ifdef RT_ARCH_AMD64
4782 return (uint32_t)(u64 % u32);
4783# else /* !RT_ARCH_AMD64 */
4784# if RT_INLINE_ASM_GNU_STYLE
4785 RTCCUINTREG uDummy;
4786 __asm__ __volatile__("divl %3"
4787 : "=a" (uDummy), "=d"(u32)
4788 : "A" (u64), "r" (u32));
4789# else
4790 __asm
4791 {
4792 mov eax, dword ptr [u64]
4793 mov edx, dword ptr [u64 + 4]
4794 mov ecx, [u32]
4795 div ecx
4796 mov [u32], edx
4797 }
4798# endif
4799 return u32;
4800# endif /* !RT_ARCH_AMD64 */
4801}
4802#endif
4803
4804
4805/**
4806 * Performs a 64-bit signed by 32-bit signed division and returns the
4807 * 32-bit signed remainder.
4808 *
4809 * @returns i64 % i32.
4810 *
4811 * @remarks It is important that the quotient (i64 / i32) fits in 32 bits or the division will overflow and crash.
4812 */
4813#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4814DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4815#else
4816DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4817{
4818# ifdef RT_ARCH_AMD64
4819 return (int32_t)(i64 % i32);
4820# else /* !RT_ARCH_AMD64 */
4821# if RT_INLINE_ASM_GNU_STYLE
4822 RTCCUINTREG iDummy;
4823 __asm__ __volatile__("idivl %3"
4824 : "=a" (iDummy), "=d"(i32)
4825 : "A" (i64), "r" (i32));
4826# else
4827 __asm
4828 {
4829 mov eax, dword ptr [i64]
4830 mov edx, dword ptr [i64 + 4]
4831 mov ecx, [i32]
4832 idiv ecx
4833 mov [i32], edx
4834 }
4835# endif
4836 return i32;
4837# endif /* !RT_ARCH_AMD64 */
4838}
4839#endif
4840
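/* The divide/modulo helpers above are handy for splitting a 64-bit quantity by
 * a 32-bit divisor when the quotient is known to fit in 32 bits. A sketch that
 * splits a nanosecond timestamp into whole seconds and the remaining
 * nanoseconds (valid as long as the second count stays below UINT32_MAX):
 *
 *     static void ExampleSplitNanoTS(uint64_t u64Nano, uint32_t *pcSecs, uint32_t *pcNsRest)
 *     {
 *         *pcSecs   = ASMDivU64ByU32RetU32(u64Nano, UINT32_C(1000000000));
 *         *pcNsRest = ASMModU64ByU32RetU32(u64Nano, UINT32_C(1000000000));
 *     }
 */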
4841
4842 * Multiplies a 64-bit value by a 32-bit integer and divides the result by a
4843 * 32-bit integer, using a 96-bit intermediate result.
4844 * using a 96 bit intermediate result.
4845 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4846 * __udivdi3 and __umoddi3 even if this inline function is not used.
4847 *
4848 * @returns (u64A * u32B) / u32C.
4849 * @param u64A The 64-bit value.
4850 * @param   u32B    The 32-bit value to multiply A by.
4851 * @param u32C The 32-bit value to divide A*B by.
4852 */
4853#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4854DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4855#else
4856DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4857{
4858# if RT_INLINE_ASM_GNU_STYLE
4859# ifdef RT_ARCH_AMD64
4860 uint64_t u64Result, u64Spill;
4861 __asm__ __volatile__("mulq %2\n\t"
4862 "divq %3\n\t"
4863 : "=a" (u64Result),
4864 "=d" (u64Spill)
4865 : "r" ((uint64_t)u32B),
4866 "r" ((uint64_t)u32C),
4867 "0" (u64A),
4868 "1" (0));
4869 return u64Result;
4870# else
4871 uint32_t u32Dummy;
4872 uint64_t u64Result;
4873 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4874 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4875 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4876 eax = u64A.hi */
4877 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4878 edx = u32C */
4879 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4880 edx = u32B */
4881 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4882 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4883 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4884 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4885 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4886 edx = u64Hi % u32C */
4887 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4888 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4889 "divl %%ecx \n\t" /* u64Result.lo */
4890 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4891 : "=A"(u64Result), "=c"(u32Dummy),
4892 "=S"(u32Dummy), "=D"(u32Dummy)
4893 : "a"((uint32_t)u64A),
4894 "S"((uint32_t)(u64A >> 32)),
4895 "c"(u32B),
4896 "D"(u32C));
4897 return u64Result;
4898# endif
4899# else
4900 RTUINT64U u;
4901 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4902 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4903 u64Hi += (u64Lo >> 32);
4904 u.s.Hi = (uint32_t)(u64Hi / u32C);
4905 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4906 return u.u;
4907# endif
4908}
4909#endif
4910
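/* A typical use of the 96-bit scaler above is rescaling a tick count from one
 * frequency to another without losing the high bits of the intermediate
 * product (the parameter names are illustrative):
 *
 *     static uint64_t ExampleScaleTicks(uint64_t u64Ticks, uint32_t u32ToHz, uint32_t u32FromHz)
 *     {
 *         return ASMMultU64ByU32DivByU32(u64Ticks, u32ToHz, u32FromHz);
 *     }
 */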
4911
4912/**
4913 * Probes a byte pointer for read access.
4914 *
4915 * While the function will fault if the byte is not read accessible,
4916 * the idea is to do this in a safe place like before acquiring locks
4917 * and such like.
4918 *
4919 * Also, this function guarantees that an eager compiler is not going
4920 * to optimize the probing away.
4921 *
4922 * @param pvByte Pointer to the byte.
4923 */
4924#if RT_INLINE_ASM_EXTERNAL
4925DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4926#else
4927DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4928{
4929 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4930 uint8_t u8;
4931# if RT_INLINE_ASM_GNU_STYLE
4932 __asm__ __volatile__("movb (%1), %0\n\t"
4933 : "=r" (u8)
4934 : "r" (pvByte));
4935# else
4936 __asm
4937 {
4938# ifdef RT_ARCH_AMD64
4939 mov rax, [pvByte]
4940 mov al, [rax]
4941# else
4942 mov eax, [pvByte]
4943 mov al, [eax]
4944# endif
4945 mov [u8], al
4946 }
4947# endif
4948 return u8;
4949}
4950#endif
4951
4952/**
4953 * Probes a buffer for read access page by page.
4954 *
4955 * While the function will fault if the buffer is not fully read
4956 * accessible, the idea is to do this in a safe place like before
4957 * acquiring locks and such like.
4958 *
4959 * Also, this function guarantees that an eager compiler is not going
4960 * to optimize the probing away.
4961 *
4962 * @param pvBuf Pointer to the buffer.
4963 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4964 */
4965DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4966{
4967 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4968 /* the first byte */
4969 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4970 ASMProbeReadByte(pu8);
4971
4972    /* the pages in between. */
4973 while (cbBuf > /*PAGE_SIZE*/0x1000)
4974 {
4975 ASMProbeReadByte(pu8);
4976 cbBuf -= /*PAGE_SIZE*/0x1000;
4977 pu8 += /*PAGE_SIZE*/0x1000;
4978 }
4979
4980 /* the last byte */
4981 ASMProbeReadByte(pu8 + cbBuf - 1);
4982}
4983
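/* Sketch of the intended pattern: touch the caller-supplied buffer while it is
 * still safe to take a page fault, then enter the critical region (the lock
 * functions below are placeholders, not IPRT APIs):
 *
 *     static int ExampleProcessRequest(const void *pvReq, size_t cbReq)
 *     {
 *         ASMProbeReadBuffer(pvReq, cbReq);   // fault here, not under the lock
 *         ExampleLockAcquire();
 *         // ... parse the request ...
 *         ExampleLockRelease();
 *         return 0;
 *     }
 */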
4984
4985/** @def ASMBreakpoint
4986 * Debugger Breakpoint.
4987 * @remark In the gnu world we add a nop instruction after the int3 to
4988 * force gdb to remain at the int3 source line.
4989 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4990 * @internal
4991 */
4992#if RT_INLINE_ASM_GNU_STYLE
4993# ifndef __L4ENV__
4994# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4995# else
4996# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4997# endif
4998#else
4999# define ASMBreakpoint() __debugbreak()
5000#endif
5001
5002
5003
5004/** @defgroup grp_inline_bits Bit Operations
5005 * @{
5006 */
5007
5008
5009/**
5010 * Sets a bit in a bitmap.
5011 *
5012 * @param pvBitmap Pointer to the bitmap.
5013 * @param iBit The bit to set.
5014 */
5015#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5016DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5017#else
5018DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5019{
5020# if RT_INLINE_ASM_USES_INTRIN
5021 _bittestandset((long *)pvBitmap, iBit);
5022
5023# elif RT_INLINE_ASM_GNU_STYLE
5024 __asm__ __volatile__ ("btsl %1, %0"
5025 : "=m" (*(volatile long *)pvBitmap)
5026 : "Ir" (iBit),
5027 "m" (*(volatile long *)pvBitmap)
5028 : "memory");
5029# else
5030 __asm
5031 {
5032# ifdef RT_ARCH_AMD64
5033 mov rax, [pvBitmap]
5034 mov edx, [iBit]
5035 bts [rax], edx
5036# else
5037 mov eax, [pvBitmap]
5038 mov edx, [iBit]
5039 bts [eax], edx
5040# endif
5041 }
5042# endif
5043}
5044#endif
5045
5046
5047/**
5048 * Atomically sets a bit in a bitmap, ordered.
5049 *
5050 * @param pvBitmap Pointer to the bitmap.
5051 * @param iBit The bit to set.
5052 */
5053#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5054DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5055#else
5056DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5057{
5058# if RT_INLINE_ASM_USES_INTRIN
5059 _interlockedbittestandset((long *)pvBitmap, iBit);
5060# elif RT_INLINE_ASM_GNU_STYLE
5061 __asm__ __volatile__ ("lock; btsl %1, %0"
5062 : "=m" (*(volatile long *)pvBitmap)
5063 : "Ir" (iBit),
5064 "m" (*(volatile long *)pvBitmap)
5065 : "memory");
5066# else
5067 __asm
5068 {
5069# ifdef RT_ARCH_AMD64
5070 mov rax, [pvBitmap]
5071 mov edx, [iBit]
5072 lock bts [rax], edx
5073# else
5074 mov eax, [pvBitmap]
5075 mov edx, [iBit]
5076 lock bts [eax], edx
5077# endif
5078 }
5079# endif
5080}
5081#endif
5082
5083
5084/**
5085 * Clears a bit in a bitmap.
5086 *
5087 * @param pvBitmap Pointer to the bitmap.
5088 * @param iBit The bit to clear.
5089 */
5090#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5091DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5092#else
5093DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5094{
5095# if RT_INLINE_ASM_USES_INTRIN
5096 _bittestandreset((long *)pvBitmap, iBit);
5097
5098# elif RT_INLINE_ASM_GNU_STYLE
5099 __asm__ __volatile__ ("btrl %1, %0"
5100 : "=m" (*(volatile long *)pvBitmap)
5101 : "Ir" (iBit),
5102 "m" (*(volatile long *)pvBitmap)
5103 : "memory");
5104# else
5105 __asm
5106 {
5107# ifdef RT_ARCH_AMD64
5108 mov rax, [pvBitmap]
5109 mov edx, [iBit]
5110 btr [rax], edx
5111# else
5112 mov eax, [pvBitmap]
5113 mov edx, [iBit]
5114 btr [eax], edx
5115# endif
5116 }
5117# endif
5118}
5119#endif
5120
5121
5122/**
5123 * Atomically clears a bit in a bitmap, ordered.
5124 *
5125 * @param pvBitmap Pointer to the bitmap.
5126 * @param   iBit        The bit to clear.
5127 * @remark No memory barrier, take care on smp.
5128 */
5129#if RT_INLINE_ASM_EXTERNAL
5130DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5131#else
5132DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5133{
5134# if RT_INLINE_ASM_GNU_STYLE
5135 __asm__ __volatile__ ("lock; btrl %1, %0"
5136 : "=m" (*(volatile long *)pvBitmap)
5137 : "Ir" (iBit),
5138 "m" (*(volatile long *)pvBitmap)
5139 : "memory");
5140# else
5141 __asm
5142 {
5143# ifdef RT_ARCH_AMD64
5144 mov rax, [pvBitmap]
5145 mov edx, [iBit]
5146 lock btr [rax], edx
5147# else
5148 mov eax, [pvBitmap]
5149 mov edx, [iBit]
5150 lock btr [eax], edx
5151# endif
5152 }
5153# endif
5154}
5155#endif
5156
5157
5158/**
5159 * Toggles a bit in a bitmap.
5160 *
5161 * @param pvBitmap Pointer to the bitmap.
5162 * @param iBit The bit to toggle.
5163 */
5164#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5165DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5166#else
5167DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5168{
5169# if RT_INLINE_ASM_USES_INTRIN
5170 _bittestandcomplement((long *)pvBitmap, iBit);
5171# elif RT_INLINE_ASM_GNU_STYLE
5172 __asm__ __volatile__ ("btcl %1, %0"
5173 : "=m" (*(volatile long *)pvBitmap)
5174 : "Ir" (iBit),
5175 "m" (*(volatile long *)pvBitmap)
5176 : "memory");
5177# else
5178 __asm
5179 {
5180# ifdef RT_ARCH_AMD64
5181 mov rax, [pvBitmap]
5182 mov edx, [iBit]
5183 btc [rax], edx
5184# else
5185 mov eax, [pvBitmap]
5186 mov edx, [iBit]
5187 btc [eax], edx
5188# endif
5189 }
5190# endif
5191}
5192#endif
5193
5194
5195/**
5196 * Atomically toggles a bit in a bitmap, ordered.
5197 *
5198 * @param pvBitmap Pointer to the bitmap.
5199 * @param   iBit        The bit to toggle.
5200 */
5201#if RT_INLINE_ASM_EXTERNAL
5202DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5203#else
5204DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5205{
5206# if RT_INLINE_ASM_GNU_STYLE
5207 __asm__ __volatile__ ("lock; btcl %1, %0"
5208 : "=m" (*(volatile long *)pvBitmap)
5209 : "Ir" (iBit),
5210 "m" (*(volatile long *)pvBitmap)
5211 : "memory");
5212# else
5213 __asm
5214 {
5215# ifdef RT_ARCH_AMD64
5216 mov rax, [pvBitmap]
5217 mov edx, [iBit]
5218 lock btc [rax], edx
5219# else
5220 mov eax, [pvBitmap]
5221 mov edx, [iBit]
5222 lock btc [eax], edx
5223# endif
5224 }
5225# endif
5226}
5227#endif
5228
5229
5230/**
5231 * Tests and sets a bit in a bitmap.
5232 *
5233 * @returns true if the bit was set.
5234 * @returns false if the bit was clear.
5235 * @param pvBitmap Pointer to the bitmap.
5236 * @param iBit The bit to test and set.
5237 */
5238#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5239DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5240#else
5241DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5242{
5243 union { bool f; uint32_t u32; uint8_t u8; } rc;
5244# if RT_INLINE_ASM_USES_INTRIN
5245 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5246
5247# elif RT_INLINE_ASM_GNU_STYLE
5248 __asm__ __volatile__ ("btsl %2, %1\n\t"
5249 "setc %b0\n\t"
5250 "andl $1, %0\n\t"
5251 : "=q" (rc.u32),
5252 "=m" (*(volatile long *)pvBitmap)
5253 : "Ir" (iBit),
5254 "m" (*(volatile long *)pvBitmap)
5255 : "memory");
5256# else
5257 __asm
5258 {
5259 mov edx, [iBit]
5260# ifdef RT_ARCH_AMD64
5261 mov rax, [pvBitmap]
5262 bts [rax], edx
5263# else
5264 mov eax, [pvBitmap]
5265 bts [eax], edx
5266# endif
5267 setc al
5268 and eax, 1
5269 mov [rc.u32], eax
5270 }
5271# endif
5272 return rc.f;
5273}
5274#endif
5275
5276
5277/**
5278 * Atomically tests and sets a bit in a bitmap, ordered.
5279 *
5280 * @returns true if the bit was set.
5281 * @returns false if the bit was clear.
5282 * @param pvBitmap Pointer to the bitmap.
5283 * @param   iBit        The bit to test and set.
5284 */
5285#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5286DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5287#else
5288DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5289{
5290 union { bool f; uint32_t u32; uint8_t u8; } rc;
5291# if RT_INLINE_ASM_USES_INTRIN
5292 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5293# elif RT_INLINE_ASM_GNU_STYLE
5294 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5295 "setc %b0\n\t"
5296 "andl $1, %0\n\t"
5297 : "=q" (rc.u32),
5298 "=m" (*(volatile long *)pvBitmap)
5299 : "Ir" (iBit),
5300 "m" (*(volatile long *)pvBitmap)
5301 : "memory");
5302# else
5303 __asm
5304 {
5305 mov edx, [iBit]
5306# ifdef RT_ARCH_AMD64
5307 mov rax, [pvBitmap]
5308 lock bts [rax], edx
5309# else
5310 mov eax, [pvBitmap]
5311 lock bts [eax], edx
5312# endif
5313 setc al
5314 and eax, 1
5315 mov [rc.u32], eax
5316 }
5317# endif
5318 return rc.f;
5319}
5320#endif
5321
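/* The test-and-set/clear variants return the previous bit value, which makes
 * them usable as tiny claim flags (sketch; the bitmap below is made up and
 * sized for 256 units):
 *
 *     static uint32_t volatile g_bmUnitsInUse[8];
 *
 *     static bool ExampleClaimUnit(int32_t iUnit)
 *     {
 *         // true on success, i.e. the bit was previously clear.
 *         return !ASMAtomicBitTestAndSet(&g_bmUnitsInUse[0], iUnit);
 *     }
 *
 *     static void ExampleReleaseUnit(int32_t iUnit)
 *     {
 *         ASMAtomicBitClear(&g_bmUnitsInUse[0], iUnit);
 *     }
 */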
5322
5323/**
5324 * Tests and clears a bit in a bitmap.
5325 *
5326 * @returns true if the bit was set.
5327 * @returns false if the bit was clear.
5328 * @param pvBitmap Pointer to the bitmap.
5329 * @param iBit The bit to test and clear.
5330 */
5331#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5332DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5333#else
5334DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5335{
5336 union { bool f; uint32_t u32; uint8_t u8; } rc;
5337# if RT_INLINE_ASM_USES_INTRIN
5338 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5339
5340# elif RT_INLINE_ASM_GNU_STYLE
5341 __asm__ __volatile__ ("btrl %2, %1\n\t"
5342 "setc %b0\n\t"
5343 "andl $1, %0\n\t"
5344 : "=q" (rc.u32),
5345 "=m" (*(volatile long *)pvBitmap)
5346 : "Ir" (iBit),
5347 "m" (*(volatile long *)pvBitmap)
5348 : "memory");
5349# else
5350 __asm
5351 {
5352 mov edx, [iBit]
5353# ifdef RT_ARCH_AMD64
5354 mov rax, [pvBitmap]
5355 btr [rax], edx
5356# else
5357 mov eax, [pvBitmap]
5358 btr [eax], edx
5359# endif
5360 setc al
5361 and eax, 1
5362 mov [rc.u32], eax
5363 }
5364# endif
5365 return rc.f;
5366}
5367#endif
5368
5369
5370/**
5371 * Atomically tests and clears a bit in a bitmap, ordered.
5372 *
5373 * @returns true if the bit was set.
5374 * @returns false if the bit was clear.
5375 * @param pvBitmap Pointer to the bitmap.
5376 * @param iBit The bit to test and clear.
5377 * @remark No memory barrier, take care on smp.
5378 */
5379#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5380DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5381#else
5382DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5383{
5384 union { bool f; uint32_t u32; uint8_t u8; } rc;
5385# if RT_INLINE_ASM_USES_INTRIN
5386 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5387
5388# elif RT_INLINE_ASM_GNU_STYLE
5389 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5390 "setc %b0\n\t"
5391 "andl $1, %0\n\t"
5392 : "=q" (rc.u32),
5393 "=m" (*(volatile long *)pvBitmap)
5394 : "Ir" (iBit),
5395 "m" (*(volatile long *)pvBitmap)
5396 : "memory");
5397# else
5398 __asm
5399 {
5400 mov edx, [iBit]
5401# ifdef RT_ARCH_AMD64
5402 mov rax, [pvBitmap]
5403 lock btr [rax], edx
5404# else
5405 mov eax, [pvBitmap]
5406 lock btr [eax], edx
5407# endif
5408 setc al
5409 and eax, 1
5410 mov [rc.u32], eax
5411 }
5412# endif
5413 return rc.f;
5414}
5415#endif
5416
5417
5418/**
5419 * Tests and toggles a bit in a bitmap.
5420 *
5421 * @returns true if the bit was set.
5422 * @returns false if the bit was clear.
5423 * @param pvBitmap Pointer to the bitmap.
5424 * @param iBit The bit to test and toggle.
5425 */
5426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5427DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5428#else
5429DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5430{
5431 union { bool f; uint32_t u32; uint8_t u8; } rc;
5432# if RT_INLINE_ASM_USES_INTRIN
5433 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5434
5435# elif RT_INLINE_ASM_GNU_STYLE
5436 __asm__ __volatile__ ("btcl %2, %1\n\t"
5437 "setc %b0\n\t"
5438 "andl $1, %0\n\t"
5439 : "=q" (rc.u32),
5440 "=m" (*(volatile long *)pvBitmap)
5441 : "Ir" (iBit),
5442 "m" (*(volatile long *)pvBitmap)
5443 : "memory");
5444# else
5445 __asm
5446 {
5447 mov edx, [iBit]
5448# ifdef RT_ARCH_AMD64
5449 mov rax, [pvBitmap]
5450 btc [rax], edx
5451# else
5452 mov eax, [pvBitmap]
5453 btc [eax], edx
5454# endif
5455 setc al
5456 and eax, 1
5457 mov [rc.u32], eax
5458 }
5459# endif
5460 return rc.f;
5461}
5462#endif
5463
5464
5465/**
5466 * Atomically tests and toggles a bit in a bitmap, ordered.
5467 *
5468 * @returns true if the bit was set.
5469 * @returns false if the bit was clear.
5470 * @param pvBitmap Pointer to the bitmap.
5471 * @param iBit The bit to test and toggle.
5472 */
5473#if RT_INLINE_ASM_EXTERNAL
5474DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5475#else
5476DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5477{
5478 union { bool f; uint32_t u32; uint8_t u8; } rc;
5479# if RT_INLINE_ASM_GNU_STYLE
5480 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5481 "setc %b0\n\t"
5482 "andl $1, %0\n\t"
5483 : "=q" (rc.u32),
5484 "=m" (*(volatile long *)pvBitmap)
5485 : "Ir" (iBit),
5486 "m" (*(volatile long *)pvBitmap)
5487 : "memory");
5488# else
5489 __asm
5490 {
5491 mov edx, [iBit]
5492# ifdef RT_ARCH_AMD64
5493 mov rax, [pvBitmap]
5494 lock btc [rax], edx
5495# else
5496 mov eax, [pvBitmap]
5497 lock btc [eax], edx
5498# endif
5499 setc al
5500 and eax, 1
5501 mov [rc.u32], eax
5502 }
5503# endif
5504 return rc.f;
5505}
5506#endif
5507
5508
5509/**
5510 * Tests if a bit in a bitmap is set.
5511 *
5512 * @returns true if the bit is set.
5513 * @returns false if the bit is clear.
5514 * @param pvBitmap Pointer to the bitmap.
5515 * @param iBit The bit to test.
5516 */
5517#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5518DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5519#else
5520DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5521{
5522 union { bool f; uint32_t u32; uint8_t u8; } rc;
5523# if RT_INLINE_ASM_USES_INTRIN
5524 rc.u32 = _bittest((long *)pvBitmap, iBit);
5525# elif RT_INLINE_ASM_GNU_STYLE
5526
5527 __asm__ __volatile__ ("btl %2, %1\n\t"
5528 "setc %b0\n\t"
5529 "andl $1, %0\n\t"
5530 : "=q" (rc.u32)
5531 : "m" (*(const volatile long *)pvBitmap),
5532 "Ir" (iBit)
5533 : "memory");
5534# else
5535 __asm
5536 {
5537 mov edx, [iBit]
5538# ifdef RT_ARCH_AMD64
5539 mov rax, [pvBitmap]
5540 bt [rax], edx
5541# else
5542 mov eax, [pvBitmap]
5543 bt [eax], edx
5544# endif
5545 setc al
5546 and eax, 1
5547 mov [rc.u32], eax
5548 }
5549# endif
5550 return rc.f;
5551}
5552#endif
5553
5554
5555/**
5556 * Clears a bit range within a bitmap.
5557 *
5558 * @param pvBitmap Pointer to the bitmap.
5559 * @param   iBitStart   The first bit to clear.
5560 * @param iBitEnd The first bit not to clear.
5561 */
5562DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5563{
5564 if (iBitStart < iBitEnd)
5565 {
5566 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5567 int iStart = iBitStart & ~31;
5568 int iEnd = iBitEnd & ~31;
5569 if (iStart == iEnd)
5570 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5571 else
5572 {
5573 /* bits in first dword. */
5574 if (iBitStart & 31)
5575 {
5576 *pu32 &= (1 << (iBitStart & 31)) - 1;
5577 pu32++;
5578 iBitStart = iStart + 32;
5579 }
5580
5581 /* whole dword. */
5582 if (iBitStart != iEnd)
5583 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5584
5585 /* bits in last dword. */
5586 if (iBitEnd & 31)
5587 {
5588 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5589 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5590 }
5591 }
5592 }
5593}
5594
5595
5596/**
5597 * Sets a bit range within a bitmap.
5598 *
5599 * @param pvBitmap Pointer to the bitmap.
5600 * @param   iBitStart   The first bit to set.
5601 * @param iBitEnd The first bit not to set.
5602 */
5603DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5604{
5605 if (iBitStart < iBitEnd)
5606 {
5607 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5608 int iStart = iBitStart & ~31;
5609 int iEnd = iBitEnd & ~31;
5610 if (iStart == iEnd)
5611            *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5612 else
5613 {
5614 /* bits in first dword. */
5615 if (iBitStart & 31)
5616 {
5617 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5618 pu32++;
5619 iBitStart = iStart + 32;
5620 }
5621
5622 /* whole dword. */
5623 if (iBitStart != iEnd)
5624 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5625
5626 /* bits in last dword. */
5627 if (iBitEnd & 31)
5628 {
5629 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5630 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5631 }
5632 }
5633 }
5634}
5635
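/* Range sketch (illustrative): marking the pages [iFirst, iFirst + cPages) as
 * allocated in a page bitmap and releasing them again. Neither range helper is
 * atomic, so the caller is assumed to hold whatever lock protects the bitmap:
 *
 *     static void ExampleMarkPagesAllocated(volatile void *pvBitmap, int32_t iFirst, int32_t cPages)
 *     {
 *         ASMBitSetRange(pvBitmap, iFirst, iFirst + cPages);
 *     }
 *
 *     static void ExampleMarkPagesFree(volatile void *pvBitmap, int32_t iFirst, int32_t cPages)
 *     {
 *         ASMBitClearRange(pvBitmap, iFirst, iFirst + cPages);
 *     }
 */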
5636
5637/**
5638 * Finds the first clear bit in a bitmap.
5639 *
5640 * @returns Index of the first zero bit.
5641 * @returns -1 if no clear bit was found.
5642 * @param pvBitmap Pointer to the bitmap.
5643 * @param cBits The number of bits in the bitmap. Multiple of 32.
5644 */
5645#if RT_INLINE_ASM_EXTERNAL
5646DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5647#else
5648DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5649{
5650 if (cBits)
5651 {
5652 int32_t iBit;
5653# if RT_INLINE_ASM_GNU_STYLE
5654 RTCCUINTREG uEAX, uECX, uEDI;
5655 cBits = RT_ALIGN_32(cBits, 32);
5656 __asm__ __volatile__("repe; scasl\n\t"
5657 "je 1f\n\t"
5658# ifdef RT_ARCH_AMD64
5659 "lea -4(%%rdi), %%rdi\n\t"
5660 "xorl (%%rdi), %%eax\n\t"
5661 "subq %5, %%rdi\n\t"
5662# else
5663 "lea -4(%%edi), %%edi\n\t"
5664 "xorl (%%edi), %%eax\n\t"
5665 "subl %5, %%edi\n\t"
5666# endif
5667 "shll $3, %%edi\n\t"
5668 "bsfl %%eax, %%edx\n\t"
5669 "addl %%edi, %%edx\n\t"
5670 "1:\t\n"
5671 : "=d" (iBit),
5672 "=&c" (uECX),
5673 "=&D" (uEDI),
5674 "=&a" (uEAX)
5675 : "0" (0xffffffff),
5676 "mr" (pvBitmap),
5677 "1" (cBits >> 5),
5678 "2" (pvBitmap),
5679 "3" (0xffffffff));
5680# else
5681 cBits = RT_ALIGN_32(cBits, 32);
5682 __asm
5683 {
5684# ifdef RT_ARCH_AMD64
5685 mov rdi, [pvBitmap]
5686 mov rbx, rdi
5687# else
5688 mov edi, [pvBitmap]
5689 mov ebx, edi
5690# endif
5691 mov edx, 0ffffffffh
5692 mov eax, edx
5693 mov ecx, [cBits]
5694 shr ecx, 5
5695 repe scasd
5696 je done
5697
5698# ifdef RT_ARCH_AMD64
5699 lea rdi, [rdi - 4]
5700 xor eax, [rdi]
5701 sub rdi, rbx
5702# else
5703 lea edi, [edi - 4]
5704 xor eax, [edi]
5705 sub edi, ebx
5706# endif
5707 shl edi, 3
5708 bsf edx, eax
5709 add edx, edi
5710 done:
5711 mov [iBit], edx
5712 }
5713# endif
5714 return iBit;
5715 }
5716 return -1;
5717}
5718#endif
5719
5720
5721/**
5722 * Finds the next clear bit in a bitmap.
5723 *
5724 * @returns Index of the next clear bit.
5725 * @returns -1 if no clear bit was found.
5726 * @param pvBitmap Pointer to the bitmap.
5727 * @param cBits The number of bits in the bitmap. Multiple of 32.
5728 * @param iBitPrev The bit returned from the last search.
5729 * The search will start at iBitPrev + 1.
5730 */
5731#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5732DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5733#else
5734DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5735{
5736 int iBit = ++iBitPrev & 31;
5737 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5738 cBits -= iBitPrev & ~31;
5739 if (iBit)
5740 {
5741 /* inspect the first dword. */
5742 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5743# if RT_INLINE_ASM_USES_INTRIN
5744 unsigned long ulBit = 0;
5745 if (_BitScanForward(&ulBit, u32))
5746 return ulBit + iBitPrev;
5747 iBit = -1;
5748# else
5749# if RT_INLINE_ASM_GNU_STYLE
5750 __asm__ __volatile__("bsf %1, %0\n\t"
5751 "jnz 1f\n\t"
5752 "movl $-1, %0\n\t"
5753 "1:\n\t"
5754 : "=r" (iBit)
5755 : "r" (u32));
5756# else
5757 __asm
5758 {
5759 mov edx, [u32]
5760 bsf eax, edx
5761 jnz done
5762 mov eax, 0ffffffffh
5763 done:
5764 mov [iBit], eax
5765 }
5766# endif
5767 if (iBit >= 0)
5768 return iBit + iBitPrev;
5769# endif
5770 /* Search the rest of the bitmap, if there is anything. */
5771 if (cBits > 32)
5772 {
5773 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5774 if (iBit >= 0)
5775 return iBit + (iBitPrev & ~31) + 32;
5776 }
5777 }
5778 else
5779 {
5780 /* Search the rest of the bitmap. */
5781 iBit = ASMBitFirstClear(pvBitmap, cBits);
5782 if (iBit >= 0)
5783 return iBit + (iBitPrev & ~31);
5784 }
5785 return iBit;
5786}
5787#endif
5788
5789
5790/**
5791 * Finds the first set bit in a bitmap.
5792 *
5793 * @returns Index of the first set bit.
5794 * @returns -1 if no set bit was found.
5795 * @param pvBitmap Pointer to the bitmap.
5796 * @param cBits The number of bits in the bitmap. Multiple of 32.
5797 */
5798#if RT_INLINE_ASM_EXTERNAL
5799DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5800#else
5801DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5802{
5803 if (cBits)
5804 {
5805 int32_t iBit;
5806# if RT_INLINE_ASM_GNU_STYLE
5807 RTCCUINTREG uEAX, uECX, uEDI;
5808 cBits = RT_ALIGN_32(cBits, 32);
5809 __asm__ __volatile__("repe; scasl\n\t"
5810 "je 1f\n\t"
5811# ifdef RT_ARCH_AMD64
5812 "lea -4(%%rdi), %%rdi\n\t"
5813 "movl (%%rdi), %%eax\n\t"
5814 "subq %5, %%rdi\n\t"
5815# else
5816 "lea -4(%%edi), %%edi\n\t"
5817 "movl (%%edi), %%eax\n\t"
5818 "subl %5, %%edi\n\t"
5819# endif
5820 "shll $3, %%edi\n\t"
5821 "bsfl %%eax, %%edx\n\t"
5822 "addl %%edi, %%edx\n\t"
5823 "1:\t\n"
5824 : "=d" (iBit),
5825 "=&c" (uECX),
5826 "=&D" (uEDI),
5827 "=&a" (uEAX)
5828 : "0" (0xffffffff),
5829 "mr" (pvBitmap),
5830 "1" (cBits >> 5),
5831 "2" (pvBitmap),
5832 "3" (0));
5833# else
5834 cBits = RT_ALIGN_32(cBits, 32);
5835 __asm
5836 {
5837# ifdef RT_ARCH_AMD64
5838 mov rdi, [pvBitmap]
5839 mov rbx, rdi
5840# else
5841 mov edi, [pvBitmap]
5842 mov ebx, edi
5843# endif
5844 mov edx, 0ffffffffh
5845 xor eax, eax
5846 mov ecx, [cBits]
5847 shr ecx, 5
5848 repe scasd
5849 je done
5850# ifdef RT_ARCH_AMD64
5851 lea rdi, [rdi - 4]
5852 mov eax, [rdi]
5853 sub rdi, rbx
5854# else
5855 lea edi, [edi - 4]
5856 mov eax, [edi]
5857 sub edi, ebx
5858# endif
5859 shl edi, 3
5860 bsf edx, eax
5861 add edx, edi
5862 done:
5863 mov [iBit], edx
5864 }
5865# endif
5866 return iBit;
5867 }
5868 return -1;
5869}
5870#endif
5871
5872
5873/**
5874 * Finds the next set bit in a bitmap.
5875 *
5876 * @returns Index of the next set bit.
5877 * @returns -1 if no set bit was found.
5878 * @param pvBitmap Pointer to the bitmap.
5879 * @param cBits The number of bits in the bitmap. Multiple of 32.
5880 * @param iBitPrev The bit returned from the last search.
5881 * The search will start at iBitPrev + 1.
5882 */
5883#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5884DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5885#else
5886DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5887{
5888 int iBit = ++iBitPrev & 31;
5889 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5890 cBits -= iBitPrev & ~31;
5891 if (iBit)
5892 {
5893 /* inspect the first dword. */
5894 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5895# if RT_INLINE_ASM_USES_INTRIN
5896 unsigned long ulBit = 0;
5897 if (_BitScanForward(&ulBit, u32))
5898 return ulBit + iBitPrev;
5899 iBit = -1;
5900# else
5901# if RT_INLINE_ASM_GNU_STYLE
5902 __asm__ __volatile__("bsf %1, %0\n\t"
5903 "jnz 1f\n\t"
5904 "movl $-1, %0\n\t"
5905 "1:\n\t"
5906 : "=r" (iBit)
5907 : "r" (u32));
5908# else
5909 __asm
5910 {
5911 mov edx, u32
5912 bsf eax, edx
5913 jnz done
5914 mov eax, 0ffffffffh
5915 done:
5916 mov [iBit], eax
5917 }
5918# endif
5919 if (iBit >= 0)
5920 return iBit + iBitPrev;
5921# endif
5922 /* Search the rest of the bitmap, if there is anything. */
5923 if (cBits > 32)
5924 {
5925 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5926 if (iBit >= 0)
5927 return iBit + (iBitPrev & ~31) + 32;
5928 }
5929
5930 }
5931 else
5932 {
5933 /* Search the rest of the bitmap. */
5934 iBit = ASMBitFirstSet(pvBitmap, cBits);
5935 if (iBit >= 0)
5936 return iBit + (iBitPrev & ~31);
5937 }
5938 return iBit;
5939}
5940#endif
5941
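/* Sketch of walking all set bits with the first/next pair above (cBits must be
 * a multiple of 32, matching the contract of both functions; the callback is a
 * placeholder):
 *
 *     static void ExampleForEachSetBit(const volatile void *pvBitmap, uint32_t cBits,
 *                                      void (*pfnWorker)(int iBit))
 *     {
 *         int iBit = ASMBitFirstSet(pvBitmap, cBits);
 *         while (iBit >= 0)
 *         {
 *             pfnWorker(iBit);
 *             iBit = ASMBitNextSet(pvBitmap, cBits, iBit);
 *         }
 *     }
 */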
5942
5943/**
5944 * Finds the first bit which is set in the given 32-bit integer.
5945 * Bits are numbered from 1 (least significant) to 32.
5946 *
5947 * @returns index [1..32] of the first set bit.
5948 * @returns 0 if all bits are cleared.
5949 * @param u32 Integer to search for set bits.
5950 * @remark Similar to ffs() in BSD.
5951 */
5952DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5953{
5954# if RT_INLINE_ASM_USES_INTRIN
5955 unsigned long iBit;
5956 if (_BitScanForward(&iBit, u32))
5957 iBit++;
5958 else
5959 iBit = 0;
5960# elif RT_INLINE_ASM_GNU_STYLE
5961 uint32_t iBit;
5962 __asm__ __volatile__("bsf %1, %0\n\t"
5963 "jnz 1f\n\t"
5964 "xorl %0, %0\n\t"
5965 "jmp 2f\n"
5966 "1:\n\t"
5967 "incl %0\n"
5968 "2:\n\t"
5969 : "=r" (iBit)
5970 : "rm" (u32));
5971# else
5972 uint32_t iBit;
5973 _asm
5974 {
5975 bsf eax, [u32]
5976 jnz found
5977 xor eax, eax
5978 jmp done
5979 found:
5980 inc eax
5981 done:
5982 mov [iBit], eax
5983 }
5984# endif
5985 return iBit;
5986}
5987
5988
5989/**
5990 * Finds the first bit which is set in the given 32-bit integer.
5991 * Bits are numbered from 1 (least significant) to 32.
5992 *
5993 * @returns index [1..32] of the first set bit.
5994 * @returns 0 if all bits are cleared.
5995 * @param i32 Integer to search for set bits.
5996 * @remark Similar to ffs() in BSD.
5997 */
5998DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5999{
6000 return ASMBitFirstSetU32((uint32_t)i32);
6001}
6002
6003
6004/**
6005 * Finds the last bit which is set in the given 32-bit integer.
6006 * Bits are numbered from 1 (least significant) to 32.
6007 *
6008 * @returns index [1..32] of the last set bit.
6009 * @returns 0 if all bits are cleared.
6010 * @param u32 Integer to search for set bits.
6011 * @remark Similar to fls() in BSD.
6012 */
6013DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6014{
6015# if RT_INLINE_ASM_USES_INTRIN
6016 unsigned long iBit;
6017 if (_BitScanReverse(&iBit, u32))
6018 iBit++;
6019 else
6020 iBit = 0;
6021# elif RT_INLINE_ASM_GNU_STYLE
6022 uint32_t iBit;
6023 __asm__ __volatile__("bsrl %1, %0\n\t"
6024 "jnz 1f\n\t"
6025 "xorl %0, %0\n\t"
6026 "jmp 2f\n"
6027 "1:\n\t"
6028 "incl %0\n"
6029 "2:\n\t"
6030 : "=r" (iBit)
6031 : "rm" (u32));
6032# else
6033 uint32_t iBit;
6034 _asm
6035 {
6036 bsr eax, [u32]
6037 jnz found
6038 xor eax, eax
6039 jmp done
6040 found:
6041 inc eax
6042 done:
6043 mov [iBit], eax
6044 }
6045# endif
6046 return iBit;
6047}
6048
6049
6050/**
6051 * Finds the last bit which is set in the given 32-bit integer.
6052 * Bits are numbered from 1 (least significant) to 32.
6053 *
6054 * @returns index [1..32] of the last set bit.
6055 * @returns 0 if all bits are cleared.
6056 * @param i32 Integer to search for set bits.
6057 * @remark Similar to fls() in BSD.
6058 */
6059DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6060{
6061    return ASMBitLastSetU32((uint32_t)i32);
6062}
6063
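/* ASMBitLastSetU32 doubles as a cheap integer log2 since it returns
 * log2(u32) + 1 for a power of two. A sketch that computes how many bits are
 * needed to index cbAlloc bytes (illustrative):
 *
 *     static unsigned ExampleBitsNeeded(uint32_t cbAlloc)
 *     {
 *         unsigned iLast = ASMBitLastSetU32(cbAlloc);    // 0 if cbAlloc is 0
 *         if (iLast && !(cbAlloc & (cbAlloc - 1)))
 *             return iLast - 1;                          // exact power of two
 *         return iLast;                                  // otherwise round up
 *     }
 */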
6064/**
6065 * Reverse the byte order of the given 16-bit integer.
6066 *
6067 * @returns The 16-bit integer with the byte order reversed.
6068 * @param u16 16-bit integer value.
6069 */
6070DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6071{
6072#if RT_INLINE_ASM_USES_INTRIN
6073 u16 = _byteswap_ushort(u16);
6074#elif RT_INLINE_ASM_GNU_STYLE
6075 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6076#else
6077 _asm
6078 {
6079 mov ax, [u16]
6080 ror ax, 8
6081 mov [u16], ax
6082 }
6083#endif
6084 return u16;
6085}
6086
6087/**
6088 * Reverse the byte order of the given 32-bit integer.
6089 *
6090 * @returns The 32-bit integer with the byte order reversed.
6091 * @param u32 32-bit integer value.
6092 */
6093DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6094{
6095#if RT_INLINE_ASM_USES_INTRIN
6096 u32 = _byteswap_ulong(u32);
6097#elif RT_INLINE_ASM_GNU_STYLE
6098 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6099#else
6100 _asm
6101 {
6102 mov eax, [u32]
6103 bswap eax
6104 mov [u32], eax
6105 }
6106#endif
6107 return u32;
6108}
6109
6110
6111/**
6112 * Reverse the byte order of the given 64-bit integer.
6113 *
6114 * @returns The 64-bit integer with the byte order reversed.
6115 * @param u64 64-bit integer value.
6116 */
6117DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6118{
6119#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6120 u64 = _byteswap_uint64(u64);
6121#else
6122 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6123 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6124#endif
6125 return u64;
6126}
6127
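/* The byte swappers are the building blocks for endian conversion; on the
 * little-endian hosts this file targets, a big-endian on-disk or on-wire field
 * must be swapped before use (sketch; assumes the value is naturally aligned):
 *
 *     static uint32_t ExampleReadBE32(const uint32_t *pu32BigEndian)
 *     {
 *         return ASMByteSwapU32(*pu32BigEndian);  // big endian -> host order
 *     }
 */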
6128
6129/** @} */
6130
6131
6132/** @} */
6133#endif
6134