VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 14758

Last change on this file since 14758 was 14246, checked in by vboxsync, 16 years ago

ASMAtomicSubS32/U32.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 157.3 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a Microsoft compiler with _MSC_VER >= 1400.
 * In that case <intrin.h> is included and the intrinsics listed below are
 * enabled via \#pragma intrinsic.
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(__readdr)
72# pragma intrinsic(__writedr)
73# pragma intrinsic(_BitScanForward)
74# pragma intrinsic(_BitScanReverse)
75# pragma intrinsic(_bittest)
76# pragma intrinsic(_bittestandset)
77# pragma intrinsic(_bittestandreset)
78# pragma intrinsic(_bittestandcomplement)
79# pragma intrinsic(_byteswap_ushort)
80# pragma intrinsic(_byteswap_ulong)
81# pragma intrinsic(_interlockedbittestandset)
82# pragma intrinsic(_interlockedbittestandreset)
83# pragma intrinsic(_InterlockedAnd)
84# pragma intrinsic(_InterlockedOr)
85# pragma intrinsic(_InterlockedIncrement)
86# pragma intrinsic(_InterlockedDecrement)
87# pragma intrinsic(_InterlockedExchange)
88# pragma intrinsic(_InterlockedExchangeAdd)
89# pragma intrinsic(_InterlockedCompareExchange)
90# pragma intrinsic(_InterlockedCompareExchange64)
/* The following intrinsics are only enabled for AMD64 builds. */
91# ifdef RT_ARCH_AMD64
92# pragma intrinsic(__stosq)
93# pragma intrinsic(__readcr8)
94# pragma intrinsic(__writecr8)
95# pragma intrinsic(_byteswap_uint64)
96# pragma intrinsic(_InterlockedExchange64)
97# endif
98# endif
99#endif
/* Fallback: not a Microsoft compiler, or _MSC_VER is below 1400. */
100#ifndef RT_INLINE_ASM_USES_INTRIN
101# define RT_INLINE_ASM_USES_INTRIN 0
102#endif
103
104
105
106/** @defgroup grp_asm ASM - Assembly Routines
107 * @ingroup grp_rt
108 *
109 * @remarks The difference between ordered and unordered atomic operations is that
110 * the former will complete outstanding reads and writes before continuing
111 * while the latter doesn't make any promises about the order. Ordered
112 * operations don't, it seems, make any 100% promise with regard to whether
113 * the operation will complete before any subsequent memory access.
114 * (please, correct if wrong.)
115 *
116 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
117 * are unordered (note the Uo).
118 *
119 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
120 * or even optimize assembler instructions away. For instance, in the following code
121 * the second rdmsr instruction is optimized away because gcc treats that instruction
122 * as deterministic:
123 *
124 * @code
125 * static inline uint64_t rdmsr_low(int idx)
126 * {
127 * uint32_t low;
128 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
129 * }
130 * ...
131 * uint32_t msr1 = rdmsr_low(1);
132 * foo(msr1);
133 * msr1 = rdmsr_low(1);
134 * bar(msr1);
135 * @endcode
136 *
137 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
138 * use the result of the first call as input parameter for bar() as well. For rdmsr this
139 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
144
145/** @def RT_INLINE_ASM_EXTERNAL
146 * Defined as 1 if the compiler does not support inline assembly.
147 * The ASM* functions will then be implemented in an external .asm file.
148 *
149 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
150 * inline assembly in their AMD64 compiler.
 * @remark As written, this is 1 only when both _MSC_VER and RT_ARCH_AMD64 are
 *         defined, and 0 for every other compiler/architecture combination.
151 */
152#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
153# define RT_INLINE_ASM_EXTERNAL 1
154#else
155# define RT_INLINE_ASM_EXTERNAL 0
156#endif
157
158/** @def RT_INLINE_ASM_GNU_STYLE
159 * Defined as 1 if the compiler understands GNU style inline assembly.
 * Set to 0 when _MSC_VER is defined; every other compiler is treated as
 * understanding GNU style inline assembly.
160 */
161#if defined(_MSC_VER)
162# define RT_INLINE_ASM_GNU_STYLE 0
163#else
164# define RT_INLINE_ASM_GNU_STYLE 1
165#endif
166
167
168/** @todo find a more proper place for this structure? */
/* Packed to 1 byte so that pIdt follows the 16-bit cbIdt without padding. */
169#pragma pack(1)
170/** IDTR
 * Buffer type passed to ASMGetIDTR() / ASMSetIDTR(), which hand it directly
 * to the sidt / lidt instructions. */
171typedef struct RTIDTR
172{
173 /** Size of the IDT. */
174 uint16_t cbIdt;
175 /** Address of the IDT. */
176 uintptr_t pIdt;
177} RTIDTR, *PRTIDTR;
/* Restore the default packing. */
178#pragma pack()
179
/* Packed to 1 byte so that pGdt follows the 16-bit cbGdt without padding. */
180#pragma pack(1)
181/** GDTR
 * Buffer type passed to ASMGetGDTR(), which hands it directly to the sgdt
 * instruction. */
182typedef struct RTGDTR
183{
184 /** Size of the GDT. */
185 uint16_t cbGdt;
186 /** Address of the GDT. */
187 uintptr_t pGdt;
188} RTGDTR, *PRTGDTR;
/* Restore the default packing. */
189#pragma pack()
190
191
192/** @def ASMReturnAddress
193 * Gets the return address of the current (or calling if you like) function or method.
 *
 * Maps to the _ReturnAddress() intrinsic on Microsoft compilers and to
 * __builtin_return_address(0) on GCC (and when Doxygen is running). Any other
 * compiler is rejected with an \#error.
194 */
195#ifdef _MSC_VER
196# ifdef __cplusplus
197extern "C"
198# endif
199void * _ReturnAddress(void);
200# pragma intrinsic(_ReturnAddress)
201# define ASMReturnAddress() _ReturnAddress()
202#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
203# define ASMReturnAddress() __builtin_return_address(0)
204#else
205# error "Unsupported compiler."
206#endif
207
208
209/**
210 * Gets the content of the IDTR CPU register.
211 * @param pIdtr Where to store the IDTR contents.
 * @remark Executes sidt with the caller's buffer as the memory operand.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set;
 *         otherwise defined inline in GNU-style or MSC-style assembly.
212 */
213#if RT_INLINE_ASM_EXTERNAL
214DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
215#else
216DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
217{
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
220# else
221 __asm
222 {
223# ifdef RT_ARCH_AMD64
224 mov rax, [pIdtr]
225 sidt [rax]
226# else
227 mov eax, [pIdtr]
228 sidt [eax]
229# endif
230 }
231# endif
232}
233#endif
234
235
236/**
237 * Sets the content of the IDTR CPU register.
238 * @param pIdtr Where to load the IDTR contents from
 * @remark Executes lidt with the caller's buffer as the memory operand.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set;
 *         otherwise defined inline in GNU-style or MSC-style assembly.
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
242#else
243DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
247# else
248 __asm
249 {
250# ifdef RT_ARCH_AMD64
251 mov rax, [pIdtr]
252 lidt [rax]
253# else
254 mov eax, [pIdtr]
255 lidt [eax]
256# endif
257 }
258# endif
259}
260#endif
261
262
263/**
264 * Gets the content of the GDTR CPU register.
265 * @param pGdtr Where to store the GDTR contents.
 * @remark Executes sgdt with the caller's buffer as the memory operand.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set;
 *         otherwise defined inline in GNU-style or MSC-style assembly.
266 */
267#if RT_INLINE_ASM_EXTERNAL
268DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
269#else
270DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
271{
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
274# else
275 __asm
276 {
277# ifdef RT_ARCH_AMD64
278 mov rax, [pGdtr]
279 sgdt [rax]
280# else
281 mov eax, [pGdtr]
282 sgdt [eax]
283# endif
284 }
285# endif
286}
287#endif
288
289/**
290 * Get the cs register.
291 * @returns cs.
 * @remark Copies the CS selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetCS(void);
295#else
296DECLINLINE(RTSEL) ASMGetCS(void)
297{
298 RTSEL SelCS;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
301# else
302 __asm
303 {
304 mov ax, cs
305 mov [SelCS], ax
306 }
307# endif
308 return SelCS;
309}
310#endif
311
312
313/**
314 * Get the DS register.
315 * @returns DS.
 * @remark Copies the DS selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetDS(void);
319#else
320DECLINLINE(RTSEL) ASMGetDS(void)
321{
322 RTSEL SelDS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
325# else
326 __asm
327 {
328 mov ax, ds
329 mov [SelDS], ax
330 }
331# endif
332 return SelDS;
333}
334#endif
335
336
337/**
338 * Get the ES register.
339 * @returns ES.
 * @remark Copies the ES selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetES(void);
343#else
344DECLINLINE(RTSEL) ASMGetES(void)
345{
346 RTSEL SelES;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
349# else
350 __asm
351 {
352 mov ax, es
353 mov [SelES], ax
354 }
355# endif
356 return SelES;
357}
358#endif
359
360
361/**
362 * Get the FS register.
363 * @returns FS.
 * @remark Copies the FS selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetFS(void);
367#else
368DECLINLINE(RTSEL) ASMGetFS(void)
369{
370 RTSEL SelFS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
373# else
374 __asm
375 {
376 mov ax, fs
377 mov [SelFS], ax
378 }
379# endif
380 return SelFS;
381}
382# endif
383
384
385/**
386 * Get the GS register.
387 * @returns GS.
 * @remark Copies the GS selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetGS(void);
391#else
392DECLINLINE(RTSEL) ASMGetGS(void)
393{
394 RTSEL SelGS;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
397# else
398 __asm
399 {
400 mov ax, gs
401 mov [SelGS], ax
402 }
403# endif
404 return SelGS;
405}
406#endif
407
408
409/**
410 * Get the SS register.
411 * @returns SS.
 * @remark Copies the SS selector into a local RTSEL with a 16-bit mov.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTSEL) ASMGetSS(void);
415#else
416DECLINLINE(RTSEL) ASMGetSS(void)
417{
418 RTSEL SelSS;
419# if RT_INLINE_ASM_GNU_STYLE
420 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
421# else
422 __asm
423 {
424 mov ax, ss
425 mov [SelSS], ax
426 }
427# endif
428 return SelSS;
429}
430#endif
431
432
433/**
434 * Get the TR register.
435 * @returns TR.
 * @remark Uses the str instruction to store the task register selector.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
436 */
437#if RT_INLINE_ASM_EXTERNAL
438DECLASM(RTSEL) ASMGetTR(void);
439#else
440DECLINLINE(RTSEL) ASMGetTR(void)
441{
442 RTSEL SelTR;
443# if RT_INLINE_ASM_GNU_STYLE
444 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
445# else
446 __asm
447 {
448 str ax
449 mov [SelTR], ax
450 }
451# endif
452 return SelTR;
453}
454#endif
455
456
457/**
458 * Get the [RE]FLAGS register.
459 * @returns [RE]FLAGS.
 * @remark Pushes the flags register onto the stack (pushfq on AMD64,
 *         pushfl/pushfd otherwise) and pops it into the result variable.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
460 */
461#if RT_INLINE_ASM_EXTERNAL
462DECLASM(RTCCUINTREG) ASMGetFlags(void);
463#else
464DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
465{
466 RTCCUINTREG uFlags;
467# if RT_INLINE_ASM_GNU_STYLE
468# ifdef RT_ARCH_AMD64
469 __asm__ __volatile__("pushfq\n\t"
470 "popq %0\n\t"
471 : "=g" (uFlags));
472# else
473 __asm__ __volatile__("pushfl\n\t"
474 "popl %0\n\t"
475 : "=g" (uFlags));
476# endif
477# else
478 __asm
479 {
480# ifdef RT_ARCH_AMD64
481 pushfq
482 pop [uFlags]
483# else
484 pushfd
485 pop [uFlags]
486# endif
487 }
488# endif
489 return uFlags;
490}
491#endif
492
493
494/**
495 * Set the [RE]FLAGS register.
496 * @param uFlags The new [RE]FLAGS value.
 * @remark Pushes uFlags onto the stack and pops it into the flags register
 *         (popfq on AMD64, popfl/popfd otherwise).
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set.
497 */
498#if RT_INLINE_ASM_EXTERNAL
499DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
500#else
501DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
502{
503# if RT_INLINE_ASM_GNU_STYLE
504# ifdef RT_ARCH_AMD64
505 __asm__ __volatile__("pushq %0\n\t"
506 "popfq\n\t"
507 : : "g" (uFlags));
508# else
509 __asm__ __volatile__("pushl %0\n\t"
510 "popfl\n\t"
511 : : "g" (uFlags));
512# endif
513# else
514 __asm
515 {
516# ifdef RT_ARCH_AMD64
517 push [uFlags]
518 popfq
519# else
520 push [uFlags]
521 popfd
522# endif
523 }
524# endif
525}
526#endif
527
528
529/**
530 * Gets the content of the CPU timestamp counter register.
531 *
532 * @returns TSC.
 * @remark The rdtsc result is split over EAX (stored to u.s.Lo) and EDX (stored
 *         to u.s.Hi); the RTUINT64U union joins the halves into the returned
 *         64-bit value. The MSC build uses the __rdtsc() intrinsic when
 *         RT_INLINE_ASM_USES_INTRIN is set. Only declared here (DECLASM) when
 *         RT_INLINE_ASM_EXTERNAL is set and no intrinsics are available.
533 */
534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
535DECLASM(uint64_t) ASMReadTSC(void);
536#else
537DECLINLINE(uint64_t) ASMReadTSC(void)
538{
539 RTUINT64U u;
540# if RT_INLINE_ASM_GNU_STYLE
541 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
542# else
543# if RT_INLINE_ASM_USES_INTRIN
544 u.u = __rdtsc();
545# else
546 __asm
547 {
548 rdtsc
549 mov [u.s.Lo], eax
550 mov [u.s.Hi], edx
551 }
552# endif
553# endif
554 return u.u;
555}
556#endif
557
558
559/**
560 * Performs the cpuid instruction returning all registers.
561 *
562 * @param uOperator CPUID operation (eax).
563 * @param pvEAX Where to store eax.
564 * @param pvEBX Where to store ebx.
565 * @param pvECX Where to store ecx.
566 * @param pvEDX Where to store edx.
567 * @remark We're using void pointers to ease the use of special bitfield structures and such.
 * @remark Every output pointer is written through as a uint32_t. Four variants exist:
 *         GNU/AMD64 (register-sized temporaries truncated to 32 bits), GNU/32-bit
 *         (EBX swapped in and out with xchgl around cpuid instead of being named
 *         as an output register), the MSC __cpuid() intrinsic, and MSC inline
 *         assembly (EBX saved and restored with push/pop).
568 */
569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
570DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
571#else
572DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
573{
574# if RT_INLINE_ASM_GNU_STYLE
575# ifdef RT_ARCH_AMD64
576 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
577 __asm__ ("cpuid\n\t"
578 : "=a" (uRAX),
579 "=b" (uRBX),
580 "=c" (uRCX),
581 "=d" (uRDX)
582 : "0" (uOperator));
583 *(uint32_t *)pvEAX = (uint32_t)uRAX;
584 *(uint32_t *)pvEBX = (uint32_t)uRBX;
585 *(uint32_t *)pvECX = (uint32_t)uRCX;
586 *(uint32_t *)pvEDX = (uint32_t)uRDX;
587# else
588 __asm__ ("xchgl %%ebx, %1\n\t"
589 "cpuid\n\t"
590 "xchgl %%ebx, %1\n\t"
591 : "=a" (*(uint32_t *)pvEAX),
592 "=r" (*(uint32_t *)pvEBX),
593 "=c" (*(uint32_t *)pvECX),
594 "=d" (*(uint32_t *)pvEDX)
595 : "0" (uOperator));
596# endif
597
598# elif RT_INLINE_ASM_USES_INTRIN
599 int aInfo[4];
600 __cpuid(aInfo, uOperator);
601 *(uint32_t *)pvEAX = aInfo[0];
602 *(uint32_t *)pvEBX = aInfo[1];
603 *(uint32_t *)pvECX = aInfo[2];
604 *(uint32_t *)pvEDX = aInfo[3];
605
606# else
607 uint32_t uEAX;
608 uint32_t uEBX;
609 uint32_t uECX;
610 uint32_t uEDX;
611 __asm
612 {
613 push ebx
614 mov eax, [uOperator]
615 cpuid
616 mov [uEAX], eax
617 mov [uEBX], ebx
618 mov [uECX], ecx
619 mov [uEDX], edx
620 pop ebx
621 }
622 *(uint32_t *)pvEAX = uEAX;
623 *(uint32_t *)pvEBX = uEBX;
624 *(uint32_t *)pvECX = uECX;
625 *(uint32_t *)pvEDX = uEDX;
626# endif
627}
628#endif
629
630
631/**
632 * Performs the cpuid instruction returning all registers.
633 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4)
634 *
635 * @param uOperator CPUID operation (eax).
636 * @param uIdxECX ecx index
637 * @param pvEAX Where to store eax.
638 * @param pvEBX Where to store ebx.
639 * @param pvECX Where to store ecx.
640 * @param pvEDX Where to store edx.
641 * @remark We're using void pointers to ease the use of special bitfield structures and such.
 * @remark NOTE(review): the RT_INLINE_ASM_USES_INTRIN path calls __cpuid() with
 *         uOperator only, so uIdxECX is not passed to the instruction there.
 *         The GNU-style and MSC inline assembly paths do load uIdxECX into ECX.
642 */
643#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
644DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
645#else
646DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
647{
648# if RT_INLINE_ASM_GNU_STYLE
649# ifdef RT_ARCH_AMD64
650 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
651 __asm__ ("cpuid\n\t"
652 : "=a" (uRAX),
653 "=b" (uRBX),
654 "=c" (uRCX),
655 "=d" (uRDX)
656 : "0" (uOperator),
657 "2" (uIdxECX));
658 *(uint32_t *)pvEAX = (uint32_t)uRAX;
659 *(uint32_t *)pvEBX = (uint32_t)uRBX;
660 *(uint32_t *)pvECX = (uint32_t)uRCX;
661 *(uint32_t *)pvEDX = (uint32_t)uRDX;
662# else
663 __asm__ ("xchgl %%ebx, %1\n\t"
664 "cpuid\n\t"
665 "xchgl %%ebx, %1\n\t"
666 : "=a" (*(uint32_t *)pvEAX),
667 "=r" (*(uint32_t *)pvEBX),
668 "=c" (*(uint32_t *)pvECX),
669 "=d" (*(uint32_t *)pvEDX)
670 : "0" (uOperator),
671 "2" (uIdxECX));
672# endif
673
674# elif RT_INLINE_ASM_USES_INTRIN
675 int aInfo[4];
676 /* ??? another intrinsic ??? */
677 __cpuid(aInfo, uOperator);
678 *(uint32_t *)pvEAX = aInfo[0];
679 *(uint32_t *)pvEBX = aInfo[1];
680 *(uint32_t *)pvECX = aInfo[2];
681 *(uint32_t *)pvEDX = aInfo[3];
682
683# else
684 uint32_t uEAX;
685 uint32_t uEBX;
686 uint32_t uECX;
687 uint32_t uEDX;
688 __asm
689 {
690 push ebx
691 mov eax, [uOperator]
692 mov ecx, [uIdxECX]
693 cpuid
694 mov [uEAX], eax
695 mov [uEBX], ebx
696 mov [uECX], ecx
697 mov [uEDX], edx
698 pop ebx
699 }
700 *(uint32_t *)pvEAX = uEAX;
701 *(uint32_t *)pvEBX = uEBX;
702 *(uint32_t *)pvECX = uECX;
703 *(uint32_t *)pvEDX = uEDX;
704# endif
705}
706#endif
707
708
709/**
710 * Performs the cpuid instruction returning ecx and edx.
711 *
712 * @param uOperator CPUID operation (eax).
713 * @param pvECX Where to store ecx.
714 * @param pvEDX Where to store edx.
715 * @remark We're using void pointers to ease the use of special bitfield structures and such.
 * @remark The inline variant forwards to ASMCpuId(). The EAX result overwrites
 *         the local copy of uOperator and the EBX result lands in a scratch
 *         variable, so both are discarded.
716 */
717#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
718DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
719#else
720DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
721{
722 uint32_t uEBX;
723 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
724}
725#endif
726
727
728/**
729 * Performs the cpuid instruction returning edx.
730 *
731 * @param uOperator CPUID operation (eax).
732 * @returns EDX after cpuid operation.
 * @remark The result is collected in a register-sized variable and truncated
 *         to 32 bits on return. In the GNU-style 32-bit PIC/Darwin variant EBX
 *         is saved and restored with push/pop around cpuid; the other GNU-style
 *         variants list it as clobbered instead.
733 */
734#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
735DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
736#else
737DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
738{
739 RTCCUINTREG xDX;
740# if RT_INLINE_ASM_GNU_STYLE
741# ifdef RT_ARCH_AMD64
742 RTCCUINTREG uSpill;
743 __asm__ ("cpuid"
744 : "=a" (uSpill),
745 "=d" (xDX)
746 : "0" (uOperator)
747 : "rbx", "rcx");
748# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
749 __asm__ ("push %%ebx\n\t"
750 "cpuid\n\t"
751 "pop %%ebx\n\t"
752 : "=a" (uOperator),
753 "=d" (xDX)
754 : "0" (uOperator)
755 : "ecx");
756# else
757 __asm__ ("cpuid"
758 : "=a" (uOperator),
759 "=d" (xDX)
760 : "0" (uOperator)
761 : "ebx", "ecx");
762# endif
763
764# elif RT_INLINE_ASM_USES_INTRIN
765 int aInfo[4];
766 __cpuid(aInfo, uOperator);
767 xDX = aInfo[3];
768
769# else
770 __asm
771 {
772 push ebx
773 mov eax, [uOperator]
774 cpuid
775 mov [xDX], edx
776 pop ebx
777 }
778# endif
779 return (uint32_t)xDX;
780}
781#endif
782
783
784/**
785 * Performs the cpuid instruction returning ecx.
786 *
787 * @param uOperator CPUID operation (eax).
788 * @returns ECX after cpuid operation.
 * @remark The result is collected in a register-sized variable and truncated
 *         to 32 bits on return. In the GNU-style 32-bit PIC/Darwin variant EBX
 *         is saved and restored with push/pop around cpuid; the other GNU-style
 *         variants list it as clobbered instead.
789 */
790#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
791DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
792#else
793DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
794{
795 RTCCUINTREG xCX;
796# if RT_INLINE_ASM_GNU_STYLE
797# ifdef RT_ARCH_AMD64
798 RTCCUINTREG uSpill;
799 __asm__ ("cpuid"
800 : "=a" (uSpill),
801 "=c" (xCX)
802 : "0" (uOperator)
803 : "rbx", "rdx");
804# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
805 __asm__ ("push %%ebx\n\t"
806 "cpuid\n\t"
807 "pop %%ebx\n\t"
808 : "=a" (uOperator),
809 "=c" (xCX)
810 : "0" (uOperator)
811 : "edx");
812# else
813 __asm__ ("cpuid"
814 : "=a" (uOperator),
815 "=c" (xCX)
816 : "0" (uOperator)
817 : "ebx", "edx");
818
819# endif
820
821# elif RT_INLINE_ASM_USES_INTRIN
822 int aInfo[4];
823 __cpuid(aInfo, uOperator);
824 xCX = aInfo[2];
825
826# else
827 __asm
828 {
829 push ebx
830 mov eax, [uOperator]
831 cpuid
832 mov [xCX], ecx
833 pop ebx
834 }
835# endif
836 return (uint32_t)xCX;
837}
838#endif
839
840
841/**
842 * Checks if the current CPU supports CPUID.
843 *
844 * @returns true if CPUID is supported.
 * @remark On AMD64 this returns true without probing. Otherwise the flags
 *         register is read, bit 0x200000 is toggled and written back, and the
 *         flags are read again; the result is true when the two readings
 *         differ (the toggle stuck). The originally read flags are restored
 *         before returning.
845 */
846DECLINLINE(bool) ASMHasCpuId(void)
847{
848#ifdef RT_ARCH_AMD64
849 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
850#else /* !RT_ARCH_AMD64 */
851 bool fRet = false;
852# if RT_INLINE_ASM_GNU_STYLE
853 uint32_t u1;
854 uint32_t u2;
855 __asm__ ("pushf\n\t"
856 "pop %1\n\t"
857 "mov %1, %2\n\t"
858 "xorl $0x200000, %1\n\t"
859 "push %1\n\t"
860 "popf\n\t"
861 "pushf\n\t"
862 "pop %1\n\t"
863 "cmpl %1, %2\n\t"
864 "setne %0\n\t"
865 "push %2\n\t"
866 "popf\n\t"
867 : "=m" (fRet), "=r" (u1), "=r" (u2));
868# else
869 __asm
870 {
871 pushfd
872 pop eax
873 mov ebx, eax
874 xor eax, 0200000h
875 push eax
876 popfd
877 pushfd
878 pop eax
879 cmp eax, ebx
880 setne fRet
881 push ebx
882 popfd
883 }
884# endif
885 return fRet;
886#endif /* !RT_ARCH_AMD64 */
887}
888
889
890/**
891 * Gets the APIC ID of the current CPU.
892 *
893 * @returns the APIC ID.
 * @remark Executes cpuid with EAX=1 and returns bits 31:24 of the resulting
 *         EBX value. In the GNU-style 32-bit PIC/Darwin variant the original
 *         EBX is parked in the output operand before cpuid and swapped back
 *         with xchgl afterwards, which also leaves the cpuid EBX result in
 *         that operand.
894 */
895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
896DECLASM(uint8_t) ASMGetApicId(void);
897#else
898DECLINLINE(uint8_t) ASMGetApicId(void)
899{
900 RTCCUINTREG xBX;
901# if RT_INLINE_ASM_GNU_STYLE
902# ifdef RT_ARCH_AMD64
903 RTCCUINTREG uSpill;
904 __asm__ ("cpuid"
905 : "=a" (uSpill),
906 "=b" (xBX)
907 : "0" (1)
908 : "rcx", "rdx");
909# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
910 RTCCUINTREG uSpill;
911 __asm__ ("mov %%ebx,%1\n\t"
912 "cpuid\n\t"
913 "xchgl %%ebx,%1\n\t"
914 : "=a" (uSpill),
915 "=r" (xBX)
916 : "0" (1)
917 : "ecx", "edx");
918# else
919 RTCCUINTREG uSpill;
920 __asm__ ("cpuid"
921 : "=a" (uSpill),
922 "=b" (xBX)
923 : "0" (1)
924 : "ecx", "edx");
925# endif
926
927# elif RT_INLINE_ASM_USES_INTRIN
928 int aInfo[4];
929 __cpuid(aInfo, 1);
930 xBX = aInfo[1];
931
932# else
933 __asm
934 {
935 push ebx
936 mov eax, 1
937 cpuid
938 mov [xBX], ebx
939 pop ebx
940 }
941# endif
942 return (uint8_t)(xBX >> 24);
943}
944#endif
945
946
/**
 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
 *
 * The CPUID leaf 0 vendor string is spread over EBX, EDX and ECX (in that
 * order); all three registers must match "GenuineIntel" for the CPU to be
 * reported as Intel.
 *
 * @returns true if all three registers spell "GenuineIntel", false otherwise.
 * @param   uEBX    EBX return from ASMCpuId(0)
 * @param   uECX    ECX return from ASMCpuId(0)
 * @param   uEDX    EDX return from ASMCpuId(0)
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    /* Every part has to match; a single matching register is not sufficient. */
    return uEBX == 0x756e6547   /* "Genu" */
        && uECX == 0x6c65746e   /* "ntel" */
        && uEDX == 0x49656e69;  /* "ineI" */
}
961
962
963/**
964 * Tests if this is an genuin Intel CPU.
965 *
966 * @returns true/false.
967 */
968DECLINLINE(bool) ASMIsIntelCpu(void)
969{
970 uint32_t uEAX, uEBX, uECX, uEDX;
971 ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
972 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
973}
974
975
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The base family is in bits 11:8. When it is 0xf, the extended family
 * (bits 27:20, eight bits wide) is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    return ((uEAX >> 8) & 0xf) == 0xf
         ? ((uEAX >> 20) & 0xff) + 0xf  /* full 8-bit extended family field */
         : ((uEAX >> 8) & 0xf);
}
988
989
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * The base model (bits 7:4) is combined with the extended model (bits 19:16,
 * placed in the high nibble) when the base family is 0xf or 0x6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t const uBaseModel  = (uEAX >> 4) & 0xf;

    if (uBaseFamily != 0xf && uBaseFamily != 0x6)
        return uBaseModel;
    return uBaseModel | ((uEAX >> 12) & 0xf0);
}
1003
1004
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * The base model (bits 7:4) is combined with the extended model (bits 19:16,
 * placed in the high nibble) only when the base family is 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t const uBaseModel = (uEAX >> 4) & 0xf;

    if (((uEAX >> 8) & 0xf) != 0xf)
        return uBaseModel;
    return uBaseModel | ((uEAX >> 12) & 0xf0);
}
1018
1019
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * The extended model (bits 19:16) is merged into the high nibble when the
 * base family is 0xf, or when it is 0x6 on an Intel CPU.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel      = (uEAX >> 4) & 0xf;

    if (uBaseFamily == 0xf || (uBaseFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1033
1034
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
 *
 * @returns Stepping (the low four bits of uEAX).
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    uint32_t const uStepping = uEAX & 0xf;
    return uStepping;
}
1045
1046
1047/**
1048 * Get cr0.
1049 * @returns cr0.
 * @remark Uses the __readcr0() intrinsic when RT_INLINE_ASM_USES_INTRIN is set,
 *         otherwise a mov from cr0 in GNU-style or MSC-style inline assembly.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1050 */
1051#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1052DECLASM(RTCCUINTREG) ASMGetCR0(void);
1053#else
1054DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1055{
1056 RTCCUINTREG uCR0;
1057# if RT_INLINE_ASM_USES_INTRIN
1058 uCR0 = __readcr0();
1059
1060# elif RT_INLINE_ASM_GNU_STYLE
1061# ifdef RT_ARCH_AMD64
1062 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1063# else
1064 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1065# endif
1066# else
1067 __asm
1068 {
1069# ifdef RT_ARCH_AMD64
1070 mov rax, cr0
1071 mov [uCR0], rax
1072# else
1073 mov eax, cr0
1074 mov [uCR0], eax
1075# endif
1076 }
1077# endif
1078 return uCR0;
1079}
1080#endif
1081
1082
1083/**
1084 * Sets the CR0 register.
1085 * @param uCR0 The new CR0 value.
 * @remark Uses the __writecr0() intrinsic when RT_INLINE_ASM_USES_INTRIN is set,
 *         otherwise a mov to cr0 in GNU-style or MSC-style inline assembly.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1086 */
1087#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1088DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1089#else
1090DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1091{
1092# if RT_INLINE_ASM_USES_INTRIN
1093 __writecr0(uCR0);
1094
1095# elif RT_INLINE_ASM_GNU_STYLE
1096# ifdef RT_ARCH_AMD64
1097 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1098# else
1099 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1100# endif
1101# else
1102 __asm
1103 {
1104# ifdef RT_ARCH_AMD64
1105 mov rax, [uCR0]
1106 mov cr0, rax
1107# else
1108 mov eax, [uCR0]
1109 mov cr0, eax
1110# endif
1111 }
1112# endif
1113}
1114#endif
1115
1116
1117/**
1118 * Get cr2.
1119 * @returns cr2.
 * @remark Uses the __readcr2() intrinsic when RT_INLINE_ASM_USES_INTRIN is set,
 *         otherwise a mov from cr2 in GNU-style or MSC-style inline assembly.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1120 */
1121#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1122DECLASM(RTCCUINTREG) ASMGetCR2(void);
1123#else
1124DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1125{
1126 RTCCUINTREG uCR2;
1127# if RT_INLINE_ASM_USES_INTRIN
1128 uCR2 = __readcr2();
1129
1130# elif RT_INLINE_ASM_GNU_STYLE
1131# ifdef RT_ARCH_AMD64
1132 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1133# else
1134 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1135# endif
1136# else
1137 __asm
1138 {
1139# ifdef RT_ARCH_AMD64
1140 mov rax, cr2
1141 mov [uCR2], rax
1142# else
1143 mov eax, cr2
1144 mov [uCR2], eax
1145# endif
1146 }
1147# endif
1148 return uCR2;
1149}
1150#endif
1151
1152
1153/**
1154 * Sets the CR2 register.
1155 * @param uCR2 The new CR2 value.
 * @remark Unlike the other CRx setters in this file there is no intrinsic
 *         variant; the value is written with a mov to cr2 in GNU-style or
 *         MSC-style inline assembly. Only declared here (DECLASM) when
 *         RT_INLINE_ASM_EXTERNAL is set.
1156 */
1157#if RT_INLINE_ASM_EXTERNAL
1158DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1159#else
1160DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1161{
1162# if RT_INLINE_ASM_GNU_STYLE
1163# ifdef RT_ARCH_AMD64
1164 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1165# else
1166 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1167# endif
1168# else
1169 __asm
1170 {
1171# ifdef RT_ARCH_AMD64
1172 mov rax, [uCR2]
1173 mov cr2, rax
1174# else
1175 mov eax, [uCR2]
1176 mov cr2, eax
1177# endif
1178 }
1179# endif
1180}
1181#endif
1182
1183
1184/**
1185 * Get cr3.
1186 * @returns cr3.
 * @remark Uses the __readcr3() intrinsic when RT_INLINE_ASM_USES_INTRIN is set,
 *         otherwise a mov from cr3 in GNU-style or MSC-style inline assembly.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1187 */
1188#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1189DECLASM(RTCCUINTREG) ASMGetCR3(void);
1190#else
1191DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1192{
1193 RTCCUINTREG uCR3;
1194# if RT_INLINE_ASM_USES_INTRIN
1195 uCR3 = __readcr3();
1196
1197# elif RT_INLINE_ASM_GNU_STYLE
1198# ifdef RT_ARCH_AMD64
1199 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1200# else
1201 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1202# endif
1203# else
1204 __asm
1205 {
1206# ifdef RT_ARCH_AMD64
1207 mov rax, cr3
1208 mov [uCR3], rax
1209# else
1210 mov eax, cr3
1211 mov [uCR3], eax
1212# endif
1213 }
1214# endif
1215 return uCR3;
1216}
1217#endif
1218
1219
1220/**
1221 * Sets the CR3 register.
1222 *
1223 * @param uCR3 New CR3 value.
 * @remark Uses the __writecr3() intrinsic when RT_INLINE_ASM_USES_INTRIN is set,
 *         otherwise a mov to cr3 in GNU-style or MSC-style inline assembly.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1224 */
1225#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1226DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1227#else
1228DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1229{
1230# if RT_INLINE_ASM_USES_INTRIN
1231 __writecr3(uCR3);
1232
1233# elif RT_INLINE_ASM_GNU_STYLE
1234# ifdef RT_ARCH_AMD64
1235 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1236# else
1237 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1238# endif
1239# else
1240 __asm
1241 {
1242# ifdef RT_ARCH_AMD64
1243 mov rax, [uCR3]
1244 mov cr3, rax
1245# else
1246 mov eax, [uCR3]
1247 mov cr3, eax
1248# endif
1249 }
1250# endif
1251}
1252#endif
1253
1254
1255/**
1256 * Reloads the CR3 register.
 *
 * Reads CR3 and immediately writes the same value back.
 * NOTE(review): presumably done for the side effect of flushing the TLB -
 * confirm against the callers.
 *
 * @remark Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1257 */
1258#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1259DECLASM(void) ASMReloadCR3(void);
1260#else
1261DECLINLINE(void) ASMReloadCR3(void)
1262{
1263# if RT_INLINE_ASM_USES_INTRIN
1264 __writecr3(__readcr3());
1265
1266# elif RT_INLINE_ASM_GNU_STYLE
1267 RTCCUINTREG u;
1268# ifdef RT_ARCH_AMD64
1269 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1270 "movq %0, %%cr3\n\t"
1271 : "=r" (u));
1272# else
1273 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1274 "movl %0, %%cr3\n\t"
1275 : "=r" (u));
1276# endif
1277# else
1278 __asm
1279 {
1280# ifdef RT_ARCH_AMD64
1281 mov rax, cr3
1282 mov cr3, rax
1283# else
1284 mov eax, cr3
1285 mov cr3, eax
1286# endif
1287 }
1288# endif
1289}
1290#endif
1291
1292
1293/**
1294 * Get cr4.
1295 * @returns cr4.
 * @remark Uses the __readcr4() intrinsic when RT_INLINE_ASM_USES_INTRIN is set.
 *         The 32-bit MSC inline assembly variant emits the opcode bytes
 *         0F 20 E0 by hand (noted in the code as "mov eax, cr4") and saves and
 *         restores EAX around them.
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1296 */
1297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1298DECLASM(RTCCUINTREG) ASMGetCR4(void);
1299#else
1300DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1301{
1302 RTCCUINTREG uCR4;
1303# if RT_INLINE_ASM_USES_INTRIN
1304 uCR4 = __readcr4();
1305
1306# elif RT_INLINE_ASM_GNU_STYLE
1307# ifdef RT_ARCH_AMD64
1308 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1309# else
1310 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1311# endif
1312# else
1313 __asm
1314 {
1315# ifdef RT_ARCH_AMD64
1316 mov rax, cr4
1317 mov [uCR4], rax
1318# else
1319 push eax /* just in case */
1320 /*mov eax, cr4*/
1321 _emit 0x0f
1322 _emit 0x20
1323 _emit 0xe0
1324 mov [uCR4], eax
1325 pop eax
1326# endif
1327 }
1328# endif
1329 return uCR4;
1330}
1331#endif
1332
1333
1334/**
1335 * Sets the CR4 register.
1336 *
1337 * @param uCR4 New CR4 value.
 * @remark Uses the __writecr4() intrinsic when RT_INLINE_ASM_USES_INTRIN is set.
 *         The 32-bit MSC inline assembly variant emits the opcode bytes
 *         0F 22 E0 by hand (noted in the code as "mov cr4, eax").
 *         Only declared here (DECLASM) when RT_INLINE_ASM_EXTERNAL is set and
 *         no intrinsics are available.
1338 */
1339#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1340DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1341#else
1342DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1343{
1344# if RT_INLINE_ASM_USES_INTRIN
1345 __writecr4(uCR4);
1346
1347# elif RT_INLINE_ASM_GNU_STYLE
1348# ifdef RT_ARCH_AMD64
1349 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1350# else
1351 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1352# endif
1353# else
1354 __asm
1355 {
1356# ifdef RT_ARCH_AMD64
1357 mov rax, [uCR4]
1358 mov cr4, rax
1359# else
1360 mov eax, [uCR4]
1361 _emit 0x0F
1362 _emit 0x22
1363 _emit 0xE0 /* mov cr4, eax */
1364# endif
1365 }
1366# endif
1367}
1368#endif
1369
1370
1371/**
1372 * Get cr8.
1373 * @returns cr8.
1374 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1375 */
1376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1377DECLASM(RTCCUINTREG) ASMGetCR8(void);
1378#else
1379DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1380{
1381# ifdef RT_ARCH_AMD64
1382 RTCCUINTREG uCR8;
1383# if RT_INLINE_ASM_USES_INTRIN
1384 uCR8 = __readcr8();
1385
1386# elif RT_INLINE_ASM_GNU_STYLE
1387 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1388# else
1389 __asm
1390 {
1391 mov rax, cr8
1392 mov [uCR8], rax
1393 }
1394# endif
1395 return uCR8;
1396# else /* !RT_ARCH_AMD64 */
1397 return 0;
1398# endif /* !RT_ARCH_AMD64 */
1399}
1400#endif
1401
1402
1403/**
1404 * Enables interrupts (EFLAGS.IF).
1405 */
1406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1407DECLASM(void) ASMIntEnable(void);
1408#else
1409DECLINLINE(void) ASMIntEnable(void)
1410{
1411# if RT_INLINE_ASM_GNU_STYLE
1412 __asm("sti\n");
1413# elif RT_INLINE_ASM_USES_INTRIN
1414 _enable();
1415# else
1416 __asm sti
1417# endif
1418}
1419#endif
1420
1421
1422/**
1423 * Disables interrupts (!EFLAGS.IF).
1424 */
1425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1426DECLASM(void) ASMIntDisable(void);
1427#else
1428DECLINLINE(void) ASMIntDisable(void)
1429{
1430# if RT_INLINE_ASM_GNU_STYLE
1431 __asm("cli\n");
1432# elif RT_INLINE_ASM_USES_INTRIN
1433 _disable();
1434# else
1435 __asm cli
1436# endif
1437}
1438#endif
1439
1440
1441/**
1442 * Disables interrupts and returns previous xFLAGS.
1443 */
1444#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1445DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1446#else
1447DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1448{
1449 RTCCUINTREG xFlags;
1450# if RT_INLINE_ASM_GNU_STYLE
1451# ifdef RT_ARCH_AMD64
1452 __asm__ __volatile__("pushfq\n\t"
1453 "cli\n\t"
1454 "popq %0\n\t"
1455 : "=rm" (xFlags));
1456# else
1457 __asm__ __volatile__("pushfl\n\t"
1458 "cli\n\t"
1459 "popl %0\n\t"
1460 : "=rm" (xFlags));
1461# endif
1462# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1463 xFlags = ASMGetFlags();
1464 _disable();
1465# else
1466 __asm {
1467 pushfd
1468 cli
1469 pop [xFlags]
1470 }
1471# endif
1472 return xFlags;
1473}
1474#endif
1475
1476
1477/**
1478 * Reads a machine specific register.
1479 *
1480 * @returns Register content.
1481 * @param uRegister Register to read.
1482 */
1483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1484DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1485#else
1486DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1487{
1488 RTUINT64U u;
1489# if RT_INLINE_ASM_GNU_STYLE
1490 __asm__ __volatile__("rdmsr\n\t"
1491 : "=a" (u.s.Lo),
1492 "=d" (u.s.Hi)
1493 : "c" (uRegister));
1494
1495# elif RT_INLINE_ASM_USES_INTRIN
1496 u.u = __readmsr(uRegister);
1497
1498# else
1499 __asm
1500 {
1501 mov ecx, [uRegister]
1502 rdmsr
1503 mov [u.s.Lo], eax
1504 mov [u.s.Hi], edx
1505 }
1506# endif
1507
1508 return u.u;
1509}
1510#endif
1511
1512
1513/**
1514 * Writes a machine specific register.
1515 *
1516 * @returns Register content.
1517 * @param uRegister Register to write to.
1518 * @param u64Val Value to write.
1519 */
1520#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1521DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1522#else
1523DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1524{
1525 RTUINT64U u;
1526
1527 u.u = u64Val;
1528# if RT_INLINE_ASM_GNU_STYLE
1529 __asm__ __volatile__("wrmsr\n\t"
1530 ::"a" (u.s.Lo),
1531 "d" (u.s.Hi),
1532 "c" (uRegister));
1533
1534# elif RT_INLINE_ASM_USES_INTRIN
1535 __writemsr(uRegister, u.u);
1536
1537# else
1538 __asm
1539 {
1540 mov ecx, [uRegister]
1541 mov edx, [u.s.Hi]
1542 mov eax, [u.s.Lo]
1543 wrmsr
1544 }
1545# endif
1546}
1547#endif
1548
1549
1550/**
1551 * Reads low part of a machine specific register.
1552 *
1553 * @returns Register content.
1554 * @param uRegister Register to read.
1555 */
1556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1557DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1558#else
1559DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1560{
1561 uint32_t u32;
1562# if RT_INLINE_ASM_GNU_STYLE
1563 __asm__ __volatile__("rdmsr\n\t"
1564 : "=a" (u32)
1565 : "c" (uRegister)
1566 : "edx");
1567
1568# elif RT_INLINE_ASM_USES_INTRIN
1569 u32 = (uint32_t)__readmsr(uRegister);
1570
1571#else
1572 __asm
1573 {
1574 mov ecx, [uRegister]
1575 rdmsr
1576 mov [u32], eax
1577 }
1578# endif
1579
1580 return u32;
1581}
1582#endif
1583
1584
1585/**
1586 * Reads high part of a machine specific register.
1587 *
1588 * @returns Register content.
1589 * @param uRegister Register to read.
1590 */
1591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1592DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1593#else
1594DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1595{
1596 uint32_t u32;
1597# if RT_INLINE_ASM_GNU_STYLE
1598 __asm__ __volatile__("rdmsr\n\t"
1599 : "=d" (u32)
1600 : "c" (uRegister)
1601 : "eax");
1602
1603# elif RT_INLINE_ASM_USES_INTRIN
1604 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1605
1606# else
1607 __asm
1608 {
1609 mov ecx, [uRegister]
1610 rdmsr
1611 mov [u32], edx
1612 }
1613# endif
1614
1615 return u32;
1616}
1617#endif
1618
1619
1620/**
1621 * Gets dr0.
1622 *
1623 * @returns dr0.
1624 */
1625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1626DECLASM(RTCCUINTREG) ASMGetDR0(void);
1627#else
1628DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1629{
1630 RTCCUINTREG uDR0;
1631# if RT_INLINE_ASM_USES_INTRIN
1632 uDR0 = __readdr(0);
1633# elif RT_INLINE_ASM_GNU_STYLE
1634# ifdef RT_ARCH_AMD64
1635 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1636# else
1637 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1638# endif
1639# else
1640 __asm
1641 {
1642# ifdef RT_ARCH_AMD64
1643 mov rax, dr0
1644 mov [uDR0], rax
1645# else
1646 mov eax, dr0
1647 mov [uDR0], eax
1648# endif
1649 }
1650# endif
1651 return uDR0;
1652}
1653#endif
1654
1655
1656/**
1657 * Gets dr1.
1658 *
1659 * @returns dr1.
1660 */
1661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1662DECLASM(RTCCUINTREG) ASMGetDR1(void);
1663#else
1664DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1665{
1666 RTCCUINTREG uDR1;
1667# if RT_INLINE_ASM_USES_INTRIN
1668 uDR1 = __readdr(1);
1669# elif RT_INLINE_ASM_GNU_STYLE
1670# ifdef RT_ARCH_AMD64
1671 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1672# else
1673 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1674# endif
1675# else
1676 __asm
1677 {
1678# ifdef RT_ARCH_AMD64
1679 mov rax, dr1
1680 mov [uDR1], rax
1681# else
1682 mov eax, dr1
1683 mov [uDR1], eax
1684# endif
1685 }
1686# endif
1687 return uDR1;
1688}
1689#endif
1690
1691
1692/**
1693 * Gets dr2.
1694 *
1695 * @returns dr2.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(RTCCUINTREG) ASMGetDR2(void);
1699#else
1700DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1701{
1702 RTCCUINTREG uDR2;
1703# if RT_INLINE_ASM_USES_INTRIN
1704 uDR2 = __readdr(2);
1705# elif RT_INLINE_ASM_GNU_STYLE
1706# ifdef RT_ARCH_AMD64
1707 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1708# else
1709 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1710# endif
1711# else
1712 __asm
1713 {
1714# ifdef RT_ARCH_AMD64
1715 mov rax, dr2
1716 mov [uDR2], rax
1717# else
1718 mov eax, dr2
1719 mov [uDR2], eax
1720# endif
1721 }
1722# endif
1723 return uDR2;
1724}
1725#endif
1726
1727
1728/**
1729 * Gets dr3.
1730 *
1731 * @returns dr3.
1732 */
1733#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1734DECLASM(RTCCUINTREG) ASMGetDR3(void);
1735#else
1736DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1737{
1738 RTCCUINTREG uDR3;
1739# if RT_INLINE_ASM_USES_INTRIN
1740 uDR3 = __readdr(3);
1741# elif RT_INLINE_ASM_GNU_STYLE
1742# ifdef RT_ARCH_AMD64
1743 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1744# else
1745 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1746# endif
1747# else
1748 __asm
1749 {
1750# ifdef RT_ARCH_AMD64
1751 mov rax, dr3
1752 mov [uDR3], rax
1753# else
1754 mov eax, dr3
1755 mov [uDR3], eax
1756# endif
1757 }
1758# endif
1759 return uDR3;
1760}
1761#endif
1762
1763
1764/**
1765 * Gets dr6.
1766 *
1767 * @returns dr6.
1768 */
1769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1770DECLASM(RTCCUINTREG) ASMGetDR6(void);
1771#else
1772DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1773{
1774 RTCCUINTREG uDR6;
1775# if RT_INLINE_ASM_USES_INTRIN
1776 uDR6 = __readdr(6);
1777# elif RT_INLINE_ASM_GNU_STYLE
1778# ifdef RT_ARCH_AMD64
1779 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1780# else
1781 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1782# endif
1783# else
1784 __asm
1785 {
1786# ifdef RT_ARCH_AMD64
1787 mov rax, dr6
1788 mov [uDR6], rax
1789# else
1790 mov eax, dr6
1791 mov [uDR6], eax
1792# endif
1793 }
1794# endif
1795 return uDR6;
1796}
1797#endif
1798
1799
1800/**
1801 * Reads and clears DR6.
1802 *
1803 * @returns DR6.
1804 */
1805#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1806DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1807#else
1808DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1809{
1810 RTCCUINTREG uDR6;
1811# if RT_INLINE_ASM_USES_INTRIN
1812 uDR6 = __readdr(6);
1813 __writedr(6, 0xffff0ff0U); /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1814# elif RT_INLINE_ASM_GNU_STYLE
1815 RTCCUINTREG uNewValue = 0xffff0ff0U;/* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1816# ifdef RT_ARCH_AMD64
1817 __asm__ __volatile__("movq %%dr6, %0\n\t"
1818 "movq %1, %%dr6\n\t"
1819 : "=r" (uDR6)
1820 : "r" (uNewValue));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t"
1823 "movl %1, %%dr6\n\t"
1824 : "=r" (uDR6)
1825 : "r" (uNewValue));
1826# endif
1827# else
1828 __asm
1829 {
1830# ifdef RT_ARCH_AMD64
1831 mov rax, dr6
1832 mov [uDR6], rax
1833 mov rcx, rax
1834 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1835 mov dr6, rcx
1836# else
1837 mov eax, dr6
1838 mov [uDR6], eax
1839 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1840 mov dr6, ecx
1841# endif
1842 }
1843# endif
1844 return uDR6;
1845}
1846#endif
1847
1848
1849/**
1850 * Gets dr7.
1851 *
1852 * @returns dr7.
1853 */
1854#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1855DECLASM(RTCCUINTREG) ASMGetDR7(void);
1856#else
1857DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1858{
1859 RTCCUINTREG uDR7;
1860# if RT_INLINE_ASM_USES_INTRIN
1861 uDR7 = __readdr(7);
1862# elif RT_INLINE_ASM_GNU_STYLE
1863# ifdef RT_ARCH_AMD64
1864 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1865# else
1866 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr7
1873 mov [uDR7], rax
1874# else
1875 mov eax, dr7
1876 mov [uDR7], eax
1877# endif
1878 }
1879# endif
1880 return uDR7;
1881}
1882#endif
1883
1884
1885/**
1886 * Sets dr0.
1887 *
1888 * @param uDRVal Debug register value to write
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1892#else
1893DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1894{
1895# if RT_INLINE_ASM_USES_INTRIN
1896 __writedr(0, uDRVal);
1897# elif RT_INLINE_ASM_GNU_STYLE
1898# ifdef RT_ARCH_AMD64
1899 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1900# else
1901 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1902# endif
1903# else
1904 __asm
1905 {
1906# ifdef RT_ARCH_AMD64
1907 mov rax, [uDRVal]
1908 mov dr0, rax
1909# else
1910 mov eax, [uDRVal]
1911 mov dr0, eax
1912# endif
1913 }
1914# endif
1915}
1916#endif
1917
1918
1919/**
1920 * Sets dr1.
1921 *
1922 * @param uDRVal Debug register value to write
1923 */
1924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1925DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1926#else
1927DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1928{
1929# if RT_INLINE_ASM_USES_INTRIN
1930 __writedr(1, uDRVal);
1931# elif RT_INLINE_ASM_GNU_STYLE
1932# ifdef RT_ARCH_AMD64
1933 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1934# else
1935 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1936# endif
1937# else
1938 __asm
1939 {
1940# ifdef RT_ARCH_AMD64
1941 mov rax, [uDRVal]
1942 mov dr1, rax
1943# else
1944 mov eax, [uDRVal]
1945 mov dr1, eax
1946# endif
1947 }
1948# endif
1949}
1950#endif
1951
1952
1953/**
1954 * Sets dr2.
1955 *
1956 * @param uDRVal Debug register value to write
1957 */
1958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1959DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1960#else
1961DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1962{
1963# if RT_INLINE_ASM_USES_INTRIN
1964 __writedr(2, uDRVal);
1965# elif RT_INLINE_ASM_GNU_STYLE
1966# ifdef RT_ARCH_AMD64
1967 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1968# else
1969 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
1970# endif
1971# else
1972 __asm
1973 {
1974# ifdef RT_ARCH_AMD64
1975 mov rax, [uDRVal]
1976 mov dr2, rax
1977# else
1978 mov eax, [uDRVal]
1979 mov dr2, eax
1980# endif
1981 }
1982# endif
1983}
1984#endif
1985
1986
1987/**
1988 * Sets dr3.
1989 *
1990 * @param uDRVal Debug register value to write
1991 */
1992#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1993DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
1994#else
1995DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
1996{
1997# if RT_INLINE_ASM_USES_INTRIN
1998 __writedr(3, uDRVal);
1999# elif RT_INLINE_ASM_GNU_STYLE
2000# ifdef RT_ARCH_AMD64
2001 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2002# else
2003 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2004# endif
2005# else
2006 __asm
2007 {
2008# ifdef RT_ARCH_AMD64
2009 mov rax, [uDRVal]
2010 mov dr3, rax
2011# else
2012 mov eax, [uDRVal]
2013 mov dr3, eax
2014# endif
2015 }
2016# endif
2017}
2018#endif
2019
2020
2021/**
2022 * Sets dr6.
2023 *
2024 * @param uDRVal Debug register value to write
2025 */
2026#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2027DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2028#else
2029DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2030{
2031# if RT_INLINE_ASM_USES_INTRIN
2032 __writedr(6, uDRVal);
2033# elif RT_INLINE_ASM_GNU_STYLE
2034# ifdef RT_ARCH_AMD64
2035 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2036# else
2037 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2038# endif
2039# else
2040 __asm
2041 {
2042# ifdef RT_ARCH_AMD64
2043 mov rax, [uDRVal]
2044 mov dr6, rax
2045# else
2046 mov eax, [uDRVal]
2047 mov dr6, eax
2048# endif
2049 }
2050# endif
2051}
2052#endif
2053
2054
2055/**
2056 * Sets dr7.
2057 *
2058 * @param uDRVal Debug register value to write
2059 */
2060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2061DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2062#else
2063DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2064{
2065# if RT_INLINE_ASM_USES_INTRIN
2066 __writedr(7, uDRVal);
2067# elif RT_INLINE_ASM_GNU_STYLE
2068# ifdef RT_ARCH_AMD64
2069 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2070# else
2071 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2072# endif
2073# else
2074 __asm
2075 {
2076# ifdef RT_ARCH_AMD64
2077 mov rax, [uDRVal]
2078 mov dr7, rax
2079# else
2080 mov eax, [uDRVal]
2081 mov dr7, eax
2082# endif
2083 }
2084# endif
2085}
2086#endif
2087
2088
2089/**
2090 * Compiler memory barrier.
2091 *
2092 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2093 * values or any outstanding writes when returning from this function.
2094 *
2095 * This function must be used if non-volatile data is modified by a
2096 * device or the VMM. Typical cases are port access, MMIO access,
2097 * trapping instruction, etc.
2098 */
2099#if RT_INLINE_ASM_GNU_STYLE
2100# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2101#elif RT_INLINE_ASM_USES_INTRIN
2102# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2103#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2104DECLINLINE(void) ASMCompilerBarrier(void)
2105{
2106 __asm
2107 {
2108 }
2109}
2110#endif
2111
2112
2113/**
2114 * Writes a 8-bit unsigned integer to an I/O port, ordered.
2115 *
2116 * @param Port I/O port to read from.
2117 * @param u8 8-bit integer to write.
2118 */
2119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2120DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2121#else
2122DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2123{
2124# if RT_INLINE_ASM_GNU_STYLE
2125 __asm__ __volatile__("outb %b1, %w0\n\t"
2126 :: "Nd" (Port),
2127 "a" (u8));
2128
2129# elif RT_INLINE_ASM_USES_INTRIN
2130 __outbyte(Port, u8);
2131
2132# else
2133 __asm
2134 {
2135 mov dx, [Port]
2136 mov al, [u8]
2137 out dx, al
2138 }
2139# endif
2140}
2141#endif
2142
2143
2144/**
2145 * Gets a 8-bit unsigned integer from an I/O port, ordered.
2146 *
2147 * @returns 8-bit integer.
2148 * @param Port I/O port to read from.
2149 */
2150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2151DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2152#else
2153DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2154{
2155 uint8_t u8;
2156# if RT_INLINE_ASM_GNU_STYLE
2157 __asm__ __volatile__("inb %w1, %b0\n\t"
2158 : "=a" (u8)
2159 : "Nd" (Port));
2160
2161# elif RT_INLINE_ASM_USES_INTRIN
2162 u8 = __inbyte(Port);
2163
2164# else
2165 __asm
2166 {
2167 mov dx, [Port]
2168 in al, dx
2169 mov [u8], al
2170 }
2171# endif
2172 return u8;
2173}
2174#endif
2175
2176
2177/**
2178 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2179 *
2180 * @param Port I/O port to read from.
2181 * @param u16 16-bit integer to write.
2182 */
2183#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2184DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2185#else
2186DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2187{
2188# if RT_INLINE_ASM_GNU_STYLE
2189 __asm__ __volatile__("outw %w1, %w0\n\t"
2190 :: "Nd" (Port),
2191 "a" (u16));
2192
2193# elif RT_INLINE_ASM_USES_INTRIN
2194 __outword(Port, u16);
2195
2196# else
2197 __asm
2198 {
2199 mov dx, [Port]
2200 mov ax, [u16]
2201 out dx, ax
2202 }
2203# endif
2204}
2205#endif
2206
2207
2208/**
2209 * Gets a 16-bit unsigned integer from an I/O port, ordered.
2210 *
2211 * @returns 16-bit integer.
2212 * @param Port I/O port to read from.
2213 */
2214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2215DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2216#else
2217DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2218{
2219 uint16_t u16;
2220# if RT_INLINE_ASM_GNU_STYLE
2221 __asm__ __volatile__("inw %w1, %w0\n\t"
2222 : "=a" (u16)
2223 : "Nd" (Port));
2224
2225# elif RT_INLINE_ASM_USES_INTRIN
2226 u16 = __inword(Port);
2227
2228# else
2229 __asm
2230 {
2231 mov dx, [Port]
2232 in ax, dx
2233 mov [u16], ax
2234 }
2235# endif
2236 return u16;
2237}
2238#endif
2239
2240
2241/**
2242 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2243 *
2244 * @param Port I/O port to read from.
2245 * @param u32 32-bit integer to write.
2246 */
2247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2248DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2249#else
2250DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2251{
2252# if RT_INLINE_ASM_GNU_STYLE
2253 __asm__ __volatile__("outl %1, %w0\n\t"
2254 :: "Nd" (Port),
2255 "a" (u32));
2256
2257# elif RT_INLINE_ASM_USES_INTRIN
2258 __outdword(Port, u32);
2259
2260# else
2261 __asm
2262 {
2263 mov dx, [Port]
2264 mov eax, [u32]
2265 out dx, eax
2266 }
2267# endif
2268}
2269#endif
2270
2271
2272/**
2273 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2274 *
2275 * @returns 32-bit integer.
2276 * @param Port I/O port to read from.
2277 */
2278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2279DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2280#else
2281DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2282{
2283 uint32_t u32;
2284# if RT_INLINE_ASM_GNU_STYLE
2285 __asm__ __volatile__("inl %w1, %0\n\t"
2286 : "=a" (u32)
2287 : "Nd" (Port));
2288
2289# elif RT_INLINE_ASM_USES_INTRIN
2290 u32 = __indword(Port);
2291
2292# else
2293 __asm
2294 {
2295 mov dx, [Port]
2296 in eax, dx
2297 mov [u32], eax
2298 }
2299# endif
2300 return u32;
2301}
2302#endif
2303
2304/** @todo string i/o */
2305
2306
2307/**
2308 * Atomically Exchange an unsigned 8-bit value, ordered.
2309 *
2310 * @returns Current *pu8 value
2311 * @param pu8 Pointer to the 8-bit variable to update.
2312 * @param u8 The 8-bit value to assign to *pu8.
2313 */
2314#if RT_INLINE_ASM_EXTERNAL
2315DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2316#else
2317DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2318{
2319# if RT_INLINE_ASM_GNU_STYLE
2320 __asm__ __volatile__("xchgb %0, %1\n\t"
2321 : "=m" (*pu8),
2322 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2323 : "1" (u8),
2324 "m" (*pu8));
2325# else
2326 __asm
2327 {
2328# ifdef RT_ARCH_AMD64
2329 mov rdx, [pu8]
2330 mov al, [u8]
2331 xchg [rdx], al
2332 mov [u8], al
2333# else
2334 mov edx, [pu8]
2335 mov al, [u8]
2336 xchg [edx], al
2337 mov [u8], al
2338# endif
2339 }
2340# endif
2341 return u8;
2342}
2343#endif
2344
2345
2346/**
2347 * Atomically Exchange a signed 8-bit value, ordered.
2348 *
2349 * @returns Current *pu8 value
2350 * @param pi8 Pointer to the 8-bit variable to update.
2351 * @param i8 The 8-bit value to assign to *pi8.
2352 */
2353DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2354{
2355 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2356}
2357
2358
2359/**
2360 * Atomically Exchange a bool value, ordered.
2361 *
2362 * @returns Current *pf value
2363 * @param pf Pointer to the 8-bit variable to update.
2364 * @param f The 8-bit value to assign to *pi8.
2365 */
2366DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2367{
2368#ifdef _MSC_VER
2369 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2370#else
2371 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2372#endif
2373}
2374
2375
2376/**
2377 * Atomically Exchange an unsigned 16-bit value, ordered.
2378 *
2379 * @returns Current *pu16 value
2380 * @param pu16 Pointer to the 16-bit variable to update.
2381 * @param u16 The 16-bit value to assign to *pu16.
2382 */
2383#if RT_INLINE_ASM_EXTERNAL
2384DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2385#else
2386DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2387{
2388# if RT_INLINE_ASM_GNU_STYLE
2389 __asm__ __volatile__("xchgw %0, %1\n\t"
2390 : "=m" (*pu16),
2391 "=r" (u16)
2392 : "1" (u16),
2393 "m" (*pu16));
2394# else
2395 __asm
2396 {
2397# ifdef RT_ARCH_AMD64
2398 mov rdx, [pu16]
2399 mov ax, [u16]
2400 xchg [rdx], ax
2401 mov [u16], ax
2402# else
2403 mov edx, [pu16]
2404 mov ax, [u16]
2405 xchg [edx], ax
2406 mov [u16], ax
2407# endif
2408 }
2409# endif
2410 return u16;
2411}
2412#endif
2413
2414
2415/**
2416 * Atomically Exchange a signed 16-bit value, ordered.
2417 *
2418 * @returns Current *pu16 value
2419 * @param pi16 Pointer to the 16-bit variable to update.
2420 * @param i16 The 16-bit value to assign to *pi16.
2421 */
2422DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2423{
2424 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2425}
2426
2427
2428/**
2429 * Atomically Exchange an unsigned 32-bit value, ordered.
2430 *
2431 * @returns Current *pu32 value
2432 * @param pu32 Pointer to the 32-bit variable to update.
2433 * @param u32 The 32-bit value to assign to *pu32.
2434 */
2435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2436DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2437#else
2438DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2439{
2440# if RT_INLINE_ASM_GNU_STYLE
2441 __asm__ __volatile__("xchgl %0, %1\n\t"
2442 : "=m" (*pu32),
2443 "=r" (u32)
2444 : "1" (u32),
2445 "m" (*pu32));
2446
2447# elif RT_INLINE_ASM_USES_INTRIN
2448 u32 = _InterlockedExchange((long *)pu32, u32);
2449
2450# else
2451 __asm
2452 {
2453# ifdef RT_ARCH_AMD64
2454 mov rdx, [pu32]
2455 mov eax, u32
2456 xchg [rdx], eax
2457 mov [u32], eax
2458# else
2459 mov edx, [pu32]
2460 mov eax, u32
2461 xchg [edx], eax
2462 mov [u32], eax
2463# endif
2464 }
2465# endif
2466 return u32;
2467}
2468#endif
2469
2470
2471/**
2472 * Atomically Exchange a signed 32-bit value, ordered.
2473 *
2474 * @returns Current *pu32 value
2475 * @param pi32 Pointer to the 32-bit variable to update.
2476 * @param i32 The 32-bit value to assign to *pi32.
2477 */
2478DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2479{
2480 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2481}
2482
2483
2484/**
2485 * Atomically Exchange an unsigned 64-bit value, ordered.
2486 *
2487 * @returns Current *pu64 value
2488 * @param pu64 Pointer to the 64-bit variable to update.
2489 * @param u64 The 64-bit value to assign to *pu64.
2490 */
2491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2492DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2493#else
2494DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2495{
2496# if defined(RT_ARCH_AMD64)
2497# if RT_INLINE_ASM_USES_INTRIN
2498 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2499
2500# elif RT_INLINE_ASM_GNU_STYLE
2501 __asm__ __volatile__("xchgq %0, %1\n\t"
2502 : "=m" (*pu64),
2503 "=r" (u64)
2504 : "1" (u64),
2505 "m" (*pu64));
2506# else
2507 __asm
2508 {
2509 mov rdx, [pu64]
2510 mov rax, [u64]
2511 xchg [rdx], rax
2512 mov [u64], rax
2513 }
2514# endif
2515# else /* !RT_ARCH_AMD64 */
2516# if RT_INLINE_ASM_GNU_STYLE
2517# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2518 uint32_t u32EBX = (uint32_t)u64;
2519 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2520 "xchgl %%ebx, %3\n\t"
2521 "1:\n\t"
2522 "lock; cmpxchg8b (%5)\n\t"
2523 "jnz 1b\n\t"
2524 "movl %3, %%ebx\n\t"
2525 /*"xchgl %%esi, %5\n\t"*/
2526 : "=A" (u64),
2527 "=m" (*pu64)
2528 : "0" (*pu64),
2529 "m" ( u32EBX ),
2530 "c" ( (uint32_t)(u64 >> 32) ),
2531 "S" (pu64));
2532# else /* !PIC */
2533 __asm__ __volatile__("1:\n\t"
2534 "lock; cmpxchg8b %1\n\t"
2535 "jnz 1b\n\t"
2536 : "=A" (u64),
2537 "=m" (*pu64)
2538 : "0" (*pu64),
2539 "b" ( (uint32_t)u64 ),
2540 "c" ( (uint32_t)(u64 >> 32) ));
2541# endif
2542# else
2543 __asm
2544 {
2545 mov ebx, dword ptr [u64]
2546 mov ecx, dword ptr [u64 + 4]
2547 mov edi, pu64
2548 mov eax, dword ptr [edi]
2549 mov edx, dword ptr [edi + 4]
2550 retry:
2551 lock cmpxchg8b [edi]
2552 jnz retry
2553 mov dword ptr [u64], eax
2554 mov dword ptr [u64 + 4], edx
2555 }
2556# endif
2557# endif /* !RT_ARCH_AMD64 */
2558 return u64;
2559}
2560#endif
2561
2562
2563/**
2564 * Atomically Exchange an signed 64-bit value, ordered.
2565 *
2566 * @returns Current *pi64 value
2567 * @param pi64 Pointer to the 64-bit variable to update.
2568 * @param i64 The 64-bit value to assign to *pi64.
2569 */
2570DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2571{
2572 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2573}
2574
2575
#ifdef RT_ARCH_AMD64
/**
 * Exchanges an unsigned 128-bit value, ordered.
 *
 * @returns The previous content of *pu128, assembled from the two halves.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to store in *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I
 *          have GAS support for it. So, for the time being the atomic part
 *          of this function is BROKEN: it performs two separate 64-bit
 *          exchanges (low half first, then high half) instead of one
 *          128-bit exchange.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        PRTUINT128U pTarget = (PRTUINT128U)(uintptr_t)pu128;
        RTUINT128U  uOld;
        uOld.u    = u128;
        uOld.s.Lo = ASMAtomicXchgU64(&pTarget->s.Lo, uOld.s.Lo);
        uOld.s.Hi = ASMAtomicXchgU64(&pTarget->s.Hi, uOld.s.Hi);
        return uOld.u;
    }
#if 0  /* later? */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov     rbx, dword ptr [u128]
            mov     rcx, dword ptr [u128 + 8]
            mov     rdi, pu128
            mov     rax, dword ptr [rdi]
            mov     rdx, dword ptr [rdi + 8]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov     dword ptr [u128], rax
            mov     dword ptr [u128 + 8], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2635
2636
2637/**
2638 * Atomically Exchange a pointer value, ordered.
2639 *
2640 * @returns Current *ppv value
2641 * @param ppv Pointer to the pointer variable to update.
2642 * @param pv The pointer value to assign to *ppv.
2643 */
2644DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2645{
2646#if ARCH_BITS == 32
2647 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2648#elif ARCH_BITS == 64
2649 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2650#else
2651# error "ARCH_BITS is bogus"
2652#endif
2653}
2654
2655
2656/**
2657 * Atomically Exchange a raw-mode context pointer value, ordered.
2658 *
2659 * @returns Current *ppv value
2660 * @param ppvRC Pointer to the pointer variable to update.
2661 * @param pvRC The pointer value to assign to *ppv.
2662 */
2663DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
2664{
2665 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
2666}
2667
2668
2669/**
2670 * Atomically Exchange a ring-0 pointer value, ordered.
2671 *
2672 * @returns Current *ppv value
2673 * @param ppvR0 Pointer to the pointer variable to update.
2674 * @param pvR0 The pointer value to assign to *ppv.
2675 */
2676DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
2677{
2678#if R0_ARCH_BITS == 32
2679 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
2680#elif R0_ARCH_BITS == 64
2681 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
2682#else
2683# error "R0_ARCH_BITS is bogus"
2684#endif
2685}
2686
2687
2688/**
2689 * Atomically Exchange a ring-3 pointer value, ordered.
2690 *
2691 * @returns Current *ppv value
2692 * @param ppvR3 Pointer to the pointer variable to update.
2693 * @param pvR3 The pointer value to assign to *ppv.
2694 */
2695DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
2696{
2697#if R3_ARCH_BITS == 32
2698 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
2699#elif R3_ARCH_BITS == 64
2700 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
2701#else
2702# error "R3_ARCH_BITS is bogus"
2703#endif
2704}
2705
2706
/** @def ASMAtomicXchgHandle
 * Atomically Exchange a typical IPRT handle value, ordered.
 *
 * The handle is exchanged as a pointer via ASMAtomicXchgPtr; compile-time
 * assertions verify that both *ph and *phRes are pointer sized.
 *
 * @param   ph          Pointer to the value to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   phRes       Where to store the previous *ph value.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicXchgHandle(ph, hNew, phRes) \
    do { \
        *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
        /* Arguments are parenthesized so expression arguments are sized correctly. */ \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
    } while (0)
2722
2723
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function.
 * Any other size triggers an assertion and leaves *pu untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @todo    This is busted as it's missing the result argument.
 */
#define ASMAtomicXchgSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2742
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit exchange function
 * and stores the previous value through puRes using the same width.  Any
 * other size triggers an assertion and leaves both *pu and *puRes untouched.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the previous *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
2761
2762
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
 *
 * If *pu32 equals u32Old it is replaced by u32New; otherwise *pu32 is left
 * unchanged.  The operation is carried out by a single locked CMPXCHG (or
 * the equivalent compiler intrinsic).
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The value *pu32 is expected to hold (the comparand).
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* The comparand goes in via EAX (operand 2, tied by the "2" input);
       SETZ captures the zero flag left behind by CMPXCHG as the result. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (u32Old)
                         : "r" (u32New),
                           "2" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous content; equal to the comparand means the swap happened. */
    return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;

# else
    uint32_t u32Ret;
    /* Same sequence as the GNU variant: EAX = comparand, ECX = new value,
       (R|E)DX = address; the zero flag is turned into 0/1 in u32Ret. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock    cmpxchg [rdx], ecx
#  else
        lock    cmpxchg [edx], ecx
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
2817
2818
2819/**
2820 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2821 *
2822 * @returns true if xchg was done.
2823 * @returns false if xchg wasn't done.
2824 *
2825 * @param pi32 Pointer to the value to update.
2826 * @param i32New The new value to assigned to *pi32.
2827 * @param i32Old The old value to *pi32 compare with.
2828 */
2829DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2830{
2831 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2832}
2833
2834
2835/**
2836 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2837 *
2838 * @returns true if xchg was done.
2839 * @returns false if xchg wasn't done.
2840 *
2841 * @param pu64 Pointer to the 64-bit variable to update.
2842 * @param u64New The 64-bit value to assign to *pu64.
2843 * @param u64Old The value to compare with.
2844 */
2845#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2847#else
2848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2849{
2850# if RT_INLINE_ASM_USES_INTRIN
2851 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2852
2853# elif defined(RT_ARCH_AMD64)
2854# if RT_INLINE_ASM_GNU_STYLE
2855 uint8_t u8Ret;
2856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2857 "setz %1\n\t"
2858 : "=m" (*pu64),
2859 "=qm" (u8Ret),
2860 "=a" (u64Old)
2861 : "r" (u64New),
2862 "2" (u64Old),
2863 "m" (*pu64));
2864 return (bool)u8Ret;
2865# else
2866 bool fRet;
2867 __asm
2868 {
2869 mov rdx, [pu32]
2870 mov rax, [u64Old]
2871 mov rcx, [u64New]
2872 lock cmpxchg [rdx], rcx
2873 setz al
2874 mov [fRet], al
2875 }
2876 return fRet;
2877# endif
2878# else /* !RT_ARCH_AMD64 */
2879 uint32_t u32Ret;
2880# if RT_INLINE_ASM_GNU_STYLE
2881# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2882 uint32_t u32EBX = (uint32_t)u64New;
2883 uint32_t u32Spill;
2884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2885 "lock; cmpxchg8b (%6)\n\t"
2886 "setz %%al\n\t"
2887 "movl %4, %%ebx\n\t"
2888 "movzbl %%al, %%eax\n\t"
2889 : "=a" (u32Ret),
2890 "=d" (u32Spill),
2891# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
2892 "+m" (*pu64)
2893# else
2894 "=m" (*pu64)
2895# endif
2896 : "A" (u64Old),
2897 "m" ( u32EBX ),
2898 "c" ( (uint32_t)(u64New >> 32) ),
2899 "S" (pu64));
2900# else /* !PIC */
2901 uint32_t u32Spill;
2902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2903 "setz %%al\n\t"
2904 "movzbl %%al, %%eax\n\t"
2905 : "=a" (u32Ret),
2906 "=d" (u32Spill),
2907 "+m" (*pu64)
2908 : "A" (u64Old),
2909 "b" ( (uint32_t)u64New ),
2910 "c" ( (uint32_t)(u64New >> 32) ));
2911# endif
2912 return (bool)u32Ret;
2913# else
2914 __asm
2915 {
2916 mov ebx, dword ptr [u64New]
2917 mov ecx, dword ptr [u64New + 4]
2918 mov edi, [pu64]
2919 mov eax, dword ptr [u64Old]
2920 mov edx, dword ptr [u64Old + 4]
2921 lock cmpxchg8b [edi]
2922 setz al
2923 movzx eax, al
2924 mov dword ptr [u32Ret], eax
2925 }
2926 return !!u32Ret;
2927# endif
2928# endif /* !RT_ARCH_AMD64 */
2929}
2930#endif
2931
2932
2933/**
2934 * Atomically Compare and exchange a signed 64-bit value, ordered.
2935 *
2936 * @returns true if xchg was done.
2937 * @returns false if xchg wasn't done.
2938 *
2939 * @param pi64 Pointer to the 64-bit variable to update.
2940 * @param i64 The 64-bit value to assign to *pu64.
2941 * @param i64Old The value to compare with.
2942 */
2943DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2944{
2945 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2946}
2947
2948
2949/**
2950 * Atomically Compare and Exchange a pointer value, ordered.
2951 *
2952 * @returns true if xchg was done.
2953 * @returns false if xchg wasn't done.
2954 *
2955 * @param ppv Pointer to the value to update.
2956 * @param pvNew The new value to assigned to *ppv.
2957 * @param pvOld The old value to *ppv compare with.
2958 */
2959DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
2960{
2961#if ARCH_BITS == 32
2962 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2963#elif ARCH_BITS == 64
2964 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2965#else
2966# error "ARCH_BITS is bogus"
2967#endif
2968}
2969
2970
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * The handle is treated as a pointer and handed to ASMAtomicCmpXchgPtr, so
 * *ph must be pointer sized; this is verified at compile time.
 *
 * @param   ph          Pointer to the handle variable to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The value *ph is expected to hold (the comparand).
 * @param   fRc         Where to store the result (true if the exchange was done).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
        /* The argument is parenthesised so that expressions like 'paHandles + 1' work. */ \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
    } while (0)
2986
2987
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 4 and 8 byte variables are supported.  Any other size triggers an
 * assertion, leaves *pu untouched and sets fRc to false.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is expected to hold (the comparand).
 * @param   fRc         Where to store the result (true if the exchange was done).
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
3009
3010
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * If *pu32 equals u32Old it is replaced by u32New; otherwise *pu32 is left
 * unchanged.  In both cases the value found in *pu32 is written to *pu32Old.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The value *pu32 is expected to hold (the comparand).
 * @param   pu32Old     Where to store the value found in *pu32.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    uint8_t u8Ret;
    /* EAX carries the comparand in and whatever CMPXCHG leaves there out
       (stored to *pu32Old); SETZ turns the zero flag into the result. */
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous content, which doubles as the old value output. */
    return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    /* After CMPXCHG, EAX is stored to *pu32Old; the MOVs in between do not
       touch the flags, so SETZ still sees the outcome of the compare. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock    cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock    cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
3071
3072
3073/**
3074 * Atomically Compare and Exchange a signed 32-bit value, additionally
3075 * passes back old value, ordered.
3076 *
3077 * @returns true if xchg was done.
3078 * @returns false if xchg wasn't done.
3079 *
3080 * @param pi32 Pointer to the value to update.
3081 * @param i32New The new value to assigned to *pi32.
3082 * @param i32Old The old value to *pi32 compare with.
3083 * @param pi32Old Pointer store the old value at.
3084 */
3085DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3086{
3087 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3088}
3089
3090
3091/**
3092 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3093 * passing back old value, ordered.
3094 *
3095 * @returns true if xchg was done.
3096 * @returns false if xchg wasn't done.
3097 *
3098 * @param pu64 Pointer to the 64-bit variable to update.
3099 * @param u64New The 64-bit value to assign to *pu64.
3100 * @param u64Old The value to compare with.
3101 * @param pu64Old Pointer store the old value at.
3102 */
3103#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3104DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3105#else
3106DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3107{
3108# if RT_INLINE_ASM_USES_INTRIN
3109 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3110
3111# elif defined(RT_ARCH_AMD64)
3112# if RT_INLINE_ASM_GNU_STYLE
3113 uint8_t u8Ret;
3114 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3115 "setz %1\n\t"
3116 : "=m" (*pu64),
3117 "=qm" (u8Ret),
3118 "=a" (*pu64Old)
3119 : "r" (u64New),
3120 "a" (u64Old),
3121 "m" (*pu64));
3122 return (bool)u8Ret;
3123# else
3124 bool fRet;
3125 __asm
3126 {
3127 mov rdx, [pu32]
3128 mov rax, [u64Old]
3129 mov rcx, [u64New]
3130 lock cmpxchg [rdx], rcx
3131 mov rdx, [pu64Old]
3132 mov [rdx], rax
3133 setz al
3134 mov [fRet], al
3135 }
3136 return fRet;
3137# endif
3138# else /* !RT_ARCH_AMD64 */
3139# if RT_INLINE_ASM_GNU_STYLE
3140 uint64_t u64Ret;
3141# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3142 /* NB: this code uses a memory clobber description, because the clean
3143 * solution with an output value for *pu64 makes gcc run out of registers.
3144 * This will cause suboptimal code, and anyone with a better solution is
3145 * welcome to improve this. */
3146 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3147 "lock; cmpxchg8b %3\n\t"
3148 "xchgl %%ebx, %1\n\t"
3149 : "=A" (u64Ret)
3150 : "DS" ((uint32_t)u64New),
3151 "c" ((uint32_t)(u64New >> 32)),
3152 "m" (*pu64),
3153 "0" (u64Old)
3154 : "memory" );
3155# else /* !PIC */
3156 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3157 : "=A" (u64Ret),
3158 "=m" (*pu64)
3159 : "b" ((uint32_t)u64New),
3160 "c" ((uint32_t)(u64New >> 32)),
3161 "m" (*pu64),
3162 "0" (u64Old));
3163# endif
3164 *pu64Old = u64Ret;
3165 return u64Ret == u64Old;
3166# else
3167 uint32_t u32Ret;
3168 __asm
3169 {
3170 mov ebx, dword ptr [u64New]
3171 mov ecx, dword ptr [u64New + 4]
3172 mov edi, [pu64]
3173 mov eax, dword ptr [u64Old]
3174 mov edx, dword ptr [u64Old + 4]
3175 lock cmpxchg8b [edi]
3176 mov ebx, [pu64Old]
3177 mov [ebx], eax
3178 setz al
3179 movzx eax, al
3180 add ebx, 4
3181 mov [ebx], edx
3182 mov dword ptr [u32Ret], eax
3183 }
3184 return !!u32Ret;
3185# endif
3186# endif /* !RT_ARCH_AMD64 */
3187}
3188#endif
3189
3190
3191/**
3192 * Atomically Compare and exchange a signed 64-bit value, additionally
3193 * passing back old value, ordered.
3194 *
3195 * @returns true if xchg was done.
3196 * @returns false if xchg wasn't done.
3197 *
3198 * @param pi64 Pointer to the 64-bit variable to update.
3199 * @param i64 The 64-bit value to assign to *pu64.
3200 * @param i64Old The value to compare with.
3201 * @param pi64Old Pointer store the old value at.
3202 */
3203DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3204{
3205 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3206}
3207
/** @def ASMAtomicCmpXchgExHandle
 * Atomically Compare and Exchange a typical IPRT handle value, additionally
 * passing back the old value, ordered.
 *
 * The handle is treated as a pointer and handed to ASMAtomicCmpXchgExPtr
 * (which selects the 32-bit or 64-bit worker from ARCH_BITS), so both *ph
 * and *phOldVal must be pointer sized; this is verified at compile time.
 *
 * @param   ph          Pointer to the handle variable to update.
 * @param   hNew        The new value to assign to *ph.
 * @param   hOld        The value *ph is expected to hold (the comparand).
 * @param   fRc         Where to store the result (true if the exchange was done).
 * @param   phOldVal    Pointer to where to store the old value.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
    do { \
        (fRc) = ASMAtomicCmpXchgExPtr((void * volatile *)(ph), (const void *)(hNew), (const void *)(hOld), (void **)(phOldVal)); \
        AssertCompile(sizeof(*(ph))       == sizeof(void *)); \
        AssertCompile(sizeof(*(phOldVal)) == sizeof(void *)); \
    } while (0)
3234
3235
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * Only 4 and 8 byte variables are supported.  Any other size triggers an
 * assertion, leaves *pu untouched, sets fRc to false and zeroes *puOldVal.
 *
 * @param   pu          Pointer to the value to update.
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The value *pu is expected to hold (the comparand).
 * @param   fRc         Where to store the result (true if the exchange was done).
 * @param   puOldVal    Pointer to where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
                break; \
            /* sizeof yields size_t; cast it so that it matches the %d conversion. */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
3259
3260
3261/**
3262 * Atomically Compare and Exchange a pointer value, additionally
3263 * passing back old value, ordered.
3264 *
3265 * @returns true if xchg was done.
3266 * @returns false if xchg wasn't done.
3267 *
3268 * @param ppv Pointer to the value to update.
3269 * @param pvNew The new value to assigned to *ppv.
3270 * @param pvOld The old value to *ppv compare with.
3271 * @param ppvOld Pointer store the old value at.
3272 */
3273DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3274{
3275#if ARCH_BITS == 32
3276 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3277#elif ARCH_BITS == 64
3278 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3279#else
3280# error "ARCH_BITS is bogus"
3281#endif
3282}
3283
3284
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * Adds u32 to *pu32 with a locked XADD (or the equivalent intrinsic) and
 * hands back what *pu32 contained before the addition.
 *
 * @returns The old value.
 * @param   pu32        Pointer to the value.
 * @param   u32         Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* XADD leaves the previous memory content in the register operand, which
       is tied to u32 both as input ("0") and as output ("=r"). */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (u32),
                           "m" (*pu32)
                         : "memory");
    return u32;
# else
    /* EAX = addend going in, previous *pu32 coming out; written back to u32. */
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock    xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock    xadd [edx], eax
#  endif
        mov     [u32], eax
    }
    return u32;
# endif
}
#endif
3326
3327
3328/**
3329 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3330 *
3331 * @returns The old value.
3332 * @param pi32 Pointer to the value.
3333 * @param i32 Number to add.
3334 */
3335DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3336{
3337 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3338}
3339
3340
3341/**
3342 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
3343 *
3344 * @returns The old value.
3345 * @param pu32 Pointer to the value.
3346 * @param u32 Number to subtract.
3347 */
3348DECLINLINE(uint32_t) ASMAtomicSubU32(int32_t volatile *pi32, uint32_t u32)
3349{
3350 return ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-(int32_t)u32);
3351}
3352
3353
3354/**
3355 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
3356 *
3357 * @returns The old value.
3358 * @param pi32 Pointer to the value.
3359 * @param i32 Number to subtract.
3360 */
3361DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
3362{
3363 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
3364}
3365
3366
/**
 * Atomically increment a 32-bit value, ordered.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's result is returned as is (no adjustment needed here). */
    u32 = _InterlockedIncrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* XADD with 1 leaves the previous value in u32, hence the +1 on return. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1),
                           "m" (*pu32)
                         : "memory");
    return u32+1;
# else
    /* Same as the GNU variant: EAX receives the previous value, +1 gives the new one. */
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock    xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock    xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32+1;
# endif
}
#endif
3408
3409
3410/**
3411 * Atomically increment a signed 32-bit value, ordered.
3412 *
3413 * @returns The new value.
3414 * @param pi32 Pointer to the value to increment.
3415 */
3416DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3417{
3418 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3419}
3420
3421
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * @returns The new value.
 * @param   pu32        Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic's result is returned as is (no adjustment needed here). */
    u32 = _InterlockedDecrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* XADD with -1 leaves the previous value in u32, hence the -1 on return. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1),
                           "m" (*pu32)
                         : "memory");
    return u32-1;
# else
    /* Same as the GNU variant: EAX receives the previous value, -1 gives the new one. */
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock    xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock    xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
3463
3464
3465/**
3466 * Atomically decrement a signed 32-bit value, ordered.
3467 *
3468 * @returns The new value.
3469 * @param pi32 Pointer to the value to decrement.
3470 */
3471DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3472{
3473 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3474}
3475
3476
/**
 * Atomically Or an unsigned 32-bit value, ordered.
 *
 * Performs *pu32 |= u32 as a single locked OR (or the equivalent intrinsic).
 * Nothing is returned.
 *
 * @param   pu32   Pointer to the variable to OR u32 into.
 * @param   u32    The value to OR *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedOr((long volatile *)pu32, (long)u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The mask may be an immediate or a register ("ir"). */
    __asm__ __volatile__("lock; orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    /* EAX = mask, (R|E)DX = address of the variable. */
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock    or [rdx], eax
#  else
        mov     edx, [pu32]
        lock    or [edx], eax
#  endif
    }
# endif
}
#endif
3511
3512
3513/**
3514 * Atomically Or a signed 32-bit value, ordered.
3515 *
3516 * @param pi32 Pointer to the pointer variable to OR u32 with.
3517 * @param i32 The value to OR *pu32 with.
3518 */
3519DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3520{
3521 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3522}
3523
3524
/**
 * Atomically And an unsigned 32-bit value, ordered.
 *
 * Performs *pu32 &= u32 as a single locked AND (or the equivalent intrinsic).
 * Nothing is returned.
 *
 * @param   pu32   Pointer to the variable to AND u32 into.
 * @param   u32    The value to AND *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedAnd((long volatile *)pu32, u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The mask may be an immediate or a register ("ir"). */
    __asm__ __volatile__("lock; andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32),
                           "m" (*pu32));
# else
    /* EAX = mask, (R|E)DX = address of the variable. */
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock    and [rdx], eax
#  else
        mov     edx, [pu32]
        lock    and [edx], eax
#  endif
    }
# endif
}
#endif
3559
3560
3561/**
3562 * Atomically And a signed 32-bit value, ordered.
3563 *
3564 * @param pi32 Pointer to the pointer variable to AND i32 with.
3565 * @param i32 The value to AND *pi32 with.
3566 */
3567DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3568{
3569 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3570}
3571
3572
3573/**
3574 * Memory fence, waits for any pending writes and reads to complete.
3575 */
3576DECLINLINE(void) ASMMemoryFence(void)
3577{
3578 /** @todo use mfence? check if all cpus we care for support it. */
3579 uint32_t volatile u32;
3580 ASMAtomicXchgU32(&u32, 0);
3581}
3582
3583
3584/**
3585 * Write fence, waits for any pending writes to complete.
3586 */
3587DECLINLINE(void) ASMWriteFence(void)
3588{
3589 /** @todo use sfence? check if all cpus we care for support it. */
3590 ASMMemoryFence();
3591}
3592
3593
3594/**
3595 * Read fence, waits for any pending reads to complete.
3596 */
3597DECLINLINE(void) ASMReadFence(void)
3598{
3599 /** @todo use lfence? check if all cpus we care for support it. */
3600 ASMMemoryFence();
3601}
3602
3603
3604/**
3605 * Atomically reads an unsigned 8-bit value, ordered.
3606 *
3607 * @returns Current *pu8 value
3608 * @param pu8 Pointer to the 8-bit variable to read.
3609 */
3610DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3611{
3612 ASMMemoryFence();
3613 return *pu8; /* byte reads are atomic on x86 */
3614}
3615
3616
3617/**
3618 * Atomically reads an unsigned 8-bit value, unordered.
3619 *
3620 * @returns Current *pu8 value
3621 * @param pu8 Pointer to the 8-bit variable to read.
3622 */
3623DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3624{
3625 return *pu8; /* byte reads are atomic on x86 */
3626}
3627
3628
3629/**
3630 * Atomically reads a signed 8-bit value, ordered.
3631 *
3632 * @returns Current *pi8 value
3633 * @param pi8 Pointer to the 8-bit variable to read.
3634 */
3635DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3636{
3637 ASMMemoryFence();
3638 return *pi8; /* byte reads are atomic on x86 */
3639}
3640
3641
3642/**
3643 * Atomically reads a signed 8-bit value, unordered.
3644 *
3645 * @returns Current *pi8 value
3646 * @param pi8 Pointer to the 8-bit variable to read.
3647 */
3648DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3649{
3650 return *pi8; /* byte reads are atomic on x86 */
3651}
3652
3653
3654/**
3655 * Atomically reads an unsigned 16-bit value, ordered.
3656 *
3657 * @returns Current *pu16 value
3658 * @param pu16 Pointer to the 16-bit variable to read.
3659 */
3660DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3661{
3662 ASMMemoryFence();
3663 Assert(!((uintptr_t)pu16 & 1));
3664 return *pu16;
3665}
3666
3667
3668/**
3669 * Atomically reads an unsigned 16-bit value, unordered.
3670 *
3671 * @returns Current *pu16 value
3672 * @param pu16 Pointer to the 16-bit variable to read.
3673 */
3674DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3675{
3676 Assert(!((uintptr_t)pu16 & 1));
3677 return *pu16;
3678}
3679
3680
3681/**
3682 * Atomically reads a signed 16-bit value, ordered.
3683 *
3684 * @returns Current *pi16 value
3685 * @param pi16 Pointer to the 16-bit variable to read.
3686 */
3687DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3688{
3689 ASMMemoryFence();
3690 Assert(!((uintptr_t)pi16 & 1));
3691 return *pi16;
3692}
3693
3694
3695/**
3696 * Atomically reads a signed 16-bit value, unordered.
3697 *
3698 * @returns Current *pi16 value
3699 * @param pi16 Pointer to the 16-bit variable to read.
3700 */
3701DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3702{
3703 Assert(!((uintptr_t)pi16 & 1));
3704 return *pi16;
3705}
3706
3707
3708/**
3709 * Atomically reads an unsigned 32-bit value, ordered.
3710 *
3711 * @returns Current *pu32 value
3712 * @param pu32 Pointer to the 32-bit variable to read.
3713 */
3714DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3715{
3716 ASMMemoryFence();
3717 Assert(!((uintptr_t)pu32 & 3));
3718 return *pu32;
3719}
3720
3721
3722/**
3723 * Atomically reads an unsigned 32-bit value, unordered.
3724 *
3725 * @returns Current *pu32 value
3726 * @param pu32 Pointer to the 32-bit variable to read.
3727 */
3728DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3729{
3730 Assert(!((uintptr_t)pu32 & 3));
3731 return *pu32;
3732}
3733
3734
3735/**
3736 * Atomically reads a signed 32-bit value, ordered.
3737 *
3738 * @returns Current *pi32 value
3739 * @param pi32 Pointer to the 32-bit variable to read.
3740 */
3741DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3742{
3743 ASMMemoryFence();
3744 Assert(!((uintptr_t)pi32 & 3));
3745 return *pi32;
3746}
3747
3748
3749/**
3750 * Atomically reads a signed 32-bit value, unordered.
3751 *
3752 * @returns Current *pi32 value
3753 * @param pi32 Pointer to the 32-bit variable to read.
3754 */
3755DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3756{
3757 Assert(!((uintptr_t)pi32 & 3));
3758 return *pi32;
3759}
3760
3761
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * On AMD64 this is a memory fence followed by a plain 64-bit read.  On
 * 32-bit hosts the value is fetched with a locked CMPXCHG8B where both the
 * comparand (EDX:EAX) and the replacement (ECX:EBX) are zero: the variable
 * is either rewritten with the zero it already holds or left alone, and
 * EDX:EAX ends up holding its current content.  Since CMPXCHG8B is a
 * read-modify-write instruction, the memory has to be writable.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    Assert(!((uintptr_t)pu64 & 7));
    /* Disabled mfence based variant, kept for reference: */
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    /* EBX is not passed as an asm operand here; the zero for EBX is swapped
       in from u32EBX by hand and the original EBX restored afterwards.
       NOTE(review): the "0" (0) input ties an int constant to the 64-bit
       "=A" output - confirm that both EAX and EDX are zeroed by it. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#    if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
                           "+m" (*pu64)
#    else
                           "=m" (*pu64)
#    endif
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    /* NOTE(review): unlike the other variants, this one has no alignment assertion. */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    /* Zero comparand (EDX:EAX) and replacement (ECX:EBX), then pick the
       result up from EDX:EAX. */
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock    cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3838
3839
3840/**
3841 * Atomically reads an unsigned 64-bit value, unordered.
3842 *
3843 * @returns Current *pu64 value
3844 * @param pu64 Pointer to the 64-bit variable to read.
3845 * The memory pointed to must be writable.
3846 * @remark This will fault if the memory is read-only!
3847 */
3848#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3849DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3850#else
3851DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3852{
3853 uint64_t u64;
3854# ifdef RT_ARCH_AMD64
3855 Assert(!((uintptr_t)pu64 & 7));
3856/*# if RT_INLINE_ASM_GNU_STYLE
3857 Assert(!((uintptr_t)pu64 & 7));
3858 __asm__ __volatile__("movq %1, %0\n\t"
3859 : "=r" (u64)
3860 : "m" (*pu64));
3861# else
3862 __asm
3863 {
3864 mov rdx, [pu64]
3865 mov rax, [rdx]
3866 mov [u64], rax
3867 }
3868# endif */
3869 u64 = *pu64;
3870# else /* !RT_ARCH_AMD64 */
3871# if RT_INLINE_ASM_GNU_STYLE
3872# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3873 uint32_t u32EBX = 0;
3874 uint32_t u32Spill;
3875 Assert(!((uintptr_t)pu64 & 7));
3876 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3877 "xor %%ecx,%%ecx\n\t"
3878 "xor %%edx,%%edx\n\t"
3879 "xchgl %%ebx, %3\n\t"
3880 "lock; cmpxchg8b (%4)\n\t"
3881 "movl %3, %%ebx\n\t"
3882 : "=A" (u64),
3883# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3884 "+m" (*pu64),
3885# else
3886 "=m" (*pu64),
3887# endif
3888 "=c" (u32Spill)
3889 : "m" (u32EBX),
3890 "S" (pu64));
3891# else /* !PIC */
3892 __asm__ __volatile__("cmpxchg8b %1\n\t"
3893 : "=A" (u64),
3894 "+m" (*pu64)
3895 : "0" (0),
3896 "b" (0),
3897 "c" (0));
3898# endif
3899# else
3900 Assert(!((uintptr_t)pu64 & 7));
3901 __asm
3902 {
3903 xor eax, eax
3904 xor edx, edx
3905 mov edi, pu64
3906 xor ecx, ecx
3907 xor ebx, ebx
3908 lock cmpxchg8b [edi]
3909 mov dword ptr [u64], eax
3910 mov dword ptr [u64 + 4], edx
3911 }
3912# endif
3913# endif /* !RT_ARCH_AMD64 */
3914 return u64;
3915}
3916#endif
3917
3918
3919/**
3920 * Atomically reads a signed 64-bit value, ordered.
3921 *
3922 * @returns Current *pi64 value
3923 * @param pi64 Pointer to the 64-bit variable to read.
3924 * The memory pointed to must be writable.
3925 * @remark This will fault if the memory is read-only!
3926 */
3927DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3928{
3929 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3930}
3931
3932
3933/**
3934 * Atomically reads a signed 64-bit value, unordered.
3935 *
3936 * @returns Current *pi64 value
3937 * @param pi64 Pointer to the 64-bit variable to read.
3938 * The memory pointed to must be writable.
3939 * @remark This will fault if the memory is read-only!
3940 */
3941DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3942{
3943 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3944}
3945
3946
3947/**
3948 * Atomically reads a pointer value, ordered.
3949 *
3950 * @returns Current *pv value
3951 * @param ppv Pointer to the pointer variable to read.
3952 */
3953DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3954{
3955#if ARCH_BITS == 32
3956 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3957#elif ARCH_BITS == 64
3958 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3959#else
3960# error "ARCH_BITS is bogus"
3961#endif
3962}
3963
3964
3965/**
3966 * Atomically reads a pointer value, unordered.
3967 *
3968 * @returns Current *pv value
3969 * @param ppv Pointer to the pointer variable to read.
3970 */
3971DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3972{
3973#if ARCH_BITS == 32
3974 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3975#elif ARCH_BITS == 64
3976 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3977#else
3978# error "ARCH_BITS is bogus"
3979#endif
3980}
3981
3982
3983/**
3984 * Atomically reads a boolean value, ordered.
3985 *
3986 * @returns Current *pf value
3987 * @param pf Pointer to the boolean variable to read.
3988 */
3989DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3990{
3991 ASMMemoryFence();
3992 return *pf; /* byte reads are atomic on x86 */
3993}
3994
3995
3996/**
3997 * Atomically reads a boolean value, unordered.
3998 *
3999 * @returns Current *pf value
4000 * @param pf Pointer to the boolean variable to read.
4001 */
4002DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
4003{
4004 return *pf; /* byte reads are atomic on x86 */
4005}
4006
4007
/**
 * Atomically read a typical IPRT handle value, ordered.
 *
 * The handle is read as a pointer via ASMAtomicReadPtr; both the handle and
 * the result variable are checked at compile time to be pointer sized.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicReadHandle(ph, phRes) \
    do { \
        *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
    } while (0)
4022
4023
/**
 * Atomically read a typical IPRT handle value, unordered.
 *
 * The handle is read as a pointer via ASMAtomicUoReadPtr; both the handle and
 * the result variable are checked at compile time to be pointer sized.
 *
 * @param   ph      Pointer to the handle variable to read.
 * @param   phRes   Where to store the result.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicUoReadHandle(ph, phRes) \
    do { \
        *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void *)); \
        AssertCompile(sizeof(*(phRes)) == sizeof(void *)); \
    } while (0)
4038
4039
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4057
4058
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered read; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t  *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4076
4077
4078/**
4079 * Atomically writes an unsigned 8-bit value, ordered.
4080 *
4081 * @param pu8 Pointer to the 8-bit variable.
4082 * @param u8 The 8-bit value to assign to *pu8.
4083 */
4084DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4085{
4086 ASMAtomicXchgU8(pu8, u8);
4087}
4088
4089
4090/**
4091 * Atomically writes an unsigned 8-bit value, unordered.
4092 *
4093 * @param pu8 Pointer to the 8-bit variable.
4094 * @param u8 The 8-bit value to assign to *pu8.
4095 */
4096DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4097{
4098 *pu8 = u8; /* byte writes are atomic on x86 */
4099}
4100
4101
4102/**
4103 * Atomically writes a signed 8-bit value, ordered.
4104 *
4105 * @param pi8 Pointer to the 8-bit variable to read.
4106 * @param i8 The 8-bit value to assign to *pi8.
4107 */
4108DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4109{
4110 ASMAtomicXchgS8(pi8, i8);
4111}
4112
4113
4114/**
4115 * Atomically writes a signed 8-bit value, unordered.
4116 *
4117 * @param pi8 Pointer to the 8-bit variable to read.
4118 * @param i8 The 8-bit value to assign to *pi8.
4119 */
4120DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4121{
4122 *pi8 = i8; /* byte writes are atomic on x86 */
4123}
4124
4125
4126/**
4127 * Atomically writes an unsigned 16-bit value, ordered.
4128 *
4129 * @param pu16 Pointer to the 16-bit variable.
4130 * @param u16 The 16-bit value to assign to *pu16.
4131 */
4132DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4133{
4134 ASMAtomicXchgU16(pu16, u16);
4135}
4136
4137
4138/**
4139 * Atomically writes an unsigned 16-bit value, unordered.
4140 *
4141 * @param pu16 Pointer to the 16-bit variable.
4142 * @param u16 The 16-bit value to assign to *pu16.
4143 */
4144DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4145{
4146 Assert(!((uintptr_t)pu16 & 1));
4147 *pu16 = u16;
4148}
4149
4150
4151/**
4152 * Atomically writes a signed 16-bit value, ordered.
4153 *
4154 * @param pi16 Pointer to the 16-bit variable to read.
4155 * @param i16 The 16-bit value to assign to *pi16.
4156 */
4157DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4158{
4159 ASMAtomicXchgS16(pi16, i16);
4160}
4161
4162
4163/**
4164 * Atomically writes a signed 16-bit value, unordered.
4165 *
4166 * @param pi16 Pointer to the 16-bit variable to read.
4167 * @param i16 The 16-bit value to assign to *pi16.
4168 */
4169DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4170{
4171 Assert(!((uintptr_t)pi16 & 1));
4172 *pi16 = i16;
4173}
4174
4175
4176/**
4177 * Atomically writes an unsigned 32-bit value, ordered.
4178 *
4179 * @param pu32 Pointer to the 32-bit variable.
4180 * @param u32 The 32-bit value to assign to *pu32.
4181 */
4182DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4183{
4184 ASMAtomicXchgU32(pu32, u32);
4185}
4186
4187
4188/**
4189 * Atomically writes an unsigned 32-bit value, unordered.
4190 *
4191 * @param pu32 Pointer to the 32-bit variable.
4192 * @param u32 The 32-bit value to assign to *pu32.
4193 */
4194DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4195{
4196 Assert(!((uintptr_t)pu32 & 3));
4197 *pu32 = u32;
4198}
4199
4200
4201/**
4202 * Atomically writes a signed 32-bit value, ordered.
4203 *
4204 * @param pi32 Pointer to the 32-bit variable to read.
4205 * @param i32 The 32-bit value to assign to *pi32.
4206 */
4207DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4208{
4209 ASMAtomicXchgS32(pi32, i32);
4210}
4211
4212
4213/**
4214 * Atomically writes a signed 32-bit value, unordered.
4215 *
4216 * @param pi32 Pointer to the 32-bit variable to read.
4217 * @param i32 The 32-bit value to assign to *pi32.
4218 */
4219DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4220{
4221 Assert(!((uintptr_t)pi32 & 3));
4222 *pi32 = i32;
4223}
4224
4225
4226/**
4227 * Atomically writes an unsigned 64-bit value, ordered.
4228 *
4229 * @param pu64 Pointer to the 64-bit variable.
4230 * @param u64 The 64-bit value to assign to *pu64.
4231 */
4232DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4233{
4234 ASMAtomicXchgU64(pu64, u64);
4235}
4236
4237
4238/**
4239 * Atomically writes an unsigned 64-bit value, unordered.
4240 *
4241 * @param pu64 Pointer to the 64-bit variable.
4242 * @param u64 The 64-bit value to assign to *pu64.
4243 */
4244DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4245{
4246 Assert(!((uintptr_t)pu64 & 7));
4247#if ARCH_BITS == 64
4248 *pu64 = u64;
4249#else
4250 ASMAtomicXchgU64(pu64, u64);
4251#endif
4252}
4253
4254
4255/**
4256 * Atomically writes a signed 64-bit value, ordered.
4257 *
4258 * @param pi64 Pointer to the 64-bit variable.
4259 * @param i64 The 64-bit value to assign to *pi64.
4260 */
4261DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4262{
4263 ASMAtomicXchgS64(pi64, i64);
4264}
4265
4266
4267/**
4268 * Atomically writes a signed 64-bit value, unordered.
4269 *
4270 * @param pi64 Pointer to the 64-bit variable.
4271 * @param i64 The 64-bit value to assign to *pi64.
4272 */
4273DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4274{
4275 Assert(!((uintptr_t)pi64 & 7));
4276#if ARCH_BITS == 64
4277 *pi64 = i64;
4278#else
4279 ASMAtomicXchgS64(pi64, i64);
4280#endif
4281}
4282
4283
4284/**
4285 * Atomically writes a boolean value, unordered.
4286 *
4287 * @param pf Pointer to the boolean variable.
4288 * @param f The boolean value to assign to *pf.
4289 */
4290DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4291{
4292 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4293}
4294
4295
4296/**
4297 * Atomically writes a boolean value, unordered.
4298 *
4299 * @param pf Pointer to the boolean variable.
4300 * @param f The boolean value to assign to *pf.
4301 */
4302DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4303{
4304 *pf = f; /* byte writes are atomic on x86 */
4305}
4306
4307
4308/**
4309 * Atomically writes a pointer value, ordered.
4310 *
4311 * @returns Current *pv value
4312 * @param ppv Pointer to the pointer variable.
4313 * @param pv The pointer value to assigne to *ppv.
4314 */
4315DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4316{
4317#if ARCH_BITS == 32
4318 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4319#elif ARCH_BITS == 64
4320 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4321#else
4322# error "ARCH_BITS is bogus"
4323#endif
4324}
4325
4326
4327/**
4328 * Atomically writes a pointer value, unordered.
4329 *
4330 * @returns Current *pv value
4331 * @param ppv Pointer to the pointer variable.
4332 * @param pv The pointer value to assigne to *ppv.
4333 */
4334DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4335{
4336#if ARCH_BITS == 32
4337 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4338#elif ARCH_BITS == 64
4339 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4340#else
4341# error "ARCH_BITS is bogus"
4342#endif
4343}
4344
4345
/**
 * Atomically write a typical IPRT handle value, ordered.
 *
 * The handle is written as a pointer via ASMAtomicWritePtr; the handle
 * variable is checked at compile time to be pointer sized.
 *
 * @param   ph      Pointer to the variable to update.
 * @param   hNew    The value to assign to *ph.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicWriteHandle(ph, hNew) \
    do { \
        ASMAtomicWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void*)); \
    } while (0)
4359
4360
/**
 * Atomically write a typical IPRT handle value, unordered.
 *
 * The handle is written as a pointer via ASMAtomicUoWritePtr; the handle
 * variable is checked at compile time to be pointer sized.
 *
 * @param   ph      Pointer to the variable to update.
 * @param   hNew    The value to assign to *ph.
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 */
#define ASMAtomicUoWriteHandle(ph, hNew) \
    do { \
        ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
        AssertCompile(sizeof(*(ph)) == sizeof(void*)); \
    } while (0)
4374
4375
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit ordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4393
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*(pu)) to the 8, 16, 32 or 64-bit unordered write; any
 * other size triggers an assertion.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t  *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", (int)sizeof(*(pu)))); \
        } \
    } while (0)
4411
4412
4413
4414
/**
 * Invalidate page.
 *
 * Issues the x86 INVLPG instruction for the given address, either through the
 * compiler intrinsic, GNU inline assembly or MSC inline assembly.  When
 * RT_INLINE_ASM_EXTERNAL is set and intrinsics are not used, only a prototype
 * is emitted here.
 *
 * @param   pv      Address of the page to invalidate.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMInvalidatePage(void *pv);
#else
DECLINLINE(void) ASMInvalidatePage(void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
    __invlpg(pv);

# elif RT_INLINE_ASM_GNU_STYLE
    /* The byte at pv is passed as a memory operand so the assembler encodes its address. */
    __asm__ __volatile__("invlpg %0\n\t"
                         : : "m" (*(uint8_t *)pv));
# else
    /* Load the pointer into the accumulator register, then invalidate through it. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pv]
        invlpg  [rax]
#  else
        mov     eax, [pv]
        invlpg  [eax]
#  endif
    }
# endif
}
#endif
4445
4446
/* The page helpers below hard-code a 4K (0x1000) page size, so refuse to build
   if a visible PAGE_SIZE disagrees.  The check is skipped when NT_INCLUDED is
   defined. */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
#  error "PAGE_SIZE is not 0x1000!"
# endif
#endif
4452
/**
 * Zeros a 4K memory page.
 *
 * The page is cleared with a string-store instruction: 0x1000 / 8 quadwords on
 * AMD64, 0x1000 / 4 doublewords on 32-bit x86.
 *
 * @param   pv      Pointer to the memory block. This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
#  else
    __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
#  endif

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies both the destination and count registers, so they are
       declared as outputs (pv and a dummy) tied to the corresponding inputs. */
    RTCCUINTREG uDummy;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("rep stosq"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 3),
                            "a" (0)
                          : "memory");
#  else
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 2),
                            "a" (0)
                          : "memory");
#  endif
# else
    /* 0200h quadwords resp. 0400h doublewords make up the 0x1000 byte page. */
    __asm
    {
#  ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h
        mov     rdi, [pv]
        rep     stosq
#  else
        xor     eax, eax
        mov     ecx, 0400h
        mov     edi, [pv]
        rep stosd
#  endif
    }
# endif
}
# endif
4507
4508
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * The block is cleared doubleword by doubleword (cb / 4 stores); the intrinsic
 * AMD64 path uses quadword stores instead when cb is a multiple of 8.
 *
 * @param   pv      Pointer to the memory block.
 * @param   cb      Number of bytes in the block. This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    /* Use the wider store when the size allows it. */
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, 0, cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, 0, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies the destination and count registers, so pv and cb are
       both outputs tied to their inputs; the count is cb in doublewords. */
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (cb)
                          : "0" (pv),
                            "1" (cb >> 2),
                            "a" (0)
                          : "memory");
# else
    __asm
    {
        xor     eax, eax
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        rep stosd
    }
# endif
}
#endif
4554
4555
/**
 * Fills a memory block with a 32-bit aligned size.
 *
 * The block is filled doubleword by doubleword (cb / 4 stores of u32); the
 * intrinsic AMD64 path stores the value duplicated into a quadword instead
 * when cb is a multiple of 8.
 *
 * @param   pv      Pointer to the memory block.
 * @param   cb      Number of bytes in the block. This MUST be aligned on 32-bit!
 * @param   u32     The value to fill with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
#else
DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
#  ifdef RT_ARCH_AMD64
    /* Use the wider store when the size allows it. */
    if (!(cb & 7))
        __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
    else
#  endif
        __stosd((unsigned long *)pv, u32, cb / 4);

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stos modifies the destination and count registers, so pv and cb are
       both outputs tied to their inputs; the count is cb in doublewords. */
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (cb)
                          : "0" (pv),
                            "1" (cb >> 2),
                            "a" (u32)
                          : "memory");
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
#  else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
#  endif
        mov     eax, [u32]
        rep stosd
    }
# endif
}
#endif
4602
4603
4604/**
4605 * Checks if a memory block is filled with the specified byte.
4606 *
4607 * This is a sort of inverted memchr.
4608 *
4609 * @returns Pointer to the byte which doesn't equal u8.
4610 * @returns NULL if all equal to u8.
4611 *
4612 * @param pv Pointer to the memory block.
4613 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4614 * @param u8 The value it's supposed to be filled with.
4615 */
4616#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4617DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4618#else
4619DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4620{
4621/** @todo rewrite this in inline assembly? */
4622 uint8_t const *pb = (uint8_t const *)pv;
4623 for (; cb; cb--, pb++)
4624 if (RT_UNLIKELY(*pb != u8))
4625 return (void *)pb;
4626 return NULL;
4627}
4628#endif
4629
4630
4631/**
4632 * Checks if a memory block is filled with the specified 32-bit value.
4633 *
4634 * This is a sort of inverted memchr.
4635 *
4636 * @returns Pointer to the first value which doesn't equal u32.
4637 * @returns NULL if all equal to u32.
4638 *
4639 * @param pv Pointer to the memory block.
4640 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4641 * @param u32 The value it's supposed to be filled with.
4642 */
4643#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4644DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4645#else
4646DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4647{
4648/** @todo rewrite this in inline assembly? */
4649 uint32_t const *pu32 = (uint32_t const *)pv;
4650 for (; cb; cb -= 4, pu32++)
4651 if (RT_UNLIKELY(*pu32 != u32))
4652 return (uint32_t *)pu32;
4653 return NULL;
4654}
4655#endif
4656
4657
/**
 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
 *
 * On AMD64 this is plain C arithmetic; on 32-bit x86 a single MUL instruction
 * produces the 64-bit product in edx:eax.
 *
 * @returns u32F1 * u32F2.
 * @param   u32F1   The first factor.
 * @param   u32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
#else
DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
{
# ifdef RT_ARCH_AMD64
    return (uint64_t)u32F1 * u32F2;
# else  /* !RT_ARCH_AMD64 */
    uint64_t u64;
#  if RT_INLINE_ASM_GNU_STYLE
    /* eax = u32F2, edx = u32F1; the "A" constraint returns edx:eax. */
    __asm__ __volatile__("mull %%edx"
                         : "=A" (u64)
                         : "a" (u32F2), "d" (u32F1));
#  else
    __asm
    {
        mov     edx, [u32F1]
        mov     eax, [u32F2]
        mul     edx
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
    return u64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4690
4691
/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * On AMD64 this is plain C arithmetic; on 32-bit x86 a single IMUL instruction
 * produces the 64-bit product in edx:eax.
 *
 * @returns i32F1 * i32F2.
 * @param   i32F1   The first factor.
 * @param   i32F2   The second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
#else
DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
{
# ifdef RT_ARCH_AMD64
    return (int64_t)i32F1 * i32F2;
# else  /* !RT_ARCH_AMD64 */
    int64_t i64;
#  if RT_INLINE_ASM_GNU_STYLE
    /* eax = i32F2, edx = i32F1; the "A" constraint returns edx:eax. */
    __asm__ __volatile__("imull %%edx"
                         : "=A" (i64)
                         : "a" (i32F2), "d" (i32F1));
#  else
    __asm
    {
        mov     edx, [i32F1]
        mov     eax, [i32F2]
        imul    edx
        mov     dword ptr [i64], eax
        mov     dword ptr [i64 + 4], edx
    }
#  endif
    return i64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4724
4725
/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * On AMD64 this is plain C arithmetic truncated to 32 bits; on 32-bit x86 a
 * single DIV instruction divides edx:eax by the divisor and the quotient is
 * taken from eax.
 *
 * @returns u64 / u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef RT_ARCH_AMD64
    return (uint32_t)(u64 / u32);
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* The quotient (eax) is written back into u32; the remainder (edx) goes to a dummy. */
    RTCCUINTREG uDummy;
    __asm__ __volatile__("divl %3"
                         : "=a" (u32), "=d"(uDummy)
                         : "A" (u64), "r" (u32));
#  else
    __asm
    {
        mov     eax, dword ptr [u64]
        mov     edx, dword ptr [u64 + 4]
        mov     ecx, [u32]
        div     ecx
        mov     [u32], eax
    }
#  endif
    return u32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4758
4759
/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * On AMD64 this is plain C arithmetic truncated to 32 bits; on 32-bit x86 a
 * single IDIV instruction divides edx:eax by the divisor and the quotient is
 * taken from eax.
 *
 * @returns i64 / i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef RT_ARCH_AMD64
    return (int32_t)(i64 / i32);
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* The quotient (eax) is written back into i32; the remainder (edx) goes to a dummy. */
    RTCCUINTREG iDummy;
    __asm__ __volatile__("idivl %3"
                         : "=a" (i32), "=d"(iDummy)
                         : "A" (i64), "r" (i32));
#  else
    __asm
    {
        mov     eax, dword ptr [i64]
        mov     edx, dword ptr [i64 + 4]
        mov     ecx, [i32]
        idiv    ecx
        mov     [i32], eax
    }
#  endif
    return i32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4792
4793
/**
 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
 * returning the remainder.
 *
 * On AMD64 this is plain C arithmetic; on 32-bit x86 a single DIV instruction
 * divides edx:eax by the divisor and the remainder is taken from edx.
 *
 * @returns u64 % u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 *
 * @remarks It is important that the quotient (u64 / u32) is <= UINT32_MAX or
 *          we'll overflow and crash.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef RT_ARCH_AMD64
    return (uint32_t)(u64 % u32);
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* The remainder (edx) is written back into u32; the quotient (eax) goes to a dummy. */
    RTCCUINTREG uDummy;
    __asm__ __volatile__("divl %3"
                         : "=a" (uDummy), "=d"(u32)
                         : "A" (u64), "r" (u32));
#  else
    __asm
    {
        mov     eax, dword ptr [u64]
        mov     edx, dword ptr [u64 + 4]
        mov     ecx, [u32]
        div     ecx
        mov     [u32], edx
    }
#  endif
    return u32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4829
4830
/**
 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
 * returning the remainder.
 *
 * On AMD64 this is plain C arithmetic; on 32-bit x86 a single IDIV instruction
 * divides edx:eax by the divisor and the remainder is taken from edx.
 *
 * @returns i64 % i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 *
 * @remarks It is important that the quotient (i64 / i32) fits in a signed
 *          32-bit value or we'll overflow and crash.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef RT_ARCH_AMD64
    return (int32_t)(i64 % i32);
# else  /* !RT_ARCH_AMD64 */
#  if RT_INLINE_ASM_GNU_STYLE
    /* The remainder (edx) is written back into i32; the quotient (eax) goes to a dummy. */
    RTCCUINTREG iDummy;
    __asm__ __volatile__("idivl %3"
                         : "=a" (iDummy), "=d"(i32)
                         : "A" (i64), "r" (i32));
#  else
    __asm
    {
        mov     eax, dword ptr [i64]
        mov     edx, dword ptr [i64 + 4]
        mov     ecx, [i32]
        idiv    ecx
        mov     [i32], edx
    }
#  endif
    return i32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4866
4867
/**
 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer
 * using a 96 bit intermediate result.
 *
 * The 32-bit x86 path multiplies the low and high halves of u64A separately,
 * adds the carry, and then performs a two-step long division by u32C.  The
 * AMD64 path uses one MUL followed by one DIV on the 128-bit rdx:rax pair.
 *
 * @note    Don't use 64-bit C arithmetic here since some gcc compilers generate references to
 *          __udivdi3 and __umoddi3 even if this inline function is not used.
 *
 * @returns (u64A * u32B) / u32C.
 * @param   u64A    The 64-bit value.
 * @param   u32B    The 32-bit value to multiply A by.
 * @param   u32C    The 32-bit value to divide A*B by.
 */
#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
#else
DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* rax = u64A and rdx = 0 on entry; mulq leaves the product in rdx:rax,
       which divq then divides by u32C. */
    uint64_t u64Result, u64Spill;
    __asm__ __volatile__("mulq %2\n\t"
                         "divq %3\n\t"
                         : "=a" (u64Result),
                           "=d" (u64Spill)
                         : "r" ((uint64_t)u32B),
                           "r" ((uint64_t)u32C),
                           "0" (u64A),
                           "1" (0));
    return u64Result;
#  else
    /* On entry: eax = u64A.lo, esi = u64A.hi, ecx = u32B, edi = u32C. */
    uint32_t u32Dummy;
    uint64_t u64Result;
    __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
                                              edx = u64Lo.hi = (u64A.lo * u32B).hi */
                         "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
                                                    eax = u64A.hi */
                         "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
                                                    edx = u32C */
                         "xchg %%edx,%%ecx \n\t" /* ecx = u32C
                                                    edx = u32B */
                         "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
                                              edx = u64Hi.hi = (u64A.hi * u32B).hi */
                         "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
                         "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
                         "divl %%ecx \n\t" /* eax = u64Hi / u32C
                                              edx = u64Hi % u32C */
                         "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
                         "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
                         "divl %%ecx \n\t" /* u64Result.lo */
                         "movl %%edi,%%edx \n\t" /* u64Result.hi */
                         : "=A"(u64Result), "=c"(u32Dummy),
                           "=S"(u32Dummy), "=D"(u32Dummy)
                         : "a"((uint32_t)u64A),
                           "S"((uint32_t)(u64A >> 32)),
                           "c"(u32B),
                           "D"(u32C));
    return u64Result;
#  endif
# else
    /* C fallback: same split multiply and two-step division as the 32-bit asm above. */
    RTUINT64U u;
    uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
    uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
    u64Hi += (u64Lo >> 32);
    u.s.Hi = (uint32_t)(u64Hi / u32C);
    u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
    return u.u;
# endif
}
#endif
4936
4937
/**
 * Probes a byte pointer for read access.
 *
 * While the function will fault if the byte is not read accessible,
 * the idea is to do this in a safe place like before acquiring locks
 * and such like.
 *
 * Also, this function guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte that was read.
 * @param   pvByte      Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* A volatile asm load, so the read is emitted even if the result is unused. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pvByte]
        mov     al, [rax]
#  else
        mov     eax, [pvByte]
        mov     al, [eax]
#  endif
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
4977
4978/**
4979 * Probes a buffer for read access page by page.
4980 *
4981 * While the function will fault if the buffer is not fully read
4982 * accessible, the idea is to do this in a safe place like before
4983 * acquiring locks and such like.
4984 *
 * Also, this function guarantees that an eager compiler is not going
 * to optimize the probing away.
4987 *
4988 * @param pvBuf Pointer to the buffer.
4989 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4990 */
4991DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4992{
4993 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4994 /* the first byte */
4995 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4996 ASMProbeReadByte(pu8);
4997
4998 /* the pages in between pages. */
4999 while (cbBuf > /*PAGE_SIZE*/0x1000)
5000 {
5001 ASMProbeReadByte(pu8);
5002 cbBuf -= /*PAGE_SIZE*/0x1000;
5003 pu8 += /*PAGE_SIZE*/0x1000;
5004 }
5005
5006 /* the last byte */
5007 ASMProbeReadByte(pu8 + cbBuf - 1);
5008}
5009
5010
5011/** @def ASMBreakpoint
5012 * Debugger Breakpoint.
5013 * @remark In the gnu world we add a nop instruction after the int3 to
5014 * force gdb to remain at the int3 source line.
5015 * @remark The L4 kernel will try make sense of the breakpoint, thus the jmp.
5016 * @internal
5017 */
#if !RT_INLINE_ASM_GNU_STYLE
/* Non-GNU style compilers: rely on the __debugbreak() intrinsic. */
# define ASMBreakpoint()    __debugbreak()
#elif defined(__L4ENV__)
/* L4: int3 followed by a jump to the very next instruction. */
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
#else
/* GNU style: int3 followed by a nop. */
# define ASMBreakpoint()    do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
#endif
5027
5028
5029
5030/** @defgroup grp_inline_bits Bit Operations
5031 * @{
5032 */
5033
5034
5035/**
5036 * Sets a bit in a bitmap.
5037 *
5038 * @param pvBitmap Pointer to the bitmap.
5039 * @param iBit The bit to set.
5040 */
5041#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5042DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
5043#else
5044DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
5045{
5046# if RT_INLINE_ASM_USES_INTRIN
5047 _bittestandset((long *)pvBitmap, iBit);
5048
5049# elif RT_INLINE_ASM_GNU_STYLE
5050 __asm__ __volatile__ ("btsl %1, %0"
5051 : "=m" (*(volatile long *)pvBitmap)
5052 : "Ir" (iBit),
5053 "m" (*(volatile long *)pvBitmap)
5054 : "memory");
5055# else
5056 __asm
5057 {
5058# ifdef RT_ARCH_AMD64
5059 mov rax, [pvBitmap]
5060 mov edx, [iBit]
5061 bts [rax], edx
5062# else
5063 mov eax, [pvBitmap]
5064 mov edx, [iBit]
5065 bts [eax], edx
5066# endif
5067 }
5068# endif
5069}
5070#endif
5071
5072
5073/**
5074 * Atomically sets a bit in a bitmap, ordered.
5075 *
5076 * @param pvBitmap Pointer to the bitmap.
5077 * @param iBit The bit to set.
5078 */
5079#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5080DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5081#else
5082DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5083{
5084# if RT_INLINE_ASM_USES_INTRIN
5085 _interlockedbittestandset((long *)pvBitmap, iBit);
5086# elif RT_INLINE_ASM_GNU_STYLE
5087 __asm__ __volatile__ ("lock; btsl %1, %0"
5088 : "=m" (*(volatile long *)pvBitmap)
5089 : "Ir" (iBit),
5090 "m" (*(volatile long *)pvBitmap)
5091 : "memory");
5092# else
5093 __asm
5094 {
5095# ifdef RT_ARCH_AMD64
5096 mov rax, [pvBitmap]
5097 mov edx, [iBit]
5098 lock bts [rax], edx
5099# else
5100 mov eax, [pvBitmap]
5101 mov edx, [iBit]
5102 lock bts [eax], edx
5103# endif
5104 }
5105# endif
5106}
5107#endif
5108
5109
5110/**
5111 * Clears a bit in a bitmap.
5112 *
5113 * @param pvBitmap Pointer to the bitmap.
5114 * @param iBit The bit to clear.
5115 */
5116#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5117DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5118#else
5119DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5120{
5121# if RT_INLINE_ASM_USES_INTRIN
5122 _bittestandreset((long *)pvBitmap, iBit);
5123
5124# elif RT_INLINE_ASM_GNU_STYLE
5125 __asm__ __volatile__ ("btrl %1, %0"
5126 : "=m" (*(volatile long *)pvBitmap)
5127 : "Ir" (iBit),
5128 "m" (*(volatile long *)pvBitmap)
5129 : "memory");
5130# else
5131 __asm
5132 {
5133# ifdef RT_ARCH_AMD64
5134 mov rax, [pvBitmap]
5135 mov edx, [iBit]
5136 btr [rax], edx
5137# else
5138 mov eax, [pvBitmap]
5139 mov edx, [iBit]
5140 btr [eax], edx
5141# endif
5142 }
5143# endif
5144}
5145#endif
5146
5147
5148/**
5149 * Atomically clears a bit in a bitmap, ordered.
5150 *
5151 * @param pvBitmap Pointer to the bitmap.
 * @param   iBit        The bit to clear.
5153 * @remark No memory barrier, take care on smp.
5154 */
5155#if RT_INLINE_ASM_EXTERNAL
5156DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5157#else
5158DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5159{
5160# if RT_INLINE_ASM_GNU_STYLE
5161 __asm__ __volatile__ ("lock; btrl %1, %0"
5162 : "=m" (*(volatile long *)pvBitmap)
5163 : "Ir" (iBit),
5164 "m" (*(volatile long *)pvBitmap)
5165 : "memory");
5166# else
5167 __asm
5168 {
5169# ifdef RT_ARCH_AMD64
5170 mov rax, [pvBitmap]
5171 mov edx, [iBit]
5172 lock btr [rax], edx
5173# else
5174 mov eax, [pvBitmap]
5175 mov edx, [iBit]
5176 lock btr [eax], edx
5177# endif
5178 }
5179# endif
5180}
5181#endif
5182
5183
5184/**
5185 * Toggles a bit in a bitmap.
5186 *
5187 * @param pvBitmap Pointer to the bitmap.
5188 * @param iBit The bit to toggle.
5189 */
5190#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5191DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5192#else
5193DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5194{
5195# if RT_INLINE_ASM_USES_INTRIN
5196 _bittestandcomplement((long *)pvBitmap, iBit);
5197# elif RT_INLINE_ASM_GNU_STYLE
5198 __asm__ __volatile__ ("btcl %1, %0"
5199 : "=m" (*(volatile long *)pvBitmap)
5200 : "Ir" (iBit),
5201 "m" (*(volatile long *)pvBitmap)
5202 : "memory");
5203# else
5204 __asm
5205 {
5206# ifdef RT_ARCH_AMD64
5207 mov rax, [pvBitmap]
5208 mov edx, [iBit]
5209 btc [rax], edx
5210# else
5211 mov eax, [pvBitmap]
5212 mov edx, [iBit]
5213 btc [eax], edx
5214# endif
5215 }
5216# endif
5217}
5218#endif
5219
5220
5221/**
5222 * Atomically toggles a bit in a bitmap, ordered.
5223 *
5224 * @param pvBitmap Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
5226 */
5227#if RT_INLINE_ASM_EXTERNAL
5228DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5229#else
5230DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5231{
5232# if RT_INLINE_ASM_GNU_STYLE
5233 __asm__ __volatile__ ("lock; btcl %1, %0"
5234 : "=m" (*(volatile long *)pvBitmap)
5235 : "Ir" (iBit),
5236 "m" (*(volatile long *)pvBitmap)
5237 : "memory");
5238# else
5239 __asm
5240 {
5241# ifdef RT_ARCH_AMD64
5242 mov rax, [pvBitmap]
5243 mov edx, [iBit]
5244 lock btc [rax], edx
5245# else
5246 mov eax, [pvBitmap]
5247 mov edx, [iBit]
5248 lock btc [eax], edx
5249# endif
5250 }
5251# endif
5252}
5253#endif
5254
5255
5256/**
5257 * Tests and sets a bit in a bitmap.
5258 *
5259 * @returns true if the bit was set.
5260 * @returns false if the bit was clear.
5261 * @param pvBitmap Pointer to the bitmap.
5262 * @param iBit The bit to test and set.
5263 */
5264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5265DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5266#else
5267DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5268{
5269 union { bool f; uint32_t u32; uint8_t u8; } rc;
5270# if RT_INLINE_ASM_USES_INTRIN
5271 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5272
5273# elif RT_INLINE_ASM_GNU_STYLE
5274 __asm__ __volatile__ ("btsl %2, %1\n\t"
5275 "setc %b0\n\t"
5276 "andl $1, %0\n\t"
5277 : "=q" (rc.u32),
5278 "=m" (*(volatile long *)pvBitmap)
5279 : "Ir" (iBit),
5280 "m" (*(volatile long *)pvBitmap)
5281 : "memory");
5282# else
5283 __asm
5284 {
5285 mov edx, [iBit]
5286# ifdef RT_ARCH_AMD64
5287 mov rax, [pvBitmap]
5288 bts [rax], edx
5289# else
5290 mov eax, [pvBitmap]
5291 bts [eax], edx
5292# endif
5293 setc al
5294 and eax, 1
5295 mov [rc.u32], eax
5296 }
5297# endif
5298 return rc.f;
5299}
5300#endif
5301
5302
5303/**
5304 * Atomically tests and sets a bit in a bitmap, ordered.
5305 *
5306 * @returns true if the bit was set.
5307 * @returns false if the bit was clear.
5308 * @param pvBitmap Pointer to the bitmap.
5309 * @param iBit The bit to set.
5310 */
5311#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5312DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5313#else
5314DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5315{
5316 union { bool f; uint32_t u32; uint8_t u8; } rc;
5317# if RT_INLINE_ASM_USES_INTRIN
5318 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5319# elif RT_INLINE_ASM_GNU_STYLE
5320 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5321 "setc %b0\n\t"
5322 "andl $1, %0\n\t"
5323 : "=q" (rc.u32),
5324 "=m" (*(volatile long *)pvBitmap)
5325 : "Ir" (iBit),
5326 "m" (*(volatile long *)pvBitmap)
5327 : "memory");
5328# else
5329 __asm
5330 {
5331 mov edx, [iBit]
5332# ifdef RT_ARCH_AMD64
5333 mov rax, [pvBitmap]
5334 lock bts [rax], edx
5335# else
5336 mov eax, [pvBitmap]
5337 lock bts [eax], edx
5338# endif
5339 setc al
5340 and eax, 1
5341 mov [rc.u32], eax
5342 }
5343# endif
5344 return rc.f;
5345}
5346#endif
5347
5348
5349/**
5350 * Tests and clears a bit in a bitmap.
5351 *
5352 * @returns true if the bit was set.
5353 * @returns false if the bit was clear.
5354 * @param pvBitmap Pointer to the bitmap.
5355 * @param iBit The bit to test and clear.
5356 */
5357#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5358DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5359#else
5360DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5361{
5362 union { bool f; uint32_t u32; uint8_t u8; } rc;
5363# if RT_INLINE_ASM_USES_INTRIN
5364 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5365
5366# elif RT_INLINE_ASM_GNU_STYLE
5367 __asm__ __volatile__ ("btrl %2, %1\n\t"
5368 "setc %b0\n\t"
5369 "andl $1, %0\n\t"
5370 : "=q" (rc.u32),
5371 "=m" (*(volatile long *)pvBitmap)
5372 : "Ir" (iBit),
5373 "m" (*(volatile long *)pvBitmap)
5374 : "memory");
5375# else
5376 __asm
5377 {
5378 mov edx, [iBit]
5379# ifdef RT_ARCH_AMD64
5380 mov rax, [pvBitmap]
5381 btr [rax], edx
5382# else
5383 mov eax, [pvBitmap]
5384 btr [eax], edx
5385# endif
5386 setc al
5387 and eax, 1
5388 mov [rc.u32], eax
5389 }
5390# endif
5391 return rc.f;
5392}
5393#endif
5394
5395
5396/**
5397 * Atomically tests and clears a bit in a bitmap, ordered.
5398 *
5399 * @returns true if the bit was set.
5400 * @returns false if the bit was clear.
5401 * @param pvBitmap Pointer to the bitmap.
5402 * @param iBit The bit to test and clear.
5403 * @remark No memory barrier, take care on smp.
5404 */
5405#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5406DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5407#else
5408DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5409{
5410 union { bool f; uint32_t u32; uint8_t u8; } rc;
5411# if RT_INLINE_ASM_USES_INTRIN
5412 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5413
5414# elif RT_INLINE_ASM_GNU_STYLE
5415 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5416 "setc %b0\n\t"
5417 "andl $1, %0\n\t"
5418 : "=q" (rc.u32),
5419 "=m" (*(volatile long *)pvBitmap)
5420 : "Ir" (iBit),
5421 "m" (*(volatile long *)pvBitmap)
5422 : "memory");
5423# else
5424 __asm
5425 {
5426 mov edx, [iBit]
5427# ifdef RT_ARCH_AMD64
5428 mov rax, [pvBitmap]
5429 lock btr [rax], edx
5430# else
5431 mov eax, [pvBitmap]
5432 lock btr [eax], edx
5433# endif
5434 setc al
5435 and eax, 1
5436 mov [rc.u32], eax
5437 }
5438# endif
5439 return rc.f;
5440}
5441#endif
5442
5443
5444/**
5445 * Tests and toggles a bit in a bitmap.
5446 *
5447 * @returns true if the bit was set.
5448 * @returns false if the bit was clear.
5449 * @param pvBitmap Pointer to the bitmap.
5450 * @param iBit The bit to test and toggle.
5451 */
5452#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5453DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5454#else
5455DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5456{
5457 union { bool f; uint32_t u32; uint8_t u8; } rc;
5458# if RT_INLINE_ASM_USES_INTRIN
5459 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5460
5461# elif RT_INLINE_ASM_GNU_STYLE
5462 __asm__ __volatile__ ("btcl %2, %1\n\t"
5463 "setc %b0\n\t"
5464 "andl $1, %0\n\t"
5465 : "=q" (rc.u32),
5466 "=m" (*(volatile long *)pvBitmap)
5467 : "Ir" (iBit),
5468 "m" (*(volatile long *)pvBitmap)
5469 : "memory");
5470# else
5471 __asm
5472 {
5473 mov edx, [iBit]
5474# ifdef RT_ARCH_AMD64
5475 mov rax, [pvBitmap]
5476 btc [rax], edx
5477# else
5478 mov eax, [pvBitmap]
5479 btc [eax], edx
5480# endif
5481 setc al
5482 and eax, 1
5483 mov [rc.u32], eax
5484 }
5485# endif
5486 return rc.f;
5487}
5488#endif
5489
5490
5491/**
5492 * Atomically tests and toggles a bit in a bitmap, ordered.
5493 *
5494 * @returns true if the bit was set.
5495 * @returns false if the bit was clear.
5496 * @param pvBitmap Pointer to the bitmap.
5497 * @param iBit The bit to test and toggle.
5498 */
5499#if RT_INLINE_ASM_EXTERNAL
5500DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5501#else
5502DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5503{
5504 union { bool f; uint32_t u32; uint8_t u8; } rc;
5505# if RT_INLINE_ASM_GNU_STYLE
5506 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5507 "setc %b0\n\t"
5508 "andl $1, %0\n\t"
5509 : "=q" (rc.u32),
5510 "=m" (*(volatile long *)pvBitmap)
5511 : "Ir" (iBit),
5512 "m" (*(volatile long *)pvBitmap)
5513 : "memory");
5514# else
5515 __asm
5516 {
5517 mov edx, [iBit]
5518# ifdef RT_ARCH_AMD64
5519 mov rax, [pvBitmap]
5520 lock btc [rax], edx
5521# else
5522 mov eax, [pvBitmap]
5523 lock btc [eax], edx
5524# endif
5525 setc al
5526 and eax, 1
5527 mov [rc.u32], eax
5528 }
5529# endif
5530 return rc.f;
5531}
5532#endif
5533
5534
5535/**
5536 * Tests if a bit in a bitmap is set.
5537 *
5538 * @returns true if the bit is set.
5539 * @returns false if the bit is clear.
5540 * @param pvBitmap Pointer to the bitmap.
5541 * @param iBit The bit to test.
5542 */
5543#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5544DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5545#else
5546DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5547{
5548 union { bool f; uint32_t u32; uint8_t u8; } rc;
5549# if RT_INLINE_ASM_USES_INTRIN
5550 rc.u32 = _bittest((long *)pvBitmap, iBit);
5551# elif RT_INLINE_ASM_GNU_STYLE
5552
5553 __asm__ __volatile__ ("btl %2, %1\n\t"
5554 "setc %b0\n\t"
5555 "andl $1, %0\n\t"
5556 : "=q" (rc.u32)
5557 : "m" (*(const volatile long *)pvBitmap),
5558 "Ir" (iBit)
5559 : "memory");
5560# else
5561 __asm
5562 {
5563 mov edx, [iBit]
5564# ifdef RT_ARCH_AMD64
5565 mov rax, [pvBitmap]
5566 bt [rax], edx
5567# else
5568 mov eax, [pvBitmap]
5569 bt [eax], edx
5570# endif
5571 setc al
5572 and eax, 1
5573 mov [rc.u32], eax
5574 }
5575# endif
5576 return rc.f;
5577}
5578#endif
5579
5580
5581/**
5582 * Clears a bit range within a bitmap.
5583 *
5584 * @param pvBitmap Pointer to the bitmap.
5585 * @param iBitStart The First bit to clear.
5586 * @param iBitEnd The first bit not to clear.
5587 */
5588DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5589{
5590 if (iBitStart < iBitEnd)
5591 {
5592 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5593 int iStart = iBitStart & ~31;
5594 int iEnd = iBitEnd & ~31;
5595 if (iStart == iEnd)
5596 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5597 else
5598 {
5599 /* bits in first dword. */
5600 if (iBitStart & 31)
5601 {
5602 *pu32 &= (1 << (iBitStart & 31)) - 1;
5603 pu32++;
5604 iBitStart = iStart + 32;
5605 }
5606
5607 /* whole dword. */
5608 if (iBitStart != iEnd)
5609 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5610
5611 /* bits in last dword. */
5612 if (iBitEnd & 31)
5613 {
5614 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5615 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5616 }
5617 }
5618 }
5619}
5620
5621
5622/**
5623 * Sets a bit range within a bitmap.
5624 *
5625 * @param pvBitmap Pointer to the bitmap.
5626 * @param iBitStart The First bit to set.
5627 * @param iBitEnd The first bit not to set.
5628 */
5629DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5630{
5631 if (iBitStart < iBitEnd)
5632 {
5633 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5634 int iStart = iBitStart & ~31;
5635 int iEnd = iBitEnd & ~31;
5636 if (iStart == iEnd)
5637 *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << iBitStart;
5638 else
5639 {
5640 /* bits in first dword. */
5641 if (iBitStart & 31)
5642 {
5643 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5644 pu32++;
5645 iBitStart = iStart + 32;
5646 }
5647
5648 /* whole dword. */
5649 if (iBitStart != iEnd)
5650 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5651
5652 /* bits in last dword. */
5653 if (iBitEnd & 31)
5654 {
5655 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5656 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5657 }
5658 }
5659 }
5660}
5661
5662
5663/**
5664 * Finds the first clear bit in a bitmap.
5665 *
5666 * @returns Index of the first zero bit.
5667 * @returns -1 if no clear bit was found.
5668 * @param pvBitmap Pointer to the bitmap.
5669 * @param cBits The number of bits in the bitmap. Multiple of 32.
5670 */
5671#if RT_INLINE_ASM_EXTERNAL
5672DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5673#else
5674DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5675{
5676 if (cBits)
5677 {
5678 int32_t iBit;
5679# if RT_INLINE_ASM_GNU_STYLE
5680 RTCCUINTREG uEAX, uECX, uEDI;
5681 cBits = RT_ALIGN_32(cBits, 32);
5682 __asm__ __volatile__("repe; scasl\n\t"
5683 "je 1f\n\t"
5684# ifdef RT_ARCH_AMD64
5685 "lea -4(%%rdi), %%rdi\n\t"
5686 "xorl (%%rdi), %%eax\n\t"
5687 "subq %5, %%rdi\n\t"
5688# else
5689 "lea -4(%%edi), %%edi\n\t"
5690 "xorl (%%edi), %%eax\n\t"
5691 "subl %5, %%edi\n\t"
5692# endif
5693 "shll $3, %%edi\n\t"
5694 "bsfl %%eax, %%edx\n\t"
5695 "addl %%edi, %%edx\n\t"
5696 "1:\t\n"
5697 : "=d" (iBit),
5698 "=&c" (uECX),
5699 "=&D" (uEDI),
5700 "=&a" (uEAX)
5701 : "0" (0xffffffff),
5702 "mr" (pvBitmap),
5703 "1" (cBits >> 5),
5704 "2" (pvBitmap),
5705 "3" (0xffffffff));
5706# else
5707 cBits = RT_ALIGN_32(cBits, 32);
5708 __asm
5709 {
5710# ifdef RT_ARCH_AMD64
5711 mov rdi, [pvBitmap]
5712 mov rbx, rdi
5713# else
5714 mov edi, [pvBitmap]
5715 mov ebx, edi
5716# endif
5717 mov edx, 0ffffffffh
5718 mov eax, edx
5719 mov ecx, [cBits]
5720 shr ecx, 5
5721 repe scasd
5722 je done
5723
5724# ifdef RT_ARCH_AMD64
5725 lea rdi, [rdi - 4]
5726 xor eax, [rdi]
5727 sub rdi, rbx
5728# else
5729 lea edi, [edi - 4]
5730 xor eax, [edi]
5731 sub edi, ebx
5732# endif
5733 shl edi, 3
5734 bsf edx, eax
5735 add edx, edi
5736 done:
5737 mov [iBit], edx
5738 }
5739# endif
5740 return iBit;
5741 }
5742 return -1;
5743}
5744#endif
5745
5746
5747/**
5748 * Finds the next clear bit in a bitmap.
5749 *
 * @returns Index of the next clear bit.
5751 * @returns -1 if no clear bit was found.
5752 * @param pvBitmap Pointer to the bitmap.
5753 * @param cBits The number of bits in the bitmap. Multiple of 32.
5754 * @param iBitPrev The bit returned from the last search.
5755 * The search will start at iBitPrev + 1.
5756 */
5757#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5758DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5759#else
5760DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5761{
5762 int iBit = ++iBitPrev & 31;
5763 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5764 cBits -= iBitPrev & ~31;
5765 if (iBit)
5766 {
5767 /* inspect the first dword. */
5768 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5769# if RT_INLINE_ASM_USES_INTRIN
5770 unsigned long ulBit = 0;
5771 if (_BitScanForward(&ulBit, u32))
5772 return ulBit + iBitPrev;
5773 iBit = -1;
5774# else
5775# if RT_INLINE_ASM_GNU_STYLE
5776 __asm__ __volatile__("bsf %1, %0\n\t"
5777 "jnz 1f\n\t"
5778 "movl $-1, %0\n\t"
5779 "1:\n\t"
5780 : "=r" (iBit)
5781 : "r" (u32));
5782# else
5783 __asm
5784 {
5785 mov edx, [u32]
5786 bsf eax, edx
5787 jnz done
5788 mov eax, 0ffffffffh
5789 done:
5790 mov [iBit], eax
5791 }
5792# endif
5793 if (iBit >= 0)
5794 return iBit + iBitPrev;
5795# endif
5796 /* Search the rest of the bitmap, if there is anything. */
5797 if (cBits > 32)
5798 {
5799 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5800 if (iBit >= 0)
5801 return iBit + (iBitPrev & ~31) + 32;
5802 }
5803 }
5804 else
5805 {
5806 /* Search the rest of the bitmap. */
5807 iBit = ASMBitFirstClear(pvBitmap, cBits);
5808 if (iBit >= 0)
5809 return iBit + (iBitPrev & ~31);
5810 }
5811 return iBit;
5812}
5813#endif
5814
5815
5816/**
5817 * Finds the first set bit in a bitmap.
5818 *
5819 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found.
5821 * @param pvBitmap Pointer to the bitmap.
5822 * @param cBits The number of bits in the bitmap. Multiple of 32.
5823 */
5824#if RT_INLINE_ASM_EXTERNAL
5825DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5826#else
5827DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5828{
5829 if (cBits)
5830 {
5831 int32_t iBit;
5832# if RT_INLINE_ASM_GNU_STYLE
5833 RTCCUINTREG uEAX, uECX, uEDI;
5834 cBits = RT_ALIGN_32(cBits, 32);
5835 __asm__ __volatile__("repe; scasl\n\t"
5836 "je 1f\n\t"
5837# ifdef RT_ARCH_AMD64
5838 "lea -4(%%rdi), %%rdi\n\t"
5839 "movl (%%rdi), %%eax\n\t"
5840 "subq %5, %%rdi\n\t"
5841# else
5842 "lea -4(%%edi), %%edi\n\t"
5843 "movl (%%edi), %%eax\n\t"
5844 "subl %5, %%edi\n\t"
5845# endif
5846 "shll $3, %%edi\n\t"
5847 "bsfl %%eax, %%edx\n\t"
5848 "addl %%edi, %%edx\n\t"
5849 "1:\t\n"
5850 : "=d" (iBit),
5851 "=&c" (uECX),
5852 "=&D" (uEDI),
5853 "=&a" (uEAX)
5854 : "0" (0xffffffff),
5855 "mr" (pvBitmap),
5856 "1" (cBits >> 5),
5857 "2" (pvBitmap),
5858 "3" (0));
5859# else
5860 cBits = RT_ALIGN_32(cBits, 32);
5861 __asm
5862 {
5863# ifdef RT_ARCH_AMD64
5864 mov rdi, [pvBitmap]
5865 mov rbx, rdi
5866# else
5867 mov edi, [pvBitmap]
5868 mov ebx, edi
5869# endif
5870 mov edx, 0ffffffffh
5871 xor eax, eax
5872 mov ecx, [cBits]
5873 shr ecx, 5
5874 repe scasd
5875 je done
5876# ifdef RT_ARCH_AMD64
5877 lea rdi, [rdi - 4]
5878 mov eax, [rdi]
5879 sub rdi, rbx
5880# else
5881 lea edi, [edi - 4]
5882 mov eax, [edi]
5883 sub edi, ebx
5884# endif
5885 shl edi, 3
5886 bsf edx, eax
5887 add edx, edi
5888 done:
5889 mov [iBit], edx
5890 }
5891# endif
5892 return iBit;
5893 }
5894 return -1;
5895}
5896#endif
5897
5898
5899/**
5900 * Finds the next set bit in a bitmap.
5901 *
5902 * @returns Index of the next set bit.
5903 * @returns -1 if no set bit was found.
5904 * @param pvBitmap Pointer to the bitmap.
5905 * @param cBits The number of bits in the bitmap. Multiple of 32.
5906 * @param iBitPrev The bit returned from the last search.
5907 * The search will start at iBitPrev + 1.
5908 */
5909#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5910DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5911#else
5912DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5913{
5914 int iBit = ++iBitPrev & 31;
5915 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5916 cBits -= iBitPrev & ~31;
5917 if (iBit)
5918 {
5919 /* inspect the first dword. */
5920 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5921# if RT_INLINE_ASM_USES_INTRIN
5922 unsigned long ulBit = 0;
5923 if (_BitScanForward(&ulBit, u32))
5924 return ulBit + iBitPrev;
5925 iBit = -1;
5926# else
5927# if RT_INLINE_ASM_GNU_STYLE
5928 __asm__ __volatile__("bsf %1, %0\n\t"
5929 "jnz 1f\n\t"
5930 "movl $-1, %0\n\t"
5931 "1:\n\t"
5932 : "=r" (iBit)
5933 : "r" (u32));
5934# else
5935 __asm
5936 {
5937 mov edx, u32
5938 bsf eax, edx
5939 jnz done
5940 mov eax, 0ffffffffh
5941 done:
5942 mov [iBit], eax
5943 }
5944# endif
5945 if (iBit >= 0)
5946 return iBit + iBitPrev;
5947# endif
5948 /* Search the rest of the bitmap, if there is anything. */
5949 if (cBits > 32)
5950 {
5951 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5952 if (iBit >= 0)
5953 return iBit + (iBitPrev & ~31) + 32;
5954 }
5955
5956 }
5957 else
5958 {
5959 /* Search the rest of the bitmap. */
5960 iBit = ASMBitFirstSet(pvBitmap, cBits);
5961 if (iBit >= 0)
5962 return iBit + (iBitPrev & ~31);
5963 }
5964 return iBit;
5965}
5966#endif
5967
5968
5969/**
5970 * Finds the first bit which is set in the given 32-bit integer.
5971 * Bits are numbered from 1 (least significant) to 32.
5972 *
5973 * @returns index [1..32] of the first set bit.
5974 * @returns 0 if all bits are cleared.
5975 * @param u32 Integer to search for set bits.
5976 * @remark Similar to ffs() in BSD.
5977 */
5978DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5979{
5980# if RT_INLINE_ASM_USES_INTRIN
5981 unsigned long iBit;
5982 if (_BitScanForward(&iBit, u32))
5983 iBit++;
5984 else
5985 iBit = 0;
5986# elif RT_INLINE_ASM_GNU_STYLE
5987 uint32_t iBit;
5988 __asm__ __volatile__("bsf %1, %0\n\t"
5989 "jnz 1f\n\t"
5990 "xorl %0, %0\n\t"
5991 "jmp 2f\n"
5992 "1:\n\t"
5993 "incl %0\n"
5994 "2:\n\t"
5995 : "=r" (iBit)
5996 : "rm" (u32));
5997# else
5998 uint32_t iBit;
5999 _asm
6000 {
6001 bsf eax, [u32]
6002 jnz found
6003 xor eax, eax
6004 jmp done
6005 found:
6006 inc eax
6007 done:
6008 mov [iBit], eax
6009 }
6010# endif
6011 return iBit;
6012}
6013
6014
6015/**
6016 * Finds the first bit which is set in the given 32-bit integer.
6017 * Bits are numbered from 1 (least significant) to 32.
6018 *
6019 * @returns index [1..32] of the first set bit.
6020 * @returns 0 if all bits are cleared.
6021 * @param i32 Integer to search for set bits.
6022 * @remark Similar to ffs() in BSD.
6023 */
6024DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
6025{
6026 return ASMBitFirstSetU32((uint32_t)i32);
6027}
6028
6029
6030/**
6031 * Finds the last bit which is set in the given 32-bit integer.
6032 * Bits are numbered from 1 (least significant) to 32.
6033 *
6034 * @returns index [1..32] of the last set bit.
6035 * @returns 0 if all bits are cleared.
6036 * @param u32 Integer to search for set bits.
6037 * @remark Similar to fls() in BSD.
6038 */
6039DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
6040{
6041# if RT_INLINE_ASM_USES_INTRIN
6042 unsigned long iBit;
6043 if (_BitScanReverse(&iBit, u32))
6044 iBit++;
6045 else
6046 iBit = 0;
6047# elif RT_INLINE_ASM_GNU_STYLE
6048 uint32_t iBit;
6049 __asm__ __volatile__("bsrl %1, %0\n\t"
6050 "jnz 1f\n\t"
6051 "xorl %0, %0\n\t"
6052 "jmp 2f\n"
6053 "1:\n\t"
6054 "incl %0\n"
6055 "2:\n\t"
6056 : "=r" (iBit)
6057 : "rm" (u32));
6058# else
6059 uint32_t iBit;
6060 _asm
6061 {
6062 bsr eax, [u32]
6063 jnz found
6064 xor eax, eax
6065 jmp done
6066 found:
6067 inc eax
6068 done:
6069 mov [iBit], eax
6070 }
6071# endif
6072 return iBit;
6073}
6074
6075
6076/**
6077 * Finds the last bit which is set in the given 32-bit integer.
6078 * Bits are numbered from 1 (least significant) to 32.
6079 *
6080 * @returns index [1..32] of the last set bit.
6081 * @returns 0 if all bits are cleared.
6082 * @param i32 Integer to search for set bits.
6083 * @remark Similar to fls() in BSD.
6084 */
6085DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6086{
6087 return ASMBitLastSetS32((uint32_t)i32);
6088}
6089
6090/**
6091 * Reverse the byte order of the given 16-bit integer.
6092 *
 * @returns The byte-swapped value.
6094 * @param u16 16-bit integer value.
6095 */
6096DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6097{
6098#if RT_INLINE_ASM_USES_INTRIN
6099 u16 = _byteswap_ushort(u16);
6100#elif RT_INLINE_ASM_GNU_STYLE
6101 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6102#else
6103 _asm
6104 {
6105 mov ax, [u16]
6106 ror ax, 8
6107 mov [u16], ax
6108 }
6109#endif
6110 return u16;
6111}
6112
6113/**
6114 * Reverse the byte order of the given 32-bit integer.
6115 *
 * @returns The byte-swapped value.
6117 * @param u32 32-bit integer value.
6118 */
6119DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6120{
6121#if RT_INLINE_ASM_USES_INTRIN
6122 u32 = _byteswap_ulong(u32);
6123#elif RT_INLINE_ASM_GNU_STYLE
6124 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6125#else
6126 _asm
6127 {
6128 mov eax, [u32]
6129 bswap eax
6130 mov [u32], eax
6131 }
6132#endif
6133 return u32;
6134}
6135
6136
6137/**
6138 * Reverse the byte order of the given 64-bit integer.
6139 *
 * @returns The byte-swapped value.
6141 * @param u64 64-bit integer value.
6142 */
6143DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6144{
6145#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6146 u64 = _byteswap_uint64(u64);
6147#else
6148 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6149 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6150#endif
6151 return u64;
6152}
6153
6154
6155/** @} */
6156
6157
6158/** @} */
6159#endif
6160
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette