VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 13830

Last change on this file since 13830 was 13511, checked in by vboxsync, 16 years ago

const

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 155.1 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using _MSC_VER >= 1400 and thus the compiler intrinsics.
39 * Otherwise defined as 0.
40 */
41
42#ifdef _MSC_VER
43# if _MSC_VER >= 1400
44# define RT_INLINE_ASM_USES_INTRIN 1
45# include <intrin.h>
46 /* Emit the intrinsics at all optimization levels. */
47# pragma intrinsic(_ReadWriteBarrier)
48# pragma intrinsic(__cpuid)
49# pragma intrinsic(_enable)
50# pragma intrinsic(_disable)
51# pragma intrinsic(__rdtsc)
52# pragma intrinsic(__readmsr)
53# pragma intrinsic(__writemsr)
54# pragma intrinsic(__outbyte)
55# pragma intrinsic(__outword)
56# pragma intrinsic(__outdword)
57# pragma intrinsic(__inbyte)
58# pragma intrinsic(__inword)
59# pragma intrinsic(__indword)
60# pragma intrinsic(__invlpg)
61# pragma intrinsic(__stosd)
62# pragma intrinsic(__stosw)
63# pragma intrinsic(__stosb)
64# pragma intrinsic(__readcr0)
65# pragma intrinsic(__readcr2)
66# pragma intrinsic(__readcr3)
67# pragma intrinsic(__readcr4)
68# pragma intrinsic(__writecr0)
69# pragma intrinsic(__writecr3)
70# pragma intrinsic(__writecr4)
71# pragma intrinsic(__readdr)
72# pragma intrinsic(__writedr)
73# pragma intrinsic(_BitScanForward)
74# pragma intrinsic(_BitScanReverse)
75# pragma intrinsic(_bittest)
76# pragma intrinsic(_bittestandset)
77# pragma intrinsic(_bittestandreset)
78# pragma intrinsic(_bittestandcomplement)
79# pragma intrinsic(_byteswap_ushort)
80# pragma intrinsic(_byteswap_ulong)
81# pragma intrinsic(_interlockedbittestandset)
82# pragma intrinsic(_interlockedbittestandreset)
83# pragma intrinsic(_InterlockedAnd)
84# pragma intrinsic(_InterlockedOr)
85# pragma intrinsic(_InterlockedIncrement)
86# pragma intrinsic(_InterlockedDecrement)
87# pragma intrinsic(_InterlockedExchange)
88# pragma intrinsic(_InterlockedExchangeAdd)
89# pragma intrinsic(_InterlockedCompareExchange)
90# pragma intrinsic(_InterlockedCompareExchange64)
91# ifdef RT_ARCH_AMD64
92# pragma intrinsic(__stosq)
93# pragma intrinsic(__readcr8)
94# pragma intrinsic(__writecr8)
95# pragma intrinsic(_byteswap_uint64)
96# pragma intrinsic(_InterlockedExchange64)
97# endif
98# endif
99#endif
100#ifndef RT_INLINE_ASM_USES_INTRIN
101# define RT_INLINE_ASM_USES_INTRIN 0
102#endif
103
104
105
106/** @defgroup grp_asm ASM - Assembly Routines
107 * @ingroup grp_rt
108 *
109 * @remarks The difference between ordered and unordered atomic operations is that
110 * the former will complete outstanding reads and writes before continuing,
111 * while the latter makes no promises about the order. Ordered
112 * operations do not, it seems, make any 100% promise as to whether
113 * the operation will complete before any subsequent memory access.
114 * (Please correct if wrong.)
115 *
116 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
117 * are unordered (note the Uo).
118 *
119 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
120 * or even optimize assembler instructions away. For instance, in the following code
121 * the second rdmsr instruction is optimized away because gcc treats that instruction
122 * as deterministic:
123 *
124 * @code
125 * static inline uint32_t rdmsr_low(int idx)
126 * {
127 * uint32_t low;
128 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
129 * }
130 * ...
131 * uint32_t msr1 = rdmsr_low(1);
132 * foo(msr1);
133 * msr1 = rdmsr_low(1);
134 * bar(msr1);
135 * @endcode
136 *
137 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
138 * use the result of the first call as input parameter for bar() as well. For rdmsr this
139 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
140 * machine status information in general.
141 *
142 * @{
143 */
144
145/** @def RT_INLINE_ASM_EXTERNAL
146 * Defined as 1 if the compiler does not support inline assembly.
147 * The ASM* functions will then be implemented in an external .asm file.
148 *
149 * @remark At the present time it's unconfirmed whether or not Microsoft skipped
150 * inline assembly in their AMD64 compiler.
151 */
152#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
153# define RT_INLINE_ASM_EXTERNAL 1
154#else
155# define RT_INLINE_ASM_EXTERNAL 0
156#endif
157
158/** @def RT_INLINE_ASM_GNU_STYLE
159 * Defined as 1 if the compiler understands GNU-style inline assembly.
160 */
161#if defined(_MSC_VER)
162# define RT_INLINE_ASM_GNU_STYLE 0
163#else
164# define RT_INLINE_ASM_GNU_STYLE 1
165#endif
166
167
168/** @todo find a more proper place for this structure? */
169#pragma pack(1)
170/** IDTR */
171typedef struct RTIDTR
172{
173 /** Size of the IDT. */
174 uint16_t cbIdt;
175 /** Address of the IDT. */
176 uintptr_t pIdt;
177} RTIDTR, *PRTIDTR;
178#pragma pack()
179
180#pragma pack(1)
181/** GDTR */
182typedef struct RTGDTR
183{
184 /** Size of the GDT. */
185 uint16_t cbGdt;
186 /** Address of the GDT. */
187 uintptr_t pGdt;
188} RTGDTR, *PRTGDTR;
189#pragma pack()
190
191
192/** @def ASMReturnAddress
193 * Gets the return address of the current (or calling if you like) function or method.
194 */
195#ifdef _MSC_VER
196# ifdef __cplusplus
197extern "C"
198# endif
199void * _ReturnAddress(void);
200# pragma intrinsic(_ReturnAddress)
201# define ASMReturnAddress() _ReturnAddress()
202#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
203# define ASMReturnAddress() __builtin_return_address(0)
204#else
205# error "Unsupported compiler."
206#endif
207
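/* Usage sketch (illustration only; the names below are hypothetical): tagging a
 * trace record with the caller's address.
 * @code
 *     static void *g_pvExampleLastCaller;
 *     void rtExampleTrace(void)
 *     {
 *         g_pvExampleLastCaller = ASMReturnAddress(); // the address the caller returns to
 *     }
 * @endcode
 */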
208
209/**
210 * Gets the content of the IDTR CPU register.
211 * @param pIdtr Where to store the IDTR contents.
212 */
213#if RT_INLINE_ASM_EXTERNAL
214DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
215#else
216DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
217{
218# if RT_INLINE_ASM_GNU_STYLE
219 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
220# else
221 __asm
222 {
223# ifdef RT_ARCH_AMD64
224 mov rax, [pIdtr]
225 sidt [rax]
226# else
227 mov eax, [pIdtr]
228 sidt [eax]
229# endif
230 }
231# endif
232}
233#endif
234
235
236/**
237 * Sets the content of the IDTR CPU register.
238 * @param pIdtr Where to load the IDTR contents from
239 */
240#if RT_INLINE_ASM_EXTERNAL
241DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
242#else
243DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
244{
245# if RT_INLINE_ASM_GNU_STYLE
246 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
247# else
248 __asm
249 {
250# ifdef RT_ARCH_AMD64
251 mov rax, [pIdtr]
252 lidt [rax]
253# else
254 mov eax, [pIdtr]
255 lidt [eax]
256# endif
257 }
258# endif
259}
260#endif
261
262
263/**
264 * Gets the content of the GDTR CPU register.
265 * @param pGdtr Where to store the GDTR contents.
266 */
267#if RT_INLINE_ASM_EXTERNAL
268DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
269#else
270DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
271{
272# if RT_INLINE_ASM_GNU_STYLE
273 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
274# else
275 __asm
276 {
277# ifdef RT_ARCH_AMD64
278 mov rax, [pGdtr]
279 sgdt [rax]
280# else
281 mov eax, [pGdtr]
282 sgdt [eax]
283# endif
284 }
285# endif
286}
287#endif
288
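/* Usage sketch (illustration only; local variable names are hypothetical): capturing
 * the descriptor table registers with the helpers above.
 * @code
 *     RTIDTR Idtr;
 *     RTGDTR Gdtr;
 *     ASMGetIDTR(&Idtr);
 *     ASMGetGDTR(&Gdtr);
 *     uintptr_t uIdtBase = Idtr.pIdt;     // linear base of the IDT
 *     uint16_t  cbIdt    = Idtr.cbIdt;    // limit / size field as stored by sidt
 *     uintptr_t uGdtBase = Gdtr.pGdt;
 *     uint16_t  cbGdt    = Gdtr.cbGdt;
 * @endcode
 */
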
289/**
290 * Get the cs register.
291 * @returns cs.
292 */
293#if RT_INLINE_ASM_EXTERNAL
294DECLASM(RTSEL) ASMGetCS(void);
295#else
296DECLINLINE(RTSEL) ASMGetCS(void)
297{
298 RTSEL SelCS;
299# if RT_INLINE_ASM_GNU_STYLE
300 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
301# else
302 __asm
303 {
304 mov ax, cs
305 mov [SelCS], ax
306 }
307# endif
308 return SelCS;
309}
310#endif
311
312
313/**
314 * Get the DS register.
315 * @returns DS.
316 */
317#if RT_INLINE_ASM_EXTERNAL
318DECLASM(RTSEL) ASMGetDS(void);
319#else
320DECLINLINE(RTSEL) ASMGetDS(void)
321{
322 RTSEL SelDS;
323# if RT_INLINE_ASM_GNU_STYLE
324 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
325# else
326 __asm
327 {
328 mov ax, ds
329 mov [SelDS], ax
330 }
331# endif
332 return SelDS;
333}
334#endif
335
336
337/**
338 * Get the ES register.
339 * @returns ES.
340 */
341#if RT_INLINE_ASM_EXTERNAL
342DECLASM(RTSEL) ASMGetES(void);
343#else
344DECLINLINE(RTSEL) ASMGetES(void)
345{
346 RTSEL SelES;
347# if RT_INLINE_ASM_GNU_STYLE
348 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
349# else
350 __asm
351 {
352 mov ax, es
353 mov [SelES], ax
354 }
355# endif
356 return SelES;
357}
358#endif
359
360
361/**
362 * Get the FS register.
363 * @returns FS.
364 */
365#if RT_INLINE_ASM_EXTERNAL
366DECLASM(RTSEL) ASMGetFS(void);
367#else
368DECLINLINE(RTSEL) ASMGetFS(void)
369{
370 RTSEL SelFS;
371# if RT_INLINE_ASM_GNU_STYLE
372 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
373# else
374 __asm
375 {
376 mov ax, fs
377 mov [SelFS], ax
378 }
379# endif
380 return SelFS;
381}
382#endif
383
384
385/**
386 * Get the GS register.
387 * @returns GS.
388 */
389#if RT_INLINE_ASM_EXTERNAL
390DECLASM(RTSEL) ASMGetGS(void);
391#else
392DECLINLINE(RTSEL) ASMGetGS(void)
393{
394 RTSEL SelGS;
395# if RT_INLINE_ASM_GNU_STYLE
396 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
397# else
398 __asm
399 {
400 mov ax, gs
401 mov [SelGS], ax
402 }
403# endif
404 return SelGS;
405}
406#endif
407
408
409/**
410 * Get the SS register.
411 * @returns SS.
412 */
413#if RT_INLINE_ASM_EXTERNAL
414DECLASM(RTSEL) ASMGetSS(void);
415#else
416DECLINLINE(RTSEL) ASMGetSS(void)
417{
418 RTSEL SelSS;
419# if RT_INLINE_ASM_GNU_STYLE
420 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
421# else
422 __asm
423 {
424 mov ax, ss
425 mov [SelSS], ax
426 }
427# endif
428 return SelSS;
429}
430#endif
431
432
433/**
434 * Get the TR register.
435 * @returns TR.
436 */
437#if RT_INLINE_ASM_EXTERNAL
438DECLASM(RTSEL) ASMGetTR(void);
439#else
440DECLINLINE(RTSEL) ASMGetTR(void)
441{
442 RTSEL SelTR;
443# if RT_INLINE_ASM_GNU_STYLE
444 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
445# else
446 __asm
447 {
448 str ax
449 mov [SelTR], ax
450 }
451# endif
452 return SelTR;
453}
454#endif
455
456
457/**
458 * Get the [RE]FLAGS register.
459 * @returns [RE]FLAGS.
460 */
461#if RT_INLINE_ASM_EXTERNAL
462DECLASM(RTCCUINTREG) ASMGetFlags(void);
463#else
464DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
465{
466 RTCCUINTREG uFlags;
467# if RT_INLINE_ASM_GNU_STYLE
468# ifdef RT_ARCH_AMD64
469 __asm__ __volatile__("pushfq\n\t"
470 "popq %0\n\t"
471 : "=g" (uFlags));
472# else
473 __asm__ __volatile__("pushfl\n\t"
474 "popl %0\n\t"
475 : "=g" (uFlags));
476# endif
477# else
478 __asm
479 {
480# ifdef RT_ARCH_AMD64
481 pushfq
482 pop [uFlags]
483# else
484 pushfd
485 pop [uFlags]
486# endif
487 }
488# endif
489 return uFlags;
490}
491#endif
492
493
494/**
495 * Set the [RE]FLAGS register.
496 * @param uFlags The new [RE]FLAGS value.
497 */
498#if RT_INLINE_ASM_EXTERNAL
499DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
500#else
501DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
502{
503# if RT_INLINE_ASM_GNU_STYLE
504# ifdef RT_ARCH_AMD64
505 __asm__ __volatile__("pushq %0\n\t"
506 "popfq\n\t"
507 : : "g" (uFlags));
508# else
509 __asm__ __volatile__("pushl %0\n\t"
510 "popfl\n\t"
511 : : "g" (uFlags));
512# endif
513# else
514 __asm
515 {
516# ifdef RT_ARCH_AMD64
517 push [uFlags]
518 popfq
519# else
520 push [uFlags]
521 popfd
522# endif
523 }
524# endif
525}
526#endif
527
528
529/**
530 * Gets the content of the CPU timestamp counter register.
531 *
532 * @returns TSC.
533 */
534#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
535DECLASM(uint64_t) ASMReadTSC(void);
536#else
537DECLINLINE(uint64_t) ASMReadTSC(void)
538{
539 RTUINT64U u;
540# if RT_INLINE_ASM_GNU_STYLE
541 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
542# else
543# if RT_INLINE_ASM_USES_INTRIN
544 u.u = __rdtsc();
545# else
546 __asm
547 {
548 rdtsc
549 mov [u.s.Lo], eax
550 mov [u.s.Hi], edx
551 }
552# endif
553# endif
554 return u.u;
555}
556#endif
557
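/* Usage sketch (illustration only): a rough cycle count around a piece of code.
 * RDTSC is not serializing, so the CPU may reorder instructions around it; treat
 * the delta as an estimate.  workToMeasure() is a hypothetical function.
 * @code
 *     uint64_t const uTscStart = ASMReadTSC();
 *     workToMeasure();
 *     uint64_t const cTicks    = ASMReadTSC() - uTscStart;
 * @endcode
 */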
558
559/**
560 * Performs the cpuid instruction returning all registers.
561 *
562 * @param uOperator CPUID operation (eax).
563 * @param pvEAX Where to store eax.
564 * @param pvEBX Where to store ebx.
565 * @param pvECX Where to store ecx.
566 * @param pvEDX Where to store edx.
567 * @remark We're using void pointers to ease the use of special bitfield structures and such.
568 */
569#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
570DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
571#else
572DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
573{
574# if RT_INLINE_ASM_GNU_STYLE
575# ifdef RT_ARCH_AMD64
576 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
577 __asm__ ("cpuid\n\t"
578 : "=a" (uRAX),
579 "=b" (uRBX),
580 "=c" (uRCX),
581 "=d" (uRDX)
582 : "0" (uOperator));
583 *(uint32_t *)pvEAX = (uint32_t)uRAX;
584 *(uint32_t *)pvEBX = (uint32_t)uRBX;
585 *(uint32_t *)pvECX = (uint32_t)uRCX;
586 *(uint32_t *)pvEDX = (uint32_t)uRDX;
587# else
588 __asm__ ("xchgl %%ebx, %1\n\t"
589 "cpuid\n\t"
590 "xchgl %%ebx, %1\n\t"
591 : "=a" (*(uint32_t *)pvEAX),
592 "=r" (*(uint32_t *)pvEBX),
593 "=c" (*(uint32_t *)pvECX),
594 "=d" (*(uint32_t *)pvEDX)
595 : "0" (uOperator));
596# endif
597
598# elif RT_INLINE_ASM_USES_INTRIN
599 int aInfo[4];
600 __cpuid(aInfo, uOperator);
601 *(uint32_t *)pvEAX = aInfo[0];
602 *(uint32_t *)pvEBX = aInfo[1];
603 *(uint32_t *)pvECX = aInfo[2];
604 *(uint32_t *)pvEDX = aInfo[3];
605
606# else
607 uint32_t uEAX;
608 uint32_t uEBX;
609 uint32_t uECX;
610 uint32_t uEDX;
611 __asm
612 {
613 push ebx
614 mov eax, [uOperator]
615 cpuid
616 mov [uEAX], eax
617 mov [uEBX], ebx
618 mov [uECX], ecx
619 mov [uEDX], edx
620 pop ebx
621 }
622 *(uint32_t *)pvEAX = uEAX;
623 *(uint32_t *)pvEBX = uEBX;
624 *(uint32_t *)pvECX = uECX;
625 *(uint32_t *)pvEDX = uEDX;
626# endif
627}
628#endif
629
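/* Usage sketch (illustration only): assembling the 12-character vendor string from
 * standard leaf 0.  The string is spelled out in EBX, EDX, ECX (in that order);
 * these are the same constants ASMIsIntelCpuEx() compares against further down.
 * Assumes memcpy()/<string.h> is available in the compilation unit.
 * @code
 *     uint32_t uMaxLeaf, uEbx, uEcx, uEdx;
 *     char     szVendor[13];
 *     ASMCpuId(0, &uMaxLeaf, &uEbx, &uEcx, &uEdx);  // uMaxLeaf = highest standard leaf
 *     memcpy(&szVendor[0], &uEbx, 4);               // e.g. "Genu"
 *     memcpy(&szVendor[4], &uEdx, 4);               // e.g. "ineI"
 *     memcpy(&szVendor[8], &uEcx, 4);               // e.g. "ntel"
 *     szVendor[12] = '\0';
 * @endcode
 */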
630
631/**
632 * Performs the cpuid instruction returning all registers.
633 * Some subfunctions of cpuid take ECX as an additional parameter (currently known for EAX=4).
634 *
635 * @param uOperator CPUID operation (eax).
636 * @param uIdxECX ecx index
637 * @param pvEAX Where to store eax.
638 * @param pvEBX Where to store ebx.
639 * @param pvECX Where to store ecx.
640 * @param pvEDX Where to store edx.
641 * @remark We're using void pointers to ease the use of special bitfield structures and such.
642 */
643#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
644DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
645#else
646DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
647{
648# if RT_INLINE_ASM_GNU_STYLE
649# ifdef RT_ARCH_AMD64
650 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
651 __asm__ ("cpuid\n\t"
652 : "=a" (uRAX),
653 "=b" (uRBX),
654 "=c" (uRCX),
655 "=d" (uRDX)
656 : "0" (uOperator),
657 "2" (uIdxECX));
658 *(uint32_t *)pvEAX = (uint32_t)uRAX;
659 *(uint32_t *)pvEBX = (uint32_t)uRBX;
660 *(uint32_t *)pvECX = (uint32_t)uRCX;
661 *(uint32_t *)pvEDX = (uint32_t)uRDX;
662# else
663 __asm__ ("xchgl %%ebx, %1\n\t"
664 "cpuid\n\t"
665 "xchgl %%ebx, %1\n\t"
666 : "=a" (*(uint32_t *)pvEAX),
667 "=r" (*(uint32_t *)pvEBX),
668 "=c" (*(uint32_t *)pvECX),
669 "=d" (*(uint32_t *)pvEDX)
670 : "0" (uOperator),
671 "2" (uIdxECX));
672# endif
673
674# elif RT_INLINE_ASM_USES_INTRIN
675 int aInfo[4];
676 /* @todo uIdxECX is ignored here; an ECX-aware intrinsic (e.g. __cpuidex where available) is needed. */
677 __cpuid(aInfo, uOperator);
678 *(uint32_t *)pvEAX = aInfo[0];
679 *(uint32_t *)pvEBX = aInfo[1];
680 *(uint32_t *)pvECX = aInfo[2];
681 *(uint32_t *)pvEDX = aInfo[3];
682
683# else
684 uint32_t uEAX;
685 uint32_t uEBX;
686 uint32_t uECX;
687 uint32_t uEDX;
688 __asm
689 {
690 push ebx
691 mov eax, [uOperator]
692 mov ecx, [uIdxECX]
693 cpuid
694 mov [uEAX], eax
695 mov [uEBX], ebx
696 mov [uECX], ecx
697 mov [uEDX], edx
698 pop ebx
699 }
700 *(uint32_t *)pvEAX = uEAX;
701 *(uint32_t *)pvEBX = uEBX;
702 *(uint32_t *)pvECX = uECX;
703 *(uint32_t *)pvEDX = uEDX;
704# endif
705}
706#endif
707
708
709/**
710 * Performs the cpuid instruction returning ecx and edx.
711 *
712 * @param uOperator CPUID operation (eax).
713 * @param pvECX Where to store ecx.
714 * @param pvEDX Where to store edx.
715 * @remark We're using void pointers to ease the use of special bitfield structures and such.
716 */
717#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
718DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
719#else
720DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
721{
722 uint32_t uEBX;
723 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
724}
725#endif
726
727
728/**
729 * Performs the cpuid instruction returning edx.
730 *
731 * @param uOperator CPUID operation (eax).
732 * @returns EDX after cpuid operation.
733 */
734#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
735DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
736#else
737DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
738{
739 RTCCUINTREG xDX;
740# if RT_INLINE_ASM_GNU_STYLE
741# ifdef RT_ARCH_AMD64
742 RTCCUINTREG uSpill;
743 __asm__ ("cpuid"
744 : "=a" (uSpill),
745 "=d" (xDX)
746 : "0" (uOperator)
747 : "rbx", "rcx");
748# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
749 __asm__ ("push %%ebx\n\t"
750 "cpuid\n\t"
751 "pop %%ebx\n\t"
752 : "=a" (uOperator),
753 "=d" (xDX)
754 : "0" (uOperator)
755 : "ecx");
756# else
757 __asm__ ("cpuid"
758 : "=a" (uOperator),
759 "=d" (xDX)
760 : "0" (uOperator)
761 : "ebx", "ecx");
762# endif
763
764# elif RT_INLINE_ASM_USES_INTRIN
765 int aInfo[4];
766 __cpuid(aInfo, uOperator);
767 xDX = aInfo[3];
768
769# else
770 __asm
771 {
772 push ebx
773 mov eax, [uOperator]
774 cpuid
775 mov [xDX], edx
776 pop ebx
777 }
778# endif
779 return (uint32_t)xDX;
780}
781#endif
782
783
784/**
785 * Performs the cpuid instruction returning ecx.
786 *
787 * @param uOperator CPUID operation (eax).
788 * @returns ECX after cpuid operation.
789 */
790#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
791DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
792#else
793DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
794{
795 RTCCUINTREG xCX;
796# if RT_INLINE_ASM_GNU_STYLE
797# ifdef RT_ARCH_AMD64
798 RTCCUINTREG uSpill;
799 __asm__ ("cpuid"
800 : "=a" (uSpill),
801 "=c" (xCX)
802 : "0" (uOperator)
803 : "rbx", "rdx");
804# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
805 __asm__ ("push %%ebx\n\t"
806 "cpuid\n\t"
807 "pop %%ebx\n\t"
808 : "=a" (uOperator),
809 "=c" (xCX)
810 : "0" (uOperator)
811 : "edx");
812# else
813 __asm__ ("cpuid"
814 : "=a" (uOperator),
815 "=c" (xCX)
816 : "0" (uOperator)
817 : "ebx", "edx");
818
819# endif
820
821# elif RT_INLINE_ASM_USES_INTRIN
822 int aInfo[4];
823 __cpuid(aInfo, uOperator);
824 xCX = aInfo[2];
825
826# else
827 __asm
828 {
829 push ebx
830 mov eax, [uOperator]
831 cpuid
832 mov [xCX], ecx
833 pop ebx
834 }
835# endif
836 return (uint32_t)xCX;
837}
838#endif
839
840
841/**
842 * Checks if the current CPU supports CPUID.
843 *
844 * @returns true if CPUID is supported.
845 */
846DECLINLINE(bool) ASMHasCpuId(void)
847{
848#ifdef RT_ARCH_AMD64
849 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
850#else /* !RT_ARCH_AMD64 */
851 bool fRet = false;
852# if RT_INLINE_ASM_GNU_STYLE
853 uint32_t u1;
854 uint32_t u2;
855 __asm__ ("pushf\n\t"
856 "pop %1\n\t"
857 "mov %1, %2\n\t"
858 "xorl $0x200000, %1\n\t"
859 "push %1\n\t"
860 "popf\n\t"
861 "pushf\n\t"
862 "pop %1\n\t"
863 "cmpl %1, %2\n\t"
864 "setne %0\n\t"
865 "push %2\n\t"
866 "popf\n\t"
867 : "=m" (fRet), "=r" (u1), "=r" (u2));
868# else
869 __asm
870 {
871 pushfd
872 pop eax
873 mov ebx, eax
874 xor eax, 0200000h
875 push eax
876 popfd
877 pushfd
878 pop eax
879 cmp eax, ebx
880 setne fRet
881 push ebx
882 popfd
883 }
884# endif
885 return fRet;
886#endif /* !RT_ARCH_AMD64 */
887}
888
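/* Usage sketch (illustration only): guarding feature probing on 32-bit hosts where
 * very old CPUs might lack CPUID.
 * @code
 *     uint32_t fStdFeaturesEdx = 0;
 *     if (ASMHasCpuId())
 *         fStdFeaturesEdx = ASMCpuId_EDX(1);  // standard feature flags (EDX of leaf 1)
 * @endcode
 */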
889
890/**
891 * Gets the APIC ID of the current CPU.
892 *
893 * @returns the APIC ID.
894 */
895#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
896DECLASM(uint8_t) ASMGetApicId(void);
897#else
898DECLINLINE(uint8_t) ASMGetApicId(void)
899{
900 RTCCUINTREG xBX;
901# if RT_INLINE_ASM_GNU_STYLE
902# ifdef RT_ARCH_AMD64
903 RTCCUINTREG uSpill;
904 __asm__ ("cpuid"
905 : "=a" (uSpill),
906 "=b" (xBX)
907 : "0" (1)
908 : "rcx", "rdx");
909# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
910 RTCCUINTREG uSpill;
911 __asm__ ("mov %%ebx,%1\n\t"
912 "cpuid\n\t"
913 "xchgl %%ebx,%1\n\t"
914 : "=a" (uSpill),
915 "=r" (xBX)
916 : "0" (1)
917 : "ecx", "edx");
918# else
919 RTCCUINTREG uSpill;
920 __asm__ ("cpuid"
921 : "=a" (uSpill),
922 "=b" (xBX)
923 : "0" (1)
924 : "ecx", "edx");
925# endif
926
927# elif RT_INLINE_ASM_USES_INTRIN
928 int aInfo[4];
929 __cpuid(aInfo, 1);
930 xBX = aInfo[1];
931
932# else
933 __asm
934 {
935 push ebx
936 mov eax, 1
937 cpuid
938 mov [xBX], ebx
939 pop ebx
940 }
941# endif
942 return (uint8_t)(xBX >> 24);
943}
944#endif
945
946
947/**
948 * Tests if it is a genuine Intel CPU based on the ASMCpuId(0) output.
949 *
950 * @returns true/false.
951 * @param uEBX EBX return from ASMCpuId(0)
952 * @param uECX ECX return from ASMCpuId(0)
953 * @param uEDX EDX return from ASMCpuId(0)
954 */
955DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
956{
957 return uEBX == 0x756e6547 /* 'Genu' */
958 && uECX == 0x6c65746e /* 'ntel' */
959 && uEDX == 0x49656e69; /* 'ineI' */
960}
961
962
963/**
964 * Tests if this is a genuine Intel CPU.
965 *
966 * @returns true/false.
967 */
968DECLINLINE(bool) ASMIsIntelCpu(void)
969{
970 uint32_t uEAX, uEBX, uECX, uEDX;
971 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
972 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
973}
974
975
976/**
977 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001)
978 *
979 * @returns Family.
980 * @param uEAX EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
981 */
982DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
983{
984 return ((uEAX >> 8) & 0xf) == 0xf
985 ? ((uEAX >> 20) & 0x7f) + 0xf
986 : ((uEAX >> 8) & 0xf);
987}
988
989
990/**
991 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
992 *
993 * @returns Model.
994 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
996 */
997DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
998{
999 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6) /* family! */
1000 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1001 : ((uEAX >> 4) & 0xf);
1002}
1003
1004
1005/**
1006 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
1007 *
1008 * @returns Model.
1009 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1011 */
1012DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
1013{
1014 return ((uEAX >> 8) & 0xf) == 0xf
1015 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1016 : ((uEAX >> 4) & 0xf);
1017}
1018
1019
1020/**
1021 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001)
1022 *
1023 * @returns Model.
1024 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1025 * @param fIntel Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
1026 */
1027DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
1028{
1029 return ((uEAX >> 8) & 0xf) == 0xf || (((uEAX >> 8) & 0xf) == 0x6 && fIntel) /* family! */
1030 ? ((uEAX >> 4) & 0xf) | ((uEAX >> 12) & 0xf0)
1031 : ((uEAX >> 4) & 0xf);
1032}
1033
1034
1035/**
1036 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001)
1037 *
1038 * @returns Stepping.
1039 * @param uEAX EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
1040 */
1041DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
1042{
1043 return uEAX & 0xf;
1044}
1045
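/* Usage sketch (illustration only): decoding the leaf 1 signature with the helpers
 * above.
 * @code
 *     uint32_t uEAX, uEBX, uECX, uEDX;
 *     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
 *     bool const fIntel = ASMIsIntelCpuEx(uEBX, uECX, uEDX);
 *     ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
 *     uint32_t const uFamily   = ASMGetCpuFamily(uEAX);
 *     uint32_t const uModel    = ASMGetCpuModel(uEAX, fIntel);
 *     uint32_t const uStepping = ASMGetCpuStepping(uEAX);
 * @endcode
 */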
1046
1047/**
1048 * Get cr0.
1049 * @returns cr0.
1050 */
1051#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1052DECLASM(RTCCUINTREG) ASMGetCR0(void);
1053#else
1054DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
1055{
1056 RTCCUINTREG uCR0;
1057# if RT_INLINE_ASM_USES_INTRIN
1058 uCR0 = __readcr0();
1059
1060# elif RT_INLINE_ASM_GNU_STYLE
1061# ifdef RT_ARCH_AMD64
1062 __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
1063# else
1064 __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
1065# endif
1066# else
1067 __asm
1068 {
1069# ifdef RT_ARCH_AMD64
1070 mov rax, cr0
1071 mov [uCR0], rax
1072# else
1073 mov eax, cr0
1074 mov [uCR0], eax
1075# endif
1076 }
1077# endif
1078 return uCR0;
1079}
1080#endif
1081
1082
1083/**
1084 * Sets the CR0 register.
1085 * @param uCR0 The new CR0 value.
1086 */
1087#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1088DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
1089#else
1090DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
1091{
1092# if RT_INLINE_ASM_USES_INTRIN
1093 __writecr0(uCR0);
1094
1095# elif RT_INLINE_ASM_GNU_STYLE
1096# ifdef RT_ARCH_AMD64
1097 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
1098# else
1099 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
1100# endif
1101# else
1102 __asm
1103 {
1104# ifdef RT_ARCH_AMD64
1105 mov rax, [uCR0]
1106 mov cr0, rax
1107# else
1108 mov eax, [uCR0]
1109 mov cr0, eax
1110# endif
1111 }
1112# endif
1113}
1114#endif
1115
1116
1117/**
1118 * Get cr2.
1119 * @returns cr2.
1120 */
1121#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1122DECLASM(RTCCUINTREG) ASMGetCR2(void);
1123#else
1124DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
1125{
1126 RTCCUINTREG uCR2;
1127# if RT_INLINE_ASM_USES_INTRIN
1128 uCR2 = __readcr2();
1129
1130# elif RT_INLINE_ASM_GNU_STYLE
1131# ifdef RT_ARCH_AMD64
1132 __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
1133# else
1134 __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
1135# endif
1136# else
1137 __asm
1138 {
1139# ifdef RT_ARCH_AMD64
1140 mov rax, cr2
1141 mov [uCR2], rax
1142# else
1143 mov eax, cr2
1144 mov [uCR2], eax
1145# endif
1146 }
1147# endif
1148 return uCR2;
1149}
1150#endif
1151
1152
1153/**
1154 * Sets the CR2 register.
1155 * @param uCR2 The new CR2 value.
1156 */
1157#if RT_INLINE_ASM_EXTERNAL
1158DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1159#else
1160DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1161{
1162# if RT_INLINE_ASM_GNU_STYLE
1163# ifdef RT_ARCH_AMD64
1164 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1165# else
1166 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1167# endif
1168# else
1169 __asm
1170 {
1171# ifdef RT_ARCH_AMD64
1172 mov rax, [uCR2]
1173 mov cr2, rax
1174# else
1175 mov eax, [uCR2]
1176 mov cr2, eax
1177# endif
1178 }
1179# endif
1180}
1181#endif
1182
1183
1184/**
1185 * Get cr3.
1186 * @returns cr3.
1187 */
1188#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1189DECLASM(RTCCUINTREG) ASMGetCR3(void);
1190#else
1191DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1192{
1193 RTCCUINTREG uCR3;
1194# if RT_INLINE_ASM_USES_INTRIN
1195 uCR3 = __readcr3();
1196
1197# elif RT_INLINE_ASM_GNU_STYLE
1198# ifdef RT_ARCH_AMD64
1199 __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
1200# else
1201 __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
1202# endif
1203# else
1204 __asm
1205 {
1206# ifdef RT_ARCH_AMD64
1207 mov rax, cr3
1208 mov [uCR3], rax
1209# else
1210 mov eax, cr3
1211 mov [uCR3], eax
1212# endif
1213 }
1214# endif
1215 return uCR3;
1216}
1217#endif
1218
1219
1220/**
1221 * Sets the CR3 register.
1222 *
1223 * @param uCR3 New CR3 value.
1224 */
1225#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1226DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1227#else
1228DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1229{
1230# if RT_INLINE_ASM_USES_INTRIN
1231 __writecr3(uCR3);
1232
1233# elif RT_INLINE_ASM_GNU_STYLE
1234# ifdef RT_ARCH_AMD64
1235 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1236# else
1237 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1238# endif
1239# else
1240 __asm
1241 {
1242# ifdef RT_ARCH_AMD64
1243 mov rax, [uCR3]
1244 mov cr3, rax
1245# else
1246 mov eax, [uCR3]
1247 mov cr3, eax
1248# endif
1249 }
1250# endif
1251}
1252#endif
1253
1254
1255/**
1256 * Reloads the CR3 register.
1257 */
1258#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1259DECLASM(void) ASMReloadCR3(void);
1260#else
1261DECLINLINE(void) ASMReloadCR3(void)
1262{
1263# if RT_INLINE_ASM_USES_INTRIN
1264 __writecr3(__readcr3());
1265
1266# elif RT_INLINE_ASM_GNU_STYLE
1267 RTCCUINTREG u;
1268# ifdef RT_ARCH_AMD64
1269 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1270 "movq %0, %%cr3\n\t"
1271 : "=r" (u));
1272# else
1273 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1274 "movl %0, %%cr3\n\t"
1275 : "=r" (u));
1276# endif
1277# else
1278 __asm
1279 {
1280# ifdef RT_ARCH_AMD64
1281 mov rax, cr3
1282 mov cr3, rax
1283# else
1284 mov eax, cr3
1285 mov cr3, eax
1286# endif
1287 }
1288# endif
1289}
1290#endif
1291
1292
1293/**
1294 * Get cr4.
1295 * @returns cr4.
1296 */
1297#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1298DECLASM(RTCCUINTREG) ASMGetCR4(void);
1299#else
1300DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1301{
1302 RTCCUINTREG uCR4;
1303# if RT_INLINE_ASM_USES_INTRIN
1304 uCR4 = __readcr4();
1305
1306# elif RT_INLINE_ASM_GNU_STYLE
1307# ifdef RT_ARCH_AMD64
1308 __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
1309# else
1310 __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
1311# endif
1312# else
1313 __asm
1314 {
1315# ifdef RT_ARCH_AMD64
1316 mov rax, cr4
1317 mov [uCR4], rax
1318# else
1319 push eax /* just in case */
1320 /*mov eax, cr4*/
1321 _emit 0x0f
1322 _emit 0x20
1323 _emit 0xe0
1324 mov [uCR4], eax
1325 pop eax
1326# endif
1327 }
1328# endif
1329 return uCR4;
1330}
1331#endif
1332
1333
1334/**
1335 * Sets the CR4 register.
1336 *
1337 * @param uCR4 New CR4 value.
1338 */
1339#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1340DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1341#else
1342DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1343{
1344# if RT_INLINE_ASM_USES_INTRIN
1345 __writecr4(uCR4);
1346
1347# elif RT_INLINE_ASM_GNU_STYLE
1348# ifdef RT_ARCH_AMD64
1349 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1350# else
1351 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1352# endif
1353# else
1354 __asm
1355 {
1356# ifdef RT_ARCH_AMD64
1357 mov rax, [uCR4]
1358 mov cr4, rax
1359# else
1360 mov eax, [uCR4]
1361 _emit 0x0F
1362 _emit 0x22
1363 _emit 0xE0 /* mov cr4, eax */
1364# endif
1365 }
1366# endif
1367}
1368#endif
1369
1370
1371/**
1372 * Get cr8.
1373 * @returns cr8.
1374 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1375 */
1376#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1377DECLASM(RTCCUINTREG) ASMGetCR8(void);
1378#else
1379DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1380{
1381# ifdef RT_ARCH_AMD64
1382 RTCCUINTREG uCR8;
1383# if RT_INLINE_ASM_USES_INTRIN
1384 uCR8 = __readcr8();
1385
1386# elif RT_INLINE_ASM_GNU_STYLE
1387 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1388# else
1389 __asm
1390 {
1391 mov rax, cr8
1392 mov [uCR8], rax
1393 }
1394# endif
1395 return uCR8;
1396# else /* !RT_ARCH_AMD64 */
1397 return 0;
1398# endif /* !RT_ARCH_AMD64 */
1399}
1400#endif
1401
1402
1403/**
1404 * Enables interrupts (EFLAGS.IF).
1405 */
1406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1407DECLASM(void) ASMIntEnable(void);
1408#else
1409DECLINLINE(void) ASMIntEnable(void)
1410{
1411# if RT_INLINE_ASM_GNU_STYLE
1412 __asm("sti\n");
1413# elif RT_INLINE_ASM_USES_INTRIN
1414 _enable();
1415# else
1416 __asm sti
1417# endif
1418}
1419#endif
1420
1421
1422/**
1423 * Disables interrupts (!EFLAGS.IF).
1424 */
1425#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1426DECLASM(void) ASMIntDisable(void);
1427#else
1428DECLINLINE(void) ASMIntDisable(void)
1429{
1430# if RT_INLINE_ASM_GNU_STYLE
1431 __asm("cli\n");
1432# elif RT_INLINE_ASM_USES_INTRIN
1433 _disable();
1434# else
1435 __asm cli
1436# endif
1437}
1438#endif
1439
1440
1441/**
1442 * Disables interrupts and returns previous xFLAGS.
1443 */
1444#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1445DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1446#else
1447DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1448{
1449 RTCCUINTREG xFlags;
1450# if RT_INLINE_ASM_GNU_STYLE
1451# ifdef RT_ARCH_AMD64
1452 __asm__ __volatile__("pushfq\n\t"
1453 "cli\n\t"
1454 "popq %0\n\t"
1455 : "=rm" (xFlags));
1456# else
1457 __asm__ __volatile__("pushfl\n\t"
1458 "cli\n\t"
1459 "popl %0\n\t"
1460 : "=rm" (xFlags));
1461# endif
1462# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1463 xFlags = ASMGetFlags();
1464 _disable();
1465# else
1466 __asm {
1467 pushfd
1468 cli
1469 pop [xFlags]
1470 }
1471# endif
1472 return xFlags;
1473}
1474#endif
1475
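/* Usage sketch (illustration only): the usual save/disable/restore pattern for a
 * short critical section in ring-0 or raw-mode context.
 * @code
 *     RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();
 *     // ... touch data that must not be interrupted on this CPU ...
 *     ASMSetFlags(fSavedFlags);           // restores EFLAGS.IF to what it was
 * @endcode
 */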
1476
1477/**
1478 * Reads a machine specific register.
1479 *
1480 * @returns Register content.
1481 * @param uRegister Register to read.
1482 */
1483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1484DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1485#else
1486DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1487{
1488 RTUINT64U u;
1489# if RT_INLINE_ASM_GNU_STYLE
1490 __asm__ __volatile__("rdmsr\n\t"
1491 : "=a" (u.s.Lo),
1492 "=d" (u.s.Hi)
1493 : "c" (uRegister));
1494
1495# elif RT_INLINE_ASM_USES_INTRIN
1496 u.u = __readmsr(uRegister);
1497
1498# else
1499 __asm
1500 {
1501 mov ecx, [uRegister]
1502 rdmsr
1503 mov [u.s.Lo], eax
1504 mov [u.s.Hi], edx
1505 }
1506# endif
1507
1508 return u.u;
1509}
1510#endif
1511
1512
1513/**
1514 * Writes a machine specific register.
1515 *
1516 * @returns Register content.
1517 * @param uRegister Register to write to.
1518 * @param u64Val Value to write.
1519 */
1520#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1521DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1522#else
1523DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1524{
1525 RTUINT64U u;
1526
1527 u.u = u64Val;
1528# if RT_INLINE_ASM_GNU_STYLE
1529 __asm__ __volatile__("wrmsr\n\t"
1530 ::"a" (u.s.Lo),
1531 "d" (u.s.Hi),
1532 "c" (uRegister));
1533
1534# elif RT_INLINE_ASM_USES_INTRIN
1535 __writemsr(uRegister, u.u);
1536
1537# else
1538 __asm
1539 {
1540 mov ecx, [uRegister]
1541 mov edx, [u.s.Hi]
1542 mov eax, [u.s.Lo]
1543 wrmsr
1544 }
1545# endif
1546}
1547#endif
1548
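/* Usage sketch (illustration only, ring-0 only): reading an architectural MSR.
 * 0x0000001b is IA32_APIC_BASE on x86; bit 11 is the global APIC enable bit.
 * @code
 *     uint64_t const uApicBase = ASMRdMsr(0x0000001b);
 *     bool     const fApicOn   = ((uApicBase >> 11) & 1) != 0;
 * @endcode
 */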
1549
1550/**
1551 * Reads low part of a machine specific register.
1552 *
1553 * @returns Register content.
1554 * @param uRegister Register to read.
1555 */
1556#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1557DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1558#else
1559DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1560{
1561 uint32_t u32;
1562# if RT_INLINE_ASM_GNU_STYLE
1563 __asm__ __volatile__("rdmsr\n\t"
1564 : "=a" (u32)
1565 : "c" (uRegister)
1566 : "edx");
1567
1568# elif RT_INLINE_ASM_USES_INTRIN
1569 u32 = (uint32_t)__readmsr(uRegister);
1570
1571#else
1572 __asm
1573 {
1574 mov ecx, [uRegister]
1575 rdmsr
1576 mov [u32], eax
1577 }
1578# endif
1579
1580 return u32;
1581}
1582#endif
1583
1584
1585/**
1586 * Reads high part of a machine specific register.
1587 *
1588 * @returns Register content.
1589 * @param uRegister Register to read.
1590 */
1591#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1592DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1593#else
1594DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1595{
1596 uint32_t u32;
1597# if RT_INLINE_ASM_GNU_STYLE
1598 __asm__ __volatile__("rdmsr\n\t"
1599 : "=d" (u32)
1600 : "c" (uRegister)
1601 : "eax");
1602
1603# elif RT_INLINE_ASM_USES_INTRIN
1604 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1605
1606# else
1607 __asm
1608 {
1609 mov ecx, [uRegister]
1610 rdmsr
1611 mov [u32], edx
1612 }
1613# endif
1614
1615 return u32;
1616}
1617#endif
1618
1619
1620/**
1621 * Gets dr0.
1622 *
1623 * @returns dr0.
1624 */
1625#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1626DECLASM(RTCCUINTREG) ASMGetDR0(void);
1627#else
1628DECLINLINE(RTCCUINTREG) ASMGetDR0(void)
1629{
1630 RTCCUINTREG uDR0;
1631# if RT_INLINE_ASM_USES_INTRIN
1632 uDR0 = __readdr(0);
1633# elif RT_INLINE_ASM_GNU_STYLE
1634# ifdef RT_ARCH_AMD64
1635 __asm__ __volatile__("movq %%dr0, %0\n\t" : "=r" (uDR0));
1636# else
1637 __asm__ __volatile__("movl %%dr0, %0\n\t" : "=r" (uDR0));
1638# endif
1639# else
1640 __asm
1641 {
1642# ifdef RT_ARCH_AMD64
1643 mov rax, dr0
1644 mov [uDR0], rax
1645# else
1646 mov eax, dr0
1647 mov [uDR0], eax
1648# endif
1649 }
1650# endif
1651 return uDR0;
1652}
1653#endif
1654
1655
1656/**
1657 * Gets dr1.
1658 *
1659 * @returns dr1.
1660 */
1661#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1662DECLASM(RTCCUINTREG) ASMGetDR1(void);
1663#else
1664DECLINLINE(RTCCUINTREG) ASMGetDR1(void)
1665{
1666 RTCCUINTREG uDR1;
1667# if RT_INLINE_ASM_USES_INTRIN
1668 uDR1 = __readdr(1);
1669# elif RT_INLINE_ASM_GNU_STYLE
1670# ifdef RT_ARCH_AMD64
1671 __asm__ __volatile__("movq %%dr1, %0\n\t" : "=r" (uDR1));
1672# else
1673 __asm__ __volatile__("movl %%dr1, %0\n\t" : "=r" (uDR1));
1674# endif
1675# else
1676 __asm
1677 {
1678# ifdef RT_ARCH_AMD64
1679 mov rax, dr1
1680 mov [uDR1], rax
1681# else
1682 mov eax, dr1
1683 mov [uDR1], eax
1684# endif
1685 }
1686# endif
1687 return uDR1;
1688}
1689#endif
1690
1691
1692/**
1693 * Gets dr2.
1694 *
1695 * @returns dr2.
1696 */
1697#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1698DECLASM(RTCCUINTREG) ASMGetDR2(void);
1699#else
1700DECLINLINE(RTCCUINTREG) ASMGetDR2(void)
1701{
1702 RTCCUINTREG uDR2;
1703# if RT_INLINE_ASM_USES_INTRIN
1704 uDR2 = __readdr(2);
1705# elif RT_INLINE_ASM_GNU_STYLE
1706# ifdef RT_ARCH_AMD64
1707 __asm__ __volatile__("movq %%dr2, %0\n\t" : "=r" (uDR2));
1708# else
1709 __asm__ __volatile__("movl %%dr2, %0\n\t" : "=r" (uDR2));
1710# endif
1711# else
1712 __asm
1713 {
1714# ifdef RT_ARCH_AMD64
1715 mov rax, dr2
1716 mov [uDR2], rax
1717# else
1718 mov eax, dr2
1719 mov [uDR2], eax
1720# endif
1721 }
1722# endif
1723 return uDR2;
1724}
1725#endif
1726
1727
1728/**
1729 * Gets dr3.
1730 *
1731 * @returns dr3.
1732 */
1733#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1734DECLASM(RTCCUINTREG) ASMGetDR3(void);
1735#else
1736DECLINLINE(RTCCUINTREG) ASMGetDR3(void)
1737{
1738 RTCCUINTREG uDR3;
1739# if RT_INLINE_ASM_USES_INTRIN
1740 uDR3 = __readdr(3);
1741# elif RT_INLINE_ASM_GNU_STYLE
1742# ifdef RT_ARCH_AMD64
1743 __asm__ __volatile__("movq %%dr3, %0\n\t" : "=r" (uDR3));
1744# else
1745 __asm__ __volatile__("movl %%dr3, %0\n\t" : "=r" (uDR3));
1746# endif
1747# else
1748 __asm
1749 {
1750# ifdef RT_ARCH_AMD64
1751 mov rax, dr3
1752 mov [uDR3], rax
1753# else
1754 mov eax, dr3
1755 mov [uDR3], eax
1756# endif
1757 }
1758# endif
1759 return uDR3;
1760}
1761#endif
1762
1763
1764/**
1765 * Gets dr6.
1766 *
1767 * @returns dr6.
1768 */
1769#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1770DECLASM(RTCCUINTREG) ASMGetDR6(void);
1771#else
1772DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1773{
1774 RTCCUINTREG uDR6;
1775# if RT_INLINE_ASM_USES_INTRIN
1776 uDR6 = __readdr(6);
1777# elif RT_INLINE_ASM_GNU_STYLE
1778# ifdef RT_ARCH_AMD64
1779 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1780# else
1781 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1782# endif
1783# else
1784 __asm
1785 {
1786# ifdef RT_ARCH_AMD64
1787 mov rax, dr6
1788 mov [uDR6], rax
1789# else
1790 mov eax, dr6
1791 mov [uDR6], eax
1792# endif
1793 }
1794# endif
1795 return uDR6;
1796}
1797#endif
1798
1799
1800/**
1801 * Reads and clears DR6.
1802 *
1803 * @returns DR6.
1804 */
1805#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1806DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1807#else
1808DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1809{
1810 RTCCUINTREG uDR6;
1811# if RT_INLINE_ASM_USES_INTRIN
1812 uDR6 = __readdr(6);
1813 __writedr(6, 0xffff0ff0U); /* Bits 31-16 and 11-4 are 1's, everything else (incl. 63-32) is zero. */
1814# elif RT_INLINE_ASM_GNU_STYLE
1815 RTCCUINTREG uNewValue = 0xffff0ff0U; /* Bits 31-16 and 11-4 are 1's, everything else (incl. 63-32) is zero. */
1816# ifdef RT_ARCH_AMD64
1817 __asm__ __volatile__("movq %%dr6, %0\n\t"
1818 "movq %1, %%dr6\n\t"
1819 : "=r" (uDR6)
1820 : "r" (uNewValue));
1821# else
1822 __asm__ __volatile__("movl %%dr6, %0\n\t"
1823 "movl %1, %%dr6\n\t"
1824 : "=r" (uDR6)
1825 : "r" (uNewValue));
1826# endif
1827# else
1828 __asm
1829 {
1830# ifdef RT_ARCH_AMD64
1831 mov rax, dr6
1832 mov [uDR6], rax
1833 mov rcx, rax
1834 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1835 mov dr6, rcx
1836# else
1837 mov eax, dr6
1838 mov [uDR6], eax
1839 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1840 mov dr6, ecx
1841# endif
1842 }
1843# endif
1844 return uDR6;
1845}
1846#endif
1847
1848
1849/**
1850 * Gets dr7.
1851 *
1852 * @returns dr7.
1853 */
1854#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1855DECLASM(RTCCUINTREG) ASMGetDR7(void);
1856#else
1857DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1858{
1859 RTCCUINTREG uDR7;
1860# if RT_INLINE_ASM_USES_INTRIN
1861 uDR7 = __readdr(7);
1862# elif RT_INLINE_ASM_GNU_STYLE
1863# ifdef RT_ARCH_AMD64
1864 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1865# else
1866 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1867# endif
1868# else
1869 __asm
1870 {
1871# ifdef RT_ARCH_AMD64
1872 mov rax, dr7
1873 mov [uDR7], rax
1874# else
1875 mov eax, dr7
1876 mov [uDR7], eax
1877# endif
1878 }
1879# endif
1880 return uDR7;
1881}
1882#endif
1883
1884
1885/**
1886 * Sets dr0.
1887 *
1888 * @param uDRVal Debug register value to write
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMSetDR0(RTCCUINTREG uDRVal);
1892#else
1893DECLINLINE(void) ASMSetDR0(RTCCUINTREG uDRVal)
1894{
1895# if RT_INLINE_ASM_USES_INTRIN
1896 __writedr(0, uDRVal);
1897# elif RT_INLINE_ASM_GNU_STYLE
1898# ifdef RT_ARCH_AMD64
1899 __asm__ __volatile__("movq %0, %%dr0\n\t" : : "r" (uDRVal));
1900# else
1901 __asm__ __volatile__("movl %0, %%dr0\n\t" : : "r" (uDRVal));
1902# endif
1903# else
1904 __asm
1905 {
1906# ifdef RT_ARCH_AMD64
1907 mov rax, [uDRVal]
1908 mov dr0, rax
1909# else
1910 mov eax, [uDRVal]
1911 mov dr0, eax
1912# endif
1913 }
1914# endif
1915}
1916#endif
1917
1918
1919/**
1920 * Sets dr1.
1921 *
1922 * @param uDRVal Debug register value to write
1923 */
1924#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1925DECLASM(void) ASMSetDR1(RTCCUINTREG uDRVal);
1926#else
1927DECLINLINE(void) ASMSetDR1(RTCCUINTREG uDRVal)
1928{
1929# if RT_INLINE_ASM_USES_INTRIN
1930 __writedr(1, uDRVal);
1931# elif RT_INLINE_ASM_GNU_STYLE
1932# ifdef RT_ARCH_AMD64
1933 __asm__ __volatile__("movq %0, %%dr1\n\t" : : "r" (uDRVal));
1934# else
1935 __asm__ __volatile__("movl %0, %%dr1\n\t" : : "r" (uDRVal));
1936# endif
1937# else
1938 __asm
1939 {
1940# ifdef RT_ARCH_AMD64
1941 mov rax, [uDRVal]
1942 mov dr1, rax
1943# else
1944 mov eax, [uDRVal]
1945 mov dr1, eax
1946# endif
1947 }
1948# endif
1949}
1950#endif
1951
1952
1953/**
1954 * Sets dr2.
1955 *
1956 * @param uDRVal Debug register value to write
1957 */
1958#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1959DECLASM(void) ASMSetDR2(RTCCUINTREG uDRVal);
1960#else
1961DECLINLINE(void) ASMSetDR2(RTCCUINTREG uDRVal)
1962{
1963# if RT_INLINE_ASM_USES_INTRIN
1964 __writedr(2, uDRVal);
1965# elif RT_INLINE_ASM_GNU_STYLE
1966# ifdef RT_ARCH_AMD64
1967 __asm__ __volatile__("movq %0, %%dr2\n\t" : : "r" (uDRVal));
1968# else
1969 __asm__ __volatile__("movl %0, %%dr2\n\t" : : "r" (uDRVal));
1970# endif
1971# else
1972 __asm
1973 {
1974# ifdef RT_ARCH_AMD64
1975 mov rax, [uDRVal]
1976 mov dr2, rax
1977# else
1978 mov eax, [uDRVal]
1979 mov dr2, eax
1980# endif
1981 }
1982# endif
1983}
1984#endif
1985
1986
1987/**
1988 * Sets dr3.
1989 *
1990 * @param uDRVal Debug register value to write
1991 */
1992#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1993DECLASM(void) ASMSetDR3(RTCCUINTREG uDRVal);
1994#else
1995DECLINLINE(void) ASMSetDR3(RTCCUINTREG uDRVal)
1996{
1997# if RT_INLINE_ASM_USES_INTRIN
1998 __writedr(3, uDRVal);
1999# elif RT_INLINE_ASM_GNU_STYLE
2000# ifdef RT_ARCH_AMD64
2001 __asm__ __volatile__("movq %0, %%dr3\n\t" : : "r" (uDRVal));
2002# else
2003 __asm__ __volatile__("movl %0, %%dr3\n\t" : : "r" (uDRVal));
2004# endif
2005# else
2006 __asm
2007 {
2008# ifdef RT_ARCH_AMD64
2009 mov rax, [uDRVal]
2010 mov dr3, rax
2011# else
2012 mov eax, [uDRVal]
2013 mov dr3, eax
2014# endif
2015 }
2016# endif
2017}
2018#endif
2019
2020
2021/**
2022 * Sets dr6.
2023 *
2024 * @param uDRVal Debug register value to write
2025 */
2026#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2027DECLASM(void) ASMSetDR6(RTCCUINTREG uDRVal);
2028#else
2029DECLINLINE(void) ASMSetDR6(RTCCUINTREG uDRVal)
2030{
2031# if RT_INLINE_ASM_USES_INTRIN
2032 __writedr(6, uDRVal);
2033# elif RT_INLINE_ASM_GNU_STYLE
2034# ifdef RT_ARCH_AMD64
2035 __asm__ __volatile__("movq %0, %%dr6\n\t" : : "r" (uDRVal));
2036# else
2037 __asm__ __volatile__("movl %0, %%dr6\n\t" : : "r" (uDRVal));
2038# endif
2039# else
2040 __asm
2041 {
2042# ifdef RT_ARCH_AMD64
2043 mov rax, [uDRVal]
2044 mov dr6, rax
2045# else
2046 mov eax, [uDRVal]
2047 mov dr6, eax
2048# endif
2049 }
2050# endif
2051}
2052#endif
2053
2054
2055/**
2056 * Sets dr7.
2057 *
2058 * @param uDRVal Debug register value to write
2059 */
2060#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2061DECLASM(void) ASMSetDR7(RTCCUINTREG uDRVal);
2062#else
2063DECLINLINE(void) ASMSetDR7(RTCCUINTREG uDRVal)
2064{
2065# if RT_INLINE_ASM_USES_INTRIN
2066 __writedr(7, uDRVal);
2067# elif RT_INLINE_ASM_GNU_STYLE
2068# ifdef RT_ARCH_AMD64
2069 __asm__ __volatile__("movq %0, %%dr7\n\t" : : "r" (uDRVal));
2070# else
2071 __asm__ __volatile__("movl %0, %%dr7\n\t" : : "r" (uDRVal));
2072# endif
2073# else
2074 __asm
2075 {
2076# ifdef RT_ARCH_AMD64
2077 mov rax, [uDRVal]
2078 mov dr7, rax
2079# else
2080 mov eax, [uDRVal]
2081 mov dr7, eax
2082# endif
2083 }
2084# endif
2085}
2086#endif
2087
2088
2089/**
2090 * Compiler memory barrier.
2091 *
2092 * Ensure that the compiler does not use any cached (register/tmp stack) memory
2093 * values or any outstanding writes when returning from this function.
2094 *
2095 * This function must be used if non-volatile data is modified by a
2096 * device or the VMM. Typical cases are port access, MMIO access,
2097 * trapping instructions, etc.
2098 */
2099#if RT_INLINE_ASM_GNU_STYLE
2100# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
2101#elif RT_INLINE_ASM_USES_INTRIN
2102# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
2103#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
2104DECLINLINE(void) ASMCompilerBarrier(void)
2105{
2106 __asm
2107 {
2108 }
2109}
2110#endif
2111
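/* Usage sketch (illustration only; the flag name is hypothetical): making sure the
 * compiler re-reads a non-volatile flag that a device or interrupt path may change
 * behind its back.  This is a compiler fence only, not a CPU memory barrier.
 * @code
 *     extern uint32_t g_fExampleDeviceDone;   // non-volatile, updated by a device/IRQ path
 *     while (!g_fExampleDeviceDone)
 *         ASMCompilerBarrier();               // don't let the load be hoisted out of the loop
 * @endcode
 */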
2112
2113/**
2114 * Writes an 8-bit unsigned integer to an I/O port, ordered.
2115 *
2116 * @param Port I/O port to write to.
2117 * @param u8 8-bit integer to write.
2118 */
2119#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2120DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
2121#else
2122DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
2123{
2124# if RT_INLINE_ASM_GNU_STYLE
2125 __asm__ __volatile__("outb %b1, %w0\n\t"
2126 :: "Nd" (Port),
2127 "a" (u8));
2128
2129# elif RT_INLINE_ASM_USES_INTRIN
2130 __outbyte(Port, u8);
2131
2132# else
2133 __asm
2134 {
2135 mov dx, [Port]
2136 mov al, [u8]
2137 out dx, al
2138 }
2139# endif
2140}
2141#endif
2142
2143
2144/**
2145 * Gets an 8-bit unsigned integer from an I/O port, ordered.
2146 *
2147 * @returns 8-bit integer.
2148 * @param Port I/O port to read from.
2149 */
2150#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2151DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
2152#else
2153DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
2154{
2155 uint8_t u8;
2156# if RT_INLINE_ASM_GNU_STYLE
2157 __asm__ __volatile__("inb %w1, %b0\n\t"
2158 : "=a" (u8)
2159 : "Nd" (Port));
2160
2161# elif RT_INLINE_ASM_USES_INTRIN
2162 u8 = __inbyte(Port);
2163
2164# else
2165 __asm
2166 {
2167 mov dx, [Port]
2168 in al, dx
2169 mov [u8], al
2170 }
2171# endif
2172 return u8;
2173}
2174#endif
2175
2176
2177/**
2178 * Writes a 16-bit unsigned integer to an I/O port, ordered.
2179 *
2180 * @param Port I/O port to write to.
2181 * @param u16 16-bit integer to write.
2182 */
2183#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2184DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
2185#else
2186DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
2187{
2188# if RT_INLINE_ASM_GNU_STYLE
2189 __asm__ __volatile__("outw %w1, %w0\n\t"
2190 :: "Nd" (Port),
2191 "a" (u16));
2192
2193# elif RT_INLINE_ASM_USES_INTRIN
2194 __outword(Port, u16);
2195
2196# else
2197 __asm
2198 {
2199 mov dx, [Port]
2200 mov ax, [u16]
2201 out dx, ax
2202 }
2203# endif
2204}
2205#endif
2206
2207
2208/**
2209 * Gets a 16-bit unsigned integer from an I/O port, ordered.
2210 *
2211 * @returns 16-bit integer.
2212 * @param Port I/O port to read from.
2213 */
2214#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2215DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
2216#else
2217DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
2218{
2219 uint16_t u16;
2220# if RT_INLINE_ASM_GNU_STYLE
2221 __asm__ __volatile__("inw %w1, %w0\n\t"
2222 : "=a" (u16)
2223 : "Nd" (Port));
2224
2225# elif RT_INLINE_ASM_USES_INTRIN
2226 u16 = __inword(Port);
2227
2228# else
2229 __asm
2230 {
2231 mov dx, [Port]
2232 in ax, dx
2233 mov [u16], ax
2234 }
2235# endif
2236 return u16;
2237}
2238#endif
2239
2240
2241/**
2242 * Writes a 32-bit unsigned integer to an I/O port, ordered.
2243 *
2244 * @param Port I/O port to write to.
2245 * @param u32 32-bit integer to write.
2246 */
2247#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2248DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
2249#else
2250DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
2251{
2252# if RT_INLINE_ASM_GNU_STYLE
2253 __asm__ __volatile__("outl %1, %w0\n\t"
2254 :: "Nd" (Port),
2255 "a" (u32));
2256
2257# elif RT_INLINE_ASM_USES_INTRIN
2258 __outdword(Port, u32);
2259
2260# else
2261 __asm
2262 {
2263 mov dx, [Port]
2264 mov eax, [u32]
2265 out dx, eax
2266 }
2267# endif
2268}
2269#endif
2270
2271
2272/**
2273 * Gets a 32-bit unsigned integer from an I/O port, ordered.
2274 *
2275 * @returns 32-bit integer.
2276 * @param Port I/O port to read from.
2277 */
2278#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2279DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
2280#else
2281DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
2282{
2283 uint32_t u32;
2284# if RT_INLINE_ASM_GNU_STYLE
2285 __asm__ __volatile__("inl %w1, %0\n\t"
2286 : "=a" (u32)
2287 : "Nd" (Port));
2288
2289# elif RT_INLINE_ASM_USES_INTRIN
2290 u32 = __indword(Port);
2291
2292# else
2293 __asm
2294 {
2295 mov dx, [Port]
2296 in eax, dx
2297 mov [u32], eax
2298 }
2299# endif
2300 return u32;
2301}
2302#endif
2303
2304/** @todo string i/o */
2305
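/* Usage sketch (illustration only, ring-0 / raw-mode): the classic index/data port
 * pair of the CMOS/RTC - write the register index to port 0x70, then read the data
 * from port 0x71.
 * @code
 *     ASMOutU8(0x70, 0x0a);               // select RTC status register A
 *     uint8_t const bStatusA = ASMInU8(0x71);
 * @endcode
 */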
2306
2307/**
2308 * Atomically Exchange an unsigned 8-bit value, ordered.
2309 *
2310 * @returns Current *pu8 value
2311 * @param pu8 Pointer to the 8-bit variable to update.
2312 * @param u8 The 8-bit value to assign to *pu8.
2313 */
2314#if RT_INLINE_ASM_EXTERNAL
2315DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
2316#else
2317DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
2318{
2319# if RT_INLINE_ASM_GNU_STYLE
2320 __asm__ __volatile__("xchgb %0, %1\n\t"
2321 : "=m" (*pu8),
2322 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
2323 : "1" (u8),
2324 "m" (*pu8));
2325# else
2326 __asm
2327 {
2328# ifdef RT_ARCH_AMD64
2329 mov rdx, [pu8]
2330 mov al, [u8]
2331 xchg [rdx], al
2332 mov [u8], al
2333# else
2334 mov edx, [pu8]
2335 mov al, [u8]
2336 xchg [edx], al
2337 mov [u8], al
2338# endif
2339 }
2340# endif
2341 return u8;
2342}
2343#endif
2344
2345
2346/**
2347 * Atomically Exchange a signed 8-bit value, ordered.
2348 *
2349 * @returns Current *pi8 value
2350 * @param pi8 Pointer to the 8-bit variable to update.
2351 * @param i8 The 8-bit value to assign to *pi8.
2352 */
2353DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
2354{
2355 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
2356}
2357
2358
2359/**
2360 * Atomically Exchange a bool value, ordered.
2361 *
2362 * @returns Current *pf value
2363 * @param pf Pointer to the 8-bit variable to update.
2364 * @param f The 8-bit value to assign to *pf.
2365 */
2366DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2367{
2368#ifdef _MSC_VER
2369 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2370#else
2371 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2372#endif
2373}
2374
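/* Usage sketch (illustration only; the names are hypothetical): a run-once guard
 * built on the boolean exchange.
 * @code
 *     static volatile bool s_fExampleInitDone = false;
 *     if (!ASMAtomicXchgBool(&s_fExampleInitDone, true))
 *         rtExampleDoOneTimeInit();       // the first caller gets here exactly once
 * @endcode
 */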
2375
2376/**
2377 * Atomically Exchange an unsigned 16-bit value, ordered.
2378 *
2379 * @returns Current *pu16 value
2380 * @param pu16 Pointer to the 16-bit variable to update.
2381 * @param u16 The 16-bit value to assign to *pu16.
2382 */
2383#if RT_INLINE_ASM_EXTERNAL
2384DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2385#else
2386DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2387{
2388# if RT_INLINE_ASM_GNU_STYLE
2389 __asm__ __volatile__("xchgw %0, %1\n\t"
2390 : "=m" (*pu16),
2391 "=r" (u16)
2392 : "1" (u16),
2393 "m" (*pu16));
2394# else
2395 __asm
2396 {
2397# ifdef RT_ARCH_AMD64
2398 mov rdx, [pu16]
2399 mov ax, [u16]
2400 xchg [rdx], ax
2401 mov [u16], ax
2402# else
2403 mov edx, [pu16]
2404 mov ax, [u16]
2405 xchg [edx], ax
2406 mov [u16], ax
2407# endif
2408 }
2409# endif
2410 return u16;
2411}
2412#endif
2413
2414
2415/**
2416 * Atomically Exchange a signed 16-bit value, ordered.
2417 *
2418 * @returns Current *pi16 value
2419 * @param pi16 Pointer to the 16-bit variable to update.
2420 * @param i16 The 16-bit value to assign to *pi16.
2421 */
2422DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2423{
2424 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2425}
2426
2427
2428/**
2429 * Atomically Exchange an unsigned 32-bit value, ordered.
2430 *
2431 * @returns Current *pu32 value
2432 * @param pu32 Pointer to the 32-bit variable to update.
2433 * @param u32 The 32-bit value to assign to *pu32.
2434 */
2435#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2436DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2437#else
2438DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2439{
2440# if RT_INLINE_ASM_GNU_STYLE
2441 __asm__ __volatile__("xchgl %0, %1\n\t"
2442 : "=m" (*pu32),
2443 "=r" (u32)
2444 : "1" (u32),
2445 "m" (*pu32));
2446
2447# elif RT_INLINE_ASM_USES_INTRIN
2448 u32 = _InterlockedExchange((long *)pu32, u32);
2449
2450# else
2451 __asm
2452 {
2453# ifdef RT_ARCH_AMD64
2454 mov rdx, [pu32]
2455 mov eax, u32
2456 xchg [rdx], eax
2457 mov [u32], eax
2458# else
2459 mov edx, [pu32]
2460 mov eax, u32
2461 xchg [edx], eax
2462 mov [u32], eax
2463# endif
2464 }
2465# endif
2466 return u32;
2467}
2468#endif
2469
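/* Usage sketch (illustration only; the lock variable name is hypothetical): a
 * bare-bones test-and-set spinlock built on the ordered 32-bit exchange.  Real code
 * would also disable preemption and be nicer while spinning.
 * @code
 *     static volatile uint32_t s_uExampleLock = 0;
 *
 *     while (ASMAtomicXchgU32(&s_uExampleLock, 1) != 0)
 *         ;                                   // spin until we observe 0 (unlocked)
 *     // ... critical section ...
 *     ASMAtomicXchgU32(&s_uExampleLock, 0);   // release
 * @endcode
 */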
2470
2471/**
2472 * Atomically Exchange a signed 32-bit value, ordered.
2473 *
2474 * @returns Current *pi32 value
2475 * @param pi32 Pointer to the 32-bit variable to update.
2476 * @param i32 The 32-bit value to assign to *pi32.
2477 */
2478DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2479{
2480 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2481}
2482
2483
2484/**
2485 * Atomically Exchange an unsigned 64-bit value, ordered.
2486 *
2487 * @returns Current *pu64 value
2488 * @param pu64 Pointer to the 64-bit variable to update.
2489 * @param u64 The 64-bit value to assign to *pu64.
2490 */
2491#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2492DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2493#else
2494DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2495{
2496# if defined(RT_ARCH_AMD64)
2497# if RT_INLINE_ASM_USES_INTRIN
2498 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2499
2500# elif RT_INLINE_ASM_GNU_STYLE
2501 __asm__ __volatile__("xchgq %0, %1\n\t"
2502 : "=m" (*pu64),
2503 "=r" (u64)
2504 : "1" (u64),
2505 "m" (*pu64));
2506# else
2507 __asm
2508 {
2509 mov rdx, [pu64]
2510 mov rax, [u64]
2511 xchg [rdx], rax
2512 mov [u64], rax
2513 }
2514# endif
2515# else /* !RT_ARCH_AMD64 */
2516# if RT_INLINE_ASM_GNU_STYLE
2517# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2518 uint32_t u32EBX = (uint32_t)u64;
2519 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2520 "xchgl %%ebx, %3\n\t"
2521 "1:\n\t"
2522 "lock; cmpxchg8b (%5)\n\t"
2523 "jnz 1b\n\t"
2524 "movl %3, %%ebx\n\t"
2525 /*"xchgl %%esi, %5\n\t"*/
2526 : "=A" (u64),
2527 "=m" (*pu64)
2528 : "0" (*pu64),
2529 "m" ( u32EBX ),
2530 "c" ( (uint32_t)(u64 >> 32) ),
2531 "S" (pu64));
2532# else /* !PIC */
2533 __asm__ __volatile__("1:\n\t"
2534 "lock; cmpxchg8b %1\n\t"
2535 "jnz 1b\n\t"
2536 : "=A" (u64),
2537 "=m" (*pu64)
2538 : "0" (*pu64),
2539 "b" ( (uint32_t)u64 ),
2540 "c" ( (uint32_t)(u64 >> 32) ));
2541# endif
2542# else
2543 __asm
2544 {
2545 mov ebx, dword ptr [u64]
2546 mov ecx, dword ptr [u64 + 4]
2547 mov edi, pu64
2548 mov eax, dword ptr [edi]
2549 mov edx, dword ptr [edi + 4]
2550 retry:
2551 lock cmpxchg8b [edi]
2552 jnz retry
2553 mov dword ptr [u64], eax
2554 mov dword ptr [u64 + 4], edx
2555 }
2556# endif
2557# endif /* !RT_ARCH_AMD64 */
2558 return u64;
2559}
2560#endif
2561
2562
2563/**
2564 * Atomically Exchange a signed 64-bit value, ordered.
2565 *
2566 * @returns Current *pi64 value
2567 * @param pi64 Pointer to the 64-bit variable to update.
2568 * @param i64 The 64-bit value to assign to *pi64.
2569 */
2570DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2571{
2572 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2573}
2574
2575
2576#ifdef RT_ARCH_AMD64
2577/**
2578 * Atomically Exchange an unsigned 128-bit value, ordered.
2579 *
2580 * @returns Current *pu128.
2581 * @param pu128 Pointer to the 128-bit variable to update.
2582 * @param u128 The 128-bit value to assign to *pu128.
2583 *
2584 * @remark We cannot really assume that any hardware supports this. Nor do I have
2585 * GAS support for it. So, for the time being we'll BREAK the atomic
2586 * bit of this function and use two 64-bit exchanges instead.
2587 */
2588# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2589DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2590# else
2591DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2592{
2593 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2594 {
2595 /** @todo this is clumsy code */
2596 RTUINT128U u128Ret;
2597 u128Ret.u = u128;
2598 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2599 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2600 return u128Ret.u;
2601 }
2602#if 0 /* later? */
2603 else
2604 {
2605# if RT_INLINE_ASM_GNU_STYLE
2606 __asm__ __volatile__("1:\n\t"
2607 "lock; cmpxchg8b %1\n\t"
2608 "jnz 1b\n\t"
2609 : "=A" (u128),
2610 "=m" (*pu128)
2611 : "0" (*pu128),
2612 "b" ( (uint64_t)u128 ),
2613 "c" ( (uint64_t)(u128 >> 64) ));
2614# else
2615 __asm
2616 {
2617 mov rbx, dword ptr [u128]
2618 mov rcx, dword ptr [u128 + 8]
2619 mov rdi, pu128
2620 mov rax, dword ptr [rdi]
2621 mov rdx, dword ptr [rdi + 8]
2622 retry:
2623 lock cmpxchg16b [rdi]
2624 jnz retry
2625 mov dword ptr [u128], rax
2626 mov dword ptr [u128 + 8], rdx
2627 }
2628# endif
2629 }
2630 return u128;
2631#endif
2632}
2633# endif
2634#endif /* RT_ARCH_AMD64 */
2635
2636
2637/**
2638 * Atomically Exchange a pointer value, ordered.
2639 *
2640 * @returns Current *ppv value
2641 * @param ppv Pointer to the pointer variable to update.
2642 * @param pv The pointer value to assign to *ppv.
2643 */
2644DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
2645{
2646#if ARCH_BITS == 32
2647 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2648#elif ARCH_BITS == 64
2649 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2650#else
2651# error "ARCH_BITS is bogus"
2652#endif
2653}
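
/* An illustrative usage sketch: swapping in NULL atomically takes ownership of
 * whatever was pointed to, so exactly one caller gets to process it. The
 * g_pvPending variable and ExampleProcess() are assumptions for the example.
 *
 *      static void * volatile g_pvPending = NULL;
 *
 *      void ExampleConsume(void)
 *      {
 *          void *pv = ASMAtomicXchgPtr(&g_pvPending, NULL);
 *          if (pv)
 *              ExampleProcess(pv);     // nobody else can see pv any more
 *      }
 */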
2654
2655
2656/** @def ASMAtomicXchgHandle
2657 * Atomically Exchange a typical IPRT handle value, ordered.
2658 *
2659 * @param ph Pointer to the value to update.
2660 * @param hNew The new value to assign to *ph.
2661 * @param phRes Where to store the current *ph value.
2662 *
2663 * @remarks This doesn't currently work for all handles (like RTFILE).
2664 */
2665#define ASMAtomicXchgHandle(ph, hNew, phRes) \
2666 do { \
2667 *(void **)(phRes) = ASMAtomicXchgPtr((void * volatile *)(ph), (const void *)(hNew)); \
2668 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2669 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
2670 } while (0)
2671
2672
2673/**
2674 * Atomically Exchange a value which size might differ
2675 * between platforms or compilers, ordered.
2676 *
2677 * @param pu Pointer to the variable to update.
2678 * @param uNew The value to assign to *pu.
2679 * @todo This is busted as it's missing the result argument.
2680 */
2681#define ASMAtomicXchgSize(pu, uNew) \
2682 do { \
2683 switch (sizeof(*(pu))) { \
2684 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2685 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2686 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2687 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2688 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2689 } \
2690 } while (0)
2691
2692/**
2693 * Atomically Exchange a value which size might differ
2694 * between platforms or compilers, ordered.
2695 *
2696 * @param pu Pointer to the variable to update.
2697 * @param uNew The value to assign to *pu.
2698 * @param puRes Where to store the current *pu value.
2699 */
2700#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
2701 do { \
2702 switch (sizeof(*(pu))) { \
2703 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2704 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2705 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2706 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2707 default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
2708 } \
2709 } while (0)
2710
2711
2712/**
2713 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2714 *
2715 * @returns true if xchg was done.
2716 * @returns false if xchg wasn't done.
2717 *
2718 * @param pu32 Pointer to the value to update.
2719 * @param u32New The new value to assign to *pu32.
2720 * @param u32Old The old value to compare *pu32 with.
2721 */
2722#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2723DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2724#else
2725DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2726{
2727# if RT_INLINE_ASM_GNU_STYLE
2728 uint8_t u8Ret;
2729 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2730 "setz %1\n\t"
2731 : "=m" (*pu32),
2732 "=qm" (u8Ret),
2733 "=a" (u32Old)
2734 : "r" (u32New),
2735 "2" (u32Old),
2736 "m" (*pu32));
2737 return (bool)u8Ret;
2738
2739# elif RT_INLINE_ASM_USES_INTRIN
2740 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2741
2742# else
2743 uint32_t u32Ret;
2744 __asm
2745 {
2746# ifdef RT_ARCH_AMD64
2747 mov rdx, [pu32]
2748# else
2749 mov edx, [pu32]
2750# endif
2751 mov eax, [u32Old]
2752 mov ecx, [u32New]
2753# ifdef RT_ARCH_AMD64
2754 lock cmpxchg [rdx], ecx
2755# else
2756 lock cmpxchg [edx], ecx
2757# endif
2758 setz al
2759 movzx eax, al
2760 mov [u32Ret], eax
2761 }
2762 return !!u32Ret;
2763# endif
2764}
2765#endif
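
/* An illustrative usage sketch of the usual compare-and-exchange retry loop,
 * here keeping a running maximum in a shared 32-bit variable. The function and
 * parameter names are assumptions for the example; ASMAtomicReadU32 is defined
 * further down in this header.
 *
 *      void ExampleUpdateMax(volatile uint32_t *pu32Max, uint32_t u32New)
 *      {
 *          uint32_t u32Cur;
 *          do
 *              u32Cur = ASMAtomicReadU32(pu32Max);
 *          while (   u32New > u32Cur
 *                 && !ASMAtomicCmpXchgU32(pu32Max, u32New, u32Cur));
 *      }
 */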
2766
2767
2768/**
2769 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2770 *
2771 * @returns true if xchg was done.
2772 * @returns false if xchg wasn't done.
2773 *
2774 * @param pi32 Pointer to the value to update.
2775 * @param i32New The new value to assign to *pi32.
2776 * @param i32Old The old value to compare *pi32 with.
2777 */
2778DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2779{
2780 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2781}
2782
2783
2784/**
2785 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2786 *
2787 * @returns true if xchg was done.
2788 * @returns false if xchg wasn't done.
2789 *
2790 * @param pu64 Pointer to the 64-bit variable to update.
2791 * @param u64New The 64-bit value to assign to *pu64.
2792 * @param u64Old The value to compare with.
2793 */
2794#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2795DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2796#else
2797DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2798{
2799# if RT_INLINE_ASM_USES_INTRIN
2800 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2801
2802# elif defined(RT_ARCH_AMD64)
2803# if RT_INLINE_ASM_GNU_STYLE
2804 uint8_t u8Ret;
2805 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2806 "setz %1\n\t"
2807 : "=m" (*pu64),
2808 "=qm" (u8Ret),
2809 "=a" (u64Old)
2810 : "r" (u64New),
2811 "2" (u64Old),
2812 "m" (*pu64));
2813 return (bool)u8Ret;
2814# else
2815 bool fRet;
2816 __asm
2817 {
2818 mov rdx, [pu64]
2819 mov rax, [u64Old]
2820 mov rcx, [u64New]
2821 lock cmpxchg [rdx], rcx
2822 setz al
2823 mov [fRet], al
2824 }
2825 return fRet;
2826# endif
2827# else /* !RT_ARCH_AMD64 */
2828 uint32_t u32Ret;
2829# if RT_INLINE_ASM_GNU_STYLE
2830# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2831 uint32_t u32EBX = (uint32_t)u64New;
2832 uint32_t u32Spill;
2833 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2834 "lock; cmpxchg8b (%6)\n\t"
2835 "setz %%al\n\t"
2836 "movl %4, %%ebx\n\t"
2837 "movzbl %%al, %%eax\n\t"
2838 : "=a" (u32Ret),
2839 "=d" (u32Spill),
2840# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
2841 "+m" (*pu64)
2842# else
2843 "=m" (*pu64)
2844# endif
2845 : "A" (u64Old),
2846 "m" ( u32EBX ),
2847 "c" ( (uint32_t)(u64New >> 32) ),
2848 "S" (pu64));
2849# else /* !PIC */
2850 uint32_t u32Spill;
2851 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2852 "setz %%al\n\t"
2853 "movzbl %%al, %%eax\n\t"
2854 : "=a" (u32Ret),
2855 "=d" (u32Spill),
2856 "+m" (*pu64)
2857 : "A" (u64Old),
2858 "b" ( (uint32_t)u64New ),
2859 "c" ( (uint32_t)(u64New >> 32) ));
2860# endif
2861 return (bool)u32Ret;
2862# else
2863 __asm
2864 {
2865 mov ebx, dword ptr [u64New]
2866 mov ecx, dword ptr [u64New + 4]
2867 mov edi, [pu64]
2868 mov eax, dword ptr [u64Old]
2869 mov edx, dword ptr [u64Old + 4]
2870 lock cmpxchg8b [edi]
2871 setz al
2872 movzx eax, al
2873 mov dword ptr [u32Ret], eax
2874 }
2875 return !!u32Ret;
2876# endif
2877# endif /* !RT_ARCH_AMD64 */
2878}
2879#endif
2880
2881
2882/**
2883 * Atomically Compare and exchange a signed 64-bit value, ordered.
2884 *
2885 * @returns true if xchg was done.
2886 * @returns false if xchg wasn't done.
2887 *
2888 * @param pi64 Pointer to the 64-bit variable to update.
2889 * @param i64 The 64-bit value to assign to *pi64.
2890 * @param i64Old The value to compare with.
2891 */
2892DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2893{
2894 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2895}
2896
2897
2898/**
2899 * Atomically Compare and Exchange a pointer value, ordered.
2900 *
2901 * @returns true if xchg was done.
2902 * @returns false if xchg wasn't done.
2903 *
2904 * @param ppv Pointer to the value to update.
2905 * @param pvNew The new value to assign to *ppv.
2906 * @param pvOld The old value to compare *ppv with.
2907 */
2908DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, const void *pvNew, const void *pvOld)
2909{
2910#if ARCH_BITS == 32
2911 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2912#elif ARCH_BITS == 64
2913 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2914#else
2915# error "ARCH_BITS is bogus"
2916#endif
2917}
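
/* An illustrative usage sketch: pushing a node onto a lock-free LIFO list by
 * retrying until the head has not changed under us. The EXAMPLENODE type and
 * function name are assumptions for the example; note that the pop side of
 * such a list additionally has to deal with the ABA problem.
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *      void ExamplePush(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOld;
 *          do
 *          {
 *              pOld = (EXAMPLENODE *)ASMAtomicReadPtr((void * volatile *)ppHead);
 *              pNode->pNext = pOld;
 *          } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pOld));
 *      }
 */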
2918
2919
2920/** @def ASMAtomicCmpXchgHandle
2921 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
2922 *
2923 * @param ph Pointer to the value to update.
2924 * @param hNew The new value to assign to *ph.
2925 * @param hOld The old value to compare *ph with.
2926 * @param fRc Where to store the result.
2927 *
2928 * @remarks This doesn't currently work for all handles (like RTFILE).
2929 */
2930#define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
2931 do { \
2932 (fRc) = ASMAtomicCmpXchgPtr((void * volatile *)(ph), (void *)(hNew), (void *)(hOld)); \
2933 AssertCompile(sizeof(*ph) == sizeof(void *)); \
2934 } while (0)
2935
2936
2937/** @def ASMAtomicCmpXchgSize
2938 * Atomically Compare and Exchange a value which size might differ
2939 * between platforms or compilers, ordered.
2940 *
2941 * @param pu Pointer to the value to update.
2942 * @param uNew The new value to assign to *pu.
2943 * @param uOld The old value to compare *pu with.
2944 * @param fRc Where to store the result.
2945 */
2946#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2947 do { \
2948 switch (sizeof(*(pu))) { \
2949 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2950 break; \
2951 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2952 break; \
2953 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2954 (fRc) = false; \
2955 break; \
2956 } \
2957 } while (0)
2958
2959
2960/**
2961 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2962 * passes back old value, ordered.
2963 *
2964 * @returns true if xchg was done.
2965 * @returns false if xchg wasn't done.
2966 *
2967 * @param pu32 Pointer to the value to update.
2968 * @param u32New The new value to assign to *pu32.
2969 * @param u32Old The old value to compare *pu32 with.
2970 * @param pu32Old Pointer to where to store the old value.
2971 */
2972#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2973DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2974#else
2975DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2976{
2977# if RT_INLINE_ASM_GNU_STYLE
2978 uint8_t u8Ret;
2979 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2980 "setz %1\n\t"
2981 : "=m" (*pu32),
2982 "=qm" (u8Ret),
2983 "=a" (*pu32Old)
2984 : "r" (u32New),
2985 "a" (u32Old),
2986 "m" (*pu32));
2987 return (bool)u8Ret;
2988
2989# elif RT_INLINE_ASM_USES_INTRIN
2990 return (*pu32Old = _InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2991
2992# else
2993 uint32_t u32Ret;
2994 __asm
2995 {
2996# ifdef RT_ARCH_AMD64
2997 mov rdx, [pu32]
2998# else
2999 mov edx, [pu32]
3000# endif
3001 mov eax, [u32Old]
3002 mov ecx, [u32New]
3003# ifdef RT_ARCH_AMD64
3004 lock cmpxchg [rdx], ecx
3005 mov rdx, [pu32Old]
3006 mov [rdx], eax
3007# else
3008 lock cmpxchg [edx], ecx
3009 mov edx, [pu32Old]
3010 mov [edx], eax
3011# endif
3012 setz al
3013 movzx eax, al
3014 mov [u32Ret], eax
3015 }
3016 return !!u32Ret;
3017# endif
3018}
3019#endif
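
/* An illustrative usage sketch: the Ex variant hands back the value it found,
 * so a retry loop needs no separate re-read after a failed attempt. The
 * function, flag and parameter names are assumptions for the example.
 *
 *      bool ExampleSetFlags(volatile uint32_t *pu32, uint32_t fFlags, uint32_t fFrozenBit)
 *      {
 *          uint32_t u32Old = ASMAtomicReadU32(pu32);
 *          for (;;)
 *          {
 *              if (u32Old & fFrozenBit)
 *                  return false;       // give up, the value is frozen
 *              if (ASMAtomicCmpXchgExU32(pu32, u32Old | fFlags, u32Old, &u32Old))
 *                  return true;
 *              // u32Old now holds the value that made the attempt fail, retry
 *          }
 *      }
 */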
3020
3021
3022/**
3023 * Atomically Compare and Exchange a signed 32-bit value, additionally
3024 * passes back old value, ordered.
3025 *
3026 * @returns true if xchg was done.
3027 * @returns false if xchg wasn't done.
3028 *
3029 * @param pi32 Pointer to the value to update.
3030 * @param i32New The new value to assign to *pi32.
3031 * @param i32Old The old value to compare *pi32 with.
3032 * @param pi32Old Pointer to where to store the old value.
3033 */
3034DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
3035{
3036 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
3037}
3038
3039
3040/**
3041 * Atomically Compare and exchange an unsigned 64-bit value, additionally
3042 * passing back old value, ordered.
3043 *
3044 * @returns true if xchg was done.
3045 * @returns false if xchg wasn't done.
3046 *
3047 * @param pu64 Pointer to the 64-bit variable to update.
3048 * @param u64New The 64-bit value to assign to *pu64.
3049 * @param u64Old The value to compare with.
3050 * @param pu64Old Pointer to where to store the old value.
3051 */
3052#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3053DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
3054#else
3055DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
3056{
3057# if RT_INLINE_ASM_USES_INTRIN
3058 return (*pu64Old = _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
3059
3060# elif defined(RT_ARCH_AMD64)
3061# if RT_INLINE_ASM_GNU_STYLE
3062 uint8_t u8Ret;
3063 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
3064 "setz %1\n\t"
3065 : "=m" (*pu64),
3066 "=qm" (u8Ret),
3067 "=a" (*pu64Old)
3068 : "r" (u64New),
3069 "a" (u64Old),
3070 "m" (*pu64));
3071 return (bool)u8Ret;
3072# else
3073 bool fRet;
3074 __asm
3075 {
3076 mov rdx, [pu64]
3077 mov rax, [u64Old]
3078 mov rcx, [u64New]
3079 lock cmpxchg [rdx], rcx
3080 mov rdx, [pu64Old]
3081 mov [rdx], rax
3082 setz al
3083 mov [fRet], al
3084 }
3085 return fRet;
3086# endif
3087# else /* !RT_ARCH_AMD64 */
3088# if RT_INLINE_ASM_GNU_STYLE
3089 uint64_t u64Ret;
3090# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3091 /* NB: this code uses a memory clobber description, because the clean
3092 * solution with an output value for *pu64 makes gcc run out of registers.
3093 * This will cause suboptimal code, and anyone with a better solution is
3094 * welcome to improve this. */
3095 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
3096 "lock; cmpxchg8b %3\n\t"
3097 "xchgl %%ebx, %1\n\t"
3098 : "=A" (u64Ret)
3099 : "DS" ((uint32_t)u64New),
3100 "c" ((uint32_t)(u64New >> 32)),
3101 "m" (*pu64),
3102 "0" (u64Old)
3103 : "memory" );
3104# else /* !PIC */
3105 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
3106 : "=A" (u64Ret),
3107 "=m" (*pu64)
3108 : "b" ((uint32_t)u64New),
3109 "c" ((uint32_t)(u64New >> 32)),
3110 "m" (*pu64),
3111 "0" (u64Old));
3112# endif
3113 *pu64Old = u64Ret;
3114 return u64Ret == u64Old;
3115# else
3116 uint32_t u32Ret;
3117 __asm
3118 {
3119 mov ebx, dword ptr [u64New]
3120 mov ecx, dword ptr [u64New + 4]
3121 mov edi, [pu64]
3122 mov eax, dword ptr [u64Old]
3123 mov edx, dword ptr [u64Old + 4]
3124 lock cmpxchg8b [edi]
3125 mov ebx, [pu64Old]
3126 mov [ebx], eax
3127 setz al
3128 movzx eax, al
3129 add ebx, 4
3130 mov [ebx], edx
3131 mov dword ptr [u32Ret], eax
3132 }
3133 return !!u32Ret;
3134# endif
3135# endif /* !RT_ARCH_AMD64 */
3136}
3137#endif
3138
3139
3140/**
3141 * Atomically Compare and exchange a signed 64-bit value, additionally
3142 * passing back old value, ordered.
3143 *
3144 * @returns true if xchg was done.
3145 * @returns false if xchg wasn't done.
3146 *
3147 * @param pi64 Pointer to the 64-bit variable to update.
3148 * @param i64 The 64-bit value to assign to *pi64.
3149 * @param i64Old The value to compare with.
3150 * @param pi64Old Pointer to where to store the old value.
3151 */
3152DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
3153{
3154 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
3155}
3156
3157/** @def ASMAtomicCmpXchgExHandle
3158 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
3159 *
3160 * @param ph Pointer to the value to update.
3161 * @param hNew The new value to assign to *ph.
3162 * @param hOld The old value to compare *ph with.
3163 * @param fRc Where to store the result.
3164 * @param phOldVal Pointer to where to store the old value.
3165 *
3166 * @remarks This doesn't currently work for all handles (like RTFILE).
3167 */
3168#if ARCH_BITS == 32
3169# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3170 do { \
3171 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t *)(phOldVal)); \
3172 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3173 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3174 } while (0)
3175#elif ARCH_BITS == 64
3176# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
3177 do { \
3178 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t *)(phOldVal)); \
3179 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3180 AssertCompile(sizeof(*phOldVal) == sizeof(void *)); \
3181 } while (0)
3182#endif
3183
3184
3185/** @def ASMAtomicCmpXchgExSize
3186 * Atomically Compare and Exchange a value which size might differ
3187 * between platforms or compilers. Additionally passes back old value.
3188 *
3189 * @param pu Pointer to the value to update.
3190 * @param uNew The new value to assign to *pu.
3191 * @param uOld The old value to compare *pu with.
3192 * @param fRc Where to store the result.
3193 * @param puOldVal Pointer to where to store the old value.
3194 */
3195#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
3196 do { \
3197 switch (sizeof(*(pu))) { \
3198 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
3199 break; \
3200 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
3201 break; \
3202 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
3203 (fRc) = false; \
3204 *(puOldVal) = 0; \
3205 break; \
3206 } \
3207 } while (0)
3208
3209
3210/**
3211 * Atomically Compare and Exchange a pointer value, additionally
3212 * passing back old value, ordered.
3213 *
3214 * @returns true if xchg was done.
3215 * @returns false if xchg wasn't done.
3216 *
3217 * @param ppv Pointer to the value to update.
3218 * @param pvNew The new value to assign to *ppv.
3219 * @param pvOld The old value to compare *ppv with.
3220 * @param ppvOld Pointer to where to store the old value.
3221 */
3222DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
3223{
3224#if ARCH_BITS == 32
3225 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
3226#elif ARCH_BITS == 64
3227 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
3228#else
3229# error "ARCH_BITS is bogus"
3230#endif
3231}
3232
3233
3234/**
3235 * Atomically exchanges and adds to a 32-bit value, ordered.
3236 *
3237 * @returns The old value.
3238 * @param pu32 Pointer to the value.
3239 * @param u32 Number to add.
3240 */
3241#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3242DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
3243#else
3244DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
3245{
3246# if RT_INLINE_ASM_USES_INTRIN
3247 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
3248 return u32;
3249
3250# elif RT_INLINE_ASM_GNU_STYLE
3251 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3252 : "=r" (u32),
3253 "=m" (*pu32)
3254 : "0" (u32),
3255 "m" (*pu32)
3256 : "memory");
3257 return u32;
3258# else
3259 __asm
3260 {
3261 mov eax, [u32]
3262# ifdef RT_ARCH_AMD64
3263 mov rdx, [pu32]
3264 lock xadd [rdx], eax
3265# else
3266 mov edx, [pu32]
3267 lock xadd [edx], eax
3268# endif
3269 mov [u32], eax
3270 }
3271 return u32;
3272# endif
3273}
3274#endif
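
/* An illustrative usage sketch: fetch-and-add hands out unique, consecutive
 * indices to concurrent callers. The g_iNext variable and function name are
 * assumptions for the example.
 *
 *      static volatile uint32_t g_iNext = 0;
 *
 *      uint32_t ExampleAllocIndex(void)
 *      {
 *          return ASMAtomicAddU32(&g_iNext, 1);    // returns the value before the add
 *      }
 */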
3275
3276
3277/**
3278 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3279 *
3280 * @returns The old value.
3281 * @param pi32 Pointer to the value.
3282 * @param i32 Number to add.
3283 */
3284DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
3285{
3286 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
3287}
3288
3289
3290/**
3291 * Atomically increment a 32-bit value, ordered.
3292 *
3293 * @returns The new value.
3294 * @param pu32 Pointer to the value to increment.
3295 */
3296#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3297DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
3298#else
3299DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
3300{
3301 uint32_t u32;
3302# if RT_INLINE_ASM_USES_INTRIN
3303 u32 = _InterlockedIncrement((long *)pu32);
3304 return u32;
3305
3306# elif RT_INLINE_ASM_GNU_STYLE
3307 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3308 : "=r" (u32),
3309 "=m" (*pu32)
3310 : "0" (1),
3311 "m" (*pu32)
3312 : "memory");
3313 return u32+1;
3314# else
3315 __asm
3316 {
3317 mov eax, 1
3318# ifdef RT_ARCH_AMD64
3319 mov rdx, [pu32]
3320 lock xadd [rdx], eax
3321# else
3322 mov edx, [pu32]
3323 lock xadd [edx], eax
3324# endif
3325 mov u32, eax
3326 }
3327 return u32+1;
3328# endif
3329}
3330#endif
3331
3332
3333/**
3334 * Atomically increment a signed 32-bit value, ordered.
3335 *
3336 * @returns The new value.
3337 * @param pi32 Pointer to the value to increment.
3338 */
3339DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
3340{
3341 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
3342}
3343
3344
3345/**
3346 * Atomically decrement an unsigned 32-bit value, ordered.
3347 *
3348 * @returns The new value.
3349 * @param pu32 Pointer to the value to decrement.
3350 */
3351#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3352DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3353#else
3354DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3355{
3356 uint32_t u32;
3357# if RT_INLINE_ASM_USES_INTRIN
3358 u32 = _InterlockedDecrement((long *)pu32);
3359 return u32;
3360
3361# elif RT_INLINE_ASM_GNU_STYLE
3362 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3363 : "=r" (u32),
3364 "=m" (*pu32)
3365 : "0" (-1),
3366 "m" (*pu32)
3367 : "memory");
3368 return u32-1;
3369# else
3370 __asm
3371 {
3372 mov eax, -1
3373# ifdef RT_ARCH_AMD64
3374 mov rdx, [pu32]
3375 lock xadd [rdx], eax
3376# else
3377 mov edx, [pu32]
3378 lock xadd [edx], eax
3379# endif
3380 mov u32, eax
3381 }
3382 return u32-1;
3383# endif
3384}
3385#endif
3386
3387
3388/**
3389 * Atomically decrement a signed 32-bit value, ordered.
3390 *
3391 * @returns The new value.
3392 * @param pi32 Pointer to the value to decrement.
3393 */
3394DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3395{
3396 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3397}
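
/* An illustrative usage sketch: reference counting with the atomic increment
 * and decrement above. The EXAMPLEOBJ type and ExampleDestroy() are
 * assumptions for the example.
 *
 *      typedef struct EXAMPLEOBJ { volatile uint32_t cRefs; } EXAMPLEOBJ;
 *
 *      void ExampleRetain(EXAMPLEOBJ *pObj)
 *      {
 *          ASMAtomicIncU32(&pObj->cRefs);
 *      }
 *
 *      void ExampleRelease(EXAMPLEOBJ *pObj)
 *      {
 *          if (ASMAtomicDecU32(&pObj->cRefs) == 0)     // returns the new count
 *              ExampleDestroy(pObj);
 *      }
 */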
3398
3399
3400/**
3401 * Atomically Or an unsigned 32-bit value, ordered.
3402 *
3403 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
3404 * @param u32 The value to OR *pu32 with.
3405 */
3406#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3407DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3408#else
3409DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3410{
3411# if RT_INLINE_ASM_USES_INTRIN
3412 _InterlockedOr((long volatile *)pu32, (long)u32);
3413
3414# elif RT_INLINE_ASM_GNU_STYLE
3415 __asm__ __volatile__("lock; orl %1, %0\n\t"
3416 : "=m" (*pu32)
3417 : "ir" (u32),
3418 "m" (*pu32));
3419# else
3420 __asm
3421 {
3422 mov eax, [u32]
3423# ifdef RT_ARCH_AMD64
3424 mov rdx, [pu32]
3425 lock or [rdx], eax
3426# else
3427 mov edx, [pu32]
3428 lock or [edx], eax
3429# endif
3430 }
3431# endif
3432}
3433#endif
3434
3435
3436/**
3437 * Atomically Or a signed 32-bit value, ordered.
3438 *
3439 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
3440 * @param i32 The value to OR *pi32 with.
3441 */
3442DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3443{
3444 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3445}
3446
3447
3448/**
3449 * Atomically And an unsigned 32-bit value, ordered.
3450 *
3451 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
3452 * @param u32 The value to AND *pu32 with.
3453 */
3454#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3455DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3456#else
3457DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3458{
3459# if RT_INLINE_ASM_USES_INTRIN
3460 _InterlockedAnd((long volatile *)pu32, u32);
3461
3462# elif RT_INLINE_ASM_GNU_STYLE
3463 __asm__ __volatile__("lock; andl %1, %0\n\t"
3464 : "=m" (*pu32)
3465 : "ir" (u32),
3466 "m" (*pu32));
3467# else
3468 __asm
3469 {
3470 mov eax, [u32]
3471# ifdef RT_ARCH_AMD64
3472 mov rdx, [pu32]
3473 lock and [rdx], eax
3474# else
3475 mov edx, [pu32]
3476 lock and [edx], eax
3477# endif
3478 }
3479# endif
3480}
3481#endif
3482
3483
3484/**
3485 * Atomically And a signed 32-bit value, ordered.
3486 *
3487 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
3488 * @param i32 The value to AND *pi32 with.
3489 */
3490DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3491{
3492 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3493}
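
/* An illustrative usage sketch: setting and clearing a flag bit in a shared
 * status word without taking a lock. The g_fStatus variable and function name
 * are assumptions for the example; RT_BIT comes from iprt/cdefs.h.
 *
 *      static volatile uint32_t g_fStatus = 0;
 *
 *      void ExampleMarkBusy(bool fBusy)
 *      {
 *          if (fBusy)
 *              ASMAtomicOrU32(&g_fStatus, RT_BIT(0));      // set the busy bit
 *          else
 *              ASMAtomicAndU32(&g_fStatus, ~RT_BIT(0));    // clear the busy bit
 *      }
 */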
3494
3495
3496/**
3497 * Memory fence, waits for any pending writes and reads to complete.
3498 */
3499DECLINLINE(void) ASMMemoryFence(void)
3500{
3501 /** @todo use mfence? check if all cpus we care for support it. */
3502 uint32_t volatile u32;
3503 ASMAtomicXchgU32(&u32, 0);
3504}
3505
3506
3507/**
3508 * Write fence, waits for any pending writes to complete.
3509 */
3510DECLINLINE(void) ASMWriteFence(void)
3511{
3512 /** @todo use sfence? check if all cpus we care for support it. */
3513 ASMMemoryFence();
3514}
3515
3516
3517/**
3518 * Read fence, waits for any pending reads to complete.
3519 */
3520DECLINLINE(void) ASMReadFence(void)
3521{
3522 /** @todo use lfence? check if all cpus we care for support it. */
3523 ASMMemoryFence();
3524}
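
/* An illustrative usage sketch: pairing the fences with the unordered read and
 * write helpers defined further down to publish data through a ready flag. The
 * g_u32Payload / g_fReady variables and function names are assumptions for the
 * example.
 *
 *      static volatile uint32_t g_u32Payload;
 *      static volatile uint32_t g_fReady;
 *
 *      void ExamplePublish(uint32_t u32Value)
 *      {
 *          ASMAtomicUoWriteU32(&g_u32Payload, u32Value);
 *          ASMWriteFence();                    // payload becomes visible before the flag
 *          ASMAtomicUoWriteU32(&g_fReady, 1);
 *      }
 *
 *      bool ExampleTryConsume(uint32_t *pu32Value)
 *      {
 *          if (!ASMAtomicUoReadU32(&g_fReady))
 *              return false;
 *          ASMReadFence();                     // don't read the payload before the flag
 *          *pu32Value = ASMAtomicUoReadU32(&g_u32Payload);
 *          return true;
 *      }
 */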
3525
3526
3527/**
3528 * Atomically reads an unsigned 8-bit value, ordered.
3529 *
3530 * @returns Current *pu8 value
3531 * @param pu8 Pointer to the 8-bit variable to read.
3532 */
3533DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3534{
3535 ASMMemoryFence();
3536 return *pu8; /* byte reads are atomic on x86 */
3537}
3538
3539
3540/**
3541 * Atomically reads an unsigned 8-bit value, unordered.
3542 *
3543 * @returns Current *pu8 value
3544 * @param pu8 Pointer to the 8-bit variable to read.
3545 */
3546DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3547{
3548 return *pu8; /* byte reads are atomic on x86 */
3549}
3550
3551
3552/**
3553 * Atomically reads a signed 8-bit value, ordered.
3554 *
3555 * @returns Current *pi8 value
3556 * @param pi8 Pointer to the 8-bit variable to read.
3557 */
3558DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3559{
3560 ASMMemoryFence();
3561 return *pi8; /* byte reads are atomic on x86 */
3562}
3563
3564
3565/**
3566 * Atomically reads a signed 8-bit value, unordered.
3567 *
3568 * @returns Current *pi8 value
3569 * @param pi8 Pointer to the 8-bit variable to read.
3570 */
3571DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3572{
3573 return *pi8; /* byte reads are atomic on x86 */
3574}
3575
3576
3577/**
3578 * Atomically reads an unsigned 16-bit value, ordered.
3579 *
3580 * @returns Current *pu16 value
3581 * @param pu16 Pointer to the 16-bit variable to read.
3582 */
3583DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3584{
3585 ASMMemoryFence();
3586 Assert(!((uintptr_t)pu16 & 1));
3587 return *pu16;
3588}
3589
3590
3591/**
3592 * Atomically reads an unsigned 16-bit value, unordered.
3593 *
3594 * @returns Current *pu16 value
3595 * @param pu16 Pointer to the 16-bit variable to read.
3596 */
3597DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3598{
3599 Assert(!((uintptr_t)pu16 & 1));
3600 return *pu16;
3601}
3602
3603
3604/**
3605 * Atomically reads a signed 16-bit value, ordered.
3606 *
3607 * @returns Current *pi16 value
3608 * @param pi16 Pointer to the 16-bit variable to read.
3609 */
3610DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3611{
3612 ASMMemoryFence();
3613 Assert(!((uintptr_t)pi16 & 1));
3614 return *pi16;
3615}
3616
3617
3618/**
3619 * Atomically reads a signed 16-bit value, unordered.
3620 *
3621 * @returns Current *pi16 value
3622 * @param pi16 Pointer to the 16-bit variable to read.
3623 */
3624DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3625{
3626 Assert(!((uintptr_t)pi16 & 1));
3627 return *pi16;
3628}
3629
3630
3631/**
3632 * Atomically reads an unsigned 32-bit value, ordered.
3633 *
3634 * @returns Current *pu32 value
3635 * @param pu32 Pointer to the 32-bit variable to read.
3636 */
3637DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3638{
3639 ASMMemoryFence();
3640 Assert(!((uintptr_t)pu32 & 3));
3641 return *pu32;
3642}
3643
3644
3645/**
3646 * Atomically reads an unsigned 32-bit value, unordered.
3647 *
3648 * @returns Current *pu32 value
3649 * @param pu32 Pointer to the 32-bit variable to read.
3650 */
3651DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3652{
3653 Assert(!((uintptr_t)pu32 & 3));
3654 return *pu32;
3655}
3656
3657
3658/**
3659 * Atomically reads a signed 32-bit value, ordered.
3660 *
3661 * @returns Current *pi32 value
3662 * @param pi32 Pointer to the 32-bit variable to read.
3663 */
3664DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3665{
3666 ASMMemoryFence();
3667 Assert(!((uintptr_t)pi32 & 3));
3668 return *pi32;
3669}
3670
3671
3672/**
3673 * Atomically reads a signed 32-bit value, unordered.
3674 *
3675 * @returns Current *pi32 value
3676 * @param pi32 Pointer to the 32-bit variable to read.
3677 */
3678DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3679{
3680 Assert(!((uintptr_t)pi32 & 3));
3681 return *pi32;
3682}
3683
3684
3685/**
3686 * Atomically reads an unsigned 64-bit value, ordered.
3687 *
3688 * @returns Current *pu64 value
3689 * @param pu64 Pointer to the 64-bit variable to read.
3690 * The memory pointed to must be writable.
3691 * @remark This will fault if the memory is read-only!
3692 */
3693#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3694DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3695#else
3696DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3697{
3698 uint64_t u64;
3699# ifdef RT_ARCH_AMD64
3700 Assert(!((uintptr_t)pu64 & 7));
3701/*# if RT_INLINE_ASM_GNU_STYLE
3702 __asm__ __volatile__( "mfence\n\t"
3703 "movq %1, %0\n\t"
3704 : "=r" (u64)
3705 : "m" (*pu64));
3706# else
3707 __asm
3708 {
3709 mfence
3710 mov rdx, [pu64]
3711 mov rax, [rdx]
3712 mov [u64], rax
3713 }
3714# endif*/
3715 ASMMemoryFence();
3716 u64 = *pu64;
3717# else /* !RT_ARCH_AMD64 */
3718# if RT_INLINE_ASM_GNU_STYLE
3719# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3720 uint32_t u32EBX = 0;
3721 Assert(!((uintptr_t)pu64 & 7));
3722 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3723 "lock; cmpxchg8b (%5)\n\t"
3724 "movl %3, %%ebx\n\t"
3725 : "=A" (u64),
3726# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3727 "+m" (*pu64)
3728# else
3729 "=m" (*pu64)
3730# endif
3731 : "0" (0),
3732 "m" (u32EBX),
3733 "c" (0),
3734 "S" (pu64));
3735# else /* !PIC */
3736 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3737 : "=A" (u64),
3738 "+m" (*pu64)
3739 : "0" (0),
3740 "b" (0),
3741 "c" (0));
3742# endif
3743# else
3744 Assert(!((uintptr_t)pu64 & 7));
3745 __asm
3746 {
3747 xor eax, eax
3748 xor edx, edx
3749 mov edi, pu64
3750 xor ecx, ecx
3751 xor ebx, ebx
3752 lock cmpxchg8b [edi]
3753 mov dword ptr [u64], eax
3754 mov dword ptr [u64 + 4], edx
3755 }
3756# endif
3757# endif /* !RT_ARCH_AMD64 */
3758 return u64;
3759}
3760#endif
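
/* An illustrative usage sketch: reading a 64-bit counter that another thread
 * updates with ASMAtomicWriteU64 (further down). On 32-bit hosts the read goes
 * through lock cmpxchg8b, which is why the variable must be writable. The
 * g_cbTotal variable and function name are assumptions for the example.
 *
 *      static volatile uint64_t g_cbTotal;
 *
 *      uint64_t ExampleGetTotal(void)
 *      {
 *          return ASMAtomicReadU64(&g_cbTotal);
 *      }
 */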
3761
3762
3763/**
3764 * Atomically reads an unsigned 64-bit value, unordered.
3765 *
3766 * @returns Current *pu64 value
3767 * @param pu64 Pointer to the 64-bit variable to read.
3768 * The memory pointed to must be writable.
3769 * @remark This will fault if the memory is read-only!
3770 */
3771#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3772DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3773#else
3774DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3775{
3776 uint64_t u64;
3777# ifdef RT_ARCH_AMD64
3778 Assert(!((uintptr_t)pu64 & 7));
3779/*# if RT_INLINE_ASM_GNU_STYLE
3780 Assert(!((uintptr_t)pu64 & 7));
3781 __asm__ __volatile__("movq %1, %0\n\t"
3782 : "=r" (u64)
3783 : "m" (*pu64));
3784# else
3785 __asm
3786 {
3787 mov rdx, [pu64]
3788 mov rax, [rdx]
3789 mov [u64], rax
3790 }
3791# endif */
3792 u64 = *pu64;
3793# else /* !RT_ARCH_AMD64 */
3794# if RT_INLINE_ASM_GNU_STYLE
3795# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3796 uint32_t u32EBX = 0;
3797 uint32_t u32Spill;
3798 Assert(!((uintptr_t)pu64 & 7));
3799 __asm__ __volatile__("xor %%eax,%%eax\n\t"
3800 "xor %%ecx,%%ecx\n\t"
3801 "xor %%edx,%%edx\n\t"
3802 "xchgl %%ebx, %3\n\t"
3803 "lock; cmpxchg8b (%4)\n\t"
3804 "movl %3, %%ebx\n\t"
3805 : "=A" (u64),
3806# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
3807 "+m" (*pu64),
3808# else
3809 "=m" (*pu64),
3810# endif
3811 "=c" (u32Spill)
3812 : "m" (u32EBX),
3813 "S" (pu64));
3814# else /* !PIC */
3815 __asm__ __volatile__("cmpxchg8b %1\n\t"
3816 : "=A" (u64),
3817 "+m" (*pu64)
3818 : "0" (0),
3819 "b" (0),
3820 "c" (0));
3821# endif
3822# else
3823 Assert(!((uintptr_t)pu64 & 7));
3824 __asm
3825 {
3826 xor eax, eax
3827 xor edx, edx
3828 mov edi, pu64
3829 xor ecx, ecx
3830 xor ebx, ebx
3831 lock cmpxchg8b [edi]
3832 mov dword ptr [u64], eax
3833 mov dword ptr [u64 + 4], edx
3834 }
3835# endif
3836# endif /* !RT_ARCH_AMD64 */
3837 return u64;
3838}
3839#endif
3840
3841
3842/**
3843 * Atomically reads a signed 64-bit value, ordered.
3844 *
3845 * @returns Current *pi64 value
3846 * @param pi64 Pointer to the 64-bit variable to read.
3847 * The memory pointed to must be writable.
3848 * @remark This will fault if the memory is read-only!
3849 */
3850DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3851{
3852 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3853}
3854
3855
3856/**
3857 * Atomically reads a signed 64-bit value, unordered.
3858 *
3859 * @returns Current *pi64 value
3860 * @param pi64 Pointer to the 64-bit variable to read.
3861 * The memory pointed to must be writable.
3862 * @remark This will fault if the memory is read-only!
3863 */
3864DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3865{
3866 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3867}
3868
3869
3870/**
3871 * Atomically reads a pointer value, ordered.
3872 *
3873 * @returns Current *pv value
3874 * @param ppv Pointer to the pointer variable to read.
3875 */
3876DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3877{
3878#if ARCH_BITS == 32
3879 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3880#elif ARCH_BITS == 64
3881 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3882#else
3883# error "ARCH_BITS is bogus"
3884#endif
3885}
3886
3887
3888/**
3889 * Atomically reads a pointer value, unordered.
3890 *
3891 * @returns Current *pv value
3892 * @param ppv Pointer to the pointer variable to read.
3893 */
3894DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3895{
3896#if ARCH_BITS == 32
3897 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3898#elif ARCH_BITS == 64
3899 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3900#else
3901# error "ARCH_BITS is bogus"
3902#endif
3903}
3904
3905
3906/**
3907 * Atomically reads a boolean value, ordered.
3908 *
3909 * @returns Current *pf value
3910 * @param pf Pointer to the boolean variable to read.
3911 */
3912DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3913{
3914 ASMMemoryFence();
3915 return *pf; /* byte reads are atomic on x86 */
3916}
3917
3918
3919/**
3920 * Atomically reads a boolean value, unordered.
3921 *
3922 * @returns Current *pf value
3923 * @param pf Pointer to the boolean variable to read.
3924 */
3925DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3926{
3927 return *pf; /* byte reads are atomic on x86 */
3928}
3929
3930
3931/**
3932 * Atomically read a typical IPRT handle value, ordered.
3933 *
3934 * @param ph Pointer to the handle variable to read.
3935 * @param phRes Where to store the result.
3936 *
3937 * @remarks This doesn't currently work for all handles (like RTFILE).
3938 */
3939#define ASMAtomicReadHandle(ph, phRes) \
3940 do { \
3941 *(void **)(phRes) = ASMAtomicReadPtr((void * volatile *)(ph)); \
3942 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3943 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3944 } while (0)
3945
3946
3947/**
3948 * Atomically read a typical IPRT handle value, unordered.
3949 *
3950 * @param ph Pointer to the handle variable to read.
3951 * @param phRes Where to store the result.
3952 *
3953 * @remarks This doesn't currently work for all handles (like RTFILE).
3954 */
3955#define ASMAtomicUoReadHandle(ph, phRes) \
3956 do { \
3957 *(void **)(phRes) = ASMAtomicUoReadPtr((void * volatile *)(ph)); \
3958 AssertCompile(sizeof(*ph) == sizeof(void *)); \
3959 AssertCompile(sizeof(*phRes) == sizeof(void *)); \
3960 } while (0)
3961
3962
3963/**
3964 * Atomically read a value which size might differ
3965 * between platforms or compilers, ordered.
3966 *
3967 * @param pu Pointer to the variable to update.
3968 * @param puRes Where to store the result.
3969 */
3970#define ASMAtomicReadSize(pu, puRes) \
3971 do { \
3972 switch (sizeof(*(pu))) { \
3973 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3974 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3975 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3976 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3977 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3978 } \
3979 } while (0)
3980
3981
3982/**
3983 * Atomically read a value which size might differ
3984 * between platforms or compilers, unordered.
3985 *
3986 * @param pu Pointer to the variable to update.
3987 * @param puRes Where to store the result.
3988 */
3989#define ASMAtomicUoReadSize(pu, puRes) \
3990 do { \
3991 switch (sizeof(*(pu))) { \
3992 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3993 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3994 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3995 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3996 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3997 } \
3998 } while (0)
3999
4000
4001/**
4002 * Atomically writes an unsigned 8-bit value, ordered.
4003 *
4004 * @param pu8 Pointer to the 8-bit variable.
4005 * @param u8 The 8-bit value to assign to *pu8.
4006 */
4007DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
4008{
4009 ASMAtomicXchgU8(pu8, u8);
4010}
4011
4012
4013/**
4014 * Atomically writes an unsigned 8-bit value, unordered.
4015 *
4016 * @param pu8 Pointer to the 8-bit variable.
4017 * @param u8 The 8-bit value to assign to *pu8.
4018 */
4019DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
4020{
4021 *pu8 = u8; /* byte writes are atomic on x86 */
4022}
4023
4024
4025/**
4026 * Atomically writes a signed 8-bit value, ordered.
4027 *
4028 * @param pi8 Pointer to the 8-bit variable.
4029 * @param i8 The 8-bit value to assign to *pi8.
4030 */
4031DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
4032{
4033 ASMAtomicXchgS8(pi8, i8);
4034}
4035
4036
4037/**
4038 * Atomically writes a signed 8-bit value, unordered.
4039 *
4040 * @param pi8 Pointer to the 8-bit variable.
4041 * @param i8 The 8-bit value to assign to *pi8.
4042 */
4043DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
4044{
4045 *pi8 = i8; /* byte writes are atomic on x86 */
4046}
4047
4048
4049/**
4050 * Atomically writes an unsigned 16-bit value, ordered.
4051 *
4052 * @param pu16 Pointer to the 16-bit variable.
4053 * @param u16 The 16-bit value to assign to *pu16.
4054 */
4055DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
4056{
4057 ASMAtomicXchgU16(pu16, u16);
4058}
4059
4060
4061/**
4062 * Atomically writes an unsigned 16-bit value, unordered.
4063 *
4064 * @param pu16 Pointer to the 16-bit variable.
4065 * @param u16 The 16-bit value to assign to *pu16.
4066 */
4067DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
4068{
4069 Assert(!((uintptr_t)pu16 & 1));
4070 *pu16 = u16;
4071}
4072
4073
4074/**
4075 * Atomically writes a signed 16-bit value, ordered.
4076 *
4077 * @param pi16 Pointer to the 16-bit variable.
4078 * @param i16 The 16-bit value to assign to *pi16.
4079 */
4080DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
4081{
4082 ASMAtomicXchgS16(pi16, i16);
4083}
4084
4085
4086/**
4087 * Atomically writes a signed 16-bit value, unordered.
4088 *
4089 * @param pi16 Pointer to the 16-bit variable.
4090 * @param i16 The 16-bit value to assign to *pi16.
4091 */
4092DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
4093{
4094 Assert(!((uintptr_t)pi16 & 1));
4095 *pi16 = i16;
4096}
4097
4098
4099/**
4100 * Atomically writes an unsigned 32-bit value, ordered.
4101 *
4102 * @param pu32 Pointer to the 32-bit variable.
4103 * @param u32 The 32-bit value to assign to *pu32.
4104 */
4105DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
4106{
4107 ASMAtomicXchgU32(pu32, u32);
4108}
4109
4110
4111/**
4112 * Atomically writes an unsigned 32-bit value, unordered.
4113 *
4114 * @param pu32 Pointer to the 32-bit variable.
4115 * @param u32 The 32-bit value to assign to *pu32.
4116 */
4117DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
4118{
4119 Assert(!((uintptr_t)pu32 & 3));
4120 *pu32 = u32;
4121}
4122
4123
4124/**
4125 * Atomically writes a signed 32-bit value, ordered.
4126 *
4127 * @param pi32 Pointer to the 32-bit variable.
4128 * @param i32 The 32-bit value to assign to *pi32.
4129 */
4130DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
4131{
4132 ASMAtomicXchgS32(pi32, i32);
4133}
4134
4135
4136/**
4137 * Atomically writes a signed 32-bit value, unordered.
4138 *
4139 * @param pi32 Pointer to the 32-bit variable.
4140 * @param i32 The 32-bit value to assign to *pi32.
4141 */
4142DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
4143{
4144 Assert(!((uintptr_t)pi32 & 3));
4145 *pi32 = i32;
4146}
4147
4148
4149/**
4150 * Atomically writes an unsigned 64-bit value, ordered.
4151 *
4152 * @param pu64 Pointer to the 64-bit variable.
4153 * @param u64 The 64-bit value to assign to *pu64.
4154 */
4155DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
4156{
4157 ASMAtomicXchgU64(pu64, u64);
4158}
4159
4160
4161/**
4162 * Atomically writes an unsigned 64-bit value, unordered.
4163 *
4164 * @param pu64 Pointer to the 64-bit variable.
4165 * @param u64 The 64-bit value to assign to *pu64.
4166 */
4167DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
4168{
4169 Assert(!((uintptr_t)pu64 & 7));
4170#if ARCH_BITS == 64
4171 *pu64 = u64;
4172#else
4173 ASMAtomicXchgU64(pu64, u64);
4174#endif
4175}
4176
4177
4178/**
4179 * Atomically writes a signed 64-bit value, ordered.
4180 *
4181 * @param pi64 Pointer to the 64-bit variable.
4182 * @param i64 The 64-bit value to assign to *pi64.
4183 */
4184DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
4185{
4186 ASMAtomicXchgS64(pi64, i64);
4187}
4188
4189
4190/**
4191 * Atomically writes a signed 64-bit value, unordered.
4192 *
4193 * @param pi64 Pointer to the 64-bit variable.
4194 * @param i64 The 64-bit value to assign to *pi64.
4195 */
4196DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
4197{
4198 Assert(!((uintptr_t)pi64 & 7));
4199#if ARCH_BITS == 64
4200 *pi64 = i64;
4201#else
4202 ASMAtomicXchgS64(pi64, i64);
4203#endif
4204}
4205
4206
4207/**
4208 * Atomically writes a boolean value, ordered.
4209 *
4210 * @param pf Pointer to the boolean variable.
4211 * @param f The boolean value to assign to *pf.
4212 */
4213DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
4214{
4215 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
4216}
4217
4218
4219/**
4220 * Atomically writes a boolean value, unordered.
4221 *
4222 * @param pf Pointer to the boolean variable.
4223 * @param f The boolean value to assign to *pf.
4224 */
4225DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
4226{
4227 *pf = f; /* byte writes are atomic on x86 */
4228}
4229
4230
4231/**
4232 * Atomically writes a pointer value, ordered.
4233 *
4235 * @param ppv Pointer to the pointer variable.
4236 * @param pv The pointer value to assign to *ppv.
4237 */
4238DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, const void *pv)
4239{
4240#if ARCH_BITS == 32
4241 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4242#elif ARCH_BITS == 64
4243 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4244#else
4245# error "ARCH_BITS is bogus"
4246#endif
4247}
4248
4249
4250/**
4251 * Atomically writes a pointer value, unordered.
4252 *
4254 * @param ppv Pointer to the pointer variable.
4255 * @param pv The pointer value to assign to *ppv.
4256 */
4257DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, const void *pv)
4258{
4259#if ARCH_BITS == 32
4260 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
4261#elif ARCH_BITS == 64
4262 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
4263#else
4264# error "ARCH_BITS is bogus"
4265#endif
4266}
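
/* An illustrative usage sketch: publishing a fully initialized structure by
 * storing its pointer last with the ordered write, while readers pick it up
 * with ASMAtomicReadPtr. The EXAMPLECFG type, g_pCfg variable and function
 * names are assumptions for the example.
 *
 *      typedef struct EXAMPLECFG { uint32_t uValue; } EXAMPLECFG;
 *      static EXAMPLECFG * volatile g_pCfg = NULL;
 *
 *      void ExamplePublish(EXAMPLECFG *pCfg)
 *      {
 *          // ... fill in *pCfg completely first ...
 *          ASMAtomicWritePtr((void * volatile *)&g_pCfg, pCfg);
 *      }
 *
 *      EXAMPLECFG *ExampleGet(void)
 *      {
 *          return (EXAMPLECFG *)ASMAtomicReadPtr((void * volatile *)&g_pCfg);
 *      }
 */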
4267
4268
4269/**
4270 * Atomically write a typical IPRT handle value, ordered.
4271 *
4272 * @param ph Pointer to the variable to update.
4273 * @param hNew The value to assign to *ph.
4274 *
4275 * @remarks This doesn't currently work for all handles (like RTFILE).
4276 */
4277#define ASMAtomicWriteHandle(ph, hNew) \
4278 do { \
4279 ASMAtomicWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
4280 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4281 } while (0)
4282
4283
4284/**
4285 * Atomically write a typical IPRT handle value, unordered.
4286 *
4287 * @param ph Pointer to the variable to update.
4288 * @param hNew The value to assign to *ph.
4289 *
4290 * @remarks This doesn't currently work for all handles (like RTFILE).
4291 */
4292#define ASMAtomicUoWriteHandle(ph, hNew) \
4293 do { \
4294 ASMAtomicUoWritePtr((void * volatile *)(ph), (const void *)(hNew)); \
4295 AssertCompile(sizeof(*ph) == sizeof(void*)); \
4296 } while (0)
4297
4298
4299/**
4300 * Atomically write a value which size might differ
4301 * between platforms or compilers, ordered.
4302 *
4303 * @param pu Pointer to the variable to update.
4304 * @param uNew The value to assign to *pu.
4305 */
4306#define ASMAtomicWriteSize(pu, uNew) \
4307 do { \
4308 switch (sizeof(*(pu))) { \
4309 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4310 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4311 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4312 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4313 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4314 } \
4315 } while (0)
4316
4317/**
4318 * Atomically write a value which size might differ
4319 * between platforms or compilers, unordered.
4320 *
4321 * @param pu Pointer to the variable to update.
4322 * @param uNew The value to assign to *pu.
4323 */
4324#define ASMAtomicUoWriteSize(pu, uNew) \
4325 do { \
4326 switch (sizeof(*(pu))) { \
4327 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
4328 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
4329 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
4330 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
4331 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
4332 } \
4333 } while (0)
4334
4335
4336
4337
4338/**
4339 * Invalidate page.
4340 *
4341 * @param pv Address of the page to invalidate.
4342 */
4343#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4344DECLASM(void) ASMInvalidatePage(void *pv);
4345#else
4346DECLINLINE(void) ASMInvalidatePage(void *pv)
4347{
4348# if RT_INLINE_ASM_USES_INTRIN
4349 __invlpg(pv);
4350
4351# elif RT_INLINE_ASM_GNU_STYLE
4352 __asm__ __volatile__("invlpg %0\n\t"
4353 : : "m" (*(uint8_t *)pv));
4354# else
4355 __asm
4356 {
4357# ifdef RT_ARCH_AMD64
4358 mov rax, [pv]
4359 invlpg [rax]
4360# else
4361 mov eax, [pv]
4362 invlpg [eax]
4363# endif
4364 }
4365# endif
4366}
4367#endif
4368
4369
4370#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
4371# if PAGE_SIZE != 0x1000
4372# error "PAGE_SIZE is not 0x1000!"
4373# endif
4374#endif
4375
4376/**
4377 * Zeros a 4K memory page.
4378 *
4379 * @param pv Pointer to the memory block. This must be page aligned.
4380 */
4381#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4382DECLASM(void) ASMMemZeroPage(volatile void *pv);
4383# else
4384DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
4385{
4386# if RT_INLINE_ASM_USES_INTRIN
4387# ifdef RT_ARCH_AMD64
4388 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
4389# else
4390 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
4391# endif
4392
4393# elif RT_INLINE_ASM_GNU_STYLE
4394 RTCCUINTREG uDummy;
4395# ifdef RT_ARCH_AMD64
4396 __asm__ __volatile__ ("rep stosq"
4397 : "=D" (pv),
4398 "=c" (uDummy)
4399 : "0" (pv),
4400 "c" (0x1000 >> 3),
4401 "a" (0)
4402 : "memory");
4403# else
4404 __asm__ __volatile__ ("rep stosl"
4405 : "=D" (pv),
4406 "=c" (uDummy)
4407 : "0" (pv),
4408 "c" (0x1000 >> 2),
4409 "a" (0)
4410 : "memory");
4411# endif
4412# else
4413 __asm
4414 {
4415# ifdef RT_ARCH_AMD64
4416 xor rax, rax
4417 mov ecx, 0200h
4418 mov rdi, [pv]
4419 rep stosq
4420# else
4421 xor eax, eax
4422 mov ecx, 0400h
4423 mov edi, [pv]
4424 rep stosd
4425# endif
4426 }
4427# endif
4428}
4429# endif
4430
4431
4432/**
4433 * Zeros a memory block with a 32-bit aligned size.
4434 *
4435 * @param pv Pointer to the memory block.
4436 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4437 */
4438#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4439DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
4440#else
4441DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
4442{
4443# if RT_INLINE_ASM_USES_INTRIN
4444# ifdef RT_ARCH_AMD64
4445 if (!(cb & 7))
4446 __stosq((unsigned __int64 *)pv, 0, cb / 8);
4447 else
4448# endif
4449 __stosd((unsigned long *)pv, 0, cb / 4);
4450
4451# elif RT_INLINE_ASM_GNU_STYLE
4452 __asm__ __volatile__ ("rep stosl"
4453 : "=D" (pv),
4454 "=c" (cb)
4455 : "0" (pv),
4456 "1" (cb >> 2),
4457 "a" (0)
4458 : "memory");
4459# else
4460 __asm
4461 {
4462 xor eax, eax
4463# ifdef RT_ARCH_AMD64
4464 mov rcx, [cb]
4465 shr rcx, 2
4466 mov rdi, [pv]
4467# else
4468 mov ecx, [cb]
4469 shr ecx, 2
4470 mov edi, [pv]
4471# endif
4472 rep stosd
4473 }
4474# endif
4475}
4476#endif
4477
4478
4479/**
4480 * Fills a memory block with a 32-bit aligned size.
4481 *
4482 * @param pv Pointer to the memory block.
4483 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4484 * @param u32 The value to fill with.
4485 */
4486#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4487DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
4488#else
4489DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
4490{
4491# if RT_INLINE_ASM_USES_INTRIN
4492# ifdef RT_ARCH_AMD64
4493 if (!(cb & 7))
4494 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
4495 else
4496# endif
4497 __stosd((unsigned long *)pv, u32, cb / 4);
4498
4499# elif RT_INLINE_ASM_GNU_STYLE
4500 __asm__ __volatile__ ("rep stosl"
4501 : "=D" (pv),
4502 "=c" (cb)
4503 : "0" (pv),
4504 "1" (cb >> 2),
4505 "a" (u32)
4506 : "memory");
4507# else
4508 __asm
4509 {
4510# ifdef RT_ARCH_AMD64
4511 mov rcx, [cb]
4512 shr rcx, 2
4513 mov rdi, [pv]
4514# else
4515 mov ecx, [cb]
4516 shr ecx, 2
4517 mov edi, [pv]
4518# endif
4519 mov eax, [u32]
4520 rep stosd
4521 }
4522# endif
4523}
4524#endif
4525
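/* Usage sketch for ASMMemZero32/ASMMemFill32 (illustrative only; the array
 * name is hypothetical). The byte count must be a multiple of 4:
 *
 *      uint32_t au32Table[64];
 *      ASMMemZero32(au32Table, sizeof(au32Table));              (all bits cleared)
 *      ASMMemFill32(au32Table, sizeof(au32Table), UINT32_MAX);  (all bits set)
 */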
4526
4527/**
4528 * Checks if a memory block is filled with the specified byte.
4529 *
4530 * This is a sort of inverted memchr.
4531 *
4532 * @returns Pointer to the byte which doesn't equal u8.
4533 * @returns NULL if all equal to u8.
4534 *
4535 * @param pv Pointer to the memory block.
4536 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4537 * @param u8 The value it's supposed to be filled with.
4538 */
4539#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4540DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
4541#else
4542DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4543{
4544/** @todo rewrite this in inline assembly? */
4545 uint8_t const *pb = (uint8_t const *)pv;
4546 for (; cb; cb--, pb++)
4547 if (RT_UNLIKELY(*pb != u8))
4548 return (void *)pb;
4549 return NULL;
4550}
4551#endif
4552
4553
4554/**
4555 * Checks if a memory block is filled with the specified 32-bit value.
4556 *
4557 * This is a sort of inverted memchr.
4558 *
4559 * @returns Pointer to the first value which doesn't equal u32.
4560 * @returns NULL if all equal to u32.
4561 *
4562 * @param pv Pointer to the memory block.
4563 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4564 * @param u32 The value it's supposed to be filled with.
4565 */
4566#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4567DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4568#else
4569DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4570{
4571/** @todo rewrite this in inline assembly? */
4572 uint32_t const *pu32 = (uint32_t const *)pv;
4573 for (; cb; cb -= 4, pu32++)
4574 if (RT_UNLIKELY(*pu32 != u32))
4575 return (uint32_t *)pu32;
4576 return NULL;
4577}
4578#endif
4579
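/* Usage sketch for ASMMemIsAll8/ASMMemIsAllU32 (illustrative only; pvPage and
 * cbPage are hypothetical, with cbPage assumed to be 32-bit aligned):
 *
 *      if (!ASMMemIsAll8(pvPage, cbPage, 0))
 *          ... every byte in the block is zero ...
 *      uint32_t *pu32Bad = ASMMemIsAllU32(pvPage, cbPage, UINT32_C(0xdeadbeef));
 *      if (pu32Bad)
 *          ... *pu32Bad is the first dword that differs ...
 */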
4580
4581/**
4582 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
4583 *
4584 * @returns u32F1 * u32F2.
4585 */
4586#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4587DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
4588#else
4589DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
4590{
4591# ifdef RT_ARCH_AMD64
4592 return (uint64_t)u32F1 * u32F2;
4593# else /* !RT_ARCH_AMD64 */
4594 uint64_t u64;
4595# if RT_INLINE_ASM_GNU_STYLE
4596 __asm__ __volatile__("mull %%edx"
4597 : "=A" (u64)
4598 : "a" (u32F2), "d" (u32F1));
4599# else
4600 __asm
4601 {
4602 mov edx, [u32F1]
4603 mov eax, [u32F2]
4604 mul edx
4605 mov dword ptr [u64], eax
4606 mov dword ptr [u64 + 4], edx
4607 }
4608# endif
4609 return u64;
4610# endif /* !RT_ARCH_AMD64 */
4611}
4612#endif
4613
4614
4615/**
4616 * Multiplies two signed 32-bit values returning a signed 64-bit result.
4617 *
4618 * @returns i32F1 * i32F2.
4619 */
4620#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4621DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
4622#else
4623DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
4624{
4625# ifdef RT_ARCH_AMD64
4626 return (int64_t)i32F1 * i32F2;
4627# else /* !RT_ARCH_AMD64 */
4628 int64_t i64;
4629# if RT_INLINE_ASM_GNU_STYLE
4630 __asm__ __volatile__("imull %%edx"
4631 : "=A" (i64)
4632 : "a" (i32F2), "d" (i32F1));
4633# else
4634 __asm
4635 {
4636 mov edx, [i32F1]
4637 mov eax, [i32F2]
4638 imul edx
4639 mov dword ptr [i64], eax
4640 mov dword ptr [i64 + 4], edx
4641 }
4642# endif
4643 return i64;
4644# endif /* !RT_ARCH_AMD64 */
4645}
4646#endif
4647
4648
4649/**
4650 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
4651 *
4652 * @returns u64 / u32.
4653 */
4654#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4655DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
4656#else
4657DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
4658{
4659# ifdef RT_ARCH_AMD64
4660 return (uint32_t)(u64 / u32);
4661# else /* !RT_ARCH_AMD64 */
4662# if RT_INLINE_ASM_GNU_STYLE
4663 RTCCUINTREG uDummy;
4664 __asm__ __volatile__("divl %3"
4665 : "=a" (u32), "=d"(uDummy)
4666 : "A" (u64), "r" (u32));
4667# else
4668 __asm
4669 {
4670 mov eax, dword ptr [u64]
4671 mov edx, dword ptr [u64 + 4]
4672 mov ecx, [u32]
4673 div ecx
4674 mov [u32], eax
4675 }
4676# endif
4677 return u32;
4678# endif /* !RT_ARCH_AMD64 */
4679}
4680#endif
4681
4682
4683/**
4684 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
4685 *
4686 * @returns i64 / i32.
4687 */
4688#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4689DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4690#else
4691DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4692{
4693# ifdef RT_ARCH_AMD64
4694 return (int32_t)(i64 / i32);
4695# else /* !RT_ARCH_AMD64 */
4696# if RT_INLINE_ASM_GNU_STYLE
4697 RTCCUINTREG iDummy;
4698 __asm__ __volatile__("idivl %3"
4699 : "=a" (i32), "=d"(iDummy)
4700 : "A" (i64), "r" (i32));
4701# else
4702 __asm
4703 {
4704 mov eax, dword ptr [i64]
4705 mov edx, dword ptr [i64 + 4]
4706 mov ecx, [i32]
4707 idiv ecx
4708 mov [i32], eax
4709 }
4710# endif
4711 return i32;
4712# endif /* !RT_ARCH_AMD64 */
4713}
4714#endif
4715
4716
4717/**
4718 * Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
4719 * returning the remainder.
4720 *
4721 * @returns u64 % u32.
4722 *
4723 * @remarks It is important that the quotient (u64 / u32) fits in 32 bits or the division will overflow and we'll crash.
4724 */
4725#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4726DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
4727#else
4728DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
4729{
4730# ifdef RT_ARCH_AMD64
4731 return (uint32_t)(u64 % u32);
4732# else /* !RT_ARCH_AMD64 */
4733# if RT_INLINE_ASM_GNU_STYLE
4734 RTCCUINTREG uDummy;
4735 __asm__ __volatile__("divl %3"
4736 : "=a" (uDummy), "=d"(u32)
4737 : "A" (u64), "r" (u32));
4738# else
4739 __asm
4740 {
4741 mov eax, dword ptr [u64]
4742 mov edx, dword ptr [u64 + 4]
4743 mov ecx, [u32]
4744 div ecx
4745 mov [u32], edx
4746 }
4747# endif
4748 return u32;
4749# endif /* !RT_ARCH_AMD64 */
4750}
4751#endif
4752
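/* Usage sketch for the 64-by-32 division helpers (illustrative only; cNanoSecs
 * is hypothetical). On 32-bit hosts these map to divl/idivl, so the quotient
 * must fit in 32 bits; splitting a nanosecond count is only safe while the
 * second count stays below 2^32:
 *
 *      uint32_t cSecs = ASMDivU64ByU32RetU32(cNanoSecs, UINT32_C(1000000000));
 *      uint32_t cNs   = ASMModU64ByU32RetU32(cNanoSecs, UINT32_C(1000000000));
 */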
4753
4754/**
4755 * Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
4756 * returning the remainder.
4757 *
4758 * @returns i64 % i32.
4759 *
4760 * @remarks It is important that the quotient (i64 / i32) fits in a signed 32-bit integer or the division will overflow and we'll crash.
4761 */
4762#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4763DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
4764#else
4765DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
4766{
4767# ifdef RT_ARCH_AMD64
4768 return (int32_t)(i64 % i32);
4769# else /* !RT_ARCH_AMD64 */
4770# if RT_INLINE_ASM_GNU_STYLE
4771 RTCCUINTREG iDummy;
4772 __asm__ __volatile__("idivl %3"
4773 : "=a" (iDummy), "=d"(i32)
4774 : "A" (i64), "r" (i32));
4775# else
4776 __asm
4777 {
4778 mov eax, dword ptr [i64]
4779 mov edx, dword ptr [i64 + 4]
4780 mov ecx, [i32]
4781 idiv ecx
4782 mov [i32], edx
4783 }
4784# endif
4785 return i32;
4786# endif /* !RT_ARCH_AMD64 */
4787}
4788#endif
4789
4790
4791/**
4792 * Multiplies a 64-bit by a 32-bit integer and divides the result by a 32-bit integer,
4793 * using a 96-bit intermediate result.
4794 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4795 * __udivdi3 and __umoddi3 even if this inline function is not used.
4796 *
4797 * @returns (u64A * u32B) / u32C.
4798 * @param u64A The 64-bit value.
4799 * @param u32B The 32-bit value to multiply A by.
4800 * @param u32C The 32-bit value to divide A*B by.
4801 */
4802#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4803DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4804#else
4805DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4806{
4807# if RT_INLINE_ASM_GNU_STYLE
4808# ifdef RT_ARCH_AMD64
4809 uint64_t u64Result, u64Spill;
4810 __asm__ __volatile__("mulq %2\n\t"
4811 "divq %3\n\t"
4812 : "=a" (u64Result),
4813 "=d" (u64Spill)
4814 : "r" ((uint64_t)u32B),
4815 "r" ((uint64_t)u32C),
4816 "0" (u64A),
4817 "1" (0));
4818 return u64Result;
4819# else
4820 uint32_t u32Dummy;
4821 uint64_t u64Result;
4822 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4823 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4824 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4825 eax = u64A.hi */
4826                         "xchg %%edx,%%edi \n\t" /* edi = u64Lo.hi
4827 edx = u32C */
4828 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4829 edx = u32B */
4830 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4831 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4832 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4833 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4834 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4835 edx = u64Hi % u32C */
4836 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4837 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4838 "divl %%ecx \n\t" /* u64Result.lo */
4839 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4840 : "=A"(u64Result), "=c"(u32Dummy),
4841 "=S"(u32Dummy), "=D"(u32Dummy)
4842 : "a"((uint32_t)u64A),
4843 "S"((uint32_t)(u64A >> 32)),
4844 "c"(u32B),
4845 "D"(u32C));
4846 return u64Result;
4847# endif
4848# else
4849 RTUINT64U u;
4850 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4851 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4852 u64Hi += (u64Lo >> 32);
4853 u.s.Hi = (uint32_t)(u64Hi / u32C);
4854 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4855 return u.u;
4856# endif
4857}
4858#endif
4859
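/* Usage sketch for ASMMultU64ByU32DivByU32 (illustrative only; cTscTicks and
 * uTscHz are hypothetical). The 96-bit intermediate keeps the product from
 * overflowing before the division, which plain 64-bit C arithmetic would not:
 *
 *      uint64_t cNanoSecs = ASMMultU64ByU32DivByU32(cTscTicks, UINT32_C(1000000000), uTscHz);
 */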
4860
4861/**
4862 * Probes a byte pointer for read access.
4863 *
4864 * While the function will fault if the byte is not read accessible,
4865 * the idea is to do this in a safe place like before acquiring locks
4866 * and such like.
4867 *
4868 * Also, this function guarantees that an eager compiler is not going
4869 * to optimize the probing away.
4870 *
4871 * @param pvByte Pointer to the byte.
4872 */
4873#if RT_INLINE_ASM_EXTERNAL
4874DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4875#else
4876DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4877{
4878 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4879 uint8_t u8;
4880# if RT_INLINE_ASM_GNU_STYLE
4881 __asm__ __volatile__("movb (%1), %0\n\t"
4882 : "=r" (u8)
4883 : "r" (pvByte));
4884# else
4885 __asm
4886 {
4887# ifdef RT_ARCH_AMD64
4888 mov rax, [pvByte]
4889 mov al, [rax]
4890# else
4891 mov eax, [pvByte]
4892 mov al, [eax]
4893# endif
4894 mov [u8], al
4895 }
4896# endif
4897 return u8;
4898}
4899#endif
4900
4901/**
4902 * Probes a buffer for read access page by page.
4903 *
4904 * While the function will fault if the buffer is not fully read
4905 * accessible, the idea is to do this in a safe place like before
4906 * acquiring locks and such like.
4907 *
4908 * Also, this function guarantees that an eager compiler is not going
4909 * to optimize the probing away.
4910 *
4911 * @param pvBuf Pointer to the buffer.
4912 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4913 */
4914DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4915{
4916 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4917 /* the first byte */
4918 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4919 ASMProbeReadByte(pu8);
4920
4921    /* the pages in between. */
4922 while (cbBuf > /*PAGE_SIZE*/0x1000)
4923 {
4924 ASMProbeReadByte(pu8);
4925 cbBuf -= /*PAGE_SIZE*/0x1000;
4926 pu8 += /*PAGE_SIZE*/0x1000;
4927 }
4928
4929 /* the last byte */
4930 ASMProbeReadByte(pu8 + cbBuf - 1);
4931}
4932
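/* Usage sketch for the probe helpers (illustrative only; pReq is a
 * hypothetical structure received from an untrusted caller):
 *
 *      ASMProbeReadBuffer(pReq, sizeof(*pReq));   (any fault happens here, in a safe context,
 *                                                  rather than later while holding locks)
 */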
4933
4934/** @def ASMBreakpoint
4935 * Debugger Breakpoint.
4936 * @remark In the gnu world we add a nop instruction after the int3 to
4937 * force gdb to remain at the int3 source line.
4938 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4939 * @internal
4940 */
4941#if RT_INLINE_ASM_GNU_STYLE
4942# ifndef __L4ENV__
4943# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4944# else
4945# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4946# endif
4947#else
4948# define ASMBreakpoint() __debugbreak()
4949#endif
4950
4951
4952
4953/** @defgroup grp_inline_bits Bit Operations
4954 * @{
4955 */
4956
4957
4958/**
4959 * Sets a bit in a bitmap.
4960 *
4961 * @param pvBitmap Pointer to the bitmap.
4962 * @param iBit The bit to set.
4963 */
4964#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4965DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4966#else
4967DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4968{
4969# if RT_INLINE_ASM_USES_INTRIN
4970 _bittestandset((long *)pvBitmap, iBit);
4971
4972# elif RT_INLINE_ASM_GNU_STYLE
4973 __asm__ __volatile__ ("btsl %1, %0"
4974 : "=m" (*(volatile long *)pvBitmap)
4975 : "Ir" (iBit),
4976 "m" (*(volatile long *)pvBitmap)
4977 : "memory");
4978# else
4979 __asm
4980 {
4981# ifdef RT_ARCH_AMD64
4982 mov rax, [pvBitmap]
4983 mov edx, [iBit]
4984 bts [rax], edx
4985# else
4986 mov eax, [pvBitmap]
4987 mov edx, [iBit]
4988 bts [eax], edx
4989# endif
4990 }
4991# endif
4992}
4993#endif
4994
4995
4996/**
4997 * Atomically sets a bit in a bitmap, ordered.
4998 *
4999 * @param pvBitmap Pointer to the bitmap.
5000 * @param iBit The bit to set.
5001 */
5002#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5003DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
5004#else
5005DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
5006{
5007# if RT_INLINE_ASM_USES_INTRIN
5008 _interlockedbittestandset((long *)pvBitmap, iBit);
5009# elif RT_INLINE_ASM_GNU_STYLE
5010 __asm__ __volatile__ ("lock; btsl %1, %0"
5011 : "=m" (*(volatile long *)pvBitmap)
5012 : "Ir" (iBit),
5013 "m" (*(volatile long *)pvBitmap)
5014 : "memory");
5015# else
5016 __asm
5017 {
5018# ifdef RT_ARCH_AMD64
5019 mov rax, [pvBitmap]
5020 mov edx, [iBit]
5021 lock bts [rax], edx
5022# else
5023 mov eax, [pvBitmap]
5024 mov edx, [iBit]
5025 lock bts [eax], edx
5026# endif
5027 }
5028# endif
5029}
5030#endif
5031
5032
5033/**
5034 * Clears a bit in a bitmap.
5035 *
5036 * @param pvBitmap Pointer to the bitmap.
5037 * @param iBit The bit to clear.
5038 */
5039#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5040DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
5041#else
5042DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
5043{
5044# if RT_INLINE_ASM_USES_INTRIN
5045 _bittestandreset((long *)pvBitmap, iBit);
5046
5047# elif RT_INLINE_ASM_GNU_STYLE
5048 __asm__ __volatile__ ("btrl %1, %0"
5049 : "=m" (*(volatile long *)pvBitmap)
5050 : "Ir" (iBit),
5051 "m" (*(volatile long *)pvBitmap)
5052 : "memory");
5053# else
5054 __asm
5055 {
5056# ifdef RT_ARCH_AMD64
5057 mov rax, [pvBitmap]
5058 mov edx, [iBit]
5059 btr [rax], edx
5060# else
5061 mov eax, [pvBitmap]
5062 mov edx, [iBit]
5063 btr [eax], edx
5064# endif
5065 }
5066# endif
5067}
5068#endif
5069
5070
5071/**
5072 * Atomically clears a bit in a bitmap, ordered.
5073 *
5074 * @param pvBitmap Pointer to the bitmap.
5075 * @param iBit The bit to clear.
5076 * @remark No memory barrier, take care on smp.
5077 */
5078#if RT_INLINE_ASM_EXTERNAL
5079DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
5080#else
5081DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
5082{
5083# if RT_INLINE_ASM_GNU_STYLE
5084 __asm__ __volatile__ ("lock; btrl %1, %0"
5085 : "=m" (*(volatile long *)pvBitmap)
5086 : "Ir" (iBit),
5087 "m" (*(volatile long *)pvBitmap)
5088 : "memory");
5089# else
5090 __asm
5091 {
5092# ifdef RT_ARCH_AMD64
5093 mov rax, [pvBitmap]
5094 mov edx, [iBit]
5095 lock btr [rax], edx
5096# else
5097 mov eax, [pvBitmap]
5098 mov edx, [iBit]
5099 lock btr [eax], edx
5100# endif
5101 }
5102# endif
5103}
5104#endif
5105
5106
5107/**
5108 * Toggles a bit in a bitmap.
5109 *
5110 * @param pvBitmap Pointer to the bitmap.
5111 * @param iBit The bit to toggle.
5112 */
5113#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5114DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
5115#else
5116DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
5117{
5118# if RT_INLINE_ASM_USES_INTRIN
5119 _bittestandcomplement((long *)pvBitmap, iBit);
5120# elif RT_INLINE_ASM_GNU_STYLE
5121 __asm__ __volatile__ ("btcl %1, %0"
5122 : "=m" (*(volatile long *)pvBitmap)
5123 : "Ir" (iBit),
5124 "m" (*(volatile long *)pvBitmap)
5125 : "memory");
5126# else
5127 __asm
5128 {
5129# ifdef RT_ARCH_AMD64
5130 mov rax, [pvBitmap]
5131 mov edx, [iBit]
5132 btc [rax], edx
5133# else
5134 mov eax, [pvBitmap]
5135 mov edx, [iBit]
5136 btc [eax], edx
5137# endif
5138 }
5139# endif
5140}
5141#endif
5142
5143
5144/**
5145 * Atomically toggles a bit in a bitmap, ordered.
5146 *
5147 * @param pvBitmap Pointer to the bitmap.
5148 * @param iBit The bit to toggle.
5149 */
5150#if RT_INLINE_ASM_EXTERNAL
5151DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
5152#else
5153DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
5154{
5155# if RT_INLINE_ASM_GNU_STYLE
5156 __asm__ __volatile__ ("lock; btcl %1, %0"
5157 : "=m" (*(volatile long *)pvBitmap)
5158 : "Ir" (iBit),
5159 "m" (*(volatile long *)pvBitmap)
5160 : "memory");
5161# else
5162 __asm
5163 {
5164# ifdef RT_ARCH_AMD64
5165 mov rax, [pvBitmap]
5166 mov edx, [iBit]
5167 lock btc [rax], edx
5168# else
5169 mov eax, [pvBitmap]
5170 mov edx, [iBit]
5171 lock btc [eax], edx
5172# endif
5173 }
5174# endif
5175}
5176#endif
5177
5178
5179/**
5180 * Tests and sets a bit in a bitmap.
5181 *
5182 * @returns true if the bit was set.
5183 * @returns false if the bit was clear.
5184 * @param pvBitmap Pointer to the bitmap.
5185 * @param iBit The bit to test and set.
5186 */
5187#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5188DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5189#else
5190DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5191{
5192 union { bool f; uint32_t u32; uint8_t u8; } rc;
5193# if RT_INLINE_ASM_USES_INTRIN
5194 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
5195
5196# elif RT_INLINE_ASM_GNU_STYLE
5197 __asm__ __volatile__ ("btsl %2, %1\n\t"
5198 "setc %b0\n\t"
5199 "andl $1, %0\n\t"
5200 : "=q" (rc.u32),
5201 "=m" (*(volatile long *)pvBitmap)
5202 : "Ir" (iBit),
5203 "m" (*(volatile long *)pvBitmap)
5204 : "memory");
5205# else
5206 __asm
5207 {
5208 mov edx, [iBit]
5209# ifdef RT_ARCH_AMD64
5210 mov rax, [pvBitmap]
5211 bts [rax], edx
5212# else
5213 mov eax, [pvBitmap]
5214 bts [eax], edx
5215# endif
5216 setc al
5217 and eax, 1
5218 mov [rc.u32], eax
5219 }
5220# endif
5221 return rc.f;
5222}
5223#endif
5224
5225
5226/**
5227 * Atomically tests and sets a bit in a bitmap, ordered.
5228 *
5229 * @returns true if the bit was set.
5230 * @returns false if the bit was clear.
5231 * @param pvBitmap Pointer to the bitmap.
5232 * @param iBit The bit to test and set.
5233 */
5234#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5235DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
5236#else
5237DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
5238{
5239 union { bool f; uint32_t u32; uint8_t u8; } rc;
5240# if RT_INLINE_ASM_USES_INTRIN
5241 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
5242# elif RT_INLINE_ASM_GNU_STYLE
5243 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
5244 "setc %b0\n\t"
5245 "andl $1, %0\n\t"
5246 : "=q" (rc.u32),
5247 "=m" (*(volatile long *)pvBitmap)
5248 : "Ir" (iBit),
5249 "m" (*(volatile long *)pvBitmap)
5250 : "memory");
5251# else
5252 __asm
5253 {
5254 mov edx, [iBit]
5255# ifdef RT_ARCH_AMD64
5256 mov rax, [pvBitmap]
5257 lock bts [rax], edx
5258# else
5259 mov eax, [pvBitmap]
5260 lock bts [eax], edx
5261# endif
5262 setc al
5263 and eax, 1
5264 mov [rc.u32], eax
5265 }
5266# endif
5267 return rc.f;
5268}
5269#endif
5270
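/* Usage sketch for ASMAtomicBitTestAndSet (illustrative only; au32AllocBitmap
 * and iSlot are hypothetical). The returned old bit value tells us whether we
 * won the race for the slot:
 *
 *      if (!ASMAtomicBitTestAndSet(au32AllocBitmap, iSlot))
 *          ... the slot was free and is now ours ...
 *      else
 *          ... somebody else already owns it ...
 */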
5271
5272/**
5273 * Tests and clears a bit in a bitmap.
5274 *
5275 * @returns true if the bit was set.
5276 * @returns false if the bit was clear.
5277 * @param pvBitmap Pointer to the bitmap.
5278 * @param iBit The bit to test and clear.
5279 */
5280#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5281DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5282#else
5283DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5284{
5285 union { bool f; uint32_t u32; uint8_t u8; } rc;
5286# if RT_INLINE_ASM_USES_INTRIN
5287 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
5288
5289# elif RT_INLINE_ASM_GNU_STYLE
5290 __asm__ __volatile__ ("btrl %2, %1\n\t"
5291 "setc %b0\n\t"
5292 "andl $1, %0\n\t"
5293 : "=q" (rc.u32),
5294 "=m" (*(volatile long *)pvBitmap)
5295 : "Ir" (iBit),
5296 "m" (*(volatile long *)pvBitmap)
5297 : "memory");
5298# else
5299 __asm
5300 {
5301 mov edx, [iBit]
5302# ifdef RT_ARCH_AMD64
5303 mov rax, [pvBitmap]
5304 btr [rax], edx
5305# else
5306 mov eax, [pvBitmap]
5307 btr [eax], edx
5308# endif
5309 setc al
5310 and eax, 1
5311 mov [rc.u32], eax
5312 }
5313# endif
5314 return rc.f;
5315}
5316#endif
5317
5318
5319/**
5320 * Atomically tests and clears a bit in a bitmap, ordered.
5321 *
5322 * @returns true if the bit was set.
5323 * @returns false if the bit was clear.
5324 * @param pvBitmap Pointer to the bitmap.
5325 * @param iBit The bit to test and clear.
5326 * @remark No memory barrier, take care on smp.
5327 */
5328#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5329DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
5330#else
5331DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
5332{
5333 union { bool f; uint32_t u32; uint8_t u8; } rc;
5334# if RT_INLINE_ASM_USES_INTRIN
5335 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
5336
5337# elif RT_INLINE_ASM_GNU_STYLE
5338 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
5339 "setc %b0\n\t"
5340 "andl $1, %0\n\t"
5341 : "=q" (rc.u32),
5342 "=m" (*(volatile long *)pvBitmap)
5343 : "Ir" (iBit),
5344 "m" (*(volatile long *)pvBitmap)
5345 : "memory");
5346# else
5347 __asm
5348 {
5349 mov edx, [iBit]
5350# ifdef RT_ARCH_AMD64
5351 mov rax, [pvBitmap]
5352 lock btr [rax], edx
5353# else
5354 mov eax, [pvBitmap]
5355 lock btr [eax], edx
5356# endif
5357 setc al
5358 and eax, 1
5359 mov [rc.u32], eax
5360 }
5361# endif
5362 return rc.f;
5363}
5364#endif
5365
5366
5367/**
5368 * Tests and toggles a bit in a bitmap.
5369 *
5370 * @returns true if the bit was set.
5371 * @returns false if the bit was clear.
5372 * @param pvBitmap Pointer to the bitmap.
5373 * @param iBit The bit to test and toggle.
5374 */
5375#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5376DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5377#else
5378DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5379{
5380 union { bool f; uint32_t u32; uint8_t u8; } rc;
5381# if RT_INLINE_ASM_USES_INTRIN
5382 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
5383
5384# elif RT_INLINE_ASM_GNU_STYLE
5385 __asm__ __volatile__ ("btcl %2, %1\n\t"
5386 "setc %b0\n\t"
5387 "andl $1, %0\n\t"
5388 : "=q" (rc.u32),
5389 "=m" (*(volatile long *)pvBitmap)
5390 : "Ir" (iBit),
5391 "m" (*(volatile long *)pvBitmap)
5392 : "memory");
5393# else
5394 __asm
5395 {
5396 mov edx, [iBit]
5397# ifdef RT_ARCH_AMD64
5398 mov rax, [pvBitmap]
5399 btc [rax], edx
5400# else
5401 mov eax, [pvBitmap]
5402 btc [eax], edx
5403# endif
5404 setc al
5405 and eax, 1
5406 mov [rc.u32], eax
5407 }
5408# endif
5409 return rc.f;
5410}
5411#endif
5412
5413
5414/**
5415 * Atomically tests and toggles a bit in a bitmap, ordered.
5416 *
5417 * @returns true if the bit was set.
5418 * @returns false if the bit was clear.
5419 * @param pvBitmap Pointer to the bitmap.
5420 * @param iBit The bit to test and toggle.
5421 */
5422#if RT_INLINE_ASM_EXTERNAL
5423DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
5424#else
5425DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
5426{
5427 union { bool f; uint32_t u32; uint8_t u8; } rc;
5428# if RT_INLINE_ASM_GNU_STYLE
5429 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
5430 "setc %b0\n\t"
5431 "andl $1, %0\n\t"
5432 : "=q" (rc.u32),
5433 "=m" (*(volatile long *)pvBitmap)
5434 : "Ir" (iBit),
5435 "m" (*(volatile long *)pvBitmap)
5436 : "memory");
5437# else
5438 __asm
5439 {
5440 mov edx, [iBit]
5441# ifdef RT_ARCH_AMD64
5442 mov rax, [pvBitmap]
5443 lock btc [rax], edx
5444# else
5445 mov eax, [pvBitmap]
5446 lock btc [eax], edx
5447# endif
5448 setc al
5449 and eax, 1
5450 mov [rc.u32], eax
5451 }
5452# endif
5453 return rc.f;
5454}
5455#endif
5456
5457
5458/**
5459 * Tests if a bit in a bitmap is set.
5460 *
5461 * @returns true if the bit is set.
5462 * @returns false if the bit is clear.
5463 * @param pvBitmap Pointer to the bitmap.
5464 * @param iBit The bit to test.
5465 */
5466#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5467DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
5468#else
5469DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
5470{
5471 union { bool f; uint32_t u32; uint8_t u8; } rc;
5472# if RT_INLINE_ASM_USES_INTRIN
5473 rc.u32 = _bittest((long *)pvBitmap, iBit);
5474# elif RT_INLINE_ASM_GNU_STYLE
5475
5476 __asm__ __volatile__ ("btl %2, %1\n\t"
5477 "setc %b0\n\t"
5478 "andl $1, %0\n\t"
5479 : "=q" (rc.u32)
5480 : "m" (*(const volatile long *)pvBitmap),
5481 "Ir" (iBit)
5482 : "memory");
5483# else
5484 __asm
5485 {
5486 mov edx, [iBit]
5487# ifdef RT_ARCH_AMD64
5488 mov rax, [pvBitmap]
5489 bt [rax], edx
5490# else
5491 mov eax, [pvBitmap]
5492 bt [eax], edx
5493# endif
5494 setc al
5495 and eax, 1
5496 mov [rc.u32], eax
5497 }
5498# endif
5499 return rc.f;
5500}
5501#endif
5502
5503
5504/**
5505 * Clears a bit range within a bitmap.
5506 *
5507 * @param pvBitmap Pointer to the bitmap.
5508 * @param iBitStart The first bit to clear.
5509 * @param iBitEnd The first bit not to clear.
5510 */
5511DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5512{
5513 if (iBitStart < iBitEnd)
5514 {
5515 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5516 int iStart = iBitStart & ~31;
5517 int iEnd = iBitEnd & ~31;
5518 if (iStart == iEnd)
5519 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
5520 else
5521 {
5522 /* bits in first dword. */
5523 if (iBitStart & 31)
5524 {
5525 *pu32 &= (1 << (iBitStart & 31)) - 1;
5526 pu32++;
5527 iBitStart = iStart + 32;
5528 }
5529
5530 /* whole dword. */
5531 if (iBitStart != iEnd)
5532 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
5533
5534 /* bits in last dword. */
5535 if (iBitEnd & 31)
5536 {
5537 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5538 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
5539 }
5540 }
5541 }
5542}
5543
5544
5545/**
5546 * Sets a bit range within a bitmap.
5547 *
5548 * @param pvBitmap Pointer to the bitmap.
5549 * @param iBitStart The first bit to set.
5550 * @param iBitEnd The first bit not to set.
5551 */
5552DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
5553{
5554 if (iBitStart < iBitEnd)
5555 {
5556 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
5557 int iStart = iBitStart & ~31;
5558 int iEnd = iBitEnd & ~31;
5559 if (iStart == iEnd)
5560            *pu32 |= ((1 << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
5561 else
5562 {
5563 /* bits in first dword. */
5564 if (iBitStart & 31)
5565 {
5566 *pu32 |= ~((1 << (iBitStart & 31)) - 1);
5567 pu32++;
5568 iBitStart = iStart + 32;
5569 }
5570
5571 /* whole dword. */
5572 if (iBitStart != iEnd)
5573 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~0);
5574
5575 /* bits in last dword. */
5576 if (iBitEnd & 31)
5577 {
5578 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
5579 *pu32 |= (1 << (iBitEnd & 31)) - 1;
5580 }
5581 }
5582 }
5583}
5584
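/* Usage sketch for ASMBitClearRange/ASMBitSetRange (illustrative only; the
 * bitmap below is hypothetical). Note that iBitEnd is exclusive:
 *
 *      uint32_t au32Bitmap[8];                          (256 bits)
 *      ASMMemZero32(au32Bitmap, sizeof(au32Bitmap));
 *      ASMBitSetRange(au32Bitmap, 10, 20);              (sets bits 10 thru 19)
 *      ASMBitClearRange(au32Bitmap, 12, 15);            (clears bits 12 thru 14 again)
 */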
5585
5586/**
5587 * Finds the first clear bit in a bitmap.
5588 *
5589 * @returns Index of the first zero bit.
5590 * @returns -1 if no clear bit was found.
5591 * @param pvBitmap Pointer to the bitmap.
5592 * @param cBits The number of bits in the bitmap. Multiple of 32.
5593 */
5594#if RT_INLINE_ASM_EXTERNAL
5595DECLASM(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
5596#else
5597DECLINLINE(int) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
5598{
5599 if (cBits)
5600 {
5601 int32_t iBit;
5602# if RT_INLINE_ASM_GNU_STYLE
5603 RTCCUINTREG uEAX, uECX, uEDI;
5604 cBits = RT_ALIGN_32(cBits, 32);
5605 __asm__ __volatile__("repe; scasl\n\t"
5606 "je 1f\n\t"
5607# ifdef RT_ARCH_AMD64
5608 "lea -4(%%rdi), %%rdi\n\t"
5609 "xorl (%%rdi), %%eax\n\t"
5610 "subq %5, %%rdi\n\t"
5611# else
5612 "lea -4(%%edi), %%edi\n\t"
5613 "xorl (%%edi), %%eax\n\t"
5614 "subl %5, %%edi\n\t"
5615# endif
5616 "shll $3, %%edi\n\t"
5617 "bsfl %%eax, %%edx\n\t"
5618 "addl %%edi, %%edx\n\t"
5619 "1:\t\n"
5620 : "=d" (iBit),
5621 "=&c" (uECX),
5622 "=&D" (uEDI),
5623 "=&a" (uEAX)
5624 : "0" (0xffffffff),
5625 "mr" (pvBitmap),
5626 "1" (cBits >> 5),
5627 "2" (pvBitmap),
5628 "3" (0xffffffff));
5629# else
5630 cBits = RT_ALIGN_32(cBits, 32);
5631 __asm
5632 {
5633# ifdef RT_ARCH_AMD64
5634 mov rdi, [pvBitmap]
5635 mov rbx, rdi
5636# else
5637 mov edi, [pvBitmap]
5638 mov ebx, edi
5639# endif
5640 mov edx, 0ffffffffh
5641 mov eax, edx
5642 mov ecx, [cBits]
5643 shr ecx, 5
5644 repe scasd
5645 je done
5646
5647# ifdef RT_ARCH_AMD64
5648 lea rdi, [rdi - 4]
5649 xor eax, [rdi]
5650 sub rdi, rbx
5651# else
5652 lea edi, [edi - 4]
5653 xor eax, [edi]
5654 sub edi, ebx
5655# endif
5656 shl edi, 3
5657 bsf edx, eax
5658 add edx, edi
5659 done:
5660 mov [iBit], edx
5661 }
5662# endif
5663 return iBit;
5664 }
5665 return -1;
5666}
5667#endif
5668
5669
5670/**
5671 * Finds the next clear bit in a bitmap.
5672 *
5673 * @returns Index of the next clear bit.
5674 * @returns -1 if no clear bit was found.
5675 * @param pvBitmap Pointer to the bitmap.
5676 * @param cBits The number of bits in the bitmap. Multiple of 32.
5677 * @param iBitPrev The bit returned from the last search.
5678 * The search will start at iBitPrev + 1.
5679 */
5680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5681DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5682#else
5683DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5684{
5685 int iBit = ++iBitPrev & 31;
5686 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5687 cBits -= iBitPrev & ~31;
5688 if (iBit)
5689 {
5690 /* inspect the first dword. */
5691 uint32_t u32 = (~*(const volatile uint32_t *)pvBitmap) >> iBit;
5692# if RT_INLINE_ASM_USES_INTRIN
5693 unsigned long ulBit = 0;
5694 if (_BitScanForward(&ulBit, u32))
5695 return ulBit + iBitPrev;
5696 iBit = -1;
5697# else
5698# if RT_INLINE_ASM_GNU_STYLE
5699 __asm__ __volatile__("bsf %1, %0\n\t"
5700 "jnz 1f\n\t"
5701 "movl $-1, %0\n\t"
5702 "1:\n\t"
5703 : "=r" (iBit)
5704 : "r" (u32));
5705# else
5706 __asm
5707 {
5708 mov edx, [u32]
5709 bsf eax, edx
5710 jnz done
5711 mov eax, 0ffffffffh
5712 done:
5713 mov [iBit], eax
5714 }
5715# endif
5716 if (iBit >= 0)
5717 return iBit + iBitPrev;
5718# endif
5719 /* Search the rest of the bitmap, if there is anything. */
5720 if (cBits > 32)
5721 {
5722 iBit = ASMBitFirstClear((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5723 if (iBit >= 0)
5724 return iBit + (iBitPrev & ~31) + 32;
5725 }
5726 }
5727 else
5728 {
5729 /* Search the rest of the bitmap. */
5730 iBit = ASMBitFirstClear(pvBitmap, cBits);
5731 if (iBit >= 0)
5732 return iBit + (iBitPrev & ~31);
5733 }
5734 return iBit;
5735}
5736#endif
5737
5738
5739/**
5740 * Finds the first set bit in a bitmap.
5741 *
5742 * @returns Index of the first set bit.
5743 * @returns -1 if no set bit was found.
5744 * @param pvBitmap Pointer to the bitmap.
5745 * @param cBits The number of bits in the bitmap. Multiple of 32.
5746 */
5747#if RT_INLINE_ASM_EXTERNAL
5748DECLASM(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
5749#else
5750DECLINLINE(int) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
5751{
5752 if (cBits)
5753 {
5754 int32_t iBit;
5755# if RT_INLINE_ASM_GNU_STYLE
5756 RTCCUINTREG uEAX, uECX, uEDI;
5757 cBits = RT_ALIGN_32(cBits, 32);
5758 __asm__ __volatile__("repe; scasl\n\t"
5759 "je 1f\n\t"
5760# ifdef RT_ARCH_AMD64
5761 "lea -4(%%rdi), %%rdi\n\t"
5762 "movl (%%rdi), %%eax\n\t"
5763 "subq %5, %%rdi\n\t"
5764# else
5765 "lea -4(%%edi), %%edi\n\t"
5766 "movl (%%edi), %%eax\n\t"
5767 "subl %5, %%edi\n\t"
5768# endif
5769 "shll $3, %%edi\n\t"
5770 "bsfl %%eax, %%edx\n\t"
5771 "addl %%edi, %%edx\n\t"
5772 "1:\t\n"
5773 : "=d" (iBit),
5774 "=&c" (uECX),
5775 "=&D" (uEDI),
5776 "=&a" (uEAX)
5777 : "0" (0xffffffff),
5778 "mr" (pvBitmap),
5779 "1" (cBits >> 5),
5780 "2" (pvBitmap),
5781 "3" (0));
5782# else
5783 cBits = RT_ALIGN_32(cBits, 32);
5784 __asm
5785 {
5786# ifdef RT_ARCH_AMD64
5787 mov rdi, [pvBitmap]
5788 mov rbx, rdi
5789# else
5790 mov edi, [pvBitmap]
5791 mov ebx, edi
5792# endif
5793 mov edx, 0ffffffffh
5794 xor eax, eax
5795 mov ecx, [cBits]
5796 shr ecx, 5
5797 repe scasd
5798 je done
5799# ifdef RT_ARCH_AMD64
5800 lea rdi, [rdi - 4]
5801 mov eax, [rdi]
5802 sub rdi, rbx
5803# else
5804 lea edi, [edi - 4]
5805 mov eax, [edi]
5806 sub edi, ebx
5807# endif
5808 shl edi, 3
5809 bsf edx, eax
5810 add edx, edi
5811 done:
5812 mov [iBit], edx
5813 }
5814# endif
5815 return iBit;
5816 }
5817 return -1;
5818}
5819#endif
5820
5821
5822/**
5823 * Finds the next set bit in a bitmap.
5824 *
5825 * @returns Index of the next set bit.
5826 * @returns -1 if no set bit was found.
5827 * @param pvBitmap Pointer to the bitmap.
5828 * @param cBits The number of bits in the bitmap. Multiple of 32.
5829 * @param iBitPrev The bit returned from the last search.
5830 * The search will start at iBitPrev + 1.
5831 */
5832#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5833DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5834#else
5835DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5836{
5837 int iBit = ++iBitPrev & 31;
5838 pvBitmap = (const volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5839 cBits -= iBitPrev & ~31;
5840 if (iBit)
5841 {
5842 /* inspect the first dword. */
5843 uint32_t u32 = *(const volatile uint32_t *)pvBitmap >> iBit;
5844# if RT_INLINE_ASM_USES_INTRIN
5845 unsigned long ulBit = 0;
5846 if (_BitScanForward(&ulBit, u32))
5847 return ulBit + iBitPrev;
5848 iBit = -1;
5849# else
5850# if RT_INLINE_ASM_GNU_STYLE
5851 __asm__ __volatile__("bsf %1, %0\n\t"
5852 "jnz 1f\n\t"
5853 "movl $-1, %0\n\t"
5854 "1:\n\t"
5855 : "=r" (iBit)
5856 : "r" (u32));
5857# else
5858 __asm
5859 {
5860 mov edx, u32
5861 bsf eax, edx
5862 jnz done
5863 mov eax, 0ffffffffh
5864 done:
5865 mov [iBit], eax
5866 }
5867# endif
5868 if (iBit >= 0)
5869 return iBit + iBitPrev;
5870# endif
5871 /* Search the rest of the bitmap, if there is anything. */
5872 if (cBits > 32)
5873 {
5874 iBit = ASMBitFirstSet((const volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5875 if (iBit >= 0)
5876 return iBit + (iBitPrev & ~31) + 32;
5877 }
5878
5879 }
5880 else
5881 {
5882 /* Search the rest of the bitmap. */
5883 iBit = ASMBitFirstSet(pvBitmap, cBits);
5884 if (iBit >= 0)
5885 return iBit + (iBitPrev & ~31);
5886 }
5887 return iBit;
5888}
5889#endif
5890
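/* Usage sketch: iterating all set bits in a bitmap with ASMBitFirstSet and
 * ASMBitNextSet (illustrative only; au32Bitmap and cBits are hypothetical,
 * cBits being a multiple of 32):
 *
 *      int iBit = ASMBitFirstSet(au32Bitmap, cBits);
 *      while (iBit >= 0)
 *      {
 *          ... process bit iBit ...
 *          iBit = ASMBitNextSet(au32Bitmap, cBits, iBit);
 *      }
 */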
5891
5892/**
5893 * Finds the first bit which is set in the given 32-bit integer.
5894 * Bits are numbered from 1 (least significant) to 32.
5895 *
5896 * @returns index [1..32] of the first set bit.
5897 * @returns 0 if all bits are cleared.
5898 * @param u32 Integer to search for set bits.
5899 * @remark Similar to ffs() in BSD.
5900 */
5901DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5902{
5903# if RT_INLINE_ASM_USES_INTRIN
5904 unsigned long iBit;
5905 if (_BitScanForward(&iBit, u32))
5906 iBit++;
5907 else
5908 iBit = 0;
5909# elif RT_INLINE_ASM_GNU_STYLE
5910 uint32_t iBit;
5911 __asm__ __volatile__("bsf %1, %0\n\t"
5912 "jnz 1f\n\t"
5913 "xorl %0, %0\n\t"
5914 "jmp 2f\n"
5915 "1:\n\t"
5916 "incl %0\n"
5917 "2:\n\t"
5918 : "=r" (iBit)
5919 : "rm" (u32));
5920# else
5921 uint32_t iBit;
5922 _asm
5923 {
5924 bsf eax, [u32]
5925 jnz found
5926 xor eax, eax
5927 jmp done
5928 found:
5929 inc eax
5930 done:
5931 mov [iBit], eax
5932 }
5933# endif
5934 return iBit;
5935}
5936
5937
5938/**
5939 * Finds the first bit which is set in the given 32-bit integer.
5940 * Bits are numbered from 1 (least significant) to 32.
5941 *
5942 * @returns index [1..32] of the first set bit.
5943 * @returns 0 if all bits are cleared.
5944 * @param i32 Integer to search for set bits.
5945 * @remark Similar to ffs() in BSD.
5946 */
5947DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5948{
5949 return ASMBitFirstSetU32((uint32_t)i32);
5950}
5951
5952
5953/**
5954 * Finds the last bit which is set in the given 32-bit integer.
5955 * Bits are numbered from 1 (least significant) to 32.
5956 *
5957 * @returns index [1..32] of the last set bit.
5958 * @returns 0 if all bits are cleared.
5959 * @param u32 Integer to search for set bits.
5960 * @remark Similar to fls() in BSD.
5961 */
5962DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5963{
5964# if RT_INLINE_ASM_USES_INTRIN
5965 unsigned long iBit;
5966 if (_BitScanReverse(&iBit, u32))
5967 iBit++;
5968 else
5969 iBit = 0;
5970# elif RT_INLINE_ASM_GNU_STYLE
5971 uint32_t iBit;
5972 __asm__ __volatile__("bsrl %1, %0\n\t"
5973 "jnz 1f\n\t"
5974 "xorl %0, %0\n\t"
5975 "jmp 2f\n"
5976 "1:\n\t"
5977 "incl %0\n"
5978 "2:\n\t"
5979 : "=r" (iBit)
5980 : "rm" (u32));
5981# else
5982 uint32_t iBit;
5983 _asm
5984 {
5985 bsr eax, [u32]
5986 jnz found
5987 xor eax, eax
5988 jmp done
5989 found:
5990 inc eax
5991 done:
5992 mov [iBit], eax
5993 }
5994# endif
5995 return iBit;
5996}
5997
5998
5999/**
6000 * Finds the last bit which is set in the given 32-bit integer.
6001 * Bits are numbered from 1 (least significant) to 32.
6002 *
6003 * @returns index [1..32] of the last set bit.
6004 * @returns 0 if all bits are cleared.
6005 * @param i32 Integer to search for set bits.
6006 * @remark Similar to fls() in BSD.
6007 */
6008DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
6009{
6010    return ASMBitLastSetU32((uint32_t)i32);
6011}
6012
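/* Usage sketch for the bit scan helpers (illustrative only; cbAlignment is a
 * hypothetical power of two). Since bits are numbered from 1, subtract one to
 * get a zero-based bit index / shift count:
 *
 *      unsigned iFirst = ASMBitFirstSetU32(UINT32_C(0x00a0));   (returns 6)
 *      unsigned iLast  = ASMBitLastSetU32(UINT32_C(0x00a0));    (returns 8)
 *      unsigned cShift = ASMBitLastSetU32(cbAlignment) - 1;     (log2 of a power of two)
 */
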
6013/**
6014 * Reverses the byte order of the given 16-bit integer.
6015 *
6016 * @returns The value with the byte order reversed.
6017 * @param u16 16-bit integer value.
6018 */
6019DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
6020{
6021#if RT_INLINE_ASM_USES_INTRIN
6022 u16 = _byteswap_ushort(u16);
6023#elif RT_INLINE_ASM_GNU_STYLE
6024 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
6025#else
6026 _asm
6027 {
6028 mov ax, [u16]
6029 ror ax, 8
6030 mov [u16], ax
6031 }
6032#endif
6033 return u16;
6034}
6035
6036/**
6037 * Reverses the byte order of the given 32-bit integer.
6038 *
6039 * @returns The value with the byte order reversed.
6040 * @param u32 32-bit integer value.
6041 */
6042DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
6043{
6044#if RT_INLINE_ASM_USES_INTRIN
6045 u32 = _byteswap_ulong(u32);
6046#elif RT_INLINE_ASM_GNU_STYLE
6047 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
6048#else
6049 _asm
6050 {
6051 mov eax, [u32]
6052 bswap eax
6053 mov [u32], eax
6054 }
6055#endif
6056 return u32;
6057}
6058
6059
6060/**
6061 * Reverses the byte order of the given 64-bit integer.
6062 *
6063 * @returns The value with the byte order reversed.
6064 * @param u64 64-bit integer value.
6065 */
6066DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
6067{
6068#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
6069 u64 = _byteswap_uint64(u64);
6070#else
6071 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
6072 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
6073#endif
6074 return u64;
6075}
6076
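/* Usage sketch for the byte swap helpers (illustrative only; uWireValue is a
 * hypothetical big-endian value received on a little-endian host):
 *
 *      uint32_t uHostValue = ASMByteSwapU32(uWireValue);
 *      Assert(ASMByteSwapU16(UINT16_C(0x1234)) == UINT16_C(0x3412));
 */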
6077
6078/** @} */
6079
6080
6081/** @} */
6082#endif
6083