VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 9061

Last change on this file since 9061 was 8889, checked in by vboxsync, 17 years ago

Added ASMGetCpuModelIntel/Amd and ASMIsIntelCpu and ASMIsIntelCpuEx because Intel differs slightly in the way they calculate the model number.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 136.5 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_asm_h
31#define ___iprt_asm_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/assert.h>
36/** @todo #include <iprt/param.h> for PAGE_SIZE. */
37/** @def RT_INLINE_ASM_USES_INTRIN
38 * Defined as 1 if we're using a _MSC_VER 1400.
39 * Otherwise defined as 0.
40 */
41
#ifdef _MSC_VER
# if _MSC_VER >= 1400
#  define RT_INLINE_ASM_USES_INTRIN 1
#  include <intrin.h>
   /* Emit the intrinsics at all optimization levels. */
#  pragma intrinsic(_ReadWriteBarrier)
#  pragma intrinsic(__cpuid)
#  pragma intrinsic(_enable)
#  pragma intrinsic(_disable)
#  pragma intrinsic(__rdtsc)
#  pragma intrinsic(__readmsr)
#  pragma intrinsic(__writemsr)
#  pragma intrinsic(__outbyte)
#  pragma intrinsic(__outword)
#  pragma intrinsic(__outdword)
#  pragma intrinsic(__inbyte)
#  pragma intrinsic(__inword)
#  pragma intrinsic(__indword)
#  pragma intrinsic(__invlpg)
#  pragma intrinsic(__stosd)
#  pragma intrinsic(__stosw)
#  pragma intrinsic(__stosb)
#  pragma intrinsic(__readcr0)
#  pragma intrinsic(__readcr2)
#  pragma intrinsic(__readcr3)
#  pragma intrinsic(__readcr4)
#  pragma intrinsic(__writecr0)
#  pragma intrinsic(__writecr3)
#  pragma intrinsic(__writecr4)
#  pragma intrinsic(_BitScanForward)
#  pragma intrinsic(_BitScanReverse)
#  pragma intrinsic(_bittest)
#  pragma intrinsic(_bittestandset)
#  pragma intrinsic(_bittestandreset)
#  pragma intrinsic(_bittestandcomplement)
#  pragma intrinsic(_byteswap_ushort)
#  pragma intrinsic(_byteswap_ulong)
#  pragma intrinsic(_interlockedbittestandset)
#  pragma intrinsic(_interlockedbittestandreset)
#  pragma intrinsic(_InterlockedAnd)
#  pragma intrinsic(_InterlockedOr)
#  pragma intrinsic(_InterlockedIncrement)
#  pragma intrinsic(_InterlockedDecrement)
#  pragma intrinsic(_InterlockedExchange)
#  pragma intrinsic(_InterlockedExchangeAdd)
#  pragma intrinsic(_InterlockedCompareExchange)
#  pragma intrinsic(_InterlockedCompareExchange64)
   /* 64-bit only intrinsics (MSC has no inline assembly on AMD64). */
#  ifdef RT_ARCH_AMD64
#   pragma intrinsic(__stosq)
#   pragma intrinsic(__readcr8)
#   pragma intrinsic(__writecr8)
#   pragma intrinsic(_byteswap_uint64)
#   pragma intrinsic(_InterlockedExchange64)
#  endif
# endif
#endif
/* Default to 0 for compilers without the MSC intrinsics (see doc above). */
#ifndef RT_INLINE_ASM_USES_INTRIN
# define RT_INLINE_ASM_USES_INTRIN 0
#endif
101
102
103
104/** @defgroup grp_asm ASM - Assembly Routines
105 * @ingroup grp_rt
106 *
 * @remarks The difference between ordered and unordered atomic operations is that
 *          the former will complete outstanding reads and writes before continuing
 *          while the latter doesn't make any promises about the order. Ordered
 *          operations don't, it seems, make any 100% promise with respect to whether
 *          the operation will complete before any subsequent memory access.
 *          (Please correct if wrong.)
113 *
114 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
115 * are unordered (note the Uo).
116 *
117 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
118 * or even optimize assembler instructions away. For instance, in the following code
119 * the second rdmsr instruction is optimized away because gcc treats that instruction
120 * as deterministic:
121 *
122 * @code
123 * static inline uint64_t rdmsr_low(int idx)
124 * {
125 * uint32_t low;
126 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
127 * }
128 * ...
129 * uint32_t msr1 = rdmsr_low(1);
130 * foo(msr1);
131 * msr1 = rdmsr_low(1);
132 * bar(msr1);
133 * @endcode
134 *
135 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
136 * use the result of the first call as input parameter for bar() as well. For rdmsr this
137 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
142
/** @def RT_INLINE_ASM_EXTERNAL
 * Defined as 1 if the compiler does not support inline assembly.
 * The ASM* functions will then be implemented in an external .asm file.
 *
 * @remark  At the present time it's unconfirmed whether or not Microsoft skipped
 *          inline assembly in their AMD64 compiler.
 */
#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
# define RT_INLINE_ASM_EXTERNAL 1
#else
# define RT_INLINE_ASM_EXTERNAL 0
#endif

/** @def RT_INLINE_ASM_GNU_STYLE
 * Defined as 1 if the compiler understands GNU style inline assembly
 * (i.e. everything except MSC, which uses the __asm { } block syntax). */
#if defined(_MSC_VER)
# define RT_INLINE_ASM_GNU_STYLE 0
#else
# define RT_INLINE_ASM_GNU_STYLE 1
#endif
164
165
/** @todo find a more proper place for this structure? */
#pragma pack(1)
/** IDTR - matches the memory layout used by the SIDT/LIDT instructions,
 * hence the byte packing (16-bit limit immediately followed by the base). */
typedef struct RTIDTR
{
    /** Size of the IDT. */
    uint16_t    cbIdt;
    /** Address of the IDT. */
    uintptr_t   pIdt;
} RTIDTR, *PRTIDTR;
#pragma pack()
177
#pragma pack(1)
/** GDTR - matches the memory layout used by the SGDT/LGDT instructions,
 * hence the byte packing (16-bit limit immediately followed by the base). */
typedef struct RTGDTR
{
    /** Size of the GDT. */
    uint16_t    cbGdt;
    /** Address of the GDT. */
    uintptr_t   pGdt;
} RTGDTR, *PRTGDTR;
#pragma pack()
188
189
/** @def ASMReturnAddress
 * Gets the return address of the current (or calling if you like) function or method.
 */
#ifdef _MSC_VER
# ifdef __cplusplus
extern "C"
# endif
/* Prototype the MSC intrinsic ourselves so the pragma below can be applied. */
void * _ReturnAddress(void);
# pragma intrinsic(_ReturnAddress)
# define ASMReturnAddress() _ReturnAddress()
#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
# define ASMReturnAddress() __builtin_return_address(0)
#else
# error "Unsupported compiler."
#endif
205
206
/**
 * Gets the content of the IDTR CPU register.
 * @param   pIdtr   Where to store the IDTR contents.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
#else
DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* sidt stores limit+base straight into the caller's structure. */
    __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pIdtr]
        sidt    [rax]
#  else
        mov     eax, [pIdtr]
        sidt    [eax]
#  endif
    }
# endif
}
#endif
232
233
/**
 * Sets the content of the IDTR CPU register.
 * @param   pIdtr   Where to load the IDTR contents from.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
#else
DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* lidt reads limit+base directly from the caller's structure. */
    __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pIdtr]
        lidt    [rax]
#  else
        mov     eax, [pIdtr]
        lidt    [eax]
#  endif
    }
# endif
}
#endif
259
260
/**
 * Gets the content of the GDTR CPU register.
 * @param   pGdtr   Where to store the GDTR contents.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
#else
DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* sgdt stores limit+base straight into the caller's structure. */
    __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [pGdtr]
        sgdt    [rax]
#  else
        mov     eax, [pGdtr]
        sgdt    [eax]
#  endif
    }
# endif
}
#endif
286
/**
 * Get the cs register.
 * @returns cs (the selector value only, no descriptor information).
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetCS(void);
#else
DECLINLINE(RTSEL) ASMGetCS(void)
{
    RTSEL SelCS;
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
# else
    __asm
    {
        mov     ax, cs
        mov     [SelCS], ax
    }
# endif
    return SelCS;
}
#endif
309
310
/**
 * Get the DS register.
 * @returns DS (the selector value only, no descriptor information).
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetDS(void);
#else
DECLINLINE(RTSEL) ASMGetDS(void)
{
    RTSEL SelDS;
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
# else
    __asm
    {
        mov     ax, ds
        mov     [SelDS], ax
    }
# endif
    return SelDS;
}
#endif
333
334
/**
 * Get the ES register.
 * @returns ES (the selector value only, no descriptor information).
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetES(void);
#else
DECLINLINE(RTSEL) ASMGetES(void)
{
    RTSEL SelES;
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
# else
    __asm
    {
        mov     ax, es
        mov     [SelES], ax
    }
# endif
    return SelES;
}
#endif
357
358
359/**
360 * Get the FS register.
361 * @returns FS.
362 */
363#if RT_INLINE_ASM_EXTERNAL
364DECLASM(RTSEL) ASMGetFS(void);
365#else
366DECLINLINE(RTSEL) ASMGetFS(void)
367{
368 RTSEL SelFS;
369# if RT_INLINE_ASM_GNU_STYLE
370 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
371# else
372 __asm
373 {
374 mov ax, fs
375 mov [SelFS], ax
376 }
377# endif
378 return SelFS;
379}
380# endif
381
382
/**
 * Get the GS register.
 * @returns GS (the selector value only, no descriptor information).
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetGS(void);
#else
DECLINLINE(RTSEL) ASMGetGS(void)
{
    RTSEL SelGS;
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
# else
    __asm
    {
        mov     ax, gs
        mov     [SelGS], ax
    }
# endif
    return SelGS;
}
#endif
405
406
/**
 * Get the SS register.
 * @returns SS (the selector value only, no descriptor information).
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetSS(void);
#else
DECLINLINE(RTSEL) ASMGetSS(void)
{
    RTSEL SelSS;
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
# else
    __asm
    {
        mov     ax, ss
        mov     [SelSS], ax
    }
# endif
    return SelSS;
}
#endif
429
430
/**
 * Get the TR register (the task register selector).
 * @returns TR.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTSEL) ASMGetTR(void);
#else
DECLINLINE(RTSEL) ASMGetTR(void)
{
    RTSEL SelTR;
# if RT_INLINE_ASM_GNU_STYLE
    /* %w0 forces the 16-bit name of the output register for 'str'. */
    __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
# else
    __asm
    {
        str     ax
        mov     [SelTR], ax
    }
# endif
    return SelTR;
}
#endif
453
454
/**
 * Get the [RE]FLAGS register (via push flags / pop into a variable).
 * @returns [RE]FLAGS.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(RTCCUINTREG) ASMGetFlags(void);
#else
DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
{
    RTCCUINTREG uFlags;
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("pushfq\n\t"
                         "popq  %0\n\t"
                         : "=g" (uFlags));
#  else
    __asm__ __volatile__("pushfl\n\t"
                         "popl  %0\n\t"
                         : "=g" (uFlags));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        pushfq
        pop  [uFlags]
#  else
        pushfd
        pop  [uFlags]
#  endif
    }
# endif
    return uFlags;
}
#endif
490
491
/**
 * Set the [RE]FLAGS register (via push variable / pop flags).
 * @param   uFlags      The new [RE]FLAGS value.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
#else
DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("pushq %0\n\t"
                         "popfq\n\t"
                         : : "g" (uFlags));
#  else
    __asm__ __volatile__("pushl %0\n\t"
                         "popfl\n\t"
                         : : "g" (uFlags));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        push    [uFlags]
        popfq
#  else
        push    [uFlags]
        popfd
#  endif
    }
# endif
}
#endif
525
526
/**
 * Gets the content of the CPU timestamp counter register (rdtsc).
 *
 * @returns TSC.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint64_t) ASMReadTSC(void);
#else
DECLINLINE(uint64_t) ASMReadTSC(void)
{
    RTUINT64U u; /* rdtsc returns the 64-bit value split over edx:eax. */
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
# else
#  if RT_INLINE_ASM_USES_INTRIN
    u.u = __rdtsc();
#  else
    __asm
    {
        rdtsc
        mov     [u.s.Lo], eax
        mov     [u.s.Hi], edx
    }
#  endif
# endif
    return u.u;
}
#endif
555
556
/**
 * Performs the cpuid instruction returning all registers.
 *
 * @param   uOperator   CPUID operation (eax).
 * @param   pvEAX       Where to store eax.
 * @param   pvEBX       Where to store ebx.
 * @param   pvECX       Where to store ecx.
 * @param   pvEDX       Where to store edx.
 * @remark  We're using void pointers to ease the use of special bitfield structures and such.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
#else
DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
    __asm__ ("cpuid\n\t"
             : "=a" (uRAX),
               "=b" (uRBX),
               "=c" (uRCX),
               "=d" (uRDX)
             : "0" (uOperator));
    *(uint32_t *)pvEAX = (uint32_t)uRAX;
    *(uint32_t *)pvEBX = (uint32_t)uRBX;
    *(uint32_t *)pvECX = (uint32_t)uRCX;
    *(uint32_t *)pvEDX = (uint32_t)uRDX;
#  else
    /* 32-bit: ebx may be reserved (PIC base register), so swap it with a
       scratch register around the cpuid instruction instead of clobbering it. */
    __asm__ ("xchgl %%ebx, %1\n\t"
             "cpuid\n\t"
             "xchgl %%ebx, %1\n\t"
             : "=a" (*(uint32_t *)pvEAX),
               "=r" (*(uint32_t *)pvEBX),
               "=c" (*(uint32_t *)pvECX),
               "=d" (*(uint32_t *)pvEDX)
             : "0" (uOperator));
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4]; /* __cpuid output order: eax, ebx, ecx, edx. */
    __cpuid(aInfo, uOperator);
    *(uint32_t *)pvEAX = aInfo[0];
    *(uint32_t *)pvEBX = aInfo[1];
    *(uint32_t *)pvECX = aInfo[2];
    *(uint32_t *)pvEDX = aInfo[3];

# else
    uint32_t uEAX;
    uint32_t uEBX;
    uint32_t uECX;
    uint32_t uEDX;
    __asm
    {
        push    ebx
        mov     eax, [uOperator]
        cpuid
        mov     [uEAX], eax
        mov     [uEBX], ebx
        mov     [uECX], ecx
        mov     [uEDX], edx
        pop     ebx
    }
    *(uint32_t *)pvEAX = uEAX;
    *(uint32_t *)pvEBX = uEBX;
    *(uint32_t *)pvECX = uECX;
    *(uint32_t *)pvEDX = uEDX;
# endif
}
#endif
627
628
/**
 * Performs the cpuid instruction returning all registers.
 * Some subfunctions of cpuid take ECX as additional parameter (currently known for EAX=4).
 *
 * @param   uOperator   CPUID operation (eax).
 * @param   uIdxECX     ecx index (sub-leaf).
 * @param   pvEAX       Where to store eax.
 * @param   pvEBX       Where to store ebx.
 * @param   pvECX       Where to store ecx.
 * @param   pvEDX       Where to store edx.
 * @remark  We're using void pointers to ease the use of special bitfield structures and such.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
#else
DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
    __asm__ ("cpuid\n\t"
             : "=a" (uRAX),
               "=b" (uRBX),
               "=c" (uRCX),
               "=d" (uRDX)
             : "0" (uOperator),
               "2" (uIdxECX));  /* "2" ties uIdxECX to the ecx output operand. */
    *(uint32_t *)pvEAX = (uint32_t)uRAX;
    *(uint32_t *)pvEBX = (uint32_t)uRBX;
    *(uint32_t *)pvECX = (uint32_t)uRCX;
    *(uint32_t *)pvEDX = (uint32_t)uRDX;
#  else
    /* 32-bit: preserve ebx (possible PIC base register) via xchg. */
    __asm__ ("xchgl %%ebx, %1\n\t"
             "cpuid\n\t"
             "xchgl %%ebx, %1\n\t"
             : "=a" (*(uint32_t *)pvEAX),
               "=r" (*(uint32_t *)pvEBX),
               "=c" (*(uint32_t *)pvECX),
               "=d" (*(uint32_t *)pvEDX)
             : "0" (uOperator),
               "2" (uIdxECX));
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    /* FIXME: __cpuid does not take an ECX sub-leaf index, so uIdxECX is
       ignored here and sub-leaves other than 0 return the wrong data.
       Use __cpuidex on compilers that provide it. */
    __cpuid(aInfo, uOperator);
    *(uint32_t *)pvEAX = aInfo[0];
    *(uint32_t *)pvEBX = aInfo[1];
    *(uint32_t *)pvECX = aInfo[2];
    *(uint32_t *)pvEDX = aInfo[3];

# else
    uint32_t uEAX;
    uint32_t uEBX;
    uint32_t uECX;
    uint32_t uEDX;
    __asm
    {
        push    ebx
        mov     eax, [uOperator]
        mov     ecx, [uIdxECX]
        cpuid
        mov     [uEAX], eax
        mov     [uEBX], ebx
        mov     [uECX], ecx
        mov     [uEDX], edx
        pop     ebx
    }
    *(uint32_t *)pvEAX = uEAX;
    *(uint32_t *)pvEBX = uEBX;
    *(uint32_t *)pvECX = uECX;
    *(uint32_t *)pvEDX = uEDX;
# endif
}
#endif
705
706
707/**
708 * Performs the cpuid instruction returning ecx and edx.
709 *
710 * @param uOperator CPUID operation (eax).
711 * @param pvECX Where to store ecx.
712 * @param pvEDX Where to store edx.
713 * @remark We're using void pointers to ease the use of special bitfield structures and such.
714 */
715#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
716DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
717#else
718DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
719{
720 uint32_t uEBX;
721 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
722}
723#endif
724
725
/**
 * Performs the cpuid instruction returning edx.
 *
 * @param   uOperator   CPUID operation (eax).
 * @returns EDX after cpuid operation.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
#else
DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
{
    RTCCUINTREG xDX;
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    RTCCUINTREG uSpill;
    __asm__ ("cpuid"
             : "=a" (uSpill),
               "=d" (xDX)
             : "0" (uOperator)
             : "rbx", "rcx");
#  elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
    /* ebx cannot be listed as a clobber when it's the PIC base register,
       so save/restore it by hand. */
    __asm__ ("push %%ebx\n\t"
             "cpuid\n\t"
             "pop  %%ebx\n\t"
             : "=a" (uOperator),
               "=d" (xDX)
             : "0" (uOperator)
             : "ecx");
#  else
    __asm__ ("cpuid"
             : "=a" (uOperator),
               "=d" (xDX)
             : "0" (uOperator)
             : "ebx", "ecx");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    __cpuid(aInfo, uOperator);
    xDX = aInfo[3]; /* [3] is edx. */

# else
    __asm
    {
        push    ebx
        mov     eax, [uOperator]
        cpuid
        mov     [xDX], edx
        pop     ebx
    }
# endif
    return (uint32_t)xDX;
}
#endif
780
781
/**
 * Performs the cpuid instruction returning ecx.
 *
 * @param   uOperator   CPUID operation (eax).
 * @returns ECX after cpuid operation.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
#else
DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
{
    RTCCUINTREG xCX;
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    RTCCUINTREG uSpill;
    __asm__ ("cpuid"
             : "=a" (uSpill),
               "=c" (xCX)
             : "0" (uOperator)
             : "rbx", "rdx");
#  elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
    /* ebx cannot be listed as a clobber when it's the PIC base register,
       so save/restore it by hand. */
    __asm__ ("push %%ebx\n\t"
             "cpuid\n\t"
             "pop  %%ebx\n\t"
             : "=a" (uOperator),
               "=c" (xCX)
             : "0" (uOperator)
             : "edx");
#  else
    __asm__ ("cpuid"
             : "=a" (uOperator),
               "=c" (xCX)
             : "0" (uOperator)
             : "ebx", "edx");

#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    __cpuid(aInfo, uOperator);
    xCX = aInfo[2]; /* [2] is ecx. */

# else
    __asm
    {
        push    ebx
        mov     eax, [uOperator]
        cpuid
        mov     [xCX], ecx
        pop     ebx
    }
# endif
    return (uint32_t)xCX;
}
#endif
837
838
/**
 * Checks if the current CPU supports CPUID by testing whether the
 * EFLAGS.ID bit (bit 21, mask 0x200000) can be toggled.
 *
 * @returns true if CPUID is supported.
 */
DECLINLINE(bool) ASMHasCpuId(void)
{
#ifdef RT_ARCH_AMD64
    return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
#else /* !RT_ARCH_AMD64 */
    bool fRet = false;
# if RT_INLINE_ASM_GNU_STYLE
    uint32_t u1;
    uint32_t u2;
    /* Flip EFLAGS.ID, reload the flags, and check whether the bit stuck;
       the original flags value (u2) is restored at the end. */
    __asm__ ("pushf\n\t"
             "pop   %1\n\t"
             "mov   %1, %2\n\t"
             "xorl  $0x200000, %1\n\t"
             "push  %1\n\t"
             "popf\n\t"
             "pushf\n\t"
             "pop   %1\n\t"
             "cmpl  %1, %2\n\t"
             "setne %0\n\t"
             "push  %2\n\t"
             "popf\n\t"
             : "=m" (fRet), "=r" (u1), "=r" (u2));
# else
    __asm
    {
        pushfd
        pop     eax
        mov     ebx, eax
        xor     eax, 0200000h
        push    eax
        popfd
        pushfd
        pop     eax
        cmp     eax, ebx
        setne   fRet
        push    ebx
        popfd
    }
# endif
    return fRet;
#endif /* !RT_ARCH_AMD64 */
}
886
887
/**
 * Gets the APIC ID of the current CPU (bits 24-31 of EBX from CPUID leaf 1).
 *
 * @returns the APIC ID.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint8_t) ASMGetApicId(void);
#else
DECLINLINE(uint8_t) ASMGetApicId(void)
{
    RTCCUINTREG xBX;
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    RTCCUINTREG uSpill;
    __asm__ ("cpuid"
             : "=a" (uSpill),
               "=b" (xBX)
             : "0" (1)
             : "rcx", "rdx");
#  elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
    /* ebx may be the PIC base register; copy it out and restore via xchg. */
    RTCCUINTREG uSpill;
    __asm__ ("mov   %%ebx,%1\n\t"
             "cpuid\n\t"
             "xchgl %%ebx,%1\n\t"
             : "=a" (uSpill),
               "=r" (xBX)
             : "0" (1)
             : "ecx", "edx");
#  else
    RTCCUINTREG uSpill;
    __asm__ ("cpuid"
             : "=a" (uSpill),
               "=b" (xBX)
             : "0" (1)
             : "ecx", "edx");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    __cpuid(aInfo, 1);
    xBX = aInfo[1]; /* [1] is ebx. */

# else
    __asm
    {
        push    ebx
        mov     eax, 1
        cpuid
        mov     [xBX], ebx
        pop     ebx
    }
# endif
    return (uint8_t)(xBX >> 24);
}
#endif
943
944
/**
 * Tests whether this is a genuine Intel CPU based on the ASMCpuId(0) output.
 *
 * The CPUID leaf 0 vendor string "GenuineIntel" is returned spread over
 * EBX ("Genu"), EDX ("ineI") and ECX ("ntel"); all three parts must match.
 *
 * @returns true/false.
 * @param   uEBX    EBX return from ASMCpuId(0).
 * @param   uECX    ECX return from ASMCpuId(0).
 * @param   uEDX    EDX return from ASMCpuId(0).
 */
DECLINLINE(bool) ASMIsIntelCpuEx(uint32_t uEBX, uint32_t uECX, uint32_t uEDX)
{
    /* Was '||', which reported any CPU matching just one register as Intel. */
    return uEBX == 0x756e6547      /* 'Genu' */
        && uECX == 0x6c65746e      /* 'ntel' */
        && uEDX == 0x49656e69;     /* 'ineI' */
}
959
960
961/**
962 * Tests if this is an genuin Intel CPU.
963 *
964 * @returns true/false.
965 */
966DECLINLINE(bool) ASMIsIntelCpu(void)
967{
968 uint32_t uEAX, uEBX, uECX, uEDX;
969 ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX);
970 return ASMIsIntelCpuEx(uEBX, uECX, uEDX);
971}
972
973
/**
 * Extracts the CPU family from ASMCpuId(1) or ASMCpuId(0x80000001).
 *
 * The base family lives in bits 8-11; when it reads 0xf the extended
 * family field (bits 20-27) is added to it.
 *
 * @returns Family.
 * @param   uEAX    EAX return from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuFamily(uint32_t uEAX)
{
    uint32_t const uBaseFamily = (uEAX >> 8) & 0xf;
    if (uBaseFamily != 0xf)
        return uBaseFamily;
    return uBaseFamily + ((uEAX >> 20) & 0x7f);
}
986
987
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), Intel variant.
 *
 * Intel combines the extended model field (bits 16-19) with the base model
 * (bits 4-7) for family 0xf AND family 6.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelIntel(uint32_t uEAX)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf;
    uint32_t const uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || uFamily == 0x6)
        return uModel | ((uEAX >> 12) & 0xf0);
    return uModel;
}
1001
1002
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001), AMD variant.
 *
 * AMD combines the extended model field (bits 16-19) with the base model
 * (bits 4-7) only for family 0xf.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuModelAMD(uint32_t uEAX)
{
    uint32_t const uModel = (uEAX >> 4) & 0xf;
    return ((uEAX >> 8) & 0xf) == 0xf
         ? uModel | ((uEAX >> 12) & 0xf0)
         : uModel;
}
1016
1017
/**
 * Extracts the CPU model from ASMCpuId(1) or ASMCpuId(0x80000001).
 *
 * The extended model field (bits 16-19) is folded in for family 0xf on
 * all vendors, and additionally for family 6 on Intel.
 *
 * @returns Model.
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 * @param   fIntel  Whether it's an intel CPU. Use ASMIsIntelCpuEx() or ASMIsIntelCpu().
 */
DECLINLINE(uint32_t) ASMGetCpuModel(uint32_t uEAX, bool fIntel)
{
    uint32_t const uFamily = (uEAX >> 8) & 0xf;
    uint32_t       uModel  = (uEAX >> 4) & 0xf;
    if (uFamily == 0xf || (uFamily == 0x6 && fIntel))
        uModel |= (uEAX >> 12) & 0xf0;
    return uModel;
}
1031
1032
/**
 * Extracts the CPU stepping from ASMCpuId(1) or ASMCpuId(0x80000001).
 *
 * @returns Stepping (the low nibble of EAX).
 * @param   uEAX    EAX from ASMCpuId(1) or ASMCpuId(0x80000001).
 */
DECLINLINE(uint32_t) ASMGetCpuStepping(uint32_t uEAX)
{
    return uEAX % 0x10;
}
1043
1044
/**
 * Get cr0.
 * @returns cr0.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(RTCCUINTREG) ASMGetCR0(void);
#else
DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
{
    RTCCUINTREG uCR0;
# if RT_INLINE_ASM_USES_INTRIN
    uCR0 = __readcr0();

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %%cr0, %0\t\n" : "=r" (uCR0));
#  else
    __asm__ __volatile__("movl %%cr0, %0\t\n" : "=r" (uCR0));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, cr0
        mov     [uCR0], rax
#  else
        mov     eax, cr0
        mov     [uCR0], eax
#  endif
    }
# endif
    return uCR0;
}
#endif
1079
1080
/**
 * Sets the CR0 register.
 * @param   uCR0    The new CR0 value.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
#else
DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
{
# if RT_INLINE_ASM_USES_INTRIN
    __writecr0(uCR0);

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
#  else
    __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [uCR0]
        mov     cr0, rax
#  else
        mov     eax, [uCR0]
        mov     cr0, eax
#  endif
    }
# endif
}
#endif
1113
1114
/**
 * Get cr2 (the page-fault linear address register).
 * @returns cr2.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(RTCCUINTREG) ASMGetCR2(void);
#else
DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
{
    RTCCUINTREG uCR2;
# if RT_INLINE_ASM_USES_INTRIN
    uCR2 = __readcr2();

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %%cr2, %0\t\n" : "=r" (uCR2));
#  else
    __asm__ __volatile__("movl %%cr2, %0\t\n" : "=r" (uCR2));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, cr2
        mov     [uCR2], rax
#  else
        mov     eax, cr2
        mov     [uCR2], eax
#  endif
    }
# endif
    return uCR2;
}
#endif
1149
1150
/**
 * Sets the CR2 register.
 * @param   uCR2    The new CR2 value.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
#else
DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
#  else
    __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [uCR2]
        mov     cr2, rax
#  else
        mov     eax, [uCR2]
        mov     cr2, eax
#  endif
    }
# endif
}
#endif
1180
1181
/**
 * Get cr3 (the page-directory base register).
 * @returns cr3.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(RTCCUINTREG) ASMGetCR3(void);
#else
DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
{
    RTCCUINTREG uCR3;
# if RT_INLINE_ASM_USES_INTRIN
    uCR3 = __readcr3();

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %%cr3, %0\t\n" : "=r" (uCR3));
#  else
    __asm__ __volatile__("movl %%cr3, %0\t\n" : "=r" (uCR3));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, cr3
        mov     [uCR3], rax
#  else
        mov     eax, cr3
        mov     [uCR3], eax
#  endif
    }
# endif
    return uCR3;
}
#endif
1216
1217
/**
 * Sets the CR3 register.
 *
 * @param   uCR3    New CR3 value.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
#else
DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
{
# if RT_INLINE_ASM_USES_INTRIN
    __writecr3(uCR3);

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
#  else
    __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [uCR3]
        mov     cr3, rax
#  else
        mov     eax, [uCR3]
        mov     cr3, eax
#  endif
    }
# endif
}
#endif
1251
1252
/**
 * Reloads the CR3 register with its current value (read-modify-write of
 * cr3 with the same value).
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMReloadCR3(void);
#else
DECLINLINE(void) ASMReloadCR3(void)
{
# if RT_INLINE_ASM_USES_INTRIN
    __writecr3(__readcr3());

# elif RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG u;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("movq %%cr3, %0\n\t"
                          "movq %0, %%cr3\n\t"
                          : "=r" (u));
#  else
    __asm__ __volatile__ ("movl %%cr3, %0\n\t"
                          "movl %0, %%cr3\n\t"
                          : "=r" (u));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, cr3
        mov     cr3, rax
#  else
        mov     eax, cr3
        mov     cr3, eax
#  endif
    }
# endif
}
#endif
1289
1290
/**
 * Get cr4.
 * @returns cr4.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(RTCCUINTREG) ASMGetCR4(void);
#else
DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
{
    RTCCUINTREG uCR4;
# if RT_INLINE_ASM_USES_INTRIN
    uCR4 = __readcr4();

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__("movq %%cr4, %0\t\n" : "=r" (uCR4));
#  else
    __asm__ __volatile__("movl %%cr4, %0\t\n" : "=r" (uCR4));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, cr4
        mov     [uCR4], rax
#  else
        push    eax /* just in case */
        /* Raw opcode bytes for 'mov eax, cr4' - presumably because older
           MSC inline assemblers don't accept cr4 directly; verify. */
        /*mov eax, cr4*/
        _emit   0x0f
        _emit   0x20
        _emit   0xe0
        mov     [uCR4], eax
        pop     eax
#  endif
    }
# endif
    return uCR4;
}
#endif
1330
1331
/**
 * Sets the CR4 register.
 *
 * @param   uCR4    New CR4 value.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
#else
DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
{
# if RT_INLINE_ASM_USES_INTRIN
    __writecr4(uCR4);

# elif RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
#  else
    __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
#  endif
# else
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rax, [uCR4]
        mov     cr4, rax
#  else
        mov     eax, [uCR4]
        /* Raw opcode bytes, see the matching note in ASMGetCR4. */
        _emit   0x0F
        _emit   0x22
        _emit   0xE0 /* mov cr4, eax */
#  endif
    }
# endif
}
#endif
1367
1368
1369/**
1370 * Get cr8.
1371 * @returns cr8.
1372 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1373 */
1374#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1375DECLASM(RTCCUINTREG) ASMGetCR8(void);
1376#else
1377DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1378{
1379# ifdef RT_ARCH_AMD64
1380 RTCCUINTREG uCR8;
1381# if RT_INLINE_ASM_USES_INTRIN
1382 uCR8 = __readcr8();
1383
1384# elif RT_INLINE_ASM_GNU_STYLE
1385 __asm__ __volatile__("movq %%cr8, %0\t\n" : "=r" (uCR8));
1386# else
1387 __asm
1388 {
1389 mov rax, cr8
1390 mov [uCR8], rax
1391 }
1392# endif
1393 return uCR8;
1394# else /* !RT_ARCH_AMD64 */
1395 return 0;
1396# endif /* !RT_ARCH_AMD64 */
1397}
1398#endif
1399
1400
1401/**
1402 * Enables interrupts (EFLAGS.IF).
1403 */
1404#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1405DECLASM(void) ASMIntEnable(void);
1406#else
1407DECLINLINE(void) ASMIntEnable(void)
1408{
1409# if RT_INLINE_ASM_GNU_STYLE
1410 __asm("sti\n");
1411# elif RT_INLINE_ASM_USES_INTRIN
1412 _enable();
1413# else
1414 __asm sti
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Disables interrupts (!EFLAGS.IF).
1422 */
1423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1424DECLASM(void) ASMIntDisable(void);
1425#else
1426DECLINLINE(void) ASMIntDisable(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429 __asm("cli\n");
1430# elif RT_INLINE_ASM_USES_INTRIN
1431 _disable();
1432# else
1433 __asm cli
1434# endif
1435}
1436#endif
1437
1438
1439/**
1440 * Disables interrupts and returns previous xFLAGS.
1441 */
1442#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1443DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1444#else
1445DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1446{
1447 RTCCUINTREG xFlags;
1448# if RT_INLINE_ASM_GNU_STYLE
1449# ifdef RT_ARCH_AMD64
1450 __asm__ __volatile__("pushfq\n\t"
1451 "cli\n\t"
1452 "popq %0\n\t"
1453 : "=rm" (xFlags));
1454# else
1455 __asm__ __volatile__("pushfl\n\t"
1456 "cli\n\t"
1457 "popl %0\n\t"
1458 : "=rm" (xFlags));
1459# endif
1460# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1461 xFlags = ASMGetFlags();
1462 _disable();
1463# else
1464 __asm {
1465 pushfd
1466 cli
1467 pop [xFlags]
1468 }
1469# endif
1470 return xFlags;
1471}
1472#endif
1473
1474
1475/**
1476 * Reads a machine specific register.
1477 *
1478 * @returns Register content.
1479 * @param uRegister Register to read.
1480 */
1481#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1482DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1483#else
1484DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1485{
1486 RTUINT64U u;
1487# if RT_INLINE_ASM_GNU_STYLE
1488 __asm__ __volatile__("rdmsr\n\t"
1489 : "=a" (u.s.Lo),
1490 "=d" (u.s.Hi)
1491 : "c" (uRegister));
1492
1493# elif RT_INLINE_ASM_USES_INTRIN
1494 u.u = __readmsr(uRegister);
1495
1496# else
1497 __asm
1498 {
1499 mov ecx, [uRegister]
1500 rdmsr
1501 mov [u.s.Lo], eax
1502 mov [u.s.Hi], edx
1503 }
1504# endif
1505
1506 return u.u;
1507}
1508#endif
1509
1510
1511/**
1512 * Writes a machine specific register.
1513 *
1514 * @returns Register content.
1515 * @param uRegister Register to write to.
1516 * @param u64Val Value to write.
1517 */
1518#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1519DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1520#else
1521DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1522{
1523 RTUINT64U u;
1524
1525 u.u = u64Val;
1526# if RT_INLINE_ASM_GNU_STYLE
1527 __asm__ __volatile__("wrmsr\n\t"
1528 ::"a" (u.s.Lo),
1529 "d" (u.s.Hi),
1530 "c" (uRegister));
1531
1532# elif RT_INLINE_ASM_USES_INTRIN
1533 __writemsr(uRegister, u.u);
1534
1535# else
1536 __asm
1537 {
1538 mov ecx, [uRegister]
1539 mov edx, [u.s.Hi]
1540 mov eax, [u.s.Lo]
1541 wrmsr
1542 }
1543# endif
1544}
1545#endif
1546
1547
1548/**
1549 * Reads low part of a machine specific register.
1550 *
1551 * @returns Register content.
1552 * @param uRegister Register to read.
1553 */
1554#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1555DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1556#else
1557DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1558{
1559 uint32_t u32;
1560# if RT_INLINE_ASM_GNU_STYLE
1561 __asm__ __volatile__("rdmsr\n\t"
1562 : "=a" (u32)
1563 : "c" (uRegister)
1564 : "edx");
1565
1566# elif RT_INLINE_ASM_USES_INTRIN
1567 u32 = (uint32_t)__readmsr(uRegister);
1568
1569#else
1570 __asm
1571 {
1572 mov ecx, [uRegister]
1573 rdmsr
1574 mov [u32], eax
1575 }
1576# endif
1577
1578 return u32;
1579}
1580#endif
1581
1582
1583/**
1584 * Reads high part of a machine specific register.
1585 *
1586 * @returns Register content.
1587 * @param uRegister Register to read.
1588 */
1589#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1590DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1591#else
1592DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1593{
1594 uint32_t u32;
1595# if RT_INLINE_ASM_GNU_STYLE
1596 __asm__ __volatile__("rdmsr\n\t"
1597 : "=d" (u32)
1598 : "c" (uRegister)
1599 : "eax");
1600
1601# elif RT_INLINE_ASM_USES_INTRIN
1602 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1603
1604# else
1605 __asm
1606 {
1607 mov ecx, [uRegister]
1608 rdmsr
1609 mov [u32], edx
1610 }
1611# endif
1612
1613 return u32;
1614}
1615#endif
1616
1617
1618/**
1619 * Gets dr7.
1620 *
1621 * @returns dr7.
1622 */
1623#if RT_INLINE_ASM_EXTERNAL
1624DECLASM(RTCCUINTREG) ASMGetDR7(void);
1625#else
1626DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1627{
1628 RTCCUINTREG uDR7;
1629# if RT_INLINE_ASM_GNU_STYLE
1630# ifdef RT_ARCH_AMD64
1631 __asm__ __volatile__("movq %%dr7, %0\n\t" : "=r" (uDR7));
1632# else
1633 __asm__ __volatile__("movl %%dr7, %0\n\t" : "=r" (uDR7));
1634# endif
1635# else
1636 __asm
1637 {
1638# ifdef RT_ARCH_AMD64
1639 mov rax, dr7
1640 mov [uDR7], rax
1641# else
1642 mov eax, dr7
1643 mov [uDR7], eax
1644# endif
1645 }
1646# endif
1647 return uDR7;
1648}
1649#endif
1650
1651
1652/**
1653 * Gets dr6.
1654 *
1655 * @returns dr6.
1656 */
1657#if RT_INLINE_ASM_EXTERNAL
1658DECLASM(RTCCUINTREG) ASMGetDR6(void);
1659#else
1660DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1661{
1662 RTCCUINTREG uDR6;
1663# if RT_INLINE_ASM_GNU_STYLE
1664# ifdef RT_ARCH_AMD64
1665 __asm__ __volatile__("movq %%dr6, %0\n\t" : "=r" (uDR6));
1666# else
1667 __asm__ __volatile__("movl %%dr6, %0\n\t" : "=r" (uDR6));
1668# endif
1669# else
1670 __asm
1671 {
1672# ifdef RT_ARCH_AMD64
1673 mov rax, dr6
1674 mov [uDR6], rax
1675# else
1676 mov eax, dr6
1677 mov [uDR6], eax
1678# endif
1679 }
1680# endif
1681 return uDR6;
1682}
1683#endif
1684
1685
1686/**
1687 * Reads and clears DR6.
1688 *
1689 * @returns DR6.
1690 */
1691#if RT_INLINE_ASM_EXTERNAL
1692DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1693#else
1694DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1695{
1696 RTCCUINTREG uDR6;
1697# if RT_INLINE_ASM_GNU_STYLE
1698 RTCCUINTREG uNewValue = 0xffff0ff0; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1699# ifdef RT_ARCH_AMD64
1700 __asm__ __volatile__("movq %%dr6, %0\n\t"
1701 "movq %1, %%dr6\n\t"
1702 : "=r" (uDR6)
1703 : "r" (uNewValue));
1704# else
1705 __asm__ __volatile__("movl %%dr6, %0\n\t"
1706 "movl %1, %%dr6\n\t"
1707 : "=r" (uDR6)
1708 : "r" (uNewValue));
1709# endif
1710# else
1711 __asm
1712 {
1713# ifdef RT_ARCH_AMD64
1714 mov rax, dr6
1715 mov [uDR6], rax
1716 mov rcx, rax
1717 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 and 63-31 are zero. */
1718 mov dr6, rcx
1719# else
1720 mov eax, dr6
1721 mov [uDR6], eax
1722 mov ecx, 0ffff0ff0h; /* 31-16 and 4-11 are 1's, 12 is zero. */
1723 mov dr6, ecx
1724# endif
1725 }
1726# endif
1727 return uDR6;
1728}
1729#endif
1730
1731
1732/**
1733 * Compiler memory barrier.
1734 *
1735 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1736 * values or any outstanding writes when returning from this function.
1737 *
1738 * This function must be used if non-volatile data is modified by a
1739 * device or the VMM. Typical cases are port access, MMIO access,
1740 * trapping instruction, etc.
1741 */
1742#if RT_INLINE_ASM_GNU_STYLE
1743# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1744#elif RT_INLINE_ASM_USES_INTRIN
1745# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1746#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1747DECLINLINE(void) ASMCompilerBarrier(void)
1748{
1749 __asm
1750 {
1751 }
1752}
1753#endif
1754
1755
1756/**
1757 * Writes a 8-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to read from.
1760 * @param u8 8-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1764#else
1765DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outb %b1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u8));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outbyte(Port, u8);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov al, [u8]
1780 out dx, al
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets a 8-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 8-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1795#else
1796DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1797{
1798 uint8_t u8;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inb %w1, %b0\n\t"
1801 : "=a" (u8)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u8 = __inbyte(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in al, dx
1812 mov [u8], al
1813 }
1814# endif
1815 return u8;
1816}
1817#endif
1818
1819
1820/**
1821 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1822 *
1823 * @param Port I/O port to read from.
1824 * @param u16 16-bit integer to write.
1825 */
1826#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1827DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1828#else
1829DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1830{
1831# if RT_INLINE_ASM_GNU_STYLE
1832 __asm__ __volatile__("outw %w1, %w0\n\t"
1833 :: "Nd" (Port),
1834 "a" (u16));
1835
1836# elif RT_INLINE_ASM_USES_INTRIN
1837 __outword(Port, u16);
1838
1839# else
1840 __asm
1841 {
1842 mov dx, [Port]
1843 mov ax, [u16]
1844 out dx, ax
1845 }
1846# endif
1847}
1848#endif
1849
1850
1851/**
1852 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1853 *
1854 * @returns 16-bit integer.
1855 * @param Port I/O port to read from.
1856 */
1857#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1858DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1859#else
1860DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1861{
1862 uint16_t u16;
1863# if RT_INLINE_ASM_GNU_STYLE
1864 __asm__ __volatile__("inw %w1, %w0\n\t"
1865 : "=a" (u16)
1866 : "Nd" (Port));
1867
1868# elif RT_INLINE_ASM_USES_INTRIN
1869 u16 = __inword(Port);
1870
1871# else
1872 __asm
1873 {
1874 mov dx, [Port]
1875 in ax, dx
1876 mov [u16], ax
1877 }
1878# endif
1879 return u16;
1880}
1881#endif
1882
1883
1884/**
1885 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1886 *
1887 * @param Port I/O port to read from.
1888 * @param u32 32-bit integer to write.
1889 */
1890#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1891DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1892#else
1893DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1894{
1895# if RT_INLINE_ASM_GNU_STYLE
1896 __asm__ __volatile__("outl %1, %w0\n\t"
1897 :: "Nd" (Port),
1898 "a" (u32));
1899
1900# elif RT_INLINE_ASM_USES_INTRIN
1901 __outdword(Port, u32);
1902
1903# else
1904 __asm
1905 {
1906 mov dx, [Port]
1907 mov eax, [u32]
1908 out dx, eax
1909 }
1910# endif
1911}
1912#endif
1913
1914
1915/**
1916 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1917 *
1918 * @returns 32-bit integer.
1919 * @param Port I/O port to read from.
1920 */
1921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1922DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1923#else
1924DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1925{
1926 uint32_t u32;
1927# if RT_INLINE_ASM_GNU_STYLE
1928 __asm__ __volatile__("inl %w1, %0\n\t"
1929 : "=a" (u32)
1930 : "Nd" (Port));
1931
1932# elif RT_INLINE_ASM_USES_INTRIN
1933 u32 = __indword(Port);
1934
1935# else
1936 __asm
1937 {
1938 mov dx, [Port]
1939 in eax, dx
1940 mov [u32], eax
1941 }
1942# endif
1943 return u32;
1944}
1945#endif
1946
1947/** @todo string i/o */
1948
1949
1950/**
1951 * Atomically Exchange an unsigned 8-bit value, ordered.
1952 *
1953 * @returns Current *pu8 value
1954 * @param pu8 Pointer to the 8-bit variable to update.
1955 * @param u8 The 8-bit value to assign to *pu8.
1956 */
1957#if RT_INLINE_ASM_EXTERNAL
1958DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1959#else
1960DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1961{
1962# if RT_INLINE_ASM_GNU_STYLE
1963 __asm__ __volatile__("xchgb %0, %1\n\t"
1964 : "=m" (*pu8),
1965 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1966 : "1" (u8));
1967# else
1968 __asm
1969 {
1970# ifdef RT_ARCH_AMD64
1971 mov rdx, [pu8]
1972 mov al, [u8]
1973 xchg [rdx], al
1974 mov [u8], al
1975# else
1976 mov edx, [pu8]
1977 mov al, [u8]
1978 xchg [edx], al
1979 mov [u8], al
1980# endif
1981 }
1982# endif
1983 return u8;
1984}
1985#endif
1986
1987
1988/**
1989 * Atomically Exchange a signed 8-bit value, ordered.
1990 *
1991 * @returns Current *pu8 value
1992 * @param pi8 Pointer to the 8-bit variable to update.
1993 * @param i8 The 8-bit value to assign to *pi8.
1994 */
1995DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1996{
1997 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1998}
1999
2000
2001/**
2002 * Atomically Exchange a bool value, ordered.
2003 *
2004 * @returns Current *pf value
2005 * @param pf Pointer to the 8-bit variable to update.
2006 * @param f The 8-bit value to assign to *pi8.
2007 */
2008DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
2009{
2010#ifdef _MSC_VER
2011 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2012#else
2013 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
2014#endif
2015}
2016
2017
2018/**
2019 * Atomically Exchange an unsigned 16-bit value, ordered.
2020 *
2021 * @returns Current *pu16 value
2022 * @param pu16 Pointer to the 16-bit variable to update.
2023 * @param u16 The 16-bit value to assign to *pu16.
2024 */
2025#if RT_INLINE_ASM_EXTERNAL
2026DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
2027#else
2028DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
2029{
2030# if RT_INLINE_ASM_GNU_STYLE
2031 __asm__ __volatile__("xchgw %0, %1\n\t"
2032 : "=m" (*pu16),
2033 "=r" (u16)
2034 : "1" (u16));
2035# else
2036 __asm
2037 {
2038# ifdef RT_ARCH_AMD64
2039 mov rdx, [pu16]
2040 mov ax, [u16]
2041 xchg [rdx], ax
2042 mov [u16], ax
2043# else
2044 mov edx, [pu16]
2045 mov ax, [u16]
2046 xchg [edx], ax
2047 mov [u16], ax
2048# endif
2049 }
2050# endif
2051 return u16;
2052}
2053#endif
2054
2055
2056/**
2057 * Atomically Exchange a signed 16-bit value, ordered.
2058 *
2059 * @returns Current *pu16 value
2060 * @param pi16 Pointer to the 16-bit variable to update.
2061 * @param i16 The 16-bit value to assign to *pi16.
2062 */
2063DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
2064{
2065 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
2066}
2067
2068
2069/**
2070 * Atomically Exchange an unsigned 32-bit value, ordered.
2071 *
2072 * @returns Current *pu32 value
2073 * @param pu32 Pointer to the 32-bit variable to update.
2074 * @param u32 The 32-bit value to assign to *pu32.
2075 */
2076#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2077DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
2078#else
2079DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
2080{
2081# if RT_INLINE_ASM_GNU_STYLE
2082 __asm__ __volatile__("xchgl %0, %1\n\t"
2083 : "=m" (*pu32),
2084 "=r" (u32)
2085 : "1" (u32));
2086
2087# elif RT_INLINE_ASM_USES_INTRIN
2088 u32 = _InterlockedExchange((long *)pu32, u32);
2089
2090# else
2091 __asm
2092 {
2093# ifdef RT_ARCH_AMD64
2094 mov rdx, [pu32]
2095 mov eax, u32
2096 xchg [rdx], eax
2097 mov [u32], eax
2098# else
2099 mov edx, [pu32]
2100 mov eax, u32
2101 xchg [edx], eax
2102 mov [u32], eax
2103# endif
2104 }
2105# endif
2106 return u32;
2107}
2108#endif
2109
2110
2111/**
2112 * Atomically Exchange a signed 32-bit value, ordered.
2113 *
2114 * @returns Current *pu32 value
2115 * @param pi32 Pointer to the 32-bit variable to update.
2116 * @param i32 The 32-bit value to assign to *pi32.
2117 */
2118DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
2119{
2120 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
2121}
2122
2123
2124/**
2125 * Atomically Exchange an unsigned 64-bit value, ordered.
2126 *
2127 * @returns Current *pu64 value
2128 * @param pu64 Pointer to the 64-bit variable to update.
2129 * @param u64 The 64-bit value to assign to *pu64.
2130 */
2131#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2132DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2133#else
2134DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2135{
2136# if defined(RT_ARCH_AMD64)
2137# if RT_INLINE_ASM_USES_INTRIN
2138 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2139
2140# elif RT_INLINE_ASM_GNU_STYLE
2141 __asm__ __volatile__("xchgq %0, %1\n\t"
2142 : "=m" (*pu64),
2143 "=r" (u64)
2144 : "1" (u64));
2145# else
2146 __asm
2147 {
2148 mov rdx, [pu64]
2149 mov rax, [u64]
2150 xchg [rdx], rax
2151 mov [u64], rax
2152 }
2153# endif
2154# else /* !RT_ARCH_AMD64 */
2155# if RT_INLINE_ASM_GNU_STYLE
2156# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2157 uint32_t u32 = (uint32_t)u64;
2158 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2159 "xchgl %%ebx, %3\n\t"
2160 "1:\n\t"
2161 "lock; cmpxchg8b (%5)\n\t"
2162 "jnz 1b\n\t"
2163 "xchgl %%ebx, %3\n\t"
2164 /*"xchgl %%esi, %5\n\t"*/
2165 : "=A" (u64),
2166 "=m" (*pu64)
2167 : "0" (*pu64),
2168 "m" ( u32 ),
2169 "c" ( (uint32_t)(u64 >> 32) ),
2170 "S" (pu64) );
2171# else /* !PIC */
2172 __asm__ __volatile__("1:\n\t"
2173 "lock; cmpxchg8b %1\n\t"
2174 "jnz 1b\n\t"
2175 : "=A" (u64),
2176 "=m" (*pu64)
2177 : "0" (*pu64),
2178 "b" ( (uint32_t)u64 ),
2179 "c" ( (uint32_t)(u64 >> 32) ));
2180# endif
2181# else
2182 __asm
2183 {
2184 mov ebx, dword ptr [u64]
2185 mov ecx, dword ptr [u64 + 4]
2186 mov edi, pu64
2187 mov eax, dword ptr [edi]
2188 mov edx, dword ptr [edi + 4]
2189 retry:
2190 lock cmpxchg8b [edi]
2191 jnz retry
2192 mov dword ptr [u64], eax
2193 mov dword ptr [u64 + 4], edx
2194 }
2195# endif
2196# endif /* !RT_ARCH_AMD64 */
2197 return u64;
2198}
2199#endif
2200
2201
2202/**
2203 * Atomically Exchange an signed 64-bit value, ordered.
2204 *
2205 * @returns Current *pi64 value
2206 * @param pi64 Pointer to the 64-bit variable to update.
2207 * @param i64 The 64-bit value to assign to *pi64.
2208 */
2209DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2210{
2211 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2212}
2213
2214
#ifdef RT_ARCH_AMD64
/**
 * Atomically Exchange an unsigned 128-bit value, ordered.
 *
 * @returns Current *pu128.
 * @param   pu128   Pointer to the 128-bit variable to update.
 * @param   u128    The 128-bit value to assign to *pu128.
 *
 * @remark  We cannot really assume that any hardware supports this. Nor do I have
 *          GAS support for it. So, for the time being we'll BREAK the atomic
 *          bit of this function and use two 64-bit exchanges instead.
 *          I.e. the two halves are exchanged atomically each, but NOT as one
 *          128-bit unit - a concurrent reader may observe a torn value.
 */
# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
# else
DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
{
    if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
    {
        /** @todo this is clumsy code */
        RTUINT128U u128Ret;
        u128Ret.u = u128;
        /* Two independent 64-bit exchanges - see the @remark about atomicity. */
        u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
        u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
        return u128Ret.u;
    }
#if 0 /* later? */
    else
    {
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("1:\n\t"
                             "lock; cmpxchg8b %1\n\t"
                             "jnz 1b\n\t"
                             : "=A" (u128),
                               "=m" (*pu128)
                             : "0" (*pu128),
                               "b" ( (uint64_t)u128 ),
                               "c" ( (uint64_t)(u128 >> 64) ));
# else
        __asm
        {
            mov     rbx, dword ptr [u128]
            mov     rcx, dword ptr [u128 + 8]
            mov     rdi, pu128
            mov     rax, dword ptr [rdi]
            mov     rdx, dword ptr [rdi + 8]
        retry:
            lock cmpxchg16b [rdi]
            jnz retry
            mov     dword ptr [u128], rax
            mov     dword ptr [u128 + 8], rdx
        }
# endif
    }
    return u128;
#endif
}
# endif
#endif /* RT_ARCH_AMD64 */
2274
2275
2276/**
2277 * Atomically Exchange a value which size might differ
2278 * between platforms or compilers, ordered.
2279 *
2280 * @param pu Pointer to the variable to update.
2281 * @param uNew The value to assign to *pu.
2282 */
2283#define ASMAtomicXchgSize(pu, uNew) \
2284 do { \
2285 switch (sizeof(*(pu))) { \
2286 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2287 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2288 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2289 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2290 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2291 } \
2292 } while (0)
2293
2294
2295/**
2296 * Atomically Exchange a pointer value, ordered.
2297 *
2298 * @returns Current *ppv value
2299 * @param ppv Pointer to the pointer variable to update.
2300 * @param pv The pointer value to assign to *ppv.
2301 */
2302DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2303{
2304#if ARCH_BITS == 32
2305 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2306#elif ARCH_BITS == 64
2307 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2308#else
2309# error "ARCH_BITS is bogus"
2310#endif
2311}
2312
2313
2314/**
2315 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2316 *
2317 * @returns true if xchg was done.
2318 * @returns false if xchg wasn't done.
2319 *
2320 * @param pu32 Pointer to the value to update.
2321 * @param u32New The new value to assigned to *pu32.
2322 * @param u32Old The old value to *pu32 compare with.
2323 */
2324#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2325DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2326#else
2327DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2328{
2329# if RT_INLINE_ASM_GNU_STYLE
2330 uint8_t u8Ret;
2331 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2332 "setz %1\n\t"
2333 : "=m" (*pu32),
2334 "=qm" (u8Ret),
2335 "=a" (u32Old)
2336 : "r" (u32New),
2337 "2" (u32Old));
2338 return (bool)u8Ret;
2339
2340# elif RT_INLINE_ASM_USES_INTRIN
2341 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2342
2343# else
2344 uint32_t u32Ret;
2345 __asm
2346 {
2347# ifdef RT_ARCH_AMD64
2348 mov rdx, [pu32]
2349# else
2350 mov edx, [pu32]
2351# endif
2352 mov eax, [u32Old]
2353 mov ecx, [u32New]
2354# ifdef RT_ARCH_AMD64
2355 lock cmpxchg [rdx], ecx
2356# else
2357 lock cmpxchg [edx], ecx
2358# endif
2359 setz al
2360 movzx eax, al
2361 mov [u32Ret], eax
2362 }
2363 return !!u32Ret;
2364# endif
2365}
2366#endif
2367
2368
2369/**
2370 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2371 *
2372 * @returns true if xchg was done.
2373 * @returns false if xchg wasn't done.
2374 *
2375 * @param pi32 Pointer to the value to update.
2376 * @param i32New The new value to assigned to *pi32.
2377 * @param i32Old The old value to *pi32 compare with.
2378 */
2379DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2380{
2381 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2382}
2383
2384
2385/**
2386 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2387 *
2388 * @returns true if xchg was done.
2389 * @returns false if xchg wasn't done.
2390 *
2391 * @param pu64 Pointer to the 64-bit variable to update.
2392 * @param u64New The 64-bit value to assign to *pu64.
2393 * @param u64Old The value to compare with.
2394 */
2395#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2396DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2397#else
2398DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2399{
2400# if RT_INLINE_ASM_USES_INTRIN
2401 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2402
2403# elif defined(RT_ARCH_AMD64)
2404# if RT_INLINE_ASM_GNU_STYLE
2405 uint8_t u8Ret;
2406 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2407 "setz %1\n\t"
2408 : "=m" (*pu64),
2409 "=qm" (u8Ret),
2410 "=a" (u64Old)
2411 : "r" (u64New),
2412 "2" (u64Old));
2413 return (bool)u8Ret;
2414# else
2415 bool fRet;
2416 __asm
2417 {
2418 mov rdx, [pu32]
2419 mov rax, [u64Old]
2420 mov rcx, [u64New]
2421 lock cmpxchg [rdx], rcx
2422 setz al
2423 mov [fRet], al
2424 }
2425 return fRet;
2426# endif
2427# else /* !RT_ARCH_AMD64 */
2428 uint32_t u32Ret;
2429# if RT_INLINE_ASM_GNU_STYLE
2430# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2431 uint32_t u32 = (uint32_t)u64New;
2432 uint32_t u32Spill;
2433 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2434 "lock; cmpxchg8b (%6)\n\t"
2435 "setz %%al\n\t"
2436 "xchgl %%ebx, %4\n\t"
2437 "movzbl %%al, %%eax\n\t"
2438 : "=a" (u32Ret),
2439 "=d" (u32Spill),
2440 "=m" (*pu64)
2441 : "A" (u64Old),
2442 "m" ( u32 ),
2443 "c" ( (uint32_t)(u64New >> 32) ),
2444 "S" (pu64) );
2445# else /* !PIC */
2446 uint32_t u32Spill;
2447 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2448 "setz %%al\n\t"
2449 "movzbl %%al, %%eax\n\t"
2450 : "=a" (u32Ret),
2451 "=d" (u32Spill),
2452 "=m" (*pu64)
2453 : "A" (u64Old),
2454 "b" ( (uint32_t)u64New ),
2455 "c" ( (uint32_t)(u64New >> 32) ));
2456# endif
2457 return (bool)u32Ret;
2458# else
2459 __asm
2460 {
2461 mov ebx, dword ptr [u64New]
2462 mov ecx, dword ptr [u64New + 4]
2463 mov edi, [pu64]
2464 mov eax, dword ptr [u64Old]
2465 mov edx, dword ptr [u64Old + 4]
2466 lock cmpxchg8b [edi]
2467 setz al
2468 movzx eax, al
2469 mov dword ptr [u32Ret], eax
2470 }
2471 return !!u32Ret;
2472# endif
2473# endif /* !RT_ARCH_AMD64 */
2474}
2475#endif
2476
2477
2478/**
2479 * Atomically Compare and exchange a signed 64-bit value, ordered.
2480 *
2481 * @returns true if xchg was done.
2482 * @returns false if xchg wasn't done.
2483 *
2484 * @param pi64 Pointer to the 64-bit variable to update.
2485 * @param i64 The 64-bit value to assign to *pu64.
2486 * @param i64Old The value to compare with.
2487 */
2488DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2489{
2490 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2491}
2492
2493
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result.
 * @note    Only 4 and 8 byte operands are supported; anything else asserts
 *          and sets @a fRc to false.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
2515
2516
2517/**
2518 * Atomically Compare and Exchange a pointer value, ordered.
2519 *
2520 * @returns true if xchg was done.
2521 * @returns false if xchg wasn't done.
2522 *
2523 * @param ppv Pointer to the value to update.
2524 * @param pvNew The new value to assigned to *ppv.
2525 * @param pvOld The old value to *ppv compare with.
2526 */
2527DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2528{
2529#if ARCH_BITS == 32
2530 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2531#elif ARCH_BITS == 64
2532 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2533#else
2534# error "ARCH_BITS is bogus"
2535#endif
2536}
2537
2538
2539/**
2540 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2541 * passes back old value, ordered.
2542 *
2543 * @returns true if xchg was done.
2544 * @returns false if xchg wasn't done.
2545 *
2546 * @param pu32 Pointer to the value to update.
2547 * @param u32New The new value to assigned to *pu32.
2548 * @param u32Old The old value to *pu32 compare with.
2549 * @param pu32Old Pointer store the old value at.
2550 */
2551#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2552DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2553#else
2554DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2555{
2556# if RT_INLINE_ASM_GNU_STYLE
2557 uint8_t u8Ret;
2558 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2559 "setz %1\n\t"
2560 : "=m" (*pu32),
2561 "=qm" (u8Ret),
2562 "=a" (*pu32Old)
2563 : "r" (u32New),
2564 "a" (u32Old));
2565 return (bool)u8Ret;
2566
2567# elif RT_INLINE_ASM_USES_INTRIN
2568 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2569
2570# else
2571 uint32_t u32Ret;
2572 __asm
2573 {
2574# ifdef RT_ARCH_AMD64
2575 mov rdx, [pu32]
2576# else
2577 mov edx, [pu32]
2578# endif
2579 mov eax, [u32Old]
2580 mov ecx, [u32New]
2581# ifdef RT_ARCH_AMD64
2582 lock cmpxchg [rdx], ecx
2583 mov rdx, [pu32Old]
2584 mov [rdx], eax
2585# else
2586 lock cmpxchg [edx], ecx
2587 mov edx, [pu32Old]
2588 mov [edx], eax
2589# endif
2590 setz al
2591 movzx eax, al
2592 mov [u32Ret], eax
2593 }
2594 return !!u32Ret;
2595# endif
2596}
2597#endif
2598
2599
2600/**
2601 * Atomically Compare and Exchange a signed 32-bit value, additionally
2602 * passes back old value, ordered.
2603 *
2604 * @returns true if xchg was done.
2605 * @returns false if xchg wasn't done.
2606 *
2607 * @param pi32 Pointer to the value to update.
2608 * @param i32New The new value to assigned to *pi32.
2609 * @param i32Old The old value to *pi32 compare with.
2610 * @param pi32Old Pointer store the old value at.
2611 */
2612DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2613{
2614 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2615}
2616
2617
2618/**
2619 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2620 * passing back old value, ordered.
2621 *
2622 * @returns true if xchg was done.
2623 * @returns false if xchg wasn't done.
2624 *
2625 * @param pu64 Pointer to the 64-bit variable to update.
2626 * @param u64New The 64-bit value to assign to *pu64.
2627 * @param u64Old The value to compare with.
2628 * @param pu64Old Pointer store the old value at.
2629 */
2630#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2631DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2632#else
2633DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2634{
2635# if RT_INLINE_ASM_USES_INTRIN
2636 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2637
2638# elif defined(RT_ARCH_AMD64)
2639# if RT_INLINE_ASM_GNU_STYLE
2640 uint8_t u8Ret;
2641 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2642 "setz %1\n\t"
2643 : "=m" (*pu64),
2644 "=qm" (u8Ret),
2645 "=a" (*pu64Old)
2646 : "r" (u64New),
2647 "a" (u64Old));
2648 return (bool)u8Ret;
2649# else
2650 bool fRet;
2651 __asm
2652 {
2653 mov rdx, [pu32]
2654 mov rax, [u64Old]
2655 mov rcx, [u64New]
2656 lock cmpxchg [rdx], rcx
2657 mov rdx, [pu64Old]
2658 mov [rdx], rax
2659 setz al
2660 mov [fRet], al
2661 }
2662 return fRet;
2663# endif
2664# else /* !RT_ARCH_AMD64 */
2665# if RT_INLINE_ASM_GNU_STYLE
2666 uint64_t u64Ret;
2667# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2668 /* NB: this code uses a memory clobber description, because the clean
2669 * solution with an output value for *pu64 makes gcc run out of registers.
2670 * This will cause suboptimal code, and anyone with a better solution is
2671 * welcome to improve this. */
2672 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2673 "lock; cmpxchg8b %3\n\t"
2674 "xchgl %%ebx, %1\n\t"
2675 : "=A" (u64Ret)
2676 : "DS" ((uint32_t)u64New),
2677 "c" ((uint32_t)(u64New >> 32)),
2678 "m" (*pu64),
2679 "0" (u64Old)
2680 : "memory" );
2681# else /* !PIC */
2682 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2683 : "=A" (u64Ret),
2684 "=m" (*pu64)
2685 : "b" ((uint32_t)u64New),
2686 "c" ((uint32_t)(u64New >> 32)),
2687 "m" (*pu64),
2688 "0" (u64Old));
2689# endif
2690 *pu64Old = u64Ret;
2691 return u64Ret == u64Old;
2692# else
2693 uint32_t u32Ret;
2694 __asm
2695 {
2696 mov ebx, dword ptr [u64New]
2697 mov ecx, dword ptr [u64New + 4]
2698 mov edi, [pu64]
2699 mov eax, dword ptr [u64Old]
2700 mov edx, dword ptr [u64Old + 4]
2701 lock cmpxchg8b [edi]
2702 mov ebx, [pu64Old]
2703 mov [ebx], eax
2704 setz al
2705 movzx eax, al
2706 add ebx, 4
2707 mov [ebx], edx
2708 mov dword ptr [u32Ret], eax
2709 }
2710 return !!u32Ret;
2711# endif
2712# endif /* !RT_ARCH_AMD64 */
2713}
2714#endif
2715
2716
2717/**
2718 * Atomically Compare and exchange a signed 64-bit value, additionally
2719 * passing back old value, ordered.
2720 *
2721 * @returns true if xchg was done.
2722 * @returns false if xchg wasn't done.
2723 *
2724 * @param pi64 Pointer to the 64-bit variable to update.
2725 * @param i64 The 64-bit value to assign to *pu64.
2726 * @param i64Old The value to compare with.
2727 * @param pi64Old Pointer store the old value at.
2728 */
2729DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2730{
2731 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2732}
2733
2734
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * @param   pu      Pointer to the value to update.
 * @param   uNew    The new value to assigned to *pu.
 * @param   uOld    The old value to *pu compare with.
 * @param   fRc     Where to store the result (bool).
 * @param   uOldVal Where to store the old value.
 */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
                break; \
            /* fixed: the message used to name the wrong macro (ASMAtomicCmpXchgSize). */ \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                (uOldVal) = 0; \
                break; \
        } \
    } while (0)
2758
2759
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assigned to *ppv.
 * @param   pvOld   The old value to *ppv compare with.
 * @param   ppvOld  Pointer to store the old value at.
 */
DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
{
/* Dispatch on the native pointer width to the matching integer worker. */
#if ARCH_BITS == 32
    return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
#elif ARCH_BITS == 64
    return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
#else
# error "ARCH_BITS is bogus"
#endif
}
2782
2783
/**
 * Atomically exchanges and adds to a 32-bit value, ordered.
 *
 * @returns The old value.
 * @param   pu32    Pointer to the value.
 * @param   u32     Number to add.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    /* _InterlockedExchangeAdd returns the value *pu32 had before the add. */
    u32 = _InterlockedExchangeAdd((long *)pu32, u32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* lock xadd stores old+u32 and leaves the old value in the register operand. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (u32)
                         : "memory");
    return u32;
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     [u32], eax
    }
    return u32;
# endif
}
#endif
2824
2825
2826/**
2827 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2828 *
2829 * @returns The old value.
2830 * @param pi32 Pointer to the value.
2831 * @param i32 Number to add.
2832 */
2833DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2834{
2835 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2836}
2837
2838
/**
 * Atomically increment a 32-bit value, ordered.
 *
 * @returns The new value.
 * @param   pu32    Pointer to the value to increment.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* _InterlockedIncrement returns the incremented (new) value. */
    u32 = _InterlockedIncrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* lock xadd returns the old value, so add 1 to report the new one. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (1)
                         : "memory");
    return u32+1;
# else
    __asm
    {
        mov     eax, 1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32+1;
# endif
}
#endif
2879
2880
2881/**
2882 * Atomically increment a signed 32-bit value, ordered.
2883 *
2884 * @returns The new value.
2885 * @param pi32 Pointer to the value to increment.
2886 */
2887DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2888{
2889 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2890}
2891
2892
/**
 * Atomically decrement an unsigned 32-bit value, ordered.
 *
 * @returns The new value.
 * @param   pu32    Pointer to the value to decrement.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
#else
DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
{
    uint32_t u32;
# if RT_INLINE_ASM_USES_INTRIN
    /* _InterlockedDecrement returns the decremented (new) value. */
    u32 = _InterlockedDecrement((long *)pu32);
    return u32;

# elif RT_INLINE_ASM_GNU_STYLE
    /* lock xadd of -1 returns the old value, so subtract 1 to report the new one. */
    __asm__ __volatile__("lock; xaddl %0, %1\n\t"
                         : "=r" (u32),
                           "=m" (*pu32)
                         : "0" (-1)
                         : "memory");
    return u32-1;
# else
    __asm
    {
        mov     eax, -1
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock xadd [rdx], eax
#  else
        mov     edx, [pu32]
        lock xadd [edx], eax
#  endif
        mov     u32, eax
    }
    return u32-1;
# endif
}
#endif
2933
2934
2935/**
2936 * Atomically decrement a signed 32-bit value, ordered.
2937 *
2938 * @returns The new value.
2939 * @param pi32 Pointer to the value to decrement.
2940 */
2941DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2942{
2943 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2944}
2945
2946
/**
 * Atomically Or an unsigned 32-bit value, ordered.
 *
 * @param   pu32    Pointer to the 32-bit variable to OR u32 with.
 * @param   u32     The value to OR *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedOr((long volatile *)pu32, (long)u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Read-modify-write OR straight on memory under the lock prefix. */
    __asm__ __volatile__("lock; orl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock or [rdx], eax
#  else
        mov     edx, [pu32]
        lock or [edx], eax
#  endif
    }
# endif
}
#endif
2980
2981
2982/**
2983 * Atomically Or a signed 32-bit value, ordered.
2984 *
2985 * @param pi32 Pointer to the pointer variable to OR u32 with.
2986 * @param i32 The value to OR *pu32 with.
2987 */
2988DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2989{
2990 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2991}
2992
2993
/**
 * Atomically And an unsigned 32-bit value, ordered.
 *
 * @param   pu32    Pointer to the 32-bit variable to AND u32 with.
 * @param   u32     The value to AND *pu32 with.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
#else
DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    _InterlockedAnd((long volatile *)pu32, u32);

# elif RT_INLINE_ASM_GNU_STYLE
    /* Read-modify-write AND straight on memory under the lock prefix. */
    __asm__ __volatile__("lock; andl %1, %0\n\t"
                         : "=m" (*pu32)
                         : "ir" (u32));
# else
    __asm
    {
        mov     eax, [u32]
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
        lock and [rdx], eax
#  else
        mov     edx, [pu32]
        lock and [edx], eax
#  endif
    }
# endif
}
#endif
3027
3028
3029/**
3030 * Atomically And a signed 32-bit value, ordered.
3031 *
3032 * @param pi32 Pointer to the pointer variable to AND i32 with.
3033 * @param i32 The value to AND *pi32 with.
3034 */
3035DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3036{
3037 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3038}
3039
3040
/**
 * Memory fence, waits for any pending writes and reads to complete.
 */
DECLINLINE(void) ASMMemoryFence(void)
{
    /** @todo use mfence? check if all cpus we care for support it. */
    /* A locked exchange on a private stack variable acts as a full barrier;
       the variable's initial (indeterminate) value is never used. */
    uint32_t volatile u32;
    ASMAtomicXchgU32(&u32, 0);
}
3050
3051
/**
 * Write fence, waits for any pending writes to complete.
 */
DECLINLINE(void) ASMWriteFence(void)
{
    /** @todo use sfence? check if all cpus we care for support it. */
    /* Falls back to the (stronger) full memory fence for now. */
    ASMMemoryFence();
}
3060
3061
/**
 * Read fence, waits for any pending reads to complete.
 */
DECLINLINE(void) ASMReadFence(void)
{
    /** @todo use lfence? check if all cpus we care for support it. */
    /* Falls back to the (stronger) full memory fence for now. */
    ASMMemoryFence();
}
3070
3071
3072/**
3073 * Atomically reads an unsigned 8-bit value, ordered.
3074 *
3075 * @returns Current *pu8 value
3076 * @param pu8 Pointer to the 8-bit variable to read.
3077 */
3078DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
3079{
3080 ASMMemoryFence();
3081 return *pu8; /* byte reads are atomic on x86 */
3082}
3083
3084
3085/**
3086 * Atomically reads an unsigned 8-bit value, unordered.
3087 *
3088 * @returns Current *pu8 value
3089 * @param pu8 Pointer to the 8-bit variable to read.
3090 */
3091DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
3092{
3093 return *pu8; /* byte reads are atomic on x86 */
3094}
3095
3096
3097/**
3098 * Atomically reads a signed 8-bit value, ordered.
3099 *
3100 * @returns Current *pi8 value
3101 * @param pi8 Pointer to the 8-bit variable to read.
3102 */
3103DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
3104{
3105 ASMMemoryFence();
3106 return *pi8; /* byte reads are atomic on x86 */
3107}
3108
3109
3110/**
3111 * Atomically reads a signed 8-bit value, unordered.
3112 *
3113 * @returns Current *pi8 value
3114 * @param pi8 Pointer to the 8-bit variable to read.
3115 */
3116DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
3117{
3118 return *pi8; /* byte reads are atomic on x86 */
3119}
3120
3121
3122/**
3123 * Atomically reads an unsigned 16-bit value, ordered.
3124 *
3125 * @returns Current *pu16 value
3126 * @param pu16 Pointer to the 16-bit variable to read.
3127 */
3128DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3129{
3130 ASMMemoryFence();
3131 Assert(!((uintptr_t)pu16 & 1));
3132 return *pu16;
3133}
3134
3135
3136/**
3137 * Atomically reads an unsigned 16-bit value, unordered.
3138 *
3139 * @returns Current *pu16 value
3140 * @param pu16 Pointer to the 16-bit variable to read.
3141 */
3142DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3143{
3144 Assert(!((uintptr_t)pu16 & 1));
3145 return *pu16;
3146}
3147
3148
3149/**
3150 * Atomically reads a signed 16-bit value, ordered.
3151 *
3152 * @returns Current *pi16 value
3153 * @param pi16 Pointer to the 16-bit variable to read.
3154 */
3155DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3156{
3157 ASMMemoryFence();
3158 Assert(!((uintptr_t)pi16 & 1));
3159 return *pi16;
3160}
3161
3162
3163/**
3164 * Atomically reads a signed 16-bit value, unordered.
3165 *
3166 * @returns Current *pi16 value
3167 * @param pi16 Pointer to the 16-bit variable to read.
3168 */
3169DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3170{
3171 Assert(!((uintptr_t)pi16 & 1));
3172 return *pi16;
3173}
3174
3175
3176/**
3177 * Atomically reads an unsigned 32-bit value, ordered.
3178 *
3179 * @returns Current *pu32 value
3180 * @param pu32 Pointer to the 32-bit variable to read.
3181 */
3182DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3183{
3184 ASMMemoryFence();
3185 Assert(!((uintptr_t)pu32 & 3));
3186 return *pu32;
3187}
3188
3189
3190/**
3191 * Atomically reads an unsigned 32-bit value, unordered.
3192 *
3193 * @returns Current *pu32 value
3194 * @param pu32 Pointer to the 32-bit variable to read.
3195 */
3196DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3197{
3198 Assert(!((uintptr_t)pu32 & 3));
3199 return *pu32;
3200}
3201
3202
3203/**
3204 * Atomically reads a signed 32-bit value, ordered.
3205 *
3206 * @returns Current *pi32 value
3207 * @param pi32 Pointer to the 32-bit variable to read.
3208 */
3209DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3210{
3211 ASMMemoryFence();
3212 Assert(!((uintptr_t)pi32 & 3));
3213 return *pi32;
3214}
3215
3216
3217/**
3218 * Atomically reads a signed 32-bit value, unordered.
3219 *
3220 * @returns Current *pi32 value
3221 * @param pi32 Pointer to the 32-bit variable to read.
3222 */
3223DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3224{
3225 Assert(!((uintptr_t)pi32 & 3));
3226 return *pi32;
3227}
3228
3229
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 * @remark  This will fault if the memory is read-only!
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    /* On AMD64 a plain aligned 64-bit load suffices; mfence supplies the ordering. */
#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__( "mfence\n\t"
                         "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif
# else /* !RT_ARCH_AMD64 */
    /* On x86 we do lock cmpxchg8b with old=0/new=0: either way edx:eax ("=A")
       ends up holding the current contents of *pu64.  This writes back 0 when
       *pu64 was 0 - hence the memory-must-be-writable remark above. */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    /* ebx is swapped in/out by hand because PIC code reserves it for the GOT pointer. */
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "xchgl %%ebx, %3\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "=m" (*pu64)
                         : "0" (0),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
3300
3301
3302/**
3303 * Atomically reads an unsigned 64-bit value, unordered.
3304 *
3305 * @returns Current *pu64 value
3306 * @param pu64 Pointer to the 64-bit variable to read.
3307 * The memory pointed to must be writable.
3308 * @remark This will fault if the memory is read-only!
3309 */
3310#if RT_INLINE_ASM_EXTERNAL
3311DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3312#else
3313DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3314{
3315 uint64_t u64;
3316# ifdef RT_ARCH_AMD64
3317# if RT_INLINE_ASM_GNU_STYLE
3318 Assert(!((uintptr_t)pu64 & 7));
3319 __asm__ __volatile__("movq %1, %0\n\t"
3320 : "=r" (u64)
3321 : "m" (*pu64));
3322# else
3323 __asm
3324 {
3325 mov rdx, [pu64]
3326 mov rax, [rdx]
3327 mov [u64], rax
3328 }
3329# endif
3330# else /* !RT_ARCH_AMD64 */
3331# if RT_INLINE_ASM_GNU_STYLE
3332# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3333 uint32_t u32EBX = 0;
3334 Assert(!((uintptr_t)pu64 & 7));
3335 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3336 "lock; cmpxchg8b (%5)\n\t"
3337 "xchgl %%ebx, %3\n\t"
3338 : "=A" (u64),
3339 "=m" (*pu64)
3340 : "0" (0),
3341 "m" (u32EBX),
3342 "c" (0),
3343 "S" (pu64));
3344# else /* !PIC */
3345 __asm__ __volatile__("cmpxchg8b %1\n\t"
3346 : "=A" (u64),
3347 "=m" (*pu64)
3348 : "0" (0),
3349 "b" (0),
3350 "c" (0));
3351# endif
3352# else
3353 Assert(!((uintptr_t)pu64 & 7));
3354 __asm
3355 {
3356 xor eax, eax
3357 xor edx, edx
3358 mov edi, pu64
3359 xor ecx, ecx
3360 xor ebx, ebx
3361 lock cmpxchg8b [edi]
3362 mov dword ptr [u64], eax
3363 mov dword ptr [u64 + 4], edx
3364 }
3365# endif
3366# endif /* !RT_ARCH_AMD64 */
3367 return u64;
3368}
3369#endif
3370
3371
3372/**
3373 * Atomically reads a signed 64-bit value, ordered.
3374 *
3375 * @returns Current *pi64 value
3376 * @param pi64 Pointer to the 64-bit variable to read.
3377 * The memory pointed to must be writable.
3378 * @remark This will fault if the memory is read-only!
3379 */
3380DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3381{
3382 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3383}
3384
3385
3386/**
3387 * Atomically reads a signed 64-bit value, unordered.
3388 *
3389 * @returns Current *pi64 value
3390 * @param pi64 Pointer to the 64-bit variable to read.
3391 * The memory pointed to must be writable.
3392 * @remark This will fault if the memory is read-only!
3393 */
3394DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3395{
3396 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3397}
3398
3399
/**
 * Atomically reads a pointer value, ordered.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 */
DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
{
/* Dispatch on the native pointer width to the matching integer worker. */
#if ARCH_BITS == 32
    return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
#elif ARCH_BITS == 64
    return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
#else
# error "ARCH_BITS is bogus"
#endif
}
3416
3417
/**
 * Atomically reads a pointer value, unordered.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 */
DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
{
/* Dispatch on the native pointer width to the matching integer worker. */
#if ARCH_BITS == 32
    return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
#elif ARCH_BITS == 64
    return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
#else
# error "ARCH_BITS is bogus"
#endif
}
3434
3435
3436/**
3437 * Atomically reads a boolean value, ordered.
3438 *
3439 * @returns Current *pf value
3440 * @param pf Pointer to the boolean variable to read.
3441 */
3442DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3443{
3444 ASMMemoryFence();
3445 return *pf; /* byte reads are atomic on x86 */
3446}
3447
3448
3449/**
3450 * Atomically reads a boolean value, unordered.
3451 *
3452 * @returns Current *pf value
3453 * @param pf Pointer to the boolean variable to read.
3454 */
3455DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3456{
3457 return *pf; /* byte reads are atomic on x86 */
3458}
3459
3460
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 1, 2, 4 and 8 byte variables are supported; anything else asserts.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t *)(puRes)  = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
            default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
3478
3479
/**
 * Atomically read a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Only 1, 2, 4 and 8 byte variables are supported; anything else asserts.
 *
 * @param   pu      Pointer to the variable to read.
 * @param   puRes   Where to store the result.
 */
#define ASMAtomicUoReadSize(pu, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t *)(puRes)  = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
            case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
            case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
            case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
            /* fixed: the message used to name ASMAtomicReadSize. */ \
            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
3497
3498
3499/**
3500 * Atomically writes an unsigned 8-bit value, ordered.
3501 *
3502 * @param pu8 Pointer to the 8-bit variable.
3503 * @param u8 The 8-bit value to assign to *pu8.
3504 */
3505DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3506{
3507 ASMAtomicXchgU8(pu8, u8);
3508}
3509
3510
3511/**
3512 * Atomically writes an unsigned 8-bit value, unordered.
3513 *
3514 * @param pu8 Pointer to the 8-bit variable.
3515 * @param u8 The 8-bit value to assign to *pu8.
3516 */
3517DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3518{
3519 *pu8 = u8; /* byte writes are atomic on x86 */
3520}
3521
3522
3523/**
3524 * Atomically writes a signed 8-bit value, ordered.
3525 *
3526 * @param pi8 Pointer to the 8-bit variable to read.
3527 * @param i8 The 8-bit value to assign to *pi8.
3528 */
3529DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3530{
3531 ASMAtomicXchgS8(pi8, i8);
3532}
3533
3534
3535/**
3536 * Atomically writes a signed 8-bit value, unordered.
3537 *
3538 * @param pi8 Pointer to the 8-bit variable to read.
3539 * @param i8 The 8-bit value to assign to *pi8.
3540 */
3541DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3542{
3543 *pi8 = i8; /* byte writes are atomic on x86 */
3544}
3545
3546
3547/**
3548 * Atomically writes an unsigned 16-bit value, ordered.
3549 *
3550 * @param pu16 Pointer to the 16-bit variable.
3551 * @param u16 The 16-bit value to assign to *pu16.
3552 */
3553DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3554{
3555 ASMAtomicXchgU16(pu16, u16);
3556}
3557
3558
3559/**
3560 * Atomically writes an unsigned 16-bit value, unordered.
3561 *
3562 * @param pu16 Pointer to the 16-bit variable.
3563 * @param u16 The 16-bit value to assign to *pu16.
3564 */
3565DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3566{
3567 Assert(!((uintptr_t)pu16 & 1));
3568 *pu16 = u16;
3569}
3570
3571
3572/**
3573 * Atomically writes a signed 16-bit value, ordered.
3574 *
3575 * @param pi16 Pointer to the 16-bit variable to read.
3576 * @param i16 The 16-bit value to assign to *pi16.
3577 */
3578DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3579{
3580 ASMAtomicXchgS16(pi16, i16);
3581}
3582
3583
3584/**
3585 * Atomically writes a signed 16-bit value, unordered.
3586 *
3587 * @param pi16 Pointer to the 16-bit variable to read.
3588 * @param i16 The 16-bit value to assign to *pi16.
3589 */
3590DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3591{
3592 Assert(!((uintptr_t)pi16 & 1));
3593 *pi16 = i16;
3594}
3595
3596
3597/**
3598 * Atomically writes an unsigned 32-bit value, ordered.
3599 *
3600 * @param pu32 Pointer to the 32-bit variable.
3601 * @param u32 The 32-bit value to assign to *pu32.
3602 */
3603DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3604{
3605 ASMAtomicXchgU32(pu32, u32);
3606}
3607
3608
3609/**
3610 * Atomically writes an unsigned 32-bit value, unordered.
3611 *
3612 * @param pu32 Pointer to the 32-bit variable.
3613 * @param u32 The 32-bit value to assign to *pu32.
3614 */
3615DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3616{
3617 Assert(!((uintptr_t)pu32 & 3));
3618 *pu32 = u32;
3619}
3620
3621
3622/**
3623 * Atomically writes a signed 32-bit value, ordered.
3624 *
3625 * @param pi32 Pointer to the 32-bit variable to read.
3626 * @param i32 The 32-bit value to assign to *pi32.
3627 */
3628DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3629{
3630 ASMAtomicXchgS32(pi32, i32);
3631}
3632
3633
3634/**
3635 * Atomically writes a signed 32-bit value, unordered.
3636 *
3637 * @param pi32 Pointer to the 32-bit variable to read.
3638 * @param i32 The 32-bit value to assign to *pi32.
3639 */
3640DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3641{
3642 Assert(!((uintptr_t)pi32 & 3));
3643 *pi32 = i32;
3644}
3645
3646
3647/**
3648 * Atomically writes an unsigned 64-bit value, ordered.
3649 *
3650 * @param pu64 Pointer to the 64-bit variable.
3651 * @param u64 The 64-bit value to assign to *pu64.
3652 */
3653DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3654{
3655 ASMAtomicXchgU64(pu64, u64);
3656}
3657
3658
/**
 * Atomically writes an unsigned 64-bit value, unordered.
 *
 * @param   pu64    Pointer to the 64-bit variable.
 * @param   u64     The 64-bit value to assign to *pu64.
 */
DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
{
    Assert(!((uintptr_t)pu64 & 7));
#if ARCH_BITS == 64
    /* An aligned 64-bit store is a single atomic access on 64-bit hosts. */
    *pu64 = u64;
#else
    /* 32-bit hosts need the 64-bit xchg worker to store atomically. */
    ASMAtomicXchgU64(pu64, u64);
#endif
}
3674
3675
3676/**
3677 * Atomically writes a signed 64-bit value, ordered.
3678 *
3679 * @param pi64 Pointer to the 64-bit variable.
3680 * @param i64 The 64-bit value to assign to *pi64.
3681 */
3682DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3683{
3684 ASMAtomicXchgS64(pi64, i64);
3685}
3686
3687
/**
 * Atomically writes a signed 64-bit value, unordered.
 *
 * @param   pi64    Pointer to the 64-bit variable.
 * @param   i64     The 64-bit value to assign to *pi64.
 */
DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
{
    Assert(!((uintptr_t)pi64 & 7));
#if ARCH_BITS == 64
    /* An aligned 64-bit store is a single atomic access on 64-bit hosts. */
    *pi64 = i64;
#else
    /* 32-bit hosts need the 64-bit xchg worker to store atomically. */
    ASMAtomicXchgS64(pi64, i64);
#endif
}
3703
3704
/**
 * Atomically writes a boolean value, ordered.
 *
 * (The comment used to say "unordered"; this delegates to the ordered
 * ASMAtomicWriteU8 worker.)
 *
 * @param   pf      Pointer to the boolean variable.
 * @param   f       The boolean value to assign to *pf.
 */
DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
{
    ASMAtomicWriteU8((uint8_t volatile *)pf, f);
}
3715
3716
3717/**
3718 * Atomically writes a boolean value, unordered.
3719 *
3720 * @param pf Pointer to the boolean variable.
3721 * @param f The boolean value to assign to *pf.
3722 */
3723DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3724{
3725 *pf = f; /* byte writes are atomic on x86 */
3726}
3727
3728
/**
 * Atomically writes a pointer value, ordered.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.
 */
DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
{
/* Dispatch on the native pointer width to the matching integer worker. */
#if ARCH_BITS == 32
    ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
#elif ARCH_BITS == 64
    ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
#else
# error "ARCH_BITS is bogus"
#endif
}
3746
3747
/**
 * Atomically writes a pointer value, unordered.
 *
 * @param   ppv     Pointer to the pointer variable.
 * @param   pv      The pointer value to assign to *ppv.
 */
DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
{
/* Dispatch on the native pointer width to the matching integer worker. */
#if ARCH_BITS == 32
    ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
#elif ARCH_BITS == 64
    ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
#else
# error "ARCH_BITS is bogus"
#endif
}
3765
3766
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Only 1, 2, 4 and 8 byte variables are supported; anything else asserts.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
3784
/**
 * Atomically write a value which size might differ
 * between platforms or compilers, unordered.
 *
 * Dispatches on sizeof(*pu) to the matching fixed-width unordered write;
 * only 1, 2, 4 and 8 byte operands are supported, anything else asserts.
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 */
#define ASMAtomicUoWriteSize(pu, uNew) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
            case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
            case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
            case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
3802
3803
3804
3805
/**
 * Invalidate page.
 *
 * Executes INVLPG to flush the TLB entry for the page containing pv.
 * NOTE(review): INVLPG is a privileged instruction, so presumably this is
 * only usable from ring-0 context - confirm against callers.
 *
 * @param   pv      Address of the page to invalidate.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMInvalidatePage(void *pv);
#else
DECLINLINE(void) ASMInvalidatePage(void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
    __invlpg(pv);

# elif RT_INLINE_ASM_GNU_STYLE
    /* "m" constraint makes the operand a memory reference for invlpg. */
    __asm__ __volatile__("invlpg %0\n\t"
                         : : "m" (*(uint8_t *)pv));
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pv]
        invlpg  [rax]
# else
        mov     eax, [pv]
        invlpg  [eax]
# endif
    }
# endif
}
#endif
3836
3837
/* Sanity check: the helpers below hard-code a 0x1000 byte page size, so if
   PAGE_SIZE is defined (e.g. via iprt/param.h) it must agree. NT_INCLUDED is
   excluded because the NT headers define an unrelated PAGE_SIZE. */
#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
# if PAGE_SIZE != 0x1000
# error "PAGE_SIZE is not 0x1000!"
# endif
#endif
3843
/**
 * Zeros a 4K memory page.
 *
 * Implemented as a rep-stos of zero over 0x1000 bytes (512 qwords on AMD64,
 * 1024 dwords on x86).
 *
 * @param   pv  Pointer to the memory block. This must be page aligned.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZeroPage(volatile void *pv);
# else
DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
{
# if RT_INLINE_ASM_USES_INTRIN
# ifdef RT_ARCH_AMD64
    __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
# else
    __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
# endif

# elif RT_INLINE_ASM_GNU_STYLE
    /* uDummy absorbs the clobbered ECX/RCX count register. */
    RTUINTREG uDummy;
# ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("rep stosq"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 3),
                            "a" (0)
                          : "memory");
# else
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (uDummy)
                          : "0" (pv),
                            "c" (0x1000 >> 2),
                            "a" (0)
                          : "memory");
# endif
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        xor     rax, rax
        mov     ecx, 0200h      ; 512 qwords = 4K
        mov     rdi, [pv]
        rep     stosq
# else
        xor     eax, eax
        mov     ecx, 0400h      ; 1024 dwords = 4K
        mov     edi, [pv]
        rep     stosd
# endif
    }
# endif
}
# endif
3898
3899
/**
 * Zeros a memory block with a 32-bit aligned size.
 *
 * @param   pv  Pointer to the memory block.
 * @param   cb  Number of bytes in the block. This MUST be aligned on 32-bit!
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
#else
DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
{
# if RT_INLINE_ASM_USES_INTRIN
    __stosd((unsigned long *)pv, 0, cb >> 2);

# elif RT_INLINE_ASM_GNU_STYLE
    /* rep stosl stores EAX (0) cb/4 times; pv/cb double as dummy outputs
       for the clobbered EDI/ECX registers. */
    __asm__ __volatile__ ("rep stosl"
                          : "=D" (pv),
                            "=c" (cb)
                          : "0" (pv),
                            "1" (cb >> 2),
                            "a" (0)
                          : "memory");
# else
    __asm
    {
        xor     eax, eax
# ifdef RT_ARCH_AMD64
        mov     rcx, [cb]
        shr     rcx, 2
        mov     rdi, [pv]
# else
        mov     ecx, [cb]
        shr     ecx, 2
        mov     edi, [pv]
# endif
        rep stosd
    }
# endif
}
#endif
3940
3941
3942/**
3943 * Fills a memory block with a 32-bit aligned size.
3944 *
3945 * @param pv Pointer to the memory block.
3946 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3947 * @param u32 The value to fill with.
3948 */
3949#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3950DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3951#else
3952DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3953{
3954# if RT_INLINE_ASM_USES_INTRIN
3955 __stosd((unsigned long *)pv, 0, cb >> 2);
3956
3957# elif RT_INLINE_ASM_GNU_STYLE
3958 __asm__ __volatile__ ("rep stosl"
3959 : "=D" (pv),
3960 "=c" (cb)
3961 : "0" (pv),
3962 "1" (cb >> 2),
3963 "a" (u32)
3964 : "memory");
3965# else
3966 __asm
3967 {
3968# ifdef RT_ARCH_AMD64
3969 mov rcx, [cb]
3970 shr rcx, 2
3971 mov rdi, [pv]
3972# else
3973 mov ecx, [cb]
3974 shr ecx, 2
3975 mov edi, [pv]
3976# endif
3977 mov eax, [u32]
3978 rep stosd
3979 }
3980# endif
3981}
3982#endif
3983
3984
3985/**
3986 * Checks if a memory block is filled with the specified byte.
3987 *
3988 * This is a sort of inverted memchr.
3989 *
3990 * @returns Pointer to the byte which doesn't equal u8.
3991 * @returns NULL if all equal to u8.
3992 *
3993 * @param pv Pointer to the memory block.
3994 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3995 * @param u8 The value it's supposed to be filled with.
3996 */
3997#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3998DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3999#else
4000DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
4001{
4002/** @todo rewrite this in inline assembly? */
4003 uint8_t const *pb = (uint8_t const *)pv;
4004 for (; cb; cb--, pb++)
4005 if (RT_UNLIKELY(*pb != u8))
4006 return (void *)pb;
4007 return NULL;
4008}
4009#endif
4010
4011
4012/**
4013 * Checks if a memory block is filled with the specified 32-bit value.
4014 *
4015 * This is a sort of inverted memchr.
4016 *
4017 * @returns Pointer to the first value which doesn't equal u32.
4018 * @returns NULL if all equal to u32.
4019 *
4020 * @param pv Pointer to the memory block.
4021 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4022 * @param u32 The value it's supposed to be filled with.
4023 */
4024#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4025DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
4026#else
4027DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
4028{
4029/** @todo rewrite this in inline assembly? */
4030 uint32_t const *pu32 = (uint32_t const *)pv;
4031 for (; cb; cb -= 4, pu32++)
4032 if (RT_UNLIKELY(*pu32 != u32))
4033 return (uint32_t *)pu32;
4034 return NULL;
4035}
4036#endif
4037
4038
/**
 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
 *
 * On x86 this uses MUL so the full 64-bit product lands in EDX:EAX; on AMD64
 * plain C suffices since the compiler has native 64-bit multiply.
 *
 * @returns u32F1 * u32F2.
 * @param   u32F1   First factor.
 * @param   u32F2   Second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
#else
DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
{
# ifdef RT_ARCH_AMD64
    return (uint64_t)u32F1 * u32F2;
# else /* !RT_ARCH_AMD64 */
    uint64_t u64;
# if RT_INLINE_ASM_GNU_STYLE
    /* "=A" binds the 64-bit result to the EDX:EAX register pair. */
    __asm__ __volatile__("mull %%edx"
                         : "=A" (u64)
                         : "a" (u32F2), "d" (u32F1));
# else
    __asm
    {
        mov     edx, [u32F1]
        mov     eax, [u32F2]
        mul     edx
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
# endif
    return u64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4071
4072
/**
 * Multiplies two signed 32-bit values returning a signed 64-bit result.
 *
 * Signed counterpart of ASMMult2xU32RetU64; uses IMUL on x86.
 *
 * @returns i32F1 * i32F2.
 * @param   i32F1   First factor.
 * @param   i32F2   Second factor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
#else
DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
{
# ifdef RT_ARCH_AMD64
    return (int64_t)i32F1 * i32F2;
# else /* !RT_ARCH_AMD64 */
    int64_t i64;
# if RT_INLINE_ASM_GNU_STYLE
    /* "=A" binds the 64-bit result to the EDX:EAX register pair. */
    __asm__ __volatile__("imull %%edx"
                         : "=A" (i64)
                         : "a" (i32F2), "d" (i32F1));
# else
    __asm
    {
        mov     edx, [i32F1]
        mov     eax, [i32F2]
        imul    edx
        mov     dword ptr [i64], eax
        mov     dword ptr [i64 + 4], edx
    }
# endif
    return i64;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4105
4106
/**
 * Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
 *
 * NOTE(review): the 32-bit DIV path will raise \#DE if the quotient does not
 * fit in 32 bits (or u32 is zero) - caller is presumably expected to
 * guarantee this; confirm.
 *
 * @returns u64 / u32.
 * @param   u64     The dividend.
 * @param   u32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
#else
DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
{
# ifdef RT_ARCH_AMD64
    return (uint32_t)(u64 / u32);
# else /* !RT_ARCH_AMD64 */
# if RT_INLINE_ASM_GNU_STYLE
    /* "A" places the dividend in EDX:EAX; uDummy absorbs the remainder. */
    RTUINTREG uDummy;
    __asm__ __volatile__("divl %3"
                         : "=a" (u32), "=d"(uDummy)
                         : "A" (u64), "r" (u32));
# else
    __asm
    {
        mov     eax, dword ptr [u64]
        mov     edx, dword ptr [u64 + 4]
        mov     ecx, [u32]
        div     ecx
        mov     [u32], eax
    }
# endif
    return u32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4139
4140
/**
 * Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
 *
 * NOTE(review): the 32-bit IDIV path will raise \#DE if the quotient does not
 * fit in 32 bits (or i32 is zero) - caller is presumably expected to
 * guarantee this; confirm.
 *
 * @returns i64 / i32.
 * @param   i64     The dividend.
 * @param   i32     The divisor.
 */
#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
#else
DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
{
# ifdef RT_ARCH_AMD64
    return (int32_t)(i64 / i32);
# else /* !RT_ARCH_AMD64 */
# if RT_INLINE_ASM_GNU_STYLE
    /* "A" places the dividend in EDX:EAX; iDummy absorbs the remainder. */
    RTUINTREG iDummy;
    __asm__ __volatile__("idivl %3"
                         : "=a" (i32), "=d"(iDummy)
                         : "A" (i64), "r" (i32));
# else
    __asm
    {
        mov     eax, dword ptr [i64]
        mov     edx, dword ptr [i64 + 4]
        mov     ecx, [i32]
        idiv    ecx
        mov     [i32], eax
    }
# endif
    return i32;
# endif /* !RT_ARCH_AMD64 */
}
#endif
4173
4174
/**
 * Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
 * using a 96 bit intermediate result.
 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
 *       __udivdi3 and __umoddi3 even if this inline function is not used.
 *
 * @returns (u64A * u32B) / u32C.
 * @param   u64A    The 64-bit value.
 * @param   u32B    The 32-bit value to multiply by A.
 * @param   u32C    The 32-bit value to divide A*B by.
 */
#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
#else
DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
{
# if RT_INLINE_ASM_GNU_STYLE
# ifdef RT_ARCH_AMD64
    /* On AMD64 the product fits in RDX:RAX, so a single mulq/divq pair does. */
    uint64_t u64Result, u64Spill;
    __asm__ __volatile__("mulq %2\n\t"
                         "divq %3\n\t"
                         : "=a" (u64Result),
                           "=d" (u64Spill)
                         : "r" ((uint64_t)u32B),
                           "r" ((uint64_t)u32C),
                           "0" (u64A),
                           "1" (0));
    return u64Result;
# else
    /* 32-bit: build the 96-bit product piecewise and do a two-step long
       division (high dword first, then low with the remainder carried in
       EDX). The xchg shuffling keeps everything in the fixed registers the
       constraints pin down. */
    uint32_t u32Dummy;
    uint64_t u64Result;
    __asm__ __volatile__("mull %%ecx       \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
                                                    edx = u64Lo.hi = (u64A.lo * u32B).hi */
                         "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
                                                    eax = u64A.hi */
                         "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
                                                    edx = u32C */
                         "xchg %%edx,%%ecx \n\t" /* ecx = u32C
                                                    edx = u32B */
                         "mull %%edx       \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
                                                    edx = u64Hi.hi = (u64A.hi * u32B).hi */
                         "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
                         "adcl $0,%%edx    \n\t" /* u64Hi.hi += carry */
                         "divl %%ecx       \n\t" /* eax = u64Hi / u32C
                                                    edx = u64Hi % u32C */
                         "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
                         "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
                         "divl %%ecx       \n\t" /* u64Result.lo */
                         "movl %%edi,%%edx \n\t" /* u64Result.hi */
                         : "=A"(u64Result), "=c"(u32Dummy),
                           "=S"(u32Dummy), "=D"(u32Dummy)
                         : "a"((uint32_t)u64A),
                           "S"((uint32_t)(u64A >> 32)),
                           "c"(u32B),
                           "D"(u32C));
    return u64Result;
# endif
# else
    /* Portable fallback: split the multiply, then divide hi and lo parts. */
    RTUINT64U u;
    uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
    uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
    u64Hi += (u64Lo >> 32);
    u.s.Hi = (uint32_t)(u64Hi / u32C);
    u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
    return u.u;
# endif
}
#endif
4243
4244
/**
 * Probes a byte pointer for read access.
 *
 * While the function will not fault if the byte is not read accessible,
 * the idea is to do this in a safe place like before acquiring locks
 * and such like.
 *
 * Also, this functions guarantees that an eager compiler is not going
 * to optimize the probing away.
 *
 * @returns The byte read (value is incidental; the access is the point).
 * @param   pvByte  Pointer to the byte.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
#else
DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
{
    /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
    uint8_t u8;
# if RT_INLINE_ASM_GNU_STYLE
    /* Force a real load through the pointer by doing it in asm. */
    __asm__ __volatile__("movb (%1), %0\n\t"
                         : "=r" (u8)
                         : "r" (pvByte));
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvByte]
        mov     al, [rax]
# else
        mov     eax, [pvByte]
        mov     al, [eax]
# endif
        mov     [u8], al
    }
# endif
    return u8;
}
#endif
4284
4285/**
4286 * Probes a buffer for read access page by page.
4287 *
4288 * While the function will fault if the buffer is not fully read
4289 * accessible, the idea is to do this in a safe place like before
4290 * acquiring locks and such like.
4291 *
4292 * Also, this functions guarantees that an eager compiler is not going
4293 * to optimize the probing away.
4294 *
4295 * @param pvBuf Pointer to the buffer.
4296 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4297 */
4298DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4299{
4300 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4301 /* the first byte */
4302 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4303 ASMProbeReadByte(pu8);
4304
4305 /* the pages in between pages. */
4306 while (cbBuf > /*PAGE_SIZE*/0x1000)
4307 {
4308 ASMProbeReadByte(pu8);
4309 cbBuf -= /*PAGE_SIZE*/0x1000;
4310 pu8 += /*PAGE_SIZE*/0x1000;
4311 }
4312
4313 /* the last byte */
4314 ASMProbeReadByte(pu8 + cbBuf - 1);
4315}
4316
4317
/** @def ASMBreakpoint
 * Debugger Breakpoint.
 * @remark In the gnu world we add a nop instruction after the int3 to
 *         force gdb to remain at the int3 source line.
 * @remark The L4 kernel will try make sense of the breakpoint, thus the jmp.
 * @internal
 */
#if RT_INLINE_ASM_GNU_STYLE
# ifndef __L4ENV__
# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
# else
# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
# endif
#else
/* MSC: use the compiler intrinsic rather than inline asm. */
# define ASMBreakpoint() __debugbreak()
#endif
4334
4335
4336
4337/** @defgroup grp_inline_bits Bit Operations
4338 * @{
4339 */
4340
4341
/**
 * Sets a bit in a bitmap (non-atomic).
 *
 * Uses BTS without a LOCK prefix, so concurrent updates of the same dword
 * must be serialized by the caller.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        bts     [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        bts     [eax], edx
# endif
    }
# endif
}
#endif
4377
4378
/**
 * Atomically sets a bit in a bitmap, ordered.
 *
 * The LOCK prefix makes the read-modify-write atomic across CPUs.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btsl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock bts [eax], edx
# endif
    }
# endif
}
#endif
4413
4414
/**
 * Clears a bit in a bitmap (non-atomic).
 *
 * Uses BTR without a LOCK prefix, so concurrent updates of the same dword
 * must be serialized by the caller.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btr     [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btr     [eax], edx
# endif
    }
# endif
}
#endif
4450
4451
/**
 * Atomically clears a bit in a bitmap, ordered.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to clear.
 * @remark  No memory barrier, take care on smp.
 *          NOTE(review): the implementations below do use a LOCK prefix;
 *          confirm whether this remark is stale.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btrl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btr [eax], edx
# endif
    }
# endif
}
#endif
4485
4486
/**
 * Toggles a bit in a bitmap (non-atomic).
 *
 * Uses BTC without a LOCK prefix, so concurrent updates of the same dword
 * must be serialized by the caller.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_USES_INTRIN
    _bittestandcomplement((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        btc     [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        btc     [eax], edx
# endif
    }
# endif
}
#endif
4521
4522
/**
 * Atomically toggles a bit in a bitmap, ordered.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
{
# if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__ ("lock; btcl %1, %0"
                          : "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [rax], edx
# else
        mov     eax, [pvBitmap]
        mov     edx, [iBit]
        lock btc [eax], edx
# endif
    }
# endif
}
#endif
4555
4556
/**
 * Tests and sets a bit in a bitmap (non-atomic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm/intrinsic write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTS puts the old bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        bts     [rax], edx
# else
        mov     eax, [pvBitmap]
        bts     [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4601
4602
/**
 * Atomically tests and sets a bit in a bitmap, ordered.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and set.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm/intrinsic write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE
    /* LOCK BTS puts the old bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock bts [rax], edx
# else
        mov     eax, [pvBitmap]
        lock bts [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4646
4647
/**
 * Tests and clears a bit in a bitmap (non-atomic).
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm/intrinsic write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _bittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* BTR puts the old bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        btr     [rax], edx
# else
        mov     eax, [pvBitmap]
        btr     [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4692
4693
/**
 * Atomically tests and clears a bit in a bitmap, ordered.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and clear.
 * @remark  No memory barrier, take care on smp.
 *          NOTE(review): the implementations below do use a LOCK prefix;
 *          confirm whether this remark is stale.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm/intrinsic write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);

# elif RT_INLINE_ASM_GNU_STYLE
    /* LOCK BTR puts the old bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock btr [rax], edx
# else
        mov     eax, [pvBitmap]
        lock btr [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4739
4740
4741/**
4742 * Tests and toggles a bit in a bitmap.
4743 *
4744 * @returns true if the bit was set.
4745 * @returns false if the bit was clear.
4746 * @param pvBitmap Pointer to the bitmap.
4747 * @param iBit The bit to test and toggle.
4748 */
4749#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4750DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4751#else
4752DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4753{
4754 union { bool f; uint32_t u32; uint8_t u8; } rc;
4755# if RT_INLINE_ASM_USES_INTRIN
4756 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4757
4758# elif RT_INLINE_ASM_GNU_STYLE
4759 __asm__ __volatile__ ("btcl %2, %1\n\t"
4760 "setc %b0\n\t"
4761 "andl $1, %0\n\t"
4762 : "=q" (rc.u32),
4763 "=m" (*(volatile long *)pvBitmap)
4764 : "Ir" (iBit)
4765 : "memory");
4766# else
4767 __asm
4768 {
4769 mov edx, [iBit]
4770# ifdef RT_ARCH_AMD64
4771 mov rax, [pvBitmap]
4772 btc [rax], edx
4773# else
4774 mov eax, [pvBitmap]
4775 btc [eax], edx
4776# endif
4777 setc al
4778 and eax, 1
4779 mov [rc.u32], eax
4780 }
4781# endif
4782 return rc.f;
4783}
4784#endif
4785
4786
/**
 * Atomically tests and toggles a bit in a bitmap, ordered.
 *
 * @returns true if the bit was set.
 * @returns false if the bit was clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test and toggle.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_GNU_STYLE
    /* LOCK BTC puts the old bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        lock btc [rax], edx
# else
        mov     eax, [pvBitmap]
        lock btc [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4828
4829
/**
 * Tests if a bit in a bitmap is set.
 *
 * @returns true if the bit is set.
 * @returns false if the bit is clear.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBit        The bit to test.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
#else
DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
{
    /* Union lets the asm/intrinsic write an integer while the caller gets a bool. */
    union { bool f; uint32_t u32; uint8_t u8; } rc;
# if RT_INLINE_ASM_USES_INTRIN
    rc.u32 = _bittest((long *)pvBitmap, iBit);
# elif RT_INLINE_ASM_GNU_STYLE

    /* BT puts the tested bit in CF; setc + andl normalize it to 0/1. */
    __asm__ __volatile__ ("btl %2, %1\n\t"
                          "setc %b0\n\t"
                          "andl $1, %0\n\t"
                          : "=q" (rc.u32),
                            "=m" (*(volatile long *)pvBitmap)
                          : "Ir" (iBit)
                          : "memory");
# else
    __asm
    {
        mov     edx, [iBit]
# ifdef RT_ARCH_AMD64
        mov     rax, [pvBitmap]
        bt      [rax], edx
# else
        mov     eax, [pvBitmap]
        bt      [eax], edx
# endif
        setc    al
        and     eax, 1
        mov     [rc.u32], eax
    }
# endif
    return rc.f;
}
#endif
4874
4875
/**
 * Clears a bit range within a bitmap.
 *
 * Handles the partial first dword, the whole dwords in between (via
 * ASMMemZero32), and the partial last dword separately.
 *
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   iBitStart   The First bit to clear.
 * @param   iBitEnd     The first bit not to clear.
 */
DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
{
    if (iBitStart < iBitEnd)
    {
        volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
        int iStart = iBitStart & ~31;
        int iEnd   = iBitEnd & ~31;
        if (iStart == iEnd)
            /* Range lies within one dword: keep bits below iBitStart and
               at/above iBitEnd, clear everything in between. */
            *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
        else
        {
            /* bits in first dword. */
            if (iBitStart & 31)
            {
                *pu32 &= (1 << (iBitStart & 31)) - 1;
                pu32++;
                iBitStart = iStart + 32;
            }

            /* whole dword. */
            if (iBitStart != iEnd)
                ASMMemZero32(pu32, (iEnd - iBitStart) >> 3); /* bits -> bytes */

            /* bits in last dword. */
            if (iBitEnd & 31)
            {
                pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
                *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
            }
        }
    }
}
4915
4916
/**
 * Finds the first clear bit in a bitmap.
 *
 * Scans dword-wise with "repe scas" against 0xffffffff; when a dword with a
 * clear bit is found, BSF locates the first zero within it.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        cBits = RT_ALIGN_32(cBits, 32);
        /* repe scasl stops at the first dword != 0xffffffff; xor then exposes
           the clear bits so bsf can find the lowest one, and the pointer
           arithmetic converts the dword offset back to a bit index. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je    1f\n\t"
#  ifdef RT_ARCH_AMD64
                             "lea   -4(%%rdi), %%rdi\n\t"
                             "xorl  (%%rdi), %%eax\n\t"
                             "subq  %5, %%rdi\n\t"
#  else
                             "lea   -4(%%edi), %%edi\n\t"
                             "xorl  (%%edi), %%eax\n\t"
                             "subl  %5, %%edi\n\t"
#  endif
                             "shll  $3, %%edi\n\t"
                             "bsfl  %%eax, %%edx\n\t"
                             "addl  %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),
                               "mr" (pvBitmap),
                               "1" (cBits >> 5),
                               "2" (pvBitmap),
                               "3" (0xffffffff));
# else
        cBits = RT_ALIGN_32(cBits, 32);
        __asm
        {
#  ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi
#  else
            mov     edi, [pvBitmap]
            mov     ebx, edi
#  endif
            mov     edx, 0ffffffffh
            mov     eax, edx
            mov     ecx, [cBits]
            shr     ecx, 5
            repe    scasd
            je      done

#  ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]
            xor     eax, [rdi]
            sub     rdi, rbx
#  else
            lea     edi, [edi - 4]
            xor     eax, [edi]
            sub     edi, ebx
#  endif
            shl     edi, 3
            bsf     edx, eax
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    return -1;
}
#endif
4999
5000
/**
 * Finds the next clear bit in a bitmap.
 *
 * Inspects the remainder of the dword containing iBitPrev+1 by hand (inverted
 * and shifted so BSF can find a zero), then falls back to ASMBitFirstClear
 * for the rest of the bitmap.
 *
 * @returns Index of the first zero bit.
 * @returns -1 if no clear bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    int iBit = ++iBitPrev & 31;
    /* Advance pvBitmap to the dword containing the starting bit and discount
       the bits already behind us. */
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* inspect the first dword. */
        uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        __asm
        {
            mov     edx, [u32]
            bsf     eax, edx
            jnz     done
            mov     eax, 0ffffffffh
        done:
            mov     [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }
    }
    else
    {
        /* Search the rest of the bitmap. */
        iBit = ASMBitFirstClear(pvBitmap, cBits);
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    return iBit;
}
#endif
5068
5069
/**
 * Finds the first set bit in a bitmap.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
#else
DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    if (cBits)
    {
        int32_t iBit;
# if RT_INLINE_ASM_GNU_STYLE
        RTCCUINTREG uEAX, uECX, uEDI;
        /* Round up to whole dwords; trailing pad bits may be scanned too. */
        cBits = RT_ALIGN_32(cBits, 32);
        /* repe scasd compares eax (0) against each dword: it stops at the
         * first non-zero dword.  That dword is reloaded and bsf locates its
         * first set bit; edi minus the bitmap base (%5), scaled to bits
         * (shl 3), supplies the dword's bit offset.  On 'je 1f' (all zero)
         * edx keeps its preloaded -1. */
        __asm__ __volatile__("repe; scasl\n\t"
                             "je 1f\n\t"
# ifdef RT_ARCH_AMD64
                             "lea -4(%%rdi), %%rdi\n\t"
                             "movl (%%rdi), %%eax\n\t"
                             "subq %5, %%rdi\n\t"
# else
                             "lea -4(%%edi), %%edi\n\t"
                             "movl (%%edi), %%eax\n\t"
                             "subl %5, %%edi\n\t"
# endif
                             "shll $3, %%edi\n\t"
                             "bsfl %%eax, %%edx\n\t"
                             "addl %%edi, %%edx\n\t"
                             "1:\t\n"
                             : "=d" (iBit),
                               "=&c" (uECX),
                               "=&D" (uEDI),
                               "=&a" (uEAX)
                             : "0" (0xffffffff),     /* edx preloaded with -1 = not found */
                               "mr" (pvBitmap),      /* %5: base for offset calculation */
                               "1" (cBits >> 5),     /* ecx: dword count */
                               "2" (pvBitmap),       /* edi: scan pointer */
                               "3" (0));             /* eax: pattern = all bits clear */
# else
        cBits = RT_ALIGN_32(cBits, 32);
        __asm
        {
# ifdef RT_ARCH_AMD64
            mov     rdi, [pvBitmap]
            mov     rbx, rdi            ; remember the base for the offset calc
# else
            mov     edi, [pvBitmap]
            mov     ebx, edi            ; remember the base for the offset calc
# endif
            mov     edx, 0ffffffffh     ; preload the not-found result (-1)
            xor     eax, eax            ; scan pattern: all bits clear
            mov     ecx, [cBits]
            shr     ecx, 5              ; bit count -> dword count
            repe    scasd               ; find first non-zero dword
            je      done                ; none found -> return -1
# ifdef RT_ARCH_AMD64
            lea     rdi, [rdi - 4]      ; scasd overshot by one dword
            mov     eax, [rdi]          ; reload the dword with set bits
            sub     rdi, rbx            ; byte offset of the dword
# else
            lea     edi, [edi - 4]
            mov     eax, [edi]
            sub     edi, ebx
# endif
            shl     edi, 3              ; byte offset -> bit offset
            bsf     edx, eax            ; first set bit within the dword
            add     edx, edi
        done:
            mov     [iBit], edx
        }
# endif
        return iBit;
    }
    /* Empty bitmap: nothing to find. */
    return -1;
}
#endif
5151
5152
/**
 * Finds the next set bit in a bitmap.
 *
 * @returns Index of the next set bit (relative to the bitmap start).
 * @returns -1 if no set bit was found.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32.
 * @param   iBitPrev    The bit returned from the last search.
 *                      The search will start at iBitPrev + 1.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
#else
DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
{
    /* Start at iBitPrev + 1: iBit is the bit offset within the first dword;
       advance pvBitmap to that dword and drop the skipped bits from cBits. */
    int iBit = ++iBitPrev & 31;
    pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
    cBits -= iBitPrev & ~31;
    if (iBit)
    {
        /* Inspect the first (partial) dword, shifting out everything below
           the start position. */
        uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
# if RT_INLINE_ASM_USES_INTRIN
        unsigned long ulBit = 0;
        if (_BitScanForward(&ulBit, u32))
            return ulBit + iBitPrev;
        iBit = -1;
# else
# if RT_INLINE_ASM_GNU_STYLE
        /* bsf leaves ZF set when the source is zero; load -1 in that case. */
        __asm__ __volatile__("bsf %1, %0\n\t"
                             "jnz 1f\n\t"
                             "movl $-1, %0\n\t"
                             "1:\n\t"
                             : "=r" (iBit)
                             : "r" (u32));
# else
        __asm
        {
            mov     edx, u32
            bsf     eax, edx
            jnz     done                ; found a set bit?
            mov     eax, 0ffffffffh     ; no: return -1
        done:
            mov     [iBit], eax
        }
# endif
        if (iBit >= 0)
            return iBit + iBitPrev;
# endif
        /* Search the rest of the bitmap, if there is anything. */
        if (cBits > 32)
        {
            iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
            if (iBit >= 0)
                return iBit + (iBitPrev & ~31) + 32;
        }

    }
    else
    {
        /* The start position is dword aligned; search the rest of the bitmap. */
        iBit = ASMBitFirstSet(pvBitmap, cBits);
        if (iBit >= 0)
            return iBit + (iBitPrev & ~31);
    }
    return iBit;
}
#endif
5221
5222
/**
 * Finds the first bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * @returns index [1..32] of the first set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to ffs() in BSD.
 */
DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    /* _BitScanForward returns false for a zero input; the reported index is
       0-based, so increment it to get the 1-based convention used here. */
    if (_BitScanForward(&iBit, u32))
        iBit++;
    else
        iBit = 0;
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* bsf sets ZF for a zero source -> return 0; otherwise increment the
       0-based bsf result to make it 1-based. */
    __asm__ __volatile__("bsf %1, %0\n\t"
                         "jnz 1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp 2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    _asm
    {
        bsf     eax, [u32]
        jnz     found               ; ZF clear -> a set bit was found
        xor     eax, eax            ; zero input -> return 0
        jmp     done
    found:
        inc     eax                 ; 0-based -> 1-based
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
5267
5268
5269/**
5270 * Finds the first bit which is set in the given 32-bit integer.
5271 * Bits are numbered from 1 (least significant) to 32.
5272 *
5273 * @returns index [1..32] of the first set bit.
5274 * @returns 0 if all bits are cleared.
5275 * @param i32 Integer to search for set bits.
5276 * @remark Similar to ffs() in BSD.
5277 */
5278DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5279{
5280 return ASMBitFirstSetU32((uint32_t)i32);
5281}
5282
5283
/**
 * Finds the last bit which is set in the given 32-bit integer.
 * Bits are numbered from 1 (least significant) to 32.
 *
 * @returns index [1..32] of the last set bit.
 * @returns 0 if all bits are cleared.
 * @param   u32     Integer to search for set bits.
 * @remark  Similar to fls() in BSD.
 */
DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
{
# if RT_INLINE_ASM_USES_INTRIN
    unsigned long iBit;
    /* _BitScanReverse returns false for a zero input; the reported index is
       0-based, so increment it to get the 1-based convention used here. */
    if (_BitScanReverse(&iBit, u32))
        iBit++;
    else
        iBit = 0;
# elif RT_INLINE_ASM_GNU_STYLE
    uint32_t iBit;
    /* bsr sets ZF for a zero source -> return 0; otherwise increment the
       0-based bsr result to make it 1-based. */
    __asm__ __volatile__("bsrl %1, %0\n\t"
                         "jnz 1f\n\t"
                         "xorl %0, %0\n\t"
                         "jmp 2f\n"
                         "1:\n\t"
                         "incl %0\n"
                         "2:\n\t"
                         : "=r" (iBit)
                         : "rm" (u32));
# else
    uint32_t iBit;
    _asm
    {
        bsr     eax, [u32]
        jnz     found               ; ZF clear -> a set bit was found
        xor     eax, eax            ; zero input -> return 0
        jmp     done
    found:
        inc     eax                 ; 0-based -> 1-based
    done:
        mov     [iBit], eax
    }
# endif
    return iBit;
}
5328
5329
5330/**
5331 * Finds the last bit which is set in the given 32-bit integer.
5332 * Bits are numbered from 1 (least significant) to 32.
5333 *
5334 * @returns index [1..32] of the last set bit.
5335 * @returns 0 if all bits are cleared.
5336 * @param i32 Integer to search for set bits.
5337 * @remark Similar to fls() in BSD.
5338 */
5339DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5340{
5341 return ASMBitLastSetS32((uint32_t)i32);
5342}
5343
5344
/**
 * Reverse the byte order of the given 32-bit integer.
 *
 * @returns The input value with its four bytes in reverse order.
 * @param   u32     Integer
 */
DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
{
#if RT_INLINE_ASM_USES_INTRIN
    u32 = _byteswap_ulong(u32);
#elif RT_INLINE_ASM_GNU_STYLE
    /* bswap operates in place; tie input and output to the same register. */
    __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
#else
    _asm
    {
        mov     eax, [u32]
        bswap   eax
        mov     [u32], eax
    }
#endif
    return u32;
}
5365
5366/** @} */
5367
5368
5369/** @} */
5370#endif
5371
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette